mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-23 08:02:02 +00:00
Merge pull request #35475 from kssenii/remote-fs-cache-improvements
Allow to write remote fs cache on all write operations. Add `system.remote_filesystem_cache` table. Add `drop remote filesystem cache (<path>)` query. Add `system.remote_data_paths` table.
This commit is contained in:
commit
8ec802bc62
@ -96,7 +96,7 @@ else
|
||||
clickhouse-client --query "RENAME TABLE datasets.hits_v1 TO test.hits"
|
||||
clickhouse-client --query "RENAME TABLE datasets.visits_v1 TO test.visits"
|
||||
clickhouse-client --query "CREATE TABLE test.hits_s3 (WatchID UInt64, JavaEnable UInt8, Title String, GoodEvent Int16, EventTime DateTime, EventDate Date, CounterID UInt32, ClientIP UInt32, ClientIP6 FixedString(16), RegionID UInt32, UserID UInt64, CounterClass Int8, OS UInt8, UserAgent UInt8, URL String, Referer String, URLDomain String, RefererDomain String, Refresh UInt8, IsRobot UInt8, RefererCategories Array(UInt16), URLCategories Array(UInt16), URLRegions Array(UInt32), RefererRegions Array(UInt32), ResolutionWidth UInt16, ResolutionHeight UInt16, ResolutionDepth UInt8, FlashMajor UInt8, FlashMinor UInt8, FlashMinor2 String, NetMajor UInt8, NetMinor UInt8, UserAgentMajor UInt16, UserAgentMinor FixedString(2), CookieEnable UInt8, JavascriptEnable UInt8, IsMobile UInt8, MobilePhone UInt8, MobilePhoneModel String, Params String, IPNetworkID UInt32, TraficSourceID Int8, SearchEngineID UInt16, SearchPhrase String, AdvEngineID UInt8, IsArtifical UInt8, WindowClientWidth UInt16, WindowClientHeight UInt16, ClientTimeZone Int16, ClientEventTime DateTime, SilverlightVersion1 UInt8, SilverlightVersion2 UInt8, SilverlightVersion3 UInt32, SilverlightVersion4 UInt16, PageCharset String, CodeVersion UInt32, IsLink UInt8, IsDownload UInt8, IsNotBounce UInt8, FUniqID UInt64, HID UInt32, IsOldCounter UInt8, IsEvent UInt8, IsParameter UInt8, DontCountHits UInt8, WithHash UInt8, HitColor FixedString(1), UTCEventTime DateTime, Age UInt8, Sex UInt8, Income UInt8, Interests UInt16, Robotness UInt8, GeneralInterests Array(UInt16), RemoteIP UInt32, RemoteIP6 FixedString(16), WindowName Int32, OpenerName Int32, HistoryLength Int16, BrowserLanguage FixedString(2), BrowserCountry FixedString(2), SocialNetwork String, SocialAction String, HTTPError UInt16, SendTiming Int32, DNSTiming Int32, ConnectTiming Int32, ResponseStartTiming Int32, ResponseEndTiming Int32, FetchTiming Int32, RedirectTiming Int32, DOMInteractiveTiming Int32, DOMContentLoadedTiming Int32, DOMCompleteTiming Int32, 
LoadEventStartTiming Int32, LoadEventEndTiming Int32, NSToDOMContentLoadedTiming Int32, FirstPaintTiming Int32, RedirectCount Int8, SocialSourceNetworkID UInt8, SocialSourcePage String, ParamPrice Int64, ParamOrderID String, ParamCurrency FixedString(3), ParamCurrencyID UInt16, GoalsReached Array(UInt32), OpenstatServiceName String, OpenstatCampaignID String, OpenstatAdID String, OpenstatSourceID String, UTMSource String, UTMMedium String, UTMCampaign String, UTMContent String, UTMTerm String, FromTag String, HasGCLID UInt8, RefererHash UInt64, URLHash UInt64, CLID UInt32, YCLID UInt64, ShareService String, ShareURL String, ShareTitle String, ParsedParams Nested(Key1 String, Key2 String, Key3 String, Key4 String, Key5 String, ValueDouble Float64), IslandID FixedString(16), RequestNum UInt32, RequestTry UInt8) ENGINE = MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='s3_cache'"
|
||||
clickhouse-client --query "INSERT INTO test.hits_s3 SELECT * FROM test.hits"
|
||||
clickhouse-client --query "INSERT INTO test.hits_s3 SELECT * FROM test.hits SETTINGS enable_filesystem_cache_on_write_operations=0"
|
||||
fi
|
||||
|
||||
clickhouse-client --query "SHOW TABLES FROM test"
|
||||
|
@ -91,6 +91,7 @@ public:
|
||||
struct QueryScope
|
||||
{
|
||||
explicit QueryScope(ContextMutablePtr query_context);
|
||||
explicit QueryScope(ContextPtr query_context);
|
||||
~QueryScope();
|
||||
|
||||
void logPeakMemoryUsage();
|
||||
|
@ -3,6 +3,7 @@
|
||||
#include <Common/randomSeed.h>
|
||||
#include <Common/SipHash.h>
|
||||
#include <Common/hex.h>
|
||||
#include <Common/FileCacheSettings.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include <IO/WriteBufferFromFile.h>
|
||||
#include <IO/ReadSettings.h>
|
||||
@ -31,13 +32,11 @@ namespace
|
||||
|
||||
IFileCache::IFileCache(
|
||||
const String & cache_base_path_,
|
||||
size_t max_size_,
|
||||
size_t max_element_size_,
|
||||
size_t max_file_segment_size_)
|
||||
const FileCacheSettings & cache_settings_)
|
||||
: cache_base_path(cache_base_path_)
|
||||
, max_size(max_size_)
|
||||
, max_element_size(max_element_size_)
|
||||
, max_file_segment_size(max_file_segment_size_)
|
||||
, max_size(cache_settings_.max_size)
|
||||
, max_element_size(cache_settings_.max_elements)
|
||||
, max_file_segment_size(cache_settings_.max_file_segment_size)
|
||||
{
|
||||
}
|
||||
|
||||
@ -58,7 +57,7 @@ String IFileCache::getPathInLocalCache(const Key & key)
|
||||
return fs::path(cache_base_path) / key_str.substr(0, 3) / key_str;
|
||||
}
|
||||
|
||||
bool IFileCache::shouldBypassCache()
|
||||
bool IFileCache::isReadOnly()
|
||||
{
|
||||
return !CurrentThread::isInitialized()
|
||||
|| !CurrentThread::get().getQueryContext()
|
||||
@ -71,8 +70,8 @@ void IFileCache::assertInitialized() const
|
||||
throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR, "Cache not initialized");
|
||||
}
|
||||
|
||||
LRUFileCache::LRUFileCache(const String & cache_base_path_, size_t max_size_, size_t max_element_size_, size_t max_file_segment_size_)
|
||||
: IFileCache(cache_base_path_, max_size_, max_element_size_, max_file_segment_size_)
|
||||
LRUFileCache::LRUFileCache(const String & cache_base_path_, const FileCacheSettings & cache_settings_)
|
||||
: IFileCache(cache_base_path_, cache_settings_)
|
||||
, log(&Poco::Logger::get("LRUFileCache"))
|
||||
{
|
||||
}
|
||||
@ -205,8 +204,8 @@ FileSegments LRUFileCache::getImpl(
|
||||
return result;
|
||||
}
|
||||
|
||||
FileSegments LRUFileCache::splitRangeIntoEmptyCells(
|
||||
const Key & key, size_t offset, size_t size, std::lock_guard<std::mutex> & cache_lock)
|
||||
FileSegments LRUFileCache::splitRangeIntoCells(
|
||||
const Key & key, size_t offset, size_t size, FileSegment::State state, std::lock_guard<std::mutex> & cache_lock)
|
||||
{
|
||||
assert(size > 0);
|
||||
|
||||
@ -222,9 +221,10 @@ FileSegments LRUFileCache::splitRangeIntoEmptyCells(
|
||||
current_cell_size = std::min(remaining_size, max_file_segment_size);
|
||||
remaining_size -= current_cell_size;
|
||||
|
||||
auto * cell = addCell(key, current_pos, current_cell_size, FileSegment::State::EMPTY, cache_lock);
|
||||
auto * cell = addCell(key, current_pos, current_cell_size, state, cache_lock);
|
||||
if (cell)
|
||||
file_segments.push_back(cell->file_segment);
|
||||
assert(cell);
|
||||
|
||||
current_pos += current_cell_size;
|
||||
}
|
||||
@ -250,7 +250,7 @@ FileSegmentsHolder LRUFileCache::getOrSet(const Key & key, size_t offset, size_t
|
||||
|
||||
if (file_segments.empty())
|
||||
{
|
||||
file_segments = splitRangeIntoEmptyCells(key, offset, size, cache_lock);
|
||||
file_segments = splitRangeIntoCells(key, offset, size, FileSegment::State::EMPTY, cache_lock);
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -295,7 +295,7 @@ FileSegmentsHolder LRUFileCache::getOrSet(const Key & key, size_t offset, size_t
|
||||
assert(current_pos < segment_range.left);
|
||||
|
||||
auto hole_size = segment_range.left - current_pos;
|
||||
file_segments.splice(it, splitRangeIntoEmptyCells(key, current_pos, hole_size, cache_lock));
|
||||
file_segments.splice(it, splitRangeIntoCells(key, current_pos, hole_size, FileSegment::State::EMPTY, cache_lock));
|
||||
|
||||
current_pos = segment_range.right + 1;
|
||||
++it;
|
||||
@ -309,7 +309,7 @@ FileSegmentsHolder LRUFileCache::getOrSet(const Key & key, size_t offset, size_t
|
||||
/// segmentN
|
||||
|
||||
auto hole_size = range.right - current_pos + 1;
|
||||
file_segments.splice(file_segments.end(), splitRangeIntoEmptyCells(key, current_pos, hole_size, cache_lock));
|
||||
file_segments.splice(file_segments.end(), splitRangeIntoCells(key, current_pos, hole_size, FileSegment::State::EMPTY, cache_lock));
|
||||
}
|
||||
}
|
||||
|
||||
@ -354,6 +354,21 @@ LRUFileCache::FileSegmentCell * LRUFileCache::addCell(
|
||||
return &(it->second);
|
||||
}
|
||||
|
||||
/// Registers the range of `size` bytes starting at `offset` of `key` as cells that are
/// created directly in DOWNLOADING state and returns a holder for the new file segments.
/// Throws REMOTE_FS_OBJECT_CACHE_ERROR if a cell is already found for (key, offset).
FileSegmentsHolder LRUFileCache::setDownloading(const Key & key, size_t offset, size_t size)
{
    std::lock_guard cache_lock(mutex);

    /// Only the cell looked up by the starting offset is checked here.
    if (getCell(key, offset, cache_lock) != nullptr)
    {
        throw Exception(
            ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR,
            "Cache cell already exists for key `{}` and offset {}",
            keyToStr(key), offset);
    }

    return FileSegmentsHolder(
        splitRangeIntoCells(key, offset, size, FileSegment::State::DOWNLOADING, cache_lock));
}
|
||||
|
||||
bool LRUFileCache::tryReserve(
|
||||
const Key & key_, size_t offset_, size_t size, std::lock_guard<std::mutex> & cache_lock)
|
||||
{
|
||||
@ -372,7 +387,8 @@ bool LRUFileCache::tryReserve(
|
||||
|
||||
auto is_overflow = [&]
|
||||
{
|
||||
return (current_size + size - removed_size > max_size)
|
||||
/// max_size == 0 means unlimited cache size, max_element_size == 0 means unlimited number of cache elements.
|
||||
return (max_size != 0 && current_size + size - removed_size > max_size)
|
||||
|| (max_element_size != 0 && queue_size > max_element_size);
|
||||
};
|
||||
|
||||
@ -484,6 +500,30 @@ void LRUFileCache::remove(const Key & key)
|
||||
fs::remove(key_path);
|
||||
}
|
||||
|
||||
void LRUFileCache::tryRemoveAll()
|
||||
{
|
||||
/// Try remove all cached files by cache_base_path.
|
||||
/// Only releasable file segments are evicted.
|
||||
|
||||
std::lock_guard cache_lock(mutex);
|
||||
|
||||
for (auto it = queue.begin(); it != queue.end();)
|
||||
{
|
||||
auto & [key, offset] = *it++;
|
||||
|
||||
auto * cell = getCell(key, offset, cache_lock);
|
||||
if (cell->releasable())
|
||||
{
|
||||
auto file_segment = cell->file_segment;
|
||||
if (file_segment)
|
||||
{
|
||||
std::lock_guard<std::mutex> segment_lock(file_segment->mutex);
|
||||
remove(file_segment->key(), file_segment->offset(), cache_lock, segment_lock);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void LRUFileCache::remove(
|
||||
Key key, size_t offset,
|
||||
std::lock_guard<std::mutex> & cache_lock, std::lock_guard<std::mutex> & /* segment_lock */)
|
||||
@ -668,6 +708,38 @@ bool LRUFileCache::isLastFileSegmentHolder(
|
||||
return cell->file_segment.use_count() == 2;
|
||||
}
|
||||
|
||||
/// Returns one snapshot (as produced by FileSegment::getSnapshot) per cell currently
/// registered in `files`. The cache mutex is held while the list is built.
FileSegments LRUFileCache::getSnapshot() const
{
    std::lock_guard cache_lock(mutex);

    FileSegments snapshot;
    for (const auto & file_entry : files)
    {
        const auto & cells_by_offset = file_entry.second;
        for (const auto & cell_entry : cells_by_offset)
            snapshot.push_back(FileSegment::getSnapshot(cell_entry.second.file_segment, cache_lock));
    }
    return snapshot;
}
|
||||
|
||||
std::vector<String> LRUFileCache::tryGetCachePaths(const Key & key)
|
||||
{
|
||||
std::lock_guard cache_lock(mutex);
|
||||
|
||||
std::vector<String> cache_paths;
|
||||
|
||||
const auto & cells_by_offset = files[key];
|
||||
|
||||
for (const auto & [offset, cell] : cells_by_offset)
|
||||
{
|
||||
if (cell.file_segment->state() == FileSegment::State::DOWNLOADED)
|
||||
cache_paths.push_back(getPathInLocalCache(key, offset));
|
||||
}
|
||||
|
||||
return cache_paths;
|
||||
}
|
||||
|
||||
LRUFileCache::FileSegmentCell::FileSegmentCell(FileSegmentPtr file_segment_, LRUQueue & queue_)
|
||||
: file_segment(file_segment_)
|
||||
{
|
||||
@ -685,12 +757,13 @@ LRUFileCache::FileSegmentCell::FileSegmentCell(FileSegmentPtr file_segment_, LRU
|
||||
break;
|
||||
}
|
||||
case FileSegment::State::EMPTY:
|
||||
case FileSegment::State::DOWNLOADING:
|
||||
{
|
||||
break;
|
||||
}
|
||||
default:
|
||||
throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR,
|
||||
"Can create cell with either DOWNLOADED or EMPTY state, got: {}",
|
||||
"Can create cell with either EMPTY, DOWNLOADED, DOWNLOADING state, got: {}",
|
||||
FileSegment::stateToString(file_segment->download_state));
|
||||
}
|
||||
}
|
||||
|
@ -33,9 +33,7 @@ public:
|
||||
|
||||
IFileCache(
|
||||
const String & cache_base_path_,
|
||||
size_t max_size_,
|
||||
size_t max_element_size_,
|
||||
size_t max_file_segment_size_);
|
||||
const FileCacheSettings & cache_settings_);
|
||||
|
||||
virtual ~IFileCache() = default;
|
||||
|
||||
@ -44,7 +42,9 @@ public:
|
||||
|
||||
virtual void remove(const Key & key) = 0;
|
||||
|
||||
static bool shouldBypassCache();
|
||||
virtual void tryRemoveAll() = 0;
|
||||
|
||||
static bool isReadOnly();
|
||||
|
||||
/// Cache capacity in bytes.
|
||||
size_t capacity() const { return max_size; }
|
||||
@ -55,6 +55,10 @@ public:
|
||||
|
||||
String getPathInLocalCache(const Key & key);
|
||||
|
||||
const String & getBasePath() const { return cache_base_path; }
|
||||
|
||||
virtual std::vector<String> tryGetCachePaths(const Key & key) = 0;
|
||||
|
||||
/**
|
||||
* Given an `offset` and `size` representing [offset, offset + size) bytes interval,
|
||||
* return list of cached non-overlapping non-empty
|
||||
@ -68,6 +72,10 @@ public:
|
||||
*/
|
||||
virtual FileSegmentsHolder getOrSet(const Key & key, size_t offset, size_t size) = 0;
|
||||
|
||||
virtual FileSegmentsHolder setDownloading(const Key & key, size_t offset, size_t size) = 0;
|
||||
|
||||
virtual FileSegments getSnapshot() const = 0;
|
||||
|
||||
/// For debug.
|
||||
virtual String dumpStructure(const Key & key) = 0;
|
||||
|
||||
@ -112,16 +120,22 @@ class LRUFileCache final : public IFileCache
|
||||
public:
|
||||
LRUFileCache(
|
||||
const String & cache_base_path_,
|
||||
size_t max_size_,
|
||||
size_t max_element_size_ = REMOTE_FS_OBJECTS_CACHE_DEFAULT_MAX_ELEMENTS,
|
||||
size_t max_file_segment_size_ = REMOTE_FS_OBJECTS_CACHE_DEFAULT_MAX_FILE_SEGMENT_SIZE);
|
||||
const FileCacheSettings & cache_settings_);
|
||||
|
||||
FileSegmentsHolder getOrSet(const Key & key, size_t offset, size_t size) override;
|
||||
|
||||
FileSegments getSnapshot() const override;
|
||||
|
||||
FileSegmentsHolder setDownloading(const Key & key, size_t offset, size_t size) override;
|
||||
|
||||
void initialize() override;
|
||||
|
||||
void remove(const Key & key) override;
|
||||
|
||||
void tryRemoveAll() override;
|
||||
|
||||
std::vector<String> tryGetCachePaths(const Key & key) override;
|
||||
|
||||
private:
|
||||
using FileKeyAndOffset = std::pair<Key, size_t>;
|
||||
using LRUQueue = std::list<FileKeyAndOffset>;
|
||||
@ -194,8 +208,8 @@ private:
|
||||
|
||||
void loadCacheInfoIntoMemory();
|
||||
|
||||
FileSegments splitRangeIntoEmptyCells(
|
||||
const Key & key, size_t offset, size_t size, std::lock_guard<std::mutex> & cache_lock);
|
||||
FileSegments splitRangeIntoCells(
|
||||
const Key & key, size_t offset, size_t size, FileSegment::State state, std::lock_guard<std::mutex> & cache_lock);
|
||||
|
||||
String dumpStructureImpl(const Key & key_, std::lock_guard<std::mutex> & cache_lock);
|
||||
|
||||
|
@ -15,28 +15,53 @@ FileCacheFactory & FileCacheFactory::instance()
|
||||
return ret;
|
||||
}
|
||||
|
||||
FileCachePtr FileCacheFactory::getImpl(const std::string & cache_base_path, std::lock_guard<std::mutex> &)
|
||||
/// Returns a copy of the whole base-path -> CacheData map, taken under the factory mutex.
FileCacheFactory::CacheByBasePath FileCacheFactory::getAll()
{
    std::lock_guard<std::mutex> guard(mutex);
    CacheByBasePath all_caches = caches;
    return all_caches;
}
|
||||
|
||||
/// Returns the settings stored for the cache registered under `cache_base_path`.
/// Throws BAD_ARGUMENTS if no cache is registered for that path.
/// NOTE(review): the returned reference points into the factory's map and is used after
/// the mutex is released -- confirm entries are never erased while callers hold it.
const FileCacheSettings & FileCacheFactory::getSettings(const std::string & cache_base_path)
{
    std::lock_guard lock(mutex);

    const auto * found = getImpl(cache_base_path, lock);
    if (!found)
        throw Exception(ErrorCodes::BAD_ARGUMENTS, "No cache found by path: {}", cache_base_path);

    return found->settings;
}
|
||||
|
||||
FileCacheFactory::CacheData * FileCacheFactory::getImpl(const std::string & cache_base_path, std::lock_guard<std::mutex> &)
|
||||
{
|
||||
auto it = caches.find(cache_base_path);
|
||||
if (it == caches.end())
|
||||
return nullptr;
|
||||
return it->second;
|
||||
return &it->second;
|
||||
}
|
||||
|
||||
/// Returns the cache registered under `cache_base_path`.
/// Throws BAD_ARGUMENTS if no cache is registered for that path.
FileCachePtr FileCacheFactory::get(const std::string & cache_base_path)
{
    std::lock_guard lock(mutex);

    const auto * found = getImpl(cache_base_path, lock);
    if (!found)
        throw Exception(ErrorCodes::BAD_ARGUMENTS, "No cache found by path: {}", cache_base_path);

    return found->cache;
}
|
||||
|
||||
FileCachePtr FileCacheFactory::getOrCreate(
|
||||
const std::string & cache_base_path, size_t max_size, size_t max_elements_size, size_t max_file_segment_size)
|
||||
const std::string & cache_base_path, const FileCacheSettings & file_cache_settings)
|
||||
{
|
||||
std::lock_guard lock(mutex);
|
||||
auto cache = getImpl(cache_base_path, lock);
|
||||
if (cache)
|
||||
{
|
||||
if (cache->capacity() != max_size)
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cache with path `{}` already exists, but has different max size", cache_base_path);
|
||||
return cache;
|
||||
}
|
||||
|
||||
cache = std::make_shared<LRUFileCache>(cache_base_path, max_size, max_elements_size, max_file_segment_size);
|
||||
caches.emplace(cache_base_path, cache);
|
||||
auto * cache_data = getImpl(cache_base_path, lock);
|
||||
if (cache_data)
|
||||
return cache_data->cache;
|
||||
|
||||
auto cache = std::make_shared<LRUFileCache>(cache_base_path, file_cache_settings);
|
||||
caches.emplace(cache_base_path, CacheData(cache, file_cache_settings));
|
||||
return cache;
|
||||
}
|
||||
|
||||
|
@ -1,6 +1,7 @@
|
||||
#pragma once
|
||||
|
||||
#include <Common/FileCache_fwd.h>
|
||||
#include <Common/FileCacheSettings.h>
|
||||
|
||||
#include <boost/noncopyable.hpp>
|
||||
#include <unordered_map>
|
||||
@ -14,16 +15,32 @@ namespace DB
|
||||
*/
|
||||
class FileCacheFactory final : private boost::noncopyable
|
||||
{
|
||||
struct CacheData
|
||||
{
|
||||
FileCachePtr cache;
|
||||
FileCacheSettings settings;
|
||||
|
||||
CacheData(FileCachePtr cache_, const FileCacheSettings & settings_) : cache(cache_), settings(settings_) {}
|
||||
};
|
||||
|
||||
using CacheByBasePath = std::unordered_map<std::string, CacheData>;
|
||||
|
||||
public:
|
||||
static FileCacheFactory & instance();
|
||||
|
||||
FileCachePtr getOrCreate(const std::string & cache_base_path, size_t max_size, size_t max_elements_size, size_t max_file_segment_size);
|
||||
FileCachePtr getOrCreate(const std::string & cache_base_path, const FileCacheSettings & file_cache_settings);
|
||||
|
||||
FileCachePtr get(const std::string & cache_base_path);
|
||||
|
||||
CacheByBasePath getAll();
|
||||
|
||||
const FileCacheSettings & getSettings(const std::string & cache_base_path);
|
||||
|
||||
private:
|
||||
FileCachePtr getImpl(const std::string & cache_base_path, std::lock_guard<std::mutex> &);
|
||||
CacheData * getImpl(const std::string & cache_base_path, std::lock_guard<std::mutex> &);
|
||||
|
||||
std::mutex mutex;
|
||||
std::unordered_map<std::string, FileCachePtr> caches;
|
||||
CacheByBasePath caches;
|
||||
};
|
||||
|
||||
}
|
||||
|
16
src/Common/FileCacheSettings.cpp
Normal file
16
src/Common/FileCacheSettings.cpp
Normal file
@ -0,0 +1,16 @@
|
||||
#include "FileCacheSettings.h"
|
||||
|
||||
#include <Poco/Util/AbstractConfiguration.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/// Fills the settings from the configuration subtree at `config_prefix`.
/// Keys read (each falls back to the default given in the call when the key is absent):
///   <prefix>.data_cache_max_size
///   <prefix>.data_cache_max_elements
///   <prefix>.max_file_segment_size
///   <prefix>.cache_on_write_operations
void FileCacheSettings::loadFromConfig(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix)
{
    max_size = config.getUInt64(config_prefix + ".data_cache_max_size", REMOTE_FS_OBJECTS_CACHE_DEFAULT_MAX_CACHE_SIZE);
    max_elements = config.getUInt64(config_prefix + ".data_cache_max_elements", REMOTE_FS_OBJECTS_CACHE_DEFAULT_MAX_ELEMENTS);
    max_file_segment_size = config.getUInt64(config_prefix + ".max_file_segment_size", REMOTE_FS_OBJECTS_CACHE_DEFAULT_MAX_FILE_SEGMENT_SIZE);

    /// The flag is boolean, so read it as one: getUInt64 would reject textual values such as `true`.
    cache_on_write_operations = config.getBool(config_prefix + ".cache_on_write_operations", false);
}
|
||||
|
||||
}
|
20
src/Common/FileCacheSettings.h
Normal file
20
src/Common/FileCacheSettings.h
Normal file
@ -0,0 +1,20 @@
|
||||
#pragma once
|
||||
|
||||
#include <Common/FileCache_fwd.h>
|
||||
|
||||
namespace Poco { namespace Util { class AbstractConfiguration; } }
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
struct FileCacheSettings
|
||||
{
|
||||
size_t max_size = 0;
|
||||
size_t max_elements = REMOTE_FS_OBJECTS_CACHE_DEFAULT_MAX_ELEMENTS;
|
||||
size_t max_file_segment_size = REMOTE_FS_OBJECTS_CACHE_DEFAULT_MAX_FILE_SEGMENT_SIZE;
|
||||
bool cache_on_write_operations = false;
|
||||
|
||||
void loadFromConfig(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix);
|
||||
};
|
||||
|
||||
}
|
@ -4,10 +4,13 @@
|
||||
namespace DB
|
||||
{
|
||||
|
||||
static constexpr int REMOTE_FS_OBJECTS_CACHE_DEFAULT_MAX_CACHE_SIZE = 1024 * 1024 * 1024;
|
||||
static constexpr int REMOTE_FS_OBJECTS_CACHE_DEFAULT_MAX_FILE_SEGMENT_SIZE = 100 * 1024 * 1024;
|
||||
static constexpr int REMOTE_FS_OBJECTS_CACHE_DEFAULT_MAX_ELEMENTS = 1024 * 1024;
|
||||
|
||||
class IFileCache;
|
||||
using FileCachePtr = std::shared_ptr<IFileCache>;
|
||||
|
||||
struct FileCacheSettings;
|
||||
|
||||
}
|
||||
|
@ -31,10 +31,34 @@ FileSegment::FileSegment(
|
||||
, log(&Poco::Logger::get("FileSegment"))
|
||||
#endif
|
||||
{
|
||||
if (download_state == State::DOWNLOADED)
|
||||
/// On creation, file segment state can be EMPTY, DOWNLOADED, DOWNLOADING.
|
||||
switch (download_state)
|
||||
{
|
||||
/// EMPTY is used when file segment is not in cache and
|
||||
/// someone will _potentially_ want to download it (after calling getOrSetDownloader()).
|
||||
case (State::EMPTY):
|
||||
{
|
||||
break;
|
||||
}
|
||||
/// DOWNLOADED is used either on initial cache metadata load into memory on server startup
|
||||
/// or on reduceSizeToDownloaded() -- when file segment object is updated.
|
||||
case (State::DOWNLOADED):
|
||||
{
|
||||
reserved_size = downloaded_size = size_;
|
||||
else if (download_state != State::EMPTY)
|
||||
throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR, "Can create cell with either DOWNLOADED or EMPTY state");
|
||||
break;
|
||||
}
|
||||
/// DOWNLOADING is used only for write-through caching (e.g. getOrSetDownloader() is not
|
||||
/// needed, downloader is set on file segment creation).
|
||||
case (State::DOWNLOADING):
|
||||
{
|
||||
downloader_id = getCallerId();
|
||||
break;
|
||||
}
|
||||
default:
|
||||
{
|
||||
throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR, "Can create cell with either EMPTY, DOWNLOADED, DOWNLOADING state");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
FileSegment::State FileSegment::state() const
|
||||
@ -49,6 +73,12 @@ size_t FileSegment::getDownloadOffset() const
|
||||
return range().left + getDownloadedSize(segment_lock);
|
||||
}
|
||||
|
||||
size_t FileSegment::getDownloadedSize() const
|
||||
{
|
||||
std::lock_guard segment_lock(mutex);
|
||||
return getDownloadedSize(segment_lock);
|
||||
}
|
||||
|
||||
size_t FileSegment::getDownloadedSize(std::lock_guard<std::mutex> & /* segment_lock */) const
|
||||
{
|
||||
if (download_state == State::DOWNLOADED)
|
||||
@ -60,24 +90,15 @@ size_t FileSegment::getDownloadedSize(std::lock_guard<std::mutex> & /* segment_l
|
||||
|
||||
String FileSegment::getCallerId()
|
||||
{
|
||||
return getCallerIdImpl(false);
|
||||
return getCallerIdImpl();
|
||||
}
|
||||
|
||||
String FileSegment::getCallerIdImpl(bool allow_non_strict_checking)
|
||||
String FileSegment::getCallerIdImpl()
|
||||
{
|
||||
if (IFileCache::shouldBypassCache())
|
||||
{
|
||||
/// getCallerId() can be called from completeImpl(), which can be called from complete().
|
||||
/// complete() is called from destructor of CachedReadBufferFromRemoteFS when there is no query id anymore.
|
||||
/// Allow non strict checking in this case. This works correctly as if getCallerIdImpl() is called from destructor,
|
||||
/// then we know that caller is not a downloader, because downloader is reset each nextImpl() call either
|
||||
/// manually or via SCOPE_EXIT.
|
||||
|
||||
if (allow_non_strict_checking)
|
||||
return "None";
|
||||
|
||||
throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR, "Cannot use cache without query id");
|
||||
}
|
||||
if (!CurrentThread::isInitialized()
|
||||
|| !CurrentThread::get().getQueryContext()
|
||||
|| CurrentThread::getQueryId().size == 0)
|
||||
return "None:" + toString(getThreadId());
|
||||
|
||||
return CurrentThread::getQueryId().toString() + ":" + toString(getThreadId());
|
||||
}
|
||||
@ -136,7 +157,6 @@ String FileSegment::getDownloader() const
|
||||
bool FileSegment::isDownloader() const
|
||||
{
|
||||
std::lock_guard segment_lock(mutex);
|
||||
LOG_TEST(log, "Checking for current downloader. Caller: {}, downloader: {}, current state: {}", getCallerId(), downloader_id, stateToString(download_state));
|
||||
return getCallerId() == downloader_id;
|
||||
}
|
||||
|
||||
@ -221,15 +241,9 @@ void FileSegment::write(const char * from, size_t size, size_t offset_)
|
||||
{
|
||||
std::lock_guard segment_lock(mutex);
|
||||
|
||||
auto info = getInfoForLogImpl(segment_lock);
|
||||
e.addMessage("while writing into cache, info: " + info);
|
||||
wrapWithCacheInfo(e, "while writing into cache", segment_lock);
|
||||
|
||||
LOG_ERROR(log, "Failed to write to cache. File segment info: {}", info);
|
||||
|
||||
download_state = State::PARTIALLY_DOWNLOADED_NO_CONTINUATION;
|
||||
|
||||
cache_writer->finalize();
|
||||
cache_writer.reset();
|
||||
setDownloadFailed(segment_lock);
|
||||
|
||||
cv.notify_all();
|
||||
|
||||
@ -239,6 +253,77 @@ void FileSegment::write(const char * from, size_t size, size_t offset_)
|
||||
assert(getDownloadOffset() == offset_ + size);
|
||||
}
|
||||
|
||||
void FileSegment::writeInMemory(const char * from, size_t size)
|
||||
{
|
||||
if (!size)
|
||||
throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR, "Attempt to write zero size cache file");
|
||||
|
||||
if (availableSize() < size)
|
||||
throw Exception(
|
||||
ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR,
|
||||
"Not enough space is reserved. Available: {}, expected: {}", availableSize(), size);
|
||||
|
||||
std::lock_guard segment_lock(mutex);
|
||||
|
||||
if (cache_writer)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cache writer already initialized");
|
||||
|
||||
auto download_path = cache->getPathInLocalCache(key(), offset());
|
||||
cache_writer = std::make_unique<WriteBufferFromFile>(download_path, size + 1);
|
||||
|
||||
try
|
||||
{
|
||||
cache_writer->write(from, size);
|
||||
}
|
||||
catch (Exception & e)
|
||||
{
|
||||
wrapWithCacheInfo(e, "while writing into cache", segment_lock);
|
||||
|
||||
setDownloadFailed(segment_lock);
|
||||
|
||||
cv.notify_all();
|
||||
|
||||
throw;
|
||||
}
|
||||
}
|
||||
|
||||
size_t FileSegment::finalizeWrite()
|
||||
{
|
||||
std::lock_guard segment_lock(mutex);
|
||||
|
||||
if (!cache_writer)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cache writer not initialized");
|
||||
|
||||
size_t size = cache_writer->offset();
|
||||
|
||||
if (size == 0)
|
||||
throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR, "Writing size is not allowed");
|
||||
|
||||
try
|
||||
{
|
||||
cache_writer->next();
|
||||
}
|
||||
catch (Exception & e)
|
||||
{
|
||||
wrapWithCacheInfo(e, "while writing into cache", segment_lock);
|
||||
|
||||
setDownloadFailed(segment_lock);
|
||||
|
||||
cv.notify_all();
|
||||
|
||||
throw;
|
||||
}
|
||||
|
||||
downloaded_size += size;
|
||||
|
||||
if (downloaded_size != range().size())
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected downloaded size to equal file segment size ({} == {})", downloaded_size, range().size());
|
||||
|
||||
setDownloaded(segment_lock);
|
||||
|
||||
return size;
|
||||
}
|
||||
|
||||
FileSegment::State FileSegment::wait()
|
||||
{
|
||||
std::unique_lock segment_lock(mutex);
|
||||
@ -303,6 +388,20 @@ void FileSegment::setDownloaded(std::lock_guard<std::mutex> & /* segment_lock */
|
||||
{
|
||||
download_state = State::DOWNLOADED;
|
||||
is_downloaded = true;
|
||||
downloader_id.clear();
|
||||
|
||||
if (cache_writer)
|
||||
{
|
||||
cache_writer->finalize();
|
||||
cache_writer.reset();
|
||||
remote_file_reader.reset();
|
||||
}
|
||||
}
|
||||
|
||||
void FileSegment::setDownloadFailed(std::lock_guard<std::mutex> & /* segment_lock */)
|
||||
{
|
||||
download_state = State::PARTIALLY_DOWNLOADED_NO_CONTINUATION;
|
||||
downloader_id.clear();
|
||||
|
||||
if (cache_writer)
|
||||
{
|
||||
@ -360,7 +459,7 @@ void FileSegment::complete(State state)
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
if (!downloader_id.empty() && downloader_id == getCallerIdImpl(true))
|
||||
if (!downloader_id.empty() && downloader_id == getCallerIdImpl())
|
||||
downloader_id.clear();
|
||||
|
||||
cv.notify_all();
|
||||
@ -385,7 +484,7 @@ void FileSegment::complete(std::lock_guard<std::mutex> & cache_lock)
|
||||
/// Segment state can be changed from DOWNLOADING or EMPTY only if the caller is the
|
||||
/// downloader or the only owner of the segment.
|
||||
|
||||
bool can_update_segment_state = downloader_id == getCallerIdImpl(true)
|
||||
bool can_update_segment_state = downloader_id == getCallerIdImpl()
|
||||
|| cache->isLastFileSegmentHolder(key(), offset(), cache_lock, segment_lock);
|
||||
|
||||
if (can_update_segment_state)
|
||||
@ -394,11 +493,11 @@ void FileSegment::complete(std::lock_guard<std::mutex> & cache_lock)
|
||||
|
||||
try
|
||||
{
|
||||
completeImpl(cache_lock, segment_lock, /* allow_non_strict_checking */true);
|
||||
completeImpl(cache_lock, segment_lock);
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
if (!downloader_id.empty() && downloader_id == getCallerIdImpl(true))
|
||||
if (!downloader_id.empty() && downloader_id == getCallerIdImpl())
|
||||
downloader_id.clear();
|
||||
|
||||
cv.notify_all();
|
||||
@ -408,7 +507,7 @@ void FileSegment::complete(std::lock_guard<std::mutex> & cache_lock)
|
||||
cv.notify_all();
|
||||
}
|
||||
|
||||
void FileSegment::completeImpl(std::lock_guard<std::mutex> & cache_lock, std::lock_guard<std::mutex> & segment_lock, bool allow_non_strict_checking)
|
||||
void FileSegment::completeImpl(std::lock_guard<std::mutex> & cache_lock, std::lock_guard<std::mutex> & segment_lock)
|
||||
{
|
||||
bool is_last_holder = cache->isLastFileSegmentHolder(key(), offset(), cache_lock, segment_lock);
|
||||
|
||||
@ -444,7 +543,7 @@ void FileSegment::completeImpl(std::lock_guard<std::mutex> & cache_lock, std::lo
|
||||
}
|
||||
}
|
||||
|
||||
if (!downloader_id.empty() && (downloader_id == getCallerIdImpl(allow_non_strict_checking) || is_last_holder))
|
||||
if (!downloader_id.empty() && (downloader_id == getCallerIdImpl() || is_last_holder))
|
||||
{
|
||||
LOG_TEST(log, "Clearing downloader id: {}, current state: {}", downloader_id, stateToString(download_state));
|
||||
downloader_id.clear();
|
||||
@ -471,6 +570,11 @@ String FileSegment::getInfoForLogImpl(std::lock_guard<std::mutex> & segment_lock
|
||||
return info.str();
|
||||
}
|
||||
|
||||
/// Appends `message` together with the segment's log info to exception `e`.
/// The caller must already hold the segment lock.
void FileSegment::wrapWithCacheInfo(Exception & e, const String & message, std::lock_guard<std::mutex> & segment_lock) const
{
    const String cache_state = getInfoForLogImpl(segment_lock);
    e.addMessage(fmt::format("{}, current cache state: {}", message, cache_state));
}
|
||||
|
||||
String FileSegment::stateToString(FileSegment::State state)
|
||||
{
|
||||
switch (state)
|
||||
@ -504,6 +608,23 @@ void FileSegment::assertCorrectnessImpl(std::lock_guard<std::mutex> & /* segment
|
||||
assert(download_state != FileSegment::State::DOWNLOADED || std::filesystem::file_size(cache->getPathInLocalCache(key(), offset())) > 0);
|
||||
}
|
||||
|
||||
/// Creates a standalone FileSegment that mirrors the statistics of `file_segment`.
/// The new object is constructed with the same offset, range size and key, a null cache
/// pointer and the EMPTY state; afterwards the download state, downloaded size, hit count
/// and the shared_ptr use count of the source segment are copied into it.
/// The cache lock parameter is unused in the body.
FileSegmentPtr FileSegment::getSnapshot(const FileSegmentPtr & file_segment, std::lock_guard<std::mutex> & /* cache_lock */)
{
    auto result = std::make_shared<FileSegment>(
        file_segment->offset(),
        file_segment->range().size(),
        file_segment->key(),
        nullptr,
        State::EMPTY);

    /// Counters first, in the same order as the original code reads them.
    result->hits_count = file_segment->getHitsCount();
    result->ref_count = file_segment.use_count();

    /// Download progress of the source segment.
    result->downloaded_size = file_segment->getDownloadedSize();
    result->download_state = file_segment->state();

    return result;
}
|
||||
|
||||
FileSegmentsHolder::~FileSegmentsHolder()
|
||||
{
|
||||
/// In CacheableReadBufferFromRemoteFS file segment's downloader removes file segments from
|
||||
|
@ -97,6 +97,15 @@ public:
|
||||
|
||||
void write(const char * from, size_t size, size_t offset_);
|
||||
|
||||
/**
|
||||
* writeInMemory and finalizeWrite are used together to write a single file with delay.
|
||||
* Both can be called only once, one after another. Used for writing cache via threadpool
|
||||
* on write operations. TODO: this solution is temporary, until adding a separate cache layer.
|
||||
*/
|
||||
void writeInMemory(const char * from, size_t size);
|
||||
|
||||
size_t finalizeWrite();
|
||||
|
||||
RemoteFileReaderPtr getRemoteFileReader();
|
||||
|
||||
void setRemoteFileReader(RemoteFileReaderPtr remote_file_reader_);
|
||||
@ -117,14 +126,24 @@ public:
|
||||
|
||||
size_t getDownloadOffset() const;
|
||||
|
||||
size_t getDownloadedSize() const;
|
||||
|
||||
void completeBatchAndResetDownloader();
|
||||
|
||||
void complete(State state);
|
||||
|
||||
String getInfoForLog() const;
|
||||
|
||||
/// Returns the current value of `hits_count`.
size_t getHitsCount() const { return hits_count; }
|
||||
|
||||
/// Returns the current value of `ref_count`.
size_t getRefCount() const { return ref_count; }
|
||||
|
||||
/// Increases `hits_count` by one.
void incrementHitsCount() { ++hits_count; }
|
||||
|
||||
void assertCorrectness() const;
|
||||
|
||||
static FileSegmentPtr getSnapshot(const FileSegmentPtr & file_segment, std::lock_guard<std::mutex> & cache_lock);
|
||||
|
||||
private:
|
||||
size_t availableSize() const { return reserved_size - downloaded_size; }
|
||||
|
||||
@ -133,6 +152,9 @@ private:
|
||||
void assertCorrectnessImpl(std::lock_guard<std::mutex> & segment_lock) const;
|
||||
|
||||
void setDownloaded(std::lock_guard<std::mutex> & segment_lock);
|
||||
void setDownloadFailed(std::lock_guard<std::mutex> & segment_lock);
|
||||
|
||||
void wrapWithCacheInfo(Exception & e, const String & message, std::lock_guard<std::mutex> & segment_lock) const;
|
||||
|
||||
bool lastFileSegmentHolder() const;
|
||||
|
||||
@ -144,9 +166,9 @@ private:
|
||||
|
||||
void completeImpl(
|
||||
std::lock_guard<std::mutex> & cache_lock,
|
||||
std::lock_guard<std::mutex> & segment_lock, bool allow_non_strict_checking = false);
|
||||
std::lock_guard<std::mutex> & segment_lock);
|
||||
|
||||
static String getCallerIdImpl(bool allow_non_strict_checking = false);
|
||||
static String getCallerIdImpl();
|
||||
|
||||
void resetDownloaderImpl(std::lock_guard<std::mutex> & segment_lock);
|
||||
|
||||
@ -180,6 +202,8 @@ private:
|
||||
bool detached = false;
|
||||
|
||||
std::atomic<bool> is_downloaded{false};
|
||||
std::atomic<size_t> hits_count = 0; /// cache hits.
|
||||
std::atomic<size_t> ref_count = 0; /// Used for getting snapshot state
|
||||
};
|
||||
|
||||
struct FileSegmentsHolder : private boost::noncopyable
|
||||
|
@ -216,6 +216,11 @@ public:
|
||||
return query_context.lock();
|
||||
}
|
||||
|
||||
auto getGlobalContext() const
|
||||
{
|
||||
return global_context.lock();
|
||||
}
|
||||
|
||||
void disableProfiling()
|
||||
{
|
||||
assert(!query_profiler_real && !query_profiler_cpu);
|
||||
|
@ -4,6 +4,7 @@
|
||||
#include <Common/FileCache.h>
|
||||
#include <Common/CurrentThread.h>
|
||||
#include <Common/filesystemHelpers.h>
|
||||
#include <Common/FileCacheSettings.h>
|
||||
#include <Common/tests/gtest_global_context.h>
|
||||
#include <Common/SipHash.h>
|
||||
#include <Common/hex.h>
|
||||
@ -102,7 +103,10 @@ TEST(LRUFileCache, get)
|
||||
query_context->setCurrentQueryId("query_id");
|
||||
DB::CurrentThread::QueryScope query_scope_holder(query_context);
|
||||
|
||||
auto cache = DB::LRUFileCache(cache_base_path, 30, 5);
|
||||
DB::FileCacheSettings settings;
|
||||
settings.max_size = 30;
|
||||
settings.max_elements = 5;
|
||||
auto cache = DB::LRUFileCache(cache_base_path, settings);
|
||||
cache.initialize();
|
||||
auto key = cache.hash("key1");
|
||||
|
||||
@ -472,7 +476,7 @@ TEST(LRUFileCache, get)
|
||||
{
|
||||
/// Test LRUCache::restore().
|
||||
|
||||
auto cache2 = DB::LRUFileCache(cache_base_path, 30, 5);
|
||||
auto cache2 = DB::LRUFileCache(cache_base_path, settings);
|
||||
cache2.initialize();
|
||||
|
||||
ASSERT_EQ(cache2.getStat().downloaded_size, 5);
|
||||
@ -491,7 +495,9 @@ TEST(LRUFileCache, get)
|
||||
{
|
||||
/// Test max file segment size
|
||||
|
||||
auto cache2 = DB::LRUFileCache(caches_dir / "cache2", 30, 5, /* max_file_segment_size */10);
|
||||
auto settings2 = settings;
|
||||
settings2.max_file_segment_size = 10;
|
||||
auto cache2 = DB::LRUFileCache(caches_dir / "cache2", settings2);
|
||||
cache2.initialize();
|
||||
|
||||
auto holder1 = cache2.getOrSet(key, 0, 25); /// Get [0, 24]
|
||||
|
@ -560,8 +560,10 @@ class IColumn;
|
||||
\
|
||||
M(UInt64, remote_fs_read_max_backoff_ms, 10000, "Max wait time when trying to read data for remote disk", 0) \
|
||||
M(UInt64, remote_fs_read_backoff_max_tries, 5, "Max attempts to read with backoff", 0) \
|
||||
M(Bool, remote_fs_enable_cache, true, "Use cache for remote filesystem. This setting does not turn on/off cache for disks (must me done via disk config), but allows to bypass cache for some queries if intended", 0) \
|
||||
M(UInt64, remote_fs_cache_max_wait_sec, 5, "Allow to wait at most this number of seconds for download of current remote_fs_buffer_size bytes, and skip cache if exceeded", 0) \
|
||||
M(Bool, enable_filesystem_cache, true, "Use cache for remote filesystem. This setting does not turn on/off cache for disks (must me done via disk config), but allows to bypass cache for some queries if intended", 0) \
|
||||
M(UInt64, filesystem_cache_max_wait_sec, 5, "Allow to wait at most this number of seconds for download of current remote_fs_buffer_size bytes, and skip cache if exceeded", 0) \
|
||||
M(Bool, enable_filesystem_cache_on_write_operations, false, "Write into cache on write operations. To actually work this setting requires be added to disk config too", 0) \
|
||||
M(Bool, read_from_filesystem_cache_if_exists_otherwise_bypass_cache, false, "", 0) \
|
||||
\
|
||||
M(UInt64, http_max_tries, 10, "Max attempts to read via http.", 0) \
|
||||
M(UInt64, http_retry_initial_backoff_ms, 100, "Min milliseconds for backoff, when retrying read via http", 0) \
|
||||
|
@ -90,7 +90,8 @@ std::unique_ptr<ReadBufferFromFileBase> DiskAzureBlobStorage::readFile(
|
||||
std::unique_ptr<WriteBufferFromFileBase> DiskAzureBlobStorage::writeFile(
|
||||
const String & path,
|
||||
size_t buf_size,
|
||||
WriteMode mode)
|
||||
WriteMode mode,
|
||||
const WriteSettings &)
|
||||
{
|
||||
auto blob_path = path + "_" + getRandomASCIIString(8); /// NOTE: path contains the tmp_* prefix in the blob name
|
||||
|
||||
|
@ -56,7 +56,8 @@ public:
|
||||
std::unique_ptr<WriteBufferFromFileBase> writeFile(
|
||||
const String & path,
|
||||
size_t buf_size,
|
||||
WriteMode mode) override;
|
||||
WriteMode mode,
|
||||
const WriteSettings & settings) override;
|
||||
|
||||
DiskType getType() const override;
|
||||
|
||||
|
@ -150,7 +150,7 @@ DiskCacheWrapper::readFile(
|
||||
/// Note: enabling `threadpool` read requires to call setReadUntilEnd().
|
||||
current_read_settings.remote_fs_method = RemoteFSReadMethod::read;
|
||||
/// Disable data cache.
|
||||
current_read_settings.remote_fs_enable_cache = false;
|
||||
current_read_settings.enable_filesystem_cache = false;
|
||||
|
||||
if (metadata->status == DOWNLOADING)
|
||||
{
|
||||
@ -167,7 +167,11 @@ DiskCacheWrapper::readFile(
|
||||
auto tmp_path = path + ".tmp";
|
||||
{
|
||||
auto src_buffer = DiskDecorator::readFile(path, current_read_settings, read_hint, file_size);
|
||||
auto dst_buffer = cache_disk->writeFile(tmp_path, settings.local_fs_buffer_size, WriteMode::Rewrite);
|
||||
|
||||
WriteSettings write_settings;
|
||||
write_settings.enable_filesystem_cache_on_write_operations = false;
|
||||
|
||||
auto dst_buffer = cache_disk->writeFile(tmp_path, settings.local_fs_buffer_size, WriteMode::Rewrite, write_settings);
|
||||
copyData(*src_buffer, *dst_buffer);
|
||||
}
|
||||
cache_disk->moveFile(tmp_path, path);
|
||||
@ -196,10 +200,15 @@ DiskCacheWrapper::readFile(
|
||||
}
|
||||
|
||||
std::unique_ptr<WriteBufferFromFileBase>
|
||||
DiskCacheWrapper::writeFile(const String & path, size_t buf_size, WriteMode mode)
|
||||
DiskCacheWrapper::writeFile(const String & path, size_t buf_size, WriteMode mode, const WriteSettings & settings)
|
||||
{
|
||||
if (!cache_file_predicate(path))
|
||||
return DiskDecorator::writeFile(path, buf_size, mode);
|
||||
return DiskDecorator::writeFile(path, buf_size, mode, settings);
|
||||
|
||||
WriteSettings current_settings = settings;
|
||||
/// There are two different cache implementations. Disable second one if the first is enabled.
|
||||
/// The first will soon be removed, this disabling is temporary.
|
||||
current_settings.enable_filesystem_cache_on_write_operations = false;
|
||||
|
||||
LOG_TEST(log, "Write file {} to cache", backQuote(path));
|
||||
|
||||
@ -208,15 +217,15 @@ DiskCacheWrapper::writeFile(const String & path, size_t buf_size, WriteMode mode
|
||||
cache_disk->createDirectories(dir_path);
|
||||
|
||||
return std::make_unique<WritingToCacheWriteBuffer>(
|
||||
cache_disk->writeFile(path, buf_size, mode),
|
||||
cache_disk->writeFile(path, buf_size, mode, current_settings),
|
||||
[this, path]()
|
||||
{
|
||||
/// Copy file from cache to actual disk when cached buffer is finalized.
|
||||
return cache_disk->readFile(path, ReadSettings(), /* read_hint= */ {}, /* file_size= */ {});
|
||||
},
|
||||
[this, path, buf_size, mode]()
|
||||
[this, path, buf_size, mode, current_settings]()
|
||||
{
|
||||
return DiskDecorator::writeFile(path, buf_size, mode);
|
||||
return DiskDecorator::writeFile(path, buf_size, mode, current_settings);
|
||||
});
|
||||
}
|
||||
|
||||
|
@ -40,7 +40,7 @@ public:
|
||||
std::optional<size_t> read_hint,
|
||||
std::optional<size_t> file_size) const override;
|
||||
|
||||
std::unique_ptr<WriteBufferFromFileBase> writeFile(const String & path, size_t buf_size, WriteMode mode) override;
|
||||
std::unique_ptr<WriteBufferFromFileBase> writeFile(const String & path, size_t buf_size, WriteMode mode, const WriteSettings &) override;
|
||||
|
||||
void removeFile(const String & path) override;
|
||||
void removeFileIfExists(const String & path) override;
|
||||
|
@ -121,9 +121,9 @@ DiskDecorator::readFile(
|
||||
}
|
||||
|
||||
std::unique_ptr<WriteBufferFromFileBase>
|
||||
DiskDecorator::writeFile(const String & path, size_t buf_size, WriteMode mode)
|
||||
DiskDecorator::writeFile(const String & path, size_t buf_size, WriteMode mode, const WriteSettings & settings)
|
||||
{
|
||||
return delegate->writeFile(path, buf_size, mode);
|
||||
return delegate->writeFile(path, buf_size, mode, settings);
|
||||
}
|
||||
|
||||
void DiskDecorator::removeFile(const String & path)
|
||||
|
@ -44,7 +44,8 @@ public:
|
||||
std::unique_ptr<WriteBufferFromFileBase> writeFile(
|
||||
const String & path,
|
||||
size_t buf_size,
|
||||
WriteMode mode) override;
|
||||
WriteMode mode,
|
||||
const WriteSettings & settings) override;
|
||||
|
||||
void removeFile(const String & path) override;
|
||||
void removeFileIfExists(const String & path) override;
|
||||
@ -71,6 +72,9 @@ public:
|
||||
void shutdown() override;
|
||||
void startup() override;
|
||||
void applyNewSettings(const Poco::Util::AbstractConfiguration & config, ContextPtr context, const String & config_prefix, const DisksMap & map) override;
|
||||
/// Forwards the request to the wrapped disk (`delegate`).
String getCacheBasePath() const override
{
    return delegate->getCacheBasePath();
}
|
||||
/// Forwards the request to the wrapped disk (`delegate`) and returns its result unchanged.
std::vector<String> getRemotePaths(const String & path) const override
{
    return delegate->getRemotePaths(path);
}
|
||||
/// Forwards the request to the wrapped disk (`delegate`); results are appended to `paths_map` by the delegate.
void getRemotePathsRecursive(const String & path, std::vector<LocalPathWithRemotePaths> & paths_map) override
{
    delegate->getRemotePathsRecursive(path, paths_map);
}
|
||||
|
||||
DiskPtr getMetadataDiskIfExistsOrSelf() override { return delegate->getMetadataDiskIfExistsOrSelf(); }
|
||||
|
||||
|
@ -269,7 +269,7 @@ std::unique_ptr<ReadBufferFromFileBase> DiskEncrypted::readFile(
|
||||
return std::make_unique<ReadBufferFromEncryptedFile>(settings.local_fs_buffer_size, std::move(buffer), key, header);
|
||||
}
|
||||
|
||||
std::unique_ptr<WriteBufferFromFileBase> DiskEncrypted::writeFile(const String & path, size_t buf_size, WriteMode mode)
|
||||
std::unique_ptr<WriteBufferFromFileBase> DiskEncrypted::writeFile(const String & path, size_t buf_size, WriteMode mode, const WriteSettings &)
|
||||
{
|
||||
auto wrapped_path = wrappedPath(path);
|
||||
FileEncryption::Header header;
|
||||
|
@ -126,7 +126,8 @@ public:
|
||||
std::unique_ptr<WriteBufferFromFileBase> writeFile(
|
||||
const String & path,
|
||||
size_t buf_size,
|
||||
WriteMode mode) override;
|
||||
WriteMode mode,
|
||||
const WriteSettings & settings) override;
|
||||
|
||||
void removeFile(const String & path) override
|
||||
{
|
||||
|
@ -345,7 +345,7 @@ std::unique_ptr<ReadBufferFromFileBase> DiskLocal::readFile(const String & path,
|
||||
}
|
||||
|
||||
std::unique_ptr<WriteBufferFromFileBase>
|
||||
DiskLocal::writeFile(const String & path, size_t buf_size, WriteMode mode)
|
||||
DiskLocal::writeFile(const String & path, size_t buf_size, WriteMode mode, const WriteSettings &)
|
||||
{
|
||||
int flags = (mode == WriteMode::Append) ? (O_APPEND | O_CREAT | O_WRONLY) : -1;
|
||||
return std::make_unique<WriteBufferFromFile>(fs::path(disk_path) / path, buf_size, flags);
|
||||
@ -624,7 +624,7 @@ bool DiskLocal::setup()
|
||||
pcg32_fast rng(randomSeed());
|
||||
UInt32 magic_number = rng();
|
||||
{
|
||||
auto buf = writeFile(disk_checker_path, DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Rewrite);
|
||||
auto buf = writeFile(disk_checker_path, DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Rewrite, {});
|
||||
writeIntBinary(magic_number, *buf);
|
||||
}
|
||||
disk_checker_magic_number = magic_number;
|
||||
|
@ -79,7 +79,8 @@ public:
|
||||
std::unique_ptr<WriteBufferFromFileBase> writeFile(
|
||||
const String & path,
|
||||
size_t buf_size,
|
||||
WriteMode mode) override;
|
||||
WriteMode mode,
|
||||
const WriteSettings & settings) override;
|
||||
|
||||
void removeFile(const String & path) override;
|
||||
void removeFileIfExists(const String & path) override;
|
||||
|
@ -326,7 +326,7 @@ std::unique_ptr<ReadBufferFromFileBase> DiskMemory::readFile(const String & path
|
||||
return std::make_unique<ReadIndirectBuffer>(path, iter->second.data);
|
||||
}
|
||||
|
||||
std::unique_ptr<WriteBufferFromFileBase> DiskMemory::writeFile(const String & path, size_t buf_size, WriteMode mode)
|
||||
std::unique_ptr<WriteBufferFromFileBase> DiskMemory::writeFile(const String & path, size_t buf_size, WriteMode mode, const WriteSettings &)
|
||||
{
|
||||
std::lock_guard lock(mutex);
|
||||
|
||||
|
@ -71,7 +71,8 @@ public:
|
||||
std::unique_ptr<WriteBufferFromFileBase> writeFile(
|
||||
const String & path,
|
||||
size_t buf_size,
|
||||
WriteMode mode) override;
|
||||
WriteMode mode,
|
||||
const WriteSettings & settings) override;
|
||||
|
||||
void removeFile(const String & path) override;
|
||||
void removeFileIfExists(const String & path) override;
|
||||
|
@ -214,10 +214,10 @@ std::unique_ptr<ReadBufferFromFileBase> DiskRestartProxy::readFile(
|
||||
return std::make_unique<RestartAwareReadBuffer>(*this, std::move(impl));
|
||||
}
|
||||
|
||||
std::unique_ptr<WriteBufferFromFileBase> DiskRestartProxy::writeFile(const String & path, size_t buf_size, WriteMode mode)
|
||||
std::unique_ptr<WriteBufferFromFileBase> DiskRestartProxy::writeFile(const String & path, size_t buf_size, WriteMode mode, const WriteSettings & settings)
|
||||
{
|
||||
ReadLock lock (mutex);
|
||||
auto impl = DiskDecorator::writeFile(path, buf_size, mode);
|
||||
auto impl = DiskDecorator::writeFile(path, buf_size, mode, settings);
|
||||
return std::make_unique<RestartAwareWriteBuffer>(*this, std::move(impl));
|
||||
}
|
||||
|
||||
@ -305,6 +305,24 @@ bool DiskRestartProxy::checkUniqueId(const String & id) const
|
||||
return DiskDecorator::checkUniqueId(id);
|
||||
}
|
||||
|
||||
/// Returns the cache base path obtained through DiskDecorator.
/// A ReadLock on `mutex` is held for the duration of the call.
String DiskRestartProxy::getCacheBasePath() const
{
    ReadLock read_guard(mutex);
    return DiskDecorator::getCacheBasePath();
}
|
||||
|
||||
std::vector<String> DiskRestartProxy::getRemotePaths(const String & path) const
|
||||
{
|
||||
ReadLock lock (mutex);
|
||||
return DiskDecorator::getRemotePaths(path);
|
||||
}
|
||||
|
||||
void DiskRestartProxy::getRemotePathsRecursive(const String & path, std::vector<LocalPathWithRemotePaths> & paths_map)
|
||||
{
|
||||
ReadLock lock (mutex);
|
||||
return DiskDecorator::getRemotePathsRecursive(path, paths_map);
|
||||
}
|
||||
|
||||
void DiskRestartProxy::restart()
|
||||
{
|
||||
/// Speed up processing unhealthy requests.
|
||||
|
@ -48,7 +48,7 @@ public:
|
||||
const ReadSettings & settings,
|
||||
std::optional<size_t> read_hint,
|
||||
std::optional<size_t> file_size) const override;
|
||||
std::unique_ptr<WriteBufferFromFileBase> writeFile(const String & path, size_t buf_size, WriteMode mode) override;
|
||||
std::unique_ptr<WriteBufferFromFileBase> writeFile(const String & path, size_t buf_size, WriteMode mode, const WriteSettings & settings) override;
|
||||
void removeFile(const String & path) override;
|
||||
void removeFileIfExists(const String & path) override;
|
||||
void removeDirectory(const String & path) override;
|
||||
@ -63,6 +63,9 @@ public:
|
||||
void truncateFile(const String & path, size_t size) override;
|
||||
String getUniqueId(const String & path) const override;
|
||||
bool checkUniqueId(const String & id) const override;
|
||||
String getCacheBasePath() const override;
|
||||
std::vector<String> getRemotePaths(const String & path) const override;
|
||||
void getRemotePathsRecursive(const String & path, std::vector<LocalPathWithRemotePaths> & paths_map) override;
|
||||
|
||||
void restart();
|
||||
|
||||
|
@ -77,7 +77,6 @@ public:
|
||||
UInt64 getTotalSpace() const final override { return std::numeric_limits<UInt64>::max(); }
|
||||
|
||||
UInt64 getAvailableSpace() const final override { return std::numeric_limits<UInt64>::max(); }
|
||||
|
||||
UInt64 getUnreservedSpace() const final override { return std::numeric_limits<UInt64>::max(); }
|
||||
|
||||
/// Read-only part
|
||||
@ -100,7 +99,7 @@ public:
|
||||
|
||||
/// Write and modification part
|
||||
|
||||
std::unique_ptr<WriteBufferFromFileBase> writeFile(const String &, size_t, WriteMode) override
|
||||
std::unique_ptr<WriteBufferFromFileBase> writeFile(const String &, size_t, WriteMode, const WriteSettings &) override
|
||||
{
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Disk {} is read-only", getName());
|
||||
}
|
||||
@ -165,6 +164,10 @@ public:
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Disk {} is read-only", getName());
|
||||
}
|
||||
|
||||
/// This disk reports no remote paths: the argument is ignored and an empty list is returned.
std::vector<String> getRemotePaths(const String &) const override { return {}; }
|
||||
|
||||
/// Intentionally a no-op: both arguments are ignored and the output vector is left untouched.
void getRemotePathsRecursive(const String &, std::vector<LocalPathWithRemotePaths> &) override {}
|
||||
|
||||
/// Create part
|
||||
|
||||
void createFile(const String &) final override {}
|
||||
|
@ -88,7 +88,7 @@ std::unique_ptr<ReadBufferFromFileBase> DiskHDFS::readFile(const String & path,
|
||||
}
|
||||
|
||||
|
||||
std::unique_ptr<WriteBufferFromFileBase> DiskHDFS::writeFile(const String & path, size_t buf_size, WriteMode mode)
|
||||
std::unique_ptr<WriteBufferFromFileBase> DiskHDFS::writeFile(const String & path, size_t buf_size, WriteMode mode, const WriteSettings &)
|
||||
{
|
||||
/// Path to store new HDFS object.
|
||||
auto file_name = getRandomName();
|
||||
|
@ -60,7 +60,7 @@ public:
|
||||
std::optional<size_t> read_hint,
|
||||
std::optional<size_t> file_size) const override;
|
||||
|
||||
std::unique_ptr<WriteBufferFromFileBase> writeFile(const String & path, size_t buf_size, WriteMode mode) override;
|
||||
std::unique_ptr<WriteBufferFromFileBase> writeFile(const String & path, size_t buf_size, WriteMode mode, const WriteSettings & settings) override;
|
||||
|
||||
void removeFromRemoteFS(RemoteFSPathKeeperPtr fs_paths_keeper) override;
|
||||
|
||||
|
@ -9,6 +9,7 @@
|
||||
#include <Disks/Executor.h>
|
||||
#include <Disks/DiskType.h>
|
||||
#include <IO/ReadSettings.h>
|
||||
#include <IO/WriteSettings.h>
|
||||
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
@ -31,6 +32,11 @@ namespace Poco
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int NOT_IMPLEMENTED;
|
||||
}
|
||||
|
||||
class IDiskDirectoryIterator;
|
||||
using DiskDirectoryIteratorPtr = std::unique_ptr<IDiskDirectoryIterator>;
|
||||
|
||||
@ -168,7 +174,8 @@ public:
|
||||
virtual std::unique_ptr<WriteBufferFromFileBase> writeFile( /// NOLINT
|
||||
const String & path,
|
||||
size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE,
|
||||
WriteMode mode = WriteMode::Rewrite) = 0;
|
||||
WriteMode mode = WriteMode::Rewrite,
|
||||
const WriteSettings & settings = {}) = 0;
|
||||
|
||||
/// Remove file. Throws exception if file doesn't exists or it's a directory.
|
||||
virtual void removeFile(const String & path) = 0;
|
||||
@ -197,6 +204,24 @@ public:
|
||||
/// Second bool param is a flag to remove (true) or keep (false) shared data on S3
|
||||
virtual void removeSharedFileIfExists(const String & path, bool) { removeFileIfExists(path); }
|
||||
|
||||
|
||||
/// Default implementation returns an empty string; overriding disks may return a real path.
virtual String getCacheBasePath() const { return ""; }
|
||||
|
||||
/// Returns a list of paths because for Log family engines there might be
|
||||
/// multiple files in remote fs for single clickhouse file.
|
||||
virtual std::vector<String> getRemotePaths(const String &) const
|
||||
{
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method `getRemotePaths() not implemented for disk: {}`", getType());
|
||||
}
|
||||
|
||||
/// For one local path there might be multiple remote paths in case of Log family engines.
|
||||
using LocalPathWithRemotePaths = std::pair<String, std::vector<String>>;
|
||||
|
||||
virtual void getRemotePathsRecursive(const String &, std::vector<LocalPathWithRemotePaths> &)
|
||||
{
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method `getRemotePathsRecursive() not implemented for disk: {}`", getType());
|
||||
}
|
||||
|
||||
struct RemoveRequest
|
||||
{
|
||||
String path;
|
||||
|
@ -136,13 +136,15 @@ void IDiskRemote::Metadata::load()
|
||||
}
|
||||
catch (Exception & e)
|
||||
{
|
||||
tryLogCurrentException(__PRETTY_FUNCTION__);
|
||||
|
||||
if (e.code() == ErrorCodes::UNKNOWN_FORMAT)
|
||||
throw;
|
||||
|
||||
if (e.code() == ErrorCodes::MEMORY_LIMIT_EXCEEDED)
|
||||
throw;
|
||||
|
||||
throw Exception("Failed to read metadata file", e, ErrorCodes::UNKNOWN_FORMAT);
|
||||
throw Exception("Failed to read metadata file: " + metadata_file_path, e, ErrorCodes::UNKNOWN_FORMAT);
|
||||
}
|
||||
}
|
||||
|
||||
@ -341,6 +343,30 @@ void IDiskRemote::removeMetadataRecursive(const String & path, RemoteFSPathKeepe
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<String> IDiskRemote::getRemotePaths(const String & local_path) const
|
||||
{
|
||||
auto metadata = readMetadata(local_path);
|
||||
|
||||
std::vector<String> remote_paths;
|
||||
for (const auto & [remote_path, _] : metadata.remote_fs_objects)
|
||||
remote_paths.push_back(remote_path);
|
||||
|
||||
return remote_paths;
|
||||
}
|
||||
|
||||
void IDiskRemote::getRemotePathsRecursive(const String & local_path, std::vector<LocalPathWithRemotePaths> & paths_map)
|
||||
{
|
||||
if (metadata_disk->isFile(local_path))
|
||||
{
|
||||
paths_map.emplace_back(local_path, getRemotePaths(local_path));
|
||||
}
|
||||
else
|
||||
{
|
||||
for (auto it = iterateDirectory(local_path); it->isValid(); it->next())
|
||||
IDiskRemote::getRemotePathsRecursive(fs::path(local_path) / it->name(), paths_map);
|
||||
}
|
||||
}
|
||||
|
||||
DiskPtr DiskRemoteReservation::getDisk(size_t i) const
|
||||
{
|
||||
if (i != 0)
|
||||
@ -348,7 +374,6 @@ DiskPtr DiskRemoteReservation::getDisk(size_t i) const
|
||||
return disk;
|
||||
}
|
||||
|
||||
|
||||
void DiskRemoteReservation::update(UInt64 new_size)
|
||||
{
|
||||
std::lock_guard lock(disk->reservation_mutex);
|
||||
@ -402,6 +427,12 @@ IDiskRemote::IDiskRemote(
|
||||
}
|
||||
|
||||
|
||||
/// Returns the base path of the attached cache, or an empty string when no cache is set.
String IDiskRemote::getCacheBasePath() const
{
    if (!cache)
        return "";

    return cache->getBasePath();
}
|
||||
|
||||
|
||||
bool IDiskRemote::exists(const String & path) const
|
||||
{
|
||||
return metadata_disk->exists(path);
|
||||
|
@ -66,6 +66,12 @@ public:
|
||||
|
||||
const String & getPath() const final override { return metadata_disk->getPath(); }
|
||||
|
||||
String getCacheBasePath() const final override;
|
||||
|
||||
std::vector<String> getRemotePaths(const String & local_path) const final override;
|
||||
|
||||
void getRemotePathsRecursive(const String & local_path, std::vector<LocalPathWithRemotePaths> & paths_map) override;
|
||||
|
||||
/// Methods for working with metadata. For some operations (like hardlink
|
||||
/// creation) metadata can be updated concurrently from multiple threads
|
||||
/// (file actually rewritten on disk). So additional RW lock is required for
|
||||
@ -163,6 +169,7 @@ protected:
|
||||
const String remote_fs_root_path;
|
||||
|
||||
DiskPtr metadata_disk;
|
||||
|
||||
FileCachePtr cache;
|
||||
|
||||
private:
|
||||
|
@ -122,10 +122,25 @@ SeekableReadBufferPtr CachedReadBufferFromRemoteFS::getReadBufferForFileSegment(
|
||||
{
|
||||
auto range = file_segment->range();
|
||||
|
||||
size_t wait_download_max_tries = settings.remote_fs_cache_max_wait_sec;
|
||||
size_t wait_download_max_tries = settings.filesystem_cache_max_wait_sec;
|
||||
size_t wait_download_tries = 0;
|
||||
|
||||
auto download_state = file_segment->state();
|
||||
|
||||
if (settings.read_from_filesystem_cache_if_exists_otherwise_bypass_cache)
|
||||
{
|
||||
if (download_state == FileSegment::State::DOWNLOADED)
|
||||
{
|
||||
read_type = ReadType::CACHED;
|
||||
return getCacheReadBuffer(range.left);
|
||||
}
|
||||
else
|
||||
{
|
||||
read_type = ReadType::REMOTE_FS_READ_BYPASS_CACHE;
|
||||
return getRemoteFSReadBuffer(file_segment, read_type);
|
||||
}
|
||||
}
|
||||
|
||||
while (true)
|
||||
{
|
||||
switch (download_state)
|
||||
@ -375,6 +390,9 @@ bool CachedReadBufferFromRemoteFS::completeFileSegmentAndGetNext()
|
||||
|
||||
implementation_buffer = getImplementationBuffer(*current_file_segment_it);
|
||||
|
||||
if (read_type == ReadType::CACHED)
|
||||
(*current_file_segment_it)->incrementHitsCount();
|
||||
|
||||
LOG_TEST(log, "New segment: {}", (*current_file_segment_it)->range().toString());
|
||||
return true;
|
||||
}
|
||||
@ -559,9 +577,6 @@ bool CachedReadBufferFromRemoteFS::nextImplStep()
|
||||
{
|
||||
last_caller_id = FileSegment::getCallerId();
|
||||
|
||||
if (IFileCache::shouldBypassCache())
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Using cache when not allowed");
|
||||
|
||||
if (!initialized)
|
||||
initialize(file_offset_of_buffer_end, getTotalSizeToRead());
|
||||
|
||||
@ -606,6 +621,9 @@ bool CachedReadBufferFromRemoteFS::nextImplStep()
|
||||
else
|
||||
{
|
||||
implementation_buffer = getImplementationBuffer(*current_file_segment_it);
|
||||
|
||||
if (read_type == ReadType::CACHED)
|
||||
(*current_file_segment_it)->incrementHitsCount();
|
||||
}
|
||||
|
||||
assert(!internal_buffer.empty());
|
||||
|
@ -38,7 +38,7 @@ SeekableReadBufferPtr ReadBufferFromS3Gather::createImplementationBuffer(const S
|
||||
current_path = path;
|
||||
|
||||
auto cache = settings.remote_fs_cache;
|
||||
bool with_cache = cache && settings.remote_fs_enable_cache && !IFileCache::shouldBypassCache();
|
||||
bool with_cache = cache && settings.enable_filesystem_cache;
|
||||
|
||||
auto remote_file_reader_creator = [=, this]()
|
||||
{
|
||||
|
@ -54,14 +54,14 @@ std::future<IAsynchronousReader::Result> ThreadPoolRemoteFSReader::submit(Reques
|
||||
{
|
||||
ThreadStatus thread_status;
|
||||
|
||||
/// Save query context if any, because cache implementation needs it.
|
||||
if (query_context)
|
||||
thread_status.attachQueryContext(query_context);
|
||||
|
||||
/// To be able to pass ProfileEvents.
|
||||
if (running_group)
|
||||
thread_status.attachQuery(running_group);
|
||||
|
||||
/// Save query context if any, because cache implementation needs it.
|
||||
if (query_context)
|
||||
thread_status.attachQueryContext(query_context);
|
||||
|
||||
setThreadName("VFSRead");
|
||||
|
||||
CurrentMetrics::Increment metric_increment{CurrentMetrics::Read};
|
||||
@ -83,12 +83,11 @@ std::future<IAsynchronousReader::Result> ThreadPoolRemoteFSReader::submit(Reques
|
||||
|
||||
watch.stop();
|
||||
|
||||
if (running_group)
|
||||
CurrentThread::detachQuery();
|
||||
|
||||
ProfileEvents::increment(ProfileEvents::RemoteFSReadMicroseconds, watch.elapsedMicroseconds());
|
||||
ProfileEvents::increment(ProfileEvents::RemoteFSReadBytes, result.offset ? result.size - result.offset : result.size);
|
||||
|
||||
thread_status.detachQuery(/* if_not_detached */true);
|
||||
|
||||
return Result{ .size = result.size, .offset = result.offset };
|
||||
});
|
||||
|
||||
|
@ -2,6 +2,7 @@
|
||||
#include <Common/getRandomASCIIString.h>
|
||||
#include <Common/FileCacheFactory.h>
|
||||
#include <Common/FileCache.h>
|
||||
#include <Common/FileCacheSettings.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -64,18 +65,23 @@ FileCachePtr getCachePtrForDisk(
|
||||
if (!fs::exists(cache_base_path))
|
||||
fs::create_directories(cache_base_path);
|
||||
|
||||
LOG_INFO(&Poco::Logger::get("Disk(" + name + ")"), "Disk registered with cache path: {}", cache_base_path);
|
||||
|
||||
auto metadata_path = getDiskMetadataPath(name, config, config_prefix, context);
|
||||
if (metadata_path == cache_base_path)
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Metadata path and cache base path must be different: {}", metadata_path);
|
||||
|
||||
size_t max_cache_size = config.getUInt64(config_prefix + ".data_cache_max_size", 1024*1024*1024);
|
||||
size_t max_cache_elements = config.getUInt64(config_prefix + ".data_cache_max_elements", REMOTE_FS_OBJECTS_CACHE_DEFAULT_MAX_ELEMENTS);
|
||||
size_t max_file_segment_size = config.getUInt64(config_prefix + ".max_file_segment_size", REMOTE_FS_OBJECTS_CACHE_DEFAULT_MAX_FILE_SEGMENT_SIZE);
|
||||
FileCacheSettings file_cache_settings;
|
||||
file_cache_settings.loadFromConfig(config, config_prefix);
|
||||
|
||||
auto cache = FileCacheFactory::instance().getOrCreate(cache_base_path, max_cache_size, max_cache_elements, max_file_segment_size);
|
||||
auto cache = FileCacheFactory::instance().getOrCreate(cache_base_path, file_cache_settings);
|
||||
cache->initialize();
|
||||
|
||||
auto * log = &Poco::Logger::get("Disk(" + name + ")");
|
||||
LOG_INFO(log, "Disk registered with cache path: {}. Cache size: {}, max cache elements size: {}, max_file_segment_size: {}",
|
||||
cache_base_path,
|
||||
file_cache_settings.max_size ? toString(file_cache_settings.max_size) : "UNLIMITED",
|
||||
file_cache_settings.max_elements ? toString(file_cache_settings.max_elements) : "UNLIMITED",
|
||||
file_cache_settings.max_file_segment_size);
|
||||
|
||||
return cache;
|
||||
}
|
||||
|
||||
|
@ -18,6 +18,8 @@
|
||||
#include <Common/quoteString.h>
|
||||
#include <Common/thread_local_rng.h>
|
||||
#include <Common/getRandomASCIIString.h>
|
||||
#include <Common/FileCacheFactory.h>
|
||||
#include <Common/FileCache.h>
|
||||
|
||||
#include <Interpreters/Context.h>
|
||||
#include <Interpreters/threadPoolCallbackRunner.h>
|
||||
@ -227,7 +229,12 @@ std::unique_ptr<ReadBufferFromFileBase> DiskS3::readFile(const String & path, co
|
||||
|
||||
ReadSettings disk_read_settings{read_settings};
|
||||
if (cache)
|
||||
{
|
||||
if (IFileCache::isReadOnly())
|
||||
disk_read_settings.read_from_filesystem_cache_if_exists_otherwise_bypass_cache = true;
|
||||
|
||||
disk_read_settings.remote_fs_cache = cache;
|
||||
}
|
||||
|
||||
auto s3_impl = std::make_unique<ReadBufferFromS3Gather>(
|
||||
path, settings->client, bucket, metadata,
|
||||
@ -245,7 +252,7 @@ std::unique_ptr<ReadBufferFromFileBase> DiskS3::readFile(const String & path, co
|
||||
}
|
||||
}
|
||||
|
||||
std::unique_ptr<WriteBufferFromFileBase> DiskS3::writeFile(const String & path, size_t buf_size, WriteMode mode)
|
||||
std::unique_ptr<WriteBufferFromFileBase> DiskS3::writeFile(const String & path, size_t buf_size, WriteMode mode, const WriteSettings & write_settings)
|
||||
{
|
||||
auto settings = current_settings.get();
|
||||
|
||||
@ -265,6 +272,11 @@ std::unique_ptr<WriteBufferFromFileBase> DiskS3::writeFile(const String & path,
|
||||
LOG_TRACE(log, "{} to file by path: {}. S3 path: {}",
|
||||
mode == WriteMode::Rewrite ? "Write" : "Append", backQuote(metadata_disk->getPath() + path), remote_fs_root_path + blob_name);
|
||||
|
||||
bool cache_on_write = cache
|
||||
&& fs::path(path).extension() != ".tmp"
|
||||
&& write_settings.enable_filesystem_cache_on_write_operations
|
||||
&& FileCacheFactory::instance().getSettings(getCacheBasePath()).cache_on_write_operations;
|
||||
|
||||
auto s3_buffer = std::make_unique<WriteBufferFromS3>(
|
||||
settings->client,
|
||||
bucket,
|
||||
@ -274,7 +286,7 @@ std::unique_ptr<WriteBufferFromFileBase> DiskS3::writeFile(const String & path,
|
||||
settings->s3_upload_part_size_multiply_parts_count_threshold,
|
||||
settings->s3_max_single_part_upload_size,
|
||||
std::move(object_metadata),
|
||||
buf_size, threadPoolCallbackRunner(getThreadPoolWriter()));
|
||||
buf_size, threadPoolCallbackRunner(getThreadPoolWriter()), blob_name, cache_on_write ? cache : nullptr);
|
||||
|
||||
auto create_metadata_callback = [this, path, blob_name, mode] (size_t count)
|
||||
{
|
||||
|
@ -88,7 +88,8 @@ public:
|
||||
std::unique_ptr<WriteBufferFromFileBase> writeFile(
|
||||
const String & path,
|
||||
size_t buf_size,
|
||||
WriteMode mode) override;
|
||||
WriteMode mode,
|
||||
const WriteSettings & settings) override;
|
||||
|
||||
void removeFromRemoteFS(RemoteFSPathKeeperPtr keeper) override;
|
||||
|
||||
|
@ -96,7 +96,7 @@ TEST_F(DiskEncryptedTest, WriteAndRead)
|
||||
|
||||
/// Write a file.
|
||||
{
|
||||
auto buf = encrypted_disk->writeFile("a.txt", DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Rewrite);
|
||||
auto buf = encrypted_disk->writeFile("a.txt", DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Rewrite, {});
|
||||
writeString(std::string_view{"Some text"}, *buf);
|
||||
}
|
||||
|
||||
@ -122,7 +122,7 @@ TEST_F(DiskEncryptedTest, Append)
|
||||
|
||||
/// Write a file (we use the append mode).
|
||||
{
|
||||
auto buf = encrypted_disk->writeFile("a.txt", DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Append);
|
||||
auto buf = encrypted_disk->writeFile("a.txt", DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Append, {});
|
||||
writeString(std::string_view{"Some text"}, *buf);
|
||||
}
|
||||
|
||||
@ -132,7 +132,7 @@ TEST_F(DiskEncryptedTest, Append)
|
||||
|
||||
/// Append the file.
|
||||
{
|
||||
auto buf = encrypted_disk->writeFile("a.txt", DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Append);
|
||||
auto buf = encrypted_disk->writeFile("a.txt", DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Append, {});
|
||||
writeString(std::string_view{" Another text"}, *buf);
|
||||
}
|
||||
|
||||
@ -148,7 +148,7 @@ TEST_F(DiskEncryptedTest, Truncate)
|
||||
|
||||
/// Write a file (we use the append mode).
|
||||
{
|
||||
auto buf = encrypted_disk->writeFile("a.txt", DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Append);
|
||||
auto buf = encrypted_disk->writeFile("a.txt", DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Append, {});
|
||||
writeString(std::string_view{"Some text"}, *buf);
|
||||
}
|
||||
|
||||
@ -178,7 +178,7 @@ TEST_F(DiskEncryptedTest, ZeroFileSize)
|
||||
|
||||
/// Write nothing to a file.
|
||||
{
|
||||
auto buf = encrypted_disk->writeFile("a.txt", DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Rewrite);
|
||||
auto buf = encrypted_disk->writeFile("a.txt", DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Rewrite, {});
|
||||
}
|
||||
|
||||
EXPECT_EQ(encrypted_disk->getFileSize("a.txt"), 0);
|
||||
@ -187,7 +187,7 @@ TEST_F(DiskEncryptedTest, ZeroFileSize)
|
||||
|
||||
/// Append the file with nothing.
|
||||
{
|
||||
auto buf = encrypted_disk->writeFile("a.txt", DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Append);
|
||||
auto buf = encrypted_disk->writeFile("a.txt", DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Append, {});
|
||||
}
|
||||
|
||||
EXPECT_EQ(encrypted_disk->getFileSize("a.txt"), 0);
|
||||
@ -211,7 +211,7 @@ TEST_F(DiskEncryptedTest, AnotherFolder)
|
||||
|
||||
/// Write a file.
|
||||
{
|
||||
auto buf = encrypted_disk->writeFile("a.txt", DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Rewrite);
|
||||
auto buf = encrypted_disk->writeFile("a.txt", DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Rewrite, {});
|
||||
writeString(std::string_view{"Some text"}, *buf);
|
||||
}
|
||||
|
||||
@ -231,11 +231,11 @@ TEST_F(DiskEncryptedTest, RandomIV)
|
||||
|
||||
/// Write two files with the same contents.
|
||||
{
|
||||
auto buf = encrypted_disk->writeFile("a.txt", DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Rewrite);
|
||||
auto buf = encrypted_disk->writeFile("a.txt", DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Rewrite, {});
|
||||
writeString(std::string_view{"Some text"}, *buf);
|
||||
}
|
||||
{
|
||||
auto buf = encrypted_disk->writeFile("b.txt", DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Rewrite);
|
||||
auto buf = encrypted_disk->writeFile("b.txt", DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Rewrite, {});
|
||||
writeString(std::string_view{"Some text"}, *buf);
|
||||
}
|
||||
|
||||
@ -277,7 +277,7 @@ TEST_F(DiskEncryptedTest, RemoveFileDuringWriting)
|
||||
std::thread t1{[&]
|
||||
{
|
||||
for (size_t i = 0; i != n; ++i)
|
||||
encrypted_disk->writeFile("a.txt", DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Append);
|
||||
encrypted_disk->writeFile("a.txt", DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Append, {});
|
||||
}};
|
||||
|
||||
std::thread t2{[&]
|
||||
|
@ -77,8 +77,9 @@ struct ReadSettings
|
||||
|
||||
size_t remote_fs_read_max_backoff_ms = 10000;
|
||||
size_t remote_fs_read_backoff_max_tries = 4;
|
||||
bool remote_fs_enable_cache = true;
|
||||
size_t remote_fs_cache_max_wait_sec = 1;
|
||||
bool enable_filesystem_cache = true;
|
||||
size_t filesystem_cache_max_wait_sec = 1;
|
||||
bool read_from_filesystem_cache_if_exists_otherwise_bypass_cache = false;
|
||||
|
||||
size_t remote_read_min_bytes_for_seek = DBMS_DEFAULT_BUFFER_SIZE;
|
||||
|
||||
|
@ -2,15 +2,18 @@
|
||||
|
||||
#if USE_AWS_S3
|
||||
|
||||
#include <base/logger_useful.h>
|
||||
#include <Common/FileCache.h>
|
||||
|
||||
#include <IO/WriteBufferFromS3.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <Interpreters/Context.h>
|
||||
|
||||
#include <aws/s3/S3Client.h>
|
||||
#include <aws/s3/model/CreateMultipartUploadRequest.h>
|
||||
#include <aws/s3/model/CompleteMultipartUploadRequest.h>
|
||||
#include <aws/s3/model/PutObjectRequest.h>
|
||||
#include <aws/s3/model/UploadPartRequest.h>
|
||||
# include <base/logger_useful.h>
|
||||
|
||||
#include <utility>
|
||||
|
||||
@ -18,9 +21,9 @@
|
||||
namespace ProfileEvents
|
||||
{
|
||||
extern const Event S3WriteBytes;
|
||||
extern const Event RemoteFSCacheDownloadBytes;
|
||||
}
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
// S3 protocol does not allow to have multipart upload with more than 10000 parts.
|
||||
@ -32,6 +35,7 @@ const int S3_WARN_MAX_PARTS = 10000;
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int S3_ERROR;
|
||||
extern const int LOGICAL_ERROR;
|
||||
}
|
||||
|
||||
struct WriteBufferFromS3::UploadPartTask
|
||||
@ -40,6 +44,7 @@ struct WriteBufferFromS3::UploadPartTask
|
||||
bool is_finised = false;
|
||||
std::string tag;
|
||||
std::exception_ptr exception;
|
||||
std::optional<FileSegmentsHolder> cache_files;
|
||||
};
|
||||
|
||||
struct WriteBufferFromS3::PutObjectTask
|
||||
@ -47,6 +52,7 @@ struct WriteBufferFromS3::PutObjectTask
|
||||
Aws::S3::Model::PutObjectRequest req;
|
||||
bool is_finised = false;
|
||||
std::exception_ptr exception;
|
||||
std::optional<FileSegmentsHolder> cache_files;
|
||||
};
|
||||
|
||||
WriteBufferFromS3::WriteBufferFromS3(
|
||||
@ -59,7 +65,9 @@ WriteBufferFromS3::WriteBufferFromS3(
|
||||
size_t max_single_part_upload_size_,
|
||||
std::optional<std::map<String, String>> object_metadata_,
|
||||
size_t buffer_size_,
|
||||
ScheduleFunc schedule_)
|
||||
ScheduleFunc schedule_,
|
||||
const String & blob_name_,
|
||||
FileCachePtr cache_)
|
||||
: BufferWithOwnMemory<WriteBuffer>(buffer_size_, nullptr, 0)
|
||||
, bucket(bucket_)
|
||||
, key(key_)
|
||||
@ -70,6 +78,8 @@ WriteBufferFromS3::WriteBufferFromS3(
|
||||
, upload_part_size_multiply_threshold(upload_part_size_multiply_threshold_)
|
||||
, max_single_part_upload_size(max_single_part_upload_size_)
|
||||
, schedule(std::move(schedule_))
|
||||
, blob_name(blob_name_)
|
||||
, cache(cache_)
|
||||
{
|
||||
allocateBuffer();
|
||||
}
|
||||
@ -83,7 +93,41 @@ void WriteBufferFromS3::nextImpl()
|
||||
if (temporary_buffer->tellp() == -1)
|
||||
allocateBuffer();
|
||||
|
||||
temporary_buffer->write(working_buffer.begin(), offset());
|
||||
size_t size = offset();
|
||||
temporary_buffer->write(working_buffer.begin(), size);
|
||||
|
||||
ThreadGroupStatusPtr running_group = CurrentThread::isInitialized() && CurrentThread::get().getThreadGroup()
|
||||
? CurrentThread::get().getThreadGroup()
|
||||
: MainThreadStatus::getInstance().getThreadGroup();
|
||||
|
||||
if (cacheEnabled())
|
||||
{
|
||||
if (blob_name.empty())
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Empty blob name");
|
||||
|
||||
auto cache_key = cache->hash(blob_name);
|
||||
file_segments_holder.emplace(cache->setDownloading(cache_key, current_download_offset, size));
|
||||
current_download_offset += size;
|
||||
|
||||
size_t remaining_size = size;
|
||||
auto & file_segments = file_segments_holder->file_segments;
|
||||
for (auto file_segment_it = file_segments.begin(); file_segment_it != file_segments.end(); ++file_segment_it)
|
||||
{
|
||||
auto & file_segment = *file_segment_it;
|
||||
size_t current_size = std::min(file_segment->range().size(), remaining_size);
|
||||
remaining_size -= current_size;
|
||||
|
||||
if (file_segment->reserve(current_size))
|
||||
{
|
||||
file_segment->writeInMemory(working_buffer.begin(), current_size);
|
||||
}
|
||||
else
|
||||
{
|
||||
file_segments.erase(file_segment_it, file_segments.end());
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ProfileEvents::increment(ProfileEvents::S3WriteBytes, offset());
|
||||
|
||||
@ -95,7 +139,6 @@ void WriteBufferFromS3::nextImpl()
|
||||
|
||||
if (!multipart_upload_id.empty() && last_part_size > upload_part_size)
|
||||
{
|
||||
|
||||
writePart();
|
||||
|
||||
allocateBuffer();
|
||||
@ -126,6 +169,11 @@ WriteBufferFromS3::~WriteBufferFromS3()
|
||||
}
|
||||
}
|
||||
|
||||
bool WriteBufferFromS3::cacheEnabled() const
|
||||
{
|
||||
return cache != nullptr;
|
||||
}
|
||||
|
||||
void WriteBufferFromS3::preFinalize()
|
||||
{
|
||||
next();
|
||||
@ -213,6 +261,13 @@ void WriteBufferFromS3::writePart()
|
||||
}
|
||||
|
||||
fillUploadRequest(task->req, part_number);
|
||||
|
||||
if (file_segments_holder)
|
||||
{
|
||||
task->cache_files.emplace(std::move(*file_segments_holder));
|
||||
file_segments_holder.reset();
|
||||
}
|
||||
|
||||
schedule([this, task]()
|
||||
{
|
||||
try
|
||||
@ -224,6 +279,15 @@ void WriteBufferFromS3::writePart()
|
||||
task->exception = std::current_exception();
|
||||
}
|
||||
|
||||
try
|
||||
{
|
||||
finalizeCacheIfNeeded(task->cache_files);
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
tryLogCurrentException(__PRETTY_FUNCTION__);
|
||||
}
|
||||
|
||||
{
|
||||
std::lock_guard lock(bg_tasks_mutex);
|
||||
task->is_finised = true;
|
||||
@ -240,8 +304,14 @@ void WriteBufferFromS3::writePart()
|
||||
{
|
||||
UploadPartTask task;
|
||||
fillUploadRequest(task.req, part_tags.size() + 1);
|
||||
if (file_segments_holder)
|
||||
{
|
||||
task.cache_files.emplace(std::move(*file_segments_holder));
|
||||
file_segments_holder.reset();
|
||||
}
|
||||
processUploadRequest(task);
|
||||
part_tags.push_back(task.tag);
|
||||
finalizeCacheIfNeeded(task.cache_files);
|
||||
}
|
||||
}
|
||||
|
||||
@ -328,7 +398,14 @@ void WriteBufferFromS3::makeSinglepartUpload()
|
||||
if (schedule)
|
||||
{
|
||||
put_object_task = std::make_unique<PutObjectTask>();
|
||||
|
||||
fillPutRequest(put_object_task->req);
|
||||
if (file_segments_holder)
|
||||
{
|
||||
put_object_task->cache_files.emplace(std::move(*file_segments_holder));
|
||||
file_segments_holder.reset();
|
||||
}
|
||||
|
||||
schedule([this]()
|
||||
{
|
||||
try
|
||||
@ -340,6 +417,15 @@ void WriteBufferFromS3::makeSinglepartUpload()
|
||||
put_object_task->exception = std::current_exception();
|
||||
}
|
||||
|
||||
try
|
||||
{
|
||||
finalizeCacheIfNeeded(put_object_task->cache_files);
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
tryLogCurrentException(__PRETTY_FUNCTION__);
|
||||
}
|
||||
|
||||
{
|
||||
std::lock_guard lock(bg_tasks_mutex);
|
||||
put_object_task->is_finised = true;
|
||||
@ -349,14 +435,19 @@ void WriteBufferFromS3::makeSinglepartUpload()
|
||||
/// Releasing lock and condvar notification.
|
||||
bg_tasks_condvar.notify_one();
|
||||
}
|
||||
|
||||
});
|
||||
}
|
||||
else
|
||||
{
|
||||
PutObjectTask task;
|
||||
fillPutRequest(task.req);
|
||||
if (file_segments_holder)
|
||||
{
|
||||
task.cache_files.emplace(std::move(*file_segments_holder));
|
||||
file_segments_holder.reset();
|
||||
}
|
||||
processPutRequest(task);
|
||||
finalizeCacheIfNeeded(task.cache_files);
|
||||
}
|
||||
}
|
||||
|
||||
@ -384,6 +475,28 @@ void WriteBufferFromS3::processPutRequest(PutObjectTask & task)
|
||||
throw Exception(outcome.GetError().GetMessage(), ErrorCodes::S3_ERROR);
|
||||
}
|
||||
|
||||
void WriteBufferFromS3::finalizeCacheIfNeeded(std::optional<FileSegmentsHolder> & file_segments_holder)
|
||||
{
|
||||
if (!file_segments_holder)
|
||||
return;
|
||||
|
||||
auto & file_segments = file_segments_holder->file_segments;
|
||||
for (auto file_segment_it = file_segments.begin(); file_segment_it != file_segments.end();)
|
||||
{
|
||||
try
|
||||
{
|
||||
size_t size = (*file_segment_it)->finalizeWrite();
|
||||
file_segment_it = file_segments.erase(file_segment_it);
|
||||
|
||||
ProfileEvents::increment(ProfileEvents::RemoteFSCacheDownloadBytes, size);
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
tryLogCurrentException(__PRETTY_FUNCTION__);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void WriteBufferFromS3::waitForReadyBackGroundTasks()
|
||||
{
|
||||
if (schedule)
|
||||
|
@ -10,6 +10,10 @@
|
||||
#include <base/logger_useful.h>
|
||||
#include <base/types.h>
|
||||
|
||||
#include <Common/ThreadPool.h>
|
||||
#include <Common/FileCache_fwd.h>
|
||||
#include <Common/FileSegment.h>
|
||||
|
||||
#include <IO/BufferWithOwnMemory.h>
|
||||
#include <IO/WriteBuffer.h>
|
||||
|
||||
@ -30,6 +34,7 @@ namespace DB
|
||||
{
|
||||
|
||||
using ScheduleFunc = std::function<void(std::function<void()>)>;
|
||||
class WriteBufferFromFile;
|
||||
|
||||
/**
|
||||
* Buffer to write a data to a S3 object with specified bucket and key.
|
||||
@ -51,7 +56,9 @@ public:
|
||||
size_t max_single_part_upload_size_,
|
||||
std::optional<std::map<String, String>> object_metadata_ = std::nullopt,
|
||||
size_t buffer_size_ = DBMS_DEFAULT_BUFFER_SIZE,
|
||||
ScheduleFunc schedule_ = {});
|
||||
ScheduleFunc schedule_ = {},
|
||||
const String & blob_name = "",
|
||||
FileCachePtr cache_ = nullptr);
|
||||
|
||||
~WriteBufferFromS3() override;
|
||||
|
||||
@ -82,6 +89,8 @@ private:
|
||||
void waitForReadyBackGroundTasks();
|
||||
void waitForAllBackGroundTasks();
|
||||
|
||||
bool cacheEnabled() const;
|
||||
|
||||
String bucket;
|
||||
String key;
|
||||
std::optional<std::map<String, String>> object_metadata;
|
||||
@ -113,6 +122,12 @@ private:
|
||||
std::condition_variable bg_tasks_condvar;
|
||||
|
||||
Poco::Logger * log = &Poco::Logger::get("WriteBufferFromS3");
|
||||
|
||||
const String blob_name;
|
||||
FileCachePtr cache;
|
||||
size_t current_download_offset = 0;
|
||||
std::optional<FileSegmentsHolder> file_segments_holder;
|
||||
static void finalizeCacheIfNeeded(std::optional<FileSegmentsHolder> &);
|
||||
};
|
||||
|
||||
}
|
||||
|
12
src/IO/WriteSettings.h
Normal file
12
src/IO/WriteSettings.h
Normal file
@ -0,0 +1,12 @@
|
||||
#pragma once
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/// Settings to be passed to IDisk::writeFile()
|
||||
struct WriteSettings
{
    /// Off by default; DiskS3::writeFile() requires it (among other conditions) before it caches written data.
    bool enable_filesystem_cache_on_write_operations{false};
};
|
||||
|
||||
}
|
@ -68,6 +68,7 @@
|
||||
#include <IO/ReadBufferFromFile.h>
|
||||
#include <IO/UncompressedCache.h>
|
||||
#include <IO/MMappedFileCache.h>
|
||||
#include <IO/WriteSettings.h>
|
||||
#include <Parsers/ASTCreateQuery.h>
|
||||
#include <Parsers/ParserCreateQuery.h>
|
||||
#include <Parsers/parseQuery.h>
|
||||
@ -3242,8 +3243,9 @@ ReadSettings Context::getReadSettings() const
|
||||
|
||||
res.remote_fs_read_max_backoff_ms = settings.remote_fs_read_max_backoff_ms;
|
||||
res.remote_fs_read_backoff_max_tries = settings.remote_fs_read_backoff_max_tries;
|
||||
res.remote_fs_enable_cache = settings.remote_fs_enable_cache;
|
||||
res.remote_fs_cache_max_wait_sec = settings.remote_fs_cache_max_wait_sec;
|
||||
res.enable_filesystem_cache = settings.enable_filesystem_cache;
|
||||
res.filesystem_cache_max_wait_sec = settings.filesystem_cache_max_wait_sec;
|
||||
res.read_from_filesystem_cache_if_exists_otherwise_bypass_cache = settings.read_from_filesystem_cache_if_exists_otherwise_bypass_cache;
|
||||
|
||||
res.remote_read_min_bytes_for_seek = settings.remote_read_min_bytes_for_seek;
|
||||
|
||||
@ -3262,4 +3264,13 @@ ReadSettings Context::getReadSettings() const
|
||||
return res;
|
||||
}
|
||||
|
||||
/// Builds the WriteSettings for this context by copying the
/// `enable_filesystem_cache_on_write_operations` setting into a fresh WriteSettings value.
WriteSettings Context::getWriteSettings() const
{
    return WriteSettings{
        .enable_filesystem_cache_on_write_operations = settings.enable_filesystem_cache_on_write_operations,
    };
}
|
||||
|
||||
}
|
||||
|
@ -119,6 +119,7 @@ struct PartUUIDs;
|
||||
using PartUUIDsPtr = std::shared_ptr<PartUUIDs>;
|
||||
class KeeperDispatcher;
|
||||
class Session;
|
||||
struct WriteSettings;
|
||||
|
||||
class IInputFormat;
|
||||
class IOutputFormat;
|
||||
@ -913,6 +914,9 @@ public:
|
||||
/** Get settings for reading from filesystem. */
|
||||
ReadSettings getReadSettings() const;
|
||||
|
||||
/** Get settings for writing to filesystem. */
|
||||
WriteSettings getWriteSettings() const;
|
||||
|
||||
private:
|
||||
std::unique_lock<std::recursive_mutex> getLock() const;
|
||||
|
||||
|
@ -7,6 +7,8 @@
|
||||
#include <Common/ThreadPool.h>
|
||||
#include <Common/escapeForFileName.h>
|
||||
#include <Common/ShellCommand.h>
|
||||
#include <Common/FileCacheFactory.h>
|
||||
#include <Common/FileCache.h>
|
||||
#include <Interpreters/Context.h>
|
||||
#include <Interpreters/DatabaseCatalog.h>
|
||||
#include <Interpreters/ExternalDictionariesLoader.h>
|
||||
@ -297,6 +299,21 @@ BlockIO InterpreterSystemQuery::execute()
|
||||
cache->reset();
|
||||
break;
|
||||
#endif
|
||||
case Type::DROP_FILESYSTEM_CACHE:
|
||||
{
|
||||
if (query.filesystem_cache_path.empty())
|
||||
{
|
||||
auto caches = FileCacheFactory::instance().getAll();
|
||||
for (const auto & [_, cache_data] : caches)
|
||||
cache_data.cache->tryRemoveAll();
|
||||
}
|
||||
else
|
||||
{
|
||||
auto cache = FileCacheFactory::instance().get(query.filesystem_cache_path);
|
||||
cache->tryRemoveAll();
|
||||
}
|
||||
break;
|
||||
}
|
||||
case Type::RELOAD_DICTIONARY:
|
||||
{
|
||||
getContext()->checkAccess(AccessType::SYSTEM_RELOAD_DICTIONARY);
|
||||
@ -760,6 +777,7 @@ AccessRightsElements InterpreterSystemQuery::getRequiredAccessForDDLOnCluster()
|
||||
case Type::DROP_UNCOMPRESSED_CACHE:
|
||||
case Type::DROP_INDEX_MARK_CACHE:
|
||||
case Type::DROP_INDEX_UNCOMPRESSED_CACHE:
|
||||
case Type::DROP_FILESYSTEM_CACHE:
|
||||
{
|
||||
required_access.emplace_back(AccessType::SYSTEM_DROP_CACHE);
|
||||
break;
|
||||
|
@ -597,6 +597,16 @@ CurrentThread::QueryScope::QueryScope(ContextMutablePtr query_context)
|
||||
query_context->makeQueryContext();
|
||||
}
|
||||
|
||||
/// Opens a query scope for a context that must already have a query context.
/// Initializes the query for the current thread and attaches `query_context` to it.
/// Throws LOGICAL_ERROR when `query_context` has no query context.
CurrentThread::QueryScope::QueryScope(ContextPtr query_context)
{
    const bool has_query_context = query_context->hasQueryContext();
    if (!has_query_context)
    {
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot initialize query scope without query context");
    }

    CurrentThread::initializeQuery();
    CurrentThread::attachQueryContext(query_context);
}
|
||||
|
||||
void CurrentThread::QueryScope::logPeakMemoryUsage()
|
||||
{
|
||||
auto group = CurrentThread::getGroup();
|
||||
|
@ -28,6 +28,7 @@ public:
|
||||
#if USE_EMBEDDED_COMPILER
|
||||
DROP_COMPILED_EXPRESSION_CACHE,
|
||||
#endif
|
||||
DROP_FILESYSTEM_CACHE,
|
||||
STOP_LISTEN_QUERIES,
|
||||
START_LISTEN_QUERIES,
|
||||
RESTART_REPLICAS,
|
||||
@ -88,6 +89,7 @@ public:
|
||||
String volume;
|
||||
String disk;
|
||||
UInt64 seconds{};
|
||||
String filesystem_cache_path;
|
||||
|
||||
String getID(char) const override { return "SYSTEM query"; }
|
||||
|
||||
|
@ -230,7 +230,7 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::prepare()
|
||||
case MergeAlgorithm::Vertical :
|
||||
{
|
||||
ctx->rows_sources_file = createTemporaryFile(ctx->tmp_disk->getPath());
|
||||
ctx->rows_sources_uncompressed_write_buf = ctx->tmp_disk->writeFile(fileName(ctx->rows_sources_file->path()));
|
||||
ctx->rows_sources_uncompressed_write_buf = ctx->tmp_disk->writeFile(fileName(ctx->rows_sources_file->path()), DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Rewrite, global_ctx->context->getWriteSettings());
|
||||
ctx->rows_sources_write_buf = std::make_unique<CompressedWriteBuffer>(*ctx->rows_sources_uncompressed_write_buf);
|
||||
|
||||
MergeTreeDataPartInMemory::ColumnToSize local_merged_column_to_size;
|
||||
@ -261,7 +261,8 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::prepare()
|
||||
MergeTreeIndexFactory::instance().getMany(global_ctx->metadata_snapshot->getSecondaryIndices()),
|
||||
ctx->compression_codec,
|
||||
/*reset_columns=*/ true,
|
||||
ctx->blocks_are_granules_size);
|
||||
ctx->blocks_are_granules_size,
|
||||
global_ctx->context->getWriteSettings());
|
||||
|
||||
global_ctx->rows_written = 0;
|
||||
ctx->initial_reservation = global_ctx->space_reservation ? global_ctx->space_reservation->getSize() : 0;
|
||||
|
@ -305,7 +305,7 @@ MergeTreeData::MergeTreeData(
|
||||
format_version = min_format_version;
|
||||
if (!version_file.second->isReadOnly())
|
||||
{
|
||||
auto buf = version_file.second->writeFile(version_file.first);
|
||||
auto buf = version_file.second->writeFile(version_file.first, DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Rewrite, context_->getWriteSettings());
|
||||
writeIntText(format_version.toUnderType(), *buf);
|
||||
if (getContext()->getSettingsRef().fsync_metadata)
|
||||
buf->sync();
|
||||
@ -3798,9 +3798,9 @@ private:
|
||||
|
||||
for (const String & filename : filenames)
|
||||
{
|
||||
auto backup_entry = backup->readFile(data_path_in_backup + part_name + "/" + filename);
|
||||
auto backup_entry = backup->readFile(fs::path(data_path_in_backup) / part_name / filename);
|
||||
auto read_buffer = backup_entry->getReadBuffer();
|
||||
auto write_buffer = disk->writeFile(temp_part_dir + "/" + filename);
|
||||
auto write_buffer = disk->writeFile(fs::path(temp_part_dir) / filename);
|
||||
copyData(*read_buffer, *write_buffer);
|
||||
reservation->update(reservation->getSize() - backup_entry->getSize());
|
||||
}
|
||||
|
@ -24,12 +24,14 @@ MergeTreeDataPartWriterCompact::MergeTreeDataPartWriterCompact(
|
||||
, plain_file(data_part->volume->getDisk()->writeFile(
|
||||
part_path + MergeTreeDataPartCompact::DATA_FILE_NAME_WITH_EXTENSION,
|
||||
settings.max_compress_block_size,
|
||||
WriteMode::Rewrite))
|
||||
WriteMode::Rewrite,
|
||||
settings_.query_write_settings))
|
||||
, plain_hashing(*plain_file)
|
||||
, marks_file(data_part->volume->getDisk()->writeFile(
|
||||
part_path + MergeTreeDataPartCompact::DATA_FILE_NAME + marks_file_extension_,
|
||||
4096,
|
||||
WriteMode::Rewrite))
|
||||
WriteMode::Rewrite,
|
||||
settings_.query_write_settings))
|
||||
, marks(*marks_file)
|
||||
{
|
||||
const auto & storage_columns = metadata_snapshot->getColumns();
|
||||
|
@ -47,15 +47,16 @@ MergeTreeDataPartWriterOnDisk::Stream::Stream(
|
||||
const std::string & marks_path_,
|
||||
const std::string & marks_file_extension_,
|
||||
const CompressionCodecPtr & compression_codec_,
|
||||
size_t max_compress_block_size_) :
|
||||
size_t max_compress_block_size_,
|
||||
const WriteSettings & query_write_settings) :
|
||||
escaped_column_name(escaped_column_name_),
|
||||
data_file_extension{data_file_extension_},
|
||||
marks_file_extension{marks_file_extension_},
|
||||
plain_file(disk_->writeFile(data_path_ + data_file_extension, max_compress_block_size_, WriteMode::Rewrite)),
|
||||
plain_file(disk_->writeFile(data_path_ + data_file_extension, max_compress_block_size_, WriteMode::Rewrite, query_write_settings)),
|
||||
plain_hashing(*plain_file),
|
||||
compressed_buf(plain_hashing, compression_codec_, max_compress_block_size_),
|
||||
compressed(compressed_buf),
|
||||
marks_file(disk_->writeFile(marks_path_ + marks_file_extension, 4096, WriteMode::Rewrite)), marks(*marks_file)
|
||||
marks_file(disk_->writeFile(marks_path_ + marks_file_extension, 4096, WriteMode::Rewrite, query_write_settings)), marks(*marks_file)
|
||||
{
|
||||
}
|
||||
|
||||
@ -156,7 +157,7 @@ void MergeTreeDataPartWriterOnDisk::initPrimaryIndex()
|
||||
{
|
||||
if (metadata_snapshot->hasPrimaryKey())
|
||||
{
|
||||
index_file_stream = data_part->volume->getDisk()->writeFile(part_path + "primary.idx", DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Rewrite);
|
||||
index_file_stream = data_part->volume->getDisk()->writeFile(part_path + "primary.idx", DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Rewrite, settings.query_write_settings);
|
||||
index_stream = std::make_unique<HashingWriteBuffer>(*index_file_stream);
|
||||
}
|
||||
}
|
||||
@ -172,7 +173,7 @@ void MergeTreeDataPartWriterOnDisk::initSkipIndices()
|
||||
data_part->volume->getDisk(),
|
||||
part_path + stream_name, index_helper->getSerializedFileExtension(),
|
||||
part_path + stream_name, marks_file_extension,
|
||||
default_codec, settings.max_compress_block_size));
|
||||
default_codec, settings.max_compress_block_size, settings.query_write_settings));
|
||||
skip_indices_aggregators.push_back(index_helper->createIndexAggregator());
|
||||
skip_index_accumulated_marks.push_back(0);
|
||||
}
|
||||
|
@ -55,7 +55,8 @@ public:
|
||||
const std::string & marks_path_,
|
||||
const std::string & marks_file_extension_,
|
||||
const CompressionCodecPtr & compression_codec_,
|
||||
size_t max_compress_block_size_);
|
||||
size_t max_compress_block_size_,
|
||||
const WriteSettings & query_write_settings);
|
||||
|
||||
String escaped_column_name;
|
||||
std::string data_file_extension;
|
||||
|
@ -115,7 +115,8 @@ void MergeTreeDataPartWriterWide::addStreams(
|
||||
part_path + stream_name, DATA_FILE_EXTENSION,
|
||||
part_path + stream_name, marks_file_extension,
|
||||
compression_codec,
|
||||
settings.max_compress_block_size);
|
||||
settings.max_compress_block_size,
|
||||
settings.query_write_settings);
|
||||
};
|
||||
|
||||
ISerialization::SubstreamPath path;
|
||||
|
@ -433,7 +433,7 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeTempPart(
|
||||
|
||||
const auto & index_factory = MergeTreeIndexFactory::instance();
|
||||
auto out = std::make_unique<MergedBlockOutputStream>(new_data_part, metadata_snapshot, columns,
|
||||
index_factory.getMany(metadata_snapshot->getSecondaryIndices()), compression_codec);
|
||||
index_factory.getMany(metadata_snapshot->getSecondaryIndices()), compression_codec, false, false, context->getWriteSettings());
|
||||
|
||||
out->writeWithPermutation(block, perm_ptr);
|
||||
|
||||
@ -448,7 +448,11 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeTempPart(
|
||||
temp_part.streams.emplace_back(std::move(stream));
|
||||
}
|
||||
}
|
||||
auto finalizer = out->finalizePartAsync(new_data_part, data_settings->fsync_after_insert);
|
||||
auto finalizer = out->finalizePartAsync(
|
||||
new_data_part,
|
||||
data_settings->fsync_after_insert,
|
||||
nullptr, nullptr,
|
||||
context->getWriteSettings());
|
||||
|
||||
temp_part.part = new_data_part;
|
||||
temp_part.streams.emplace_back(TemporaryPart::Stream{.stream = std::move(out), .finalizer = std::move(finalizer)});
|
||||
|
@ -2,6 +2,7 @@
|
||||
#include <cstddef>
|
||||
#include <Core/Settings.h>
|
||||
#include <Storages/MergeTree/MergeTreeSettings.h>
|
||||
#include <IO/WriteSettings.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
@ -30,6 +31,7 @@ struct MergeTreeWriterSettings
|
||||
|
||||
MergeTreeWriterSettings(
|
||||
const Settings & global_settings,
|
||||
const WriteSettings & query_write_settings_,
|
||||
const MergeTreeSettingsPtr & storage_settings,
|
||||
bool can_use_adaptive_granularity_,
|
||||
bool rewrite_primary_key_,
|
||||
@ -42,6 +44,7 @@ struct MergeTreeWriterSettings
|
||||
, can_use_adaptive_granularity(can_use_adaptive_granularity_)
|
||||
, rewrite_primary_key(rewrite_primary_key_)
|
||||
, blocks_are_granules_size(blocks_are_granules_size_)
|
||||
, query_write_settings(query_write_settings_)
|
||||
{
|
||||
}
|
||||
|
||||
@ -50,6 +53,7 @@ struct MergeTreeWriterSettings
|
||||
bool can_use_adaptive_granularity;
|
||||
bool rewrite_primary_key;
|
||||
bool blocks_are_granules_size;
|
||||
WriteSettings query_write_settings;
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -43,7 +43,7 @@ UInt64 MergeTreeMutationEntry::parseFileName(const String & file_name_)
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot parse mutation version from file name, expected 'mutation_<UInt64>.txt', got '{}'", file_name_);
|
||||
}
|
||||
|
||||
MergeTreeMutationEntry::MergeTreeMutationEntry(MutationCommands commands_, DiskPtr disk_, const String & path_prefix_, UInt64 tmp_number)
|
||||
MergeTreeMutationEntry::MergeTreeMutationEntry(MutationCommands commands_, DiskPtr disk_, const String & path_prefix_, UInt64 tmp_number, const WriteSettings & settings)
|
||||
: create_time(time(nullptr))
|
||||
, commands(std::move(commands_))
|
||||
, disk(std::move(disk_))
|
||||
@ -53,7 +53,7 @@ MergeTreeMutationEntry::MergeTreeMutationEntry(MutationCommands commands_, DiskP
|
||||
{
|
||||
try
|
||||
{
|
||||
auto out = disk->writeFile(path_prefix + file_name);
|
||||
auto out = disk->writeFile(std::filesystem::path(path_prefix) / file_name, DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Rewrite, settings);
|
||||
*out << "format version: 1\n"
|
||||
<< "create time: " << LocalDateTime(create_time) << "\n";
|
||||
*out << "commands: ";
|
||||
|
@ -29,7 +29,7 @@ struct MergeTreeMutationEntry
|
||||
String latest_fail_reason;
|
||||
|
||||
/// Create a new entry and write it to a temporary file.
|
||||
MergeTreeMutationEntry(MutationCommands commands_, DiskPtr disk, const String & path_prefix_, UInt64 tmp_number);
|
||||
MergeTreeMutationEntry(MutationCommands commands_, DiskPtr disk, const String & path_prefix_, UInt64 tmp_number, const WriteSettings & settings);
|
||||
MergeTreeMutationEntry(const MergeTreeMutationEntry &) = delete;
|
||||
MergeTreeMutationEntry(MergeTreeMutationEntry &&) = default;
|
||||
|
||||
|
@ -384,16 +384,18 @@ void MergeTreePartition::load(const MergeTreeData & storage, const PartMetadataM
|
||||
std::unique_ptr<WriteBufferFromFileBase> MergeTreePartition::store(const MergeTreeData & storage, const DiskPtr & disk, const String & part_path, MergeTreeDataPartChecksums & checksums) const
|
||||
{
|
||||
auto metadata_snapshot = storage.getInMemoryMetadataPtr();
|
||||
const auto & partition_key_sample = adjustPartitionKey(metadata_snapshot, storage.getContext()).sample_block;
|
||||
return store(partition_key_sample, disk, part_path, checksums);
|
||||
const auto & context = storage.getContext();
|
||||
const auto & partition_key_sample = adjustPartitionKey(metadata_snapshot, context).sample_block;
|
||||
return store(partition_key_sample, disk, part_path, checksums, context->getWriteSettings());
|
||||
}
|
||||
|
||||
std::unique_ptr<WriteBufferFromFileBase> MergeTreePartition::store(const Block & partition_key_sample, const DiskPtr & disk, const String & part_path, MergeTreeDataPartChecksums & checksums) const
|
||||
std::unique_ptr<WriteBufferFromFileBase> MergeTreePartition::store(
|
||||
const Block & partition_key_sample, const DiskPtr & disk, const String & part_path, MergeTreeDataPartChecksums & checksums, const WriteSettings & settings) const
|
||||
{
|
||||
if (!partition_key_sample)
|
||||
return nullptr;
|
||||
|
||||
auto out = disk->writeFile(part_path + "partition.dat");
|
||||
auto out = disk->writeFile(std::filesystem::path(part_path) / "partition.dat", DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Rewrite, settings);
|
||||
HashingWriteBuffer out_hashing(*out);
|
||||
for (size_t i = 0; i < value.size(); ++i)
|
||||
{
|
||||
|
@ -43,7 +43,7 @@ public:
|
||||
/// Store functions return write buffer with written but not finalized data.
|
||||
/// User must call finish() for returned object.
|
||||
[[nodiscard]] std::unique_ptr<WriteBufferFromFileBase> store(const MergeTreeData & storage, const DiskPtr & disk, const String & part_path, MergeTreeDataPartChecksums & checksums) const;
|
||||
[[nodiscard]] std::unique_ptr<WriteBufferFromFileBase> store(const Block & partition_key_sample, const DiskPtr & disk, const String & part_path, MergeTreeDataPartChecksums & checksums) const;
|
||||
[[nodiscard]] std::unique_ptr<WriteBufferFromFileBase> store(const Block & partition_key_sample, const DiskPtr & disk, const String & part_path, MergeTreeDataPartChecksums & checksums, const WriteSettings & settings) const;
|
||||
|
||||
void assign(const MergeTreePartition & other) { value = other.value; }
|
||||
|
||||
|
@ -19,13 +19,15 @@ MergedBlockOutputStream::MergedBlockOutputStream(
|
||||
const MergeTreeIndices & skip_indices,
|
||||
CompressionCodecPtr default_codec_,
|
||||
bool reset_columns_,
|
||||
bool blocks_are_granules_size)
|
||||
bool blocks_are_granules_size,
|
||||
const WriteSettings & write_settings)
|
||||
: IMergedBlockOutputStream(data_part, metadata_snapshot_, columns_list_, reset_columns_)
|
||||
, columns_list(columns_list_)
|
||||
, default_codec(default_codec_)
|
||||
{
|
||||
MergeTreeWriterSettings writer_settings(
|
||||
storage.getContext()->getSettings(),
|
||||
write_settings,
|
||||
storage.getSettings(),
|
||||
data_part->index_granularity_info.is_adaptive,
|
||||
/* rewrite_primary_key = */ true,
|
||||
@ -122,7 +124,8 @@ MergedBlockOutputStream::Finalizer MergedBlockOutputStream::finalizePartAsync(
|
||||
MergeTreeData::MutableDataPartPtr & new_part,
|
||||
bool sync,
|
||||
const NamesAndTypesList * total_columns_list,
|
||||
MergeTreeData::DataPart::Checksums * additional_column_checksums)
|
||||
MergeTreeData::DataPart::Checksums * additional_column_checksums,
|
||||
const WriteSettings & write_settings)
|
||||
{
|
||||
/// Finish write and get checksums.
|
||||
MergeTreeData::DataPart::Checksums checksums;
|
||||
@ -156,7 +159,7 @@ MergedBlockOutputStream::Finalizer MergedBlockOutputStream::finalizePartAsync(
|
||||
|
||||
auto finalizer = std::make_unique<Finalizer::Impl>(*writer, new_part, files_to_remove_after_sync, sync);
|
||||
if (new_part->isStoredOnDisk())
|
||||
finalizer->written_files = finalizePartOnDisk(new_part, checksums);
|
||||
finalizer->written_files = finalizePartOnDisk(new_part, checksums, write_settings);
|
||||
|
||||
new_part->rows_count = rows_count;
|
||||
new_part->modification_time = time(nullptr);
|
||||
@ -174,14 +177,15 @@ MergedBlockOutputStream::Finalizer MergedBlockOutputStream::finalizePartAsync(
|
||||
|
||||
MergedBlockOutputStream::WrittenFiles MergedBlockOutputStream::finalizePartOnDisk(
|
||||
const MergeTreeData::DataPartPtr & new_part,
|
||||
MergeTreeData::DataPart::Checksums & checksums)
|
||||
MergeTreeData::DataPart::Checksums & checksums,
|
||||
const WriteSettings & settings)
|
||||
{
|
||||
WrittenFiles written_files;
|
||||
if (new_part->isProjectionPart())
|
||||
{
|
||||
if (storage.format_version >= MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING || isCompactPart(new_part))
|
||||
{
|
||||
auto count_out = volume->getDisk()->writeFile(part_path + "count.txt", 4096);
|
||||
auto count_out = volume->getDisk()->writeFile(part_path + "count.txt", 4096, WriteMode::Rewrite, settings);
|
||||
HashingWriteBuffer count_out_hashing(*count_out);
|
||||
writeIntText(rows_count, count_out_hashing);
|
||||
count_out_hashing.next();
|
||||
@ -195,7 +199,7 @@ MergedBlockOutputStream::WrittenFiles MergedBlockOutputStream::finalizePartOnDis
|
||||
{
|
||||
if (new_part->uuid != UUIDHelpers::Nil)
|
||||
{
|
||||
auto out = volume->getDisk()->writeFile(fs::path(part_path) / IMergeTreeDataPart::UUID_FILE_NAME, 4096);
|
||||
auto out = volume->getDisk()->writeFile(fs::path(part_path) / IMergeTreeDataPart::UUID_FILE_NAME, 4096, WriteMode::Rewrite, settings);
|
||||
HashingWriteBuffer out_hashing(*out);
|
||||
writeUUIDText(new_part->uuid, out_hashing);
|
||||
checksums.files[IMergeTreeDataPart::UUID_FILE_NAME].file_size = out_hashing.count();
|
||||
@ -221,7 +225,7 @@ MergedBlockOutputStream::WrittenFiles MergedBlockOutputStream::finalizePartOnDis
|
||||
}
|
||||
|
||||
{
|
||||
auto count_out = volume->getDisk()->writeFile(fs::path(part_path) / "count.txt", 4096);
|
||||
auto count_out = volume->getDisk()->writeFile(fs::path(part_path) / "count.txt", 4096, WriteMode::Rewrite, settings);
|
||||
HashingWriteBuffer count_out_hashing(*count_out);
|
||||
writeIntText(rows_count, count_out_hashing);
|
||||
count_out_hashing.next();
|
||||
@ -235,7 +239,7 @@ MergedBlockOutputStream::WrittenFiles MergedBlockOutputStream::finalizePartOnDis
|
||||
if (!new_part->ttl_infos.empty())
|
||||
{
|
||||
/// Write a file with ttl infos in json format.
|
||||
auto out = volume->getDisk()->writeFile(fs::path(part_path) / "ttl.txt", 4096);
|
||||
auto out = volume->getDisk()->writeFile(fs::path(part_path) / "ttl.txt", 4096, WriteMode::Rewrite, settings);
|
||||
HashingWriteBuffer out_hashing(*out);
|
||||
new_part->ttl_infos.write(out_hashing);
|
||||
checksums.files["ttl.txt"].file_size = out_hashing.count();
|
||||
@ -246,7 +250,7 @@ MergedBlockOutputStream::WrittenFiles MergedBlockOutputStream::finalizePartOnDis
|
||||
|
||||
if (!new_part->getSerializationInfos().empty())
|
||||
{
|
||||
auto out = volume->getDisk()->writeFile(part_path + IMergeTreeDataPart::SERIALIZATION_FILE_NAME, 4096);
|
||||
auto out = volume->getDisk()->writeFile(part_path + IMergeTreeDataPart::SERIALIZATION_FILE_NAME, 4096, WriteMode::Rewrite, settings);
|
||||
HashingWriteBuffer out_hashing(*out);
|
||||
new_part->getSerializationInfos().writeJSON(out_hashing);
|
||||
checksums.files[IMergeTreeDataPart::SERIALIZATION_FILE_NAME].file_size = out_hashing.count();
|
||||
@ -257,7 +261,7 @@ MergedBlockOutputStream::WrittenFiles MergedBlockOutputStream::finalizePartOnDis
|
||||
|
||||
{
|
||||
/// Write a file with a description of columns.
|
||||
auto out = volume->getDisk()->writeFile(fs::path(part_path) / "columns.txt", 4096);
|
||||
auto out = volume->getDisk()->writeFile(fs::path(part_path) / "columns.txt", 4096, WriteMode::Rewrite, settings);
|
||||
new_part->getColumns().writeText(*out);
|
||||
out->preFinalize();
|
||||
written_files.emplace_back(std::move(out));
|
||||
@ -265,7 +269,7 @@ MergedBlockOutputStream::WrittenFiles MergedBlockOutputStream::finalizePartOnDis
|
||||
|
||||
if (default_codec != nullptr)
|
||||
{
|
||||
auto out = volume->getDisk()->writeFile(part_path + IMergeTreeDataPart::DEFAULT_COMPRESSION_CODEC_FILE_NAME, 4096);
|
||||
auto out = volume->getDisk()->writeFile(part_path + IMergeTreeDataPart::DEFAULT_COMPRESSION_CODEC_FILE_NAME, 4096, WriteMode::Rewrite, settings);
|
||||
DB::writeText(queryToString(default_codec->getFullCodecDesc()), *out);
|
||||
out->preFinalize();
|
||||
written_files.emplace_back(std::move(out));
|
||||
@ -278,7 +282,7 @@ MergedBlockOutputStream::WrittenFiles MergedBlockOutputStream::finalizePartOnDis
|
||||
|
||||
{
|
||||
/// Write file with checksums.
|
||||
auto out = volume->getDisk()->writeFile(fs::path(part_path) / "checksums.txt", 4096);
|
||||
auto out = volume->getDisk()->writeFile(fs::path(part_path) / "checksums.txt", 4096, WriteMode::Rewrite, settings);
|
||||
checksums.write(*out);
|
||||
out->preFinalize();
|
||||
written_files.emplace_back(std::move(out));
|
||||
|
@ -2,6 +2,7 @@
|
||||
|
||||
#include <Storages/MergeTree/IMergedBlockOutputStream.h>
|
||||
#include <Columns/ColumnArray.h>
|
||||
#include <IO/WriteSettings.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
@ -20,7 +21,8 @@ public:
|
||||
const MergeTreeIndices & skip_indices,
|
||||
CompressionCodecPtr default_codec_,
|
||||
bool reset_columns_ = false,
|
||||
bool blocks_are_granules_size = false);
|
||||
bool blocks_are_granules_size = false,
|
||||
const WriteSettings & write_settings = {});
|
||||
|
||||
Block getHeader() const { return metadata_snapshot->getSampleBlock(); }
|
||||
|
||||
@ -54,7 +56,8 @@ public:
|
||||
MergeTreeData::MutableDataPartPtr & new_part,
|
||||
bool sync,
|
||||
const NamesAndTypesList * total_columns_list = nullptr,
|
||||
MergeTreeData::DataPart::Checksums * additional_column_checksums = nullptr);
|
||||
MergeTreeData::DataPart::Checksums * additional_column_checksums = nullptr,
|
||||
const WriteSettings & settings = {});
|
||||
|
||||
void finalizePart(
|
||||
MergeTreeData::MutableDataPartPtr & new_part,
|
||||
@ -71,7 +74,8 @@ private:
|
||||
using WrittenFiles = std::vector<std::unique_ptr<WriteBufferFromFileBase>>;
|
||||
WrittenFiles finalizePartOnDisk(
|
||||
const MergeTreeData::DataPartPtr & new_part,
|
||||
MergeTreeData::DataPart::Checksums & checksums);
|
||||
MergeTreeData::DataPart::Checksums & checksums,
|
||||
const WriteSettings & write_settings);
|
||||
|
||||
NamesAndTypesList columns_list;
|
||||
IMergeTreeDataPart::MinMaxIndex minmax_idx;
|
||||
|
@ -1,6 +1,7 @@
|
||||
#include <Storages/MergeTree/MergedColumnOnlyOutputStream.h>
|
||||
#include <Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h>
|
||||
#include <Interpreters/Context.h>
|
||||
#include <IO/WriteSettings.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -26,6 +27,7 @@ MergedColumnOnlyOutputStream::MergedColumnOnlyOutputStream(
|
||||
|
||||
MergeTreeWriterSettings writer_settings(
|
||||
global_settings,
|
||||
data_part->storage.getContext()->getWriteSettings(),
|
||||
storage_settings,
|
||||
index_granularity_info ? index_granularity_info->is_adaptive : data_part->storage.canUseAdaptiveGranularity(),
|
||||
/* rewrite_primary_key = */false);
|
||||
|
@ -419,14 +419,15 @@ void finalizeMutatedPart(
|
||||
const MergeTreeDataPartPtr & source_part,
|
||||
MergeTreeData::MutableDataPartPtr new_data_part,
|
||||
ExecuteTTLType execute_ttl_type,
|
||||
const CompressionCodecPtr & codec)
|
||||
const CompressionCodecPtr & codec,
|
||||
ContextPtr context)
|
||||
{
|
||||
auto disk = new_data_part->volume->getDisk();
|
||||
auto part_path = fs::path(new_data_part->getFullRelativePath());
|
||||
|
||||
if (new_data_part->uuid != UUIDHelpers::Nil)
|
||||
{
|
||||
auto out = disk->writeFile(part_path / IMergeTreeDataPart::UUID_FILE_NAME, 4096);
|
||||
auto out = disk->writeFile(part_path / IMergeTreeDataPart::UUID_FILE_NAME, 4096, WriteMode::Rewrite, context->getWriteSettings());
|
||||
HashingWriteBuffer out_hashing(*out);
|
||||
writeUUIDText(new_data_part->uuid, out_hashing);
|
||||
new_data_part->checksums.files[IMergeTreeDataPart::UUID_FILE_NAME].file_size = out_hashing.count();
|
||||
@ -436,7 +437,7 @@ void finalizeMutatedPart(
|
||||
if (execute_ttl_type != ExecuteTTLType::NONE)
|
||||
{
|
||||
/// Write a file with ttl infos in json format.
|
||||
auto out_ttl = disk->writeFile(part_path / "ttl.txt", 4096);
|
||||
auto out_ttl = disk->writeFile(part_path / "ttl.txt", 4096, WriteMode::Rewrite, context->getWriteSettings());
|
||||
HashingWriteBuffer out_hashing(*out_ttl);
|
||||
new_data_part->ttl_infos.write(out_hashing);
|
||||
new_data_part->checksums.files["ttl.txt"].file_size = out_hashing.count();
|
||||
@ -445,7 +446,7 @@ void finalizeMutatedPart(
|
||||
|
||||
if (!new_data_part->getSerializationInfos().empty())
|
||||
{
|
||||
auto out = disk->writeFile(part_path / IMergeTreeDataPart::SERIALIZATION_FILE_NAME, 4096);
|
||||
auto out = disk->writeFile(part_path / IMergeTreeDataPart::SERIALIZATION_FILE_NAME, 4096, WriteMode::Rewrite, context->getWriteSettings());
|
||||
HashingWriteBuffer out_hashing(*out);
|
||||
new_data_part->getSerializationInfos().writeJSON(out_hashing);
|
||||
new_data_part->checksums.files[IMergeTreeDataPart::SERIALIZATION_FILE_NAME].file_size = out_hashing.count();
|
||||
@ -454,18 +455,18 @@ void finalizeMutatedPart(
|
||||
|
||||
{
|
||||
/// Write file with checksums.
|
||||
auto out_checksums = disk->writeFile(part_path / "checksums.txt", 4096);
|
||||
auto out_checksums = disk->writeFile(part_path / "checksums.txt", 4096, WriteMode::Rewrite, context->getWriteSettings());
|
||||
new_data_part->checksums.write(*out_checksums);
|
||||
} /// close fd
|
||||
|
||||
{
|
||||
auto out = disk->writeFile(part_path / IMergeTreeDataPart::DEFAULT_COMPRESSION_CODEC_FILE_NAME, 4096);
|
||||
auto out = disk->writeFile(part_path / IMergeTreeDataPart::DEFAULT_COMPRESSION_CODEC_FILE_NAME, 4096, WriteMode::Rewrite, context->getWriteSettings());
|
||||
DB::writeText(queryToString(codec->getFullCodecDesc()), *out);
|
||||
}
|
||||
|
||||
{
|
||||
/// Write a file with a description of columns.
|
||||
auto out_columns = disk->writeFile(part_path / "columns.txt", 4096);
|
||||
auto out_columns = disk->writeFile(part_path / "columns.txt", 4096, WriteMode::Rewrite, context->getWriteSettings());
|
||||
new_data_part->getColumns().writeText(*out_columns);
|
||||
} /// close fd
|
||||
|
||||
@ -1162,7 +1163,7 @@ private:
|
||||
}
|
||||
}
|
||||
|
||||
MutationHelpers::finalizeMutatedPart(ctx->source_part, ctx->new_data_part, ctx->execute_ttl_type, ctx->compression_codec);
|
||||
MutationHelpers::finalizeMutatedPart(ctx->source_part, ctx->new_data_part, ctx->execute_ttl_type, ctx->compression_codec, ctx->context);
|
||||
}
|
||||
|
||||
|
||||
|
@ -423,7 +423,7 @@ Int64 StorageMergeTree::startMutation(const MutationCommands & commands, String
|
||||
{
|
||||
std::lock_guard lock(currently_processing_in_background_mutex);
|
||||
|
||||
MergeTreeMutationEntry entry(commands, disk, relative_data_path, insert_increment.get());
|
||||
MergeTreeMutationEntry entry(commands, disk, relative_data_path, insert_increment.get(), getContext()->getWriteSettings());
|
||||
version = increment.get();
|
||||
entry.commit(version);
|
||||
mutation_file_name = entry.file_name;
|
||||
@ -1618,7 +1618,7 @@ CheckResults StorageMergeTree::checkData(const ASTPtr & query, ContextPtr local_
|
||||
{
|
||||
auto calculated_checksums = checkDataPart(part, false);
|
||||
calculated_checksums.checkEqual(part->checksums, true);
|
||||
auto out = disk->writeFile(tmp_checksums_path, 4096);
|
||||
auto out = disk->writeFile(tmp_checksums_path, 4096, WriteMode::Rewrite, local_context->getWriteSettings());
|
||||
part->checksums.write(*out);
|
||||
disk->moveFile(tmp_checksums_path, checksums_path);
|
||||
|
||||
|
@ -1,6 +1,7 @@
|
||||
#include <Storages/System/StorageSystemDisks.h>
|
||||
#include <Processors/Sources/SourceFromSingleChunk.h>
|
||||
#include <Interpreters/Context.h>
|
||||
#include <Disks/IDiskRemote.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -22,6 +23,7 @@ StorageSystemDisks::StorageSystemDisks(const StorageID & table_id_)
|
||||
{"total_space", std::make_shared<DataTypeUInt64>()},
|
||||
{"keep_free_space", std::make_shared<DataTypeUInt64>()},
|
||||
{"type", std::make_shared<DataTypeString>()},
|
||||
{"cache_path", std::make_shared<DataTypeString>()},
|
||||
}));
|
||||
setInMemoryMetadata(storage_metadata);
|
||||
}
|
||||
@ -43,6 +45,7 @@ Pipe StorageSystemDisks::read(
|
||||
MutableColumnPtr col_total = ColumnUInt64::create();
|
||||
MutableColumnPtr col_keep = ColumnUInt64::create();
|
||||
MutableColumnPtr col_type = ColumnString::create();
|
||||
MutableColumnPtr col_cache_path = ColumnString::create();
|
||||
|
||||
for (const auto & [disk_name, disk_ptr] : context->getDisksMap())
|
||||
{
|
||||
@ -52,6 +55,12 @@ Pipe StorageSystemDisks::read(
|
||||
col_total->insert(disk_ptr->getTotalSpace());
|
||||
col_keep->insert(disk_ptr->getKeepingFreeSpace());
|
||||
col_type->insert(toString(disk_ptr->getType()));
|
||||
|
||||
String cache_path;
|
||||
if (disk_ptr->isRemote())
|
||||
cache_path = disk_ptr->getCacheBasePath();
|
||||
|
||||
col_cache_path->insert(cache_path);
|
||||
}
|
||||
|
||||
Columns res_columns;
|
||||
@ -61,6 +70,7 @@ Pipe StorageSystemDisks::read(
|
||||
res_columns.emplace_back(std::move(col_total));
|
||||
res_columns.emplace_back(std::move(col_keep));
|
||||
res_columns.emplace_back(std::move(col_type));
|
||||
res_columns.emplace_back(std::move(col_cache_path));
|
||||
|
||||
UInt64 num_rows = res_columns.at(0)->size();
|
||||
Chunk chunk(std::move(res_columns), num_rows);
|
||||
|
60
src/Storages/System/StorageSystemFilesystemCache.cpp
Normal file
60
src/Storages/System/StorageSystemFilesystemCache.cpp
Normal file
@ -0,0 +1,60 @@
|
||||
#include "StorageSystemFilesystemCache.h"
|
||||
#include <DataTypes/DataTypeString.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <DataTypes/DataTypeTuple.h>
|
||||
#include <Common/FileCache.h>
|
||||
#include <Common/FileCacheFactory.h>
|
||||
#include <Interpreters/Context.h>
|
||||
#include <Disks/IDisk.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/// Describes the columns of the table, in output order.
/// fillData() writes into res_columns by position, so the order of the entries
/// below must match the indices used there.
NamesAndTypesList StorageSystemFilesystemCache::getNamesAndTypes()
{
    NamesAndTypesList names_and_types;

    names_and_types.emplace_back("cache_base_path", std::make_shared<DataTypeString>());
    names_and_types.emplace_back("cache_path", std::make_shared<DataTypeString>());
    names_and_types.emplace_back("file_segment_range_begin", std::make_shared<DataTypeUInt64>());
    names_and_types.emplace_back("file_segment_range_end", std::make_shared<DataTypeUInt64>());
    names_and_types.emplace_back("size", std::make_shared<DataTypeUInt64>());
    names_and_types.emplace_back("state", std::make_shared<DataTypeString>());
    names_and_types.emplace_back("cache_hits", std::make_shared<DataTypeUInt64>());
    names_and_types.emplace_back("references", std::make_shared<DataTypeUInt64>());
    names_and_types.emplace_back("downloaded_size", std::make_shared<DataTypeUInt64>());

    return names_and_types;
}
|
||||
|
||||
/// Only forwards the table id to the base class; the storage keeps no state of
/// its own and produces its rows in fillData().
StorageSystemFilesystemCache::StorageSystemFilesystemCache(const StorageID & table_id_)
    : IStorageSystemOneBlock(table_id_)
{
}
|
||||
|
||||
/// Appends one row per file segment, for every cache returned by
/// FileCacheFactory::instance().getAll().
/// Columns are written positionally in the order declared by getNamesAndTypes().
/// The context and query info arguments are not used.
void StorageSystemFilesystemCache::fillData(MutableColumns & res_columns, ContextPtr, const SelectQueryInfo &) const
{
    auto registered_caches = FileCacheFactory::instance().getAll();

    for (const auto & [base_path, cache_entry] : registered_caches)
    {
        const auto & file_cache = cache_entry.cache;
        auto segments = file_cache->getSnapshot();

        for (const auto & segment : segments)
        {
            /// Running position of the next output column for this row.
            size_t column = 0;

            res_columns[column++]->insert(base_path);                                                             /// cache_base_path
            res_columns[column++]->insert(file_cache->getPathInLocalCache(segment->key(), segment->offset()));   /// cache_path

            const auto & segment_range = segment->range();
            res_columns[column++]->insert(segment_range.left);                                 /// file_segment_range_begin
            res_columns[column++]->insert(segment_range.right);                                /// file_segment_range_end
            res_columns[column++]->insert(segment_range.size());                               /// size
            res_columns[column++]->insert(FileSegment::stateToString(segment->state()));       /// state
            res_columns[column++]->insert(segment->getHitsCount());                            /// cache_hits
            res_columns[column++]->insert(segment->getRefCount());                             /// references
            res_columns[column++]->insert(segment->getDownloadedSize());                       /// downloaded_size
        }
    }
}
|
||||
|
||||
}
|
47
src/Storages/System/StorageSystemFilesystemCache.h
Normal file
47
src/Storages/System/StorageSystemFilesystemCache.h
Normal file
@ -0,0 +1,47 @@
|
||||
#pragma once
|
||||
|
||||
#include <base/shared_ptr_helper.h>
|
||||
#include <Storages/System/IStorageSystemOneBlock.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/**
|
||||
* Usage example. How to get mapping from local paths to remote paths:
|
||||
* SELECT
|
||||
* cache_path,
|
||||
* cache_hits,
|
||||
* remote_path,
|
||||
* local_path,
|
||||
* file_segment_range_begin,
|
||||
* file_segment_range_end,
|
||||
* size,
|
||||
* state
|
||||
* FROM
|
||||
* (
|
||||
* SELECT
|
||||
* arrayJoin(cache_paths) AS cache_path,
|
||||
* local_path,
|
||||
* remote_path
|
||||
* FROM system.remote_data_paths
|
||||
* ) AS data_paths
|
||||
* INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path
|
||||
* FORMAT Vertical
|
||||
*/
|
||||
|
||||
/// Storage with engine name "SystemFilesystemCache".
/// Built on IStorageSystemOneBlock: the column list comes from getNamesAndTypes()
/// and the rows are produced by fillData().
class StorageSystemFilesystemCache final
    : public shared_ptr_helper<StorageSystemFilesystemCache>
    , public IStorageSystemOneBlock<StorageSystemFilesystemCache>
{
    /// The constructor is protected; presumably this friendship is what lets
    /// shared_ptr_helper create instances.
    friend struct shared_ptr_helper<StorageSystemFilesystemCache>;

public:
    /// Engine name of this storage.
    std::string getName() const override { return "SystemFilesystemCache"; }

    /// Names and types of the table's columns, in output order.
    static NamesAndTypesList getNamesAndTypes();

protected:
    explicit StorageSystemFilesystemCache(const StorageID & table_id_);

    /// Appends the table's rows to res_columns.
    void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const override;
};
|
||||
|
||||
}
|
100
src/Storages/System/StorageSystemRemoteDataPaths.cpp
Normal file
100
src/Storages/System/StorageSystemRemoteDataPaths.cpp
Normal file
@ -0,0 +1,100 @@
|
||||
#include "StorageSystemRemoteDataPaths.h"
|
||||
#include <DataTypes/DataTypeString.h>
|
||||
#include <DataTypes/DataTypeArray.h>
|
||||
#include <Common/FileCache.h>
|
||||
#include <Common/FileCacheFactory.h>
|
||||
#include <Columns/ColumnString.h>
|
||||
#include <Columns/ColumnArray.h>
|
||||
#include <Interpreters/Context.h>
|
||||
#include <Disks/IDisk.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/// Registers the table's column set as in-memory metadata.
/// read() pushes its result columns in exactly this order, so the two must stay in sync.
StorageSystemRemoteDataPaths::StorageSystemRemoteDataPaths(const StorageID & table_id_)
    : IStorage(table_id_)
{
    NamesAndTypesList table_columns
    {
        {"disk_name", std::make_shared<DataTypeString>()},
        {"path", std::make_shared<DataTypeString>()},
        {"cache_base_path", std::make_shared<DataTypeString>()},
        {"local_path", std::make_shared<DataTypeString>()},
        {"remote_path", std::make_shared<DataTypeString>()},
        {"cache_paths", std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>())},
    };

    StorageInMemoryMetadata metadata;
    metadata.setColumns(ColumnsDescription(std::move(table_columns)));
    setInMemoryMetadata(metadata);
}
|
||||
|
||||
/// Builds the whole table as a single chunk.
/// For every remote disk in the context's disk map, the local -> remote path mapping
/// is collected starting from "store", and one row is emitted per remote path.
/// When the disk reports a non-empty cache base path, the row also carries the cache
/// paths the file cache returns for that remote path; otherwise the array column
/// gets its default value.
/// Only column_names and context are used; the remaining arguments are ignored.
Pipe StorageSystemRemoteDataPaths::read(
    const Names & column_names,
    const StorageSnapshotPtr & storage_snapshot,
    SelectQueryInfo & /*query_info*/,
    ContextPtr context,
    QueryProcessingStage::Enum /*processed_stage*/,
    const size_t /*max_block_size*/,
    const unsigned /*num_streams*/)
{
    storage_snapshot->check(column_names);

    MutableColumnPtr disk_name_column = ColumnString::create();
    MutableColumnPtr base_path_column = ColumnString::create();
    MutableColumnPtr cache_base_path_column = ColumnString::create();
    MutableColumnPtr local_path_column = ColumnString::create();
    MutableColumnPtr remote_path_column = ColumnString::create();
    MutableColumnPtr cache_paths_column = ColumnArray::create(ColumnString::create());

    auto all_disks = context->getDisksMap();
    for (const auto & [disk_name, disk] : all_disks)
    {
        /// Disks that are not remote contribute no rows.
        if (!disk->isRemote())
            continue;

        std::vector<IDisk::LocalPathWithRemotePaths> local_to_remote;
        disk->getRemotePathsRecursive("store", local_to_remote);

        /// The cache stays null when the disk reports an empty cache base path.
        FileCachePtr cache;
        auto cache_base_path = disk->getCacheBasePath();
        if (!cache_base_path.empty())
            cache = FileCacheFactory::instance().get(cache_base_path);

        for (const auto & [local_path, remote_paths] : local_to_remote)
        {
            for (const auto & remote_path : remote_paths)
            {
                disk_name_column->insert(disk_name);
                base_path_column->insert(disk->getPath());
                cache_base_path_column->insert(cache_base_path);
                local_path_column->insert(local_path);
                remote_path_column->insert(remote_path);

                if (!cache)
                {
                    cache_paths_column->insertDefault();
                }
                else
                {
                    auto cached_files = cache->tryGetCachePaths(cache->hash(remote_path));
                    cache_paths_column->insert(Array(cached_files.begin(), cached_files.end()));
                }
            }
        }
    }

    /// Every column receives exactly one value per row, so any of them gives the row count.
    const UInt64 num_rows = disk_name_column->size();

    /// The order must match the column list set in the constructor.
    Columns result_columns;
    result_columns.emplace_back(std::move(disk_name_column));
    result_columns.emplace_back(std::move(base_path_column));
    result_columns.emplace_back(std::move(cache_base_path_column));
    result_columns.emplace_back(std::move(local_path_column));
    result_columns.emplace_back(std::move(remote_path_column));
    result_columns.emplace_back(std::move(cache_paths_column));

    Chunk chunk(std::move(result_columns), num_rows);

    return Pipe(std::make_shared<SourceFromSingleChunk>(storage_snapshot->metadata->getSampleBlock(), std::move(chunk)));
}
|
||||
|
||||
}
|
30
src/Storages/System/StorageSystemRemoteDataPaths.h
Normal file
30
src/Storages/System/StorageSystemRemoteDataPaths.h
Normal file
@ -0,0 +1,30 @@
|
||||
#pragma once
|
||||
|
||||
#include <base/shared_ptr_helper.h>
|
||||
#include <Storages/System/IStorageSystemOneBlock.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/// Storage with engine name "SystemRemoteDataPaths".
/// Derives directly from IStorage and overrides read() itself.
class StorageSystemRemoteDataPaths
    : public shared_ptr_helper<StorageSystemRemoteDataPaths>
    , public IStorage
{
    /// The constructor is protected; presumably this friendship is what lets
    /// shared_ptr_helper create instances.
    friend struct shared_ptr_helper<StorageSystemRemoteDataPaths>;

public:
    /// Engine name of this storage.
    std::string getName() const override { return "SystemRemoteDataPaths"; }

    /// Reports this storage as a system storage.
    bool isSystemStorage() const override { return true; }

    /// Produces the table contents as a Pipe.
    Pipe read(
        const Names & column_names,
        const StorageSnapshotPtr & storage_snapshot,
        SelectQueryInfo & query_info,
        ContextPtr context,
        QueryProcessingStage::Enum processed_stage,
        size_t max_block_size,
        unsigned num_streams) override;

protected:
    explicit StorageSystemRemoteDataPaths(const StorageID & table_id_);
};
|
||||
|
||||
}
|
@ -68,6 +68,8 @@
|
||||
#include <Storages/System/StorageSystemUserDirectories.h>
|
||||
#include <Storages/System/StorageSystemPrivileges.h>
|
||||
#include <Storages/System/StorageSystemAsynchronousInserts.h>
|
||||
#include <Storages/System/StorageSystemFilesystemCache.h>
|
||||
#include <Storages/System/StorageSystemRemoteDataPaths.h>
|
||||
|
||||
#ifdef OS_LINUX
|
||||
#include <Storages/System/StorageSystemStackTrace.h>
|
||||
@ -161,6 +163,8 @@ void attachSystemTablesServer(ContextPtr context, IDatabase & system_database, b
|
||||
attach<StorageSystemReplicatedFetches>(context, system_database, "replicated_fetches");
|
||||
attach<StorageSystemPartMovesBetweenShards>(context, system_database, "part_moves_between_shards");
|
||||
attach<StorageSystemAsynchronousInserts>(context, system_database, "asynchronous_inserts");
|
||||
attach<StorageSystemFilesystemCache>(context, system_database, "filesystem_cache");
|
||||
attach<StorageSystemRemoteDataPaths>(context, system_database, "remote_data_paths");
|
||||
|
||||
if (has_zookeeper)
|
||||
attach<StorageSystemZooKeeper>(context, system_database, "zookeeper");
|
||||
|
@ -7,7 +7,9 @@
|
||||
<access_key_id>clickhouse</access_key_id>
|
||||
<secret_access_key>clickhouse</secret_access_key>
|
||||
<data_cache_enabled>1</data_cache_enabled>
|
||||
<cache_enabled>0</cache_enabled>
|
||||
<data_cache_max_size>22548578304</data_cache_max_size>
|
||||
<cache_on_write_operations>1</cache_on_write_operations>
|
||||
</s3_cache>
|
||||
</disks>
|
||||
<policies>
|
||||
|
@ -85,6 +85,7 @@ fi
|
||||
|
||||
if [[ -n "$EXPORT_S3_STORAGE_POLICIES" ]]; then
|
||||
ln -sf $SRC_PATH/config.d/storage_conf.xml $DEST_SERVER_PATH/config.d/
|
||||
ln -sf $SRC_PATH/users.d/s3_cache.xml $DEST_SERVER_PATH/users.d/
|
||||
fi
|
||||
|
||||
if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then
|
||||
|
8
tests/config/users.d/s3_cache.xml
Normal file
8
tests/config/users.d/s3_cache.xml
Normal file
@ -0,0 +1,8 @@
|
||||
<clickhouse>
|
||||
<profiles>
|
||||
<default>
|
||||
<enable_filesystem_cache_on_write_operations>1</enable_filesystem_cache_on_write_operations>
|
||||
<enable_filesystem_cache>1</enable_filesystem_cache>
|
||||
</default>
|
||||
</profiles>
|
||||
</clickhouse>
|
@ -22,7 +22,7 @@ OPTIMIZE TABLE adaptive_table FINAL;
|
||||
|
||||
SELECT marks FROM system.parts WHERE table = 'adaptive_table' and database=currentDatabase() and active;
|
||||
|
||||
SET remote_fs_enable_cache = 0;
|
||||
SET enable_filesystem_cache = 0;
|
||||
|
||||
-- If we have computed granularity incorrectly then we will exceed this limit.
|
||||
SET max_memory_usage='30M';
|
||||
|
@ -3,7 +3,7 @@
|
||||
drop table if exists data_01641;
|
||||
|
||||
-- Disable cache for s3 storage tests because it increases memory usage.
|
||||
set remote_fs_enable_cache=0;
|
||||
set enable_filesystem_cache=0;
|
||||
set remote_filesystem_read_method='read';
|
||||
|
||||
create table data_01641 (key Int, value String) engine=MergeTree order by (key, repeat(value, 40)) settings old_parts_lifetime=0, min_bytes_for_wide_part=0;
|
||||
|
@ -2,7 +2,7 @@
|
||||
|
||||
DROP TABLE IF EXISTS order_by_desc;
|
||||
|
||||
SET remote_fs_enable_cache=0;
|
||||
SET enable_filesystem_cache=0;
|
||||
|
||||
CREATE TABLE order_by_desc (u UInt32, s String)
|
||||
ENGINE MergeTree ORDER BY u PARTITION BY u % 100
|
||||
|
@ -12,7 +12,7 @@ CREATE TABLE system.data_type_families\n(\n `name` String,\n `case_insensi
|
||||
CREATE TABLE system.databases\n(\n `name` String,\n `engine` String,\n `data_path` String,\n `metadata_path` String,\n `uuid` UUID,\n `comment` String,\n `database` String\n)\nENGINE = SystemDatabases()\nCOMMENT \'SYSTEM TABLE is built on the fly.\'
|
||||
CREATE TABLE system.detached_parts\n(\n `database` String,\n `table` String,\n `partition_id` Nullable(String),\n `name` String,\n `disk` String,\n `reason` Nullable(String),\n `min_block_number` Nullable(Int64),\n `max_block_number` Nullable(Int64),\n `level` Nullable(UInt32)\n)\nENGINE = SystemDetachedParts()\nCOMMENT \'SYSTEM TABLE is built on the fly.\'
|
||||
CREATE TABLE system.dictionaries\n(\n `database` String,\n `name` String,\n `uuid` UUID,\n `status` Enum8(\'NOT_LOADED\' = 0, \'LOADED\' = 1, \'FAILED\' = 2, \'LOADING\' = 3, \'FAILED_AND_RELOADING\' = 4, \'LOADED_AND_RELOADING\' = 5, \'NOT_EXIST\' = 6),\n `origin` String,\n `type` String,\n `key.names` Array(String),\n `key.types` Array(String),\n `attribute.names` Array(String),\n `attribute.types` Array(String),\n `bytes_allocated` UInt64,\n `query_count` UInt64,\n `hit_rate` Float64,\n `found_rate` Float64,\n `element_count` UInt64,\n `load_factor` Float64,\n `source` String,\n `lifetime_min` UInt64,\n `lifetime_max` UInt64,\n `loading_start_time` DateTime,\n `last_successful_update_time` DateTime,\n `loading_duration` Float32,\n `last_exception` String,\n `comment` String\n)\nENGINE = SystemDictionaries()\nCOMMENT \'SYSTEM TABLE is built on the fly.\'
|
||||
CREATE TABLE system.disks\n(\n `name` String,\n `path` String,\n `free_space` UInt64,\n `total_space` UInt64,\n `keep_free_space` UInt64,\n `type` String\n)\nENGINE = SystemDisks()\nCOMMENT \'SYSTEM TABLE is built on the fly.\'
|
||||
CREATE TABLE system.disks\n(\n `name` String,\n `path` String,\n `free_space` UInt64,\n `total_space` UInt64,\n `keep_free_space` UInt64,\n `type` String,\n `cache_path` String\n)\nENGINE = SystemDisks()\nCOMMENT \'SYSTEM TABLE is built on the fly.\'
|
||||
CREATE TABLE system.distributed_ddl_queue\n(\n `entry` String,\n `entry_version` Nullable(UInt8),\n `initiator_host` Nullable(String),\n `initiator_port` Nullable(UInt16),\n `cluster` String,\n `query` String,\n `settings` Map(String, String),\n `query_create_time` DateTime,\n `host` Nullable(String),\n `port` Nullable(UInt16),\n `status` Nullable(Enum8(\'Inactive\' = 0, \'Active\' = 1, \'Finished\' = 2, \'Removing\' = 3, \'Unknown\' = 4)),\n `exception_code` Nullable(UInt16),\n `exception_text` Nullable(String),\n `query_finish_time` Nullable(DateTime),\n `query_duration_ms` Nullable(UInt64)\n)\nENGINE = SystemDDLWorkerQueue()\nCOMMENT \'SYSTEM TABLE is built on the fly.\'
|
||||
CREATE TABLE system.distribution_queue\n(\n `database` String,\n `table` String,\n `data_path` String,\n `is_blocked` UInt8,\n `error_count` UInt64,\n `data_files` UInt64,\n `data_compressed_bytes` UInt64,\n `broken_data_files` UInt64,\n `broken_data_compressed_bytes` UInt64,\n `last_exception` String\n)\nENGINE = SystemDistributionQueue()\nCOMMENT \'SYSTEM TABLE is built on the fly.\'
|
||||
CREATE TABLE system.enabled_roles\n(\n `role_name` String,\n `with_admin_option` UInt8,\n `is_current` UInt8,\n `is_default` UInt8\n)\nENGINE = SystemEnabledRoles()\nCOMMENT \'SYSTEM TABLE is built on the fly.\'
|
||||
|
@ -1,2 +1,4 @@
|
||||
SELECT 1, * FROM test LIMIT 10 FORMAT Null; 1 0 1
|
||||
SELECT 2, * FROM test LIMIT 10 FORMAT Null; 0 1 0
|
||||
0
|
||||
SELECT 3, * FROM test LIMIT 10 FORMAT Null; 1 1 0
|
||||
|
@ -1,7 +1,9 @@
|
||||
-- Tags: no-parallel, no-fasttest, long
|
||||
|
||||
SET max_memory_usage='20G';
|
||||
SET enable_filesystem_cache_on_write_operations = 0;
|
||||
|
||||
DROP TABLE IF EXISTS test;
|
||||
CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache';
|
||||
INSERT INTO test SELECT * FROM generateRandom('key UInt32, value String') LIMIT 10000;
|
||||
|
||||
@ -41,4 +43,27 @@ SET remote_filesystem_read_method='threadpool';
|
||||
|
||||
SELECT * FROM test WHERE value LIKE '%abc%' ORDER BY value LIMIT 10 FORMAT Null;
|
||||
|
||||
SET enable_filesystem_cache_on_write_operations = 1;
|
||||
|
||||
TRUNCATE TABLE test;
|
||||
SELECT count() FROM test;
|
||||
|
||||
SYSTEM DROP FILESYSTEM CACHE;
|
||||
|
||||
INSERT INTO test SELECT * FROM generateRandom('key UInt32, value String') LIMIT 10000;
|
||||
|
||||
SELECT 3, * FROM test LIMIT 10 FORMAT Null;
|
||||
|
||||
SYSTEM FLUSH LOGS;
|
||||
SELECT query,
|
||||
ProfileEvents['RemoteFSReadBytes'] > 0 as remote_fs_read,
|
||||
ProfileEvents['RemoteFSCacheReadBytes'] > 0 as remote_fs_cache_read,
|
||||
ProfileEvents['RemoteFSCacheDownloadBytes'] > 0 as remote_fs_read_and_download
|
||||
FROM system.query_log
|
||||
WHERE query LIKE 'SELECT 3, * FROM test LIMIT%'
|
||||
AND type = 'QueryFinish'
|
||||
AND current_database = currentDatabase()
|
||||
ORDER BY query_start_time DESC
|
||||
LIMIT 1;
|
||||
|
||||
DROP TABLE test;
|
||||
|
@ -0,0 +1,19 @@
|
||||
-- { echo }
|
||||
|
||||
SYSTEM DROP FILESYSTEM CACHE;
|
||||
SET enable_filesystem_cache_on_write_operations=0;
|
||||
DROP TABLE IF EXISTS test;
|
||||
CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache', min_bytes_for_wide_part = 10485760;
|
||||
INSERT INTO test SELECT number, toString(number) FROM numbers(100);
|
||||
SELECT * FROM test FORMAT Null;
|
||||
SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache ORDER BY file_segment_range_end, size;
|
||||
0 0 1
|
||||
0 79 80
|
||||
0 745 746
|
||||
SYSTEM DROP FILESYSTEM CACHE;
|
||||
SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache;
|
||||
SELECT * FROM test FORMAT Null;
|
||||
SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache;
|
||||
0 745 746
|
||||
SYSTEM DROP FILESYSTEM CACHE;
|
||||
SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache;
|
@ -0,0 +1,18 @@
|
||||
-- Tags: no-parallel, no-fasttest, no-s3-storage
|
||||
|
||||
-- { echo }
|
||||
|
||||
SYSTEM DROP FILESYSTEM CACHE;
|
||||
SET enable_filesystem_cache_on_write_operations=0;
|
||||
DROP TABLE IF EXISTS test;
|
||||
CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache', min_bytes_for_wide_part = 10485760;
|
||||
INSERT INTO test SELECT number, toString(number) FROM numbers(100);
|
||||
|
||||
SELECT * FROM test FORMAT Null;
|
||||
SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache ORDER BY file_segment_range_end, size;
|
||||
SYSTEM DROP FILESYSTEM CACHE;
|
||||
SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache;
|
||||
SELECT * FROM test FORMAT Null;
|
||||
SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache;
|
||||
SYSTEM DROP FILESYSTEM CACHE;
|
||||
SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache;
|
@ -0,0 +1,75 @@
|
||||
-- { echo }
|
||||
|
||||
SET enable_filesystem_cache_on_write_operations=1;
|
||||
DROP TABLE IF EXISTS test;
|
||||
CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache', min_bytes_for_wide_part = 10485760;
|
||||
SYSTEM DROP FILESYSTEM CACHE;
|
||||
SELECT file_segment_range_begin, file_segment_range_end, size, state FROM (SELECT file_segment_range_begin, file_segment_range_end, size, state, local_path FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path) WHERE endsWith(local_path, 'data.bin') FORMAT Vertical;
|
||||
SELECT count() FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path;
|
||||
0
|
||||
SELECT count() FROM system.filesystem_cache;
|
||||
0
|
||||
INSERT INTO test SELECT number, toString(number) FROM numbers(100);
|
||||
SELECT file_segment_range_begin, file_segment_range_end, size, state FROM (SELECT file_segment_range_begin, file_segment_range_end, size, state, local_path FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path) WHERE endsWith(local_path, 'data.bin') FORMAT Vertical;
|
||||
Row 1:
|
||||
──────
|
||||
file_segment_range_begin: 0
|
||||
file_segment_range_end: 745
|
||||
size: 746
|
||||
state: DOWNLOADED
|
||||
SELECT count() FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path;
|
||||
7
|
||||
SELECT count() FROM system.filesystem_cache;
|
||||
7
|
||||
SELECT count() FROM system.filesystem_cache WHERE cache_hits > 0;
|
||||
0
|
||||
SELECT * FROM test FORMAT Null;
|
||||
SELECT count() FROM system.filesystem_cache WHERE cache_hits > 0;
|
||||
2
|
||||
SELECT * FROM test FORMAT Null;
|
||||
SELECT count() FROM system.filesystem_cache WHERE cache_hits > 0;
|
||||
2
|
||||
SELECT count() size FROM system.filesystem_cache;
|
||||
7
|
||||
SYSTEM DROP FILESYSTEM CACHE;
|
||||
INSERT INTO test SELECT number, toString(number) FROM numbers(100, 200);
|
||||
SELECT file_segment_range_begin, file_segment_range_end, size, state FROM (SELECT file_segment_range_begin, file_segment_range_end, size, state, local_path FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path) WHERE endsWith(local_path, 'data.bin') FORMAT Vertical;
|
||||
Row 1:
|
||||
──────
|
||||
file_segment_range_begin: 0
|
||||
file_segment_range_end: 1659
|
||||
size: 1660
|
||||
state: DOWNLOADED
|
||||
SELECT count() FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path;
|
||||
7
|
||||
SELECT count() FROM system.filesystem_cache;
|
||||
7
|
||||
SELECT count() FROM system.filesystem_cache;
|
||||
7
|
||||
INSERT INTO test SELECT number, toString(number) FROM numbers(100) SETTINGS enable_filesystem_cache_on_write_operations=0;
|
||||
SELECT count() FROM system.filesystem_cache;
|
||||
7
|
||||
INSERT INTO test SELECT number, toString(number) FROM numbers(100);
|
||||
INSERT INTO test SELECT number, toString(number) FROM numbers(300, 10000);
|
||||
SELECT count() FROM system.filesystem_cache;
|
||||
21
|
||||
OPTIMIZE TABLE test FINAL;
|
||||
SELECT count() FROM system.filesystem_cache;
|
||||
27
|
||||
SET mutations_sync=2;
|
||||
ALTER TABLE test UPDATE value = 'kek' WHERE key = 100;
|
||||
SELECT count() FROM system.filesystem_cache;
|
||||
28
|
||||
INSERT INTO test SELECT number, toString(number) FROM numbers(5000000);
|
||||
SYSTEM FLUSH LOGS;
|
||||
SELECT query, ProfileEvents['RemoteFSReadBytes'] > 0 as remote_fs_read
|
||||
FROM system.query_log
|
||||
WHERE query LIKE 'SELECT number, toString(number) FROM numbers(5000000)%'
|
||||
AND type = 'QueryFinish'
|
||||
AND current_database = currentDatabase()
|
||||
ORDER BY query_start_time DESC
|
||||
LIMIT 1;
|
||||
SELECT count() FROM test;
|
||||
5010500
|
||||
SELECT count() FROM test WHERE value LIKE '%010%';
|
||||
18816
|
@ -0,0 +1,64 @@
|
||||
-- Tags: no-parallel, no-fasttest, no-s3-storage
|
||||
|
||||
-- { echo }
|
||||
|
||||
SET enable_filesystem_cache_on_write_operations=1;
|
||||
|
||||
DROP TABLE IF EXISTS test;
|
||||
CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache', min_bytes_for_wide_part = 10485760;
|
||||
|
||||
SYSTEM DROP FILESYSTEM CACHE;
|
||||
|
||||
SELECT file_segment_range_begin, file_segment_range_end, size, state FROM (SELECT file_segment_range_begin, file_segment_range_end, size, state, local_path FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path) WHERE endsWith(local_path, 'data.bin') FORMAT Vertical;
|
||||
SELECT count() FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path;
|
||||
SELECT count() FROM system.filesystem_cache;
|
||||
|
||||
INSERT INTO test SELECT number, toString(number) FROM numbers(100);
|
||||
|
||||
SELECT file_segment_range_begin, file_segment_range_end, size, state FROM (SELECT file_segment_range_begin, file_segment_range_end, size, state, local_path FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path) WHERE endsWith(local_path, 'data.bin') FORMAT Vertical;
|
||||
SELECT count() FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path;
|
||||
SELECT count() FROM system.filesystem_cache;
|
||||
|
||||
SELECT count() FROM system.filesystem_cache WHERE cache_hits > 0;
|
||||
|
||||
SELECT * FROM test FORMAT Null;
|
||||
SELECT count() FROM system.filesystem_cache WHERE cache_hits > 0;
|
||||
|
||||
SELECT * FROM test FORMAT Null;
|
||||
SELECT count() FROM system.filesystem_cache WHERE cache_hits > 0;
|
||||
|
||||
SELECT count() size FROM system.filesystem_cache;
|
||||
|
||||
SYSTEM DROP FILESYSTEM CACHE;
|
||||
|
||||
INSERT INTO test SELECT number, toString(number) FROM numbers(100, 200);
|
||||
|
||||
SELECT file_segment_range_begin, file_segment_range_end, size, state FROM (SELECT file_segment_range_begin, file_segment_range_end, size, state, local_path FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path) WHERE endsWith(local_path, 'data.bin') FORMAT Vertical;
|
||||
SELECT count() FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path;
|
||||
SELECT count() FROM system.filesystem_cache;
|
||||
|
||||
SELECT count() FROM system.filesystem_cache;
|
||||
INSERT INTO test SELECT number, toString(number) FROM numbers(100) SETTINGS enable_filesystem_cache_on_write_operations=0;
|
||||
SELECT count() FROM system.filesystem_cache;
|
||||
|
||||
INSERT INTO test SELECT number, toString(number) FROM numbers(100);
|
||||
INSERT INTO test SELECT number, toString(number) FROM numbers(300, 10000);
|
||||
SELECT count() FROM system.filesystem_cache;
|
||||
OPTIMIZE TABLE test FINAL;
|
||||
SELECT count() FROM system.filesystem_cache;
|
||||
|
||||
SET mutations_sync=2;
|
||||
ALTER TABLE test UPDATE value = 'kek' WHERE key = 100;
|
||||
SELECT count() FROM system.filesystem_cache;
|
||||
|
||||
INSERT INTO test SELECT number, toString(number) FROM numbers(5000000);
|
||||
SYSTEM FLUSH LOGS;
|
||||
SELECT query, ProfileEvents['RemoteFSReadBytes'] > 0 as remote_fs_read
|
||||
FROM system.query_log
|
||||
WHERE query LIKE 'SELECT number, toString(number) FROM numbers(5000000)%'
|
||||
AND type = 'QueryFinish'
|
||||
AND current_database = currentDatabase()
|
||||
ORDER BY query_start_time DESC
|
||||
LIMIT 1;
|
||||
SELECT count() FROM test;
|
||||
SELECT count() FROM test WHERE value LIKE '%010%';
|
Loading…
Reference in New Issue
Block a user