From af3bb3b7aff2aee5620c1bdf5943e59feb1bfc0c Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 14 Mar 2022 19:40:53 +0100 Subject: [PATCH 01/27] Add WriteSettings --- src/Disks/AzureBlobStorage/DiskAzureBlobStorage.cpp | 3 ++- src/Disks/AzureBlobStorage/DiskAzureBlobStorage.h | 3 ++- src/Disks/DiskCacheWrapper.cpp | 2 +- src/Disks/DiskCacheWrapper.h | 2 +- src/Disks/DiskDecorator.cpp | 4 ++-- src/Disks/DiskDecorator.h | 3 ++- src/Disks/DiskEncrypted.cpp | 2 +- src/Disks/DiskEncrypted.h | 3 ++- src/Disks/DiskLocal.cpp | 2 +- src/Disks/DiskLocal.h | 3 ++- src/Disks/DiskMemory.cpp | 2 +- src/Disks/DiskMemory.h | 3 ++- src/Disks/DiskRestartProxy.cpp | 4 ++-- src/Disks/DiskRestartProxy.h | 2 +- src/Disks/DiskWebServer.h | 2 +- src/Disks/HDFS/DiskHDFS.cpp | 2 +- src/Disks/HDFS/DiskHDFS.h | 2 +- src/Disks/IDisk.h | 4 +++- src/Disks/S3/DiskS3.cpp | 2 +- src/Disks/S3/DiskS3.h | 3 ++- src/IO/WriteSettings.h | 11 +++++++++++ 21 files changed, 42 insertions(+), 22 deletions(-) create mode 100644 src/IO/WriteSettings.h diff --git a/src/Disks/AzureBlobStorage/DiskAzureBlobStorage.cpp b/src/Disks/AzureBlobStorage/DiskAzureBlobStorage.cpp index fb07d8c356b..97e98fb3a3e 100644 --- a/src/Disks/AzureBlobStorage/DiskAzureBlobStorage.cpp +++ b/src/Disks/AzureBlobStorage/DiskAzureBlobStorage.cpp @@ -90,7 +90,8 @@ std::unique_ptr DiskAzureBlobStorage::readFile( std::unique_ptr DiskAzureBlobStorage::writeFile( const String & path, size_t buf_size, - WriteMode mode) + WriteMode mode, + const WriteSettings &) { auto blob_path = path + "_" + getRandomASCIIString(8); /// NOTE: path contains the tmp_* prefix in the blob name diff --git a/src/Disks/AzureBlobStorage/DiskAzureBlobStorage.h b/src/Disks/AzureBlobStorage/DiskAzureBlobStorage.h index 63c3c735812..efc245e7eb3 100644 --- a/src/Disks/AzureBlobStorage/DiskAzureBlobStorage.h +++ b/src/Disks/AzureBlobStorage/DiskAzureBlobStorage.h @@ -56,7 +56,8 @@ public: std::unique_ptr writeFile( const String & path, size_t buf_size, - WriteMode mode) 
override; + WriteMode mode, + const WriteSettings & settings) override; DiskType getType() const override; diff --git a/src/Disks/DiskCacheWrapper.cpp b/src/Disks/DiskCacheWrapper.cpp index 3519b1212a4..d73c1ed5042 100644 --- a/src/Disks/DiskCacheWrapper.cpp +++ b/src/Disks/DiskCacheWrapper.cpp @@ -196,7 +196,7 @@ DiskCacheWrapper::readFile( } std::unique_ptr -DiskCacheWrapper::writeFile(const String & path, size_t buf_size, WriteMode mode) +DiskCacheWrapper::writeFile(const String & path, size_t buf_size, WriteMode mode, const WriteSettings &) { if (!cache_file_predicate(path)) return DiskDecorator::writeFile(path, buf_size, mode); diff --git a/src/Disks/DiskCacheWrapper.h b/src/Disks/DiskCacheWrapper.h index dc66333758f..e413a3742f3 100644 --- a/src/Disks/DiskCacheWrapper.h +++ b/src/Disks/DiskCacheWrapper.h @@ -40,7 +40,7 @@ public: std::optional read_hint, std::optional file_size) const override; - std::unique_ptr writeFile(const String & path, size_t buf_size, WriteMode mode) override; + std::unique_ptr writeFile(const String & path, size_t buf_size, WriteMode mode, const WriteSettings &) override; void removeFile(const String & path) override; void removeFileIfExists(const String & path) override; diff --git a/src/Disks/DiskDecorator.cpp b/src/Disks/DiskDecorator.cpp index 37911f16913..14f507af55d 100644 --- a/src/Disks/DiskDecorator.cpp +++ b/src/Disks/DiskDecorator.cpp @@ -121,9 +121,9 @@ DiskDecorator::readFile( } std::unique_ptr -DiskDecorator::writeFile(const String & path, size_t buf_size, WriteMode mode) +DiskDecorator::writeFile(const String & path, size_t buf_size, WriteMode mode, const WriteSettings & settings) { - return delegate->writeFile(path, buf_size, mode); + return delegate->writeFile(path, buf_size, mode, settings); } void DiskDecorator::removeFile(const String & path) diff --git a/src/Disks/DiskDecorator.h b/src/Disks/DiskDecorator.h index bace54ff22a..33272ba385b 100644 --- a/src/Disks/DiskDecorator.h +++ b/src/Disks/DiskDecorator.h @@ 
-44,7 +44,8 @@ public: std::unique_ptr writeFile( const String & path, size_t buf_size, - WriteMode mode) override; + WriteMode mode, + const WriteSettings & settings) override; void removeFile(const String & path) override; void removeFileIfExists(const String & path) override; diff --git a/src/Disks/DiskEncrypted.cpp b/src/Disks/DiskEncrypted.cpp index 714264b7720..3cee205fafc 100644 --- a/src/Disks/DiskEncrypted.cpp +++ b/src/Disks/DiskEncrypted.cpp @@ -269,7 +269,7 @@ std::unique_ptr DiskEncrypted::readFile( return std::make_unique(settings.local_fs_buffer_size, std::move(buffer), key, header); } -std::unique_ptr DiskEncrypted::writeFile(const String & path, size_t buf_size, WriteMode mode) +std::unique_ptr DiskEncrypted::writeFile(const String & path, size_t buf_size, WriteMode mode, const WriteSettings &) { auto wrapped_path = wrappedPath(path); FileEncryption::Header header; diff --git a/src/Disks/DiskEncrypted.h b/src/Disks/DiskEncrypted.h index d99fe17457d..07a2ad81010 100644 --- a/src/Disks/DiskEncrypted.h +++ b/src/Disks/DiskEncrypted.h @@ -126,7 +126,8 @@ public: std::unique_ptr writeFile( const String & path, size_t buf_size, - WriteMode mode) override; + WriteMode mode, + const WriteSettings & settings) override; void removeFile(const String & path) override { diff --git a/src/Disks/DiskLocal.cpp b/src/Disks/DiskLocal.cpp index 44fdbb77323..c07a6ed32f6 100644 --- a/src/Disks/DiskLocal.cpp +++ b/src/Disks/DiskLocal.cpp @@ -344,7 +344,7 @@ std::unique_ptr DiskLocal::readFile(const String & path, } std::unique_ptr -DiskLocal::writeFile(const String & path, size_t buf_size, WriteMode mode) +DiskLocal::writeFile(const String & path, size_t buf_size, WriteMode mode, const WriteSettings &) { int flags = (mode == WriteMode::Append) ? 
(O_APPEND | O_CREAT | O_WRONLY) : -1; return std::make_unique(fs::path(disk_path) / path, buf_size, flags); diff --git a/src/Disks/DiskLocal.h b/src/Disks/DiskLocal.h index 76d5a88a626..59dcf5e5c13 100644 --- a/src/Disks/DiskLocal.h +++ b/src/Disks/DiskLocal.h @@ -79,7 +79,8 @@ public: std::unique_ptr writeFile( const String & path, size_t buf_size, - WriteMode mode) override; + WriteMode mode, + const WriteSettings & settings) override; void removeFile(const String & path) override; void removeFileIfExists(const String & path) override; diff --git a/src/Disks/DiskMemory.cpp b/src/Disks/DiskMemory.cpp index abaea0846a5..4f0e881e079 100644 --- a/src/Disks/DiskMemory.cpp +++ b/src/Disks/DiskMemory.cpp @@ -326,7 +326,7 @@ std::unique_ptr DiskMemory::readFile(const String & path return std::make_unique(path, iter->second.data); } -std::unique_ptr DiskMemory::writeFile(const String & path, size_t buf_size, WriteMode mode) +std::unique_ptr DiskMemory::writeFile(const String & path, size_t buf_size, WriteMode mode, const WriteSettings &) { std::lock_guard lock(mutex); diff --git a/src/Disks/DiskMemory.h b/src/Disks/DiskMemory.h index fe108f53c68..726be8bc3b5 100644 --- a/src/Disks/DiskMemory.h +++ b/src/Disks/DiskMemory.h @@ -71,7 +71,8 @@ public: std::unique_ptr writeFile( const String & path, size_t buf_size, - WriteMode mode) override; + WriteMode mode, + const WriteSettings & settings) override; void removeFile(const String & path) override; void removeFileIfExists(const String & path) override; diff --git a/src/Disks/DiskRestartProxy.cpp b/src/Disks/DiskRestartProxy.cpp index 43011a4cf72..a1c63d1e5a9 100644 --- a/src/Disks/DiskRestartProxy.cpp +++ b/src/Disks/DiskRestartProxy.cpp @@ -214,10 +214,10 @@ std::unique_ptr DiskRestartProxy::readFile( return std::make_unique(*this, std::move(impl)); } -std::unique_ptr DiskRestartProxy::writeFile(const String & path, size_t buf_size, WriteMode mode) +std::unique_ptr DiskRestartProxy::writeFile(const String & path, size_t 
buf_size, WriteMode mode, const WriteSettings & settings) { ReadLock lock (mutex); - auto impl = DiskDecorator::writeFile(path, buf_size, mode); + auto impl = DiskDecorator::writeFile(path, buf_size, mode, settings); return std::make_unique(*this, std::move(impl)); } diff --git a/src/Disks/DiskRestartProxy.h b/src/Disks/DiskRestartProxy.h index 30f553f4fe0..2a0d40bffb6 100644 --- a/src/Disks/DiskRestartProxy.h +++ b/src/Disks/DiskRestartProxy.h @@ -48,7 +48,7 @@ public: const ReadSettings & settings, std::optional read_hint, std::optional file_size) const override; - std::unique_ptr writeFile(const String & path, size_t buf_size, WriteMode mode) override; + std::unique_ptr writeFile(const String & path, size_t buf_size, WriteMode mode, const WriteSettings & settings) override; void removeFile(const String & path) override; void removeFileIfExists(const String & path) override; void removeDirectory(const String & path) override; diff --git a/src/Disks/DiskWebServer.h b/src/Disks/DiskWebServer.h index e2da0b2a1e1..5dbfbe8994a 100644 --- a/src/Disks/DiskWebServer.h +++ b/src/Disks/DiskWebServer.h @@ -100,7 +100,7 @@ public: /// Write and modification part - std::unique_ptr writeFile(const String &, size_t, WriteMode) override + std::unique_ptr writeFile(const String &, size_t, WriteMode, const WriteSettings &) override { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Disk {} is read-only", getName()); } diff --git a/src/Disks/HDFS/DiskHDFS.cpp b/src/Disks/HDFS/DiskHDFS.cpp index 7f60b219a4b..f78ecd2669a 100644 --- a/src/Disks/HDFS/DiskHDFS.cpp +++ b/src/Disks/HDFS/DiskHDFS.cpp @@ -88,7 +88,7 @@ std::unique_ptr DiskHDFS::readFile(const String & path, } -std::unique_ptr DiskHDFS::writeFile(const String & path, size_t buf_size, WriteMode mode) +std::unique_ptr DiskHDFS::writeFile(const String & path, size_t buf_size, WriteMode mode, const WriteSettings &) { /// Path to store new HDFS object. 
auto file_name = getRandomName(); diff --git a/src/Disks/HDFS/DiskHDFS.h b/src/Disks/HDFS/DiskHDFS.h index 23a108507b4..eba58101bc4 100644 --- a/src/Disks/HDFS/DiskHDFS.h +++ b/src/Disks/HDFS/DiskHDFS.h @@ -60,7 +60,7 @@ public: std::optional read_hint, std::optional file_size) const override; - std::unique_ptr writeFile(const String & path, size_t buf_size, WriteMode mode) override; + std::unique_ptr writeFile(const String & path, size_t buf_size, WriteMode mode, const WriteSettings & settings) override; void removeFromRemoteFS(RemoteFSPathKeeperPtr fs_paths_keeper) override; diff --git a/src/Disks/IDisk.h b/src/Disks/IDisk.h index 4fa73b8eba8..ac48f4f4d89 100644 --- a/src/Disks/IDisk.h +++ b/src/Disks/IDisk.h @@ -9,6 +9,7 @@ #include #include #include +#include #include #include @@ -168,7 +169,8 @@ public: virtual std::unique_ptr writeFile( /// NOLINT const String & path, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, - WriteMode mode = WriteMode::Rewrite) = 0; + WriteMode mode = WriteMode::Rewrite, + const WriteSettings & settings = {}) = 0; /// Remove file. Throws exception if file doesn't exists or it's a directory. 
virtual void removeFile(const String & path) = 0; diff --git a/src/Disks/S3/DiskS3.cpp b/src/Disks/S3/DiskS3.cpp index e46620d9d1f..3061754bd2d 100644 --- a/src/Disks/S3/DiskS3.cpp +++ b/src/Disks/S3/DiskS3.cpp @@ -244,7 +244,7 @@ std::unique_ptr DiskS3::readFile(const String & path, co } } -std::unique_ptr DiskS3::writeFile(const String & path, size_t buf_size, WriteMode mode) +std::unique_ptr DiskS3::writeFile(const String & path, size_t buf_size, WriteMode mode, const WriteSettings &) { auto settings = current_settings.get(); diff --git a/src/Disks/S3/DiskS3.h b/src/Disks/S3/DiskS3.h index 7e39c9d9b3c..32eb9ee7aef 100644 --- a/src/Disks/S3/DiskS3.h +++ b/src/Disks/S3/DiskS3.h @@ -88,7 +88,8 @@ public: std::unique_ptr writeFile( const String & path, size_t buf_size, - WriteMode mode) override; + WriteMode mode, + const WriteSettings & settings) override; void removeFromRemoteFS(RemoteFSPathKeeperPtr keeper) override; diff --git a/src/IO/WriteSettings.h b/src/IO/WriteSettings.h new file mode 100644 index 00000000000..bcc89fbfc68 --- /dev/null +++ b/src/IO/WriteSettings.h @@ -0,0 +1,11 @@ + + +namespace DB +{ + +struct WriteSettings +{ + bool remote_fs_cache_on_insert = false; +}; + +} From 1d80262a2a4d150098e267b52660d768807bbe08 Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 14 Mar 2022 20:15:07 +0100 Subject: [PATCH 02/27] Add write settings, file cache settings --- src/Common/FileCache.cpp | 17 ++++---- src/Common/FileCache.h | 8 +--- src/Common/FileCacheFactory.cpp | 8 +--- src/Common/FileCacheFactory.h | 2 +- src/Common/FileCacheSettings.cpp | 16 ++++++++ src/Common/FileCacheSettings.h | 18 +++++++++ src/Common/FileCache_fwd.h | 3 ++ src/Disks/RemoteDisksCommon.cpp | 18 ++++++--- src/IO/WriteBufferFromS3.cpp | 40 ++++++++++++++----- src/IO/WriteBufferFromS3.h | 26 +++++++----- .../MergeTree/MergedBlockOutputStream.cpp | 8 ++-- .../MergeTree/MergedBlockOutputStream.h | 7 +++- 12 files changed, 118 insertions(+), 53 deletions(-) create mode 100644 
src/Common/FileCacheSettings.cpp create mode 100644 src/Common/FileCacheSettings.h diff --git a/src/Common/FileCache.cpp b/src/Common/FileCache.cpp index dffa4fac44d..5cc2e707667 100644 --- a/src/Common/FileCache.cpp +++ b/src/Common/FileCache.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -31,13 +32,11 @@ namespace IFileCache::IFileCache( const String & cache_base_path_, - size_t max_size_, - size_t max_element_size_, - size_t max_file_segment_size_) + const FileCacheSettings & cache_settings_) : cache_base_path(cache_base_path_) - , max_size(max_size_) - , max_element_size(max_element_size_) - , max_file_segment_size(max_file_segment_size_) + , max_size(cache_settings_.max_cache_size) + , max_element_size(cache_settings_.max_cache_elements) + , max_file_segment_size(cache_settings_.max_file_segment_size) { } @@ -71,8 +70,8 @@ void IFileCache::assertInitialized() const throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR, "Cache not initialized"); } -LRUFileCache::LRUFileCache(const String & cache_base_path_, size_t max_size_, size_t max_element_size_, size_t max_file_segment_size_) - : IFileCache(cache_base_path_, max_size_, max_element_size_, max_file_segment_size_) +LRUFileCache::LRUFileCache(const String & cache_base_path_, const FileCacheSettings & cache_settings_) + : IFileCache(cache_base_path_, cache_settings_) , log(&Poco::Logger::get("LRUFileCache")) { } @@ -364,7 +363,7 @@ bool LRUFileCache::tryReserve( auto is_overflow = [&] { - return (current_size + size - removed_size > max_size) + return (max_size != 0 && current_size + size - removed_size > max_size) || (max_element_size != 0 && queue_size > max_element_size); }; diff --git a/src/Common/FileCache.h b/src/Common/FileCache.h index d51dfe7a9ff..dd585369853 100644 --- a/src/Common/FileCache.h +++ b/src/Common/FileCache.h @@ -32,9 +32,7 @@ public: IFileCache( const String & cache_base_path_, - size_t max_size_, - size_t max_element_size_, - size_t 
max_file_segment_size_); + const FileCacheSettings & cache_settings_); virtual ~IFileCache() = default; @@ -111,9 +109,7 @@ class LRUFileCache final : public IFileCache public: LRUFileCache( const String & cache_base_path_, - size_t max_size_, - size_t max_element_size_ = REMOTE_FS_OBJECTS_CACHE_DEFAULT_MAX_ELEMENTS, - size_t max_file_segment_size_ = REMOTE_FS_OBJECTS_CACHE_DEFAULT_MAX_FILE_SEGMENT_SIZE); + const FileCacheSettings & cache_settings_); FileSegmentsHolder getOrSet(const Key & key, size_t offset, size_t size) override; diff --git a/src/Common/FileCacheFactory.cpp b/src/Common/FileCacheFactory.cpp index fc8dff0b26c..567d091fdeb 100644 --- a/src/Common/FileCacheFactory.cpp +++ b/src/Common/FileCacheFactory.cpp @@ -24,18 +24,14 @@ FileCachePtr FileCacheFactory::getImpl(const std::string & cache_base_path, std: } FileCachePtr FileCacheFactory::getOrCreate( - const std::string & cache_base_path, size_t max_size, size_t max_elements_size, size_t max_file_segment_size) + const std::string & cache_base_path, const FileCacheSettings & file_cache_settings) { std::lock_guard lock(mutex); auto cache = getImpl(cache_base_path, lock); if (cache) - { - if (cache->capacity() != max_size) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cache with path `{}` already exists, but has different max size", cache_base_path); return cache; - } - cache = std::make_shared(cache_base_path, max_size, max_elements_size, max_file_segment_size); + cache = std::make_shared(cache_base_path, file_cache_settings); caches.emplace(cache_base_path, cache); return cache; } diff --git a/src/Common/FileCacheFactory.h b/src/Common/FileCacheFactory.h index f2432f03cae..176b96a862e 100644 --- a/src/Common/FileCacheFactory.h +++ b/src/Common/FileCacheFactory.h @@ -17,7 +17,7 @@ class FileCacheFactory final : private boost::noncopyable public: static FileCacheFactory & instance(); - FileCachePtr getOrCreate(const std::string & cache_base_path, size_t max_size, size_t max_elements_size, size_t 
max_file_segment_size); + FileCachePtr getOrCreate(const std::string & cache_base_path, const FileCacheSettings & file_cache_settings); private: FileCachePtr getImpl(const std::string & cache_base_path, std::lock_guard &); diff --git a/src/Common/FileCacheSettings.cpp b/src/Common/FileCacheSettings.cpp new file mode 100644 index 00000000000..39a6a2ec723 --- /dev/null +++ b/src/Common/FileCacheSettings.cpp @@ -0,0 +1,16 @@ +#include "FileCacheSettings.h" + +#include + +namespace DB +{ + +void FileCacheSettings::loadFromConfig(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix) +{ + max_cache_size = config.getUInt64(config_prefix + ".data_cache_max_size", REMOTE_FS_OBJECTS_CACHE_DEFAULT_MAX_CACHE_SIZE); + max_cache_elements = config.getUInt64(config_prefix + ".data_cache_max_elements", REMOTE_FS_OBJECTS_CACHE_DEFAULT_MAX_ELEMENTS); + max_file_segment_size = config.getUInt64(config_prefix + ".max_file_segment_size", REMOTE_FS_OBJECTS_CACHE_DEFAULT_MAX_FILE_SEGMENT_SIZE); + cache_on_insert = config.getUInt64(config_prefix + ".cache_on_insert", false); +} + +} diff --git a/src/Common/FileCacheSettings.h b/src/Common/FileCacheSettings.h new file mode 100644 index 00000000000..8aa2bfb9214 --- /dev/null +++ b/src/Common/FileCacheSettings.h @@ -0,0 +1,18 @@ +#include + +namespace Poco { namespace Util { class AbstractConfiguration; }} + +namespace DB +{ + +struct FileCacheSettings +{ + size_t max_cache_size = 0; + size_t max_cache_elements = REMOTE_FS_OBJECTS_CACHE_DEFAULT_MAX_ELEMENTS; + size_t max_file_segment_size = REMOTE_FS_OBJECTS_CACHE_DEFAULT_MAX_FILE_SEGMENT_SIZE; + bool cache_on_insert = false; + + void loadFromConfig(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix); +}; + +} diff --git a/src/Common/FileCache_fwd.h b/src/Common/FileCache_fwd.h index cab1525600b..7448f0c8c89 100644 --- a/src/Common/FileCache_fwd.h +++ b/src/Common/FileCache_fwd.h @@ -4,10 +4,13 @@ namespace DB { +static constexpr 
int REMOTE_FS_OBJECTS_CACHE_DEFAULT_MAX_CACHE_SIZE = 1024 * 1024 * 1024; static constexpr int REMOTE_FS_OBJECTS_CACHE_DEFAULT_MAX_FILE_SEGMENT_SIZE = 100 * 1024 * 1024; static constexpr int REMOTE_FS_OBJECTS_CACHE_DEFAULT_MAX_ELEMENTS = 1024 * 1024; class IFileCache; using FileCachePtr = std::shared_ptr; +struct FileCacheSettings; + } diff --git a/src/Disks/RemoteDisksCommon.cpp b/src/Disks/RemoteDisksCommon.cpp index 36f2aed3e7c..4805434e5ee 100644 --- a/src/Disks/RemoteDisksCommon.cpp +++ b/src/Disks/RemoteDisksCommon.cpp @@ -2,6 +2,7 @@ #include #include #include +#include namespace DB { @@ -64,18 +65,23 @@ FileCachePtr getCachePtrForDisk( if (!fs::exists(cache_base_path)) fs::create_directories(cache_base_path); - LOG_INFO(&Poco::Logger::get("Disk(" + name + ")"), "Disk registered with cache path: {}", cache_base_path); - auto metadata_path = getDiskMetadataPath(name, config, config_prefix, context); if (metadata_path == cache_base_path) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Metadata path and cache base path must be different: {}", metadata_path); - size_t max_cache_size = config.getUInt64(config_prefix + ".data_cache_max_size", 1024*1024*1024); - size_t max_cache_elements = config.getUInt64(config_prefix + ".data_cache_max_elements", REMOTE_FS_OBJECTS_CACHE_DEFAULT_MAX_ELEMENTS); - size_t max_file_segment_size = config.getUInt64(config_prefix + ".max_file_segment_size", REMOTE_FS_OBJECTS_CACHE_DEFAULT_MAX_FILE_SEGMENT_SIZE); + FileCacheSettings file_cache_settings; + file_cache_settings.loadFromConfig(config, config_prefix); - auto cache = FileCacheFactory::instance().getOrCreate(cache_base_path, max_cache_size, max_cache_elements, max_file_segment_size); + auto cache = FileCacheFactory::instance().getOrCreate(cache_base_path, file_cache_settings); cache->initialize(); + + auto * log = &Poco::Logger::get("Disk(" + name + ")"); + LOG_INFO(log, "Disk registered with cache path: {}. 
Cache size: {}, max cache elements size: {}, max_file_segment_size: {}", + cache_base_path, + file_cache_settings.max_cache_size ? toString(file_cache_settings.max_cache_size) : "UNLIMITED", + file_cache_settings.max_cache_elements ? toString(file_cache_settings.max_cache_elements) : "UNLIMITED", + file_cache_settings.max_file_segment_size); + return cache; } diff --git a/src/IO/WriteBufferFromS3.cpp b/src/IO/WriteBufferFromS3.cpp index eda7bb6f8ae..4f1016d43bd 100644 --- a/src/IO/WriteBufferFromS3.cpp +++ b/src/IO/WriteBufferFromS3.cpp @@ -2,17 +2,19 @@ #if USE_AWS_S3 -# include -# include +#include +#include -# include -# include -# include -# include -# include -# include +#include +#include -# include +#include +#include +#include +#include +#include + +#include namespace ProfileEvents @@ -59,7 +61,8 @@ WriteBufferFromS3::WriteBufferFromS3( size_t max_single_part_upload_size_, std::optional> object_metadata_, size_t buffer_size_, - ScheduleFunc schedule_) + ScheduleFunc schedule_, + FileCachePtr cache_) : BufferWithOwnMemory(buffer_size_, nullptr, 0) , bucket(bucket_) , key(key_) @@ -70,6 +73,7 @@ WriteBufferFromS3::WriteBufferFromS3( , upload_part_size_multiply_threshold(upload_part_size_multiply_threshold_) , max_single_part_upload_size(max_single_part_upload_size_) , schedule(std::move(schedule_)) + , cache(cache_) { allocateBuffer(); } @@ -95,7 +99,6 @@ void WriteBufferFromS3::nextImpl() if (!multipart_upload_id.empty() && last_part_size > upload_part_size) { - writePart(); allocateBuffer(); @@ -126,6 +129,21 @@ WriteBufferFromS3::~WriteBufferFromS3() } } +void WriteBufferFromS3::tryWriteToCacheIfNeeded() +{ + if (!cache || IFileCache::shouldBypassCache()) + return; + + try + { + } + catch (...) 
+ { + tryLogCurrentException(__PRETTY_FUNCTION__); + throw; + } +} + void WriteBufferFromS3::preFinalize() { next(); diff --git a/src/IO/WriteBufferFromS3.h b/src/IO/WriteBufferFromS3.h index a4fbcbcdeeb..595a7a929c1 100644 --- a/src/IO/WriteBufferFromS3.h +++ b/src/IO/WriteBufferFromS3.h @@ -4,16 +4,19 @@ #if USE_AWS_S3 -# include -# include -# include -# include -# include +#include +#include +#include +#include +#include -# include -# include +#include +#include -# include +#include +#include + +#include namespace Aws::S3 { @@ -51,7 +54,8 @@ public: size_t max_single_part_upload_size_, std::optional> object_metadata_ = std::nullopt, size_t buffer_size_ = DBMS_DEFAULT_BUFFER_SIZE, - ScheduleFunc schedule_ = {}); + ScheduleFunc schedule_ = {}, + FileCachePtr cache_ = nullptr); ~WriteBufferFromS3() override; @@ -82,6 +86,8 @@ private: void waitForReadyBackGroundTasks(); void waitForAllBackGroundTasks(); + void tryWriteToCacheIfNeeded(); + String bucket; String key; std::optional> object_metadata; @@ -113,6 +119,8 @@ private: std::condition_variable bg_tasks_condvar; Poco::Logger * log = &Poco::Logger::get("WriteBufferFromS3"); + + FileCachePtr cache; }; } diff --git a/src/Storages/MergeTree/MergedBlockOutputStream.cpp b/src/Storages/MergeTree/MergedBlockOutputStream.cpp index f94c89e20bd..5bd9217226d 100644 --- a/src/Storages/MergeTree/MergedBlockOutputStream.cpp +++ b/src/Storages/MergeTree/MergedBlockOutputStream.cpp @@ -122,7 +122,8 @@ MergedBlockOutputStream::Finalizer MergedBlockOutputStream::finalizePartAsync( MergeTreeData::MutableDataPartPtr & new_part, bool sync, const NamesAndTypesList * total_columns_list, - MergeTreeData::DataPart::Checksums * additional_column_checksums) + MergeTreeData::DataPart::Checksums * additional_column_checksums, + const WriteSettings & write_settings) { /// Finish write and get checksums. 
MergeTreeData::DataPart::Checksums checksums; @@ -156,7 +157,7 @@ MergedBlockOutputStream::Finalizer MergedBlockOutputStream::finalizePartAsync( auto finalizer = std::make_unique(*writer, new_part, files_to_remove_after_sync, sync); if (new_part->isStoredOnDisk()) - finalizer->written_files = finalizePartOnDisk(new_part, checksums); + finalizer->written_files = finalizePartOnDisk(new_part, checksums, write_settings); new_part->rows_count = rows_count; new_part->modification_time = time(nullptr); @@ -174,7 +175,8 @@ MergedBlockOutputStream::Finalizer MergedBlockOutputStream::finalizePartAsync( MergedBlockOutputStream::WrittenFiles MergedBlockOutputStream::finalizePartOnDisk( const MergeTreeData::DataPartPtr & new_part, - MergeTreeData::DataPart::Checksums & checksums) + MergeTreeData::DataPart::Checksums & checksums, + const WriteSettings & write_settings) { WrittenFiles written_files; if (new_part->isProjectionPart()) diff --git a/src/Storages/MergeTree/MergedBlockOutputStream.h b/src/Storages/MergeTree/MergedBlockOutputStream.h index c17cfd22cd8..05f70239517 100644 --- a/src/Storages/MergeTree/MergedBlockOutputStream.h +++ b/src/Storages/MergeTree/MergedBlockOutputStream.h @@ -2,6 +2,7 @@ #include #include +#include namespace DB @@ -54,7 +55,8 @@ public: MergeTreeData::MutableDataPartPtr & new_part, bool sync, const NamesAndTypesList * total_columns_list = nullptr, - MergeTreeData::DataPart::Checksums * additional_column_checksums = nullptr); + MergeTreeData::DataPart::Checksums * additional_column_checksums = nullptr, + const WriteSettings & write_settings = {}); void finalizePart( MergeTreeData::MutableDataPartPtr & new_part, @@ -71,7 +73,8 @@ private: using WrittenFiles = std::vector>; WrittenFiles finalizePartOnDisk( const MergeTreeData::DataPartPtr & new_part, - MergeTreeData::DataPart::Checksums & checksums); + MergeTreeData::DataPart::Checksums & checksums, + const WriteSettings & write_settings); NamesAndTypesList columns_list; 
IMergeTreeDataPart::MinMaxIndex minmax_idx; From 59b7394caf516e9ecca91f529dc02f7bf7955cf0 Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 21 Mar 2022 09:52:48 +0100 Subject: [PATCH 03/27] WriteSettings --- src/Disks/DiskCacheWrapper.cpp | 19 +++++++++++++------ src/Disks/DiskLocal.cpp | 2 +- src/Disks/S3/DiskS3.cpp | 4 ++-- src/IO/WriteSettings.h | 2 +- src/Interpreters/Context.cpp | 10 ++++++++++ src/Interpreters/Context.h | 4 ++++ .../MergeTree/MergeTreeDataWriter.cpp | 6 +++++- .../MergeTree/MergedBlockOutputStream.cpp | 18 +++++++++--------- .../MergeTree/MergedBlockOutputStream.h | 2 +- 9 files changed, 46 insertions(+), 21 deletions(-) diff --git a/src/Disks/DiskCacheWrapper.cpp b/src/Disks/DiskCacheWrapper.cpp index d73c1ed5042..de5397a87c2 100644 --- a/src/Disks/DiskCacheWrapper.cpp +++ b/src/Disks/DiskCacheWrapper.cpp @@ -167,7 +167,11 @@ DiskCacheWrapper::readFile( auto tmp_path = path + ".tmp"; { auto src_buffer = DiskDecorator::readFile(path, current_read_settings, read_hint, file_size); - auto dst_buffer = cache_disk->writeFile(tmp_path, settings.local_fs_buffer_size, WriteMode::Rewrite); + + WriteSettings write_settings; + write_settings.remote_fs_cache_on_insert = false; + + auto dst_buffer = cache_disk->writeFile(tmp_path, settings.local_fs_buffer_size, WriteMode::Rewrite, write_settings); copyData(*src_buffer, *dst_buffer); } cache_disk->moveFile(tmp_path, path); @@ -196,10 +200,13 @@ DiskCacheWrapper::readFile( } std::unique_ptr -DiskCacheWrapper::writeFile(const String & path, size_t buf_size, WriteMode mode, const WriteSettings &) +DiskCacheWrapper::writeFile(const String & path, size_t buf_size, WriteMode mode, const WriteSettings & settings) { if (!cache_file_predicate(path)) - return DiskDecorator::writeFile(path, buf_size, mode); + return DiskDecorator::writeFile(path, buf_size, mode, settings); + + WriteSettings current_settings = settings; + current_settings.remote_fs_cache_on_insert = false; LOG_TEST(log, "Write file {} to cache", 
backQuote(path)); @@ -208,15 +215,15 @@ DiskCacheWrapper::writeFile(const String & path, size_t buf_size, WriteMode mode cache_disk->createDirectories(dir_path); return std::make_unique( - cache_disk->writeFile(path, buf_size, mode), + cache_disk->writeFile(path, buf_size, mode, current_settings), [this, path]() { /// Copy file from cache to actual disk when cached buffer is finalized. return cache_disk->readFile(path, ReadSettings(), /* read_hint= */ {}, /* file_size= */ {}); }, - [this, path, buf_size, mode]() + [this, path, buf_size, mode, current_settings]() { - return DiskDecorator::writeFile(path, buf_size, mode); + return DiskDecorator::writeFile(path, buf_size, mode, current_settings); }); } diff --git a/src/Disks/DiskLocal.cpp b/src/Disks/DiskLocal.cpp index c07a6ed32f6..78be223edee 100644 --- a/src/Disks/DiskLocal.cpp +++ b/src/Disks/DiskLocal.cpp @@ -623,7 +623,7 @@ bool DiskLocal::setup() pcg32_fast rng(randomSeed()); UInt32 magic_number = rng(); { - auto buf = writeFile(disk_checker_path, DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Rewrite); + auto buf = writeFile(disk_checker_path, DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Rewrite, {}); writeIntBinary(magic_number, *buf); } disk_checker_magic_number = magic_number; diff --git a/src/Disks/S3/DiskS3.cpp b/src/Disks/S3/DiskS3.cpp index 3061754bd2d..797b7ed15f4 100644 --- a/src/Disks/S3/DiskS3.cpp +++ b/src/Disks/S3/DiskS3.cpp @@ -244,7 +244,7 @@ std::unique_ptr DiskS3::readFile(const String & path, co } } -std::unique_ptr DiskS3::writeFile(const String & path, size_t buf_size, WriteMode mode, const WriteSettings &) +std::unique_ptr DiskS3::writeFile(const String & path, size_t buf_size, WriteMode mode, const WriteSettings & write_settings) { auto settings = current_settings.get(); @@ -299,7 +299,7 @@ std::unique_ptr DiskS3::writeFile(const String & path, settings->s3_upload_part_size_multiply_parts_count_threshold, settings->s3_max_single_part_upload_size, std::move(object_metadata), - buf_size, 
std::move(schedule)); + buf_size, std::move(schedule), write_settings.remote_fs_cache_on_insert ? cache : nullptr); auto create_metadata_callback = [this, path, blob_name, mode] (size_t count) { diff --git a/src/IO/WriteSettings.h b/src/IO/WriteSettings.h index bcc89fbfc68..c34193574bb 100644 --- a/src/IO/WriteSettings.h +++ b/src/IO/WriteSettings.h @@ -1,4 +1,4 @@ - +#pragma once namespace DB { diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index ac1bfc620b0..a64ef3a88be 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -68,6 +68,7 @@ #include #include #include +#include #include #include #include @@ -3198,4 +3199,13 @@ ReadSettings Context::getReadSettings() const return res; } +WriteSettings Context::getWriteSettings() const +{ + WriteSettings res; + + res.remote_fs_cache_on_insert = settings.remote_fs_cache_on_insert; + + return res; +} + } diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index c3615db9068..9043a690cff 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -116,6 +116,7 @@ struct PartUUIDs; using PartUUIDsPtr = std::shared_ptr; class KeeperDispatcher; class Session; +struct WriteSettings; class IInputFormat; class IOutputFormat; @@ -892,6 +893,9 @@ public: /** Get settings for reading from filesystem. */ ReadSettings getReadSettings() const; + /** Get settings for writing to filesystem. 
*/ + WriteSettings getWriteSettings() const; + private: std::unique_lock getLock() const; diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/src/Storages/MergeTree/MergeTreeDataWriter.cpp index 4805a273c70..2fb075a54bb 100644 --- a/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -452,7 +452,11 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeTempPart( temp_part.streams.emplace_back(std::move(stream)); } } - auto finalizer = out->finalizePartAsync(new_data_part, data_settings->fsync_after_insert); + auto finalizer = out->finalizePartAsync( + new_data_part, + data_settings->fsync_after_insert, + nullptr, nullptr, + context->getWriteSettings()); temp_part.part = new_data_part; temp_part.streams.emplace_back(TemporaryPart::Stream{.stream = std::move(out), .finalizer = std::move(finalizer)}); diff --git a/src/Storages/MergeTree/MergedBlockOutputStream.cpp b/src/Storages/MergeTree/MergedBlockOutputStream.cpp index 5bd9217226d..be6bb453ad5 100644 --- a/src/Storages/MergeTree/MergedBlockOutputStream.cpp +++ b/src/Storages/MergeTree/MergedBlockOutputStream.cpp @@ -176,14 +176,14 @@ MergedBlockOutputStream::Finalizer MergedBlockOutputStream::finalizePartAsync( MergedBlockOutputStream::WrittenFiles MergedBlockOutputStream::finalizePartOnDisk( const MergeTreeData::DataPartPtr & new_part, MergeTreeData::DataPart::Checksums & checksums, - const WriteSettings & write_settings) + const WriteSettings & settings) { WrittenFiles written_files; if (new_part->isProjectionPart()) { if (storage.format_version >= MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING || isCompactPart(new_part)) { - auto count_out = volume->getDisk()->writeFile(part_path + "count.txt", 4096); + auto count_out = volume->getDisk()->writeFile(part_path + "count.txt", 4096, WriteMode::Rewrite, settings); HashingWriteBuffer count_out_hashing(*count_out); writeIntText(rows_count, count_out_hashing); count_out_hashing.next(); @@ 
-197,7 +197,7 @@ MergedBlockOutputStream::WrittenFiles MergedBlockOutputStream::finalizePartOnDis { if (new_part->uuid != UUIDHelpers::Nil) { - auto out = volume->getDisk()->writeFile(fs::path(part_path) / IMergeTreeDataPart::UUID_FILE_NAME, 4096); + auto out = volume->getDisk()->writeFile(fs::path(part_path) / IMergeTreeDataPart::UUID_FILE_NAME, 4096, WriteMode::Rewrite, settings); HashingWriteBuffer out_hashing(*out); writeUUIDText(new_part->uuid, out_hashing); checksums.files[IMergeTreeDataPart::UUID_FILE_NAME].file_size = out_hashing.count(); @@ -223,7 +223,7 @@ MergedBlockOutputStream::WrittenFiles MergedBlockOutputStream::finalizePartOnDis } { - auto count_out = volume->getDisk()->writeFile(fs::path(part_path) / "count.txt", 4096); + auto count_out = volume->getDisk()->writeFile(fs::path(part_path) / "count.txt", 4096, WriteMode::Rewrite, settings); HashingWriteBuffer count_out_hashing(*count_out); writeIntText(rows_count, count_out_hashing); count_out_hashing.next(); @@ -237,7 +237,7 @@ MergedBlockOutputStream::WrittenFiles MergedBlockOutputStream::finalizePartOnDis if (!new_part->ttl_infos.empty()) { /// Write a file with ttl infos in json format. 
- auto out = volume->getDisk()->writeFile(fs::path(part_path) / "ttl.txt", 4096); + auto out = volume->getDisk()->writeFile(fs::path(part_path) / "ttl.txt", 4096, WriteMode::Rewrite, settings); HashingWriteBuffer out_hashing(*out); new_part->ttl_infos.write(out_hashing); checksums.files["ttl.txt"].file_size = out_hashing.count(); @@ -248,7 +248,7 @@ MergedBlockOutputStream::WrittenFiles MergedBlockOutputStream::finalizePartOnDis if (!new_part->getSerializationInfos().empty()) { - auto out = volume->getDisk()->writeFile(part_path + IMergeTreeDataPart::SERIALIZATION_FILE_NAME, 4096); + auto out = volume->getDisk()->writeFile(part_path + IMergeTreeDataPart::SERIALIZATION_FILE_NAME, 4096, WriteMode::Rewrite, settings); HashingWriteBuffer out_hashing(*out); new_part->getSerializationInfos().writeJSON(out_hashing); checksums.files[IMergeTreeDataPart::SERIALIZATION_FILE_NAME].file_size = out_hashing.count(); @@ -259,7 +259,7 @@ MergedBlockOutputStream::WrittenFiles MergedBlockOutputStream::finalizePartOnDis { /// Write a file with a description of columns. 
- auto out = volume->getDisk()->writeFile(fs::path(part_path) / "columns.txt", 4096); + auto out = volume->getDisk()->writeFile(fs::path(part_path) / "columns.txt", 4096, WriteMode::Rewrite, settings); new_part->getColumns().writeText(*out); out->preFinalize(); written_files.emplace_back(std::move(out)); @@ -267,7 +267,7 @@ MergedBlockOutputStream::WrittenFiles MergedBlockOutputStream::finalizePartOnDis if (default_codec != nullptr) { - auto out = volume->getDisk()->writeFile(part_path + IMergeTreeDataPart::DEFAULT_COMPRESSION_CODEC_FILE_NAME, 4096); + auto out = volume->getDisk()->writeFile(part_path + IMergeTreeDataPart::DEFAULT_COMPRESSION_CODEC_FILE_NAME, 4096, WriteMode::Rewrite, settings); DB::writeText(queryToString(default_codec->getFullCodecDesc()), *out); out->preFinalize(); written_files.emplace_back(std::move(out)); @@ -280,7 +280,7 @@ MergedBlockOutputStream::WrittenFiles MergedBlockOutputStream::finalizePartOnDis { /// Write file with checksums. - auto out = volume->getDisk()->writeFile(fs::path(part_path) / "checksums.txt", 4096); + auto out = volume->getDisk()->writeFile(fs::path(part_path) / "checksums.txt", 4096, WriteMode::Rewrite, settings); checksums.write(*out); out->preFinalize(); written_files.emplace_back(std::move(out)); diff --git a/src/Storages/MergeTree/MergedBlockOutputStream.h b/src/Storages/MergeTree/MergedBlockOutputStream.h index 05f70239517..6c26e918673 100644 --- a/src/Storages/MergeTree/MergedBlockOutputStream.h +++ b/src/Storages/MergeTree/MergedBlockOutputStream.h @@ -56,7 +56,7 @@ public: bool sync, const NamesAndTypesList * total_columns_list = nullptr, MergeTreeData::DataPart::Checksums * additional_column_checksums = nullptr, - const WriteSettings & write_settings = {}); + const WriteSettings & settings = {}); void finalizePart( MergeTreeData::MutableDataPartPtr & new_part, From eabbce69a72f515f0b769c05d7eb1d78e8448dd1 Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 21 Mar 2022 12:30:25 +0100 Subject: [PATCH 04/27] Add 
system drop remote filesystem cache query, allow to write cache on insert --- src/Common/FileCache.cpp | 77 ++++++++++++++++--- src/Common/FileCache.h | 18 ++++- src/Common/FileCacheFactory.cpp | 16 ++++ src/Common/FileCacheFactory.h | 8 +- src/Common/FileCacheSettings.cpp | 4 +- src/Common/FileCacheSettings.h | 4 +- src/Common/FileSegment.cpp | 32 +++++++- src/Core/Settings.h | 1 + src/Disks/IDiskRemote.cpp | 6 ++ src/Disks/IDiskRemote.h | 3 + src/Disks/RemoteDisksCommon.cpp | 6 +- src/IO/WriteBufferFromS3.cpp | 52 +++++++++---- src/IO/WriteBufferFromS3.h | 5 +- src/Interpreters/InterpreterSystemQuery.cpp | 18 +++++ src/Parsers/ASTSystemQuery.h | 2 + .../MergeTreeDataPartWriterCompact.cpp | 6 +- .../MergeTree/MergeTreeDataWriter.cpp | 2 +- src/Storages/MergeTree/MergeTreeIOSettings.h | 4 + .../MergeTree/MergedBlockOutputStream.cpp | 5 +- .../MergeTree/MergedBlockOutputStream.h | 3 +- .../MergedColumnOnlyOutputStream.cpp | 2 + src/Storages/System/StorageSystemDisks.cpp | 9 +++ .../StorageSystemRemoteFilesystemCache.cpp | 49 ++++++++++++ .../StorageSystemRemoteFilesystemCache.h | 24 ++++++ src/Storages/System/attachSystemTables.cpp | 2 + ...0_system_remote_filesystem_cache.reference | 18 +++++ .../02240_system_remote_filesystem_cache.sql | 17 ++++ ...emote_filesystem_cache_on_insert.reference | 16 ++++ ...2241_remote_filesystem_cache_on_insert.sql | 16 ++++ 29 files changed, 381 insertions(+), 44 deletions(-) create mode 100644 src/Storages/System/StorageSystemRemoteFilesystemCache.cpp create mode 100644 src/Storages/System/StorageSystemRemoteFilesystemCache.h create mode 100644 tests/queries/0_stateless/02240_system_remote_filesystem_cache.reference create mode 100644 tests/queries/0_stateless/02240_system_remote_filesystem_cache.sql create mode 100644 tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.reference create mode 100644 tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.sql diff --git a/src/Common/FileCache.cpp 
b/src/Common/FileCache.cpp index 5cc2e707667..ba39039c378 100644 --- a/src/Common/FileCache.cpp +++ b/src/Common/FileCache.cpp @@ -20,6 +20,7 @@ namespace ErrorCodes { extern const int REMOTE_FS_OBJECT_CACHE_ERROR; extern const int LOGICAL_ERROR; + extern const int BAD_ARGUMENTS; } namespace @@ -34,9 +35,9 @@ IFileCache::IFileCache( const String & cache_base_path_, const FileCacheSettings & cache_settings_) : cache_base_path(cache_base_path_) - , max_size(cache_settings_.max_cache_size) - , max_element_size(cache_settings_.max_cache_elements) - , max_file_segment_size(cache_settings_.max_cache_elements) + , max_size(cache_settings_.max_size) + , max_element_size(cache_settings_.max_elements) + , max_file_segment_size(cache_settings_.max_file_segment_size) { } @@ -204,8 +205,8 @@ FileSegments LRUFileCache::getImpl( return result; } -FileSegments LRUFileCache::splitRangeIntoEmptyCells( - const Key & key, size_t offset, size_t size, std::lock_guard & cache_lock) +FileSegments LRUFileCache::splitRangeIntoCells( + const Key & key, size_t offset, size_t size, FileSegment::State state, std::lock_guard & cache_lock) { assert(size > 0); @@ -221,9 +222,10 @@ FileSegments LRUFileCache::splitRangeIntoEmptyCells( current_cell_size = std::min(remaining_size, max_file_segment_size); remaining_size -= current_cell_size; - auto * cell = addCell(key, current_pos, current_cell_size, FileSegment::State::EMPTY, cache_lock); + auto * cell = addCell(key, current_pos, current_cell_size, state, cache_lock); if (cell) file_segments.push_back(cell->file_segment); + assert(cell); current_pos += current_cell_size; } @@ -245,7 +247,7 @@ FileSegmentsHolder LRUFileCache::getOrSet(const Key & key, size_t offset, size_t if (file_segments.empty()) { - file_segments = splitRangeIntoEmptyCells(key, offset, size, cache_lock); + file_segments = splitRangeIntoCells(key, offset, size, FileSegment::State::EMPTY, cache_lock); } else { @@ -290,7 +292,7 @@ FileSegmentsHolder LRUFileCache::getOrSet(const Key & 
key, size_t offset, size_t assert(current_pos < segment_range.left); auto hole_size = segment_range.left - current_pos; - file_segments.splice(it, splitRangeIntoEmptyCells(key, current_pos, hole_size, cache_lock)); + file_segments.splice(it, splitRangeIntoCells(key, current_pos, hole_size, FileSegment::State::EMPTY, cache_lock)); current_pos = segment_range.right + 1; ++it; @@ -304,7 +306,7 @@ FileSegmentsHolder LRUFileCache::getOrSet(const Key & key, size_t offset, size_t /// segmentN auto hole_size = range.right - current_pos + 1; - file_segments.splice(file_segments.end(), splitRangeIntoEmptyCells(key, current_pos, hole_size, cache_lock)); + file_segments.splice(file_segments.end(), splitRangeIntoCells(key, current_pos, hole_size, FileSegment::State::EMPTY, cache_lock)); } } @@ -345,6 +347,22 @@ LRUFileCache::FileSegmentCell * LRUFileCache::addCell( return &(it->second); } +FileSegmentsHolder LRUFileCache::setDownloading(const Key & key, size_t offset, size_t size) +{ + std::lock_guard cache_lock(mutex); + + auto * cell = getCell(key, offset, cache_lock); + if (cell) + throw Exception( + ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR, + "Cache cell already exists for key `{}` and offset {}", + keyToStr(key), offset); + + auto file_segments = splitRangeIntoCells(key, offset, size, FileSegment::State::DOWNLOADING, cache_lock); + + return FileSegmentsHolder(std::move(file_segments)); +} + bool LRUFileCache::tryReserve( const Key & key_, size_t offset_, size_t size, std::lock_guard & cache_lock) { @@ -475,6 +493,30 @@ void LRUFileCache::remove(const Key & key) fs::remove(key_path); } +void LRUFileCache::tryRemoveAll() +{ + /// Try remove all cached files by cache_base_path. + /// Only releasable file segments are evicted. 
+ + std::lock_guard cache_lock(mutex); + + for (auto it = queue.begin(); it != queue.end();) + { + auto & [key, offset] = *it++; + + auto * cell = getCell(key, offset, cache_lock); + if (cell->releasable()) + { + auto file_segment = cell->file_segment; + if (file_segment) + { + std::lock_guard segment_lock(file_segment->mutex); + remove(file_segment->key(), file_segment->offset(), cache_lock, segment_lock); + } + } + } +} + void LRUFileCache::remove( Key key, size_t offset, std::lock_guard & cache_lock, std::lock_guard & /* segment_lock */) @@ -659,6 +701,20 @@ bool LRUFileCache::isLastFileSegmentHolder( return cell->file_segment.use_count() == 2; } +FileSegmentsHolder LRUFileCache::getAll() +{ + std::lock_guard cache_lock(mutex); + + FileSegments file_segments; + for (const auto & [key, offset] : queue) + { + auto * cell = getCell(key, offset, cache_lock); + file_segments.push_back(cell->file_segment); + } + + return FileSegmentsHolder(std::move(file_segments)); +} + LRUFileCache::FileSegmentCell::FileSegmentCell(FileSegmentPtr file_segment_, LRUQueue & queue_) : file_segment(file_segment_) { @@ -676,12 +732,13 @@ LRUFileCache::FileSegmentCell::FileSegmentCell(FileSegmentPtr file_segment_, LRU break; } case FileSegment::State::EMPTY: + case FileSegment::State::DOWNLOADING: { break; } default: throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR, - "Can create cell with either DOWNLOADED or EMPTY state, got: {}", + "Can create cell with either EMPTY, DOWNLOADED, DOWNLOADING state, got: {}", FileSegment::stateToString(file_segment->download_state)); } } diff --git a/src/Common/FileCache.h b/src/Common/FileCache.h index dd585369853..30bd3679d71 100644 --- a/src/Common/FileCache.h +++ b/src/Common/FileCache.h @@ -41,6 +41,8 @@ public: virtual void remove(const Key & key) = 0; + virtual void tryRemoveAll() = 0; + static bool shouldBypassCache(); /// Cache capacity in bytes. 
@@ -52,6 +54,8 @@ public: String getPathInLocalCache(const Key & key); + const String & getBasePath() const { return cache_base_path; } + /** * Given an `offset` and `size` representing [offset, offset + size) bytes interval, * return list of cached non-overlapping non-empty @@ -65,6 +69,10 @@ public: */ virtual FileSegmentsHolder getOrSet(const Key & key, size_t offset, size_t size) = 0; + virtual FileSegmentsHolder getAll() = 0; + + virtual FileSegmentsHolder setDownloading(const Key & key, size_t offset, size_t size) = 0; + /// For debug. virtual String dumpStructure(const Key & key) = 0; @@ -113,10 +121,16 @@ public: FileSegmentsHolder getOrSet(const Key & key, size_t offset, size_t size) override; + FileSegmentsHolder getAll() override; + + FileSegmentsHolder setDownloading(const Key & key, size_t offset, size_t size) override; + void initialize() override; void remove(const Key & key) override; + void tryRemoveAll() override; + private: using FileKeyAndOffset = std::pair; using LRUQueue = std::list; @@ -189,8 +203,8 @@ private: void loadCacheInfoIntoMemory(); - FileSegments splitRangeIntoEmptyCells( - const Key & key, size_t offset, size_t size, std::lock_guard & cache_lock); + FileSegments splitRangeIntoCells( + const Key & key, size_t offset, size_t size, FileSegment::State state, std::lock_guard & cache_lock); public: struct Stat diff --git a/src/Common/FileCacheFactory.cpp b/src/Common/FileCacheFactory.cpp index 567d091fdeb..d5a63153e83 100644 --- a/src/Common/FileCacheFactory.cpp +++ b/src/Common/FileCacheFactory.cpp @@ -15,6 +15,12 @@ FileCacheFactory & FileCacheFactory::instance() return ret; } +FileCacheFactory::CacheByBasePath FileCacheFactory::getAll() +{ + std::lock_guard lock(mutex); + return caches; +} + FileCachePtr FileCacheFactory::getImpl(const std::string & cache_base_path, std::lock_guard &) { auto it = caches.find(cache_base_path); @@ -23,6 +29,16 @@ FileCachePtr FileCacheFactory::getImpl(const std::string & cache_base_path, std: return 
it->second; } +FileCachePtr FileCacheFactory::get(const std::string & cache_base_path) +{ + std::lock_guard lock(mutex); + auto cache = getImpl(cache_base_path, lock); + if (cache) + return cache; + + throw Exception(ErrorCodes::BAD_ARGUMENTS, "No cache found by path: {}", cache_base_path); +} + FileCachePtr FileCacheFactory::getOrCreate( const std::string & cache_base_path, const FileCacheSettings & file_cache_settings) { diff --git a/src/Common/FileCacheFactory.h b/src/Common/FileCacheFactory.h index 176b96a862e..7156d8e6f04 100644 --- a/src/Common/FileCacheFactory.h +++ b/src/Common/FileCacheFactory.h @@ -14,16 +14,22 @@ namespace DB */ class FileCacheFactory final : private boost::noncopyable { + using CacheByBasePath = std::unordered_map; + public: static FileCacheFactory & instance(); FileCachePtr getOrCreate(const std::string & cache_base_path, const FileCacheSettings & file_cache_settings); + FileCachePtr get(const std::string & cache_base_path); + + CacheByBasePath getAll(); + private: FileCachePtr getImpl(const std::string & cache_base_path, std::lock_guard &); std::mutex mutex; - std::unordered_map caches; + CacheByBasePath caches; }; } diff --git a/src/Common/FileCacheSettings.cpp b/src/Common/FileCacheSettings.cpp index 39a6a2ec723..02009d95550 100644 --- a/src/Common/FileCacheSettings.cpp +++ b/src/Common/FileCacheSettings.cpp @@ -7,8 +7,8 @@ namespace DB void FileCacheSettings::loadFromConfig(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix) { - max_cache_size = config.getUInt64(config_prefix + ".data_cache_max_size", REMOTE_FS_OBJECTS_CACHE_DEFAULT_MAX_CACHE_SIZE); - max_cache_elements = config.getUInt64(config_prefix + ".data_cache_max_elements", REMOTE_FS_OBJECTS_CACHE_DEFAULT_MAX_ELEMENTS); + max_size = config.getUInt64(config_prefix + ".data_cache_max_size", REMOTE_FS_OBJECTS_CACHE_DEFAULT_MAX_CACHE_SIZE); + max_elements = config.getUInt64(config_prefix + ".data_cache_max_elements", 
REMOTE_FS_OBJECTS_CACHE_DEFAULT_MAX_ELEMENTS); max_file_segment_size = config.getUInt64(config_prefix + ".max_file_segment_size", REMOTE_FS_OBJECTS_CACHE_DEFAULT_MAX_FILE_SEGMENT_SIZE); cache_on_insert = config.getUInt64(config_prefix + ".cache_on_insert", false); } diff --git a/src/Common/FileCacheSettings.h b/src/Common/FileCacheSettings.h index 8aa2bfb9214..9025f35445c 100644 --- a/src/Common/FileCacheSettings.h +++ b/src/Common/FileCacheSettings.h @@ -7,8 +7,8 @@ namespace DB struct FileCacheSettings { - size_t max_cache_size = 0; - size_t max_cache_elements = REMOTE_FS_OBJECTS_CACHE_DEFAULT_MAX_ELEMENTS; + size_t max_size = 0; + size_t max_elements = REMOTE_FS_OBJECTS_CACHE_DEFAULT_MAX_ELEMENTS; size_t max_file_segment_size = REMOTE_FS_OBJECTS_CACHE_DEFAULT_MAX_FILE_SEGMENT_SIZE; bool cache_on_insert = false; diff --git a/src/Common/FileSegment.cpp b/src/Common/FileSegment.cpp index ceb16249c66..0a83f96e49f 100644 --- a/src/Common/FileSegment.cpp +++ b/src/Common/FileSegment.cpp @@ -31,10 +31,34 @@ FileSegment::FileSegment( , log(&Poco::Logger::get("FileSegment")) #endif { - if (download_state == State::DOWNLOADED) - reserved_size = downloaded_size = size_; - else if (download_state != State::EMPTY) - throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR, "Can create cell with either DOWNLOADED or EMPTY state"); + /// On creation, file segment state can be EMPTY, DOWNLOADED, DOWNLOADING. + switch (download_state) + { + /// EMPTY is used when file segment is not in cache and + /// someone will _potentially_ want to download it (after calling getOrSetDownloader()). + case (State::EMPTY): + { + break; + } + /// DOWNLOADED is used either on inital cache metadata load into memory on server startup + /// or on reduceSizeToDownloaded() -- when file segment object is updated. + case (State::DOWNLOADED): + { + reserved_size = downloaded_size = size_; + break; + } + /// DOWNLOADING is used only for write-through caching (e.g. 
getOrSetDownloader() is not + /// needed, downloader is set on file segment creation). + case (State::DOWNLOADING): + { + downloader_id = getCallerId(); + break; + } + default: + { + throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR, "Can create cell with either EMPTY, DOWNLOADED, DOWNLOADING state"); + } + } } FileSegment::State FileSegment::state() const diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 8d28696094b..662d2c612d2 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -555,6 +555,7 @@ class IColumn; M(UInt64, remote_fs_read_backoff_max_tries, 5, "Max attempts to read with backoff", 0) \ M(Bool, remote_fs_enable_cache, true, "Use cache for remote filesystem. This setting does not turn on/off cache for disks (must me done via disk config), but allows to bypass cache for some queries if intended", 0) \ M(UInt64, remote_fs_cache_max_wait_sec, 5, "Allow to wait at most this number of seconds for download of current remote_fs_buffer_size bytes, and skip cache if exceeded", 0) \ + M(Bool, remote_fs_cache_on_insert, false, "Write into cache on INSERT query", 0) \ \ M(UInt64, http_max_tries, 10, "Max attempts to read via http.", 0) \ M(UInt64, http_retry_initial_backoff_ms, 100, "Min milliseconds for backoff, when retrying read via http", 0) \ diff --git a/src/Disks/IDiskRemote.cpp b/src/Disks/IDiskRemote.cpp index fa4189abc53..6d4350dcfcc 100644 --- a/src/Disks/IDiskRemote.cpp +++ b/src/Disks/IDiskRemote.cpp @@ -402,6 +402,12 @@ IDiskRemote::IDiskRemote( } +String IDiskRemote::getCachePath() const +{ + return cache ? 
cache->getBasePath() : ""; +} + + bool IDiskRemote::exists(const String & path) const { return metadata_disk->exists(path); diff --git a/src/Disks/IDiskRemote.h b/src/Disks/IDiskRemote.h index 82e76b8f68d..54f1604b99e 100644 --- a/src/Disks/IDiskRemote.h +++ b/src/Disks/IDiskRemote.h @@ -66,6 +66,8 @@ public: const String & getPath() const final override { return metadata_disk->getPath(); } + String getCachePath() const; + /// Methods for working with metadata. For some operations (like hardlink /// creation) metadata can be updated concurrently from multiple threads /// (file actually rewritten on disk). So additional RW lock is required for @@ -163,6 +165,7 @@ protected: const String remote_fs_root_path; DiskPtr metadata_disk; + FileCachePtr cache; private: diff --git a/src/Disks/RemoteDisksCommon.cpp b/src/Disks/RemoteDisksCommon.cpp index 4805434e5ee..da6ffed5f11 100644 --- a/src/Disks/RemoteDisksCommon.cpp +++ b/src/Disks/RemoteDisksCommon.cpp @@ -78,9 +78,9 @@ FileCachePtr getCachePtrForDisk( auto * log = &Poco::Logger::get("Disk(" + name + ")"); LOG_INFO(log, "Disk registered with cache path: {}. Cache size: {}, max cache elements size: {}, max_file_segment_size: {}", cache_base_path, - max_cache_size ? toString(max_cache_size) : "UNLIMITED", - max_cache_elements ? toString(max_cache_elements) : "UNLIMITED", - max_file_segment_size); + file_cache_settings.max_size ? toString(file_cache_settings.max_size) : "UNLIMITED", + file_cache_settings.max_elements ? 
toString(file_cache_settings.max_elements) : "UNLIMITED", + file_cache_settings.max_file_segment_size); return cache; } diff --git a/src/IO/WriteBufferFromS3.cpp b/src/IO/WriteBufferFromS3.cpp index 4f1016d43bd..bd8cf1baff1 100644 --- a/src/IO/WriteBufferFromS3.cpp +++ b/src/IO/WriteBufferFromS3.cpp @@ -20,9 +20,9 @@ namespace ProfileEvents { extern const Event S3WriteBytes; + extern const Event RemoteFSCacheDownloadBytes; } - namespace DB { // S3 protocol does not allow to have multipart upload with more than 10000 parts. @@ -87,7 +87,41 @@ void WriteBufferFromS3::nextImpl() if (temporary_buffer->tellp() == -1) allocateBuffer(); - temporary_buffer->write(working_buffer.begin(), offset()); + size_t size = offset(); + temporary_buffer->write(working_buffer.begin(), size); + + if (cacheEnabled()) + { + std::cerr << "\n\n\n\n\n\n\nCache is enabled!\n\n\n\n\n"; + + /// Use max_single_part_upload_size as file segment size. Space reservation is incremental, + /// so this size does not really mean anything apart from the final file segment size limit. + /// If single part is uploaded with the smaller size, just resize file segment. 
+ + // size_t max_file_segment_size = max_single_part_upload_size; + auto cache_key = cache->hash(key); + + auto file_segments_holder = cache->setDownloading(cache_key, current_download_offset, size); + assert(file_segments_holder.file_segments.back()->range().right - file_segments_holder.file_segments.begin()->range().left + 1 == size); + + size_t remaining_size = size; + for (const auto & file_segment : file_segments_holder.file_segments) + { + size_t current_size = std::min(file_segment->range().size(), remaining_size); + remaining_size -= current_size; + + if (file_segment->reserve(current_size)) + { + file_segment->write(working_buffer.begin(), current_size); + ProfileEvents::increment(ProfileEvents::RemoteFSCacheDownloadBytes, current_size); + } + else + { + /// TODO: add try catch, add complete() + break; + } + } + } ProfileEvents::increment(ProfileEvents::S3WriteBytes, offset()); @@ -129,19 +163,9 @@ WriteBufferFromS3::~WriteBufferFromS3() } } -void WriteBufferFromS3::tryWriteToCacheIfNeeded() +bool WriteBufferFromS3::cacheEnabled() const { - if (!cache || IFileCache::shouldBypassCache()) - return; - - try - { - } - catch (...) - { - tryLogCurrentException(__PRETTY_FUNCTION__); - throw; - } + return cache && IFileCache::shouldBypassCache() == false; } void WriteBufferFromS3::preFinalize() diff --git a/src/IO/WriteBufferFromS3.h b/src/IO/WriteBufferFromS3.h index 595a7a929c1..9773eedcce8 100644 --- a/src/IO/WriteBufferFromS3.h +++ b/src/IO/WriteBufferFromS3.h @@ -33,6 +33,7 @@ namespace DB { using ScheduleFunc = std::function)>; +class WriteBufferFromFile; /** * Buffer to write a data to a S3 object with specified bucket and key. 
@@ -86,7 +87,7 @@ private: void waitForReadyBackGroundTasks(); void waitForAllBackGroundTasks(); - void tryWriteToCacheIfNeeded(); + bool cacheEnabled() const; String bucket; String key; @@ -121,6 +122,8 @@ private: Poco::Logger * log = &Poco::Logger::get("WriteBufferFromS3"); FileCachePtr cache; + std::unique_ptr cache_writer; + size_t current_download_offset = 0; }; } diff --git a/src/Interpreters/InterpreterSystemQuery.cpp b/src/Interpreters/InterpreterSystemQuery.cpp index f2ac2565a7f..c70205769b7 100644 --- a/src/Interpreters/InterpreterSystemQuery.cpp +++ b/src/Interpreters/InterpreterSystemQuery.cpp @@ -7,6 +7,8 @@ #include #include #include +#include +#include #include #include #include @@ -296,6 +298,21 @@ BlockIO InterpreterSystemQuery::execute() cache->reset(); break; #endif + case Type::DROP_REMOTE_FILESYSTEM_CACHE: + { + if (query.remote_filesystem_cache_path.empty()) + { + auto caches = FileCacheFactory::instance().getAll(); + for (const auto & [_, cache] : caches) + cache->tryRemoveAll(); + } + else + { + auto cache = FileCacheFactory::instance().get(query.remote_filesystem_cache_path); + cache->tryRemoveAll(); + } + break; + } case Type::RELOAD_DICTIONARY: { getContext()->checkAccess(AccessType::SYSTEM_RELOAD_DICTIONARY); @@ -758,6 +775,7 @@ AccessRightsElements InterpreterSystemQuery::getRequiredAccessForDDLOnCluster() case Type::DROP_UNCOMPRESSED_CACHE: case Type::DROP_INDEX_MARK_CACHE: case Type::DROP_INDEX_UNCOMPRESSED_CACHE: + case Type::DROP_REMOTE_FILESYSTEM_CACHE: { required_access.emplace_back(AccessType::SYSTEM_DROP_CACHE); break; diff --git a/src/Parsers/ASTSystemQuery.h b/src/Parsers/ASTSystemQuery.h index 22488e35e12..7113698789f 100644 --- a/src/Parsers/ASTSystemQuery.h +++ b/src/Parsers/ASTSystemQuery.h @@ -28,6 +28,7 @@ public: #if USE_EMBEDDED_COMPILER DROP_COMPILED_EXPRESSION_CACHE, #endif + DROP_REMOTE_FILESYSTEM_CACHE, STOP_LISTEN_QUERIES, START_LISTEN_QUERIES, RESTART_REPLICAS, @@ -88,6 +89,7 @@ public: String volume; String 
disk; UInt64 seconds{}; + String remote_filesystem_cache_path; String getID(char) const override { return "SYSTEM query"; } diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp index d7b8f2c4165..6caff7c683f 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp @@ -24,12 +24,14 @@ MergeTreeDataPartWriterCompact::MergeTreeDataPartWriterCompact( , plain_file(data_part->volume->getDisk()->writeFile( part_path + MergeTreeDataPartCompact::DATA_FILE_NAME_WITH_EXTENSION, settings.max_compress_block_size, - WriteMode::Rewrite)) + WriteMode::Rewrite, + settings_.query_write_settings)) , plain_hashing(*plain_file) , marks_file(data_part->volume->getDisk()->writeFile( part_path + MergeTreeDataPartCompact::DATA_FILE_NAME + marks_file_extension_, 4096, - WriteMode::Rewrite)) + WriteMode::Rewrite, + settings_.query_write_settings)) , marks(*marks_file) { const auto & storage_columns = metadata_snapshot->getColumns(); diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/src/Storages/MergeTree/MergeTreeDataWriter.cpp index 2fb075a54bb..2803514cb9a 100644 --- a/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -437,7 +437,7 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeTempPart( const auto & index_factory = MergeTreeIndexFactory::instance(); auto out = std::make_unique(new_data_part, metadata_snapshot, columns, - index_factory.getMany(metadata_snapshot->getSecondaryIndices()), compression_codec); + index_factory.getMany(metadata_snapshot->getSecondaryIndices()), compression_codec, false, false, context->getWriteSettings()); out->writeWithPermutation(block, perm_ptr); diff --git a/src/Storages/MergeTree/MergeTreeIOSettings.h b/src/Storages/MergeTree/MergeTreeIOSettings.h index aaa8fae7dba..5841daafb8f 100644 --- 
a/src/Storages/MergeTree/MergeTreeIOSettings.h +++ b/src/Storages/MergeTree/MergeTreeIOSettings.h @@ -2,6 +2,7 @@ #include #include #include +#include namespace DB @@ -28,6 +29,7 @@ struct MergeTreeWriterSettings MergeTreeWriterSettings( const Settings & global_settings, + const WriteSettings & query_write_settings_, const MergeTreeSettingsPtr & storage_settings, bool can_use_adaptive_granularity_, bool rewrite_primary_key_, @@ -40,6 +42,7 @@ struct MergeTreeWriterSettings , can_use_adaptive_granularity(can_use_adaptive_granularity_) , rewrite_primary_key(rewrite_primary_key_) , blocks_are_granules_size(blocks_are_granules_size_) + , query_write_settings(query_write_settings_) { } @@ -48,6 +51,7 @@ struct MergeTreeWriterSettings bool can_use_adaptive_granularity; bool rewrite_primary_key; bool blocks_are_granules_size; + WriteSettings query_write_settings; }; } diff --git a/src/Storages/MergeTree/MergedBlockOutputStream.cpp b/src/Storages/MergeTree/MergedBlockOutputStream.cpp index be6bb453ad5..b4440dffe75 100644 --- a/src/Storages/MergeTree/MergedBlockOutputStream.cpp +++ b/src/Storages/MergeTree/MergedBlockOutputStream.cpp @@ -19,13 +19,15 @@ MergedBlockOutputStream::MergedBlockOutputStream( const MergeTreeIndices & skip_indices, CompressionCodecPtr default_codec_, bool reset_columns_, - bool blocks_are_granules_size) + bool blocks_are_granules_size, + const WriteSettings & write_settings) : IMergedBlockOutputStream(data_part, metadata_snapshot_, columns_list_, reset_columns_) , columns_list(columns_list_) , default_codec(default_codec_) { MergeTreeWriterSettings writer_settings( storage.getContext()->getSettings(), + write_settings, storage.getSettings(), data_part->index_granularity_info.is_adaptive, /* rewrite_primary_key = */ true, @@ -125,6 +127,7 @@ MergedBlockOutputStream::Finalizer MergedBlockOutputStream::finalizePartAsync( MergeTreeData::DataPart::Checksums * additional_column_checksums, const WriteSettings & write_settings) { + std::cerr << 
"\n\n\n\nCACHE ON INSERT: " << write_settings.remote_fs_cache_on_insert << "\n\n\n"; /// Finish write and get checksums. MergeTreeData::DataPart::Checksums checksums; diff --git a/src/Storages/MergeTree/MergedBlockOutputStream.h b/src/Storages/MergeTree/MergedBlockOutputStream.h index 6c26e918673..7beb9c65ca5 100644 --- a/src/Storages/MergeTree/MergedBlockOutputStream.h +++ b/src/Storages/MergeTree/MergedBlockOutputStream.h @@ -21,7 +21,8 @@ public: const MergeTreeIndices & skip_indices, CompressionCodecPtr default_codec_, bool reset_columns_ = false, - bool blocks_are_granules_size = false); + bool blocks_are_granules_size = false, + const WriteSettings & write_settings = {}); Block getHeader() const { return metadata_snapshot->getSampleBlock(); } diff --git a/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp b/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp index 5a706165000..005d8093bba 100644 --- a/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp +++ b/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp @@ -1,6 +1,7 @@ #include #include #include +#include namespace DB { @@ -26,6 +27,7 @@ MergedColumnOnlyOutputStream::MergedColumnOnlyOutputStream( MergeTreeWriterSettings writer_settings( global_settings, + WriteSettings{}, storage_settings, index_granularity_info ? 
index_granularity_info->is_adaptive : data_part->storage.canUseAdaptiveGranularity(), /* rewrite_primary_key = */false); diff --git a/src/Storages/System/StorageSystemDisks.cpp b/src/Storages/System/StorageSystemDisks.cpp index 3841abc2f2d..ddebfe3d63e 100644 --- a/src/Storages/System/StorageSystemDisks.cpp +++ b/src/Storages/System/StorageSystemDisks.cpp @@ -1,6 +1,7 @@ #include #include #include +#include namespace DB { @@ -22,6 +23,7 @@ StorageSystemDisks::StorageSystemDisks(const StorageID & table_id_) {"total_space", std::make_shared()}, {"keep_free_space", std::make_shared()}, {"type", std::make_shared()}, + {"cache_path", std::make_shared()}, })); setInMemoryMetadata(storage_metadata); } @@ -43,6 +45,7 @@ Pipe StorageSystemDisks::read( MutableColumnPtr col_total = ColumnUInt64::create(); MutableColumnPtr col_keep = ColumnUInt64::create(); MutableColumnPtr col_type = ColumnString::create(); + MutableColumnPtr col_cache_path = ColumnString::create(); for (const auto & [disk_name, disk_ptr] : context->getDisksMap()) { @@ -52,6 +55,12 @@ Pipe StorageSystemDisks::read( col_total->insert(disk_ptr->getTotalSpace()); col_keep->insert(disk_ptr->getKeepingFreeSpace()); col_type->insert(toString(disk_ptr->getType())); + + if (disk_ptr->isRemote()) + { + const auto * remote_disk = assert_cast(disk_ptr.get()); + col_cache_path->insert(remote_disk->getCachePath()); + } } Columns res_columns; diff --git a/src/Storages/System/StorageSystemRemoteFilesystemCache.cpp b/src/Storages/System/StorageSystemRemoteFilesystemCache.cpp new file mode 100644 index 00000000000..69927564b27 --- /dev/null +++ b/src/Storages/System/StorageSystemRemoteFilesystemCache.cpp @@ -0,0 +1,49 @@ +#include "StorageSystemRemoteFilesystemCache.h" +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +NamesAndTypesList StorageSystemRemoteFilesystemCache::getNamesAndTypes() +{ + return { + {"cache_base_path", std::make_shared()}, + {"cache_path", 
std::make_shared()}, + {"file_segment_range", std::make_shared(DataTypes{std::make_shared(), std::make_shared()})}, + {"size", std::make_shared()}, + }; +} + +StorageSystemRemoteFilesystemCache::StorageSystemRemoteFilesystemCache(const StorageID & table_id_) + : IStorageSystemOneBlock(table_id_) +{ +} + +void StorageSystemRemoteFilesystemCache::fillData(MutableColumns & res_columns, ContextPtr, const SelectQueryInfo &) const +{ + auto caches = FileCacheFactory::instance().getAll(); + + for (const auto & [cache_base_path, cache] : caches) + { + auto holder = cache->getAll(); + for (const auto & file_segment : holder.file_segments) + { + res_columns[0]->insert(cache_base_path); + res_columns[1]->insert(cache->getPathInLocalCache(file_segment->key(), file_segment->offset())); + + const auto & range = file_segment->range(); + res_columns[2]->insert(Tuple({range.left, range.right})); + res_columns[3]->insert(range.size()); + } + } +} + +} diff --git a/src/Storages/System/StorageSystemRemoteFilesystemCache.h b/src/Storages/System/StorageSystemRemoteFilesystemCache.h new file mode 100644 index 00000000000..6f7053b9197 --- /dev/null +++ b/src/Storages/System/StorageSystemRemoteFilesystemCache.h @@ -0,0 +1,24 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class StorageSystemRemoteFilesystemCache final : public shared_ptr_helper, + public IStorageSystemOneBlock +{ + friend struct shared_ptr_helper; +public: + std::string getName() const override { return "SystemRemoteFilesystemCache"; } + + static NamesAndTypesList getNamesAndTypes(); + +protected: + StorageSystemRemoteFilesystemCache(const StorageID & table_id_); + + void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const override; +}; + +} diff --git a/src/Storages/System/attachSystemTables.cpp b/src/Storages/System/attachSystemTables.cpp index 023ced35a6b..21f2211a9d2 100644 --- a/src/Storages/System/attachSystemTables.cpp +++ 
b/src/Storages/System/attachSystemTables.cpp @@ -68,6 +68,7 @@ #include #include #include +#include #ifdef OS_LINUX #include @@ -159,6 +160,7 @@ void attachSystemTablesServer(ContextPtr context, IDatabase & system_database, b attach(context, system_database, "replicated_fetches"); attach(context, system_database, "part_moves_between_shards"); attach(context, system_database, "asynchronous_inserts"); + attach(context, system_database, "remote_filesystem_cache"); if (has_zookeeper) attach(context, system_database, "zookeeper"); diff --git a/tests/queries/0_stateless/02240_system_remote_filesystem_cache.reference b/tests/queries/0_stateless/02240_system_remote_filesystem_cache.reference new file mode 100644 index 00000000000..c938833f060 --- /dev/null +++ b/tests/queries/0_stateless/02240_system_remote_filesystem_cache.reference @@ -0,0 +1,18 @@ +-- Tags: no-parallel + +-- { echo } + +DROP TABLE IF EXISTS test; +SYSTEM DROP REMOTE FILESYSTEM CACHE; +CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache'; +INSERT INTO test SELECT number, toString(number) FROM numbers(100); +SELECT * FROM test FORMAT Null; +SELECT cache_base_path, file_segment_range, size FROM system.remote_filesystem_cache; +./disks/s3/data_cache/ (0,745) 746 +SYSTEM DROP REMOTE FILESYSTEM CACHE; +SELECT cache_base_path, file_segment_range, size FROM system.remote_filesystem_cache; +SELECT * FROM test FORMAT Null; +SELECT cache_base_path, file_segment_range, size FROM system.remote_filesystem_cache; +./disks/s3/data_cache/ (0,745) 746 +SYSTEM DROP REMOTE FILESYSTEM CACHE; +SELECT cache_base_path, file_segment_range, size FROM system.remote_filesystem_cache; diff --git a/tests/queries/0_stateless/02240_system_remote_filesystem_cache.sql b/tests/queries/0_stateless/02240_system_remote_filesystem_cache.sql new file mode 100644 index 00000000000..59f487c8350 --- /dev/null +++ b/tests/queries/0_stateless/02240_system_remote_filesystem_cache.sql @@ 
-0,0 +1,17 @@ +-- Tags: no-parallel, no-fasttest + +-- { echo } + +DROP TABLE IF EXISTS test; +SYSTEM DROP REMOTE FILESYSTEM CACHE; +CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache'; +INSERT INTO test SELECT number, toString(number) FROM numbers(100); + +SELECT * FROM test FORMAT Null; +SELECT cache_base_path, file_segment_range, size FROM system.remote_filesystem_cache; +SYSTEM DROP REMOTE FILESYSTEM CACHE; +SELECT cache_base_path, file_segment_range, size FROM system.remote_filesystem_cache; +SELECT * FROM test FORMAT Null; +SELECT cache_base_path, file_segment_range, size FROM system.remote_filesystem_cache; +SYSTEM DROP REMOTE FILESYSTEM CACHE; +SELECT cache_base_path, file_segment_range, size FROM system.remote_filesystem_cache; diff --git a/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.reference b/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.reference new file mode 100644 index 00000000000..5e72207ae55 --- /dev/null +++ b/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.reference @@ -0,0 +1,16 @@ +-- { echo } + +DROP TABLE IF EXISTS test; +SYSTEM DROP REMOTE FILESYSTEM CACHE; +-- CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache'; +CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3'; +INSERT INTO test SELECT number, toString(number) FROM numbers(100) SETTINGS remote_fs_cache_on_insert=1; +SELECT cache_base_path, file_segment_range, size FROM system.remote_filesystem_cache; +./disks/s3/data_cache/ (0,745) 746 +SELECT * FROM test FORMAT Null; +SELECT cache_base_path, file_segment_range, size FROM system.remote_filesystem_cache; +./disks/s3/data_cache/ (0,745) 746 +./disks/s3/data_cache/ (0,745) 746 +SYSTEM DROP REMOTE FILESYSTEM CACHE; +INSERT INTO test SELECT number, toString(number) FROM numbers(100, 100); +SELECT 
cache_base_path, file_segment_range, size FROM system.remote_filesystem_cache; diff --git a/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.sql b/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.sql new file mode 100644 index 00000000000..1151d097ecf --- /dev/null +++ b/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.sql @@ -0,0 +1,16 @@ +-- Tags: no-parallel, no-fasttest, long + +-- { echo } + +DROP TABLE IF EXISTS test; +SYSTEM DROP REMOTE FILESYSTEM CACHE; +-- CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache'; +CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3'; +INSERT INTO test SELECT number, toString(number) FROM numbers(100) SETTINGS remote_fs_cache_on_insert=1; + +SELECT cache_base_path, file_segment_range, size FROM system.remote_filesystem_cache; +SELECT * FROM test FORMAT Null; +SELECT cache_base_path, file_segment_range, size FROM system.remote_filesystem_cache; +SYSTEM DROP REMOTE FILESYSTEM CACHE; +INSERT INTO test SELECT number, toString(number) FROM numbers(100, 100); +SELECT cache_base_path, file_segment_range, size FROM system.remote_filesystem_cache; From afd0c64a1ad43589dfe53e567de1c31702526112 Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 21 Mar 2022 14:56:38 +0100 Subject: [PATCH 05/27] Better --- src/Common/FileCache.cpp | 2 +- src/Common/FileCacheFactory.cpp | 31 +++++++++++++------ src/Common/FileCacheFactory.h | 15 +++++++-- src/Common/FileCacheSettings.h | 2 ++ src/Disks/S3/DiskS3.cpp | 5 ++- src/IO/WriteBufferFromS3.cpp | 8 ----- src/Interpreters/InterpreterSystemQuery.cpp | 4 +-- .../MergeTreeDataPartWriterOnDisk.cpp | 11 ++++--- .../MergeTree/MergeTreeDataPartWriterOnDisk.h | 3 +- .../MergeTree/MergeTreeDataPartWriterWide.cpp | 3 +- .../MergeTree/MergedBlockOutputStream.cpp | 1 - .../StorageSystemRemoteFilesystemCache.cpp | 4 ++- 
...0_system_remote_filesystem_cache.reference | 2 +- .../02240_system_remote_filesystem_cache.sql | 2 +- ...emote_filesystem_cache_on_insert.reference | 18 ++++++----- ...2241_remote_filesystem_cache_on_insert.sql | 13 ++++---- 16 files changed, 75 insertions(+), 49 deletions(-) diff --git a/src/Common/FileCache.cpp b/src/Common/FileCache.cpp index ba39039c378..cff70fe0fb4 100644 --- a/src/Common/FileCache.cpp +++ b/src/Common/FileCache.cpp @@ -359,7 +359,6 @@ FileSegmentsHolder LRUFileCache::setDownloading(const Key & key, size_t offset, keyToStr(key), offset); auto file_segments = splitRangeIntoCells(key, offset, size, FileSegment::State::DOWNLOADING, cache_lock); - return FileSegmentsHolder(std::move(file_segments)); } @@ -381,6 +380,7 @@ bool LRUFileCache::tryReserve( auto is_overflow = [&] { + /// max_size == 0 means unlimited cache size, max_element_size means unlimited number of cache elements. return (max_size != 0 && current_size + size - removed_size > max_size) || (max_element_size != 0 && queue_size > max_element_size); }; diff --git a/src/Common/FileCacheFactory.cpp b/src/Common/FileCacheFactory.cpp index d5a63153e83..683676041d2 100644 --- a/src/Common/FileCacheFactory.cpp +++ b/src/Common/FileCacheFactory.cpp @@ -21,20 +21,31 @@ FileCacheFactory::CacheByBasePath FileCacheFactory::getAll() return caches; } -FileCachePtr FileCacheFactory::getImpl(const std::string & cache_base_path, std::lock_guard &) +const FileCacheSettings & FileCacheFactory::getSettings(const std::string & cache_base_path) +{ + std::lock_guard lock(mutex); + auto * cache_data = getImpl(cache_base_path, lock); + + if (cache_data) + return cache_data->settings; + + throw Exception(ErrorCodes::BAD_ARGUMENTS, "No cache found by path: {}", cache_base_path); +} + +FileCacheFactory::CacheData * FileCacheFactory::getImpl(const std::string & cache_base_path, std::lock_guard &) { auto it = caches.find(cache_base_path); if (it == caches.end()) return nullptr; - return it->second; + return 
&it->second; } FileCachePtr FileCacheFactory::get(const std::string & cache_base_path) { std::lock_guard lock(mutex); - auto cache = getImpl(cache_base_path, lock); - if (cache) - return cache; + auto * cache_data = getImpl(cache_base_path, lock); + if (cache_data) + return cache_data->cache; throw Exception(ErrorCodes::BAD_ARGUMENTS, "No cache found by path: {}", cache_base_path); } @@ -43,12 +54,12 @@ FileCachePtr FileCacheFactory::getOrCreate( const std::string & cache_base_path, const FileCacheSettings & file_cache_settings) { std::lock_guard lock(mutex); - auto cache = getImpl(cache_base_path, lock); - if (cache) - return cache; + auto * cache_data = getImpl(cache_base_path, lock); + if (cache_data) + return cache_data->cache; - cache = std::make_shared(cache_base_path, file_cache_settings); - caches.emplace(cache_base_path, cache); + auto cache = std::make_shared(cache_base_path, file_cache_settings); + caches.emplace(cache_base_path, CacheData(cache, file_cache_settings)); return cache; } diff --git a/src/Common/FileCacheFactory.h b/src/Common/FileCacheFactory.h index 7156d8e6f04..3518f487b6d 100644 --- a/src/Common/FileCacheFactory.h +++ b/src/Common/FileCacheFactory.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include @@ -14,7 +15,15 @@ namespace DB */ class FileCacheFactory final : private boost::noncopyable { - using CacheByBasePath = std::unordered_map; + struct CacheData + { + FileCachePtr cache; + FileCacheSettings settings; + + CacheData(FileCachePtr cache_, const FileCacheSettings & settings_) : cache(cache_), settings(settings_) {} + }; + + using CacheByBasePath = std::unordered_map; public: static FileCacheFactory & instance(); @@ -25,8 +34,10 @@ public: CacheByBasePath getAll(); + const FileCacheSettings & getSettings(const std::string & cache_base_path); + private: - FileCachePtr getImpl(const std::string & cache_base_path, std::lock_guard &); + CacheData * getImpl(const std::string & cache_base_path, std::lock_guard &); 
std::mutex mutex; CacheByBasePath caches; diff --git a/src/Common/FileCacheSettings.h b/src/Common/FileCacheSettings.h index 9025f35445c..c7956e48282 100644 --- a/src/Common/FileCacheSettings.h +++ b/src/Common/FileCacheSettings.h @@ -1,3 +1,5 @@ +#pragma once + #include namespace Poco { namespace Util { class AbstractConfiguration; }} diff --git a/src/Disks/S3/DiskS3.cpp b/src/Disks/S3/DiskS3.cpp index 797b7ed15f4..eb74ef614b5 100644 --- a/src/Disks/S3/DiskS3.cpp +++ b/src/Disks/S3/DiskS3.cpp @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -290,6 +291,8 @@ std::unique_ptr DiskS3::writeFile(const String & path, }); }; + bool cache_on_insert = write_settings.remote_fs_cache_on_insert || FileCacheFactory::instance().getSettings(getCachePath()).cache_on_insert; + auto s3_buffer = std::make_unique( settings->client, bucket, @@ -299,7 +302,7 @@ std::unique_ptr DiskS3::writeFile(const String & path, settings->s3_upload_part_size_multiply_parts_count_threshold, settings->s3_max_single_part_upload_size, std::move(object_metadata), - buf_size, std::move(schedule), write_settings.remote_fs_cache_on_insert ? cache : nullptr); + buf_size, std::move(schedule), cache_on_insert ? cache : nullptr); auto create_metadata_callback = [this, path, blob_name, mode] (size_t count) { diff --git a/src/IO/WriteBufferFromS3.cpp b/src/IO/WriteBufferFromS3.cpp index bd8cf1baff1..20cb689fbd7 100644 --- a/src/IO/WriteBufferFromS3.cpp +++ b/src/IO/WriteBufferFromS3.cpp @@ -92,13 +92,6 @@ void WriteBufferFromS3::nextImpl() if (cacheEnabled()) { - std::cerr << "\n\n\n\n\n\n\nCache is enabled!\n\n\n\n\n"; - - /// Use max_single_part_upload_size as file segment size. Space reservation is incremental, - /// so this size does not really mean anything apart from the final file segment size limit. - /// If single part is uploaded with the smaller size, just resize file segment. 
- - // size_t max_file_segment_size = max_single_part_upload_size; auto cache_key = cache->hash(key); auto file_segments_holder = cache->setDownloading(cache_key, current_download_offset, size); @@ -117,7 +110,6 @@ void WriteBufferFromS3::nextImpl() } else { - /// TODO: add try catch, add complete() break; } } diff --git a/src/Interpreters/InterpreterSystemQuery.cpp b/src/Interpreters/InterpreterSystemQuery.cpp index c70205769b7..5f030159b2e 100644 --- a/src/Interpreters/InterpreterSystemQuery.cpp +++ b/src/Interpreters/InterpreterSystemQuery.cpp @@ -303,8 +303,8 @@ BlockIO InterpreterSystemQuery::execute() if (query.remote_filesystem_cache_path.empty()) { auto caches = FileCacheFactory::instance().getAll(); - for (const auto & [_, cache] : caches) - cache->tryRemoveAll(); + for (const auto & [_, cache_data] : caches) + cache_data.cache->tryRemoveAll(); } else { diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp index 8dca93f574f..6cba4db19e3 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp @@ -47,15 +47,16 @@ MergeTreeDataPartWriterOnDisk::Stream::Stream( const std::string & marks_path_, const std::string & marks_file_extension_, const CompressionCodecPtr & compression_codec_, - size_t max_compress_block_size_) : + size_t max_compress_block_size_, + const WriteSettings & query_write_settings) : escaped_column_name(escaped_column_name_), data_file_extension{data_file_extension_}, marks_file_extension{marks_file_extension_}, - plain_file(disk_->writeFile(data_path_ + data_file_extension, max_compress_block_size_, WriteMode::Rewrite)), + plain_file(disk_->writeFile(data_path_ + data_file_extension, max_compress_block_size_, WriteMode::Rewrite, query_write_settings)), plain_hashing(*plain_file), compressed_buf(plain_hashing, compression_codec_, max_compress_block_size_), compressed(compressed_buf), - 
marks_file(disk_->writeFile(marks_path_ + marks_file_extension, 4096, WriteMode::Rewrite)), marks(*marks_file) + marks_file(disk_->writeFile(marks_path_ + marks_file_extension, 4096, WriteMode::Rewrite, query_write_settings)), marks(*marks_file) { } @@ -156,7 +157,7 @@ void MergeTreeDataPartWriterOnDisk::initPrimaryIndex() { if (metadata_snapshot->hasPrimaryKey()) { - index_file_stream = data_part->volume->getDisk()->writeFile(part_path + "primary.idx", DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Rewrite); + index_file_stream = data_part->volume->getDisk()->writeFile(part_path + "primary.idx", DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Rewrite, settings.query_write_settings); index_stream = std::make_unique(*index_file_stream); } } @@ -172,7 +173,7 @@ void MergeTreeDataPartWriterOnDisk::initSkipIndices() data_part->volume->getDisk(), part_path + stream_name, index_helper->getSerializedFileExtension(), part_path + stream_name, marks_file_extension, - default_codec, settings.max_compress_block_size)); + default_codec, settings.max_compress_block_size, settings.query_write_settings)); skip_indices_aggregators.push_back(index_helper->createIndexAggregator()); skip_index_accumulated_marks.push_back(0); } diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h index 5af8cbc1650..67b51df7d56 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h @@ -55,7 +55,8 @@ public: const std::string & marks_path_, const std::string & marks_file_extension_, const CompressionCodecPtr & compression_codec_, - size_t max_compress_block_size_); + size_t max_compress_block_size_, + const WriteSettings & query_write_settings); String escaped_column_name; std::string data_file_extension; diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp index a3eec3e54bc..933814d27ba 100644 --- 
a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp @@ -115,7 +115,8 @@ void MergeTreeDataPartWriterWide::addStreams( part_path + stream_name, DATA_FILE_EXTENSION, part_path + stream_name, marks_file_extension, compression_codec, - settings.max_compress_block_size); + settings.max_compress_block_size, + settings.query_write_settings); }; ISerialization::SubstreamPath path; diff --git a/src/Storages/MergeTree/MergedBlockOutputStream.cpp b/src/Storages/MergeTree/MergedBlockOutputStream.cpp index b4440dffe75..4fce24fae74 100644 --- a/src/Storages/MergeTree/MergedBlockOutputStream.cpp +++ b/src/Storages/MergeTree/MergedBlockOutputStream.cpp @@ -127,7 +127,6 @@ MergedBlockOutputStream::Finalizer MergedBlockOutputStream::finalizePartAsync( MergeTreeData::DataPart::Checksums * additional_column_checksums, const WriteSettings & write_settings) { - std::cerr << "\n\n\n\nCACHE ON INSERT: " << write_settings.remote_fs_cache_on_insert << "\n\n\n"; /// Finish write and get checksums. 
MergeTreeData::DataPart::Checksums checksums; diff --git a/src/Storages/System/StorageSystemRemoteFilesystemCache.cpp b/src/Storages/System/StorageSystemRemoteFilesystemCache.cpp index 69927564b27..23aaffd3a4c 100644 --- a/src/Storages/System/StorageSystemRemoteFilesystemCache.cpp +++ b/src/Storages/System/StorageSystemRemoteFilesystemCache.cpp @@ -31,9 +31,11 @@ void StorageSystemRemoteFilesystemCache::fillData(MutableColumns & res_columns, { auto caches = FileCacheFactory::instance().getAll(); - for (const auto & [cache_base_path, cache] : caches) + for (const auto & [cache_base_path, cache_data] : caches) { + auto & cache = cache_data.cache; auto holder = cache->getAll(); + for (const auto & file_segment : holder.file_segments) { res_columns[0]->insert(cache_base_path); diff --git a/tests/queries/0_stateless/02240_system_remote_filesystem_cache.reference b/tests/queries/0_stateless/02240_system_remote_filesystem_cache.reference index c938833f060..db07f1dd577 100644 --- a/tests/queries/0_stateless/02240_system_remote_filesystem_cache.reference +++ b/tests/queries/0_stateless/02240_system_remote_filesystem_cache.reference @@ -2,10 +2,10 @@ -- { echo } -DROP TABLE IF EXISTS test; SYSTEM DROP REMOTE FILESYSTEM CACHE; CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache'; INSERT INTO test SELECT number, toString(number) FROM numbers(100); +DROP TABLE IF EXISTS test; SELECT * FROM test FORMAT Null; SELECT cache_base_path, file_segment_range, size FROM system.remote_filesystem_cache; ./disks/s3/data_cache/ (0,745) 746 diff --git a/tests/queries/0_stateless/02240_system_remote_filesystem_cache.sql b/tests/queries/0_stateless/02240_system_remote_filesystem_cache.sql index 59f487c8350..fb6dd8d61b4 100644 --- a/tests/queries/0_stateless/02240_system_remote_filesystem_cache.sql +++ b/tests/queries/0_stateless/02240_system_remote_filesystem_cache.sql @@ -2,10 +2,10 @@ -- { echo } -DROP TABLE IF EXISTS test; SYSTEM 
DROP REMOTE FILESYSTEM CACHE; CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache'; INSERT INTO test SELECT number, toString(number) FROM numbers(100); +DROP TABLE IF EXISTS test; SELECT * FROM test FORMAT Null; SELECT cache_base_path, file_segment_range, size FROM system.remote_filesystem_cache; diff --git a/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.reference b/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.reference index 5e72207ae55..3c3e31d215a 100644 --- a/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.reference +++ b/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.reference @@ -2,15 +2,17 @@ DROP TABLE IF EXISTS test; SYSTEM DROP REMOTE FILESYSTEM CACHE; --- CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache'; -CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3'; +CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache'; INSERT INTO test SELECT number, toString(number) FROM numbers(100) SETTINGS remote_fs_cache_on_insert=1; -SELECT cache_base_path, file_segment_range, size FROM system.remote_filesystem_cache; -./disks/s3/data_cache/ (0,745) 746 +SELECT count() FROM system.remote_filesystem_cache; +8 SELECT * FROM test FORMAT Null; -SELECT cache_base_path, file_segment_range, size FROM system.remote_filesystem_cache; -./disks/s3/data_cache/ (0,745) 746 -./disks/s3/data_cache/ (0,745) 746 +SELECT count() size FROM system.remote_filesystem_cache; +9 SYSTEM DROP REMOTE FILESYSTEM CACHE; INSERT INTO test SELECT number, toString(number) FROM numbers(100, 100); -SELECT cache_base_path, file_segment_range, size FROM system.remote_filesystem_cache; +SELECT count() size FROM system.remote_filesystem_cache; +7 +INSERT INTO test SELECT number, toString(number) FROM 
numbers(100) SETTINGS remote_fs_cache_on_insert=0; +SELECT count() size FROM system.remote_filesystem_cache; +14 diff --git a/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.sql b/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.sql index 1151d097ecf..48ae85c8e61 100644 --- a/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.sql +++ b/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.sql @@ -1,16 +1,17 @@ --- Tags: no-parallel, no-fasttest, long +-- Tags: no-parallel, no-fasttest -- { echo } DROP TABLE IF EXISTS test; SYSTEM DROP REMOTE FILESYSTEM CACHE; --- CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache'; -CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3'; +CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache'; INSERT INTO test SELECT number, toString(number) FROM numbers(100) SETTINGS remote_fs_cache_on_insert=1; -SELECT cache_base_path, file_segment_range, size FROM system.remote_filesystem_cache; +SELECT count() FROM system.remote_filesystem_cache; SELECT * FROM test FORMAT Null; -SELECT cache_base_path, file_segment_range, size FROM system.remote_filesystem_cache; +SELECT count() size FROM system.remote_filesystem_cache; SYSTEM DROP REMOTE FILESYSTEM CACHE; INSERT INTO test SELECT number, toString(number) FROM numbers(100, 100); -SELECT cache_base_path, file_segment_range, size FROM system.remote_filesystem_cache; +SELECT count() size FROM system.remote_filesystem_cache; +INSERT INTO test SELECT number, toString(number) FROM numbers(100) SETTINGS remote_fs_cache_on_insert=0; -- still writes cache because now config setting is used +SELECT count() size FROM system.remote_filesystem_cache; From a9ba14e38606ca7fb7ac2109983481fe2346d9c7 Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 21 Mar 2022 21:20:15 +0100 Subject: 
[PATCH 06/27] Fix checks --- src/Common/FileCache.cpp | 1 - src/Common/FileSegment.cpp | 2 +- src/Storages/System/StorageSystemDisks.cpp | 5 ++++- .../0_stateless/02117_show_create_table_system.reference | 2 +- 4 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/Common/FileCache.cpp b/src/Common/FileCache.cpp index cff70fe0fb4..f3db4a2de88 100644 --- a/src/Common/FileCache.cpp +++ b/src/Common/FileCache.cpp @@ -20,7 +20,6 @@ namespace ErrorCodes { extern const int REMOTE_FS_OBJECT_CACHE_ERROR; extern const int LOGICAL_ERROR; - extern const int BAD_ARGUMENTS; } namespace diff --git a/src/Common/FileSegment.cpp b/src/Common/FileSegment.cpp index 0a83f96e49f..f13cbc6a464 100644 --- a/src/Common/FileSegment.cpp +++ b/src/Common/FileSegment.cpp @@ -40,7 +40,7 @@ FileSegment::FileSegment( { break; } - /// DOWNLOADED is used either on inital cache metadata load into memory on server startup + /// DOWNLOADED is used either on initial cache metadata load into memory on server startup /// or on reduceSizeToDownloaded() -- when file segment object is updated. 
case (State::DOWNLOADED): { diff --git a/src/Storages/System/StorageSystemDisks.cpp b/src/Storages/System/StorageSystemDisks.cpp index ddebfe3d63e..f1896b5e49c 100644 --- a/src/Storages/System/StorageSystemDisks.cpp +++ b/src/Storages/System/StorageSystemDisks.cpp @@ -56,11 +56,13 @@ Pipe StorageSystemDisks::read( col_keep->insert(disk_ptr->getKeepingFreeSpace()); col_type->insert(toString(disk_ptr->getType())); + String cache_path; if (disk_ptr->isRemote()) { const auto * remote_disk = assert_cast(disk_ptr.get()); - col_cache_path->insert(remote_disk->getCachePath()); + cache_path = remote_disk->getCachePath(); } + col_cache_path->insert(cache_path); } Columns res_columns; @@ -70,6 +72,7 @@ Pipe StorageSystemDisks::read( res_columns.emplace_back(std::move(col_total)); res_columns.emplace_back(std::move(col_keep)); res_columns.emplace_back(std::move(col_type)); + res_columns.emplace_back(std::move(col_cache_path)); UInt64 num_rows = res_columns.at(0)->size(); Chunk chunk(std::move(res_columns), num_rows); diff --git a/tests/queries/0_stateless/02117_show_create_table_system.reference b/tests/queries/0_stateless/02117_show_create_table_system.reference index cecdd0498b1..432dcd2068d 100644 --- a/tests/queries/0_stateless/02117_show_create_table_system.reference +++ b/tests/queries/0_stateless/02117_show_create_table_system.reference @@ -12,7 +12,7 @@ CREATE TABLE system.data_type_families\n(\n `name` String,\n `case_insensi CREATE TABLE system.databases\n(\n `name` String,\n `engine` String,\n `data_path` String,\n `metadata_path` String,\n `uuid` UUID,\n `comment` String,\n `database` String\n)\nENGINE = SystemDatabases()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' CREATE TABLE system.detached_parts\n(\n `database` String,\n `table` String,\n `partition_id` Nullable(String),\n `name` String,\n `disk` String,\n `reason` Nullable(String),\n `min_block_number` Nullable(Int64),\n `max_block_number` Nullable(Int64),\n `level` Nullable(UInt32)\n)\nENGINE = 
SystemDetachedParts()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' CREATE TABLE system.dictionaries\n(\n `database` String,\n `name` String,\n `uuid` UUID,\n `status` Enum8(\'NOT_LOADED\' = 0, \'LOADED\' = 1, \'FAILED\' = 2, \'LOADING\' = 3, \'FAILED_AND_RELOADING\' = 4, \'LOADED_AND_RELOADING\' = 5, \'NOT_EXIST\' = 6),\n `origin` String,\n `type` String,\n `key.names` Array(String),\n `key.types` Array(String),\n `attribute.names` Array(String),\n `attribute.types` Array(String),\n `bytes_allocated` UInt64,\n `query_count` UInt64,\n `hit_rate` Float64,\n `found_rate` Float64,\n `element_count` UInt64,\n `load_factor` Float64,\n `source` String,\n `lifetime_min` UInt64,\n `lifetime_max` UInt64,\n `loading_start_time` DateTime,\n `last_successful_update_time` DateTime,\n `loading_duration` Float32,\n `last_exception` String,\n `comment` String\n)\nENGINE = SystemDictionaries()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' -CREATE TABLE system.disks\n(\n `name` String,\n `path` String,\n `free_space` UInt64,\n `total_space` UInt64,\n `keep_free_space` UInt64,\n `type` String\n)\nENGINE = SystemDisks()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' +CREATE TABLE system.disks\n(\n `name` String,\n `path` String,\n `free_space` UInt64,\n `total_space` UInt64,\n `keep_free_space` UInt64,\n `type` String,\n `cache_path` String\n)\nENGINE = SystemDisks()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' CREATE TABLE system.distributed_ddl_queue\n(\n `entry` String,\n `entry_version` Nullable(UInt8),\n `initiator_host` Nullable(String),\n `initiator_port` Nullable(UInt16),\n `cluster` String,\n `query` String,\n `settings` Map(String, String),\n `query_create_time` DateTime,\n `host` Nullable(String),\n `port` Nullable(UInt16),\n `status` Nullable(Enum8(\'Inactive\' = 0, \'Active\' = 1, \'Finished\' = 2, \'Removing\' = 3, \'Unknown\' = 4)),\n `exception_code` Nullable(UInt16),\n `exception_text` Nullable(String),\n `query_finish_time` Nullable(DateTime),\n 
`query_duration_ms` Nullable(UInt64)\n)\nENGINE = SystemDDLWorkerQueue()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' CREATE TABLE system.distribution_queue\n(\n `database` String,\n `table` String,\n `data_path` String,\n `is_blocked` UInt8,\n `error_count` UInt64,\n `data_files` UInt64,\n `data_compressed_bytes` UInt64,\n `broken_data_files` UInt64,\n `broken_data_compressed_bytes` UInt64,\n `last_exception` String\n)\nENGINE = SystemDistributionQueue()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' CREATE TABLE system.enabled_roles\n(\n `role_name` String,\n `with_admin_option` UInt8,\n `is_current` UInt8,\n `is_default` UInt8\n)\nENGINE = SystemEnabledRoles()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' From f3e1ca44a9253d3ecda7689c230a0854e1eb56de Mon Sep 17 00:00:00 2001 From: kssenii Date: Wed, 23 Mar 2022 13:01:18 +0100 Subject: [PATCH 07/27] Add system.remote_data_paths table --- src/Common/FileCache.cpp | 21 +++- src/Common/FileCache.h | 4 + src/Disks/DiskDecorator.h | 3 + src/Disks/DiskLocal.cpp | 6 ++ src/Disks/DiskRestartProxy.cpp | 18 ++++ src/Disks/DiskRestartProxy.h | 3 + src/Disks/IDisk.h | 26 +++++ src/Disks/IDiskRemote.cpp | 31 +++++- src/Disks/IDiskRemote.h | 8 +- .../System/StorageSystemRemoteDataPaths.cpp | 100 ++++++++++++++++++ .../System/StorageSystemRemoteDataPaths.h | 30 ++++++ 11 files changed, 243 insertions(+), 7 deletions(-) create mode 100644 src/Storages/System/StorageSystemRemoteDataPaths.cpp create mode 100644 src/Storages/System/StorageSystemRemoteDataPaths.h diff --git a/src/Common/FileCache.cpp b/src/Common/FileCache.cpp index f3db4a2de88..07429a448f2 100644 --- a/src/Common/FileCache.cpp +++ b/src/Common/FileCache.cpp @@ -705,15 +705,30 @@ FileSegmentsHolder LRUFileCache::getAll() std::lock_guard cache_lock(mutex); FileSegments file_segments; - for (const auto & [key, offset] : queue) + + for (const auto & [key, cells_by_offset] : files) { - auto * cell = getCell(key, offset, cache_lock); - 
file_segments.push_back(cell->file_segment); + for (const auto & [offset, cell] : cells_by_offset) + file_segments.push_back(cell.file_segment); } return FileSegmentsHolder(std::move(file_segments)); } +std::vector LRUFileCache::tryGetCachePaths(const Key & key) +{ + std::lock_guard cache_lock(mutex); + + std::vector cache_paths; + const auto & cells_by_offset = files[key]; + + for (const auto & [offset, cell] : cells_by_offset) + if (cell.file_segment->state() == FileSegment::State::DOWNLOADED) + cache_paths.push_back(getPathInLocalCache(key, offset)); + + return cache_paths; +} + LRUFileCache::FileSegmentCell::FileSegmentCell(FileSegmentPtr file_segment_, LRUQueue & queue_) : file_segment(file_segment_) { diff --git a/src/Common/FileCache.h b/src/Common/FileCache.h index 30bd3679d71..e8280fba08a 100644 --- a/src/Common/FileCache.h +++ b/src/Common/FileCache.h @@ -56,6 +56,8 @@ public: const String & getBasePath() const { return cache_base_path; } + virtual std::vector tryGetCachePaths(const Key & key) = 0; + /** * Given an `offset` and `size` representing [offset, offset + size) bytes interval, * return list of cached non-overlapping non-empty @@ -131,6 +133,8 @@ public: void tryRemoveAll() override; + std::vector tryGetCachePaths(const Key & key) override; + private: using FileKeyAndOffset = std::pair; using LRUQueue = std::list; diff --git a/src/Disks/DiskDecorator.h b/src/Disks/DiskDecorator.h index 33272ba385b..e5c9c7699bf 100644 --- a/src/Disks/DiskDecorator.h +++ b/src/Disks/DiskDecorator.h @@ -72,6 +72,9 @@ public: void shutdown() override; void startup() override; void applyNewSettings(const Poco::Util::AbstractConfiguration & config, ContextPtr context, const String & config_prefix, const DisksMap & map) override; + String getCacheBasePath() const override { return delegate->getCacheBasePath(); } + std::vector getRemotePaths(const String & path) const override { return delegate->getRemotePaths(path); } + void getRemotePathsRecursive(const String & path, 
std::vector & paths_map) override { return delegate->getRemotePathsRecursive(path, paths_map); } DiskPtr getMetadataDiskIfExistsOrSelf() override { return delegate->getMetadataDiskIfExistsOrSelf(); } diff --git a/src/Disks/DiskLocal.cpp b/src/Disks/DiskLocal.cpp index 78be223edee..874405cda53 100644 --- a/src/Disks/DiskLocal.cpp +++ b/src/Disks/DiskLocal.cpp @@ -318,10 +318,16 @@ void DiskLocal::moveDirectory(const String & from_path, const String & to_path) DiskDirectoryIteratorPtr DiskLocal::iterateDirectory(const String & path) { fs::path meta_path = fs::path(disk_path) / path; + std::cerr << "Disk local: " << meta_path << "\n"; if (!broken && fs::exists(meta_path) && fs::is_directory(meta_path)) + { return std::make_unique(disk_path, path); + } else + { + std::cerr << "\n\n\n iterating Fail\n\n"; return std::make_unique(); + } } void DiskLocal::moveFile(const String & from_path, const String & to_path) diff --git a/src/Disks/DiskRestartProxy.cpp b/src/Disks/DiskRestartProxy.cpp index a1c63d1e5a9..8045a0e8c72 100644 --- a/src/Disks/DiskRestartProxy.cpp +++ b/src/Disks/DiskRestartProxy.cpp @@ -305,6 +305,24 @@ bool DiskRestartProxy::checkUniqueId(const String & id) const return DiskDecorator::checkUniqueId(id); } +String DiskRestartProxy::getCacheBasePath() const +{ + ReadLock lock (mutex); + return DiskDecorator::getCacheBasePath(); +} + +std::vector DiskRestartProxy::getRemotePaths(const String & path) const +{ + ReadLock lock (mutex); + return DiskDecorator::getRemotePaths(path); +} + +void DiskRestartProxy::getRemotePathsRecursive(const String & path, std::vector & paths_map) +{ + ReadLock lock (mutex); + return DiskDecorator::getRemotePathsRecursive(path, paths_map); +} + void DiskRestartProxy::restart() { /// Speed up processing unhealthy requests. 
diff --git a/src/Disks/DiskRestartProxy.h b/src/Disks/DiskRestartProxy.h index 2a0d40bffb6..baa57386e68 100644 --- a/src/Disks/DiskRestartProxy.h +++ b/src/Disks/DiskRestartProxy.h @@ -63,6 +63,9 @@ public: void truncateFile(const String & path, size_t size) override; String getUniqueId(const String & path) const override; bool checkUniqueId(const String & id) const override; + String getCacheBasePath() const override; + std::vector getRemotePaths(const String & path) const override; + void getRemotePathsRecursive(const String & path, std::vector & paths_map) override; void restart(); diff --git a/src/Disks/IDisk.h b/src/Disks/IDisk.h index ac48f4f4d89..0f6e31dd0f7 100644 --- a/src/Disks/IDisk.h +++ b/src/Disks/IDisk.h @@ -32,6 +32,11 @@ namespace Poco namespace DB { +namespace ErrorCodes +{ + extern const int NOT_IMPLEMENTED; +} + class IDiskDirectoryIterator; using DiskDirectoryIteratorPtr = std::unique_ptr; @@ -199,6 +204,27 @@ public: /// Second bool param is a flag to remove (true) or keep (false) shared data on S3 virtual void removeSharedFileIfExists(const String & path, bool) { removeFileIfExists(path); } + + virtual String getCacheBasePath() const + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method `getCacheBasePath()` not implemented for disk: {}", getType()); + } + + /// Returns a list of paths because for Log family engines + /// there might be multiple files in remote fs for single clickhouse file. + virtual std::vector getRemotePaths(const String &) const + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method `getRemotePaths()` not implemented for disk: {}", getType()); + } + + /// For one local path there might be multiple remote paths in case of Log family engines. 
+ using LocalPathWithRemotePaths = std::pair>; + + virtual void getRemotePathsRecursive(const String &, std::vector &) + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method `getRemotePathsRecursive()` not implemented for disk: {}", getType()); + } + struct RemoveRequest { String path; diff --git a/src/Disks/IDiskRemote.cpp b/src/Disks/IDiskRemote.cpp index 6d4350dcfcc..0f430e69a5e 100644 --- a/src/Disks/IDiskRemote.cpp +++ b/src/Disks/IDiskRemote.cpp @@ -136,13 +136,15 @@ void IDiskRemote::Metadata::load() } catch (Exception & e) { + tryLogCurrentException(__PRETTY_FUNCTION__); + if (e.code() == ErrorCodes::UNKNOWN_FORMAT) throw; if (e.code() == ErrorCodes::MEMORY_LIMIT_EXCEEDED) throw; - throw Exception("Failed to read metadata file", e, ErrorCodes::UNKNOWN_FORMAT); + throw Exception("Failed to read metadata file: " + metadata_file_path, e, ErrorCodes::UNKNOWN_FORMAT); } } @@ -341,6 +343,30 @@ void IDiskRemote::removeMetadataRecursive(const String & path, RemoteFSPathKeepe } } +std::vector IDiskRemote::getRemotePaths(const String & path) const +{ + auto metadata = readMetadata(path); + + std::vector remote_paths; + for (const auto & [remote_path, _] : metadata.remote_fs_objects) + remote_paths.push_back(remote_path); + + return remote_paths; +} + +void IDiskRemote::getRemotePathsRecursive(const String & path, std::vector & paths_map) +{ + if (metadata_disk->isFile(path)) + { + paths_map.emplace_back(path, getRemotePaths(path)); + } + else + { + for (auto it = iterateDirectory(path); it->isValid(); it->next()) + IDiskRemote::getRemotePathsRecursive(fs::path(path) / it->name(), paths_map); + } +} + DiskPtr DiskRemoteReservation::getDisk(size_t i) const { if (i != 0) @@ -348,7 +374,6 @@ DiskPtr DiskRemoteReservation::getDisk(size_t i) const return disk; } - void DiskRemoteReservation::update(UInt64 new_size) { std::lock_guard lock(disk->reservation_mutex); @@ -402,7 +427,7 @@ IDiskRemote::IDiskRemote( } -String IDiskRemote::getCachePath() const +String 
IDiskRemote::getCacheBasePath() const { return cache ? cache->getBasePath() : ""; } diff --git a/src/Disks/IDiskRemote.h b/src/Disks/IDiskRemote.h index 54f1604b99e..a1d6092a286 100644 --- a/src/Disks/IDiskRemote.h +++ b/src/Disks/IDiskRemote.h @@ -66,7 +66,13 @@ public: const String & getPath() const final override { return metadata_disk->getPath(); } - String getCachePath() const; + String getCacheBasePath() const final override; + + /// Returnes a list of paths because for Log family engines + /// there might be multiple files in remote fs for single clickhouse file. + std::vector getRemotePaths(const String & path) const final override; + + void getRemotePathsRecursive(const String & path, std::vector & paths_map) override; /// Methods for working with metadata. For some operations (like hardlink /// creation) metadata can be updated concurrently from multiple threads diff --git a/src/Storages/System/StorageSystemRemoteDataPaths.cpp b/src/Storages/System/StorageSystemRemoteDataPaths.cpp new file mode 100644 index 00000000000..28ae20be8e1 --- /dev/null +++ b/src/Storages/System/StorageSystemRemoteDataPaths.cpp @@ -0,0 +1,100 @@ +#include "StorageSystemRemoteDataPaths.h" +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +StorageSystemRemoteDataPaths::StorageSystemRemoteDataPaths(const StorageID & table_id_) + : IStorage(table_id_) +{ + StorageInMemoryMetadata storage_metadata; + storage_metadata.setColumns(ColumnsDescription( + { + {"disk_name", std::make_shared()}, + {"path", std::make_shared()}, + {"cache_dir", std::make_shared()}, + {"local_path", std::make_shared()}, + {"remote_path", std::make_shared()}, + {"cache_paths", std::make_shared(std::make_shared())}, + })); + setInMemoryMetadata(storage_metadata); +} + +Pipe StorageSystemRemoteDataPaths::read( + const Names & column_names, + const StorageSnapshotPtr & storage_snapshot, + SelectQueryInfo & /*query_info*/, + ContextPtr context, + 
QueryProcessingStage::Enum /*processed_stage*/, + const size_t /*max_block_size*/, + const unsigned /*num_streams*/) +{ + storage_snapshot->check(column_names); + + MutableColumnPtr col_disk_name = ColumnString::create(); + MutableColumnPtr col_base_path = ColumnString::create(); + MutableColumnPtr col_cache_base_path = ColumnString::create(); + MutableColumnPtr col_local_path = ColumnString::create(); + MutableColumnPtr col_remote_path = ColumnString::create(); + MutableColumnPtr col_cache_paths = ColumnArray::create(ColumnString::create()); + + auto disks = context->getDisksMap(); + for (const auto & [disk_name, disk] : disks) + { + if (disk->isRemote()) + { + std::vector remote_paths_by_local_path; + disk->getRemotePathsRecursive("store", remote_paths_by_local_path); + + FileCachePtr cache; + auto cache_base_path = disk->getCacheBasePath(); + if (!cache_base_path.empty()) + cache = FileCacheFactory::instance().get(cache_base_path); + + for (const auto & [local_path, remote_paths] : remote_paths_by_local_path) + { + for (const auto & remote_path : remote_paths) + { + col_disk_name->insert(disk_name); + col_base_path->insert(disk->getPath()); + col_cache_base_path->insert(cache_base_path); + col_local_path->insert(local_path); + col_remote_path->insert(remote_path); + + if (cache) + { + auto cache_paths = cache->tryGetCachePaths(cache->hash(remote_path)); + col_cache_paths->insert(Array(cache_paths.begin(), cache_paths.end())); + } + else + { + col_cache_paths->insertDefault(); + } + } + } + } + } + + Columns res_columns; + res_columns.emplace_back(std::move(col_disk_name)); + res_columns.emplace_back(std::move(col_base_path)); + res_columns.emplace_back(std::move(col_cache_base_path)); + res_columns.emplace_back(std::move(col_local_path)); + res_columns.emplace_back(std::move(col_remote_path)); + res_columns.emplace_back(std::move(col_cache_paths)); + + UInt64 num_rows = res_columns.at(0)->size(); + Chunk chunk(std::move(res_columns), num_rows); + + return 
Pipe(std::make_shared(storage_snapshot->metadata->getSampleBlock(), std::move(chunk))); +} + +} diff --git a/src/Storages/System/StorageSystemRemoteDataPaths.h b/src/Storages/System/StorageSystemRemoteDataPaths.h new file mode 100644 index 00000000000..0057dcf12f2 --- /dev/null +++ b/src/Storages/System/StorageSystemRemoteDataPaths.h @@ -0,0 +1,30 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class StorageSystemRemoteDataPaths : public shared_ptr_helper, public IStorage +{ + friend struct shared_ptr_helper; +public: + std::string getName() const override { return "SystemRemoteDataPaths"; } + + bool isSystemStorage() const override { return true; } + + Pipe read( + const Names & column_names, + const StorageSnapshotPtr & storage_snapshot, + SelectQueryInfo & query_info, + ContextPtr context, + QueryProcessingStage::Enum processed_stage, + size_t max_block_size, + unsigned num_streams) override; + +protected: + explicit StorageSystemRemoteDataPaths(const StorageID & table_id_); +}; + +} From 34c0690a69622ba8c3837cf58ca9b5987366ff56 Mon Sep 17 00:00:00 2001 From: kssenii Date: Wed, 23 Mar 2022 15:35:15 +0100 Subject: [PATCH 08/27] Better --- src/Common/FileSegment.h | 5 ++ src/Disks/IO/CachedReadBufferFromRemoteFS.cpp | 12 ++++- src/Disks/S3/DiskS3.cpp | 8 ++- src/IO/WriteBufferFromS3.cpp | 7 ++- src/IO/WriteBufferFromS3.h | 3 +- src/Storages/System/StorageSystemDisks.cpp | 6 +-- .../StorageSystemRemoteFilesystemCache.cpp | 4 ++ .../StorageSystemRemoteFilesystemCache.h | 18 ++++++- src/Storages/System/attachSystemTables.cpp | 2 + ...emote_filesystem_cache_on_insert.reference | 51 ++++++++++++++----- ...2241_remote_filesystem_cache_on_insert.sql | 40 +++++++++++---- 11 files changed, 122 insertions(+), 34 deletions(-) diff --git a/src/Common/FileSegment.h b/src/Common/FileSegment.h index f0c6c5de9b6..66dd186d1a9 100644 --- a/src/Common/FileSegment.h +++ b/src/Common/FileSegment.h @@ -121,6 +121,10 @@ public: String getInfoForLog() const; + size_t hits() 
const { return hits_num; } + + void hit() { ++hits_num; } + private: size_t availableSize() const { return reserved_size - downloaded_size; } bool lastFileSegmentHolder() const; @@ -162,6 +166,7 @@ private: bool detached = false; std::atomic is_downloaded{false}; + std::atomic hits_num = 0; /// cache hits. }; struct FileSegmentsHolder : private boost::noncopyable diff --git a/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp b/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp index 5cab2cb2995..d093e29b28e 100644 --- a/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp +++ b/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp @@ -365,9 +365,14 @@ bool CachedReadBufferFromRemoteFS::completeFileSegmentAndGetNext() if (current_file_segment_it == file_segments_holder->file_segments.end()) return false; - implementation_buffer = getImplementationBuffer(*current_file_segment_it); + file_segment = *current_file_segment_it; - LOG_TEST(log, "New segment: {}", (*current_file_segment_it)->range().toString()); + implementation_buffer = getImplementationBuffer(file_segment); + + if (read_type == ReadType::CACHED) + file_segment->hit(); + + LOG_TEST(log, "New segment: {}", file_segment->range().toString()); return true; } @@ -583,6 +588,9 @@ bool CachedReadBufferFromRemoteFS::nextImplStep() else { implementation_buffer = getImplementationBuffer(*current_file_segment_it); + + if (read_type == ReadType::CACHED) + (*current_file_segment_it)->hit(); } assert(!internal_buffer.empty()); diff --git a/src/Disks/S3/DiskS3.cpp b/src/Disks/S3/DiskS3.cpp index eb74ef614b5..18cbcc3d659 100644 --- a/src/Disks/S3/DiskS3.cpp +++ b/src/Disks/S3/DiskS3.cpp @@ -46,6 +46,8 @@ #include +#include +#include namespace DB { @@ -291,7 +293,9 @@ std::unique_ptr DiskS3::writeFile(const String & path, }); }; - bool cache_on_insert = write_settings.remote_fs_cache_on_insert || FileCacheFactory::instance().getSettings(getCachePath()).cache_on_insert; + bool cache_on_insert = fs::path(path).extension() != ".tmp" + && 
write_settings.remote_fs_cache_on_insert + && FileCacheFactory::instance().getSettings(getCacheBasePath()).cache_on_insert; auto s3_buffer = std::make_unique( settings->client, @@ -302,7 +306,7 @@ std::unique_ptr DiskS3::writeFile(const String & path, settings->s3_upload_part_size_multiply_parts_count_threshold, settings->s3_max_single_part_upload_size, std::move(object_metadata), - buf_size, std::move(schedule), cache_on_insert ? cache : nullptr); + buf_size, std::move(schedule), blob_name, cache_on_insert ? cache : nullptr); auto create_metadata_callback = [this, path, blob_name, mode] (size_t count) { diff --git a/src/IO/WriteBufferFromS3.cpp b/src/IO/WriteBufferFromS3.cpp index 20cb689fbd7..53118bbf867 100644 --- a/src/IO/WriteBufferFromS3.cpp +++ b/src/IO/WriteBufferFromS3.cpp @@ -62,6 +62,7 @@ WriteBufferFromS3::WriteBufferFromS3( std::optional> object_metadata_, size_t buffer_size_, ScheduleFunc schedule_, + const String & blob_name_, FileCachePtr cache_) : BufferWithOwnMemory(buffer_size_, nullptr, 0) , bucket(bucket_) @@ -73,6 +74,7 @@ WriteBufferFromS3::WriteBufferFromS3( , upload_part_size_multiply_threshold(upload_part_size_multiply_threshold_) , max_single_part_upload_size(max_single_part_upload_size_) , schedule(std::move(schedule_)) + , blob_name(blob_name_) , cache(cache_) { allocateBuffer(); @@ -92,10 +94,11 @@ void WriteBufferFromS3::nextImpl() if (cacheEnabled()) { - auto cache_key = cache->hash(key); + if (blob_name.empty()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Empty blob name"); + auto cache_key = cache->hash(blob_name); auto file_segments_holder = cache->setDownloading(cache_key, current_download_offset, size); - assert(file_segments_holder.file_segments.back()->range().right - file_segments_holder.file_segments.begin()->range().left + 1 == size); size_t remaining_size = size; for (const auto & file_segment : file_segments_holder.file_segments) diff --git a/src/IO/WriteBufferFromS3.h b/src/IO/WriteBufferFromS3.h index 
9773eedcce8..ecddd72b9e8 100644 --- a/src/IO/WriteBufferFromS3.h +++ b/src/IO/WriteBufferFromS3.h @@ -56,6 +56,7 @@ public: std::optional> object_metadata_ = std::nullopt, size_t buffer_size_ = DBMS_DEFAULT_BUFFER_SIZE, ScheduleFunc schedule_ = {}, + const String & blob_name = "", FileCachePtr cache_ = nullptr); ~WriteBufferFromS3() override; @@ -121,8 +122,8 @@ private: Poco::Logger * log = &Poco::Logger::get("WriteBufferFromS3"); + const String blob_name; FileCachePtr cache; - std::unique_ptr cache_writer; size_t current_download_offset = 0; }; diff --git a/src/Storages/System/StorageSystemDisks.cpp b/src/Storages/System/StorageSystemDisks.cpp index f1896b5e49c..fb6a055c6e5 100644 --- a/src/Storages/System/StorageSystemDisks.cpp +++ b/src/Storages/System/StorageSystemDisks.cpp @@ -58,10 +58,8 @@ Pipe StorageSystemDisks::read( String cache_path; if (disk_ptr->isRemote()) - { - const auto * remote_disk = assert_cast(disk_ptr.get()); - cache_path = remote_disk->getCachePath(); - } + cache_path = disk_ptr->getCacheBasePath(); + col_cache_path->insert(cache_path); } diff --git a/src/Storages/System/StorageSystemRemoteFilesystemCache.cpp b/src/Storages/System/StorageSystemRemoteFilesystemCache.cpp index 23aaffd3a4c..e37dcb74829 100644 --- a/src/Storages/System/StorageSystemRemoteFilesystemCache.cpp +++ b/src/Storages/System/StorageSystemRemoteFilesystemCache.cpp @@ -19,6 +19,8 @@ NamesAndTypesList StorageSystemRemoteFilesystemCache::getNamesAndTypes() {"cache_path", std::make_shared()}, {"file_segment_range", std::make_shared(DataTypes{std::make_shared(), std::make_shared()})}, {"size", std::make_shared()}, + {"state", std::make_shared()}, + {"cache_hits", std::make_shared()}, }; } @@ -44,6 +46,8 @@ void StorageSystemRemoteFilesystemCache::fillData(MutableColumns & res_columns, const auto & range = file_segment->range(); res_columns[2]->insert(Tuple({range.left, range.right})); res_columns[3]->insert(range.size()); + 
res_columns[4]->insert(FileSegment::stateToString(file_segment->state())); + res_columns[5]->insert(file_segment->hits()); } } } diff --git a/src/Storages/System/StorageSystemRemoteFilesystemCache.h b/src/Storages/System/StorageSystemRemoteFilesystemCache.h index 6f7053b9197..4ac68671823 100644 --- a/src/Storages/System/StorageSystemRemoteFilesystemCache.h +++ b/src/Storages/System/StorageSystemRemoteFilesystemCache.h @@ -6,6 +6,22 @@ namespace DB { +/** + * SELECT + * cache_path, + * local_path, + * remote_path + * FROM + * ( + * SELECT + * arrayJoin(cache_paths) AS cache_path, + * local_path, + * remote_path + * FROM system.remote_data_paths + * ) AS data_paths + * INNER JOIN system.remote_filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path + */ + class StorageSystemRemoteFilesystemCache final : public shared_ptr_helper, public IStorageSystemOneBlock { @@ -16,7 +32,7 @@ public: static NamesAndTypesList getNamesAndTypes(); protected: - StorageSystemRemoteFilesystemCache(const StorageID & table_id_); + explicit StorageSystemRemoteFilesystemCache(const StorageID & table_id_); void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const override; }; diff --git a/src/Storages/System/attachSystemTables.cpp b/src/Storages/System/attachSystemTables.cpp index 21f2211a9d2..db30c265dc2 100644 --- a/src/Storages/System/attachSystemTables.cpp +++ b/src/Storages/System/attachSystemTables.cpp @@ -69,6 +69,7 @@ #include #include #include +#include #ifdef OS_LINUX #include @@ -161,6 +162,7 @@ void attachSystemTablesServer(ContextPtr context, IDatabase & system_database, b attach(context, system_database, "part_moves_between_shards"); attach(context, system_database, "asynchronous_inserts"); attach(context, system_database, "remote_filesystem_cache"); + attach(context, system_database, "remote_data_paths"); if (has_zookeeper) attach(context, system_database, "zookeeper"); diff --git 
a/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.reference b/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.reference index 3c3e31d215a..0ba020a359b 100644 --- a/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.reference +++ b/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.reference @@ -1,18 +1,45 @@ -- { echo } +SET remote_fs_cache_on_insert=1; DROP TABLE IF EXISTS test; -SYSTEM DROP REMOTE FILESYSTEM CACHE; CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache'; -INSERT INTO test SELECT number, toString(number) FROM numbers(100) SETTINGS remote_fs_cache_on_insert=1; -SELECT count() FROM system.remote_filesystem_cache; -8 -SELECT * FROM test FORMAT Null; -SELECT count() size FROM system.remote_filesystem_cache; -9 SYSTEM DROP REMOTE FILESYSTEM CACHE; -INSERT INTO test SELECT number, toString(number) FROM numbers(100, 100); +SELECT file_segment_range, size, state FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.remote_filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path FORMAT Vertical; +SELECT file_segment_range, size, state FROM system.remote_filesystem_cache format Vertical; +INSERT INTO test SELECT number, toString(number) FROM numbers(100); +SELECT file_segment_range, size, state FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.remote_filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path FORMAT Vertical; +Row 1: +────── +file_segment_range: (0,745) +size: 746 +state: DOWNLOADED +SELECT file_segment_range, size, state FROM system.remote_filesystem_cache format Vertical; +Row 1: +────── +file_segment_range: (0,745) +size: 746 +state: DOWNLOADED +SELECT cache_hits FROM system.remote_filesystem_cache; +0 +SELECT * FROM 
test FORMAT Null; +SELECT cache_hits FROM system.remote_filesystem_cache; +1 +SELECT * FROM test FORMAT Null; +SELECT cache_hits FROM system.remote_filesystem_cache; +2 SELECT count() size FROM system.remote_filesystem_cache; -7 -INSERT INTO test SELECT number, toString(number) FROM numbers(100) SETTINGS remote_fs_cache_on_insert=0; -SELECT count() size FROM system.remote_filesystem_cache; -14 +1 +SYSTEM DROP REMOTE FILESYSTEM CACHE; +INSERT INTO test SELECT number, toString(number) FROM numbers(100, 200); +SELECT file_segment_range, size, state FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.remote_filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path ORDER BY size FORMAT Vertical; +Row 1: +────── +file_segment_range: (0,1659) +size: 1660 +state: DOWNLOADED +SELECT file_segment_range, size, state FROM system.remote_filesystem_cache format Vertical; +Row 1: +────── +file_segment_range: (0,1659) +size: 1660 +state: DOWNLOADED diff --git a/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.sql b/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.sql index 48ae85c8e61..8a4339db731 100644 --- a/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.sql +++ b/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.sql @@ -2,16 +2,36 @@ -- { echo } -DROP TABLE IF EXISTS test; -SYSTEM DROP REMOTE FILESYSTEM CACHE; -CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache'; -INSERT INTO test SELECT number, toString(number) FROM numbers(100) SETTINGS remote_fs_cache_on_insert=1; +SET remote_fs_cache_on_insert=1; + +DROP TABLE IF EXISTS test; +CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache'; -SELECT count() FROM system.remote_filesystem_cache; -SELECT * FROM test FORMAT Null; -SELECT count() size FROM 
system.remote_filesystem_cache; SYSTEM DROP REMOTE FILESYSTEM CACHE; -INSERT INTO test SELECT number, toString(number) FROM numbers(100, 100); -SELECT count() size FROM system.remote_filesystem_cache; -INSERT INTO test SELECT number, toString(number) FROM numbers(100) SETTINGS remote_fs_cache_on_insert=0; -- still writes cache because now config setting is used + +SELECT file_segment_range, size, state FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.remote_filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path FORMAT Vertical; +SELECT file_segment_range, size, state FROM system.remote_filesystem_cache format Vertical; + +INSERT INTO test SELECT number, toString(number) FROM numbers(100); + +SELECT file_segment_range, size, state FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.remote_filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path FORMAT Vertical; +SELECT file_segment_range, size, state FROM system.remote_filesystem_cache format Vertical; + +SELECT cache_hits FROM system.remote_filesystem_cache; + +SELECT * FROM test FORMAT Null; +SELECT cache_hits FROM system.remote_filesystem_cache; + +SELECT * FROM test FORMAT Null; +SELECT cache_hits FROM system.remote_filesystem_cache; + SELECT count() size FROM system.remote_filesystem_cache; + +SYSTEM DROP REMOTE FILESYSTEM CACHE; + +INSERT INTO test SELECT number, toString(number) FROM numbers(100, 200); + +SELECT file_segment_range, size, state FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.remote_filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path ORDER BY size FORMAT Vertical; +SELECT file_segment_range, size, state FROM system.remote_filesystem_cache format Vertical; + +-- INSERT INTO test 
SELECT number, toString(number) FROM numbers(100) SETTINGS remote_fs_cache_on_insert=0; -- still writes cache because now config setting is used From 8c1be8950eda174a44260aa164b9f8d744fccded Mon Sep 17 00:00:00 2001 From: kssenii Date: Wed, 23 Mar 2022 16:54:43 +0100 Subject: [PATCH 09/27] Improve test --- src/Storages/MergeTree/MergeTreeData.cpp | 10 ++-- src/Storages/StorageMergeTree.cpp | 2 +- tests/config/config.d/storage_conf.xml | 2 + ...emote_filesystem_cache_on_insert.reference | 48 ++++++++++--------- ...2241_remote_filesystem_cache_on_insert.sql | 29 ++++++----- 5 files changed, 52 insertions(+), 39 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 4fbc3376b7e..29b9460729f 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -303,7 +303,7 @@ MergeTreeData::MergeTreeData( format_version = min_format_version; if (!version_file.second->isReadOnly()) { - auto buf = version_file.second->writeFile(version_file.first); + auto buf = version_file.second->writeFile(version_file.first, DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Rewrite, context_->getWriteSettings()); writeIntText(format_version.toUnderType(), *buf); if (getContext()->getSettingsRef().fsync_metadata) buf->sync(); @@ -3699,9 +3699,9 @@ RestoreDataTasks MergeTreeData::restoreDataPartsFromBackup(const BackupPtr & bac continue; UInt64 total_size_of_part = 0; - Strings filenames = backup->listFiles(data_path_in_backup + part_name + "/", ""); + Strings filenames = backup->listFiles(fs::path(data_path_in_backup) / part_name / "", ""); for (const String & filename : filenames) - total_size_of_part += backup->getFileSize(data_path_in_backup + part_name + "/" + filename); + total_size_of_part += backup->getFileSize(fs::path(data_path_in_backup) / part_name / filename); std::shared_ptr reservation = getStoragePolicy()->reserveAndCheck(total_size_of_part); @@ -3725,9 +3725,9 @@ RestoreDataTasks 
MergeTreeData::restoreDataPartsFromBackup(const BackupPtr & bac for (const String & filename : filenames) { - auto backup_entry = backup->readFile(data_path_in_backup + part_name + "/" + filename); + auto backup_entry = backup->readFile(fs::path(data_path_in_backup) / part_name / filename); auto read_buffer = backup_entry->getReadBuffer(); - auto write_buffer = disk->writeFile(temp_part_dir + "/" + filename); + auto write_buffer = disk->writeFile(fs::path(temp_part_dir) / filename); copyData(*read_buffer, *write_buffer); } diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 812e2264adb..0f2a7e90870 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -1613,7 +1613,7 @@ CheckResults StorageMergeTree::checkData(const ASTPtr & query, ContextPtr local_ { auto calculated_checksums = checkDataPart(part, false); calculated_checksums.checkEqual(part->checksums, true); - auto out = disk->writeFile(tmp_checksums_path, 4096); + auto out = disk->writeFile(tmp_checksums_path, 4096, WriteMode::Rewrite, local_context->getWriteSettings()); part->checksums.write(*out); disk->moveFile(tmp_checksums_path, checksums_path); results.emplace_back(part->name, true, "Checksums recounted and written to disk."); diff --git a/tests/config/config.d/storage_conf.xml b/tests/config/config.d/storage_conf.xml index 2e43f735605..dd148147367 100644 --- a/tests/config/config.d/storage_conf.xml +++ b/tests/config/config.d/storage_conf.xml @@ -7,7 +7,9 @@ clickhouse clickhouse 1 + 0 22548578304 + 1 diff --git a/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.reference b/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.reference index 0ba020a359b..3d0d9dadf6a 100644 --- a/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.reference +++ b/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.reference @@ -2,44 +2,48 @@ SET remote_fs_cache_on_insert=1; DROP TABLE IF 
EXISTS test; -CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache'; +CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3'; SYSTEM DROP REMOTE FILESYSTEM CACHE; -SELECT file_segment_range, size, state FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.remote_filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path FORMAT Vertical; -SELECT file_segment_range, size, state FROM system.remote_filesystem_cache format Vertical; +SELECT file_segment_range, size, state FROM (SELECT file_segment_range, size, state, local_path FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.remote_filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path) WHERE endsWith(local_path, 'data.bin') FORMAT Vertical; +SELECT count() FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.remote_filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path; +0 +SELECT count() FROM system.remote_filesystem_cache; +0 INSERT INTO test SELECT number, toString(number) FROM numbers(100); -SELECT file_segment_range, size, state FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.remote_filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path FORMAT Vertical; +SELECT file_segment_range, size, state FROM (SELECT file_segment_range, size, state, local_path FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.remote_filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path) WHERE endsWith(local_path, 
'data.bin') FORMAT Vertical; Row 1: ────── file_segment_range: (0,745) size: 746 state: DOWNLOADED -SELECT file_segment_range, size, state FROM system.remote_filesystem_cache format Vertical; -Row 1: -────── -file_segment_range: (0,745) -size: 746 -state: DOWNLOADED -SELECT cache_hits FROM system.remote_filesystem_cache; +SELECT count() FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.remote_filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path; +7 +SELECT count() FROM system.remote_filesystem_cache; +7 +SELECT count() FROM system.remote_filesystem_cache WHERE cache_hits > 0; 0 SELECT * FROM test FORMAT Null; -SELECT cache_hits FROM system.remote_filesystem_cache; -1 +SELECT count() FROM system.remote_filesystem_cache WHERE cache_hits > 0; +2 SELECT * FROM test FORMAT Null; -SELECT cache_hits FROM system.remote_filesystem_cache; +SELECT count() FROM system.remote_filesystem_cache WHERE cache_hits > 0; 2 SELECT count() size FROM system.remote_filesystem_cache; -1 +7 SYSTEM DROP REMOTE FILESYSTEM CACHE; INSERT INTO test SELECT number, toString(number) FROM numbers(100, 200); -SELECT file_segment_range, size, state FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.remote_filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path ORDER BY size FORMAT Vertical; -Row 1: -────── -file_segment_range: (0,1659) -size: 1660 -state: DOWNLOADED -SELECT file_segment_range, size, state FROM system.remote_filesystem_cache format Vertical; +SELECT file_segment_range, size, state FROM (SELECT file_segment_range, size, state, local_path FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.remote_filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path) WHERE endsWith(local_path, 
'data.bin') FORMAT Vertical; Row 1: ────── file_segment_range: (0,1659) size: 1660 state: DOWNLOADED +SELECT count() FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.remote_filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path; +7 +SELECT count() FROM system.remote_filesystem_cache; +7 +SELECT count() FROM system.remote_filesystem_cache; +7 +INSERT INTO test SELECT number, toString(number) FROM numbers(100) SETTINGS remote_fs_cache_on_insert=0; -- still writes cache because now config setting is used +SELECT count() FROM system.remote_filesystem_cache; +7 diff --git a/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.sql b/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.sql index 8a4339db731..dfcc617e0f4 100644 --- a/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.sql +++ b/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.sql @@ -5,25 +5,27 @@ SET remote_fs_cache_on_insert=1; DROP TABLE IF EXISTS test; -CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache'; +CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3'; SYSTEM DROP REMOTE FILESYSTEM CACHE; -SELECT file_segment_range, size, state FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.remote_filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path FORMAT Vertical; -SELECT file_segment_range, size, state FROM system.remote_filesystem_cache format Vertical; +SELECT file_segment_range, size, state FROM (SELECT file_segment_range, size, state, local_path FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.remote_filesystem_cache AS caches ON 
data_paths.cache_path = caches.cache_path) WHERE endsWith(local_path, 'data.bin') FORMAT Vertical; +SELECT count() FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.remote_filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path; +SELECT count() FROM system.remote_filesystem_cache; INSERT INTO test SELECT number, toString(number) FROM numbers(100); -SELECT file_segment_range, size, state FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.remote_filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path FORMAT Vertical; -SELECT file_segment_range, size, state FROM system.remote_filesystem_cache format Vertical; +SELECT file_segment_range, size, state FROM (SELECT file_segment_range, size, state, local_path FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.remote_filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path) WHERE endsWith(local_path, 'data.bin') FORMAT Vertical; +SELECT count() FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.remote_filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path; +SELECT count() FROM system.remote_filesystem_cache; -SELECT cache_hits FROM system.remote_filesystem_cache; +SELECT count() FROM system.remote_filesystem_cache WHERE cache_hits > 0; SELECT * FROM test FORMAT Null; -SELECT cache_hits FROM system.remote_filesystem_cache; +SELECT count() FROM system.remote_filesystem_cache WHERE cache_hits > 0; SELECT * FROM test FORMAT Null; -SELECT cache_hits FROM system.remote_filesystem_cache; +SELECT count() FROM system.remote_filesystem_cache WHERE cache_hits > 0; SELECT count() size FROM system.remote_filesystem_cache; @@ 
-31,7 +33,12 @@ SYSTEM DROP REMOTE FILESYSTEM CACHE; INSERT INTO test SELECT number, toString(number) FROM numbers(100, 200); -SELECT file_segment_range, size, state FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.remote_filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path ORDER BY size FORMAT Vertical; -SELECT file_segment_range, size, state FROM system.remote_filesystem_cache format Vertical; +SELECT file_segment_range, size, state FROM (SELECT file_segment_range, size, state, local_path FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.remote_filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path) WHERE endsWith(local_path, 'data.bin') FORMAT Vertical; +SELECT count() FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.remote_filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path; +SELECT count() FROM system.remote_filesystem_cache; + +SELECT count() FROM system.remote_filesystem_cache; +INSERT INTO test SELECT number, toString(number) FROM numbers(100) SETTINGS remote_fs_cache_on_insert=0; -- still writes cache because now config setting is used +SELECT count() FROM system.remote_filesystem_cache; + --- INSERT INTO test SELECT number, toString(number) FROM numbers(100) SETTINGS remote_fs_cache_on_insert=0; -- still writes cache because now config setting is used From e39aba37a2bd5273f7afa3791396b3e8ae4e4456 Mon Sep 17 00:00:00 2001 From: kssenii Date: Wed, 23 Mar 2022 18:11:52 +0100 Subject: [PATCH 10/27] Minor changes --- src/Common/FileCache.cpp | 2 ++ src/Common/FileCacheFactory.cpp | 4 ++- src/Disks/DiskLocal.cpp | 6 ---- src/Disks/S3/DiskS3.cpp | 2 -- .../System/StorageSystemRemoteDataPaths.cpp | 2 +- .../StorageSystemRemoteFilesystemCache.h 
| 31 +++++++++++-------- 6 files changed, 24 insertions(+), 23 deletions(-) diff --git a/src/Common/FileCache.cpp b/src/Common/FileCache.cpp index 07429a448f2..f0d2bba33d4 100644 --- a/src/Common/FileCache.cpp +++ b/src/Common/FileCache.cpp @@ -723,8 +723,10 @@ std::vector LRUFileCache::tryGetCachePaths(const Key & key) const auto & cells_by_offset = files[key]; for (const auto & [offset, cell] : cells_by_offset) + { if (cell.file_segment->state() == FileSegment::State::DOWNLOADED) cache_paths.push_back(getPathInLocalCache(key, offset)); + } return cache_paths; } diff --git a/src/Common/FileCacheFactory.cpp b/src/Common/FileCacheFactory.cpp index 683676041d2..9eadea05547 100644 --- a/src/Common/FileCacheFactory.cpp +++ b/src/Common/FileCacheFactory.cpp @@ -24,8 +24,8 @@ FileCacheFactory::CacheByBasePath FileCacheFactory::getAll() const FileCacheSettings & FileCacheFactory::getSettings(const std::string & cache_base_path) { std::lock_guard lock(mutex); - auto * cache_data = getImpl(cache_base_path, lock); + auto * cache_data = getImpl(cache_base_path, lock); if (cache_data) return cache_data->settings; @@ -43,6 +43,7 @@ FileCacheFactory::CacheData * FileCacheFactory::getImpl(const std::string & cach FileCachePtr FileCacheFactory::get(const std::string & cache_base_path) { std::lock_guard lock(mutex); + auto * cache_data = getImpl(cache_base_path, lock); if (cache_data) return cache_data->cache; @@ -54,6 +55,7 @@ FileCachePtr FileCacheFactory::getOrCreate( const std::string & cache_base_path, const FileCacheSettings & file_cache_settings) { std::lock_guard lock(mutex); + auto * cache_data = getImpl(cache_base_path, lock); if (cache_data) return cache_data->cache; diff --git a/src/Disks/DiskLocal.cpp b/src/Disks/DiskLocal.cpp index 874405cda53..78be223edee 100644 --- a/src/Disks/DiskLocal.cpp +++ b/src/Disks/DiskLocal.cpp @@ -318,16 +318,10 @@ void DiskLocal::moveDirectory(const String & from_path, const String & to_path) DiskDirectoryIteratorPtr 
DiskLocal::iterateDirectory(const String & path) { fs::path meta_path = fs::path(disk_path) / path; - std::cerr << "Disk local: " << meta_path << "\n"; if (!broken && fs::exists(meta_path) && fs::is_directory(meta_path)) - { return std::make_unique(disk_path, path); - } else - { - std::cerr << "\n\n\n iterating Fail\n\n"; return std::make_unique(); - } } void DiskLocal::moveFile(const String & from_path, const String & to_path) diff --git a/src/Disks/S3/DiskS3.cpp b/src/Disks/S3/DiskS3.cpp index 18cbcc3d659..a9e58efbfb9 100644 --- a/src/Disks/S3/DiskS3.cpp +++ b/src/Disks/S3/DiskS3.cpp @@ -46,8 +46,6 @@ #include -#include -#include namespace DB { diff --git a/src/Storages/System/StorageSystemRemoteDataPaths.cpp b/src/Storages/System/StorageSystemRemoteDataPaths.cpp index 28ae20be8e1..410d1ae6dd4 100644 --- a/src/Storages/System/StorageSystemRemoteDataPaths.cpp +++ b/src/Storages/System/StorageSystemRemoteDataPaths.cpp @@ -20,7 +20,7 @@ StorageSystemRemoteDataPaths::StorageSystemRemoteDataPaths(const StorageID & tab { {"disk_name", std::make_shared()}, {"path", std::make_shared()}, - {"cache_dir", std::make_shared()}, + {"cache_base_path", std::make_shared()}, {"local_path", std::make_shared()}, {"remote_path", std::make_shared()}, {"cache_paths", std::make_shared(std::make_shared())}, diff --git a/src/Storages/System/StorageSystemRemoteFilesystemCache.h b/src/Storages/System/StorageSystemRemoteFilesystemCache.h index 4ac68671823..b4ace8a7fe8 100644 --- a/src/Storages/System/StorageSystemRemoteFilesystemCache.h +++ b/src/Storages/System/StorageSystemRemoteFilesystemCache.h @@ -7,19 +7,24 @@ namespace DB { /** - * SELECT - * cache_path, - * local_path, - * remote_path - * FROM - * ( - * SELECT - * arrayJoin(cache_paths) AS cache_path, - * local_path, - * remote_path - * FROM system.remote_data_paths - * ) AS data_paths - * INNER JOIN system.remote_filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path + * SELECT + * cache_path, + * cache_hits, + * 
remote_path, + * local_path, + * file_segment_range, + * size, + * state + * FROM + * ( + * SELECT + * arrayJoin(cache_paths) AS cache_path, + * local_path, + * remote_path + * FROM system.remote_data_paths + * ) AS data_paths + * INNER JOIN system.remote_filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path + * FORMAT Vertical */ class StorageSystemRemoteFilesystemCache final : public shared_ptr_helper, From d2a3cfe5dc4fc42d6ea3f536f28d97008ff77234 Mon Sep 17 00:00:00 2001 From: kssenii Date: Wed, 23 Mar 2022 19:00:42 +0100 Subject: [PATCH 11/27] Cache on all write operations --- src/Common/FileCacheSettings.cpp | 2 +- src/Common/FileCacheSettings.h | 2 +- src/Core/Settings.h | 2 +- src/Disks/DiskCacheWrapper.cpp | 4 ++-- src/Disks/S3/DiskS3.cpp | 4 ++-- src/IO/WriteSettings.h | 2 +- src/Interpreters/Context.cpp | 2 +- src/Storages/MergeTree/MergeTask.cpp | 2 +- .../MergeTree/MergeTreeMutationEntry.cpp | 4 ++-- .../MergeTree/MergeTreeMutationEntry.h | 2 +- src/Storages/MergeTree/MergeTreePartition.cpp | 10 ++++++---- src/Storages/MergeTree/MergeTreePartition.h | 2 +- src/Storages/MergeTree/MutateTask.cpp | 17 +++++++++-------- src/Storages/StorageMergeTree.cpp | 2 +- ...emote_filesystem_cache_on_insert.reference | 19 +++++++++++++++---- ...2241_remote_filesystem_cache_on_insert.sql | 16 ++++++++++++---- 16 files changed, 57 insertions(+), 35 deletions(-) diff --git a/src/Common/FileCacheSettings.cpp b/src/Common/FileCacheSettings.cpp index 02009d95550..f555de277b2 100644 --- a/src/Common/FileCacheSettings.cpp +++ b/src/Common/FileCacheSettings.cpp @@ -10,7 +10,7 @@ void FileCacheSettings::loadFromConfig(const Poco::Util::AbstractConfiguration & max_size = config.getUInt64(config_prefix + ".data_cache_max_size", REMOTE_FS_OBJECTS_CACHE_DEFAULT_MAX_CACHE_SIZE); max_elements = config.getUInt64(config_prefix + ".data_cache_max_elements", REMOTE_FS_OBJECTS_CACHE_DEFAULT_MAX_ELEMENTS); max_file_segment_size = config.getUInt64(config_prefix + 
".max_file_segment_size", REMOTE_FS_OBJECTS_CACHE_DEFAULT_MAX_FILE_SEGMENT_SIZE); - cache_on_insert = config.getUInt64(config_prefix + ".cache_on_insert", false); + cache_on_write_operations = config.getUInt64(config_prefix + ".cache_on_write_operations", false); } } diff --git a/src/Common/FileCacheSettings.h b/src/Common/FileCacheSettings.h index c7956e48282..53c28400c86 100644 --- a/src/Common/FileCacheSettings.h +++ b/src/Common/FileCacheSettings.h @@ -12,7 +12,7 @@ struct FileCacheSettings size_t max_size = 0; size_t max_elements = REMOTE_FS_OBJECTS_CACHE_DEFAULT_MAX_ELEMENTS; size_t max_file_segment_size = REMOTE_FS_OBJECTS_CACHE_DEFAULT_MAX_FILE_SEGMENT_SIZE; - bool cache_on_insert = false; + bool cache_on_write_operations = false; void loadFromConfig(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix); }; diff --git a/src/Core/Settings.h b/src/Core/Settings.h index f8a0ea3c7e7..6d275ad6790 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -557,7 +557,7 @@ class IColumn; M(UInt64, remote_fs_read_backoff_max_tries, 5, "Max attempts to read with backoff", 0) \ M(Bool, remote_fs_enable_cache, true, "Use cache for remote filesystem. 
This setting does not turn on/off cache for disks (must me done via disk config), but allows to bypass cache for some queries if intended", 0) \ M(UInt64, remote_fs_cache_max_wait_sec, 5, "Allow to wait at most this number of seconds for download of current remote_fs_buffer_size bytes, and skip cache if exceeded", 0) \ - M(Bool, remote_fs_cache_on_insert, false, "Write into cache on INSERT query", 0) \ + M(Bool, remote_fs_cache_on_write_operations, true, "Write into cache on INSERT query To actually work this setting requires be added to disk config too", 0) \ \ M(UInt64, http_max_tries, 10, "Max attempts to read via http.", 0) \ M(UInt64, http_retry_initial_backoff_ms, 100, "Min milliseconds for backoff, when retrying read via http", 0) \ diff --git a/src/Disks/DiskCacheWrapper.cpp b/src/Disks/DiskCacheWrapper.cpp index de5397a87c2..568fbf160c0 100644 --- a/src/Disks/DiskCacheWrapper.cpp +++ b/src/Disks/DiskCacheWrapper.cpp @@ -169,7 +169,7 @@ DiskCacheWrapper::readFile( auto src_buffer = DiskDecorator::readFile(path, current_read_settings, read_hint, file_size); WriteSettings write_settings; - write_settings.remote_fs_cache_on_insert = false; + write_settings.remote_fs_cache_on_write_operations = false; auto dst_buffer = cache_disk->writeFile(tmp_path, settings.local_fs_buffer_size, WriteMode::Rewrite, write_settings); copyData(*src_buffer, *dst_buffer); @@ -206,7 +206,7 @@ DiskCacheWrapper::writeFile(const String & path, size_t buf_size, WriteMode mode return DiskDecorator::writeFile(path, buf_size, mode, settings); WriteSettings current_settings = settings; - current_settings.remote_fs_cache_on_insert = false; + current_settings.remote_fs_cache_on_write_operations = false; LOG_TEST(log, "Write file {} to cache", backQuote(path)); diff --git a/src/Disks/S3/DiskS3.cpp b/src/Disks/S3/DiskS3.cpp index a9e58efbfb9..292699b5e22 100644 --- a/src/Disks/S3/DiskS3.cpp +++ b/src/Disks/S3/DiskS3.cpp @@ -292,8 +292,8 @@ std::unique_ptr DiskS3::writeFile(const String & path, 
}; bool cache_on_insert = fs::path(path).extension() != ".tmp" - && write_settings.remote_fs_cache_on_insert - && FileCacheFactory::instance().getSettings(getCacheBasePath()).cache_on_insert; + && write_settings.remote_fs_cache_on_write_operations + && FileCacheFactory::instance().getSettings(getCacheBasePath()).cache_on_write_operations; auto s3_buffer = std::make_unique( settings->client, diff --git a/src/IO/WriteSettings.h b/src/IO/WriteSettings.h index c34193574bb..81a6705cbab 100644 --- a/src/IO/WriteSettings.h +++ b/src/IO/WriteSettings.h @@ -5,7 +5,7 @@ namespace DB struct WriteSettings { - bool remote_fs_cache_on_insert = false; + bool remote_fs_cache_on_write_operations = false; }; } diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index a64ef3a88be..a3169f435e4 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -3203,7 +3203,7 @@ WriteSettings Context::getWriteSettings() const { WriteSettings res; - res.remote_fs_cache_on_insert = settings.remote_fs_cache_on_insert; + res.remote_fs_cache_on_write_operations = settings.remote_fs_cache_on_write_operations; return res; } diff --git a/src/Storages/MergeTree/MergeTask.cpp b/src/Storages/MergeTree/MergeTask.cpp index 935a11ec5fa..22c7c6af83e 100644 --- a/src/Storages/MergeTree/MergeTask.cpp +++ b/src/Storages/MergeTree/MergeTask.cpp @@ -230,7 +230,7 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::prepare() case MergeAlgorithm::Vertical : { ctx->rows_sources_file = createTemporaryFile(ctx->tmp_disk->getPath()); - ctx->rows_sources_uncompressed_write_buf = ctx->tmp_disk->writeFile(fileName(ctx->rows_sources_file->path())); + ctx->rows_sources_uncompressed_write_buf = ctx->tmp_disk->writeFile(fileName(ctx->rows_sources_file->path()), DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Rewrite, global_ctx->context->getWriteSettings()); ctx->rows_sources_write_buf = std::make_unique(*ctx->rows_sources_uncompressed_write_buf); MergeTreeDataPartInMemory::ColumnToSize 
local_merged_column_to_size; diff --git a/src/Storages/MergeTree/MergeTreeMutationEntry.cpp b/src/Storages/MergeTree/MergeTreeMutationEntry.cpp index 0f71742fb09..2147575f1d5 100644 --- a/src/Storages/MergeTree/MergeTreeMutationEntry.cpp +++ b/src/Storages/MergeTree/MergeTreeMutationEntry.cpp @@ -43,7 +43,7 @@ UInt64 MergeTreeMutationEntry::parseFileName(const String & file_name_) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot parse mutation version from file name, expected 'mutation_.txt', got '{}'", file_name_); } -MergeTreeMutationEntry::MergeTreeMutationEntry(MutationCommands commands_, DiskPtr disk_, const String & path_prefix_, UInt64 tmp_number) +MergeTreeMutationEntry::MergeTreeMutationEntry(MutationCommands commands_, DiskPtr disk_, const String & path_prefix_, UInt64 tmp_number, const WriteSettings & settings) : create_time(time(nullptr)) , commands(std::move(commands_)) , disk(std::move(disk_)) @@ -53,7 +53,7 @@ MergeTreeMutationEntry::MergeTreeMutationEntry(MutationCommands commands_, DiskP { try { - auto out = disk->writeFile(path_prefix + file_name); + auto out = disk->writeFile(std::filesystem::path(path_prefix) / file_name, DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Rewrite, settings); *out << "format version: 1\n" << "create time: " << LocalDateTime(create_time) << "\n"; *out << "commands: "; diff --git a/src/Storages/MergeTree/MergeTreeMutationEntry.h b/src/Storages/MergeTree/MergeTreeMutationEntry.h index 7554a03836e..fa3a4058ae6 100644 --- a/src/Storages/MergeTree/MergeTreeMutationEntry.h +++ b/src/Storages/MergeTree/MergeTreeMutationEntry.h @@ -29,7 +29,7 @@ struct MergeTreeMutationEntry String latest_fail_reason; /// Create a new entry and write it to a temporary file. 
- MergeTreeMutationEntry(MutationCommands commands_, DiskPtr disk, const String & path_prefix_, UInt64 tmp_number); + MergeTreeMutationEntry(MutationCommands commands_, DiskPtr disk, const String & path_prefix_, UInt64 tmp_number, const WriteSettings & settings); MergeTreeMutationEntry(const MergeTreeMutationEntry &) = delete; MergeTreeMutationEntry(MergeTreeMutationEntry &&) = default; diff --git a/src/Storages/MergeTree/MergeTreePartition.cpp b/src/Storages/MergeTree/MergeTreePartition.cpp index 4edf23bc0fb..128c40929b3 100644 --- a/src/Storages/MergeTree/MergeTreePartition.cpp +++ b/src/Storages/MergeTree/MergeTreePartition.cpp @@ -390,16 +390,18 @@ void MergeTreePartition::load(const MergeTreeData & storage, const DiskPtr & dis std::unique_ptr MergeTreePartition::store(const MergeTreeData & storage, const DiskPtr & disk, const String & part_path, MergeTreeDataPartChecksums & checksums) const { auto metadata_snapshot = storage.getInMemoryMetadataPtr(); - const auto & partition_key_sample = adjustPartitionKey(metadata_snapshot, storage.getContext()).sample_block; - return store(partition_key_sample, disk, part_path, checksums); + const auto & context = storage.getContext(); + const auto & partition_key_sample = adjustPartitionKey(metadata_snapshot, context).sample_block; + return store(partition_key_sample, disk, part_path, checksums, context->getWriteSettings()); } -std::unique_ptr MergeTreePartition::store(const Block & partition_key_sample, const DiskPtr & disk, const String & part_path, MergeTreeDataPartChecksums & checksums) const +std::unique_ptr MergeTreePartition::store( + const Block & partition_key_sample, const DiskPtr & disk, const String & part_path, MergeTreeDataPartChecksums & checksums, const WriteSettings & settings) const { if (!partition_key_sample) return nullptr; - auto out = disk->writeFile(part_path + "partition.dat"); + auto out = disk->writeFile(std::filesystem::path(part_path) / "partition.dat", DBMS_DEFAULT_BUFFER_SIZE, 
WriteMode::Rewrite, settings); HashingWriteBuffer out_hashing(*out); for (size_t i = 0; i < value.size(); ++i) partition_key_sample.getByPosition(i).type->getDefaultSerialization()->serializeBinary(value[i], out_hashing); diff --git a/src/Storages/MergeTree/MergeTreePartition.h b/src/Storages/MergeTree/MergeTreePartition.h index f149fcbcb7e..3bd9202822f 100644 --- a/src/Storages/MergeTree/MergeTreePartition.h +++ b/src/Storages/MergeTree/MergeTreePartition.h @@ -41,7 +41,7 @@ public: /// Store functions return write buffer with written but not finalized data. /// User must call finish() for returned object. [[nodiscard]] std::unique_ptr store(const MergeTreeData & storage, const DiskPtr & disk, const String & part_path, MergeTreeDataPartChecksums & checksums) const; - [[nodiscard]] std::unique_ptr store(const Block & partition_key_sample, const DiskPtr & disk, const String & part_path, MergeTreeDataPartChecksums & checksums) const; + [[nodiscard]] std::unique_ptr store(const Block & partition_key_sample, const DiskPtr & disk, const String & part_path, MergeTreeDataPartChecksums & checksums, const WriteSettings & settings) const; void assign(const MergeTreePartition & other) { value = other.value; } diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index 1fe701c54ae..9641299f1f8 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -419,14 +419,15 @@ void finalizeMutatedPart( const MergeTreeDataPartPtr & source_part, MergeTreeData::MutableDataPartPtr new_data_part, ExecuteTTLType execute_ttl_type, - const CompressionCodecPtr & codec) + const CompressionCodecPtr & codec, + ContextPtr context) { auto disk = new_data_part->volume->getDisk(); auto part_path = fs::path(new_data_part->getFullRelativePath()); if (new_data_part->uuid != UUIDHelpers::Nil) { - auto out = disk->writeFile(part_path / IMergeTreeDataPart::UUID_FILE_NAME, 4096); + auto out = disk->writeFile(part_path / 
IMergeTreeDataPart::UUID_FILE_NAME, 4096, WriteMode::Rewrite, context->getWriteSettings()); HashingWriteBuffer out_hashing(*out); writeUUIDText(new_data_part->uuid, out_hashing); new_data_part->checksums.files[IMergeTreeDataPart::UUID_FILE_NAME].file_size = out_hashing.count(); @@ -436,7 +437,7 @@ void finalizeMutatedPart( if (execute_ttl_type != ExecuteTTLType::NONE) { /// Write a file with ttl infos in json format. - auto out_ttl = disk->writeFile(part_path / "ttl.txt", 4096); + auto out_ttl = disk->writeFile(part_path / "ttl.txt", 4096, WriteMode::Rewrite, context->getWriteSettings()); HashingWriteBuffer out_hashing(*out_ttl); new_data_part->ttl_infos.write(out_hashing); new_data_part->checksums.files["ttl.txt"].file_size = out_hashing.count(); @@ -445,7 +446,7 @@ void finalizeMutatedPart( if (!new_data_part->getSerializationInfos().empty()) { - auto out = disk->writeFile(part_path / IMergeTreeDataPart::SERIALIZATION_FILE_NAME, 4096); + auto out = disk->writeFile(part_path / IMergeTreeDataPart::SERIALIZATION_FILE_NAME, 4096, WriteMode::Rewrite, context->getWriteSettings()); HashingWriteBuffer out_hashing(*out); new_data_part->getSerializationInfos().writeJSON(out_hashing); new_data_part->checksums.files[IMergeTreeDataPart::SERIALIZATION_FILE_NAME].file_size = out_hashing.count(); @@ -454,18 +455,18 @@ void finalizeMutatedPart( { /// Write file with checksums. 
- auto out_checksums = disk->writeFile(part_path / "checksums.txt", 4096); + auto out_checksums = disk->writeFile(part_path / "checksums.txt", 4096, WriteMode::Rewrite, context->getWriteSettings()); new_data_part->checksums.write(*out_checksums); } /// close fd { - auto out = disk->writeFile(part_path / IMergeTreeDataPart::DEFAULT_COMPRESSION_CODEC_FILE_NAME, 4096); + auto out = disk->writeFile(part_path / IMergeTreeDataPart::DEFAULT_COMPRESSION_CODEC_FILE_NAME, 4096, WriteMode::Rewrite, context->getWriteSettings()); DB::writeText(queryToString(codec->getFullCodecDesc()), *out); } { /// Write a file with a description of columns. - auto out_columns = disk->writeFile(part_path / "columns.txt", 4096); + auto out_columns = disk->writeFile(part_path / "columns.txt", 4096, WriteMode::Rewrite, context->getWriteSettings()); new_data_part->getColumns().writeText(*out_columns); } /// close fd @@ -1162,7 +1163,7 @@ private: } } - MutationHelpers::finalizeMutatedPart(ctx->source_part, ctx->new_data_part, ctx->execute_ttl_type, ctx->compression_codec); + MutationHelpers::finalizeMutatedPart(ctx->source_part, ctx->new_data_part, ctx->execute_ttl_type, ctx->compression_codec, ctx->context); } diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 5de1b959d7c..ae6d9e5474e 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -423,7 +423,7 @@ Int64 StorageMergeTree::startMutation(const MutationCommands & commands, String { std::lock_guard lock(currently_processing_in_background_mutex); - MergeTreeMutationEntry entry(commands, disk, relative_data_path, insert_increment.get()); + MergeTreeMutationEntry entry(commands, disk, relative_data_path, insert_increment.get(), getContext()->getWriteSettings()); version = increment.get(); entry.commit(version); mutation_file_name = entry.file_name; diff --git a/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.reference 
b/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.reference index 3d0d9dadf6a..5bc2049204a 100644 --- a/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.reference +++ b/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.reference @@ -1,15 +1,15 @@ -- { echo } -SET remote_fs_cache_on_insert=1; +SET remote_fs_cache_on_write_operations=1; DROP TABLE IF EXISTS test; -CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3'; +CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache'; SYSTEM DROP REMOTE FILESYSTEM CACHE; SELECT file_segment_range, size, state FROM (SELECT file_segment_range, size, state, local_path FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.remote_filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path) WHERE endsWith(local_path, 'data.bin') FORMAT Vertical; SELECT count() FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.remote_filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path; 0 SELECT count() FROM system.remote_filesystem_cache; 0 -INSERT INTO test SELECT number, toString(number) FROM numbers(100); +INSERT INTO test SELECT number, toString(number) FROM numbers(100) SETTINGS remote_fs_cache_on_write_operations=1; SELECT file_segment_range, size, state FROM (SELECT file_segment_range, size, state, local_path FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.remote_filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path) WHERE endsWith(local_path, 'data.bin') FORMAT Vertical; Row 1: ────── @@ -44,6 +44,17 @@ SELECT count() FROM system.remote_filesystem_cache; 7 SELECT count() FROM 
system.remote_filesystem_cache; 7 -INSERT INTO test SELECT number, toString(number) FROM numbers(100) SETTINGS remote_fs_cache_on_insert=0; -- still writes cache because now config setting is used +INSERT INTO test SELECT number, toString(number) FROM numbers(100) SETTINGS remote_fs_cache_on_write_operations=0; SELECT count() FROM system.remote_filesystem_cache; 7 +INSERT INTO test SELECT number, toString(number) FROM numbers(100); +INSERT INTO test SELECT number, toString(number) FROM numbers(300, 10000); +SELECT count() FROM system.remote_filesystem_cache; +21 +OPTIMIZE TABLE test FINAL; +SELECT count() FROM system.remote_filesystem_cache; +24 +SET mutations_sync=2; +ALTER TABLE test UPDATE value = 'kek' WHERE key = 100; +SELECT count() FROM system.remote_filesystem_cache; +25 diff --git a/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.sql b/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.sql index dfcc617e0f4..58f4adb5980 100644 --- a/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.sql +++ b/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.sql @@ -2,10 +2,10 @@ -- { echo } -SET remote_fs_cache_on_insert=1; +SET remote_fs_cache_on_write_operations=1; DROP TABLE IF EXISTS test; -CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3'; +CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache'; SYSTEM DROP REMOTE FILESYSTEM CACHE; @@ -13,7 +13,7 @@ SELECT file_segment_range, size, state FROM (SELECT file_segment_range, size, st SELECT count() FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.remote_filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path; SELECT count() FROM system.remote_filesystem_cache; -INSERT INTO test SELECT number, toString(number) FROM numbers(100); +INSERT INTO 
test SELECT number, toString(number) FROM numbers(100) SETTINGS remote_fs_cache_on_write_operations=1; SELECT file_segment_range, size, state FROM (SELECT file_segment_range, size, state, local_path FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.remote_filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path) WHERE endsWith(local_path, 'data.bin') FORMAT Vertical; SELECT count() FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.remote_filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path; @@ -38,7 +38,15 @@ SELECT count() FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, re SELECT count() FROM system.remote_filesystem_cache; SELECT count() FROM system.remote_filesystem_cache; -INSERT INTO test SELECT number, toString(number) FROM numbers(100) SETTINGS remote_fs_cache_on_insert=0; -- still writes cache because now config setting is used +INSERT INTO test SELECT number, toString(number) FROM numbers(100) SETTINGS remote_fs_cache_on_write_operations=0; SELECT count() FROM system.remote_filesystem_cache; +INSERT INTO test SELECT number, toString(number) FROM numbers(100); +INSERT INTO test SELECT number, toString(number) FROM numbers(300, 10000); +SELECT count() FROM system.remote_filesystem_cache; +OPTIMIZE TABLE test FINAL; +SELECT count() FROM system.remote_filesystem_cache; +SET mutations_sync=2; +ALTER TABLE test UPDATE value = 'kek' WHERE key = 100; +SELECT count() FROM system.remote_filesystem_cache; From d4161b59256e1b3d41fed267a4d43d4e5373513a Mon Sep 17 00:00:00 2001 From: kssenii Date: Wed, 23 Mar 2022 19:46:28 +0100 Subject: [PATCH 12/27] Add optin `read_from_cache_if_exists_otherwise_bypass_cache` (for merges) --- src/Common/FileCache.cpp | 2 +- src/Common/FileCache.h | 3 ++- src/Common/FileSegment.cpp | 3 ++- 
src/Disks/IO/CachedReadBufferFromRemoteFS.cpp | 24 +++++++++++++++++-- src/Disks/IO/CachedReadBufferFromRemoteFS.h | 2 ++ src/Disks/IO/ReadBufferFromRemoteFSGather.cpp | 5 +++- src/IO/ReadSettings.h | 1 + src/IO/WriteBufferFromS3.cpp | 2 +- src/Storages/MergeTree/MergeTask.cpp | 3 ++- tests/config/config.d/storage_conf.xml | 2 +- 10 files changed, 38 insertions(+), 9 deletions(-) diff --git a/src/Common/FileCache.cpp b/src/Common/FileCache.cpp index f0d2bba33d4..37a8ac78e98 100644 --- a/src/Common/FileCache.cpp +++ b/src/Common/FileCache.cpp @@ -57,7 +57,7 @@ String IFileCache::getPathInLocalCache(const Key & key) return fs::path(cache_base_path) / key_str.substr(0, 3) / key_str; } -bool IFileCache::shouldBypassCache() +bool IFileCache::isReadOnly() { return !CurrentThread::isInitialized() || !CurrentThread::get().getQueryContext() diff --git a/src/Common/FileCache.h b/src/Common/FileCache.h index e8280fba08a..90632a54edd 100644 --- a/src/Common/FileCache.h +++ b/src/Common/FileCache.h @@ -43,7 +43,8 @@ public: virtual void tryRemoveAll() = 0; - static bool shouldBypassCache(); + /// If cache can be used as read only. (For merges, for example). + static bool isReadOnly(); /// Cache capacity in bytes. size_t capacity() const { return max_size; } diff --git a/src/Common/FileSegment.cpp b/src/Common/FileSegment.cpp index f13cbc6a464..ce1253a6f5c 100644 --- a/src/Common/FileSegment.cpp +++ b/src/Common/FileSegment.cpp @@ -89,7 +89,8 @@ String FileSegment::getCallerId() String FileSegment::getCallerIdImpl(bool allow_non_strict_checking) { - if (IFileCache::shouldBypassCache()) + /// Cache is read only, if it is read operation (which can potentially do cache writes), but there is no query attached. + if (IFileCache::isReadOnly()) { /// getCallerId() can be called from completeImpl(), which can be called from complete(). /// complete() is called from destructor of CachedReadBufferFromRemoteFS when there is no query id anymore. 
diff --git a/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp b/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp index d093e29b28e..84490d84801 100644 --- a/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp +++ b/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp @@ -124,6 +124,21 @@ SeekableReadBufferPtr CachedReadBufferFromRemoteFS::getReadBufferForFileSegment( size_t wait_download_tries = 0; auto download_state = file_segment->state(); + + if (settings.remote_fs_read_from_cache_if_exists_otherwise_bypass_cache) + { + if (download_state == FileSegment::State::DOWNLOADED) + { + read_type = ReadType::CACHED; + return getCacheReadBuffer(range.left); + } + else + { + read_type = ReadType::REMOTE_FS_READ_BYPASS_CACHE; + return getRemoteFSReadBuffer(file_segment, read_type); + } + } + while (true) { switch (download_state) @@ -544,8 +559,7 @@ bool CachedReadBufferFromRemoteFS::nextImpl() bool CachedReadBufferFromRemoteFS::nextImplStep() { - if (IFileCache::shouldBypassCache()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Using cache when not allowed"); + assertCacheAllowed(); if (!initialized) initialize(file_offset_of_buffer_end, getTotalSizeToRead()); @@ -758,6 +772,12 @@ std::optional CachedReadBufferFromRemoteFS::getLastNonDownloadedOffset() return std::nullopt; } +void CachedReadBufferFromRemoteFS::assertCacheAllowed() const +{ + if (IFileCache::isReadOnly() && !settings.remote_fs_read_from_cache_if_exists_otherwise_bypass_cache) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cache used when not allowed"); +} + String CachedReadBufferFromRemoteFS::getInfoForLog() { return fmt::format("Buffer path: {}, hash key: {}, file_offset_of_buffer_end: {}, internal buffer remaining read range: {}, file segment info: {}", diff --git a/src/Disks/IO/CachedReadBufferFromRemoteFS.h b/src/Disks/IO/CachedReadBufferFromRemoteFS.h index 3d03debcd01..d5567588019 100644 --- a/src/Disks/IO/CachedReadBufferFromRemoteFS.h +++ b/src/Disks/IO/CachedReadBufferFromRemoteFS.h @@ -50,6 +50,8 @@ private: bool 
nextImplStep(); + void assertCacheAllowed() const; + enum class ReadType { CACHED, diff --git a/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp b/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp index 8f91804bbbe..91f448a2ea7 100644 --- a/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp +++ b/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp @@ -38,7 +38,7 @@ SeekableReadBufferPtr ReadBufferFromS3Gather::createImplementationBuffer(const S current_path = path; auto cache = settings.remote_fs_cache; - bool with_cache = cache && settings.remote_fs_enable_cache && !IFileCache::shouldBypassCache(); + bool with_cache = cache && settings.remote_fs_enable_cache; auto remote_file_reader_creator = [=, this]() { @@ -49,6 +49,9 @@ SeekableReadBufferPtr ReadBufferFromS3Gather::createImplementationBuffer(const S if (with_cache) { + if (IFileCache::isReadOnly()) + settings.remote_fs_read_from_cache_if_exists_otherwise_bypass_cache = true; + return std::make_shared( path, cache, remote_file_reader_creator, settings, read_until_position ? 
read_until_position : file_size); } diff --git a/src/IO/ReadSettings.h b/src/IO/ReadSettings.h index e321eecf104..936de1673b4 100644 --- a/src/IO/ReadSettings.h +++ b/src/IO/ReadSettings.h @@ -79,6 +79,7 @@ struct ReadSettings size_t remote_fs_read_backoff_max_tries = 4; bool remote_fs_enable_cache = true; size_t remote_fs_cache_max_wait_sec = 1; + bool remote_fs_read_from_cache_if_exists_otherwise_bypass_cache = false; size_t remote_read_min_bytes_for_seek = DBMS_DEFAULT_BUFFER_SIZE; diff --git a/src/IO/WriteBufferFromS3.cpp b/src/IO/WriteBufferFromS3.cpp index 53118bbf867..9ed008907c9 100644 --- a/src/IO/WriteBufferFromS3.cpp +++ b/src/IO/WriteBufferFromS3.cpp @@ -160,7 +160,7 @@ WriteBufferFromS3::~WriteBufferFromS3() bool WriteBufferFromS3::cacheEnabled() const { - return cache && IFileCache::shouldBypassCache() == false; + return cache != nullptr; } void WriteBufferFromS3::preFinalize() diff --git a/src/Storages/MergeTree/MergeTask.cpp b/src/Storages/MergeTree/MergeTask.cpp index 22c7c6af83e..d9cf6c21091 100644 --- a/src/Storages/MergeTree/MergeTask.cpp +++ b/src/Storages/MergeTree/MergeTask.cpp @@ -261,7 +261,8 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::prepare() MergeTreeIndexFactory::instance().getMany(global_ctx->metadata_snapshot->getSecondaryIndices()), ctx->compression_codec, /*reset_columns=*/ true, - ctx->blocks_are_granules_size); + ctx->blocks_are_granules_size, + global_ctx->context->getWriteSettings()); global_ctx->rows_written = 0; ctx->initial_reservation = global_ctx->space_reservation ? 
global_ctx->space_reservation->getSize() : 0; diff --git a/tests/config/config.d/storage_conf.xml b/tests/config/config.d/storage_conf.xml index dd148147367..3dd4811b1bf 100644 --- a/tests/config/config.d/storage_conf.xml +++ b/tests/config/config.d/storage_conf.xml @@ -9,7 +9,7 @@ 1 0 22548578304 - 1 + 1 From d6ab6de30a3594c435754f4a292f5e2f453ad577 Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 24 Mar 2022 15:32:08 +0100 Subject: [PATCH 13/27] Minor changes --- src/Common/FileSegment.cpp | 3 +-- src/Disks/DiskWebServer.h | 7 +++++-- src/Disks/IDisk.h | 13 +++++------- src/Disks/IDiskRemote.h | 2 -- src/IO/WriteBufferFromS3.cpp | 1 + .../StorageSystemRemoteFilesystemCache.cpp | 14 ++++++++----- ...emote_filesystem_cache_on_insert.reference | 20 ++++++++++--------- ...2241_remote_filesystem_cache_on_insert.sql | 6 +++--- 8 files changed, 35 insertions(+), 31 deletions(-) diff --git a/src/Common/FileSegment.cpp b/src/Common/FileSegment.cpp index ce1253a6f5c..1dda76ba4b5 100644 --- a/src/Common/FileSegment.cpp +++ b/src/Common/FileSegment.cpp @@ -161,8 +161,7 @@ String FileSegment::getDownloader() const bool FileSegment::isDownloader() const { std::lock_guard segment_lock(mutex); - LOG_TEST(log, "Checking for current downloader. 
Caller: {}, downloader: {}, current state: {}", getCallerId(), downloader_id, stateToString(download_state)); - return getCallerId() == downloader_id; + return getCallerIdImpl(true) == downloader_id; } FileSegment::RemoteFileReaderPtr FileSegment::getRemoteFileReader() diff --git a/src/Disks/DiskWebServer.h b/src/Disks/DiskWebServer.h index 5dbfbe8994a..94ba32939da 100644 --- a/src/Disks/DiskWebServer.h +++ b/src/Disks/DiskWebServer.h @@ -77,8 +77,7 @@ public: UInt64 getTotalSpace() const final override { return std::numeric_limits::max(); } UInt64 getAvailableSpace() const final override { return std::numeric_limits::max(); } - - UInt64 getUnreservedSpace() const final override { return std::numeric_limits::max(); } +UInt64 getUnreservedSpace() const final override { return std::numeric_limits::max(); } /// Read-only part @@ -165,6 +164,10 @@ public: throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Disk {} is read-only", getName()); } + std::vector getRemotePaths(const String &) const override { return {}; } + + void getRemotePathsRecursive(const String &, std::vector &) override {} + /// Create part void createFile(const String &) final override {} diff --git a/src/Disks/IDisk.h b/src/Disks/IDisk.h index 0f6e31dd0f7..81cdf47e1fb 100644 --- a/src/Disks/IDisk.h +++ b/src/Disks/IDisk.h @@ -205,16 +205,13 @@ public: virtual void removeSharedFileIfExists(const String & path, bool) { removeFileIfExists(path); } - virtual String getCacheBasePath() const - { - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method `getCacheBasePath() not implemented fro disk: {}`", getType()); - } + virtual String getCacheBasePath() const { return ""; } - /// Returnes a list of paths because for Log family engines - /// there might be multiple files in remote fs for single clickhouse file. + /// Returns a list of paths because for Log family engines there might be + /// multiple files in remote fs for single clickhouse file. 
virtual std::vector getRemotePaths(const String &) const { - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method `getRemotePaths() not implemented fro disk: {}`", getType()); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method `getRemotePaths() not implemented for disk: {}`", getType()); } /// For one local path there might be multiple remote paths in case of Log family engines. @@ -222,7 +219,7 @@ public: virtual void getRemotePathsRecursive(const String &, std::vector &) { - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method `getRemotePathsRecursive() not implemented fro disk: {}`", getType()); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method `getRemotePathsRecursive() not implemented for disk: {}`", getType()); } struct RemoveRequest diff --git a/src/Disks/IDiskRemote.h b/src/Disks/IDiskRemote.h index a1d6092a286..6b16a1f753c 100644 --- a/src/Disks/IDiskRemote.h +++ b/src/Disks/IDiskRemote.h @@ -68,8 +68,6 @@ public: String getCacheBasePath() const final override; - /// Returnes a list of paths because for Log family engines - /// there might be multiple files in remote fs for single clickhouse file. 
std::vector getRemotePaths(const String & path) const final override; void getRemotePathsRecursive(const String & path, std::vector & paths_map) override; diff --git a/src/IO/WriteBufferFromS3.cpp b/src/IO/WriteBufferFromS3.cpp index 9ed008907c9..dbd8e1cf743 100644 --- a/src/IO/WriteBufferFromS3.cpp +++ b/src/IO/WriteBufferFromS3.cpp @@ -34,6 +34,7 @@ const int S3_WARN_MAX_PARTS = 10000; namespace ErrorCodes { extern const int S3_ERROR; + extern const int LOGICAL_ERROR; } struct WriteBufferFromS3::UploadPartTask diff --git a/src/Storages/System/StorageSystemRemoteFilesystemCache.cpp b/src/Storages/System/StorageSystemRemoteFilesystemCache.cpp index e37dcb74829..da5865e2043 100644 --- a/src/Storages/System/StorageSystemRemoteFilesystemCache.cpp +++ b/src/Storages/System/StorageSystemRemoteFilesystemCache.cpp @@ -17,10 +17,12 @@ NamesAndTypesList StorageSystemRemoteFilesystemCache::getNamesAndTypes() return { {"cache_base_path", std::make_shared()}, {"cache_path", std::make_shared()}, - {"file_segment_range", std::make_shared(DataTypes{std::make_shared(), std::make_shared()})}, + {"file_segment_range_begin", std::make_shared()}, + {"file_segment_range_end", std::make_shared()}, {"size", std::make_shared()}, {"state", std::make_shared()}, {"cache_hits", std::make_shared()}, + {"references", std::make_shared()}, }; } @@ -44,10 +46,12 @@ void StorageSystemRemoteFilesystemCache::fillData(MutableColumns & res_columns, res_columns[1]->insert(cache->getPathInLocalCache(file_segment->key(), file_segment->offset())); const auto & range = file_segment->range(); - res_columns[2]->insert(Tuple({range.left, range.right})); - res_columns[3]->insert(range.size()); - res_columns[4]->insert(FileSegment::stateToString(file_segment->state())); - res_columns[5]->insert(file_segment->hits()); + res_columns[2]->insert(range.left); + res_columns[3]->insert(range.right); + res_columns[4]->insert(range.size()); + res_columns[5]->insert(FileSegment::stateToString(file_segment->state())); + 
res_columns[6]->insert(file_segment->hits()); + res_columns[7]->insert(file_segment.use_count()); } } } diff --git a/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.reference b/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.reference index 5bc2049204a..941ab9089f7 100644 --- a/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.reference +++ b/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.reference @@ -4,18 +4,19 @@ SET remote_fs_cache_on_write_operations=1; DROP TABLE IF EXISTS test; CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache'; SYSTEM DROP REMOTE FILESYSTEM CACHE; -SELECT file_segment_range, size, state FROM (SELECT file_segment_range, size, state, local_path FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.remote_filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path) WHERE endsWith(local_path, 'data.bin') FORMAT Vertical; +SELECT file_segment_range_begin, file_segment_range_end, size, state FROM (SELECT file_segment_range_begin, file_segment_range_end, size, state, local_path FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.remote_filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path) WHERE endsWith(local_path, 'data.bin') FORMAT Vertical; SELECT count() FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.remote_filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path; 0 SELECT count() FROM system.remote_filesystem_cache; 0 INSERT INTO test SELECT number, toString(number) FROM numbers(100) SETTINGS remote_fs_cache_on_write_operations=1; -SELECT file_segment_range, size, state FROM (SELECT file_segment_range, size, 
state, local_path FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.remote_filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path) WHERE endsWith(local_path, 'data.bin') FORMAT Vertical; +SELECT file_segment_range_begin, file_segment_range_end, size, state FROM (SELECT file_segment_range_begin, file_segment_range_end, size, state, local_path FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.remote_filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path) WHERE endsWith(local_path, 'data.bin') FORMAT Vertical; Row 1: ────── -file_segment_range: (0,745) -size: 746 -state: DOWNLOADED +file_segment_range_begin: 0 +file_segment_range_end: 745 +size: 746 +state: DOWNLOADED SELECT count() FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.remote_filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path; 7 SELECT count() FROM system.remote_filesystem_cache; @@ -32,12 +33,13 @@ SELECT count() size FROM system.remote_filesystem_cache; 7 SYSTEM DROP REMOTE FILESYSTEM CACHE; INSERT INTO test SELECT number, toString(number) FROM numbers(100, 200); -SELECT file_segment_range, size, state FROM (SELECT file_segment_range, size, state, local_path FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.remote_filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path) WHERE endsWith(local_path, 'data.bin') FORMAT Vertical; +SELECT file_segment_range_begin, file_segment_range_end, size, state FROM (SELECT file_segment_range_begin, file_segment_range_end, size, state, local_path FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) 
AS data_paths INNER JOIN system.remote_filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path) WHERE endsWith(local_path, 'data.bin') FORMAT Vertical; Row 1: ────── -file_segment_range: (0,1659) -size: 1660 -state: DOWNLOADED +file_segment_range_begin: 0 +file_segment_range_end: 1659 +size: 1660 +state: DOWNLOADED SELECT count() FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.remote_filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path; 7 SELECT count() FROM system.remote_filesystem_cache; diff --git a/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.sql b/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.sql index 58f4adb5980..cd255d7df51 100644 --- a/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.sql +++ b/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.sql @@ -9,13 +9,13 @@ CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SET SYSTEM DROP REMOTE FILESYSTEM CACHE; -SELECT file_segment_range, size, state FROM (SELECT file_segment_range, size, state, local_path FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.remote_filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path) WHERE endsWith(local_path, 'data.bin') FORMAT Vertical; +SELECT file_segment_range_begin, file_segment_range_end, size, state FROM (SELECT file_segment_range_begin, file_segment_range_end, size, state, local_path FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.remote_filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path) WHERE endsWith(local_path, 'data.bin') FORMAT Vertical; SELECT count() FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path 
FROM system.remote_data_paths ) AS data_paths INNER JOIN system.remote_filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path; SELECT count() FROM system.remote_filesystem_cache; INSERT INTO test SELECT number, toString(number) FROM numbers(100) SETTINGS remote_fs_cache_on_write_operations=1; -SELECT file_segment_range, size, state FROM (SELECT file_segment_range, size, state, local_path FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.remote_filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path) WHERE endsWith(local_path, 'data.bin') FORMAT Vertical; +SELECT file_segment_range_begin, file_segment_range_end, size, state FROM (SELECT file_segment_range_begin, file_segment_range_end, size, state, local_path FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.remote_filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path) WHERE endsWith(local_path, 'data.bin') FORMAT Vertical; SELECT count() FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.remote_filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path; SELECT count() FROM system.remote_filesystem_cache; @@ -33,7 +33,7 @@ SYSTEM DROP REMOTE FILESYSTEM CACHE; INSERT INTO test SELECT number, toString(number) FROM numbers(100, 200); -SELECT file_segment_range, size, state FROM (SELECT file_segment_range, size, state, local_path FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.remote_filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path) WHERE endsWith(local_path, 'data.bin') FORMAT Vertical; +SELECT file_segment_range_begin, file_segment_range_end, size, state FROM (SELECT 
file_segment_range_begin, file_segment_range_end, size, state, local_path FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.remote_filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path) WHERE endsWith(local_path, 'data.bin') FORMAT Vertical; SELECT count() FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.remote_filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path; SELECT count() FROM system.remote_filesystem_cache; From 141c290344cca3e7db082e89822ecde71edc14a3 Mon Sep 17 00:00:00 2001 From: kssenii Date: Fri, 25 Mar 2022 18:31:15 +0100 Subject: [PATCH 14/27] Fix build --- src/Common/tests/gtest_lru_file_cache.cpp | 12 ++++++++--- src/Disks/tests/gtest_disk_encrypted.cpp | 20 +++++++++---------- .../StorageSystemRemoteFilesystemCache.cpp | 2 +- 3 files changed, 20 insertions(+), 14 deletions(-) diff --git a/src/Common/tests/gtest_lru_file_cache.cpp b/src/Common/tests/gtest_lru_file_cache.cpp index d5a76f9daad..77f4467fa36 100644 --- a/src/Common/tests/gtest_lru_file_cache.cpp +++ b/src/Common/tests/gtest_lru_file_cache.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -102,7 +103,10 @@ TEST(LRUFileCache, get) query_context->setCurrentQueryId("query_id"); DB::CurrentThread::QueryScope query_scope_holder(query_context); - auto cache = DB::LRUFileCache(cache_base_path, 30, 5); + DB::FileCacheSettings settings; + settings.max_size = 30; + settings.max_elements = 5; + auto cache = DB::LRUFileCache(cache_base_path, settings); cache.initialize(); auto key = cache.hash("key1"); @@ -472,7 +476,7 @@ TEST(LRUFileCache, get) { /// Test LRUCache::restore(). 
- auto cache2 = DB::LRUFileCache(cache_base_path, 30, 5); + auto cache2 = DB::LRUFileCache(cache_base_path, settings); cache2.initialize(); ASSERT_EQ(cache2.getStat().downloaded_size, 5); @@ -491,7 +495,9 @@ TEST(LRUFileCache, get) { /// Test max file segment size - auto cache2 = DB::LRUFileCache(caches_dir / "cache2", 30, 5, /* max_file_segment_size */10); + auto settings2 = settings; + settings.max_file_segment_size = 10; + auto cache2 = DB::LRUFileCache(caches_dir / "cache2", settings2); cache2.initialize(); auto holder1 = cache2.getOrSet(key, 0, 25); /// Get [0, 24] diff --git a/src/Disks/tests/gtest_disk_encrypted.cpp b/src/Disks/tests/gtest_disk_encrypted.cpp index fd3cc1acbe5..da041437951 100644 --- a/src/Disks/tests/gtest_disk_encrypted.cpp +++ b/src/Disks/tests/gtest_disk_encrypted.cpp @@ -96,7 +96,7 @@ TEST_F(DiskEncryptedTest, WriteAndRead) /// Write a file. { - auto buf = encrypted_disk->writeFile("a.txt", DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Rewrite); + auto buf = encrypted_disk->writeFile("a.txt", DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Rewrite, {}); writeString(std::string_view{"Some text"}, *buf); } @@ -122,7 +122,7 @@ TEST_F(DiskEncryptedTest, Append) /// Write a file (we use the append mode). { - auto buf = encrypted_disk->writeFile("a.txt", DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Append); + auto buf = encrypted_disk->writeFile("a.txt", DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Append, {}); writeString(std::string_view{"Some text"}, *buf); } @@ -132,7 +132,7 @@ TEST_F(DiskEncryptedTest, Append) /// Append the file. { - auto buf = encrypted_disk->writeFile("a.txt", DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Append); + auto buf = encrypted_disk->writeFile("a.txt", DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Append, {}); writeString(std::string_view{" Another text"}, *buf); } @@ -148,7 +148,7 @@ TEST_F(DiskEncryptedTest, Truncate) /// Write a file (we use the append mode). 
{ - auto buf = encrypted_disk->writeFile("a.txt", DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Append); + auto buf = encrypted_disk->writeFile("a.txt", DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Append, {}); writeString(std::string_view{"Some text"}, *buf); } @@ -178,7 +178,7 @@ TEST_F(DiskEncryptedTest, ZeroFileSize) /// Write nothing to a file. { - auto buf = encrypted_disk->writeFile("a.txt", DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Rewrite); + auto buf = encrypted_disk->writeFile("a.txt", DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Rewrite, {}); } EXPECT_EQ(encrypted_disk->getFileSize("a.txt"), 0); @@ -187,7 +187,7 @@ TEST_F(DiskEncryptedTest, ZeroFileSize) /// Append the file with nothing. { - auto buf = encrypted_disk->writeFile("a.txt", DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Append); + auto buf = encrypted_disk->writeFile("a.txt", DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Append, {}); } EXPECT_EQ(encrypted_disk->getFileSize("a.txt"), 0); @@ -211,7 +211,7 @@ TEST_F(DiskEncryptedTest, AnotherFolder) /// Write a file. { - auto buf = encrypted_disk->writeFile("a.txt", DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Rewrite); + auto buf = encrypted_disk->writeFile("a.txt", DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Rewrite, {}); writeString(std::string_view{"Some text"}, *buf); } @@ -231,11 +231,11 @@ TEST_F(DiskEncryptedTest, RandomIV) /// Write two files with the same contents. 
{ - auto buf = encrypted_disk->writeFile("a.txt", DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Rewrite); + auto buf = encrypted_disk->writeFile("a.txt", DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Rewrite, {}); writeString(std::string_view{"Some text"}, *buf); } { - auto buf = encrypted_disk->writeFile("b.txt", DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Rewrite); + auto buf = encrypted_disk->writeFile("b.txt", DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Rewrite, {}); writeString(std::string_view{"Some text"}, *buf); } @@ -277,7 +277,7 @@ TEST_F(DiskEncryptedTest, RemoveFileDuringWriting) std::thread t1{[&] { for (size_t i = 0; i != n; ++i) - encrypted_disk->writeFile("a.txt", DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Append); + encrypted_disk->writeFile("a.txt", DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Append, {}); }}; std::thread t2{[&] diff --git a/src/Storages/System/StorageSystemRemoteFilesystemCache.cpp b/src/Storages/System/StorageSystemRemoteFilesystemCache.cpp index da5865e2043..c0d8ffc67bf 100644 --- a/src/Storages/System/StorageSystemRemoteFilesystemCache.cpp +++ b/src/Storages/System/StorageSystemRemoteFilesystemCache.cpp @@ -37,7 +37,7 @@ void StorageSystemRemoteFilesystemCache::fillData(MutableColumns & res_columns, for (const auto & [cache_base_path, cache_data] : caches) { - auto & cache = cache_data.cache; + const auto & cache = cache_data.cache; auto holder = cache->getAll(); for (const auto & file_segment : holder.file_segments) From 421b1e5a815a583ead854a4254cb02a0e6703799 Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 29 Mar 2022 17:33:02 +0200 Subject: [PATCH 15/27] Update --- src/Common/FileSegment.cpp | 2 +- src/Disks/IO/ReadBufferFromRemoteFSGather.cpp | 6 +++++- src/Disks/IO/ThreadPoolRemoteFSReader.cpp | 3 ++- 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/src/Common/FileSegment.cpp b/src/Common/FileSegment.cpp index 1dda76ba4b5..7d341d9bbb9 100644 --- a/src/Common/FileSegment.cpp +++ b/src/Common/FileSegment.cpp @@ -161,7 +161,7 @@ String 
FileSegment::getDownloader() const bool FileSegment::isDownloader() const { std::lock_guard segment_lock(mutex); - return getCallerIdImpl(true) == downloader_id; + return getCallerId() == downloader_id; } FileSegment::RemoteFileReaderPtr FileSegment::getRemoteFileReader() diff --git a/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp b/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp index 91f448a2ea7..d106260e6b3 100644 --- a/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp +++ b/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp @@ -38,7 +38,9 @@ SeekableReadBufferPtr ReadBufferFromS3Gather::createImplementationBuffer(const S current_path = path; auto cache = settings.remote_fs_cache; - bool with_cache = cache && settings.remote_fs_enable_cache; + // auto global_context = CurrentThread::isInitialized() ? CurrentThread::get().getGlobalContext() : nullptr; + // bool with_cache = cache && settings.remote_fs_enable_cache && global_context; + bool with_cache = false; auto remote_file_reader_creator = [=, this]() { @@ -50,7 +52,9 @@ SeekableReadBufferPtr ReadBufferFromS3Gather::createImplementationBuffer(const S if (with_cache) { if (IFileCache::isReadOnly()) + { settings.remote_fs_read_from_cache_if_exists_otherwise_bypass_cache = true; + } return std::make_shared( path, cache, remote_file_reader_creator, settings, read_until_position ? read_until_position : file_size); diff --git a/src/Disks/IO/ThreadPoolRemoteFSReader.cpp b/src/Disks/IO/ThreadPoolRemoteFSReader.cpp index bdb012a6376..4a3e0ba27a2 100644 --- a/src/Disks/IO/ThreadPoolRemoteFSReader.cpp +++ b/src/Disks/IO/ThreadPoolRemoteFSReader.cpp @@ -74,7 +74,8 @@ std::future ThreadPoolRemoteFSReader::submit(Reques ProfileEvents::increment(ProfileEvents::RemoteFSReadMicroseconds, watch.elapsedMicroseconds()); ProfileEvents::increment(ProfileEvents::RemoteFSReadBytes, bytes_read); - if (running_group) + /// Query id cound be attached artificially. 
+ if (running_group || (CurrentThread::isInitialized() && CurrentThread::getQueryId().size != 0)) thread_status.detachQuery(); return Result{ .size = bytes_read, .offset = offset }; From f0e0d977524bfe9accb4ac2b134b1ad2e619e98f Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 29 Mar 2022 19:49:42 +0200 Subject: [PATCH 16/27] Fix lask of query id in merges --- src/Common/FileCache.cpp | 2 +- src/Common/FileCache.h | 2 +- src/Common/FileSegment.cpp | 3 +- src/Common/ThreadStatus.h | 5 ++++ src/Disks/IO/CachedReadBufferFromRemoteFS.cpp | 2 +- src/Disks/IO/ReadBufferFromRemoteFSGather.cpp | 9 +----- src/Disks/IO/ThreadPoolRemoteFSReader.cpp | 28 ++++++++++++++++--- src/Disks/IO/ThreadPoolRemoteFSReader.h | 1 + src/Disks/S3/DiskS3.cpp | 6 ++++ src/IO/WriteBufferFromS3.cpp | 2 +- ...emote_filesystem_cache_on_insert.reference | 4 +-- 11 files changed, 44 insertions(+), 20 deletions(-) diff --git a/src/Common/FileCache.cpp b/src/Common/FileCache.cpp index 37a8ac78e98..f0d2bba33d4 100644 --- a/src/Common/FileCache.cpp +++ b/src/Common/FileCache.cpp @@ -57,7 +57,7 @@ String IFileCache::getPathInLocalCache(const Key & key) return fs::path(cache_base_path) / key_str.substr(0, 3) / key_str; } -bool IFileCache::isReadOnly() +bool IFileCache::shouldBypassCache() { return !CurrentThread::isInitialized() || !CurrentThread::get().getQueryContext() diff --git a/src/Common/FileCache.h b/src/Common/FileCache.h index 90632a54edd..3a444b1c201 100644 --- a/src/Common/FileCache.h +++ b/src/Common/FileCache.h @@ -44,7 +44,7 @@ public: virtual void tryRemoveAll() = 0; /// If cache can be used as read only. (For merges, for example). - static bool isReadOnly(); + static bool shouldBypassCache(); /// Cache capacity in bytes. 
size_t capacity() const { return max_size; } diff --git a/src/Common/FileSegment.cpp b/src/Common/FileSegment.cpp index 7d341d9bbb9..ae74e9b4c22 100644 --- a/src/Common/FileSegment.cpp +++ b/src/Common/FileSegment.cpp @@ -89,8 +89,7 @@ String FileSegment::getCallerId() String FileSegment::getCallerIdImpl(bool allow_non_strict_checking) { - /// Cache is read only, if it is read operation (which can potentially do cache writes), but there is no query attached. - if (IFileCache::isReadOnly()) + if (IFileCache::shouldBypassCache()) { /// getCallerId() can be called from completeImpl(), which can be called from complete(). /// complete() is called from destructor of CachedReadBufferFromRemoteFS when there is no query id anymore. diff --git a/src/Common/ThreadStatus.h b/src/Common/ThreadStatus.h index f3920474111..3d7ec08cdaf 100644 --- a/src/Common/ThreadStatus.h +++ b/src/Common/ThreadStatus.h @@ -216,6 +216,11 @@ public: return query_context.lock(); } + auto getGlobalContext() const + { + return global_context.lock(); + } + void disableProfiling() { assert(!query_profiler_real && !query_profiler_cpu); diff --git a/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp b/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp index 84490d84801..a810c7b9f66 100644 --- a/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp +++ b/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp @@ -774,7 +774,7 @@ std::optional CachedReadBufferFromRemoteFS::getLastNonDownloadedOffset() void CachedReadBufferFromRemoteFS::assertCacheAllowed() const { - if (IFileCache::isReadOnly() && !settings.remote_fs_read_from_cache_if_exists_otherwise_bypass_cache) + if (IFileCache::shouldBypassCache() && !settings.remote_fs_read_from_cache_if_exists_otherwise_bypass_cache) throw Exception(ErrorCodes::LOGICAL_ERROR, "Cache used when not allowed"); } diff --git a/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp b/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp index d106260e6b3..8f91804bbbe 100644 --- 
a/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp +++ b/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp @@ -38,9 +38,7 @@ SeekableReadBufferPtr ReadBufferFromS3Gather::createImplementationBuffer(const S current_path = path; auto cache = settings.remote_fs_cache; - // auto global_context = CurrentThread::isInitialized() ? CurrentThread::get().getGlobalContext() : nullptr; - // bool with_cache = cache && settings.remote_fs_enable_cache && global_context; - bool with_cache = false; + bool with_cache = cache && settings.remote_fs_enable_cache && !IFileCache::shouldBypassCache(); auto remote_file_reader_creator = [=, this]() { @@ -51,11 +49,6 @@ SeekableReadBufferPtr ReadBufferFromS3Gather::createImplementationBuffer(const S if (with_cache) { - if (IFileCache::isReadOnly()) - { - settings.remote_fs_read_from_cache_if_exists_otherwise_bypass_cache = true; - } - return std::make_shared( path, cache, remote_file_reader_creator, settings, read_until_position ? read_until_position : file_size); } diff --git a/src/Disks/IO/ThreadPoolRemoteFSReader.cpp b/src/Disks/IO/ThreadPoolRemoteFSReader.cpp index 4a3e0ba27a2..7e82dc627d5 100644 --- a/src/Disks/IO/ThreadPoolRemoteFSReader.cpp +++ b/src/Disks/IO/ThreadPoolRemoteFSReader.cpp @@ -1,5 +1,6 @@ #include "ThreadPoolRemoteFSReader.h" +#include #include #include #include @@ -50,18 +51,37 @@ std::future ThreadPoolRemoteFSReader::submit(Reques if (CurrentThread::isInitialized()) query_context = CurrentThread::get().getQueryContext(); + if (!query_context) + { + if (!shared_query_context) + { + ContextPtr global_context = CurrentThread::isInitialized() ? 
CurrentThread::get().getGlobalContext() : nullptr; + if (global_context) + { + shared_query_context = Context::createCopy(global_context); + shared_query_context->makeQueryContext(); + } + } + + if (shared_query_context) + { + shared_query_context->setCurrentQueryId(toString(UUIDHelpers::generateV4())); + query_context = shared_query_context; + } + } + auto task = std::make_shared>([request, running_group, query_context] { ThreadStatus thread_status; - /// Save query context if any, because cache implementation needs it. - if (query_context) - thread_status.attachQueryContext(query_context); - /// To be able to pass ProfileEvents. if (running_group) thread_status.attachQuery(running_group); + /// Save query context if any, because cache implementation needs it. + if (query_context) + thread_status.attachQueryContext(query_context); + setThreadName("VFSRead"); CurrentMetrics::Increment metric_increment{CurrentMetrics::Read}; diff --git a/src/Disks/IO/ThreadPoolRemoteFSReader.h b/src/Disks/IO/ThreadPoolRemoteFSReader.h index b2d5f11724a..a2a1e77c834 100644 --- a/src/Disks/IO/ThreadPoolRemoteFSReader.h +++ b/src/Disks/IO/ThreadPoolRemoteFSReader.h @@ -15,6 +15,7 @@ class ThreadPoolRemoteFSReader : public IAsynchronousReader private: ThreadPool pool; + ContextMutablePtr shared_query_context; public: ThreadPoolRemoteFSReader(size_t pool_size, size_t queue_size_); diff --git a/src/Disks/S3/DiskS3.cpp b/src/Disks/S3/DiskS3.cpp index 23510cfee93..517972da876 100644 --- a/src/Disks/S3/DiskS3.cpp +++ b/src/Disks/S3/DiskS3.cpp @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -228,7 +229,12 @@ std::unique_ptr DiskS3::readFile(const String & path, co ReadSettings disk_read_settings{read_settings}; if (cache) + { + if (IFileCache::shouldBypassCache()) + disk_read_settings.remote_fs_read_from_cache_if_exists_otherwise_bypass_cache = true; + disk_read_settings.remote_fs_cache = cache; + } auto s3_impl = std::make_unique( path, settings->client, bucket, 
metadata, diff --git a/src/IO/WriteBufferFromS3.cpp b/src/IO/WriteBufferFromS3.cpp index dbd8e1cf743..0eee7366775 100644 --- a/src/IO/WriteBufferFromS3.cpp +++ b/src/IO/WriteBufferFromS3.cpp @@ -161,7 +161,7 @@ WriteBufferFromS3::~WriteBufferFromS3() bool WriteBufferFromS3::cacheEnabled() const { - return cache != nullptr; + return cache != nullptr && !IFileCache::shouldBypassCache(); } void WriteBufferFromS3::preFinalize() diff --git a/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.reference b/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.reference index 941ab9089f7..1f470d5644f 100644 --- a/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.reference +++ b/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.reference @@ -55,8 +55,8 @@ SELECT count() FROM system.remote_filesystem_cache; 21 OPTIMIZE TABLE test FINAL; SELECT count() FROM system.remote_filesystem_cache; -24 +27 SET mutations_sync=2; ALTER TABLE test UPDATE value = 'kek' WHERE key = 100; SELECT count() FROM system.remote_filesystem_cache; -25 +28 From ce9131f905e58b7b523db24d022a69272b25f40e Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 29 Mar 2022 19:57:55 +0200 Subject: [PATCH 17/27] Fix unit test --- src/Common/tests/gtest_lru_file_cache.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Common/tests/gtest_lru_file_cache.cpp b/src/Common/tests/gtest_lru_file_cache.cpp index 77f4467fa36..8a45e881487 100644 --- a/src/Common/tests/gtest_lru_file_cache.cpp +++ b/src/Common/tests/gtest_lru_file_cache.cpp @@ -496,7 +496,7 @@ TEST(LRUFileCache, get) /// Test max file segment size auto settings2 = settings; - settings.max_file_segment_size = 10; + settings2.max_file_segment_size = 10; auto cache2 = DB::LRUFileCache(caches_dir / "cache2", settings2); cache2.initialize(); From 98ad3f4a911f10ef58cf102c6055652ad40d66f8 Mon Sep 17 00:00:00 2001 From: kssenii Date: Wed, 30 Mar 2022 11:54:42 +0200 Subject: [PATCH 18/27] 
Fix tests, rename some remote mentions --- src/Disks/IO/CachedReadBufferFromRemoteFS.cpp | 8 +++--- src/Disks/IO/ThreadPoolRemoteFSReader.cpp | 4 +-- src/Disks/S3/DiskS3.cpp | 1 - ...e.cpp => StorageSystemFilesystemCache.cpp} | 9 +++---- ...Cache.h => StorageSystemFilesystemCache.h} | 10 ++++---- src/Storages/System/attachSystemTables.cpp | 4 +-- .../0_stateless/02226_s3_with_cache.reference | 2 ++ .../0_stateless/02226_s3_with_cache.sql | 25 +++++++++++++++++++ ...0_system_remote_filesystem_cache.reference | 21 ++++++++-------- .../02240_system_remote_filesystem_cache.sql | 13 +++++----- 10 files changed, 60 insertions(+), 37 deletions(-) rename src/Storages/System/{StorageSystemRemoteFilesystemCache.cpp => StorageSystemFilesystemCache.cpp} (80%) rename src/Storages/System/{StorageSystemRemoteFilesystemCache.h => StorageSystemFilesystemCache.h} (64%) diff --git a/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp b/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp index a810c7b9f66..0bd06e44496 100644 --- a/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp +++ b/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp @@ -380,14 +380,12 @@ bool CachedReadBufferFromRemoteFS::completeFileSegmentAndGetNext() if (current_file_segment_it == file_segments_holder->file_segments.end()) return false; - file_segment = *current_file_segment_it; - - implementation_buffer = getImplementationBuffer(file_segment); + implementation_buffer = getImplementationBuffer(*current_file_segment_it); if (read_type == ReadType::CACHED) - file_segment->hit(); + (*current_file_segment_it)->hit(); - LOG_TEST(log, "New segment: {}", file_segment->range().toString()); + LOG_TEST(log, "New segment: {}", (*current_file_segment_it)->range().toString()); return true; } diff --git a/src/Disks/IO/ThreadPoolRemoteFSReader.cpp b/src/Disks/IO/ThreadPoolRemoteFSReader.cpp index 7e82dc627d5..83a5d8b276a 100644 --- a/src/Disks/IO/ThreadPoolRemoteFSReader.cpp +++ b/src/Disks/IO/ThreadPoolRemoteFSReader.cpp @@ -94,9 +94,7 @@ 
std::future ThreadPoolRemoteFSReader::submit(Reques ProfileEvents::increment(ProfileEvents::RemoteFSReadMicroseconds, watch.elapsedMicroseconds()); ProfileEvents::increment(ProfileEvents::RemoteFSReadBytes, bytes_read); - /// Query id cound be attached artificially. - if (running_group || (CurrentThread::isInitialized() && CurrentThread::getQueryId().size != 0)) - thread_status.detachQuery(); + thread_status.detachQuery(/* if_not_detached */true); return Result{ .size = bytes_read, .offset = offset }; }); diff --git a/src/Disks/S3/DiskS3.cpp b/src/Disks/S3/DiskS3.cpp index 517972da876..e682adb1487 100644 --- a/src/Disks/S3/DiskS3.cpp +++ b/src/Disks/S3/DiskS3.cpp @@ -272,7 +272,6 @@ std::unique_ptr DiskS3::writeFile(const String & path, LOG_TRACE(log, "{} to file by path: {}. S3 path: {}", mode == WriteMode::Rewrite ? "Write" : "Append", backQuote(metadata_disk->getPath() + path), remote_fs_root_path + blob_name); - bool cache_on_insert = fs::path(path).extension() != ".tmp" && write_settings.remote_fs_cache_on_write_operations && FileCacheFactory::instance().getSettings(getCacheBasePath()).cache_on_write_operations; diff --git a/src/Storages/System/StorageSystemRemoteFilesystemCache.cpp b/src/Storages/System/StorageSystemFilesystemCache.cpp similarity index 80% rename from src/Storages/System/StorageSystemRemoteFilesystemCache.cpp rename to src/Storages/System/StorageSystemFilesystemCache.cpp index c0d8ffc67bf..08a62c47f27 100644 --- a/src/Storages/System/StorageSystemRemoteFilesystemCache.cpp +++ b/src/Storages/System/StorageSystemFilesystemCache.cpp @@ -1,4 +1,4 @@ -#include "StorageSystemRemoteFilesystemCache.h" +#include "StorageSystemFilesystemCache.h" #include #include #include @@ -6,13 +6,12 @@ #include #include #include -#include namespace DB { -NamesAndTypesList StorageSystemRemoteFilesystemCache::getNamesAndTypes() +NamesAndTypesList StorageSystemFilesystemCache::getNamesAndTypes() { return { {"cache_base_path", std::make_shared()}, @@ -26,12 +25,12 @@ 
NamesAndTypesList StorageSystemRemoteFilesystemCache::getNamesAndTypes() }; } -StorageSystemRemoteFilesystemCache::StorageSystemRemoteFilesystemCache(const StorageID & table_id_) +StorageSystemFilesystemCache::StorageSystemFilesystemCache(const StorageID & table_id_) : IStorageSystemOneBlock(table_id_) { } -void StorageSystemRemoteFilesystemCache::fillData(MutableColumns & res_columns, ContextPtr, const SelectQueryInfo &) const +void StorageSystemFilesystemCache::fillData(MutableColumns & res_columns, ContextPtr, const SelectQueryInfo &) const { auto caches = FileCacheFactory::instance().getAll(); diff --git a/src/Storages/System/StorageSystemRemoteFilesystemCache.h b/src/Storages/System/StorageSystemFilesystemCache.h similarity index 64% rename from src/Storages/System/StorageSystemRemoteFilesystemCache.h rename to src/Storages/System/StorageSystemFilesystemCache.h index b4ace8a7fe8..cfd938a50ce 100644 --- a/src/Storages/System/StorageSystemRemoteFilesystemCache.h +++ b/src/Storages/System/StorageSystemFilesystemCache.h @@ -27,17 +27,17 @@ namespace DB * FORMAT Vertical */ -class StorageSystemRemoteFilesystemCache final : public shared_ptr_helper, - public IStorageSystemOneBlock +class StorageSystemFilesystemCache final : public shared_ptr_helper, + public IStorageSystemOneBlock { - friend struct shared_ptr_helper; + friend struct shared_ptr_helper; public: - std::string getName() const override { return "SystemRemoteFilesystemCache"; } + std::string getName() const override { return "SystemFilesystemCache"; } static NamesAndTypesList getNamesAndTypes(); protected: - explicit StorageSystemRemoteFilesystemCache(const StorageID & table_id_); + explicit StorageSystemFilesystemCache(const StorageID & table_id_); void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const override; }; diff --git a/src/Storages/System/attachSystemTables.cpp b/src/Storages/System/attachSystemTables.cpp index db30c265dc2..f84f26a5a78 100644 
--- a/src/Storages/System/attachSystemTables.cpp +++ b/src/Storages/System/attachSystemTables.cpp @@ -68,7 +68,7 @@ #include #include #include -#include +#include #include #ifdef OS_LINUX @@ -161,7 +161,7 @@ void attachSystemTablesServer(ContextPtr context, IDatabase & system_database, b attach(context, system_database, "replicated_fetches"); attach(context, system_database, "part_moves_between_shards"); attach(context, system_database, "asynchronous_inserts"); - attach(context, system_database, "remote_filesystem_cache"); + attach(context, system_database, "filesystem_cache"); attach(context, system_database, "remote_data_paths"); if (has_zookeeper) diff --git a/tests/queries/0_stateless/02226_s3_with_cache.reference b/tests/queries/0_stateless/02226_s3_with_cache.reference index 214addac2d6..4041f51b3f9 100644 --- a/tests/queries/0_stateless/02226_s3_with_cache.reference +++ b/tests/queries/0_stateless/02226_s3_with_cache.reference @@ -1,2 +1,4 @@ SELECT 1, * FROM test LIMIT 10 FORMAT Null; 1 0 1 SELECT 2, * FROM test LIMIT 10 FORMAT Null; 0 1 0 +0 +SELECT 3, * FROM test LIMIT 10 FORMAT Null; 1 1 0 diff --git a/tests/queries/0_stateless/02226_s3_with_cache.sql b/tests/queries/0_stateless/02226_s3_with_cache.sql index b3126a419df..5b0d4ff3e44 100644 --- a/tests/queries/0_stateless/02226_s3_with_cache.sql +++ b/tests/queries/0_stateless/02226_s3_with_cache.sql @@ -1,7 +1,9 @@ -- Tags: no-parallel, no-fasttest, long SET max_memory_usage='20G'; +SET remote_fs_cache_on_write_operations = 0; +DROP TABLE IF EXISTS test; CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache'; INSERT INTO test SELECT * FROM generateRandom('key UInt32, value String') LIMIT 10000; @@ -41,4 +43,27 @@ SET remote_filesystem_read_method='threadpool'; SELECT * FROM test WHERE value LIKE '%abc%' ORDER BY value LIMIT 10 FORMAT Null; +SET remote_fs_cache_on_write_operations = 1; + +TRUNCATE TABLE test; +SELECT count() FROM test; + +SYSTEM 
DROP REMOTE FILESYSTEM CACHE; + +INSERT INTO test SELECT * FROM generateRandom('key UInt32, value String') LIMIT 10000; + +SELECT 3, * FROM test LIMIT 10 FORMAT Null; + +SYSTEM FLUSH LOGS; +SELECT query, + ProfileEvents['RemoteFSReadBytes'] > 0 as remote_fs_read, + ProfileEvents['RemoteFSCacheReadBytes'] > 0 as remote_fs_cache_read, + ProfileEvents['RemoteFSCacheDownloadBytes'] > 0 as remote_fs_read_and_download +FROM system.query_log +WHERE query LIKE 'SELECT 3, * FROM test LIMIT%' +AND type = 'QueryFinish' +AND current_database = currentDatabase() +ORDER BY query_start_time DESC +LIMIT 1; + DROP TABLE test; diff --git a/tests/queries/0_stateless/02240_system_remote_filesystem_cache.reference b/tests/queries/0_stateless/02240_system_remote_filesystem_cache.reference index db07f1dd577..59c4d43d8ae 100644 --- a/tests/queries/0_stateless/02240_system_remote_filesystem_cache.reference +++ b/tests/queries/0_stateless/02240_system_remote_filesystem_cache.reference @@ -1,18 +1,19 @@ --- Tags: no-parallel - -- { echo } SYSTEM DROP REMOTE FILESYSTEM CACHE; -CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache'; -INSERT INTO test SELECT number, toString(number) FROM numbers(100); +SET remote_fs_cache_on_write_operations=0; DROP TABLE IF EXISTS test; +CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3'; +INSERT INTO test SELECT number, toString(number) FROM numbers(100); SELECT * FROM test FORMAT Null; -SELECT cache_base_path, file_segment_range, size FROM system.remote_filesystem_cache; -./disks/s3/data_cache/ (0,745) 746 +SELECT cache_base_path, file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache ORDER BY file_segment_range_end, size; +./disks/s3/data_cache/ 0 0 1 +./disks/s3/data_cache/ 0 79 80 +./disks/s3/data_cache/ 0 745 746 SYSTEM DROP REMOTE FILESYSTEM CACHE; -SELECT cache_base_path, file_segment_range, size FROM 
system.remote_filesystem_cache; +SELECT cache_base_path, file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache; SELECT * FROM test FORMAT Null; -SELECT cache_base_path, file_segment_range, size FROM system.remote_filesystem_cache; -./disks/s3/data_cache/ (0,745) 746 +SELECT cache_base_path, file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache; +./disks/s3/data_cache/ 0 745 746 SYSTEM DROP REMOTE FILESYSTEM CACHE; -SELECT cache_base_path, file_segment_range, size FROM system.remote_filesystem_cache; +SELECT cache_base_path, file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache; diff --git a/tests/queries/0_stateless/02240_system_remote_filesystem_cache.sql b/tests/queries/0_stateless/02240_system_remote_filesystem_cache.sql index fb6dd8d61b4..24ea62aabf8 100644 --- a/tests/queries/0_stateless/02240_system_remote_filesystem_cache.sql +++ b/tests/queries/0_stateless/02240_system_remote_filesystem_cache.sql @@ -3,15 +3,16 @@ -- { echo } SYSTEM DROP REMOTE FILESYSTEM CACHE; -CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache'; -INSERT INTO test SELECT number, toString(number) FROM numbers(100); +SET remote_fs_cache_on_write_operations=0; DROP TABLE IF EXISTS test; +CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3'; +INSERT INTO test SELECT number, toString(number) FROM numbers(100); SELECT * FROM test FORMAT Null; -SELECT cache_base_path, file_segment_range, size FROM system.remote_filesystem_cache; +SELECT cache_base_path, file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache ORDER BY file_segment_range_end, size; SYSTEM DROP REMOTE FILESYSTEM CACHE; -SELECT cache_base_path, file_segment_range, size FROM system.remote_filesystem_cache; +SELECT cache_base_path, file_segment_range_begin, file_segment_range_end, size FROM 
system.filesystem_cache; SELECT * FROM test FORMAT Null; -SELECT cache_base_path, file_segment_range, size FROM system.remote_filesystem_cache; +SELECT cache_base_path, file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache; SYSTEM DROP REMOTE FILESYSTEM CACHE; -SELECT cache_base_path, file_segment_range, size FROM system.remote_filesystem_cache; +SELECT cache_base_path, file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache; From 0fc92fe2aaf615b49eed9fbaf688422dbf6c589c Mon Sep 17 00:00:00 2001 From: kssenii Date: Wed, 30 Mar 2022 13:47:44 +0200 Subject: [PATCH 19/27] Get rid of all "remote" mentions --- docker/test/stateful/run.sh | 2 +- src/Common/FileCache.cpp | 1 + src/Common/FileCache.h | 1 - src/Core/Settings.h | 7 +-- src/Disks/DiskCacheWrapper.cpp | 6 +-- src/Disks/IO/CachedReadBufferFromRemoteFS.cpp | 6 +-- src/Disks/IO/ReadBufferFromRemoteFSGather.cpp | 2 +- src/Disks/S3/DiskS3.cpp | 4 +- src/IO/ReadSettings.h | 6 +-- src/IO/WriteSettings.h | 2 +- src/Interpreters/Context.cpp | 7 +-- src/Interpreters/InterpreterSystemQuery.cpp | 8 ++-- src/Parsers/ASTSystemQuery.h | 4 +- .../System/StorageSystemFilesystemCache.h | 2 +- tests/config/users.d/s3_cache.xml | 8 ++++ ...605_adaptive_granularity_block_borders.sql | 2 +- .../01641_memory_tracking_insert_optimize.sql | 2 +- .../0_stateless/01926_order_by_desc_limit.sql | 2 +- .../0_stateless/02226_s3_with_cache.sql | 6 +-- ...0_system_remote_filesystem_cache.reference | 10 ++-- .../02240_system_remote_filesystem_cache.sql | 10 ++-- ...emote_filesystem_cache_on_insert.reference | 46 +++++++++---------- ...2241_remote_filesystem_cache_on_insert.sql | 46 +++++++++---------- 23 files changed, 100 insertions(+), 90 deletions(-) create mode 100644 tests/config/users.d/s3_cache.xml diff --git a/docker/test/stateful/run.sh b/docker/test/stateful/run.sh index 77dc61e6cd0..e91acaa0b2a 100755 --- a/docker/test/stateful/run.sh +++ b/docker/test/stateful/run.sh @@ 
-96,7 +96,7 @@ else clickhouse-client --query "RENAME TABLE datasets.hits_v1 TO test.hits" clickhouse-client --query "RENAME TABLE datasets.visits_v1 TO test.visits" clickhouse-client --query "CREATE TABLE test.hits_s3 (WatchID UInt64, JavaEnable UInt8, Title String, GoodEvent Int16, EventTime DateTime, EventDate Date, CounterID UInt32, ClientIP UInt32, ClientIP6 FixedString(16), RegionID UInt32, UserID UInt64, CounterClass Int8, OS UInt8, UserAgent UInt8, URL String, Referer String, URLDomain String, RefererDomain String, Refresh UInt8, IsRobot UInt8, RefererCategories Array(UInt16), URLCategories Array(UInt16), URLRegions Array(UInt32), RefererRegions Array(UInt32), ResolutionWidth UInt16, ResolutionHeight UInt16, ResolutionDepth UInt8, FlashMajor UInt8, FlashMinor UInt8, FlashMinor2 String, NetMajor UInt8, NetMinor UInt8, UserAgentMajor UInt16, UserAgentMinor FixedString(2), CookieEnable UInt8, JavascriptEnable UInt8, IsMobile UInt8, MobilePhone UInt8, MobilePhoneModel String, Params String, IPNetworkID UInt32, TraficSourceID Int8, SearchEngineID UInt16, SearchPhrase String, AdvEngineID UInt8, IsArtifical UInt8, WindowClientWidth UInt16, WindowClientHeight UInt16, ClientTimeZone Int16, ClientEventTime DateTime, SilverlightVersion1 UInt8, SilverlightVersion2 UInt8, SilverlightVersion3 UInt32, SilverlightVersion4 UInt16, PageCharset String, CodeVersion UInt32, IsLink UInt8, IsDownload UInt8, IsNotBounce UInt8, FUniqID UInt64, HID UInt32, IsOldCounter UInt8, IsEvent UInt8, IsParameter UInt8, DontCountHits UInt8, WithHash UInt8, HitColor FixedString(1), UTCEventTime DateTime, Age UInt8, Sex UInt8, Income UInt8, Interests UInt16, Robotness UInt8, GeneralInterests Array(UInt16), RemoteIP UInt32, RemoteIP6 FixedString(16), WindowName Int32, OpenerName Int32, HistoryLength Int16, BrowserLanguage FixedString(2), BrowserCountry FixedString(2), SocialNetwork String, SocialAction String, HTTPError UInt16, SendTiming Int32, DNSTiming Int32, ConnectTiming Int32, 
ResponseStartTiming Int32, ResponseEndTiming Int32, FetchTiming Int32, RedirectTiming Int32, DOMInteractiveTiming Int32, DOMContentLoadedTiming Int32, DOMCompleteTiming Int32, LoadEventStartTiming Int32, LoadEventEndTiming Int32, NSToDOMContentLoadedTiming Int32, FirstPaintTiming Int32, RedirectCount Int8, SocialSourceNetworkID UInt8, SocialSourcePage String, ParamPrice Int64, ParamOrderID String, ParamCurrency FixedString(3), ParamCurrencyID UInt16, GoalsReached Array(UInt32), OpenstatServiceName String, OpenstatCampaignID String, OpenstatAdID String, OpenstatSourceID String, UTMSource String, UTMMedium String, UTMCampaign String, UTMContent String, UTMTerm String, FromTag String, HasGCLID UInt8, RefererHash UInt64, URLHash UInt64, CLID UInt32, YCLID UInt64, ShareService String, ShareURL String, ShareTitle String, ParsedParams Nested(Key1 String, Key2 String, Key3 String, Key4 String, Key5 String, ValueDouble Float64), IslandID FixedString(16), RequestNum UInt32, RequestTry UInt8) ENGINE = MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='s3_cache'" - clickhouse-client --query "INSERT INTO test.hits_s3 SELECT * FROM test.hits" + clickhouse-client --query "INSERT INTO test.hits_s3 SELECT * FROM test.hits SETTINGS enable_filesystem_cache_on_write_operations=0" fi clickhouse-client --query "SHOW TABLES FROM test" diff --git a/src/Common/FileCache.cpp b/src/Common/FileCache.cpp index f0d2bba33d4..0eb53f71bc4 100644 --- a/src/Common/FileCache.cpp +++ b/src/Common/FileCache.cpp @@ -720,6 +720,7 @@ std::vector LRUFileCache::tryGetCachePaths(const Key & key) std::lock_guard cache_lock(mutex); std::vector cache_paths; + const auto & cells_by_offset = files[key]; for (const auto & [offset, cell] : cells_by_offset) diff --git a/src/Common/FileCache.h b/src/Common/FileCache.h index 3a444b1c201..e8280fba08a 100644 --- a/src/Common/FileCache.h +++ 
b/src/Common/FileCache.h @@ -43,7 +43,6 @@ public: virtual void tryRemoveAll() = 0; - /// If cache can be used as read only. (For merges, for example). static bool shouldBypassCache(); /// Cache capacity in bytes. diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 86dc16f6220..8891a49a7de 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -555,9 +555,10 @@ class IColumn; \ M(UInt64, remote_fs_read_max_backoff_ms, 10000, "Max wait time when trying to read data for remote disk", 0) \ M(UInt64, remote_fs_read_backoff_max_tries, 5, "Max attempts to read with backoff", 0) \ - M(Bool, remote_fs_enable_cache, true, "Use cache for remote filesystem. This setting does not turn on/off cache for disks (must me done via disk config), but allows to bypass cache for some queries if intended", 0) \ - M(UInt64, remote_fs_cache_max_wait_sec, 5, "Allow to wait at most this number of seconds for download of current remote_fs_buffer_size bytes, and skip cache if exceeded", 0) \ - M(Bool, remote_fs_cache_on_write_operations, true, "Write into cache on INSERT query To actually work this setting requires be added to disk config too", 0) \ + M(Bool, enable_filesystem_cache, true, "Use cache for remote filesystem. This setting does not turn on/off cache for disks (must me done via disk config), but allows to bypass cache for some queries if intended", 0) \ + M(UInt64, filesystem_cache_max_wait_sec, 5, "Allow to wait at most this number of seconds for download of current remote_fs_buffer_size bytes, and skip cache if exceeded", 0) \ + M(Bool, enable_filesystem_cache_on_write_operations, false, "Write into cache on write operations. 
To actually work this setting requires be added to disk config too", 0) \ + M(Bool, read_from_filesystem_cache_if_exists_otherwise_bypass_cache, false, "", 0) \ \ M(UInt64, http_max_tries, 10, "Max attempts to read via http.", 0) \ M(UInt64, http_retry_initial_backoff_ms, 100, "Min milliseconds for backoff, when retrying read via http", 0) \ diff --git a/src/Disks/DiskCacheWrapper.cpp b/src/Disks/DiskCacheWrapper.cpp index 568fbf160c0..178caa0c496 100644 --- a/src/Disks/DiskCacheWrapper.cpp +++ b/src/Disks/DiskCacheWrapper.cpp @@ -150,7 +150,7 @@ DiskCacheWrapper::readFile( /// Note: enabling `threadpool` read requires to call setReadUntilEnd(). current_read_settings.remote_fs_method = RemoteFSReadMethod::read; /// Disable data cache. - current_read_settings.remote_fs_enable_cache = false; + current_read_settings.enable_filesystem_cache = false; if (metadata->status == DOWNLOADING) { @@ -169,7 +169,7 @@ DiskCacheWrapper::readFile( auto src_buffer = DiskDecorator::readFile(path, current_read_settings, read_hint, file_size); WriteSettings write_settings; - write_settings.remote_fs_cache_on_write_operations = false; + write_settings.enable_filesystem_cache_on_write_operations = false; auto dst_buffer = cache_disk->writeFile(tmp_path, settings.local_fs_buffer_size, WriteMode::Rewrite, write_settings); copyData(*src_buffer, *dst_buffer); @@ -206,7 +206,7 @@ DiskCacheWrapper::writeFile(const String & path, size_t buf_size, WriteMode mode return DiskDecorator::writeFile(path, buf_size, mode, settings); WriteSettings current_settings = settings; - current_settings.remote_fs_cache_on_write_operations = false; + current_settings.enable_filesystem_cache_on_write_operations = false; LOG_TEST(log, "Write file {} to cache", backQuote(path)); diff --git a/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp b/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp index 0bd06e44496..6aa5f71139a 100644 --- a/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp +++ 
b/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp @@ -120,12 +120,12 @@ SeekableReadBufferPtr CachedReadBufferFromRemoteFS::getReadBufferForFileSegment( auto range = file_segment->range(); /// Each wait() call has a timeout of 1 second. - size_t wait_download_max_tries = settings.remote_fs_cache_max_wait_sec; + size_t wait_download_max_tries = settings.filesystem_cache_max_wait_sec; size_t wait_download_tries = 0; auto download_state = file_segment->state(); - if (settings.remote_fs_read_from_cache_if_exists_otherwise_bypass_cache) + if (settings.read_from_filesystem_cache_if_exists_otherwise_bypass_cache) { if (download_state == FileSegment::State::DOWNLOADED) { @@ -772,7 +772,7 @@ std::optional CachedReadBufferFromRemoteFS::getLastNonDownloadedOffset() void CachedReadBufferFromRemoteFS::assertCacheAllowed() const { - if (IFileCache::shouldBypassCache() && !settings.remote_fs_read_from_cache_if_exists_otherwise_bypass_cache) + if (IFileCache::shouldBypassCache() && !settings.read_from_filesystem_cache_if_exists_otherwise_bypass_cache) throw Exception(ErrorCodes::LOGICAL_ERROR, "Cache used when not allowed"); } diff --git a/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp b/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp index 8f91804bbbe..abbcd5c8add 100644 --- a/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp +++ b/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp @@ -38,7 +38,7 @@ SeekableReadBufferPtr ReadBufferFromS3Gather::createImplementationBuffer(const S current_path = path; auto cache = settings.remote_fs_cache; - bool with_cache = cache && settings.remote_fs_enable_cache && !IFileCache::shouldBypassCache(); + bool with_cache = cache && settings.enable_filesystem_cache && !IFileCache::shouldBypassCache(); auto remote_file_reader_creator = [=, this]() { diff --git a/src/Disks/S3/DiskS3.cpp b/src/Disks/S3/DiskS3.cpp index e682adb1487..07d27f67d1e 100644 --- a/src/Disks/S3/DiskS3.cpp +++ b/src/Disks/S3/DiskS3.cpp @@ -231,7 +231,7 @@ std::unique_ptr DiskS3::readFile(const 
String & path, co if (cache) { if (IFileCache::shouldBypassCache()) - disk_read_settings.remote_fs_read_from_cache_if_exists_otherwise_bypass_cache = true; + disk_read_settings.read_from_filesystem_cache_if_exists_otherwise_bypass_cache = true; disk_read_settings.remote_fs_cache = cache; } @@ -273,7 +273,7 @@ std::unique_ptr DiskS3::writeFile(const String & path, mode == WriteMode::Rewrite ? "Write" : "Append", backQuote(metadata_disk->getPath() + path), remote_fs_root_path + blob_name); bool cache_on_insert = fs::path(path).extension() != ".tmp" - && write_settings.remote_fs_cache_on_write_operations + && write_settings.enable_filesystem_cache_on_write_operations && FileCacheFactory::instance().getSettings(getCacheBasePath()).cache_on_write_operations; auto s3_buffer = std::make_unique( diff --git a/src/IO/ReadSettings.h b/src/IO/ReadSettings.h index 936de1673b4..92346615a7a 100644 --- a/src/IO/ReadSettings.h +++ b/src/IO/ReadSettings.h @@ -77,9 +77,9 @@ struct ReadSettings size_t remote_fs_read_max_backoff_ms = 10000; size_t remote_fs_read_backoff_max_tries = 4; - bool remote_fs_enable_cache = true; - size_t remote_fs_cache_max_wait_sec = 1; - bool remote_fs_read_from_cache_if_exists_otherwise_bypass_cache = false; + bool enable_filesystem_cache = true; + size_t filesystem_cache_max_wait_sec = 1; + bool read_from_filesystem_cache_if_exists_otherwise_bypass_cache = false; size_t remote_read_min_bytes_for_seek = DBMS_DEFAULT_BUFFER_SIZE; diff --git a/src/IO/WriteSettings.h b/src/IO/WriteSettings.h index 81a6705cbab..af26452e8e6 100644 --- a/src/IO/WriteSettings.h +++ b/src/IO/WriteSettings.h @@ -5,7 +5,7 @@ namespace DB struct WriteSettings { - bool remote_fs_cache_on_write_operations = false; + bool enable_filesystem_cache_on_write_operations = false; }; } diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index a3169f435e4..83236781418 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -3179,8 +3179,9 @@ 
ReadSettings Context::getReadSettings() const res.remote_fs_read_max_backoff_ms = settings.remote_fs_read_max_backoff_ms; res.remote_fs_read_backoff_max_tries = settings.remote_fs_read_backoff_max_tries; - res.remote_fs_enable_cache = settings.remote_fs_enable_cache; - res.remote_fs_cache_max_wait_sec = settings.remote_fs_cache_max_wait_sec; + res.enable_filesystem_cache = settings.enable_filesystem_cache; + res.filesystem_cache_max_wait_sec = settings.filesystem_cache_max_wait_sec; + res.read_from_filesystem_cache_if_exists_otherwise_bypass_cache = settings.read_from_filesystem_cache_if_exists_otherwise_bypass_cache; res.remote_read_min_bytes_for_seek = settings.remote_read_min_bytes_for_seek; @@ -3203,7 +3204,7 @@ WriteSettings Context::getWriteSettings() const { WriteSettings res; - res.remote_fs_cache_on_write_operations = settings.remote_fs_cache_on_write_operations; + res.enable_filesystem_cache_on_write_operations = settings.enable_filesystem_cache_on_write_operations; return res; } diff --git a/src/Interpreters/InterpreterSystemQuery.cpp b/src/Interpreters/InterpreterSystemQuery.cpp index 5f030159b2e..353e5393a03 100644 --- a/src/Interpreters/InterpreterSystemQuery.cpp +++ b/src/Interpreters/InterpreterSystemQuery.cpp @@ -298,9 +298,9 @@ BlockIO InterpreterSystemQuery::execute() cache->reset(); break; #endif - case Type::DROP_REMOTE_FILESYSTEM_CACHE: + case Type::DROP_FILESYSTEM_CACHE: { - if (query.remote_filesystem_cache_path.empty()) + if (query.filesystem_cache_path.empty()) { auto caches = FileCacheFactory::instance().getAll(); for (const auto & [_, cache_data] : caches) @@ -308,7 +308,7 @@ BlockIO InterpreterSystemQuery::execute() } else { - auto cache = FileCacheFactory::instance().get(query.remote_filesystem_cache_path); + auto cache = FileCacheFactory::instance().get(query.filesystem_cache_path); cache->tryRemoveAll(); } break; @@ -775,7 +775,7 @@ AccessRightsElements InterpreterSystemQuery::getRequiredAccessForDDLOnCluster() case 
Type::DROP_UNCOMPRESSED_CACHE: case Type::DROP_INDEX_MARK_CACHE: case Type::DROP_INDEX_UNCOMPRESSED_CACHE: - case Type::DROP_REMOTE_FILESYSTEM_CACHE: + case Type::DROP_FILESYSTEM_CACHE: { required_access.emplace_back(AccessType::SYSTEM_DROP_CACHE); break; diff --git a/src/Parsers/ASTSystemQuery.h b/src/Parsers/ASTSystemQuery.h index 7113698789f..600525f9abe 100644 --- a/src/Parsers/ASTSystemQuery.h +++ b/src/Parsers/ASTSystemQuery.h @@ -28,7 +28,7 @@ public: #if USE_EMBEDDED_COMPILER DROP_COMPILED_EXPRESSION_CACHE, #endif - DROP_REMOTE_FILESYSTEM_CACHE, + DROP_FILESYSTEM_CACHE, STOP_LISTEN_QUERIES, START_LISTEN_QUERIES, RESTART_REPLICAS, @@ -89,7 +89,7 @@ public: String volume; String disk; UInt64 seconds{}; - String remote_filesystem_cache_path; + String filesystem_cache_path; String getID(char) const override { return "SYSTEM query"; } diff --git a/src/Storages/System/StorageSystemFilesystemCache.h b/src/Storages/System/StorageSystemFilesystemCache.h index cfd938a50ce..0f0bd81e760 100644 --- a/src/Storages/System/StorageSystemFilesystemCache.h +++ b/src/Storages/System/StorageSystemFilesystemCache.h @@ -23,7 +23,7 @@ namespace DB * remote_path * FROM system.remote_data_paths * ) AS data_paths - * INNER JOIN system.remote_filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path + * INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path * FORMAT Vertical */ diff --git a/tests/config/users.d/s3_cache.xml b/tests/config/users.d/s3_cache.xml new file mode 100644 index 00000000000..69b24ecbbc4 --- /dev/null +++ b/tests/config/users.d/s3_cache.xml @@ -0,0 +1,8 @@ + + + + 1 + 1 + + + diff --git a/tests/queries/0_stateless/01605_adaptive_granularity_block_borders.sql b/tests/queries/0_stateless/01605_adaptive_granularity_block_borders.sql index 750809da338..7654be4eb29 100644 --- a/tests/queries/0_stateless/01605_adaptive_granularity_block_borders.sql +++ 
b/tests/queries/0_stateless/01605_adaptive_granularity_block_borders.sql @@ -22,7 +22,7 @@ OPTIMIZE TABLE adaptive_table FINAL; SELECT marks FROM system.parts WHERE table = 'adaptive_table' and database=currentDatabase() and active; -SET remote_fs_enable_cache = 0; +SET enable_filesystem_cache = 0; -- If we have computed granularity incorrectly than we will exceed this limit. SET max_memory_usage='30M'; diff --git a/tests/queries/0_stateless/01641_memory_tracking_insert_optimize.sql b/tests/queries/0_stateless/01641_memory_tracking_insert_optimize.sql index 7ec3153886c..36b6c97460c 100644 --- a/tests/queries/0_stateless/01641_memory_tracking_insert_optimize.sql +++ b/tests/queries/0_stateless/01641_memory_tracking_insert_optimize.sql @@ -3,7 +3,7 @@ drop table if exists data_01641; -- Disable cache for s3 storage tests because it increases memory usage. -set remote_fs_enable_cache=0; +set enable_filesystem_cache=0; set remote_filesystem_read_method='read'; create table data_01641 (key Int, value String) engine=MergeTree order by (key, repeat(value, 40)) settings old_parts_lifetime=0, min_bytes_for_wide_part=0; diff --git a/tests/queries/0_stateless/01926_order_by_desc_limit.sql b/tests/queries/0_stateless/01926_order_by_desc_limit.sql index 9f65cf73252..86468b4fcd6 100644 --- a/tests/queries/0_stateless/01926_order_by_desc_limit.sql +++ b/tests/queries/0_stateless/01926_order_by_desc_limit.sql @@ -2,7 +2,7 @@ DROP TABLE IF EXISTS order_by_desc; -SET remote_fs_enable_cache=0; +SET enable_filesystem_cache=0; CREATE TABLE order_by_desc (u UInt32, s String) ENGINE MergeTree ORDER BY u PARTITION BY u % 100 diff --git a/tests/queries/0_stateless/02226_s3_with_cache.sql b/tests/queries/0_stateless/02226_s3_with_cache.sql index 5b0d4ff3e44..d470f2ef140 100644 --- a/tests/queries/0_stateless/02226_s3_with_cache.sql +++ b/tests/queries/0_stateless/02226_s3_with_cache.sql @@ -1,7 +1,7 @@ -- Tags: no-parallel, no-fasttest, long SET max_memory_usage='20G'; -SET 
remote_fs_cache_on_write_operations = 0; +SET enable_filesystem_cache_on_write_operations = 0; DROP TABLE IF EXISTS test; CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache'; @@ -43,12 +43,12 @@ SET remote_filesystem_read_method='threadpool'; SELECT * FROM test WHERE value LIKE '%abc%' ORDER BY value LIMIT 10 FORMAT Null; -SET remote_fs_cache_on_write_operations = 1; +SET enable_filesystem_cache_on_write_operations = 1; TRUNCATE TABLE test; SELECT count() FROM test; -SYSTEM DROP REMOTE FILESYSTEM CACHE; +SYSTEM DROP FILESYSTEM CACHE; INSERT INTO test SELECT * FROM generateRandom('key UInt32, value String') LIMIT 10000; diff --git a/tests/queries/0_stateless/02240_system_remote_filesystem_cache.reference b/tests/queries/0_stateless/02240_system_remote_filesystem_cache.reference index 59c4d43d8ae..20cb2329604 100644 --- a/tests/queries/0_stateless/02240_system_remote_filesystem_cache.reference +++ b/tests/queries/0_stateless/02240_system_remote_filesystem_cache.reference @@ -1,19 +1,19 @@ -- { echo } -SYSTEM DROP REMOTE FILESYSTEM CACHE; -SET remote_fs_cache_on_write_operations=0; +SYSTEM DROP FILESYSTEM CACHE; +SET enable_filesystem_cache_on_write_operations=0; DROP TABLE IF EXISTS test; -CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3'; +CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache'; INSERT INTO test SELECT number, toString(number) FROM numbers(100); SELECT * FROM test FORMAT Null; SELECT cache_base_path, file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache ORDER BY file_segment_range_end, size; ./disks/s3/data_cache/ 0 0 1 ./disks/s3/data_cache/ 0 79 80 ./disks/s3/data_cache/ 0 745 746 -SYSTEM DROP REMOTE FILESYSTEM CACHE; +SYSTEM DROP FILESYSTEM CACHE; SELECT cache_base_path, file_segment_range_begin, file_segment_range_end, size FROM 
system.filesystem_cache; SELECT * FROM test FORMAT Null; SELECT cache_base_path, file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache; ./disks/s3/data_cache/ 0 745 746 -SYSTEM DROP REMOTE FILESYSTEM CACHE; +SYSTEM DROP FILESYSTEM CACHE; SELECT cache_base_path, file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache; diff --git a/tests/queries/0_stateless/02240_system_remote_filesystem_cache.sql b/tests/queries/0_stateless/02240_system_remote_filesystem_cache.sql index 24ea62aabf8..a889bea0fcf 100644 --- a/tests/queries/0_stateless/02240_system_remote_filesystem_cache.sql +++ b/tests/queries/0_stateless/02240_system_remote_filesystem_cache.sql @@ -2,17 +2,17 @@ -- { echo } -SYSTEM DROP REMOTE FILESYSTEM CACHE; -SET remote_fs_cache_on_write_operations=0; +SYSTEM DROP FILESYSTEM CACHE; +SET enable_filesystem_cache_on_write_operations=0; DROP TABLE IF EXISTS test; -CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3'; +CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache'; INSERT INTO test SELECT number, toString(number) FROM numbers(100); SELECT * FROM test FORMAT Null; SELECT cache_base_path, file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache ORDER BY file_segment_range_end, size; -SYSTEM DROP REMOTE FILESYSTEM CACHE; +SYSTEM DROP FILESYSTEM CACHE; SELECT cache_base_path, file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache; SELECT * FROM test FORMAT Null; SELECT cache_base_path, file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache; -SYSTEM DROP REMOTE FILESYSTEM CACHE; +SYSTEM DROP FILESYSTEM CACHE; SELECT cache_base_path, file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache; diff --git a/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.reference 
b/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.reference index 1f470d5644f..e83b5551821 100644 --- a/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.reference +++ b/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.reference @@ -1,62 +1,62 @@ -- { echo } -SET remote_fs_cache_on_write_operations=1; +SET enable_filesystem_cache_on_write_operations=1; DROP TABLE IF EXISTS test; CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache'; -SYSTEM DROP REMOTE FILESYSTEM CACHE; -SELECT file_segment_range_begin, file_segment_range_end, size, state FROM (SELECT file_segment_range_begin, file_segment_range_end, size, state, local_path FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.remote_filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path) WHERE endsWith(local_path, 'data.bin') FORMAT Vertical; -SELECT count() FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.remote_filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path; +SYSTEM DROP FILESYSTEM CACHE; +SELECT file_segment_range_begin, file_segment_range_end, size, state FROM (SELECT file_segment_range_begin, file_segment_range_end, size, state, local_path FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path) WHERE endsWith(local_path, 'data.bin') FORMAT Vertical; +SELECT count() FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path; 0 -SELECT count() FROM system.remote_filesystem_cache; +SELECT 
count() FROM system.filesystem_cache; 0 -INSERT INTO test SELECT number, toString(number) FROM numbers(100) SETTINGS remote_fs_cache_on_write_operations=1; -SELECT file_segment_range_begin, file_segment_range_end, size, state FROM (SELECT file_segment_range_begin, file_segment_range_end, size, state, local_path FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.remote_filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path) WHERE endsWith(local_path, 'data.bin') FORMAT Vertical; +INSERT INTO test SELECT number, toString(number) FROM numbers(100) SETTINGS enable_filesystem_cache_on_write_operations=1; +SELECT file_segment_range_begin, file_segment_range_end, size, state FROM (SELECT file_segment_range_begin, file_segment_range_end, size, state, local_path FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path) WHERE endsWith(local_path, 'data.bin') FORMAT Vertical; Row 1: ────── file_segment_range_begin: 0 file_segment_range_end: 745 size: 746 state: DOWNLOADED -SELECT count() FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.remote_filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path; +SELECT count() FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path; 7 -SELECT count() FROM system.remote_filesystem_cache; +SELECT count() FROM system.filesystem_cache; 7 -SELECT count() FROM system.remote_filesystem_cache WHERE cache_hits > 0; +SELECT count() FROM system.filesystem_cache WHERE cache_hits > 0; 0 SELECT * FROM test FORMAT Null; -SELECT count() FROM 
system.remote_filesystem_cache WHERE cache_hits > 0; +SELECT count() FROM system.filesystem_cache WHERE cache_hits > 0; 2 SELECT * FROM test FORMAT Null; -SELECT count() FROM system.remote_filesystem_cache WHERE cache_hits > 0; +SELECT count() FROM system.filesystem_cache WHERE cache_hits > 0; 2 -SELECT count() size FROM system.remote_filesystem_cache; +SELECT count() size FROM system.filesystem_cache; 7 -SYSTEM DROP REMOTE FILESYSTEM CACHE; +SYSTEM DROP FILESYSTEM CACHE; INSERT INTO test SELECT number, toString(number) FROM numbers(100, 200); -SELECT file_segment_range_begin, file_segment_range_end, size, state FROM (SELECT file_segment_range_begin, file_segment_range_end, size, state, local_path FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.remote_filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path) WHERE endsWith(local_path, 'data.bin') FORMAT Vertical; +SELECT file_segment_range_begin, file_segment_range_end, size, state FROM (SELECT file_segment_range_begin, file_segment_range_end, size, state, local_path FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path) WHERE endsWith(local_path, 'data.bin') FORMAT Vertical; Row 1: ────── file_segment_range_begin: 0 file_segment_range_end: 1659 size: 1660 state: DOWNLOADED -SELECT count() FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.remote_filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path; +SELECT count() FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path; 7 -SELECT count() FROM 
system.remote_filesystem_cache; +SELECT count() FROM system.filesystem_cache; 7 -SELECT count() FROM system.remote_filesystem_cache; +SELECT count() FROM system.filesystem_cache; 7 -INSERT INTO test SELECT number, toString(number) FROM numbers(100) SETTINGS remote_fs_cache_on_write_operations=0; -SELECT count() FROM system.remote_filesystem_cache; +INSERT INTO test SELECT number, toString(number) FROM numbers(100) SETTINGS enable_filesystem_cache_on_write_operations=0; +SELECT count() FROM system.filesystem_cache; 7 INSERT INTO test SELECT number, toString(number) FROM numbers(100); INSERT INTO test SELECT number, toString(number) FROM numbers(300, 10000); -SELECT count() FROM system.remote_filesystem_cache; +SELECT count() FROM system.filesystem_cache; 21 OPTIMIZE TABLE test FINAL; -SELECT count() FROM system.remote_filesystem_cache; +SELECT count() FROM system.filesystem_cache; 27 SET mutations_sync=2; ALTER TABLE test UPDATE value = 'kek' WHERE key = 100; -SELECT count() FROM system.remote_filesystem_cache; +SELECT count() FROM system.filesystem_cache; 28 diff --git a/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.sql b/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.sql index cd255d7df51..745af904c5f 100644 --- a/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.sql +++ b/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.sql @@ -2,51 +2,51 @@ -- { echo } -SET remote_fs_cache_on_write_operations=1; +SET enable_filesystem_cache_on_write_operations=1; DROP TABLE IF EXISTS test; CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache'; -SYSTEM DROP REMOTE FILESYSTEM CACHE; +SYSTEM DROP FILESYSTEM CACHE; -SELECT file_segment_range_begin, file_segment_range_end, size, state FROM (SELECT file_segment_range_begin, file_segment_range_end, size, state, local_path FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM 
system.remote_data_paths ) AS data_paths INNER JOIN system.remote_filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path) WHERE endsWith(local_path, 'data.bin') FORMAT Vertical; -SELECT count() FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.remote_filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path; -SELECT count() FROM system.remote_filesystem_cache; +SELECT file_segment_range_begin, file_segment_range_end, size, state FROM (SELECT file_segment_range_begin, file_segment_range_end, size, state, local_path FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path) WHERE endsWith(local_path, 'data.bin') FORMAT Vertical; +SELECT count() FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path; +SELECT count() FROM system.filesystem_cache; -INSERT INTO test SELECT number, toString(number) FROM numbers(100) SETTINGS remote_fs_cache_on_write_operations=1; +INSERT INTO test SELECT number, toString(number) FROM numbers(100) SETTINGS enable_filesystem_cache_on_write_operations=1; -SELECT file_segment_range_begin, file_segment_range_end, size, state FROM (SELECT file_segment_range_begin, file_segment_range_end, size, state, local_path FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.remote_filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path) WHERE endsWith(local_path, 'data.bin') FORMAT Vertical; -SELECT count() FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER 
JOIN system.remote_filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path; -SELECT count() FROM system.remote_filesystem_cache; +SELECT file_segment_range_begin, file_segment_range_end, size, state FROM (SELECT file_segment_range_begin, file_segment_range_end, size, state, local_path FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path) WHERE endsWith(local_path, 'data.bin') FORMAT Vertical; +SELECT count() FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path; +SELECT count() FROM system.filesystem_cache; -SELECT count() FROM system.remote_filesystem_cache WHERE cache_hits > 0; +SELECT count() FROM system.filesystem_cache WHERE cache_hits > 0; SELECT * FROM test FORMAT Null; -SELECT count() FROM system.remote_filesystem_cache WHERE cache_hits > 0; +SELECT count() FROM system.filesystem_cache WHERE cache_hits > 0; SELECT * FROM test FORMAT Null; -SELECT count() FROM system.remote_filesystem_cache WHERE cache_hits > 0; +SELECT count() FROM system.filesystem_cache WHERE cache_hits > 0; -SELECT count() size FROM system.remote_filesystem_cache; +SELECT count() size FROM system.filesystem_cache; -SYSTEM DROP REMOTE FILESYSTEM CACHE; +SYSTEM DROP FILESYSTEM CACHE; INSERT INTO test SELECT number, toString(number) FROM numbers(100, 200); -SELECT file_segment_range_begin, file_segment_range_end, size, state FROM (SELECT file_segment_range_begin, file_segment_range_end, size, state, local_path FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.remote_filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path) WHERE endsWith(local_path, 'data.bin') 
FORMAT Vertical; -SELECT count() FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.remote_filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path; -SELECT count() FROM system.remote_filesystem_cache; +SELECT file_segment_range_begin, file_segment_range_end, size, state FROM (SELECT file_segment_range_begin, file_segment_range_end, size, state, local_path FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path) WHERE endsWith(local_path, 'data.bin') FORMAT Vertical; +SELECT count() FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path; +SELECT count() FROM system.filesystem_cache; -SELECT count() FROM system.remote_filesystem_cache; -INSERT INTO test SELECT number, toString(number) FROM numbers(100) SETTINGS remote_fs_cache_on_write_operations=0; -SELECT count() FROM system.remote_filesystem_cache; +SELECT count() FROM system.filesystem_cache; +INSERT INTO test SELECT number, toString(number) FROM numbers(100) SETTINGS enable_filesystem_cache_on_write_operations=0; +SELECT count() FROM system.filesystem_cache; INSERT INTO test SELECT number, toString(number) FROM numbers(100); INSERT INTO test SELECT number, toString(number) FROM numbers(300, 10000); -SELECT count() FROM system.remote_filesystem_cache; +SELECT count() FROM system.filesystem_cache; OPTIMIZE TABLE test FINAL; -SELECT count() FROM system.remote_filesystem_cache; +SELECT count() FROM system.filesystem_cache; SET mutations_sync=2; ALTER TABLE test UPDATE value = 'kek' WHERE key = 100; -SELECT count() FROM system.remote_filesystem_cache; +SELECT count() FROM system.filesystem_cache; From 
74ec5eb1cc97381edece677a0d364d10a7503d9b Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 31 Mar 2022 14:35:40 +0200 Subject: [PATCH 20/27] Fix checks --- tests/config/install.sh | 1 + ...2240_system_remote_filesystem_cache.reference | 16 ++++++++-------- .../02240_system_remote_filesystem_cache.sql | 8 ++++---- 3 files changed, 13 insertions(+), 12 deletions(-) diff --git a/tests/config/install.sh b/tests/config/install.sh index c499ffa88f7..2125f515734 100755 --- a/tests/config/install.sh +++ b/tests/config/install.sh @@ -49,6 +49,7 @@ ln -sf $SRC_PATH/users.d/session_log_test.xml $DEST_SERVER_PATH/users.d/ ln -sf $SRC_PATH/users.d/memory_profiler.xml $DEST_SERVER_PATH/users.d/ ln -sf $SRC_PATH/users.d/no_fsync_metadata.xml $DEST_SERVER_PATH/users.d/ ln -sf $SRC_PATH/users.d/filelog.xml $DEST_SERVER_PATH/users.d/ +ln -sf $SRC_PATH/users.d/s3_cache.xml $DEST_SERVER_PATH/s3_cache/ # FIXME DataPartsExchange may hang for http_send_timeout seconds # when nobody is going to read from the other side of socket (due to "Fetching of part was cancelled"), diff --git a/tests/queries/0_stateless/02240_system_remote_filesystem_cache.reference b/tests/queries/0_stateless/02240_system_remote_filesystem_cache.reference index 20cb2329604..a26133180e4 100644 --- a/tests/queries/0_stateless/02240_system_remote_filesystem_cache.reference +++ b/tests/queries/0_stateless/02240_system_remote_filesystem_cache.reference @@ -6,14 +6,14 @@ DROP TABLE IF EXISTS test; CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache'; INSERT INTO test SELECT number, toString(number) FROM numbers(100); SELECT * FROM test FORMAT Null; -SELECT cache_base_path, file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache ORDER BY file_segment_range_end, size; -./disks/s3/data_cache/ 0 0 1 -./disks/s3/data_cache/ 0 79 80 -./disks/s3/data_cache/ 0 745 746 +SELECT file_segment_range_begin, file_segment_range_end, size FROM 
system.filesystem_cache ORDER BY file_segment_range_end, size; +0 0 1 +0 79 80 +0 745 746 SYSTEM DROP FILESYSTEM CACHE; -SELECT cache_base_path, file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache; +SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache; SELECT * FROM test FORMAT Null; -SELECT cache_base_path, file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache; -./disks/s3/data_cache/ 0 745 746 +SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache; +0 745 746 SYSTEM DROP FILESYSTEM CACHE; -SELECT cache_base_path, file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache; +SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache; diff --git a/tests/queries/0_stateless/02240_system_remote_filesystem_cache.sql b/tests/queries/0_stateless/02240_system_remote_filesystem_cache.sql index a889bea0fcf..cc5fd259ce8 100644 --- a/tests/queries/0_stateless/02240_system_remote_filesystem_cache.sql +++ b/tests/queries/0_stateless/02240_system_remote_filesystem_cache.sql @@ -9,10 +9,10 @@ CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SET INSERT INTO test SELECT number, toString(number) FROM numbers(100); SELECT * FROM test FORMAT Null; -SELECT cache_base_path, file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache ORDER BY file_segment_range_end, size; +SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache ORDER BY file_segment_range_end, size; SYSTEM DROP FILESYSTEM CACHE; -SELECT cache_base_path, file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache; +SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache; SELECT * FROM test FORMAT Null; -SELECT cache_base_path, file_segment_range_begin, file_segment_range_end, size FROM 
system.filesystem_cache; +SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache; SYSTEM DROP FILESYSTEM CACHE; -SELECT cache_base_path, file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache; +SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache; From 603dcbb1b74fa2695abd9ece6c8ed49470527aa7 Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 31 Mar 2022 15:27:48 +0200 Subject: [PATCH 21/27] Extend test --- src/IO/WriteBufferFromS3.cpp | 1 + tests/config/install.sh | 2 +- ...41_remote_filesystem_cache_on_insert.reference | 15 ++++++++++++++- .../02241_remote_filesystem_cache_on_insert.sql | 14 +++++++++++++- 4 files changed, 29 insertions(+), 3 deletions(-) diff --git a/src/IO/WriteBufferFromS3.cpp b/src/IO/WriteBufferFromS3.cpp index 0eee7366775..b5e61724ede 100644 --- a/src/IO/WriteBufferFromS3.cpp +++ b/src/IO/WriteBufferFromS3.cpp @@ -100,6 +100,7 @@ void WriteBufferFromS3::nextImpl() auto cache_key = cache->hash(blob_name); auto file_segments_holder = cache->setDownloading(cache_key, current_download_offset, size); + current_download_offset += size; size_t remaining_size = size; for (const auto & file_segment : file_segments_holder.file_segments) diff --git a/tests/config/install.sh b/tests/config/install.sh index 2125f515734..f1b4fe1a588 100755 --- a/tests/config/install.sh +++ b/tests/config/install.sh @@ -49,7 +49,6 @@ ln -sf $SRC_PATH/users.d/session_log_test.xml $DEST_SERVER_PATH/users.d/ ln -sf $SRC_PATH/users.d/memory_profiler.xml $DEST_SERVER_PATH/users.d/ ln -sf $SRC_PATH/users.d/no_fsync_metadata.xml $DEST_SERVER_PATH/users.d/ ln -sf $SRC_PATH/users.d/filelog.xml $DEST_SERVER_PATH/users.d/ -ln -sf $SRC_PATH/users.d/s3_cache.xml $DEST_SERVER_PATH/s3_cache/ # FIXME DataPartsExchange may hang for http_send_timeout seconds # when nobody is going to read from the other side of socket (due to "Fetching of part was cancelled"), @@ -86,6 +85,7 @@ fi if [[ -n 
"$EXPORT_S3_STORAGE_POLICIES" ]]; then ln -sf $SRC_PATH/config.d/storage_conf.xml $DEST_SERVER_PATH/config.d/ + ln -sf $SRC_PATH/users.d/s3_cache.xml $DEST_SERVER_PATH/s3_cache/ fi if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then diff --git a/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.reference b/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.reference index e83b5551821..5bc18d48655 100644 --- a/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.reference +++ b/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.reference @@ -9,7 +9,7 @@ SELECT count() FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, re 0 SELECT count() FROM system.filesystem_cache; 0 -INSERT INTO test SELECT number, toString(number) FROM numbers(100) SETTINGS enable_filesystem_cache_on_write_operations=1; +INSERT INTO test SELECT number, toString(number) FROM numbers(100); SELECT file_segment_range_begin, file_segment_range_end, size, state FROM (SELECT file_segment_range_begin, file_segment_range_end, size, state, local_path FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path) WHERE endsWith(local_path, 'data.bin') FORMAT Vertical; Row 1: ────── @@ -60,3 +60,16 @@ SET mutations_sync=2; ALTER TABLE test UPDATE value = 'kek' WHERE key = 100; SELECT count() FROM system.filesystem_cache; 28 +INSERT INTO test SELECT number, toString(number) FROM numbers(5000000); +SYSTEM FLUSH LOGS; +SELECT query, ProfileEvents['RemoteFSReadBytes'] > 0 as remote_fs_read +FROM system.query_log +WHERE query LIKE 'SELECT number, toString(number) FROM numbers(5000000)%' +AND type = 'QueryFinish' +AND current_database = currentDatabase() +ORDER BY query_start_time DESC +LIMIT 1; +SELECT count() FROM test; +5010500 +SELECT count() FROM test WHERE 
value LIKE '%010%'; +18816 diff --git a/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.sql b/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.sql index 745af904c5f..946e72ba2fd 100644 --- a/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.sql +++ b/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.sql @@ -13,7 +13,7 @@ SELECT file_segment_range_begin, file_segment_range_end, size, state FROM (SELEC SELECT count() FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path; SELECT count() FROM system.filesystem_cache; -INSERT INTO test SELECT number, toString(number) FROM numbers(100) SETTINGS enable_filesystem_cache_on_write_operations=1; +INSERT INTO test SELECT number, toString(number) FROM numbers(100); SELECT file_segment_range_begin, file_segment_range_end, size, state FROM (SELECT file_segment_range_begin, file_segment_range_end, size, state, local_path FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path) WHERE endsWith(local_path, 'data.bin') FORMAT Vertical; SELECT count() FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path; @@ -50,3 +50,15 @@ SELECT count() FROM system.filesystem_cache; SET mutations_sync=2; ALTER TABLE test UPDATE value = 'kek' WHERE key = 100; SELECT count() FROM system.filesystem_cache; + +INSERT INTO test SELECT number, toString(number) FROM numbers(5000000); +SYSTEM FLUSH LOGS; +SELECT query, ProfileEvents['RemoteFSReadBytes'] > 0 as remote_fs_read +FROM system.query_log +WHERE query LIKE 'SELECT number, 
toString(number) FROM numbers(5000000)%' +AND type = 'QueryFinish' +AND current_database = currentDatabase() +ORDER BY query_start_time DESC +LIMIT 1; +SELECT count() FROM test; +SELECT count() FROM test WHERE value LIKE '%010%'; From 36c583d0dee412d398a7c253e12dff6c6b670447 Mon Sep 17 00:00:00 2001 From: kssenii Date: Fri, 1 Apr 2022 16:45:15 +0200 Subject: [PATCH 22/27] Better version of cache on insert --- src/Common/CurrentThread.h | 1 + src/Common/FileSegment.cpp | 72 +++++++++++++++++++ src/Common/FileSegment.h | 4 ++ src/IO/ParallelReadBuffer.cpp | 2 +- src/IO/WriteBufferFromS3.cpp | 66 +++++++++++++++-- src/IO/WriteBufferFromS3.h | 7 +- src/Interpreters/ThreadStatusExt.cpp | 10 +++ src/Interpreters/threadPoolCallbackRunner.cpp | 9 ++- src/Interpreters/threadPoolCallbackRunner.h | 2 +- tests/config/install.sh | 2 +- 10 files changed, 163 insertions(+), 12 deletions(-) diff --git a/src/Common/CurrentThread.h b/src/Common/CurrentThread.h index 9dbe8d355d6..4888adb511a 100644 --- a/src/Common/CurrentThread.h +++ b/src/Common/CurrentThread.h @@ -91,6 +91,7 @@ public: struct QueryScope { explicit QueryScope(ContextMutablePtr query_context); + explicit QueryScope(ContextPtr query_context); ~QueryScope(); void logPeakMemoryUsage(); diff --git a/src/Common/FileSegment.cpp b/src/Common/FileSegment.cpp index 76749d24f43..d8e7a994df4 100644 --- a/src/Common/FileSegment.cpp +++ b/src/Common/FileSegment.cpp @@ -262,6 +262,78 @@ void FileSegment::write(const char * from, size_t size, size_t offset_) assert(getDownloadOffset() == offset_ + size); } +void FileSegment::writeInMemory(const char * from, size_t size) +{ + if (!size) + throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR, "Writing zero size is not allowed"); + + if (availableSize() < size) + throw Exception( + ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR, + "Not enough space is reserved. 
Available: {}, expected: {}", availableSize(), size); + + std::lock_guard segment_lock(mutex); + + if (cache_writer) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cache writer already initialized"); + + auto download_path = cache->getPathInLocalCache(key(), offset()); + cache_writer = std::make_unique(download_path, size + 1); + + try + { + cache_writer->write(from, size); + } + catch (...) + { + LOG_ERROR(log, "Failed to write to cache. File segment info: {}", getInfoForLogImpl(segment_lock)); + + download_state = State::PARTIALLY_DOWNLOADED_NO_CONTINUATION; + + cache_writer->finalize(); + cache_writer.reset(); + + throw; + } +} + +size_t FileSegment::finalizeWrite() +{ + std::lock_guard segment_lock(mutex); + + if (!cache_writer) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cache writer not initialized"); + + size_t size = cache_writer->offset(); + + if (size == 0) + throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR, "Writing size is not allowed"); + + try + { + cache_writer->next(); + } + catch (...) 
+ { + download_state = State::PARTIALLY_DOWNLOADED_NO_CONTINUATION; + + cache_writer->finalize(); + cache_writer.reset(); + + throw; + } + + downloaded_size += size; + cache_writer.reset(); + downloader_id.clear(); + download_state = State::DOWNLOADED; + + if (downloaded_size != range().size()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected {} == {}", downloaded_size, range().size()); + + return size; +} + FileSegment::State FileSegment::wait() { std::unique_lock segment_lock(mutex); diff --git a/src/Common/FileSegment.h b/src/Common/FileSegment.h index ed9d33d37d1..c9e4146c726 100644 --- a/src/Common/FileSegment.h +++ b/src/Common/FileSegment.h @@ -97,6 +97,10 @@ public: void write(const char * from, size_t size, size_t offset_); + void writeInMemory(const char * from, size_t size); + + size_t finalizeWrite(); + RemoteFileReaderPtr getRemoteFileReader(); void setRemoteFileReader(RemoteFileReaderPtr remote_file_reader_); diff --git a/src/IO/ParallelReadBuffer.cpp b/src/IO/ParallelReadBuffer.cpp index f036d6a08c8..64550e9430b 100644 --- a/src/IO/ParallelReadBuffer.cpp +++ b/src/IO/ParallelReadBuffer.cpp @@ -33,7 +33,7 @@ bool ParallelReadBuffer::addReaderToPool(std::unique_lock & /*buffer auto worker = read_workers.emplace_back(std::make_shared(std::move(reader))); - schedule([this, worker = std::move(worker)]() mutable { readerThreadFunction(std::move(worker)); }); + schedule([this, worker = std::move(worker)]() mutable { readerThreadFunction(std::move(worker)); }, nullptr); return true; } diff --git a/src/IO/WriteBufferFromS3.cpp b/src/IO/WriteBufferFromS3.cpp index b5e61724ede..20d9a054230 100644 --- a/src/IO/WriteBufferFromS3.cpp +++ b/src/IO/WriteBufferFromS3.cpp @@ -7,6 +7,7 @@ #include #include +#include #include #include @@ -43,6 +44,7 @@ struct WriteBufferFromS3::UploadPartTask bool is_finised = false; std::string tag; std::exception_ptr exception; + std::optional cache_files; }; struct WriteBufferFromS3::PutObjectTask @@ -93,25 +95,50 @@ void 
WriteBufferFromS3::nextImpl() size_t size = offset(); temporary_buffer->write(working_buffer.begin(), size); + ThreadGroupStatusPtr running_group = CurrentThread::isInitialized() && CurrentThread::get().getThreadGroup() + ? CurrentThread::get().getThreadGroup() + : MainThreadStatus::getInstance().getThreadGroup(); + + if (CurrentThread::isInitialized()) + query_context = CurrentThread::get().getQueryContext(); + + if (!query_context) + { + if (!shared_query_context) + { + ContextPtr global_context = CurrentThread::isInitialized() ? CurrentThread::get().getGlobalContext() : nullptr; + if (global_context) + { + shared_query_context = Context::createCopy(global_context); + shared_query_context->makeQueryContext(); + } + } + + if (shared_query_context) + { + shared_query_context->setCurrentQueryId(toString(UUIDHelpers::generateV4())); + query_context = shared_query_context; + } + } + if (cacheEnabled()) { if (blob_name.empty()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Empty blob name"); auto cache_key = cache->hash(blob_name); - auto file_segments_holder = cache->setDownloading(cache_key, current_download_offset, size); + file_segments_holder.emplace(cache->setDownloading(cache_key, current_download_offset, size)); current_download_offset += size; size_t remaining_size = size; - for (const auto & file_segment : file_segments_holder.file_segments) + for (const auto & file_segment : file_segments_holder->file_segments) { size_t current_size = std::min(file_segment->range().size(), remaining_size); remaining_size -= current_size; if (file_segment->reserve(current_size)) { - file_segment->write(working_buffer.begin(), current_size); - ProfileEvents::increment(ProfileEvents::RemoteFSCacheDownloadBytes, current_size); + file_segment->writeInMemory(working_buffer.begin(), current_size); } else { @@ -273,7 +300,9 @@ void WriteBufferFromS3::writePart() /// Releasing lock and condvar notification. 
bg_tasks_condvar.notify_one(); } - }); + + finalizeCacheIfNeeded(); + }, query_context); } else { @@ -281,6 +310,7 @@ void WriteBufferFromS3::writePart() fillUploadRequest(task.req, part_tags.size() + 1); processUploadRequest(task); part_tags.push_back(task.tag); + finalizeCacheIfNeeded(); } } @@ -389,13 +419,15 @@ void WriteBufferFromS3::makeSinglepartUpload() bg_tasks_condvar.notify_one(); } - }); + finalizeCacheIfNeeded(); + }, query_context); } else { PutObjectTask task; fillPutRequest(task.req); processPutRequest(task); + finalizeCacheIfNeeded(); } } @@ -423,6 +455,28 @@ void WriteBufferFromS3::processPutRequest(PutObjectTask & task) throw Exception(outcome.GetError().GetMessage(), ErrorCodes::S3_ERROR); } +void WriteBufferFromS3::finalizeCacheIfNeeded() +{ + if (!file_segments_holder) + return; + + auto & file_segments = file_segments_holder->file_segments; + for (auto file_segment_it = file_segments.begin(); file_segment_it != file_segments.end();) + { + try + { + size_t size = (*file_segment_it)->finalizeWrite(); + file_segment_it = file_segments.erase(file_segment_it); + + ProfileEvents::increment(ProfileEvents::RemoteFSCacheDownloadBytes, size); + } + catch (...) 
+ { + tryLogCurrentException(__PRETTY_FUNCTION__); + } + } +} + void WriteBufferFromS3::waitForReadyBackGroundTasks() { if (schedule) diff --git a/src/IO/WriteBufferFromS3.h b/src/IO/WriteBufferFromS3.h index ecddd72b9e8..1987bbe76a5 100644 --- a/src/IO/WriteBufferFromS3.h +++ b/src/IO/WriteBufferFromS3.h @@ -12,6 +12,7 @@ #include #include +#include #include #include @@ -32,7 +33,7 @@ namespace Aws::S3::Model namespace DB { -using ScheduleFunc = std::function)>; +using ScheduleFunc = std::function, ContextPtr)>; class WriteBufferFromFile; /** @@ -125,6 +126,10 @@ private: const String blob_name; FileCachePtr cache; size_t current_download_offset = 0; + std::optional file_segments_holder; + void finalizeCacheIfNeeded(); + ContextMutablePtr shared_query_context; + ContextPtr query_context; }; } diff --git a/src/Interpreters/ThreadStatusExt.cpp b/src/Interpreters/ThreadStatusExt.cpp index 2ea371d3d03..8fbbdb44c99 100644 --- a/src/Interpreters/ThreadStatusExt.cpp +++ b/src/Interpreters/ThreadStatusExt.cpp @@ -597,6 +597,16 @@ CurrentThread::QueryScope::QueryScope(ContextMutablePtr query_context) query_context->makeQueryContext(); } +CurrentThread::QueryScope::QueryScope(ContextPtr query_context) +{ + if (!query_context->hasQueryContext()) + throw Exception( + ErrorCodes::LOGICAL_ERROR, "Cannot initialize query scope without query context"); + + CurrentThread::initializeQuery(); + CurrentThread::attachQueryContext(query_context); +} + void CurrentThread::QueryScope::logPeakMemoryUsage() { auto group = CurrentThread::getGroup(); diff --git a/src/Interpreters/threadPoolCallbackRunner.cpp b/src/Interpreters/threadPoolCallbackRunner.cpp index 288079e49d2..9eeea986d09 100644 --- a/src/Interpreters/threadPoolCallbackRunner.cpp +++ b/src/Interpreters/threadPoolCallbackRunner.cpp @@ -9,14 +9,19 @@ namespace DB CallbackRunner threadPoolCallbackRunner(ThreadPool & pool) { - return [pool = &pool, thread_group = CurrentThread::getGroup()](auto callback) mutable + return [pool = 
&pool, thread_group = CurrentThread::getGroup()](auto callback, ContextPtr query_context) mutable { pool->scheduleOrThrow( - [&, callback = std::move(callback), thread_group]() + [&, callback = std::move(callback), thread_group, query_context]() { if (thread_group) CurrentThread::attachTo(thread_group); + std::optional query_scope; + + if (query_context && !CurrentThread::get().getQueryContext()) + query_scope.emplace(query_context); + SCOPE_EXIT_SAFE({ if (thread_group) CurrentThread::detachQueryIfNotDetached(); diff --git a/src/Interpreters/threadPoolCallbackRunner.h b/src/Interpreters/threadPoolCallbackRunner.h index 59d06f2f1bc..8d9d5d4d45b 100644 --- a/src/Interpreters/threadPoolCallbackRunner.h +++ b/src/Interpreters/threadPoolCallbackRunner.h @@ -7,7 +7,7 @@ namespace DB { /// High-order function to run callbacks (functions with 'void()' signature) somewhere asynchronously -using CallbackRunner = std::function)>; +using CallbackRunner = std::function, ContextPtr)>; /// Creates CallbackRunner that runs every callback with 'pool->scheduleOrThrow()' CallbackRunner threadPoolCallbackRunner(ThreadPool & pool); diff --git a/tests/config/install.sh b/tests/config/install.sh index f1b4fe1a588..323ded10370 100755 --- a/tests/config/install.sh +++ b/tests/config/install.sh @@ -85,7 +85,7 @@ fi if [[ -n "$EXPORT_S3_STORAGE_POLICIES" ]]; then ln -sf $SRC_PATH/config.d/storage_conf.xml $DEST_SERVER_PATH/config.d/ - ln -sf $SRC_PATH/users.d/s3_cache.xml $DEST_SERVER_PATH/s3_cache/ + ln -sf $SRC_PATH/users.d/s3_cache.xml $DEST_SERVER_PATH/config.d/ fi if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then From 6310ad7cc71d7fdee623c776b7950f1c56df3af4 Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Mon, 4 Apr 2022 14:00:27 +0200 Subject: [PATCH 23/27] Update install.sh --- tests/config/install.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/config/install.sh 
b/tests/config/install.sh index 323ded10370..ff92f01e53f 100755 --- a/tests/config/install.sh +++ b/tests/config/install.sh @@ -85,7 +85,7 @@ fi if [[ -n "$EXPORT_S3_STORAGE_POLICIES" ]]; then ln -sf $SRC_PATH/config.d/storage_conf.xml $DEST_SERVER_PATH/config.d/ - ln -sf $SRC_PATH/users.d/s3_cache.xml $DEST_SERVER_PATH/config.d/ + ln -sf $SRC_PATH/users.d/s3_cache.xml $DEST_SERVER_PATH/users.d/ fi if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then From 79627798c429bf8068ae3e89a16792d5708cf415 Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 4 Apr 2022 20:44:39 +0200 Subject: [PATCH 24/27] Fix race --- src/IO/WriteBufferFromS3.cpp | 53 +++++++++++++++++++++++++++++++----- src/IO/WriteBufferFromS3.h | 2 +- 2 files changed, 47 insertions(+), 8 deletions(-) diff --git a/src/IO/WriteBufferFromS3.cpp b/src/IO/WriteBufferFromS3.cpp index 20d9a054230..86f4366ec8d 100644 --- a/src/IO/WriteBufferFromS3.cpp +++ b/src/IO/WriteBufferFromS3.cpp @@ -52,6 +52,7 @@ struct WriteBufferFromS3::PutObjectTask Aws::S3::Model::PutObjectRequest req; bool is_finised = false; std::exception_ptr exception; + std::optional cache_files; }; WriteBufferFromS3::WriteBufferFromS3( @@ -279,6 +280,13 @@ void WriteBufferFromS3::writePart() } fillUploadRequest(task->req, part_number); + + if (file_segments_holder) + { + task->cache_files.emplace(std::move(*file_segments_holder)); + file_segments_holder.reset(); + } + schedule([this, task]() { try @@ -290,6 +298,15 @@ void WriteBufferFromS3::writePart() task->exception = std::current_exception(); } + try + { + finalizeCacheIfNeeded(task->cache_files); + } + catch (...) + { + tryLogCurrentException(__PRETTY_FUNCTION__); + } + { std::lock_guard lock(bg_tasks_mutex); task->is_finised = true; @@ -300,17 +317,20 @@ void WriteBufferFromS3::writePart() /// Releasing lock and condvar notification. 
bg_tasks_condvar.notify_one(); } - - finalizeCacheIfNeeded(); }, query_context); } else { UploadPartTask task; fillUploadRequest(task.req, part_tags.size() + 1); + if (file_segments_holder) + { + task.cache_files.emplace(std::move(*file_segments_holder)); + file_segments_holder.reset(); + } processUploadRequest(task); part_tags.push_back(task.tag); - finalizeCacheIfNeeded(); + finalizeCacheIfNeeded(task.cache_files); } } @@ -397,7 +417,14 @@ void WriteBufferFromS3::makeSinglepartUpload() if (schedule) { put_object_task = std::make_unique(); + fillPutRequest(put_object_task->req); + if (file_segments_holder) + { + put_object_task->cache_files.emplace(std::move(*file_segments_holder)); + file_segments_holder.reset(); + } + schedule([this]() { try @@ -409,6 +436,15 @@ void WriteBufferFromS3::makeSinglepartUpload() put_object_task->exception = std::current_exception(); } + try + { + finalizeCacheIfNeeded(put_object_task->cache_files); + } + catch (...) + { + tryLogCurrentException(__PRETTY_FUNCTION__); + } + { std::lock_guard lock(bg_tasks_mutex); put_object_task->is_finised = true; @@ -418,16 +454,19 @@ void WriteBufferFromS3::makeSinglepartUpload() /// Releasing lock and condvar notification. 
bg_tasks_condvar.notify_one(); } - - finalizeCacheIfNeeded(); }, query_context); } else { PutObjectTask task; fillPutRequest(task.req); + if (file_segments_holder) + { + task.cache_files.emplace(std::move(*file_segments_holder)); + file_segments_holder.reset(); + } processPutRequest(task); - finalizeCacheIfNeeded(); + finalizeCacheIfNeeded(task.cache_files); } } @@ -455,7 +494,7 @@ void WriteBufferFromS3::processPutRequest(PutObjectTask & task) throw Exception(outcome.GetError().GetMessage(), ErrorCodes::S3_ERROR); } -void WriteBufferFromS3::finalizeCacheIfNeeded() +void WriteBufferFromS3::finalizeCacheIfNeeded(std::optional & file_segments_holder) { if (!file_segments_holder) return; diff --git a/src/IO/WriteBufferFromS3.h b/src/IO/WriteBufferFromS3.h index 1987bbe76a5..d1e51b0c7f9 100644 --- a/src/IO/WriteBufferFromS3.h +++ b/src/IO/WriteBufferFromS3.h @@ -127,7 +127,7 @@ private: FileCachePtr cache; size_t current_download_offset = 0; std::optional file_segments_holder; - void finalizeCacheIfNeeded(); + static void finalizeCacheIfNeeded(std::optional &); ContextMutablePtr shared_query_context; ContextPtr query_context; }; From 42c5721d9f694a94ecba4d70b8c31b43e9ae196c Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 5 Apr 2022 13:03:25 +0200 Subject: [PATCH 25/27] Fix tests with wide parts enabled --- .../0_stateless/02240_system_remote_filesystem_cache.reference | 2 +- .../0_stateless/02240_system_remote_filesystem_cache.sql | 2 +- .../02241_remote_filesystem_cache_on_insert.reference | 2 +- .../0_stateless/02241_remote_filesystem_cache_on_insert.sql | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/queries/0_stateless/02240_system_remote_filesystem_cache.reference b/tests/queries/0_stateless/02240_system_remote_filesystem_cache.reference index a26133180e4..8bcb7e1dd42 100644 --- a/tests/queries/0_stateless/02240_system_remote_filesystem_cache.reference +++ b/tests/queries/0_stateless/02240_system_remote_filesystem_cache.reference @@ 
-3,7 +3,7 @@ SYSTEM DROP FILESYSTEM CACHE; SET enable_filesystem_cache_on_write_operations=0; DROP TABLE IF EXISTS test; -CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache'; +CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache', min_bytes_for_wide_part = 10485760; INSERT INTO test SELECT number, toString(number) FROM numbers(100); SELECT * FROM test FORMAT Null; SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache ORDER BY file_segment_range_end, size; diff --git a/tests/queries/0_stateless/02240_system_remote_filesystem_cache.sql b/tests/queries/0_stateless/02240_system_remote_filesystem_cache.sql index cc5fd259ce8..757f792b931 100644 --- a/tests/queries/0_stateless/02240_system_remote_filesystem_cache.sql +++ b/tests/queries/0_stateless/02240_system_remote_filesystem_cache.sql @@ -5,7 +5,7 @@ SYSTEM DROP FILESYSTEM CACHE; SET enable_filesystem_cache_on_write_operations=0; DROP TABLE IF EXISTS test; -CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache'; +CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache', min_bytes_for_wide_part = 10485760; INSERT INTO test SELECT number, toString(number) FROM numbers(100); SELECT * FROM test FORMAT Null; diff --git a/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.reference b/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.reference index 5bc18d48655..b2269c16264 100644 --- a/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.reference +++ b/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.reference @@ -2,7 +2,7 @@ SET enable_filesystem_cache_on_write_operations=1; DROP TABLE IF EXISTS test; -CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS 
storage_policy='s3_cache'; +CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache', min_bytes_for_wide_part = 10485760; SYSTEM DROP FILESYSTEM CACHE; SELECT file_segment_range_begin, file_segment_range_end, size, state FROM (SELECT file_segment_range_begin, file_segment_range_end, size, state, local_path FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path) WHERE endsWith(local_path, 'data.bin') FORMAT Vertical; SELECT count() FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path; diff --git a/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.sql b/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.sql index 946e72ba2fd..7b0ff2c70ca 100644 --- a/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.sql +++ b/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.sql @@ -5,7 +5,7 @@ SET enable_filesystem_cache_on_write_operations=1; DROP TABLE IF EXISTS test; -CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache'; +CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache', min_bytes_for_wide_part = 10485760; SYSTEM DROP FILESYSTEM CACHE; From 5dce2f18b5cb8ad9285f35df1122d3a63bc3dcd4 Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 7 Apr 2022 18:46:46 +0200 Subject: [PATCH 26/27] Better --- src/Common/FileCache.cpp | 8 +- src/Common/FileCache.h | 8 +- src/Common/FileCacheSettings.h | 2 +- src/Common/FileSegment.cpp | 111 +++++++++++------- src/Common/FileSegment.h | 25 +++- src/Disks/DiskCacheWrapper.cpp | 2 + src/Disks/DiskWebServer.h | 
2 +- src/Disks/IDiskRemote.cpp | 14 +-- src/Disks/IDiskRemote.h | 4 +- src/Disks/IO/CachedReadBufferFromRemoteFS.cpp | 12 +- src/Disks/IO/CachedReadBufferFromRemoteFS.h | 2 - src/Disks/IO/ReadBufferFromRemoteFSGather.cpp | 2 +- src/Disks/IO/ThreadPoolRemoteFSReader.cpp | 20 ---- src/Disks/IO/ThreadPoolRemoteFSReader.h | 1 - src/Disks/S3/DiskS3.cpp | 7 +- src/IO/ParallelReadBuffer.cpp | 2 +- src/IO/WriteBufferFromS3.cpp | 33 ++---- src/IO/WriteBufferFromS3.h | 4 +- src/IO/WriteSettings.h | 1 + src/Interpreters/threadPoolCallbackRunner.cpp | 9 +- src/Interpreters/threadPoolCallbackRunner.h | 2 +- .../MergedColumnOnlyOutputStream.cpp | 2 +- .../System/StorageSystemFilesystemCache.cpp | 10 +- .../System/StorageSystemFilesystemCache.h | 4 +- 24 files changed, 139 insertions(+), 148 deletions(-) diff --git a/src/Common/FileCache.cpp b/src/Common/FileCache.cpp index 2e05e7a7202..5d5851d0b84 100644 --- a/src/Common/FileCache.cpp +++ b/src/Common/FileCache.cpp @@ -57,7 +57,7 @@ String IFileCache::getPathInLocalCache(const Key & key) return fs::path(cache_base_path) / key_str.substr(0, 3) / key_str; } -bool IFileCache::shouldBypassCache() +bool IFileCache::isReadOnly() { return !CurrentThread::isInitialized() || !CurrentThread::get().getQueryContext() @@ -708,7 +708,7 @@ bool LRUFileCache::isLastFileSegmentHolder( return cell->file_segment.use_count() == 2; } -FileSegmentsHolder LRUFileCache::getAll() +FileSegments LRUFileCache::getSnapshot() const { std::lock_guard cache_lock(mutex); @@ -717,10 +717,10 @@ FileSegmentsHolder LRUFileCache::getAll() for (const auto & [key, cells_by_offset] : files) { for (const auto & [offset, cell] : cells_by_offset) - file_segments.push_back(cell.file_segment); + file_segments.push_back(FileSegment::getSnapshot(cell.file_segment)); } - return FileSegmentsHolder(std::move(file_segments)); + return file_segments; } std::vector LRUFileCache::tryGetCachePaths(const Key & key) diff --git a/src/Common/FileCache.h b/src/Common/FileCache.h index 
089bdb633c0..e706376bc89 100644 --- a/src/Common/FileCache.h +++ b/src/Common/FileCache.h @@ -44,7 +44,7 @@ public: virtual void tryRemoveAll() = 0; - static bool shouldBypassCache(); + static bool isReadOnly(); /// Cache capacity in bytes. size_t capacity() const { return max_size; } @@ -72,10 +72,10 @@ public: */ virtual FileSegmentsHolder getOrSet(const Key & key, size_t offset, size_t size) = 0; - virtual FileSegmentsHolder getAll() = 0; - virtual FileSegmentsHolder setDownloading(const Key & key, size_t offset, size_t size) = 0; + virtual FileSegments getSnapshot() const = 0; + /// For debug. virtual String dumpStructure(const Key & key) = 0; @@ -124,7 +124,7 @@ public: FileSegmentsHolder getOrSet(const Key & key, size_t offset, size_t size) override; - FileSegmentsHolder getAll() override; + FileSegments getSnapshot() const override; FileSegmentsHolder setDownloading(const Key & key, size_t offset, size_t size) override; diff --git a/src/Common/FileCacheSettings.h b/src/Common/FileCacheSettings.h index 53c28400c86..0b34e1e3d82 100644 --- a/src/Common/FileCacheSettings.h +++ b/src/Common/FileCacheSettings.h @@ -2,7 +2,7 @@ #include -namespace Poco { namespace Util { class AbstractConfiguration; }} +namespace Poco { namespace Util { class AbstractConfiguration; } } namespace DB { diff --git a/src/Common/FileSegment.cpp b/src/Common/FileSegment.cpp index d8e7a994df4..4def08c6817 100644 --- a/src/Common/FileSegment.cpp +++ b/src/Common/FileSegment.cpp @@ -73,6 +73,12 @@ size_t FileSegment::getDownloadOffset() const return range().left + getDownloadedSize(segment_lock); } +size_t FileSegment::getDownloadedSize() const +{ + std::lock_guard segment_lock(mutex); + return getDownloadedSize(segment_lock); +} + size_t FileSegment::getDownloadedSize(std::lock_guard & /* segment_lock */) const { if (download_state == State::DOWNLOADED) @@ -84,24 +90,15 @@ size_t FileSegment::getDownloadedSize(std::lock_guard & /* segment_l String FileSegment::getCallerId() { - return 
getCallerIdImpl(false); + return getCallerIdImpl(); } -String FileSegment::getCallerIdImpl(bool allow_non_strict_checking) +String FileSegment::getCallerIdImpl() { - if (IFileCache::shouldBypassCache()) - { - /// getCallerId() can be called from completeImpl(), which can be called from complete(). - /// complete() is called from destructor of CachedReadBufferFromRemoteFS when there is no query id anymore. - /// Allow non strict checking in this case. This works correctly as if getCallerIdImpl() is called from destructor, - /// then we know that caller is not a downloader, because downloader is reset each nextImpl() call either - /// manually or via SCOPE_EXIT. - - if (allow_non_strict_checking) - return "None"; - - throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR, "Cannot use cache without query id"); - } + if (!CurrentThread::isInitialized() + || !CurrentThread::get().getQueryContext() + || CurrentThread::getQueryId().size == 0) + return "None:" + toString(getThreadId()); return CurrentThread::getQueryId().toString() + ":" + toString(getThreadId()); } @@ -244,15 +241,9 @@ void FileSegment::write(const char * from, size_t size, size_t offset_) { std::lock_guard segment_lock(mutex); - auto info = getInfoForLogImpl(segment_lock); - e.addMessage("while writing into cache, info: " + info); + wrapWithCacheInfo(e, "while writing into cache", segment_lock); - LOG_ERROR(log, "Failed to write to cache. 
File segment info: {}", info); - - download_state = State::PARTIALLY_DOWNLOADED_NO_CONTINUATION; - - cache_writer->finalize(); - cache_writer.reset(); + setDownloadFailed(segment_lock); cv.notify_all(); @@ -265,7 +256,7 @@ void FileSegment::write(const char * from, size_t size, size_t offset_) void FileSegment::writeInMemory(const char * from, size_t size) { if (!size) - throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR, "Writing zero size is not allowed"); + throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR, "Attempt to write zero size cache file"); if (availableSize() < size) throw Exception( @@ -284,14 +275,13 @@ void FileSegment::writeInMemory(const char * from, size_t size) { cache_writer->write(from, size); } - catch (...) + catch (Exception & e) { - LOG_ERROR(log, "Failed to write to cache. File segment info: {}", getInfoForLogImpl(segment_lock)); + wrapWithCacheInfo(e, "while writing into cache", segment_lock); - download_state = State::PARTIALLY_DOWNLOADED_NO_CONTINUATION; + setDownloadFailed(segment_lock); - cache_writer->finalize(); - cache_writer.reset(); + cv.notify_all(); throw; } @@ -313,23 +303,23 @@ size_t FileSegment::finalizeWrite() { cache_writer->next(); } - catch (...) 
+ catch (Exception & e) { - download_state = State::PARTIALLY_DOWNLOADED_NO_CONTINUATION; + wrapWithCacheInfo(e, "while writing into cache", segment_lock); - cache_writer->finalize(); - cache_writer.reset(); + setDownloadFailed(segment_lock); + + cv.notify_all(); throw; } downloaded_size += size; - cache_writer.reset(); - downloader_id.clear(); - download_state = State::DOWNLOADED; if (downloaded_size != range().size()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected {} == {}", downloaded_size, range().size()); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected downloaded size to equal file segment size ({} == {})", downloaded_size, range().size()); + + setDownloaded(segment_lock); return size; } @@ -398,6 +388,20 @@ void FileSegment::setDownloaded(std::lock_guard & /* segment_lock */ { download_state = State::DOWNLOADED; is_downloaded = true; + downloader_id.clear(); + + if (cache_writer) + { + cache_writer->finalize(); + cache_writer.reset(); + remote_file_reader.reset(); + } +} + +void FileSegment::setDownloadFailed(std::lock_guard & /* segment_lock */) +{ + download_state = State::PARTIALLY_DOWNLOADED_NO_CONTINUATION; + downloader_id.clear(); if (cache_writer) { @@ -455,7 +459,7 @@ void FileSegment::complete(State state) } catch (...) { - if (!downloader_id.empty() && downloader_id == getCallerIdImpl(true)) + if (!downloader_id.empty() && downloader_id == getCallerIdImpl()) downloader_id.clear(); cv.notify_all(); @@ -480,7 +484,7 @@ void FileSegment::complete(std::lock_guard & cache_lock) /// Segment state can be changed from DOWNLOADING or EMPTY only if the caller is the /// downloader or the only owner of the segment. 
- bool can_update_segment_state = downloader_id == getCallerIdImpl(true) + bool can_update_segment_state = downloader_id == getCallerIdImpl() || cache->isLastFileSegmentHolder(key(), offset(), cache_lock, segment_lock); if (can_update_segment_state) @@ -489,11 +493,11 @@ void FileSegment::complete(std::lock_guard & cache_lock) try { - completeImpl(cache_lock, segment_lock, /* allow_non_strict_checking */true); + completeImpl(cache_lock, segment_lock); } catch (...) { - if (!downloader_id.empty() && downloader_id == getCallerIdImpl(true)) + if (!downloader_id.empty() && downloader_id == getCallerIdImpl()) downloader_id.clear(); cv.notify_all(); @@ -503,7 +507,7 @@ void FileSegment::complete(std::lock_guard & cache_lock) cv.notify_all(); } -void FileSegment::completeImpl(std::lock_guard & cache_lock, std::lock_guard & segment_lock, bool allow_non_strict_checking) +void FileSegment::completeImpl(std::lock_guard & cache_lock, std::lock_guard & segment_lock) { bool is_last_holder = cache->isLastFileSegmentHolder(key(), offset(), cache_lock, segment_lock); @@ -539,7 +543,7 @@ void FileSegment::completeImpl(std::lock_guard & cache_lock, std::lo } } - if (!downloader_id.empty() && (downloader_id == getCallerIdImpl(allow_non_strict_checking) || is_last_holder)) + if (!downloader_id.empty() && (downloader_id == getCallerIdImpl() || is_last_holder)) { LOG_TEST(log, "Clearing downloader id: {}, current state: {}", downloader_id, stateToString(download_state)); downloader_id.clear(); @@ -566,6 +570,11 @@ String FileSegment::getInfoForLogImpl(std::lock_guard & segment_lock return info.str(); } +void FileSegment::wrapWithCacheInfo(Exception & e, const String & message, std::lock_guard & segment_lock) const +{ + e.addMessage(fmt::format("{}, current cache state: {}", message, getInfoForLogImpl(segment_lock))); +} + String FileSegment::stateToString(FileSegment::State state) { switch (state) @@ -599,6 +608,22 @@ void FileSegment::assertCorrectnessImpl(std::lock_guard & /* segment 
assert(download_state != FileSegment::State::DOWNLOADED || std::filesystem::file_size(cache->getPathInLocalCache(key(), offset())) > 0); } +FileSegmentPtr FileSegment::getSnapshot(const FileSegmentPtr & file_segment) +{ + auto snapshot = std::make_shared( + file_segment->offset(), + file_segment->range().size(), + file_segment->key(), + nullptr, + file_segment->state()); + + snapshot->hits_count = file_segment->getHitsCount(); + snapshot->ref_count = file_segment.use_count(); + snapshot->downloaded_size = file_segment->getDownloadedSize(); + + return snapshot; +} + FileSegmentsHolder::~FileSegmentsHolder() { /// In CacheableReadBufferFromRemoteFS file segment's downloader removes file segments from diff --git a/src/Common/FileSegment.h b/src/Common/FileSegment.h index c9e4146c726..a02d8e85a46 100644 --- a/src/Common/FileSegment.h +++ b/src/Common/FileSegment.h @@ -97,6 +97,11 @@ public: void write(const char * from, size_t size, size_t offset_); + /** + * writeInMemory and finalizeWrite are used together to write a single file with delay. + * Both can be called only once, one after another. Used for writing cache via threadpool + * on wrote operations. TODO: this solution is temporary, until adding a separate cache layer. 
+ */ void writeInMemory(const char * from, size_t size); size_t finalizeWrite(); @@ -121,18 +126,24 @@ public: size_t getDownloadOffset() const; + size_t getDownloadedSize() const; + void completeBatchAndResetDownloader(); void complete(State state); String getInfoForLog() const; - size_t hits() const { return hits_num; } + size_t getHitsCount() const { return hits_count; } - void hit() { ++hits_num; } + size_t getRefCount() const { return ref_count; } + + void incrementHitsCount() { ++hits_count; } void assertCorrectness() const; + static FileSegmentPtr getSnapshot(const FileSegmentPtr & file_segment); + private: size_t availableSize() const { return reserved_size - downloaded_size; } @@ -141,6 +152,9 @@ private: void assertCorrectnessImpl(std::lock_guard & segment_lock) const; void setDownloaded(std::lock_guard & segment_lock); + void setDownloadFailed(std::lock_guard & segment_lock); + + void wrapWithCacheInfo(Exception & e, const String & message, std::lock_guard & segment_lock) const; bool lastFileSegmentHolder() const; @@ -152,9 +166,9 @@ private: void completeImpl( std::lock_guard & cache_lock, - std::lock_guard & segment_lock, bool allow_non_strict_checking = false); + std::lock_guard & segment_lock); - static String getCallerIdImpl(bool allow_non_strict_checking = false); + static String getCallerIdImpl(); void resetDownloaderImpl(std::lock_guard & segment_lock); @@ -188,7 +202,8 @@ private: bool detached = false; std::atomic is_downloaded{false}; - std::atomic hits_num = 0; /// cache hits. + std::atomic hits_count = 0; /// cache hits. 
+ std::atomic ref_count = 0; /// Used for getting snapshot state }; struct FileSegmentsHolder : private boost::noncopyable diff --git a/src/Disks/DiskCacheWrapper.cpp b/src/Disks/DiskCacheWrapper.cpp index 178caa0c496..a86f13f55af 100644 --- a/src/Disks/DiskCacheWrapper.cpp +++ b/src/Disks/DiskCacheWrapper.cpp @@ -206,6 +206,8 @@ DiskCacheWrapper::writeFile(const String & path, size_t buf_size, WriteMode mode return DiskDecorator::writeFile(path, buf_size, mode, settings); WriteSettings current_settings = settings; + /// There are two different cache implementations. Disable second one if the first is enabled. + /// The firts will soon be removed, this disabling is temporary. current_settings.enable_filesystem_cache_on_write_operations = false; LOG_TEST(log, "Write file {} to cache", backQuote(path)); diff --git a/src/Disks/DiskWebServer.h b/src/Disks/DiskWebServer.h index 94ba32939da..6341b582174 100644 --- a/src/Disks/DiskWebServer.h +++ b/src/Disks/DiskWebServer.h @@ -77,7 +77,7 @@ public: UInt64 getTotalSpace() const final override { return std::numeric_limits::max(); } UInt64 getAvailableSpace() const final override { return std::numeric_limits::max(); } -UInt64 getUnreservedSpace() const final override { return std::numeric_limits::max(); } + UInt64 getUnreservedSpace() const final override { return std::numeric_limits::max(); } /// Read-only part diff --git a/src/Disks/IDiskRemote.cpp b/src/Disks/IDiskRemote.cpp index 0f430e69a5e..fb1c0ddc378 100644 --- a/src/Disks/IDiskRemote.cpp +++ b/src/Disks/IDiskRemote.cpp @@ -343,9 +343,9 @@ void IDiskRemote::removeMetadataRecursive(const String & path, RemoteFSPathKeepe } } -std::vector IDiskRemote::getRemotePaths(const String & path) const +std::vector IDiskRemote::getRemotePaths(const String & local_path) const { - auto metadata = readMetadata(path); + auto metadata = readMetadata(local_path); std::vector remote_paths; for (const auto & [remote_path, _] : metadata.remote_fs_objects) @@ -354,16 +354,16 @@ 
std::vector IDiskRemote::getRemotePaths(const String & path) const return remote_paths; } -void IDiskRemote::getRemotePathsRecursive(const String & path, std::vector & paths_map) +void IDiskRemote::getRemotePathsRecursive(const String & local_path, std::vector & paths_map) { - if (metadata_disk->isFile(path)) + if (metadata_disk->isFile(local_path)) { - paths_map.emplace_back(path, getRemotePaths(path)); + paths_map.emplace_back(local_path, getRemotePaths(local_path)); } else { - for (auto it = iterateDirectory(path); it->isValid(); it->next()) - IDiskRemote::getRemotePathsRecursive(fs::path(path) / it->name(), paths_map); + for (auto it = iterateDirectory(local_path); it->isValid(); it->next()) + IDiskRemote::getRemotePathsRecursive(fs::path(local_path) / it->name(), paths_map); } } diff --git a/src/Disks/IDiskRemote.h b/src/Disks/IDiskRemote.h index 6b16a1f753c..a8a299391bf 100644 --- a/src/Disks/IDiskRemote.h +++ b/src/Disks/IDiskRemote.h @@ -68,9 +68,9 @@ public: String getCacheBasePath() const final override; - std::vector getRemotePaths(const String & path) const final override; + std::vector getRemotePaths(const String & local_path) const final override; - void getRemotePathsRecursive(const String & path, std::vector & paths_map) override; + void getRemotePathsRecursive(const String & local_path, std::vector & paths_map) override; /// Methods for working with metadata. 
For some operations (like hardlink /// creation) metadata can be updated concurrently from multiple threads diff --git a/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp b/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp index a2e60a1937e..b9f7457447e 100644 --- a/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp +++ b/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp @@ -389,7 +389,7 @@ bool CachedReadBufferFromRemoteFS::completeFileSegmentAndGetNext() implementation_buffer = getImplementationBuffer(*current_file_segment_it); if (read_type == ReadType::CACHED) - (*current_file_segment_it)->hit(); + (*current_file_segment_it)->incrementHitsCount(); LOG_TEST(log, "New segment: {}", (*current_file_segment_it)->range().toString()); return true; @@ -573,8 +573,6 @@ bool CachedReadBufferFromRemoteFS::nextImpl() bool CachedReadBufferFromRemoteFS::nextImplStep() { - assertCacheAllowed(); - last_caller_id = FileSegment::getCallerId(); if (!initialized) @@ -623,7 +621,7 @@ bool CachedReadBufferFromRemoteFS::nextImplStep() implementation_buffer = getImplementationBuffer(*current_file_segment_it); if (read_type == ReadType::CACHED) - (*current_file_segment_it)->hit(); + (*current_file_segment_it)->incrementHitsCount(); } assert(!internal_buffer.empty()); @@ -820,12 +818,6 @@ std::optional CachedReadBufferFromRemoteFS::getLastNonDownloadedOffset() return std::nullopt; } -void CachedReadBufferFromRemoteFS::assertCacheAllowed() const -{ - if (IFileCache::shouldBypassCache() && !settings.read_from_filesystem_cache_if_exists_otherwise_bypass_cache) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Cache used when not allowed"); -} - String CachedReadBufferFromRemoteFS::getInfoForLog() { auto implementation_buffer_read_range_str = diff --git a/src/Disks/IO/CachedReadBufferFromRemoteFS.h b/src/Disks/IO/CachedReadBufferFromRemoteFS.h index 5d632e62c0f..5fc9ec39246 100644 --- a/src/Disks/IO/CachedReadBufferFromRemoteFS.h +++ b/src/Disks/IO/CachedReadBufferFromRemoteFS.h @@ -50,8 +50,6 @@ private: bool 
nextImplStep(); - void assertCacheAllowed() const; - enum class ReadType { CACHED, diff --git a/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp b/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp index 18c61e1d704..7014b21e8b4 100644 --- a/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp +++ b/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp @@ -38,7 +38,7 @@ SeekableReadBufferPtr ReadBufferFromS3Gather::createImplementationBuffer(const S current_path = path; auto cache = settings.remote_fs_cache; - bool with_cache = cache && settings.enable_filesystem_cache && !IFileCache::shouldBypassCache(); + bool with_cache = cache && settings.enable_filesystem_cache; auto remote_file_reader_creator = [=, this]() { diff --git a/src/Disks/IO/ThreadPoolRemoteFSReader.cpp b/src/Disks/IO/ThreadPoolRemoteFSReader.cpp index 15d0eece624..b1ae42d03d6 100644 --- a/src/Disks/IO/ThreadPoolRemoteFSReader.cpp +++ b/src/Disks/IO/ThreadPoolRemoteFSReader.cpp @@ -1,6 +1,5 @@ #include "ThreadPoolRemoteFSReader.h" -#include #include #include #include @@ -51,25 +50,6 @@ std::future ThreadPoolRemoteFSReader::submit(Reques if (CurrentThread::isInitialized()) query_context = CurrentThread::get().getQueryContext(); - if (!query_context) - { - if (!shared_query_context) - { - ContextPtr global_context = CurrentThread::isInitialized() ? 
CurrentThread::get().getGlobalContext() : nullptr; - if (global_context) - { - shared_query_context = Context::createCopy(global_context); - shared_query_context->makeQueryContext(); - } - } - - if (shared_query_context) - { - shared_query_context->setCurrentQueryId(toString(UUIDHelpers::generateV4())); - query_context = shared_query_context; - } - } - auto task = std::make_shared>([request, running_group, query_context] { ThreadStatus thread_status; diff --git a/src/Disks/IO/ThreadPoolRemoteFSReader.h b/src/Disks/IO/ThreadPoolRemoteFSReader.h index a2a1e77c834..b2d5f11724a 100644 --- a/src/Disks/IO/ThreadPoolRemoteFSReader.h +++ b/src/Disks/IO/ThreadPoolRemoteFSReader.h @@ -15,7 +15,6 @@ class ThreadPoolRemoteFSReader : public IAsynchronousReader private: ThreadPool pool; - ContextMutablePtr shared_query_context; public: ThreadPoolRemoteFSReader(size_t pool_size, size_t queue_size_); diff --git a/src/Disks/S3/DiskS3.cpp b/src/Disks/S3/DiskS3.cpp index 07d27f67d1e..d879953bd9e 100644 --- a/src/Disks/S3/DiskS3.cpp +++ b/src/Disks/S3/DiskS3.cpp @@ -230,7 +230,7 @@ std::unique_ptr DiskS3::readFile(const String & path, co ReadSettings disk_read_settings{read_settings}; if (cache) { - if (IFileCache::shouldBypassCache()) + if (IFileCache::isReadOnly()) disk_read_settings.read_from_filesystem_cache_if_exists_otherwise_bypass_cache = true; disk_read_settings.remote_fs_cache = cache; @@ -272,7 +272,8 @@ std::unique_ptr DiskS3::writeFile(const String & path, LOG_TRACE(log, "{} to file by path: {}. S3 path: {}", mode == WriteMode::Rewrite ? 
"Write" : "Append", backQuote(metadata_disk->getPath() + path), remote_fs_root_path + blob_name); - bool cache_on_insert = fs::path(path).extension() != ".tmp" + bool cache_on_write = cache + && fs::path(path).extension() != ".tmp" && write_settings.enable_filesystem_cache_on_write_operations && FileCacheFactory::instance().getSettings(getCacheBasePath()).cache_on_write_operations; @@ -285,7 +286,7 @@ std::unique_ptr DiskS3::writeFile(const String & path, settings->s3_upload_part_size_multiply_parts_count_threshold, settings->s3_max_single_part_upload_size, std::move(object_metadata), - buf_size, threadPoolCallbackRunner(getThreadPoolWriter()), blob_name, cache_on_insert ? cache : nullptr); + buf_size, threadPoolCallbackRunner(getThreadPoolWriter()), blob_name, cache_on_write ? cache : nullptr); auto create_metadata_callback = [this, path, blob_name, mode] (size_t count) { diff --git a/src/IO/ParallelReadBuffer.cpp b/src/IO/ParallelReadBuffer.cpp index 64550e9430b..f036d6a08c8 100644 --- a/src/IO/ParallelReadBuffer.cpp +++ b/src/IO/ParallelReadBuffer.cpp @@ -33,7 +33,7 @@ bool ParallelReadBuffer::addReaderToPool(std::unique_lock & /*buffer auto worker = read_workers.emplace_back(std::make_shared(std::move(reader))); - schedule([this, worker = std::move(worker)]() mutable { readerThreadFunction(std::move(worker)); }, nullptr); + schedule([this, worker = std::move(worker)]() mutable { readerThreadFunction(std::move(worker)); }); return true; } diff --git a/src/IO/WriteBufferFromS3.cpp b/src/IO/WriteBufferFromS3.cpp index 86f4366ec8d..c85f3989531 100644 --- a/src/IO/WriteBufferFromS3.cpp +++ b/src/IO/WriteBufferFromS3.cpp @@ -100,28 +100,6 @@ void WriteBufferFromS3::nextImpl() ? 
CurrentThread::get().getThreadGroup() : MainThreadStatus::getInstance().getThreadGroup(); - if (CurrentThread::isInitialized()) - query_context = CurrentThread::get().getQueryContext(); - - if (!query_context) - { - if (!shared_query_context) - { - ContextPtr global_context = CurrentThread::isInitialized() ? CurrentThread::get().getGlobalContext() : nullptr; - if (global_context) - { - shared_query_context = Context::createCopy(global_context); - shared_query_context->makeQueryContext(); - } - } - - if (shared_query_context) - { - shared_query_context->setCurrentQueryId(toString(UUIDHelpers::generateV4())); - query_context = shared_query_context; - } - } - if (cacheEnabled()) { if (blob_name.empty()) @@ -132,8 +110,10 @@ void WriteBufferFromS3::nextImpl() current_download_offset += size; size_t remaining_size = size; - for (const auto & file_segment : file_segments_holder->file_segments) + auto & file_segments = file_segments_holder->file_segments; + for (auto file_segment_it = file_segments.begin(); file_segment_it != file_segments.end(); ++file_segment_it) { + auto & file_segment = *file_segment_it; size_t current_size = std::min(file_segment->range().size(), remaining_size); remaining_size -= current_size; @@ -143,6 +123,7 @@ void WriteBufferFromS3::nextImpl() } else { + file_segments.erase(file_segment_it, file_segments.end()); break; } } @@ -190,7 +171,7 @@ WriteBufferFromS3::~WriteBufferFromS3() bool WriteBufferFromS3::cacheEnabled() const { - return cache != nullptr && !IFileCache::shouldBypassCache(); + return cache != nullptr; } void WriteBufferFromS3::preFinalize() @@ -317,7 +298,7 @@ void WriteBufferFromS3::writePart() /// Releasing lock and condvar notification. bg_tasks_condvar.notify_one(); } - }, query_context); + }); } else { @@ -454,7 +435,7 @@ void WriteBufferFromS3::makeSinglepartUpload() /// Releasing lock and condvar notification. 
bg_tasks_condvar.notify_one(); } - }, query_context); + }); } else { diff --git a/src/IO/WriteBufferFromS3.h b/src/IO/WriteBufferFromS3.h index d1e51b0c7f9..8e91bbc04da 100644 --- a/src/IO/WriteBufferFromS3.h +++ b/src/IO/WriteBufferFromS3.h @@ -33,7 +33,7 @@ namespace Aws::S3::Model namespace DB { -using ScheduleFunc = std::function, ContextPtr)>; +using ScheduleFunc = std::function)>; class WriteBufferFromFile; /** @@ -128,8 +128,6 @@ private: size_t current_download_offset = 0; std::optional file_segments_holder; static void finalizeCacheIfNeeded(std::optional &); - ContextMutablePtr shared_query_context; - ContextPtr query_context; }; } diff --git a/src/IO/WriteSettings.h b/src/IO/WriteSettings.h index af26452e8e6..3464bb31664 100644 --- a/src/IO/WriteSettings.h +++ b/src/IO/WriteSettings.h @@ -3,6 +3,7 @@ namespace DB { +/// Settings to be passed to IDisk::writeFile() struct WriteSettings { bool enable_filesystem_cache_on_write_operations = false; diff --git a/src/Interpreters/threadPoolCallbackRunner.cpp b/src/Interpreters/threadPoolCallbackRunner.cpp index 9eeea986d09..288079e49d2 100644 --- a/src/Interpreters/threadPoolCallbackRunner.cpp +++ b/src/Interpreters/threadPoolCallbackRunner.cpp @@ -9,19 +9,14 @@ namespace DB CallbackRunner threadPoolCallbackRunner(ThreadPool & pool) { - return [pool = &pool, thread_group = CurrentThread::getGroup()](auto callback, ContextPtr query_context) mutable + return [pool = &pool, thread_group = CurrentThread::getGroup()](auto callback) mutable { pool->scheduleOrThrow( - [&, callback = std::move(callback), thread_group, query_context]() + [&, callback = std::move(callback), thread_group]() { if (thread_group) CurrentThread::attachTo(thread_group); - std::optional query_scope; - - if (query_context && !CurrentThread::get().getQueryContext()) - query_scope.emplace(query_context); - SCOPE_EXIT_SAFE({ if (thread_group) CurrentThread::detachQueryIfNotDetached(); diff --git a/src/Interpreters/threadPoolCallbackRunner.h 
b/src/Interpreters/threadPoolCallbackRunner.h index 8d9d5d4d45b..59d06f2f1bc 100644 --- a/src/Interpreters/threadPoolCallbackRunner.h +++ b/src/Interpreters/threadPoolCallbackRunner.h @@ -7,7 +7,7 @@ namespace DB { /// High-order function to run callbacks (functions with 'void()' signature) somewhere asynchronously -using CallbackRunner = std::function, ContextPtr)>; +using CallbackRunner = std::function)>; /// Creates CallbackRunner that runs every callback with 'pool->scheduleOrThrow()' CallbackRunner threadPoolCallbackRunner(ThreadPool & pool); diff --git a/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp b/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp index 005d8093bba..4fb993bfcc7 100644 --- a/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp +++ b/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp @@ -27,7 +27,7 @@ MergedColumnOnlyOutputStream::MergedColumnOnlyOutputStream( MergeTreeWriterSettings writer_settings( global_settings, - WriteSettings{}, + data_part->storage.getContext()->getWriteSettings(), storage_settings, index_granularity_info ? 
index_granularity_info->is_adaptive : data_part->storage.canUseAdaptiveGranularity(), /* rewrite_primary_key = */false); diff --git a/src/Storages/System/StorageSystemFilesystemCache.cpp b/src/Storages/System/StorageSystemFilesystemCache.cpp index 08a62c47f27..f3ead8a95f0 100644 --- a/src/Storages/System/StorageSystemFilesystemCache.cpp +++ b/src/Storages/System/StorageSystemFilesystemCache.cpp @@ -22,6 +22,7 @@ NamesAndTypesList StorageSystemFilesystemCache::getNamesAndTypes() {"state", std::make_shared()}, {"cache_hits", std::make_shared()}, {"references", std::make_shared()}, + {"downloaded_size", std::make_shared()}, }; } @@ -37,9 +38,9 @@ void StorageSystemFilesystemCache::fillData(MutableColumns & res_columns, Contex for (const auto & [cache_base_path, cache_data] : caches) { const auto & cache = cache_data.cache; - auto holder = cache->getAll(); + auto file_segments = cache->getSnapshot(); - for (const auto & file_segment : holder.file_segments) + for (const auto & file_segment : file_segments) { res_columns[0]->insert(cache_base_path); res_columns[1]->insert(cache->getPathInLocalCache(file_segment->key(), file_segment->offset())); @@ -49,8 +50,9 @@ void StorageSystemFilesystemCache::fillData(MutableColumns & res_columns, Contex res_columns[3]->insert(range.right); res_columns[4]->insert(range.size()); res_columns[5]->insert(FileSegment::stateToString(file_segment->state())); - res_columns[6]->insert(file_segment->hits()); - res_columns[7]->insert(file_segment.use_count()); + res_columns[6]->insert(file_segment->getHitsCount()); + res_columns[7]->insert(file_segment->getRefCount()); + res_columns[8]->insert(file_segment->getDownloadedSize()); } } } diff --git a/src/Storages/System/StorageSystemFilesystemCache.h b/src/Storages/System/StorageSystemFilesystemCache.h index 0f0bd81e760..1d9d28d7b50 100644 --- a/src/Storages/System/StorageSystemFilesystemCache.h +++ b/src/Storages/System/StorageSystemFilesystemCache.h @@ -7,12 +7,14 @@ namespace DB { /** + * Usage
example. How to get mapping from local paths to remote paths: * SELECT * cache_path, * cache_hits, * remote_path, * local_path, - * file_segment_range, + * file_segment_range_begin, + * file_segment_range_end, * size, * state * FROM From 0b7af1f26c85877094478cdbaa52c9fae451159a Mon Sep 17 00:00:00 2001 From: kssenii Date: Fri, 8 Apr 2022 01:58:55 +0200 Subject: [PATCH 27/27] Fix checks --- src/Common/FileCache.cpp | 2 +- src/Common/FileSegment.cpp | 5 +++-- src/Common/FileSegment.h | 2 +- src/Disks/DiskCacheWrapper.cpp | 2 +- .../0_stateless/02240_system_remote_filesystem_cache.sql | 2 +- .../0_stateless/02241_remote_filesystem_cache_on_insert.sql | 2 +- 6 files changed, 8 insertions(+), 7 deletions(-) diff --git a/src/Common/FileCache.cpp b/src/Common/FileCache.cpp index 5d5851d0b84..05d32f5ffe4 100644 --- a/src/Common/FileCache.cpp +++ b/src/Common/FileCache.cpp @@ -717,7 +717,7 @@ FileSegments LRUFileCache::getSnapshot() const for (const auto & [key, cells_by_offset] : files) { for (const auto & [offset, cell] : cells_by_offset) - file_segments.push_back(FileSegment::getSnapshot(cell.file_segment)); + file_segments.push_back(FileSegment::getSnapshot(cell.file_segment, cache_lock)); } return file_segments; diff --git a/src/Common/FileSegment.cpp b/src/Common/FileSegment.cpp index 4def08c6817..ba4129a0ef6 100644 --- a/src/Common/FileSegment.cpp +++ b/src/Common/FileSegment.cpp @@ -608,18 +608,19 @@ void FileSegment::assertCorrectnessImpl(std::lock_guard & /* segment assert(download_state != FileSegment::State::DOWNLOADED || std::filesystem::file_size(cache->getPathInLocalCache(key(), offset())) > 0); } -FileSegmentPtr FileSegment::getSnapshot(const FileSegmentPtr & file_segment) +FileSegmentPtr FileSegment::getSnapshot(const FileSegmentPtr & file_segment, std::lock_guard & /* cache_lock */) { auto snapshot = std::make_shared( file_segment->offset(), file_segment->range().size(), file_segment->key(), nullptr, - file_segment->state()); + State::EMPTY); 
snapshot->hits_count = file_segment->getHitsCount(); snapshot->ref_count = file_segment.use_count(); snapshot->downloaded_size = file_segment->getDownloadedSize(); + snapshot->download_state = file_segment->state(); return snapshot; } diff --git a/src/Common/FileSegment.h b/src/Common/FileSegment.h index a02d8e85a46..615fd9a56de 100644 --- a/src/Common/FileSegment.h +++ b/src/Common/FileSegment.h @@ -142,7 +142,7 @@ public: void assertCorrectness() const; - static FileSegmentPtr getSnapshot(const FileSegmentPtr & file_segment); + static FileSegmentPtr getSnapshot(const FileSegmentPtr & file_segment, std::lock_guard & cache_lock); private: size_t availableSize() const { return reserved_size - downloaded_size; } diff --git a/src/Disks/DiskCacheWrapper.cpp b/src/Disks/DiskCacheWrapper.cpp index a86f13f55af..cc2c330975a 100644 --- a/src/Disks/DiskCacheWrapper.cpp +++ b/src/Disks/DiskCacheWrapper.cpp @@ -207,7 +207,7 @@ DiskCacheWrapper::writeFile(const String & path, size_t buf_size, WriteMode mode WriteSettings current_settings = settings; /// There are two different cache implementations. Disable second one if the first is enabled. - /// The firts will soon be removed, this disabling is temporary. + /// The first will soon be removed, this disabling is temporary. 
current_settings.enable_filesystem_cache_on_write_operations = false; LOG_TEST(log, "Write file {} to cache", backQuote(path)); diff --git a/tests/queries/0_stateless/02240_system_remote_filesystem_cache.sql b/tests/queries/0_stateless/02240_system_remote_filesystem_cache.sql index 757f792b931..aa469779130 100644 --- a/tests/queries/0_stateless/02240_system_remote_filesystem_cache.sql +++ b/tests/queries/0_stateless/02240_system_remote_filesystem_cache.sql @@ -1,4 +1,4 @@ --- Tags: no-parallel, no-fasttest +-- Tags: no-parallel, no-fasttest, no-s3-storage -- { echo } diff --git a/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.sql b/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.sql index 7b0ff2c70ca..c3ab1de3693 100644 --- a/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.sql +++ b/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.sql @@ -1,4 +1,4 @@ --- Tags: no-parallel, no-fasttest +-- Tags: no-parallel, no-fasttest, no-s3-storage -- { echo }