diff --git a/src/Common/getQueryId.h b/src/Common/getQueryId.h new file mode 100644 index 00000000000..32f1937ecb0 --- /dev/null +++ b/src/Common/getQueryId.h @@ -0,0 +1,13 @@ +#include + +namespace DB +{ + +static String getQueryId() +{ + if (!CurrentThread::isInitialized() || !CurrentThread::get().getQueryContext()) + return ""; + return CurrentThread::getQueryId().toString(); +} + +} diff --git a/src/Disks/IO/AsynchronousReadIndirectBufferFromRemoteFS.cpp b/src/Disks/IO/AsynchronousReadIndirectBufferFromRemoteFS.cpp index d1e4b9fa21e..8ac576be61c 100644 --- a/src/Disks/IO/AsynchronousReadIndirectBufferFromRemoteFS.cpp +++ b/src/Disks/IO/AsynchronousReadIndirectBufferFromRemoteFS.cpp @@ -1,10 +1,10 @@ #include "AsynchronousReadIndirectBufferFromRemoteFS.h" #include +#include #include #include #include -#include namespace CurrentMetrics @@ -57,7 +57,6 @@ AsynchronousReadIndirectBufferFromRemoteFS::AsynchronousReadIndirectBufferFromRe ProfileEvents::increment(ProfileEvents::RemoteFSBuffers); } - String AsynchronousReadIndirectBufferFromRemoteFS::getFileName() const { return impl->getFileName(); diff --git a/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp b/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp index 66b918e7e12..575cb6a4162 100644 --- a/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp +++ b/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp @@ -5,6 +5,7 @@ #include #include #include +#include namespace ProfileEvents @@ -23,13 +24,6 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } -static String getQueryId() -{ - if (!CurrentThread::isInitialized() || !CurrentThread::get().getQueryContext() || CurrentThread::getQueryId().size == 0) - return ""; - return CurrentThread::getQueryId().toString(); -} - CachedReadBufferFromRemoteFS::CachedReadBufferFromRemoteFS( const String & remote_fs_object_path_, FileCachePtr cache_, @@ -49,7 +43,7 @@ CachedReadBufferFromRemoteFS::CachedReadBufferFromRemoteFS( , read_until_position(read_until_position_) , remote_file_reader_creator(remote_file_reader_creator_) , query_id(getQueryId()) - , enable_logging(!query_id.empty() && CurrentThread::get().getQueryContext()->getSettingsRef().enable_filesystem_cache_log) + , enable_logging(!query_id.empty() && settings_.enable_filesystem_cache_log) { } @@ -63,6 +57,7 @@ void CachedReadBufferFromRemoteFS::appendFilesystemCacheLog( .source_file_path = remote_fs_object_path, .file_segment_range = { file_segment_range.left, file_segment_range.right }, .file_segment_size = file_segment_range.size(), + .cache_attempted = true, }; switch (type) diff --git a/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp b/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp index 4399befa433..39b82131aa9 100644 --- a/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp +++ b/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp @@ -21,9 +21,12 @@ #include #include #include +#include +#include namespace fs = std::filesystem; + namespace DB { @@ -39,9 +42,6 @@ SeekableReadBufferPtr ReadBufferFromS3Gather::createImplementationBuffer(const S auto remote_path = fs::path(common_path_prefix) / path; auto cache = settings.remote_fs_cache; - bool with_cache = cache - && settings.enable_filesystem_cache - && (!IFileCache::isReadOnly() || settings.read_from_filesystem_cache_if_exists_otherwise_bypass_cache); auto remote_file_reader_creator = [=, this]() { @@ -95,7 +95,30 @@ ReadBufferFromRemoteFSGather::ReadBufferFromRemoteFSGather( , blobs_to_read(blobs_to_read_) , settings(settings_) , log(&Poco::Logger::get("ReadBufferFromRemoteFSGather")) + , query_id(getQueryId()) + , enable_cache_log(!query_id.empty() && settings.enable_filesystem_cache_log) { + with_cache = settings.remote_fs_cache + && settings.enable_filesystem_cache + && (!IFileCache::isReadOnly() || settings.read_from_filesystem_cache_if_exists_otherwise_bypass_cache); +} + + +void ReadBufferFromRemoteFSGather::appendFilesystemCacheLog() +{ + FilesystemCacheLogElement elem + { + .event_time = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now()), + .query_id = query_id, + .source_file_path = getFileName(), + .file_segment_range = { 0, total_bytes_read }, + .read_type = FilesystemCacheLogElement::ReadType::READ_FROM_FS_BYPASSING_CACHE, + .file_segment_size = total_bytes_read, + .cache_attempted = false, + }; + + if (auto cache_log = Context::getGlobalContextInstance()->getFilesystemCacheLog()) + cache_log->add(elem); } @@ -199,6 +222,7 @@ bool ReadBufferFromRemoteFSGather::readImpl() */ if (bytes_to_ignore) { + total_bytes_read += bytes_to_ignore; current_buf->ignore(bytes_to_ignore); result = current_buf->hasPendingData(); file_offset_of_buffer_end += bytes_to_ignore; @@ -225,6 +249,7 @@ bool ReadBufferFromRemoteFSGather::readImpl() { assert(available()); nextimpl_working_buffer_offset = offset(); + total_bytes_read += available(); } return result; @@ -282,5 +307,12 @@ size_t ReadBufferFromRemoteFSGather::getImplementationBufferOffset() const return current_buf->getFileOffsetOfBufferEnd(); } +ReadBufferFromRemoteFSGather::~ReadBufferFromRemoteFSGather() +{ + if (!with_cache && enable_cache_log) + { + appendFilesystemCacheLog(); + } +} } diff --git a/src/Disks/IO/ReadBufferFromRemoteFSGather.h b/src/Disks/IO/ReadBufferFromRemoteFSGather.h index da2d1dee4b2..67df9104139 100644 --- a/src/Disks/IO/ReadBufferFromRemoteFSGather.h +++ b/src/Disks/IO/ReadBufferFromRemoteFSGather.h @@ -29,6 +29,9 @@ public: const std::string & common_path_prefix_, const BlobsPathToSize & blobs_to_read_, const ReadSettings & settings_); + + ~ReadBufferFromRemoteFSGather() override; + String getFileName() const; void reset(); @@ -66,6 +69,8 @@ protected: String current_path; + bool with_cache; + private: bool nextImpl() override; @@ -75,6 +80,8 @@ private: bool moveToNextBuffer(); + void appendFilesystemCacheLog(); + SeekableReadBufferPtr current_buf; size_t current_buf_idx = 0; @@ -89,6 +96,12 @@ private: size_t bytes_to_ignore = 0; Poco::Logger * log; + + String query_id; + + size_t total_bytes_read = 0; + + bool enable_cache_log = false; }; diff --git a/src/IO/ReadSettings.h b/src/IO/ReadSettings.h index 92346615a7a..6a154c60987 100644 --- a/src/IO/ReadSettings.h +++ b/src/IO/ReadSettings.h @@ -80,6 +80,7 @@ struct ReadSettings bool enable_filesystem_cache = true; size_t filesystem_cache_max_wait_sec = 1; bool read_from_filesystem_cache_if_exists_otherwise_bypass_cache = false; + bool enable_filesystem_cache_log = false; size_t remote_read_min_bytes_for_seek = DBMS_DEFAULT_BUFFER_SIZE; diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 84f18d66196..293217d7a16 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -3407,6 +3407,7 @@ ReadSettings Context::getReadSettings() const res.enable_filesystem_cache = settings.enable_filesystem_cache; res.filesystem_cache_max_wait_sec = settings.filesystem_cache_max_wait_sec; res.read_from_filesystem_cache_if_exists_otherwise_bypass_cache = settings.read_from_filesystem_cache_if_exists_otherwise_bypass_cache; + res.enable_filesystem_cache_log = settings.enable_filesystem_cache_log; res.remote_read_min_bytes_for_seek = settings.remote_read_min_bytes_for_seek; diff --git a/src/Interpreters/FilesystemCacheLog.cpp b/src/Interpreters/FilesystemCacheLog.cpp index 9eec65a38ab..a8470ca15ba 100644 --- a/src/Interpreters/FilesystemCacheLog.cpp +++ b/src/Interpreters/FilesystemCacheLog.cpp @@ -37,6 +37,7 @@ NamesAndTypesList FilesystemCacheLogElement::getNamesAndTypes() {"file_segment_range", std::make_shared(std::move(types))}, {"size", std::make_shared()}, {"read_type", std::make_shared()}, + {"cache_attempted", std::make_shared()}, }; } @@ -53,6 +54,7 @@ void FilesystemCacheLogElement::appendToBlock(MutableColumns & columns) const columns[i++]->insert(Tuple{file_segment_range.first, file_segment_range.second}); columns[i++]->insert(file_segment_size); columns[i++]->insert(typeToString(read_type)); + columns[i++]->insert(cache_attempted); } }; diff --git a/src/Interpreters/FilesystemCacheLog.h b/src/Interpreters/FilesystemCacheLog.h index 208f53a76cf..bef5a82d39b 100644 --- a/src/Interpreters/FilesystemCacheLog.h +++ b/src/Interpreters/FilesystemCacheLog.h @@ -39,6 +39,7 @@ struct FilesystemCacheLogElement std::pair file_segment_range{}; ReadType read_type{}; size_t file_segment_size; + bool cache_attempted; static std::string name() { return "FilesystemCacheLog"; }