From 561c87222df972748f3f6bc884078eee99db76ad Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Thu, 26 May 2022 11:04:35 +0800 Subject: [PATCH] add prefetch for hive text --- src/Storages/HDFS/AsynchronousReadBufferFromHDFS.cpp | 7 +++++++ src/Storages/HDFS/AsynchronousReadBufferFromHDFS.h | 1 + src/Storages/Hive/StorageHive.cpp | 3 +++ 3 files changed, 11 insertions(+) diff --git a/src/Storages/HDFS/AsynchronousReadBufferFromHDFS.cpp b/src/Storages/HDFS/AsynchronousReadBufferFromHDFS.cpp index c18364306d0..35dccf644d6 100644 --- a/src/Storages/HDFS/AsynchronousReadBufferFromHDFS.cpp +++ b/src/Storages/HDFS/AsynchronousReadBufferFromHDFS.cpp @@ -42,9 +42,12 @@ AsynchronousReadBufferFromHDFS::AsynchronousReadBufferFromHDFS( , impl(std::move(impl_)) , prefetch_buffer(settings_.remote_fs_buffer_size) , read_until_position(impl->getFileSize()) + , use_prefetch(settings_.remote_fs_prefetch) , log(&Poco::Logger::get("AsynchronousReadBufferFromHDFS")) { ProfileEvents::increment(ProfileEvents::RemoteFSBuffers); + if (use_prefetch) + prefetch(); } bool AsynchronousReadBufferFromHDFS::hasPendingDataToRead() @@ -142,6 +145,10 @@ bool AsynchronousReadBufferFromHDFS::nextImpl() file_offset_of_buffer_end = impl->getFileOffsetOfBufferEnd(); prefetch_future = {}; + + if (use_prefetch) + prefetch(); + return size; } diff --git a/src/Storages/HDFS/AsynchronousReadBufferFromHDFS.h b/src/Storages/HDFS/AsynchronousReadBufferFromHDFS.h index 43ab0c09834..dd86ffac1d4 100644 --- a/src/Storages/HDFS/AsynchronousReadBufferFromHDFS.h +++ b/src/Storages/HDFS/AsynchronousReadBufferFromHDFS.h @@ -57,6 +57,7 @@ private: size_t file_offset_of_buffer_end = 0; std::optional read_until_position; + bool use_prefetch; Poco::Logger * log; }; diff --git a/src/Storages/Hive/StorageHive.cpp b/src/Storages/Hive/StorageHive.cpp index 6a8ea4b4ca5..0f21403673c 100644 --- a/src/Storages/Hive/StorageHive.cpp +++ b/src/Storages/Hive/StorageHive.cpp @@ -133,6 +133,9 @@ public: if (to_read_block.has(name_type.name)) to_read_block.erase(name_type.name); } + + /// Apply read buffer prefetch for HiveText format, because it is read sequentially + read_settings.remote_fs_prefetch = format == "HiveText"; } FormatSettings updateFormatSettings(const HiveFilePtr & hive_file)