Add read-buffer prefetch for the HiveText format

This commit is contained in:
taiyang-li 2022-05-26 11:04:35 +08:00
parent a7a816dcb6
commit 561c87222d
3 changed files with 11 additions and 0 deletions

View File

@ -42,9 +42,12 @@ AsynchronousReadBufferFromHDFS::AsynchronousReadBufferFromHDFS(
, impl(std::move(impl_)) , impl(std::move(impl_))
, prefetch_buffer(settings_.remote_fs_buffer_size) , prefetch_buffer(settings_.remote_fs_buffer_size)
, read_until_position(impl->getFileSize()) , read_until_position(impl->getFileSize())
, use_prefetch(settings_.remote_fs_prefetch)
, log(&Poco::Logger::get("AsynchronousReadBufferFromHDFS")) , log(&Poco::Logger::get("AsynchronousReadBufferFromHDFS"))
{ {
ProfileEvents::increment(ProfileEvents::RemoteFSBuffers); ProfileEvents::increment(ProfileEvents::RemoteFSBuffers);
if (use_prefetch)
prefetch();
} }
bool AsynchronousReadBufferFromHDFS::hasPendingDataToRead() bool AsynchronousReadBufferFromHDFS::hasPendingDataToRead()
@ -142,6 +145,10 @@ bool AsynchronousReadBufferFromHDFS::nextImpl()
file_offset_of_buffer_end = impl->getFileOffsetOfBufferEnd(); file_offset_of_buffer_end = impl->getFileOffsetOfBufferEnd();
prefetch_future = {}; prefetch_future = {};
if (use_prefetch)
prefetch();
return size; return size;
} }

View File

@ -57,6 +57,7 @@ private:
size_t file_offset_of_buffer_end = 0; size_t file_offset_of_buffer_end = 0;
std::optional<size_t> read_until_position; std::optional<size_t> read_until_position;
bool use_prefetch;
Poco::Logger * log; Poco::Logger * log;
}; };

View File

@ -133,6 +133,9 @@ public:
if (to_read_block.has(name_type.name)) if (to_read_block.has(name_type.name))
to_read_block.erase(name_type.name); to_read_block.erase(name_type.name);
} }
/// Apply read buffer prefetch for HiveText format, because it is read sequentially
read_settings.remote_fs_prefetch = format == "HiveText";
} }
FormatSettings updateFormatSettings(const HiveFilePtr & hive_file) FormatSettings updateFormatSettings(const HiveFilePtr & hive_file)