From 10b8684003035f811fece12c70f2d85811f21267 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Sat, 5 Feb 2022 01:12:09 +0300 Subject: [PATCH] fix rare bug in reading of empty arrays --- src/Compression/CompressedReadBufferFromFile.cpp | 1 - src/IO/ReadBufferFromFileDescriptor.cpp | 9 +++++++-- src/IO/ReadBufferFromFileDescriptor.h | 3 +++ 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/src/Compression/CompressedReadBufferFromFile.cpp b/src/Compression/CompressedReadBufferFromFile.cpp index 1a70b27e9f4..b8ce485abc5 100644 --- a/src/Compression/CompressedReadBufferFromFile.cpp +++ b/src/Compression/CompressedReadBufferFromFile.cpp @@ -113,7 +113,6 @@ size_t CompressedReadBufferFromFile::readBig(char * to, size_t n) /// need to skip some bytes in decompressed data (seek happened before readBig call). if (nextimpl_working_buffer_offset == 0 && size_decompressed + additional_size_at_the_end_of_buffer <= n - bytes_read) { - decompressTo(to + bytes_read, size_decompressed, size_compressed_without_checksum); bytes_read += size_decompressed; bytes += size_decompressed; diff --git a/src/IO/ReadBufferFromFileDescriptor.cpp b/src/IO/ReadBufferFromFileDescriptor.cpp index ed6b1a60181..0e538dc5224 100644 --- a/src/IO/ReadBufferFromFileDescriptor.cpp +++ b/src/IO/ReadBufferFromFileDescriptor.cpp @@ -111,6 +111,7 @@ bool ReadBufferFromFileDescriptor::nextImpl() ProfileEvents::increment(ProfileEvents::ReadBufferFromFileDescriptorReadBytes, bytes_read); working_buffer = internal_buffer; working_buffer.resize(bytes_read); + buffer_is_dirty = false; } else return false; @@ -152,10 +153,10 @@ off_t ReadBufferFromFileDescriptor::seek(off_t offset, int whence) } /// Position is unchanged. 
- if (new_pos + (working_buffer.end() - pos) == file_offset_of_buffer_end) + if (!buffer_is_dirty && (new_pos + (working_buffer.end() - pos) == file_offset_of_buffer_end)) return new_pos; - if (file_offset_of_buffer_end - working_buffer.size() <= static_cast<size_t>(new_pos) + if (!buffer_is_dirty && file_offset_of_buffer_end - working_buffer.size() <= static_cast<size_t>(new_pos) && new_pos <= file_offset_of_buffer_end) { /// Position is still inside the buffer. @@ -179,6 +180,10 @@ off_t ReadBufferFromFileDescriptor::seek(off_t offset, int whence) /// First put position at the end of the buffer so the next read will fetch new data to the buffer. pos = working_buffer.end(); + /// Mark buffer as dirty to disallow further seek optimizations, because fetching data to the buffer + /// is delayed to the next call of 'nextImpl', but it may be not called before next seek. + buffer_is_dirty = true; + /// In case of using 'pread' we just update the info about the next position in file. /// In case of using 'read' we call 'lseek'. diff --git a/src/IO/ReadBufferFromFileDescriptor.h b/src/IO/ReadBufferFromFileDescriptor.h index 188cdd709b5..48acd5d323e 100644 --- a/src/IO/ReadBufferFromFileDescriptor.h +++ b/src/IO/ReadBufferFromFileDescriptor.h @@ -62,6 +62,9 @@ public: private: /// Assuming file descriptor supports 'select', check that we have data to read or wait until timeout. bool poll(size_t timeout_microseconds); + + /// If it's true then we cannot assume on content of buffer to optimize seek calls. + bool buffer_is_dirty = true; };