fix rare bug in reading of empty arrays

This commit is contained in:
Anton Popov 2022-02-05 01:12:09 +03:00 committed by mergify-bot
parent 1482d61bef
commit 10b8684003
3 changed files with 10 additions and 3 deletions

View File

@ -113,7 +113,6 @@ size_t CompressedReadBufferFromFile::readBig(char * to, size_t n)
/// need to skip some bytes in decompressed data (seek happened before readBig call).
if (nextimpl_working_buffer_offset == 0 && size_decompressed + additional_size_at_the_end_of_buffer <= n - bytes_read)
{
decompressTo(to + bytes_read, size_decompressed, size_compressed_without_checksum);
bytes_read += size_decompressed;
bytes += size_decompressed;

View File

@ -111,6 +111,7 @@ bool ReadBufferFromFileDescriptor::nextImpl()
ProfileEvents::increment(ProfileEvents::ReadBufferFromFileDescriptorReadBytes, bytes_read);
working_buffer = internal_buffer;
working_buffer.resize(bytes_read);
buffer_is_dirty = false;
}
else
return false;
@ -152,10 +153,10 @@ off_t ReadBufferFromFileDescriptor::seek(off_t offset, int whence)
}
/// Position is unchanged.
if (new_pos + (working_buffer.end() - pos) == file_offset_of_buffer_end)
if (!buffer_is_dirty && (new_pos + (working_buffer.end() - pos) == file_offset_of_buffer_end))
return new_pos;
if (file_offset_of_buffer_end - working_buffer.size() <= static_cast<size_t>(new_pos)
if (!buffer_is_dirty && file_offset_of_buffer_end - working_buffer.size() <= static_cast<size_t>(new_pos)
&& new_pos <= file_offset_of_buffer_end)
{
/// Position is still inside the buffer.
@ -179,6 +180,10 @@ off_t ReadBufferFromFileDescriptor::seek(off_t offset, int whence)
/// First put position at the end of the buffer so the next read will fetch new data to the buffer.
pos = working_buffer.end();
/// Mark the buffer as dirty to disallow further seek optimizations, because fetching data into the buffer
/// is delayed until the next call of 'nextImpl', which may not be called before the next seek.
buffer_is_dirty = true;
/// In case of using 'pread' we just update the info about the next position in file.
/// In case of using 'read' we call 'lseek'.

View File

@ -62,6 +62,9 @@ public:
private:
/// Assuming file descriptor supports 'select', check that we have data to read or wait until timeout.
bool poll(size_t timeout_microseconds);
/// If true, we cannot rely on the contents of the buffer to optimize seek calls.
bool buffer_is_dirty = true;
};