From b0f9112696eb0ab583cb69eb709d2b6d1b09e01f Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Fri, 15 Oct 2021 10:13:11 +0300 Subject: [PATCH 1/3] Do not allow zero-length reads Since this may create pretty odd issues, since reading 0 bytes will return 0, and some code may not be ready for this. v0: add a check in ReadBuffer ctor v2: Do not create empty ReadBuffer from BufferWithOwnMemory with empty size v3: - revert "Do not create empty ReadBuffer from BufferWithOwnMemory with empty size" - Replace INVALID_SETTING_VALUE with LOGICAL_ERROR - Move the check for empty buffer in ReadBuffer into reading because of MMapReadBufferFromFile v4: replace with assert of internal_buffer.size() v5: move assertion to implementations since there are exceptions for nested readers, like LimitReadBuffer and similar. --- src/IO/ReadBufferFromFileDescriptor.cpp | 3 +++ src/IO/SynchronousReader.cpp | 3 +++ src/IO/ThreadPoolReader.cpp | 3 +++ 3 files changed, 9 insertions(+) diff --git a/src/IO/ReadBufferFromFileDescriptor.cpp b/src/IO/ReadBufferFromFileDescriptor.cpp index a710dfe33fb..ed8eba62f04 100644 --- a/src/IO/ReadBufferFromFileDescriptor.cpp +++ b/src/IO/ReadBufferFromFileDescriptor.cpp @@ -51,6 +51,9 @@ std::string ReadBufferFromFileDescriptor::getFileName() const bool ReadBufferFromFileDescriptor::nextImpl() { + /// If internal_buffer size is empty, then read() cannot be distinguished from EOF + assert(!internal_buffer.empty()); + size_t bytes_read = 0; while (!bytes_read) { diff --git a/src/IO/SynchronousReader.cpp b/src/IO/SynchronousReader.cpp index 82f29da7d91..e47ed48b7f4 100644 --- a/src/IO/SynchronousReader.cpp +++ b/src/IO/SynchronousReader.cpp @@ -37,6 +37,9 @@ namespace ErrorCodes std::future SynchronousReader::submit(Request request) { + /// If size is zero, then read() cannot be distinguished from EOF + assert(request.size); + int fd = assert_cast(*request.descriptor).fd; #if defined(POSIX_FADV_WILLNEED) diff --git a/src/IO/ThreadPoolReader.cpp b/src/IO/ThreadPoolReader.cpp index 701fa759848..273778df37c 100644 --- a/src/IO/ThreadPoolReader.cpp +++ b/src/IO/ThreadPoolReader.cpp @@ -76,6 +76,9 @@ ThreadPoolReader::ThreadPoolReader(size_t pool_size, size_t queue_size_) std::future ThreadPoolReader::submit(Request request) { + /// If size is zero, then read() cannot be distinguished from EOF + assert(request.size); + int fd = assert_cast(*request.descriptor).fd; #if defined(__linux__) From a630821f60a4d91042c8744faa7ec624577593cc Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sun, 17 Oct 2021 10:41:11 +0300 Subject: [PATCH 2/3] Do not try to read empty files. CI report [1]. [1]: https://clickhouse-test-reports.s3.yandex.net/30190/fe534553b2a0ac543795956b3fbde673cf5a342b/functional_stateless_tests_(debug).html#fail1 Fixes: 01560_ttl_remove_empty_parts in debug build (and some other tests) v0: fix MergeTreeDataPartCompact::loadIndexGranularity()/MergeTreeDataPartWide::loadIndexGranularity() v2: use EmptyReadBuffer in DiskLocal::readFile() v3: introduce ReadBufferFromEmptyFile v4: rebase against readbuffer-real-size branch --- src/IO/ReadBufferFromEmptyFile.h | 23 +++++++++++++++++++++++ src/IO/createReadBufferFromFileBase.cpp | 3 +++ 2 files changed, 26 insertions(+) create mode 100644 src/IO/ReadBufferFromEmptyFile.h diff --git a/src/IO/ReadBufferFromEmptyFile.h b/src/IO/ReadBufferFromEmptyFile.h new file mode 100644 index 00000000000..311aee1559b --- /dev/null +++ b/src/IO/ReadBufferFromEmptyFile.h @@ -0,0 +1,23 @@ +#pragma once + +#include + +namespace DB +{ + +/// In case of empty file it does not make any sense to read it. +/// +/// Plus regular readers from file has an assert that buffer is not empty, that will fail: +/// - ReadBufferFromFileDescriptor +/// - SynchronousReader +/// - ThreadPoolReader +class ReadBufferFromEmptyFile : public ReadBufferFromFileBase +{ +private: + bool nextImpl() override { return false; } + std::string getFileName() const override { return ""; } + off_t seek(off_t /*off*/, int /*whence*/) override { return 0; } + off_t getPosition() override { return 0; } +}; + +} diff --git a/src/IO/createReadBufferFromFileBase.cpp b/src/IO/createReadBufferFromFileBase.cpp index 1e06d701afc..3b2ec4d326a 100644 --- a/src/IO/createReadBufferFromFileBase.cpp +++ b/src/IO/createReadBufferFromFileBase.cpp @@ -1,4 +1,5 @@ #include +#include #include #include #include @@ -33,6 +34,8 @@ std::unique_ptr createReadBufferFromFileBase( char * existing_memory, size_t alignment) { + if (size.has_value() && !*size) + return std::make_unique(); size_t estimated_size = size.has_value() ? *size : 0; if (!existing_memory From a1926b3a559d2314cfc1e186d403db37d4008f8b Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Thu, 21 Oct 2021 23:11:50 +0300 Subject: [PATCH 3/3] Fix MergeTreeReaderCompact with empty buffer (max_read_buffer_size=0) CI: https://clickhouse-test-reports.s3.yandex.net/30190/0682d80f603d934eda51c93959164ee29eb52c02/functional_stateless_tests_(debug).html#fail1 --- src/Storages/MergeTree/MergeTreeReaderCompact.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/Storages/MergeTree/MergeTreeReaderCompact.cpp b/src/Storages/MergeTree/MergeTreeReaderCompact.cpp index 33acb4610eb..9d0a4eed78e 100644 --- a/src/Storages/MergeTree/MergeTreeReaderCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeReaderCompact.cpp @@ -71,6 +71,9 @@ MergeTreeReaderCompact::MergeTreeReaderCompact( if (buffer_size) settings.read_settings = settings.read_settings.adjustBufferSize(buffer_size); + if (!settings.read_settings.local_fs_buffer_size || !settings.read_settings.remote_fs_buffer_size) + throw Exception(ErrorCodes::CANNOT_READ_ALL_DATA, "Cannot read to empty buffer."); + const String full_data_path = data_part->getFullRelativePath() + MergeTreeDataPartCompact::DATA_FILE_NAME_WITH_EXTENSION; if (uncompressed_cache) {