#pragma once

#include <cassert>
#include <cstring>
#include <algorithm>
#include <memory>

#include <Common/Exception.h>
#include <IO/BufferBase.h>
#include <IO/AsynchronousReader.h>


namespace DB
{

namespace ErrorCodes
{
    extern const int ATTEMPT_TO_READ_AFTER_EOF;
    extern const int CANNOT_READ_ALL_DATA;
    extern const int NOT_IMPLEMENTED;
}


/** A simple abstract class for buffered data reading (char sequences) from somewhere.
  * Unlike std::istream, it provides access to the internal buffer
  * and also allows you to manually manage the position inside the buffer.
  *
  * Note! `char *`, not `const char *`, is used
  *  (so that the common code can be factored out into BufferBase, and also so that the buffer can be filled with new data).
  * This causes inconveniences - for example, when using ReadBuffer to read from a `const char *` chunk of memory,
  *  you have to use const_cast.
  *
  * Derived classes must implement the nextImpl() method.
  */
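/// Illustrative sketch (not part of this header's interface): reading from a constant chunk of memory
/// by constructing a ReadBuffer directly over it. The const_cast is exactly the inconvenience mentioned
/// above; in real code a dedicated wrapper such as ReadBufferFromMemory is normally used instead.
///
///     const char * text = "hello, world";
///     ReadBuffer buf(const_cast<char *>(text), strlen(text), 0);  /// data is already available, offset = 0
///
///     char word[5];
///     buf.readStrict(word, 5);  /// copies "hello"; throws CANNOT_READ_ALL_DATA if fewer than 5 bytes remain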
class ReadBuffer : public BufferBase
{
public:
    /** Creates a buffer and sets the piece of available data to read to zero size,
      *  so that the next() function is called to load a new portion of data into the buffer on the first read.
      */
    ReadBuffer(Position ptr, size_t size) : BufferBase(ptr, size, 0) { working_buffer.resize(0); }

    /** Used when the buffer is already full of data that can be read.
      *  (in this case, pass 0 as the offset)
      */
    ReadBuffer(Position ptr, size_t size, size_t offset) : BufferBase(ptr, size, offset) {}

    // Copying the read buffers can be dangerous because they can hold a lot of
    // memory or open files, so better to disable the copy constructor to prevent
    // accidental copying.
    ReadBuffer(const ReadBuffer &) = delete;

    // FIXME: behavior differs greatly from `BufferBase::set()` and it's very confusing.
    void set(Position ptr, size_t size) { BufferBase::set(ptr, size, 0); working_buffer.resize(0); }

    /** Read the next data and fill the buffer with it; set the position to the beginning;
      * return `false` in case of end, `true` otherwise; throw an exception if something is wrong.
      */
    bool next()
    {
        assert(!hasPendingData());
        assert(position() <= working_buffer.end());

        bytes += offset();
        bool res = nextImpl();
        if (!res)
            working_buffer = Buffer(pos, pos);
        else
        {
            pos = working_buffer.begin() + nextimpl_working_buffer_offset;
            assert(position() != working_buffer.end());
        }
        nextimpl_working_buffer_offset = 0;

        assert(position() <= working_buffer.end());

        return res;
    }


    inline void nextIfAtEnd()
    {
        if (!hasPendingData())
            next();
    }

    virtual ~ReadBuffer() = default;


    /** Unlike std::istream, it returns true if all data was read
      *  (and not in the case when there was an attempt to read after the end).
      * If the position is currently at the end of the buffer, it calls the next() method.
      * That is, it has a side effect - if the buffer is exhausted, it refills it and sets the position to the beginning.
      *
      * Trying to read after the end should throw an exception.
      */
    bool ALWAYS_INLINE eof()
    {
        return !hasPendingData() && !next();
    }
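
    /// Illustrative sketch of the usual consumption pattern built on eof(): because eof() refills the
    /// buffer as a side effect, a caller can drain any ReadBuffer without going through read(),
    /// using position() and available() provided by BufferBase (the current cursor and the number of
    /// unread bytes in working_buffer). The `drain` helper below is hypothetical, not part of this API.
    ///
    ///     std::string drain(ReadBuffer & in)
    ///     {
    ///         std::string result;
    ///         while (!in.eof())                              /// refills working_buffer when it is exhausted
    ///         {
    ///             result.append(in.position(), in.available());
    ///             in.position() += in.available();           /// mark everything in the buffer as consumed
    ///         }
    ///         return result;
    ///     }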

    void ignore()
    {
        if (!eof())
            ++pos;
        else
            throwReadAfterEOF();
    }

    void ignore(size_t n)
    {
        while (n != 0 && !eof())
        {
            size_t bytes_to_ignore = std::min(static_cast<size_t>(working_buffer.end() - pos), n);
            pos += bytes_to_ignore;
            n -= bytes_to_ignore;
        }

        if (n)
            throwReadAfterEOF();
    }

    /// This method could have been called `ignore`, with the current `ignore` called `ignoreStrict`.
    size_t tryIgnore(size_t n)
    {
        size_t bytes_ignored = 0;

        while (bytes_ignored < n && !eof())
        {
            size_t bytes_to_ignore = std::min(static_cast<size_t>(working_buffer.end() - pos), n - bytes_ignored);
            pos += bytes_to_ignore;
            bytes_ignored += bytes_to_ignore;
        }

        return bytes_ignored;
    }

    void ignoreAll()
    {
        tryIgnore(std::numeric_limits<size_t>::max());
    }

    /// Peeks a single byte.
    bool ALWAYS_INLINE peek(char & c)
    {
        if (eof())
            return false;
        c = *pos;
        return true;
    }

    /// Reads a single byte.
    [[nodiscard]] bool ALWAYS_INLINE read(char & c)
    {
        if (peek(c))
        {
            ++pos;
            return true;
        }

        return false;
    }

    void ALWAYS_INLINE readStrict(char & c)
    {
        if (read(c))
            return;
        throwReadAfterEOF();
    }

    /** Reads as many bytes as are available, but no more than n. */
    [[nodiscard]] size_t read(char * to, size_t n)
    {
        size_t bytes_copied = 0;

        while (bytes_copied < n && !eof())
        {
            size_t bytes_to_copy = std::min(static_cast<size_t>(working_buffer.end() - pos), n - bytes_copied);
            ::memcpy(to + bytes_copied, pos, bytes_to_copy);
            pos += bytes_to_copy;
            bytes_copied += bytes_to_copy;
        }

        return bytes_copied;
    }

    /** Reads n bytes; if fewer are available, throws an exception. */
    void readStrict(char * to, size_t n)
    {
        auto read_bytes = read(to, n);
        if (n != read_bytes)
            throw Exception(ErrorCodes::CANNOT_READ_ALL_DATA,
                "Cannot read all data. Bytes read: {}. Bytes expected: {}.", read_bytes, std::to_string(n));
    }
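
    /// Illustrative sketch of the difference between read() and readStrict(): read() is the
    /// "best effort" primitive and reports how much it actually copied, while readStrict() is for
    /// fixed-size fields where anything short of n bytes is an error. `in` is assumed to be a
    /// ReadBuffer & and `process` is a hypothetical consumer.
    ///
    ///     char header[4];
    ///     in.readStrict(header, sizeof(header));              /// throws CANNOT_READ_ALL_DATA on truncated input
    ///
    ///     char chunk[4096];
    ///     if (size_t n = in.read(chunk, sizeof(chunk)); n != 0)
    ///         process(chunk, n);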

    /** A method that can be implemented more efficiently in derived classes when reading sufficiently large blocks.
      * The implementation can read data directly into `to`, without superfluous copying, if `to` has enough room for it.
      * For example, a CompressedReadBuffer can decompress the data directly into `to`, if the entire decompressed block fits there.
      * By default - the same as read().
      * Don't use for small reads.
      */
    [[nodiscard]] virtual size_t readBig(char * to, size_t n) { return read(to, n); }
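
    /// Illustrative sketch (hypothetical caller): readBig() is meant for bulk transfers where the
    /// destination is large enough that a derived class may bypass its internal buffer entirely.
    ///
    ///     std::vector<char> block(1 << 20);
    ///     size_t copied = in.readBig(block.data(), block.size());   /// may read/decompress straight into `block`
    ///     block.resize(copied);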

    /** Do something to allow a faster subsequent call to nextImpl(), if possible.
      * It's used for asynchronous readers with double buffering.
      */
    virtual void prefetch() {}

    /**
     * Set the upper bound for the read range [..., position).
     * Required for reading from a remote filesystem, where it matters how much we read.
     */
    virtual void setReadUntilPosition(size_t /* position */) {}

    virtual void setReadUntilEnd() {}

    /// Read at most `size` bytes into `data` at the specified offset `offset`. First skip `ignore` bytes if `ignore` > 0.
    /// Note: this function only needs to be implemented by synchronous read buffers that are wrapped for asynchronous reading,
    /// such as ReadBufferFromRemoteFSGather and AsynchronousReadIndirectBufferFromRemoteFS.
    virtual IAsynchronousReader::Result readInto(char * /*data*/, size_t /*size*/, size_t /*offset*/, size_t /*ignore*/)
    {
        throw Exception(ErrorCodes::NOT_IMPLEMENTED, "readInto not implemented");
    }

protected:
    /// The number of bytes to skip from the initial position of `working_buffer`.
    /// Apparently this is an additional out-parameter for nextImpl(),
    /// not a real field.
    size_t nextimpl_working_buffer_offset = 0;

private:
    /** Read the next data and fill the buffer with it.
      * Return `false` in case of the end, `true` otherwise.
      * Throw an exception if something is wrong.
      */
    virtual bool nextImpl() { return false; }
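
    /// Illustrative sketch of the contract derived classes implement. `ChunkedSource` and its
    /// readChunk() call are hypothetical; real implementations usually inherit
    /// BufferWithOwnMemory<ReadBuffer> rather than carrying a fixed array. nextImpl() must refill
    /// the buffer, point `working_buffer` at the freshly filled part, and return false only at the
    /// end of the stream; the base class resets `pos` itself after the call.
    ///
    ///     class ReadBufferFromChunkedSource : public ReadBuffer
    ///     {
    ///     public:
    ///         explicit ReadBufferFromChunkedSource(ChunkedSource & source_)
    ///             : ReadBuffer(chunk, sizeof(chunk)), source(source_) {}
    ///
    ///     private:
    ///         bool nextImpl() override
    ///         {
    ///             size_t n = source.readChunk(internal_buffer.begin(), internal_buffer.size());
    ///             if (n == 0)
    ///                 return false;                                  /// end of stream
    ///             working_buffer = Buffer(internal_buffer.begin(), internal_buffer.begin() + n);
    ///             return true;
    ///         }
    ///
    ///         char chunk[4096];
    ///         ChunkedSource & source;
    ///     };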

    [[noreturn]] static void throwReadAfterEOF()
    {
        throw Exception(ErrorCodes::ATTEMPT_TO_READ_AFTER_EOF, "Attempt to read after eof");
    }
};


using ReadBufferPtr = std::shared_ptr<ReadBuffer>;


/// Due to inconsistencies in the ReadBuffer family of interfaces:
///  - some require fully wrapping the underlying buffer and owning it,
///  - some just wrap a reference without ownership,
/// we need to be able to wrap reference-only buffers in a movable, transparent proxy buffer.
/// Ensuring that such wraps are unique is the responsibility of the code author.
std::unique_ptr<ReadBuffer> wrapReadBufferReference(ReadBuffer & ref);
std::unique_ptr<ReadBuffer> wrapReadBufferPointer(ReadBufferPtr ptr);
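
/// Illustrative sketch: when a component takes a std::unique_ptr<ReadBuffer> but the caller keeps
/// owning the buffer (or holds it in a shared_ptr), the wrappers above provide a non-owning or
/// shared-ownership proxy. `parse` is a hypothetical consumer; keeping the referenced buffer alive
/// for the lifetime of the wrapper is the caller's responsibility, as noted above.
///
///     void parse(std::unique_ptr<ReadBuffer> in);              /// wants ownership of some ReadBuffer
///
///     ReadBufferFromFile file_buf("data.bin");                  /// owned here; must outlive the wrapper
///     parse(wrapReadBufferReference(file_buf));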

}