ClickHouse/src/IO/PeekableReadBuffer.h

#pragma once

#include <IO/ReadBuffer.h>
#include <IO/BufferWithOwnMemory.h>

#include <optional>
#include <stack>

#include <boost/noncopyable.hpp>

namespace DB
{

/// Allows to set a checkpoint at some position in the stream and come back to this position later.
/// When next() is called, saves data between the checkpoint and the current position to its own memory
/// and loads the next data into the sub-buffer.
/// The sub-buffer should not be accessed directly during the lifetime of the peekable buffer (unless
/// you reset() the state of the peekable buffer after each change of the underlying buffer).
/// If position() of the peekable buffer is explicitly set to some position before the checkpoint
/// (e.g. by istr.position() = prev_pos), the behavior is undefined.
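///
/// A minimal usage sketch (illustrative; assumes a ReadBufferFromString as the underlying buffer):
///
///     ReadBufferFromString in("hello world");
///     PeekableReadBuffer peekable(in);
///     peekable.setCheckpoint();
///     char prefix[5];
///     peekable.read(prefix, 5);           /// Look ahead at the first 5 bytes ("hello").
///     peekable.rollbackToCheckpoint();    /// Return to the checkpointed position,
///     peekable.dropCheckpoint();          /// then forget it; reading resumes from "hello".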
class PeekableReadBuffer : public BufferWithOwnMemory<ReadBuffer>
{
    friend class PeekableReadBufferCheckpoint;

public:
    explicit PeekableReadBuffer(ReadBuffer & sub_buf_, size_t start_size_ = 0);

    ~PeekableReadBuffer() override;

    void prefetch(Priority priority) override { sub_buf->prefetch(priority); }

    /// Sets a checkpoint at the current position.
    ALWAYS_INLINE inline void setCheckpoint()
    {
        if (checkpoint)
        {
            /// Recursive checkpoints. We just remember the offset from the
            /// first checkpoint to the current position.
            recursive_checkpoints_offsets.push(offsetFromCheckpoint());
            return;
        }

        checkpoint_in_own_memory = currentlyReadFromOwnMemory();
        if (!checkpoint_in_own_memory)
        {
            /// Don't need to store unread data anymore.
            peeked_size = 0;
        }
        checkpoint.emplace(pos);
    }
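
    /// Checkpoints nest: calling setCheckpoint() while one is already active only records the current
    /// offset from the first checkpoint, and the matching dropCheckpoint() pops it. A sketch:
    ///
    ///     peekable.setCheckpoint();      /// Outer checkpoint.
    ///     peekable.setCheckpoint();      /// Inner: pushes the offset onto recursive_checkpoints_offsets.
    ///     peekable.dropCheckpoint();     /// Pops the inner offset; the outer checkpoint still holds.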

    /// Forget the checkpoint and all data between the checkpoint and the current position.
    ALWAYS_INLINE inline void dropCheckpoint()
    {
        assert(checkpoint);

        if (!recursive_checkpoints_offsets.empty())
        {
            recursive_checkpoints_offsets.pop();
            return;
        }

        if (!currentlyReadFromOwnMemory())
        {
            /// Don't need to store unread data anymore.
            peeked_size = 0;
        }
        checkpoint = std::nullopt;
        checkpoint_in_own_memory = false;
    }

    /// Sets the position at the checkpoint.
    /// All pointers (such as this->buffer().end()) may be invalidated.
    void rollbackToCheckpoint(bool drop = false);

    /// If the checkpoint and the current position are in different buffers, appends data from the sub-buffer
    /// to own memory, so that the data between the checkpoint and the position is in continuous memory.
    void makeContinuousMemoryFromCheckpointToPos();
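
    /// Illustrative use (skipToNextLine is a hypothetical read-ahead helper): read ahead, flatten,
    /// then rollback to inspect everything read since the checkpoint as one continuous region:
    ///
    ///     peekable.setCheckpoint();
    ///     skipToNextLine(peekable);
    ///     peekable.makeContinuousMemoryFromCheckpointToPos();
    ///     peekable.rollbackToCheckpoint();
    ///     /// Bytes from the checkpoint up to the previous position are now in one memory region.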

    /// Returns true if there is unread data extracted from the sub-buffer into own memory.
    /// This data will be lost after destruction of the peekable buffer.
    bool hasUnreadData() const;

    const ReadBuffer & getSubBuffer() const { return *sub_buf; }

private:
    bool nextImpl() override;
    void resetImpl();

    bool peekNext();

    inline bool useSubbufferOnly() const { return !peeked_size; }
    inline bool currentlyReadFromOwnMemory() const { return working_buffer.begin() != sub_buf->buffer().begin(); }
    inline bool checkpointInOwnMemory() const { return checkpoint_in_own_memory; }

    void checkStateCorrect() const;

    /// Makes it possible to append `bytes_to_append` bytes to the data in own memory.
    /// Updates all invalidated pointers and sizes.
    void resizeOwnMemoryIfNecessary(size_t bytes_to_append);

    char * getMemoryData() { return use_stack_memory ? stack_memory : memory.data(); }
    const char * getMemoryData() const { return use_stack_memory ? stack_memory : memory.data(); }

    size_t offsetFromCheckpointInOwnMemory() const;
    size_t offsetFromCheckpoint() const;

    ReadBuffer * sub_buf;
    size_t peeked_size = 0;
    std::optional<Position> checkpoint = std::nullopt;
    bool checkpoint_in_own_memory = false;

    /// To prevent expensive and in some cases unnecessary memory allocations on PeekableReadBuffer
    /// creation (for example, if a PeekableReadBuffer is created often, or if we need to remember only a
    /// small amount of data after the checkpoint), at the beginning we use a small amount of memory on the
    /// stack and allocate a larger buffer only if the reserved memory is not enough.
    char stack_memory[PADDING_FOR_SIMD];
    bool use_stack_memory = true;

    std::stack<size_t> recursive_checkpoints_offsets;
};

class PeekableReadBufferCheckpoint : boost::noncopyable
{
    PeekableReadBuffer & buf;
    bool auto_rollback;

public:
    explicit PeekableReadBufferCheckpoint(PeekableReadBuffer & buf_, bool auto_rollback_ = false)
        : buf(buf_), auto_rollback(auto_rollback_) { buf.setCheckpoint(); }

    ~PeekableReadBufferCheckpoint()
    {
        if (!buf.checkpoint)
            return;
        if (auto_rollback)
            buf.rollbackToCheckpoint();
        buf.dropCheckpoint();
    }
};
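
/// Typical RAII usage (a sketch; tryParseHeader is a hypothetical speculative parser). With
/// auto_rollback_ = true, leaving the scope rewinds the buffer to where the guard was created:
///
///     {
///         PeekableReadBufferCheckpoint guard(peekable, /* auto_rollback_ = */ true);
///         tryParseHeader(peekable);
///     }   /// Destructor rolls back to the checkpoint and drops it.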
}