// ClickHouse/src/IO/WriteBuffer.h

#pragma once
#include <algorithm>
#include <memory>
#include <cassert>
#include <cstring>
#include <Common/Exception.h>
#include <Common/LockMemoryExceptionInThread.h>
#include <IO/BufferBase.h>
namespace DB
{
/// Error codes referenced by the write buffers in this header.
/// They are only declared here; the definitions live elsewhere in the project.
namespace ErrorCodes
{
    extern const int CANNOT_WRITE_AFTER_END_OF_BUFFER;
    extern const int LOGICAL_ERROR;
}
2017-05-28 14:29:40 +00:00
/** A simple abstract class for buffered data writing (char sequences) somewhere.
* Unlike std::ostream, it provides access to the internal buffer,
* and also allows you to manually manage the position inside the buffer.
*
* Derived classes must implement the nextImpl() method.
*/
class WriteBuffer : public BufferBase
{
public:
2020-10-05 15:59:29 +00:00
using BufferBase::set;
2020-12-29 22:34:26 +00:00
using BufferBase::position;
2017-03-09 04:26:17 +00:00
void set(Position ptr, size_t size) { BufferBase::set(ptr, size, 0); }
2017-05-28 14:29:40 +00:00
/** write the data in the buffer (from the beginning of the buffer to the current position);
2023-11-20 11:13:24 +00:00
* set the position to the beginning; throw an exception, if something is wrong.
*
* Next call doesn't guarantee that buffer capacity is regained after.
* Some buffers (i.g WriteBufferFromS3) flush its data only after certain amount of consumed data.
* If direct write is performed into [position(), buffer().end()) and its length is not enough,
* you need to fill it first (i.g with write call), after it the capacity is regained.
*/
void next()
{
if (!offset())
return;
2023-04-18 11:11:42 +00:00
auto bytes_in_buffer = offset();
try
{
nextImpl();
}
catch (...)
{
2017-05-28 14:29:40 +00:00
/** If the nextImpl() call was unsuccessful, move the cursor to the beginning,
* so that later (for example, when the stack was expanded) there was no second attempt to write data.
*/
pos = working_buffer.begin();
2023-04-18 11:11:42 +00:00
bytes += bytes_in_buffer;
throw;
}
2023-04-18 11:11:42 +00:00
bytes += bytes_in_buffer;
pos = working_buffer.begin();
}
/// Calling finalize() in the destructor of derived classes is a bad practice.
virtual ~WriteBuffer();
void nextIfAtEnd()
{
if (!hasPendingData())
next();
}
void write(const char * from, size_t n)
{
2021-11-22 11:19:26 +00:00
if (finalized)
throw Exception{ErrorCodes::LOGICAL_ERROR, "Cannot write to finalized buffer"};
size_t bytes_copied = 0;
/// Produces endless loop
2021-02-04 23:14:17 +00:00
assert(!working_buffer.empty());
while (bytes_copied < n)
{
nextIfAtEnd();
size_t bytes_to_copy = std::min(static_cast<size_t>(working_buffer.end() - pos), n - bytes_copied);
memcpy(pos, from + bytes_copied, bytes_to_copy);
pos += bytes_to_copy;
bytes_copied += bytes_to_copy;
}
}
void write(char x)
{
2021-11-22 11:19:26 +00:00
if (finalized)
throw Exception{ErrorCodes::LOGICAL_ERROR, "Cannot write to finalized buffer"};
nextIfAtEnd();
*pos = x;
++pos;
}
/// This method may be called before finalize() to tell there would not be any more data written.
/// Used does not have to call it, implementation should check it itself if needed.
///
/// The idea is similar to prefetch. In case if all data is written, we can flush the buffer
/// and start sending data asynchronously. It may improve writing performance in case you have
/// multiple files to finalize. Mainly, for blob storage, finalization has high latency,
/// and calling preFinalize in a loop may parallelize it.
virtual void preFinalize() { next(); }
2021-12-29 14:04:21 +00:00
2021-11-10 22:58:56 +00:00
/// Write the last data.
void finalize()
{
if (finalized)
return;
2024-05-27 22:09:39 +00:00
if (canceled)
2024-05-28 13:20:45 +00:00
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot finalize buffer after cancellation.");
2024-05-27 22:09:39 +00:00
LockMemoryExceptionInThread lock(VariableContext::Global);
2021-11-22 11:19:26 +00:00
try
{
finalizeImpl();
finalized = true;
}
catch (...)
{
pos = working_buffer.begin();
cancel();
2021-11-22 11:19:26 +00:00
throw;
}
2021-11-10 22:58:56 +00:00
}
2024-06-25 21:04:47 +00:00
void cancel() noexcept;
2024-05-27 22:09:39 +00:00
/// Wait for data to be reliably written. Mainly, call fsync for fd.
/// May be called after finalize() if needed.
virtual void sync()
{
next();
}
2021-11-10 22:58:56 +00:00
protected:
2023-06-01 17:00:47 +00:00
WriteBuffer(Position ptr, size_t size) : BufferBase(ptr, size, 0) {}
2021-11-10 22:58:56 +00:00
virtual void finalizeImpl()
{
next();
}
virtual void cancelImpl() noexcept
2024-05-27 22:09:39 +00:00
{
}
2021-11-10 22:58:56 +00:00
bool finalized = false;
2024-05-27 22:09:39 +00:00
bool canceled = false;
private:
2017-05-28 14:29:40 +00:00
/** Write the data in the buffer (from the beginning of the buffer to the current position).
* Throw an exception if something is wrong.
*/
2023-06-01 17:00:47 +00:00
virtual void nextImpl()
{
throw Exception(ErrorCodes::CANNOT_WRITE_AFTER_END_OF_BUFFER, "Cannot write after end of buffer.");
}
};
using WriteBufferPtr = std::shared_ptr<WriteBuffer>;
2023-06-01 17:00:47 +00:00
class WriteBufferFromPointer : public WriteBuffer
{
public:
WriteBufferFromPointer(Position ptr, size_t size) : WriteBuffer(ptr, size) {}
private:
void finalizeImpl() override
2023-06-01 17:00:47 +00:00
{
/// no op
}
void sync() override
2023-06-01 17:00:47 +00:00
{
/// no on
}
};
}