2016-06-07 08:23:15 +00:00
|
|
|
#pragma once
|
|
|
|
|
|
|
|
#include <algorithm>
|
2017-02-09 10:10:13 +00:00
|
|
|
#include <memory>
|
2021-02-01 22:55:26 +00:00
|
|
|
#include <cassert>
|
2022-06-27 21:00:37 +00:00
|
|
|
#include <cstring>
|
2016-06-07 08:23:15 +00:00
|
|
|
|
2017-04-01 09:19:00 +00:00
|
|
|
#include <Common/Exception.h>
|
2022-01-10 19:39:10 +00:00
|
|
|
#include <Common/LockMemoryExceptionInThread.h>
|
2017-04-01 09:19:00 +00:00
|
|
|
#include <IO/BufferBase.h>
|
2016-06-07 08:23:15 +00:00
|
|
|
|
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
|
|
|
/// Error codes referenced by this header. They are only declared here (extern).
namespace ErrorCodes
{
    /// Used by the default WriteBuffer::nextImpl().
    extern const int CANNOT_WRITE_AFTER_END_OF_BUFFER;

    /// Used when writing to a finalized buffer or finalizing a canceled one.
    extern const int LOGICAL_ERROR;
}
|
|
|
|
|
|
|
|
|
2017-05-28 14:29:40 +00:00
|
|
|
/** A simple abstract class for buffered data writing (char sequences) somewhere.
|
|
|
|
* Unlike std::ostream, it provides access to the internal buffer,
|
|
|
|
* and also allows you to manually manage the position inside the buffer.
|
2016-06-07 08:23:15 +00:00
|
|
|
*
|
2020-02-17 18:53:59 +00:00
|
|
|
* Derived classes must implement the nextImpl() method.
|
2016-06-07 08:23:15 +00:00
|
|
|
*/
|
|
|
|
class WriteBuffer : public BufferBase
|
|
|
|
{
|
|
|
|
public:
|
2020-10-05 15:59:29 +00:00
|
|
|
using BufferBase::set;
|
2020-12-29 22:34:26 +00:00
|
|
|
using BufferBase::position;
|
2017-03-09 04:26:17 +00:00
|
|
|
void set(Position ptr, size_t size) { BufferBase::set(ptr, size, 0); }
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2017-05-28 14:29:40 +00:00
|
|
|
/** write the data in the buffer (from the beginning of the buffer to the current position);
|
2023-11-20 11:13:24 +00:00
|
|
|
* set the position to the beginning; throw an exception, if something is wrong.
|
|
|
|
*
|
|
|
|
* Next call doesn't guarantee that buffer capacity is regained after.
|
|
|
|
* Some buffers (i.g WriteBufferFromS3) flush its data only after certain amount of consumed data.
|
|
|
|
* If direct write is performed into [position(), buffer().end()) and its length is not enough,
|
|
|
|
* you need to fill it first (i.g with write call), after it the capacity is regained.
|
2016-06-07 08:23:15 +00:00
|
|
|
*/
|
2024-05-19 08:34:59 +00:00
|
|
|
void next()
|
2016-06-07 08:23:15 +00:00
|
|
|
{
|
2021-01-31 19:39:05 +00:00
|
|
|
if (!offset())
|
2016-06-07 08:23:15 +00:00
|
|
|
return;
|
2023-04-18 11:11:42 +00:00
|
|
|
|
|
|
|
auto bytes_in_buffer = offset();
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2016-06-07 08:23:15 +00:00
|
|
|
try
|
|
|
|
{
|
|
|
|
nextImpl();
|
|
|
|
}
|
|
|
|
catch (...)
|
|
|
|
{
|
2017-05-28 14:29:40 +00:00
|
|
|
/** If the nextImpl() call was unsuccessful, move the cursor to the beginning,
|
|
|
|
* so that later (for example, when the stack was expanded) there was no second attempt to write data.
|
2016-06-07 08:23:15 +00:00
|
|
|
*/
|
|
|
|
pos = working_buffer.begin();
|
2023-04-18 11:11:42 +00:00
|
|
|
bytes += bytes_in_buffer;
|
2024-06-21 18:12:34 +00:00
|
|
|
|
2016-06-07 08:23:15 +00:00
|
|
|
throw;
|
|
|
|
}
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2023-04-18 11:11:42 +00:00
|
|
|
bytes += bytes_in_buffer;
|
2016-06-07 08:23:15 +00:00
|
|
|
pos = working_buffer.begin();
|
|
|
|
}
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2023-05-31 13:47:00 +00:00
|
|
|
/// Calling finalize() in the destructor of derived classes is a bad practice.
|
2023-06-16 14:34:50 +00:00
|
|
|
virtual ~WriteBuffer();
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2024-05-19 08:34:59 +00:00
|
|
|
void nextIfAtEnd()
|
2016-06-07 08:23:15 +00:00
|
|
|
{
|
|
|
|
if (!hasPendingData())
|
|
|
|
next();
|
|
|
|
}
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2016-06-07 08:23:15 +00:00
|
|
|
void write(const char * from, size_t n)
|
|
|
|
{
|
2021-11-22 11:19:26 +00:00
|
|
|
if (finalized)
|
|
|
|
throw Exception{ErrorCodes::LOGICAL_ERROR, "Cannot write to finalized buffer"};
|
|
|
|
|
2016-06-07 08:23:15 +00:00
|
|
|
size_t bytes_copied = 0;
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2021-02-01 22:55:26 +00:00
|
|
|
/// Produces endless loop
|
2021-02-04 23:14:17 +00:00
|
|
|
assert(!working_buffer.empty());
|
2021-02-01 22:55:26 +00:00
|
|
|
|
2016-06-07 08:23:15 +00:00
|
|
|
while (bytes_copied < n)
|
|
|
|
{
|
|
|
|
nextIfAtEnd();
|
|
|
|
size_t bytes_to_copy = std::min(static_cast<size_t>(working_buffer.end() - pos), n - bytes_copied);
|
2018-12-20 18:16:06 +00:00
|
|
|
memcpy(pos, from + bytes_copied, bytes_to_copy);
|
2016-06-07 08:23:15 +00:00
|
|
|
pos += bytes_to_copy;
|
|
|
|
bytes_copied += bytes_to_copy;
|
|
|
|
}
|
|
|
|
}
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2024-05-19 08:34:59 +00:00
|
|
|
void write(char x)
|
2016-06-07 08:23:15 +00:00
|
|
|
{
|
2021-11-22 11:19:26 +00:00
|
|
|
if (finalized)
|
|
|
|
throw Exception{ErrorCodes::LOGICAL_ERROR, "Cannot write to finalized buffer"};
|
|
|
|
|
2016-06-07 08:23:15 +00:00
|
|
|
nextIfAtEnd();
|
|
|
|
*pos = x;
|
|
|
|
++pos;
|
|
|
|
}
|
|
|
|
|
2022-02-01 10:36:51 +00:00
|
|
|
/// This method may be called before finalize() to tell there would not be any more data written.
|
|
|
|
/// Used does not have to call it, implementation should check it itself if needed.
|
|
|
|
///
|
|
|
|
/// The idea is similar to prefetch. In case if all data is written, we can flush the buffer
|
|
|
|
/// and start sending data asynchronously. It may improve writing performance in case you have
|
|
|
|
/// multiple files to finalize. Mainly, for blob storage, finalization has high latency,
|
|
|
|
/// and calling preFinalize in a loop may parallelize it.
|
|
|
|
virtual void preFinalize() { next(); }
|
2021-12-29 14:04:21 +00:00
|
|
|
|
2021-11-10 22:58:56 +00:00
|
|
|
/// Write the last data.
|
|
|
|
void finalize()
|
|
|
|
{
|
|
|
|
if (finalized)
|
|
|
|
return;
|
|
|
|
|
2024-05-27 22:09:39 +00:00
|
|
|
if (canceled)
|
2024-05-28 13:20:45 +00:00
|
|
|
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot finalize buffer after cancellation.");
|
2024-05-27 22:09:39 +00:00
|
|
|
|
2022-01-10 19:39:10 +00:00
|
|
|
LockMemoryExceptionInThread lock(VariableContext::Global);
|
2021-11-22 11:19:26 +00:00
|
|
|
try
|
|
|
|
{
|
|
|
|
finalizeImpl();
|
|
|
|
finalized = true;
|
|
|
|
}
|
|
|
|
catch (...)
|
|
|
|
{
|
|
|
|
pos = working_buffer.begin();
|
2024-06-21 18:12:34 +00:00
|
|
|
|
|
|
|
cancel();
|
|
|
|
|
2021-11-22 11:19:26 +00:00
|
|
|
throw;
|
|
|
|
}
|
2021-11-10 22:58:56 +00:00
|
|
|
}
|
|
|
|
|
2024-06-25 21:04:47 +00:00
|
|
|
void cancel() noexcept;
|
2024-05-27 22:09:39 +00:00
|
|
|
|
2022-02-01 10:36:51 +00:00
|
|
|
/// Wait for data to be reliably written. Mainly, call fsync for fd.
|
|
|
|
/// May be called after finalize() if needed.
|
|
|
|
virtual void sync()
|
|
|
|
{
|
|
|
|
next();
|
|
|
|
}
|
|
|
|
|
2021-11-10 22:58:56 +00:00
|
|
|
protected:
|
2023-06-01 17:00:47 +00:00
|
|
|
WriteBuffer(Position ptr, size_t size) : BufferBase(ptr, size, 0) {}
|
|
|
|
|
2021-11-10 22:58:56 +00:00
|
|
|
virtual void finalizeImpl()
|
2021-02-13 10:12:55 +00:00
|
|
|
{
|
|
|
|
next();
|
|
|
|
}
|
2019-11-19 12:46:07 +00:00
|
|
|
|
2024-06-21 18:12:34 +00:00
|
|
|
virtual void cancelImpl() noexcept
|
2024-05-27 22:09:39 +00:00
|
|
|
{
|
|
|
|
}
|
|
|
|
|
2021-11-10 22:58:56 +00:00
|
|
|
bool finalized = false;
|
2024-05-27 22:09:39 +00:00
|
|
|
bool canceled = false;
|
2019-11-19 12:46:07 +00:00
|
|
|
|
2016-06-07 08:23:15 +00:00
|
|
|
private:
|
2017-05-28 14:29:40 +00:00
|
|
|
/** Write the data in the buffer (from the beginning of the buffer to the current position).
|
|
|
|
* Throw an exception if something is wrong.
|
2016-06-07 08:23:15 +00:00
|
|
|
*/
|
2023-06-01 17:00:47 +00:00
|
|
|
virtual void nextImpl()
|
|
|
|
{
|
|
|
|
throw Exception(ErrorCodes::CANNOT_WRITE_AFTER_END_OF_BUFFER, "Cannot write after end of buffer.");
|
|
|
|
}
|
2016-06-07 08:23:15 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
|
2017-02-09 10:10:13 +00:00
|
|
|
/// Shared-ownership handle (std::shared_ptr) to a WriteBuffer.
using WriteBufferPtr = std::shared_ptr<WriteBuffer>;
|
|
|
|
|
|
|
|
|
2023-06-01 17:00:47 +00:00
|
|
|
class WriteBufferFromPointer : public WriteBuffer
|
|
|
|
{
|
|
|
|
public:
|
|
|
|
WriteBufferFromPointer(Position ptr, size_t size) : WriteBuffer(ptr, size) {}
|
|
|
|
|
|
|
|
private:
|
2024-02-28 23:24:27 +00:00
|
|
|
void finalizeImpl() override
|
2023-06-01 17:00:47 +00:00
|
|
|
{
|
|
|
|
/// no op
|
|
|
|
}
|
|
|
|
|
2024-02-28 23:24:27 +00:00
|
|
|
void sync() override
|
2023-06-01 17:00:47 +00:00
|
|
|
{
|
|
|
|
/// no on
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
2016-06-07 08:23:15 +00:00
|
|
|
}
|