2020-02-17 14:27:09 +00:00
|
|
|
#include <unistd.h>
|
2022-05-08 17:01:47 +00:00
|
|
|
#include <cerrno>
|
2020-05-15 22:11:59 +00:00
|
|
|
#include <cassert>
|
2020-07-07 11:45:20 +00:00
|
|
|
#include <sys/stat.h>
|
2016-10-25 06:49:24 +00:00
|
|
|
|
2017-04-01 09:19:00 +00:00
|
|
|
#include <Common/Exception.h>
|
|
|
|
#include <Common/ProfileEvents.h>
|
|
|
|
#include <Common/CurrentMetrics.h>
|
2019-02-10 17:40:52 +00:00
|
|
|
#include <Common/Stopwatch.h>
|
2016-10-25 06:49:24 +00:00
|
|
|
|
2017-04-01 09:19:00 +00:00
|
|
|
#include <IO/WriteBufferFromFileDescriptor.h>
|
|
|
|
#include <IO/WriteHelpers.h>
|
2016-10-25 06:49:24 +00:00
|
|
|
|
|
|
|
|
|
|
|
namespace ProfileEvents
|
|
|
|
{
|
|
|
|
extern const Event WriteBufferFromFileDescriptorWrite;
|
2016-11-29 21:56:00 +00:00
|
|
|
extern const Event WriteBufferFromFileDescriptorWriteFailed;
|
2016-10-25 06:49:24 +00:00
|
|
|
extern const Event WriteBufferFromFileDescriptorWriteBytes;
|
2018-05-28 19:53:03 +00:00
|
|
|
extern const Event DiskWriteElapsedMicroseconds;
|
2022-07-13 13:29:22 +00:00
|
|
|
extern const Event FileSync;
|
|
|
|
extern const Event FileSyncElapsedMicroseconds;
|
2016-10-25 06:49:24 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
namespace CurrentMetrics
|
|
|
|
{
|
|
|
|
extern const Metric Write;
|
|
|
|
}
|
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
|
|
|
/// Error codes attached to the exceptions thrown by the methods below; only declared here.
namespace ErrorCodes
{
    extern const int CANNOT_WRITE_TO_FILE_DESCRIPTOR;  /// nextImpl()
    extern const int CANNOT_FSYNC;                     /// sync()
    extern const int CANNOT_SEEK_THROUGH_FILE;         /// seek()
    extern const int CANNOT_TRUNCATE_FILE;             /// truncate()
    extern const int CANNOT_FSTAT;                     /// size()
}
|
|
|
|
|
|
|
|
|
|
|
|
void WriteBufferFromFileDescriptor::nextImpl()
|
|
|
|
{
|
|
|
|
if (!offset())
|
|
|
|
return;
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2018-05-31 15:54:08 +00:00
|
|
|
Stopwatch watch;
|
2018-05-28 19:53:03 +00:00
|
|
|
|
2016-10-25 06:49:24 +00:00
|
|
|
size_t bytes_written = 0;
|
|
|
|
while (bytes_written != offset())
|
|
|
|
{
|
|
|
|
ProfileEvents::increment(ProfileEvents::WriteBufferFromFileDescriptorWrite);
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2016-10-25 06:49:24 +00:00
|
|
|
ssize_t res = 0;
|
|
|
|
{
|
|
|
|
CurrentMetrics::Increment metric_increment{CurrentMetrics::Write};
|
|
|
|
res = ::write(fd, working_buffer.begin() + bytes_written, offset() - bytes_written);
|
|
|
|
}
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2016-10-25 06:49:24 +00:00
|
|
|
if ((-1 == res || 0 == res) && errno != EINTR)
|
2016-11-29 21:56:00 +00:00
|
|
|
{
|
|
|
|
ProfileEvents::increment(ProfileEvents::WriteBufferFromFileDescriptorWriteFailed);
|
2021-09-06 09:16:52 +00:00
|
|
|
|
|
|
|
/// Don't use getFileName() here because this method can be called from destructor
|
2021-09-06 10:35:37 +00:00
|
|
|
String error_file_name = file_name;
|
|
|
|
if (error_file_name.empty())
|
|
|
|
error_file_name = "(fd = " + toString(fd) + ")";
|
|
|
|
throwFromErrnoWithPath("Cannot write to file " + error_file_name, error_file_name,
|
2019-08-07 12:52:47 +00:00
|
|
|
ErrorCodes::CANNOT_WRITE_TO_FILE_DESCRIPTOR);
|
2016-11-29 21:56:00 +00:00
|
|
|
}
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2016-10-25 06:49:24 +00:00
|
|
|
if (res > 0)
|
|
|
|
bytes_written += res;
|
|
|
|
}
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2018-05-31 15:54:08 +00:00
|
|
|
ProfileEvents::increment(ProfileEvents::DiskWriteElapsedMicroseconds, watch.elapsedMicroseconds());
|
2016-10-25 06:49:24 +00:00
|
|
|
ProfileEvents::increment(ProfileEvents::WriteBufferFromFileDescriptorWriteBytes, bytes_written);
|
|
|
|
}
|
|
|
|
|
2021-09-06 10:35:37 +00:00
|
|
|
/// NOTE: This class can be used as a very low-level building block, for example
|
|
|
|
/// in trace collector. In such places allocations of memory can be dangerous,
|
2021-09-06 11:34:17 +00:00
|
|
|
/// so don't allocate anything in this constructor.
|
2016-10-25 06:49:24 +00:00
|
|
|
WriteBufferFromFileDescriptor::WriteBufferFromFileDescriptor(
|
|
|
|
int fd_,
|
|
|
|
size_t buf_size,
|
|
|
|
char * existing_memory,
|
2021-09-06 09:16:52 +00:00
|
|
|
size_t alignment,
|
2021-09-06 10:35:37 +00:00
|
|
|
std::string file_name_)
|
2021-09-06 09:16:52 +00:00
|
|
|
: WriteBufferFromFileBase(buf_size, existing_memory, alignment)
|
|
|
|
, fd(fd_)
|
2021-09-06 10:35:37 +00:00
|
|
|
, file_name(std::move(file_name_))
|
2021-09-06 09:16:52 +00:00
|
|
|
{
|
|
|
|
}
|
2016-10-25 06:49:24 +00:00
|
|
|
|
|
|
|
|
|
|
|
/// Finalizes the buffer on destruction.
/// finalize() is called without a surrounding try/catch, so a failure of the
/// final flush is not swallowed here.
WriteBufferFromFileDescriptor::~WriteBufferFromFileDescriptor()
{
    finalize();
}
|
|
|
|
|
|
|
|
void WriteBufferFromFileDescriptor::finalizeImpl()
|
2016-10-25 06:49:24 +00:00
|
|
|
{
|
Do not catch exceptions during final flush in writers destructors
Since this hides real problems, since destructor does final flush and if
it fails, then data will be lost.
One of such examples if MEMORY_LIMIT_EXCEEDED exception, so lock
exceptions from destructors, by using
MemoryTracker::LockExceptionInThread to block these exception, and allow
others (so std::terminate will be called, since this is c++11 with
noexcept for destructors by default).
Here is an example, that leads to empty block in the distributed batch:
2021.01.21 12:43:18.619739 [ 46468 ] {7bd60d75-ebcb-45d2-874d-260df9a4ddac} <Error> virtual DB::CompressedWriteBuffer::~CompressedWriteBuffer(): Code: 241, e.displayText() = DB::Exception: Memory limit (for user) exceeded: would use 332.07 GiB (attempt to allocate chunk of 4355342 bytes), maximum: 256.00 GiB, Stack trace (when copying this message, always include the lines below):
0. DB::Exception::Exception<>() @ 0x86f7b88 in /usr/bin/clickhouse
...
4. void DB::PODArrayBase<>::resize<>(unsigned long) @ 0xe9e878d in /usr/bin/clickhouse
5. DB::CompressedWriteBuffer::nextImpl() @ 0xe9f0296 in /usr/bin/clickhouse
6. DB::CompressedWriteBuffer::~CompressedWriteBuffer() @ 0xe9f0415 in /usr/bin/clickhouse
7. DB::DistributedBlockOutputStream::writeToShard() @ 0xf6bed4a in /usr/bin/clickhouse
2021-01-22 18:56:50 +00:00
|
|
|
if (fd < 0)
|
2016-10-25 06:49:24 +00:00
|
|
|
{
|
Do not catch exceptions during final flush in writers destructors
Since this hides real problems, since destructor does final flush and if
it fails, then data will be lost.
One of such examples if MEMORY_LIMIT_EXCEEDED exception, so lock
exceptions from destructors, by using
MemoryTracker::LockExceptionInThread to block these exception, and allow
others (so std::terminate will be called, since this is c++11 with
noexcept for destructors by default).
Here is an example, that leads to empty block in the distributed batch:
2021.01.21 12:43:18.619739 [ 46468 ] {7bd60d75-ebcb-45d2-874d-260df9a4ddac} <Error> virtual DB::CompressedWriteBuffer::~CompressedWriteBuffer(): Code: 241, e.displayText() = DB::Exception: Memory limit (for user) exceeded: would use 332.07 GiB (attempt to allocate chunk of 4355342 bytes), maximum: 256.00 GiB, Stack trace (when copying this message, always include the lines below):
0. DB::Exception::Exception<>() @ 0x86f7b88 in /usr/bin/clickhouse
...
4. void DB::PODArrayBase<>::resize<>(unsigned long) @ 0xe9e878d in /usr/bin/clickhouse
5. DB::CompressedWriteBuffer::nextImpl() @ 0xe9f0296 in /usr/bin/clickhouse
6. DB::CompressedWriteBuffer::~CompressedWriteBuffer() @ 0xe9f0415 in /usr/bin/clickhouse
7. DB::DistributedBlockOutputStream::writeToShard() @ 0xf6bed4a in /usr/bin/clickhouse
2021-01-22 18:56:50 +00:00
|
|
|
assert(!offset() && "attempt to write after close");
|
|
|
|
return;
|
2016-10-25 06:49:24 +00:00
|
|
|
}
|
Do not catch exceptions during final flush in writers destructors
Since this hides real problems, since destructor does final flush and if
it fails, then data will be lost.
One of such examples if MEMORY_LIMIT_EXCEEDED exception, so lock
exceptions from destructors, by using
MemoryTracker::LockExceptionInThread to block these exception, and allow
others (so std::terminate will be called, since this is c++11 with
noexcept for destructors by default).
Here is an example, that leads to empty block in the distributed batch:
2021.01.21 12:43:18.619739 [ 46468 ] {7bd60d75-ebcb-45d2-874d-260df9a4ddac} <Error> virtual DB::CompressedWriteBuffer::~CompressedWriteBuffer(): Code: 241, e.displayText() = DB::Exception: Memory limit (for user) exceeded: would use 332.07 GiB (attempt to allocate chunk of 4355342 bytes), maximum: 256.00 GiB, Stack trace (when copying this message, always include the lines below):
0. DB::Exception::Exception<>() @ 0x86f7b88 in /usr/bin/clickhouse
...
4. void DB::PODArrayBase<>::resize<>(unsigned long) @ 0xe9e878d in /usr/bin/clickhouse
5. DB::CompressedWriteBuffer::nextImpl() @ 0xe9f0296 in /usr/bin/clickhouse
6. DB::CompressedWriteBuffer::~CompressedWriteBuffer() @ 0xe9f0415 in /usr/bin/clickhouse
7. DB::DistributedBlockOutputStream::writeToShard() @ 0xf6bed4a in /usr/bin/clickhouse
2021-01-22 18:56:50 +00:00
|
|
|
|
|
|
|
next();
|
2016-10-25 06:49:24 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
void WriteBufferFromFileDescriptor::sync()
|
|
|
|
{
|
|
|
|
/// If buffer has pending data - write it.
|
|
|
|
next();
|
|
|
|
|
2022-07-13 13:29:22 +00:00
|
|
|
ProfileEvents::increment(ProfileEvents::FileSync);
|
|
|
|
|
|
|
|
Stopwatch watch;
|
|
|
|
|
2016-10-25 06:49:24 +00:00
|
|
|
/// Request OS to sync data with storage medium.
|
2021-11-12 07:49:21 +00:00
|
|
|
#if defined(OS_DARWIN)
|
|
|
|
int res = ::fsync(fd);
|
|
|
|
#else
|
2021-11-10 09:23:46 +00:00
|
|
|
int res = ::fdatasync(fd);
|
2021-11-12 07:49:21 +00:00
|
|
|
#endif
|
2022-07-13 13:29:22 +00:00
|
|
|
ProfileEvents::increment(ProfileEvents::FileSyncElapsedMicroseconds, watch.elapsedMicroseconds());
|
|
|
|
|
2016-10-25 06:49:24 +00:00
|
|
|
if (-1 == res)
|
2019-08-07 12:52:47 +00:00
|
|
|
throwFromErrnoWithPath("Cannot fsync " + getFileName(), getFileName(), ErrorCodes::CANNOT_FSYNC);
|
2016-10-25 06:49:24 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2021-09-06 09:16:52 +00:00
|
|
|
off_t WriteBufferFromFileDescriptor::seek(off_t offset, int whence) // NOLINT
|
2016-10-25 06:49:24 +00:00
|
|
|
{
|
|
|
|
off_t res = lseek(fd, offset, whence);
|
|
|
|
if (-1 == res)
|
2019-08-07 12:52:47 +00:00
|
|
|
throwFromErrnoWithPath("Cannot seek through file " + getFileName(), getFileName(),
|
|
|
|
ErrorCodes::CANNOT_SEEK_THROUGH_FILE);
|
2016-10-25 06:49:24 +00:00
|
|
|
return res;
|
|
|
|
}
|
|
|
|
|
2021-09-06 09:16:52 +00:00
|
|
|
void WriteBufferFromFileDescriptor::truncate(off_t length) // NOLINT
|
2016-10-25 06:49:24 +00:00
|
|
|
{
|
|
|
|
int res = ftruncate(fd, length);
|
|
|
|
if (-1 == res)
|
2019-08-07 12:52:47 +00:00
|
|
|
throwFromErrnoWithPath("Cannot truncate file " + getFileName(), getFileName(), ErrorCodes::CANNOT_TRUNCATE_FILE);
|
2016-10-25 06:49:24 +00:00
|
|
|
}
|
|
|
|
|
2020-07-07 11:45:20 +00:00
|
|
|
|
2021-09-06 09:16:52 +00:00
|
|
|
off_t WriteBufferFromFileDescriptor::size() const
|
2020-07-07 11:45:20 +00:00
|
|
|
{
|
|
|
|
struct stat buf;
|
|
|
|
int res = fstat(fd, &buf);
|
|
|
|
if (-1 == res)
|
|
|
|
throwFromErrnoWithPath("Cannot execute fstat " + getFileName(), getFileName(), ErrorCodes::CANNOT_FSTAT);
|
|
|
|
return buf.st_size;
|
|
|
|
}
|
|
|
|
|
2021-09-06 10:35:37 +00:00
|
|
|
std::string WriteBufferFromFileDescriptor::getFileName() const
|
|
|
|
{
|
|
|
|
if (file_name.empty())
|
|
|
|
return "(fd = " + toString(fd) + ")";
|
|
|
|
|
|
|
|
return file_name;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2016-10-25 06:49:24 +00:00
|
|
|
}
|