2019-12-17 13:45:53 +00:00
|
|
|
#include "DiskS3.h"
|
|
|
|
|
|
|
|
#if USE_AWS_S3
|
|
|
|
# include "DiskFactory.h"
|
|
|
|
|
2020-01-18 23:18:23 +00:00
|
|
|
# include <random>
|
2020-01-27 18:44:30 +00:00
|
|
|
# include <utility>
|
2020-01-18 23:18:23 +00:00
|
|
|
# include <IO/ReadBufferFromFile.h>
|
2020-01-28 13:01:08 +00:00
|
|
|
# include <IO/ReadBufferFromS3.h>
|
2020-01-18 23:18:23 +00:00
|
|
|
# include <IO/ReadHelpers.h>
|
2020-01-28 13:01:08 +00:00
|
|
|
# include <IO/S3Common.h>
|
|
|
|
# include <IO/WriteBufferFromFile.h>
|
|
|
|
# include <IO/WriteBufferFromS3.h>
|
2020-01-18 23:18:23 +00:00
|
|
|
# include <IO/WriteHelpers.h>
|
2019-12-17 13:45:53 +00:00
|
|
|
# include <Poco/File.h>
|
2020-01-18 23:18:23 +00:00
|
|
|
# include <Common/checkStackSize.h>
|
2020-03-23 14:45:48 +00:00
|
|
|
# include <Common/createHardLink.h>
|
2019-12-17 13:45:53 +00:00
|
|
|
# include <Common/quoteString.h>
|
2020-01-18 23:18:23 +00:00
|
|
|
# include <Common/thread_local_rng.h>
|
2019-12-17 13:45:53 +00:00
|
|
|
|
|
|
|
# include <aws/s3/model/CopyObjectRequest.h>
|
|
|
|
# include <aws/s3/model/DeleteObjectRequest.h>
|
|
|
|
# include <aws/s3/model/GetObjectRequest.h>
|
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
namespace ErrorCodes
|
|
|
|
{
|
2020-02-25 18:02:41 +00:00
|
|
|
extern const int LOGICAL_ERROR;
|
2019-12-17 13:45:53 +00:00
|
|
|
extern const int FILE_ALREADY_EXISTS;
|
|
|
|
extern const int PATH_ACCESS_DENIED;
|
2020-01-28 12:46:39 +00:00
|
|
|
extern const int CANNOT_SEEK_THROUGH_FILE;
|
|
|
|
extern const int UNKNOWN_FORMAT;
|
2019-12-17 13:45:53 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
namespace
|
|
|
|
{
|
2020-03-18 00:57:00 +00:00
|
|
|
/// Generates a random name for a new S3 object: 32 characters, each drawn uniformly from 'a'..'z'.
String getRandomName()
{
    /// The number of bits of entropy should be not less than 128.
    constexpr size_t name_length = 32;
    std::uniform_int_distribution<int> letter('a', 'z');

    String name(name_length, ' ');
    for (size_t i = 0; i < name_length; ++i)
        name[i] = letter(thread_local_rng);
    return name;
}
|
|
|
|
|
2020-01-18 23:18:23 +00:00
|
|
|
template <typename Result, typename Error>
|
|
|
|
void throwIfError(Aws::Utils::Outcome<Result, Error> && response)
|
2019-12-17 13:45:53 +00:00
|
|
|
{
|
|
|
|
if (!response.IsSuccess())
|
|
|
|
{
|
2020-01-18 23:18:23 +00:00
|
|
|
const auto & err = response.GetError();
|
2019-12-17 13:45:53 +00:00
|
|
|
throw Exception(err.GetMessage(), static_cast<int>(err.GetErrorType()));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-01-27 18:44:30 +00:00
|
|
|
/**
|
|
|
|
* S3 metadata file layout:
|
2020-01-28 13:46:02 +00:00
|
|
|
* Number of S3 objects, Total size of all S3 objects.
|
|
|
|
* Each S3 object represents path where object located in S3 and size of object.
|
2020-01-27 18:44:30 +00:00
|
|
|
*/
|
|
|
|
struct Metadata
|
2020-01-18 23:18:23 +00:00
|
|
|
{
|
2020-02-20 16:39:32 +00:00
|
|
|
/// Metadata file version.
|
2020-03-23 02:12:31 +00:00
|
|
|
static constexpr UInt32 VERSION = 1;
|
2020-01-28 12:46:39 +00:00
|
|
|
|
2020-01-28 13:01:08 +00:00
|
|
|
using PathAndSize = std::pair<String, size_t>;
|
2020-01-28 12:46:39 +00:00
|
|
|
|
2020-03-23 14:45:48 +00:00
|
|
|
/// Disk path.
|
|
|
|
const String & disk_path;
|
|
|
|
/// Relative path to metadata file on local FS.
|
2020-01-28 12:46:39 +00:00
|
|
|
String metadata_file_path;
|
2020-02-20 16:39:32 +00:00
|
|
|
/// Total size of all S3 objects.
|
2020-01-27 18:44:30 +00:00
|
|
|
size_t total_size;
|
2020-02-20 16:39:32 +00:00
|
|
|
/// S3 objects paths and their sizes.
|
2020-01-28 12:46:39 +00:00
|
|
|
std::vector<PathAndSize> s3_objects;
|
2020-03-23 14:45:48 +00:00
|
|
|
/// Number of references (hardlinks) to this metadata file.
|
|
|
|
UInt32 ref_count;
|
2020-01-27 18:44:30 +00:00
|
|
|
|
2020-02-20 16:39:32 +00:00
|
|
|
/// Load metadata by path or create empty if `create` flag is set.
|
2020-03-23 14:45:48 +00:00
|
|
|
explicit Metadata(const String & disk_path_, const String & metadata_file_path_, bool create = false)
|
|
|
|
: disk_path(disk_path_), metadata_file_path(metadata_file_path_), total_size(0), s3_objects(0), ref_count(0)
|
2020-01-27 18:44:30 +00:00
|
|
|
{
|
|
|
|
if (create)
|
2020-01-27 19:51:48 +00:00
|
|
|
return;
|
2020-01-27 18:44:30 +00:00
|
|
|
|
2020-03-23 14:45:48 +00:00
|
|
|
ReadBufferFromFile buf(disk_path + metadata_file_path, 1024); /* reasonable buffer size for small file */
|
2020-01-28 12:46:39 +00:00
|
|
|
|
|
|
|
UInt32 version;
|
|
|
|
readIntText(version, buf);
|
|
|
|
|
|
|
|
if (version != VERSION)
|
2020-01-28 13:01:08 +00:00
|
|
|
throw Exception(
|
2020-03-23 14:45:48 +00:00
|
|
|
"Unknown metadata file version. Path: " + disk_path + metadata_file_path
|
|
|
|
+ " Version: " + std::to_string(version) + ", Expected version: " + std::to_string(VERSION),
|
2020-01-28 13:01:08 +00:00
|
|
|
ErrorCodes::UNKNOWN_FORMAT);
|
2020-01-28 12:46:39 +00:00
|
|
|
|
|
|
|
assertChar('\n', buf);
|
|
|
|
|
2020-03-23 14:45:48 +00:00
|
|
|
UInt32 s3_objects_count;
|
2020-01-28 12:46:39 +00:00
|
|
|
readIntText(s3_objects_count, buf);
|
2020-01-27 19:51:48 +00:00
|
|
|
assertChar('\t', buf);
|
2020-01-27 18:44:30 +00:00
|
|
|
readIntText(total_size, buf);
|
2020-01-27 19:51:48 +00:00
|
|
|
assertChar('\n', buf);
|
2020-01-28 12:46:39 +00:00
|
|
|
s3_objects.resize(s3_objects_count);
|
|
|
|
for (UInt32 i = 0; i < s3_objects_count; ++i)
|
2020-01-27 18:44:30 +00:00
|
|
|
{
|
2020-03-23 14:45:48 +00:00
|
|
|
String s3_object_path;
|
|
|
|
size_t s3_object_size;
|
|
|
|
readIntText(s3_object_size, buf);
|
2020-01-27 19:51:48 +00:00
|
|
|
assertChar('\t', buf);
|
2020-03-23 14:45:48 +00:00
|
|
|
readEscapedString(s3_object_path, buf);
|
2020-01-27 19:51:48 +00:00
|
|
|
assertChar('\n', buf);
|
2020-03-23 14:45:48 +00:00
|
|
|
s3_objects[i] = {s3_object_path, s3_object_size};
|
2020-01-27 18:44:30 +00:00
|
|
|
}
|
2020-03-23 14:45:48 +00:00
|
|
|
|
|
|
|
readIntText(ref_count, buf);
|
|
|
|
assertChar('\n', buf);
|
2020-01-27 18:44:30 +00:00
|
|
|
}
|
2020-01-18 23:18:23 +00:00
|
|
|
|
2020-01-28 12:46:39 +00:00
|
|
|
void addObject(const String & path, size_t size)
|
2020-01-27 18:44:30 +00:00
|
|
|
{
|
|
|
|
total_size += size;
|
2020-01-28 12:46:39 +00:00
|
|
|
s3_objects.emplace_back(path, size);
|
2020-01-27 18:44:30 +00:00
|
|
|
}
|
|
|
|
|
2020-02-20 16:39:32 +00:00
|
|
|
/// Fsync metadata file if 'sync' flag is set.
|
|
|
|
void save(bool sync = false)
|
2020-01-27 19:17:22 +00:00
|
|
|
{
|
2020-03-23 14:45:48 +00:00
|
|
|
WriteBufferFromFile buf(disk_path + metadata_file_path, 1024);
|
2020-02-20 16:48:52 +00:00
|
|
|
|
2020-01-28 12:46:39 +00:00
|
|
|
writeIntText(VERSION, buf);
|
|
|
|
writeChar('\n', buf);
|
|
|
|
|
2020-03-23 14:45:48 +00:00
|
|
|
writeIntText(s3_objects.size(), buf);
|
2020-01-27 19:51:48 +00:00
|
|
|
writeChar('\t', buf);
|
|
|
|
writeIntText(total_size, buf);
|
|
|
|
writeChar('\n', buf);
|
2020-03-23 14:45:48 +00:00
|
|
|
for (const auto & [s3_object_path, s3_object_size] : s3_objects)
|
2020-01-27 19:51:48 +00:00
|
|
|
{
|
2020-03-23 14:45:48 +00:00
|
|
|
writeIntText(s3_object_size, buf);
|
2020-01-27 19:51:48 +00:00
|
|
|
writeChar('\t', buf);
|
2020-03-23 14:45:48 +00:00
|
|
|
writeEscapedString(s3_object_path, buf);
|
2020-01-27 19:51:48 +00:00
|
|
|
writeChar('\n', buf);
|
2020-01-27 18:44:30 +00:00
|
|
|
}
|
2020-03-23 14:45:48 +00:00
|
|
|
|
|
|
|
writeIntText(ref_count, buf);
|
|
|
|
writeChar('\n', buf);
|
|
|
|
|
2020-01-27 19:51:48 +00:00
|
|
|
buf.finalize();
|
2020-02-20 16:39:32 +00:00
|
|
|
if (sync)
|
|
|
|
buf.sync();
|
2020-01-27 18:44:30 +00:00
|
|
|
}
|
|
|
|
};
|
|
|
|
|
2020-02-20 16:39:32 +00:00
|
|
|
/// Reads data from S3 using stored paths in metadata.
|
2020-03-19 23:48:53 +00:00
|
|
|
class ReadIndirectBufferFromS3 final : public ReadBufferFromFileBase
|
2020-01-18 23:18:23 +00:00
|
|
|
{
|
2020-01-27 18:44:30 +00:00
|
|
|
public:
|
|
|
|
ReadIndirectBufferFromS3(
|
2020-01-28 13:01:08 +00:00
|
|
|
std::shared_ptr<Aws::S3::S3Client> client_ptr_, const String & bucket_, Metadata metadata_, size_t buf_size_)
|
2020-03-23 14:45:48 +00:00
|
|
|
: client_ptr(std::move(client_ptr_)), bucket(bucket_), metadata(std::move(metadata_)), buf_size(buf_size_)
|
2020-01-27 18:44:30 +00:00
|
|
|
{
|
|
|
|
}
|
|
|
|
|
2020-01-28 12:46:39 +00:00
|
|
|
off_t seek(off_t offset_, int whence) override
|
2020-01-27 19:51:48 +00:00
|
|
|
{
|
2020-02-27 16:47:40 +00:00
|
|
|
if (whence == SEEK_CUR)
|
|
|
|
{
|
|
|
|
/// If position within current working buffer - shift pos.
|
|
|
|
if (working_buffer.size() && size_t(getPosition() + offset_) < absolute_position)
|
|
|
|
{
|
|
|
|
pos += offset_;
|
|
|
|
return getPosition();
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
absolute_position += offset_;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else if (whence == SEEK_SET)
|
|
|
|
{
|
|
|
|
/// If position within current working buffer - shift pos.
|
|
|
|
if (working_buffer.size() && size_t(offset_) >= absolute_position - working_buffer.size()
|
|
|
|
&& size_t(offset_) < absolute_position)
|
|
|
|
{
|
|
|
|
pos = working_buffer.end() - (absolute_position - offset_);
|
|
|
|
return getPosition();
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
absolute_position = offset_;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else
|
|
|
|
throw Exception("Only SEEK_SET or SEEK_CUR modes are allowed.", ErrorCodes::CANNOT_SEEK_THROUGH_FILE);
|
2020-02-14 14:28:33 +00:00
|
|
|
|
|
|
|
current_buf = initialize();
|
|
|
|
pos = working_buffer.end();
|
2020-01-27 18:44:30 +00:00
|
|
|
|
2020-02-14 14:28:33 +00:00
|
|
|
return absolute_position;
|
2020-01-27 18:44:30 +00:00
|
|
|
}
|
|
|
|
|
2020-02-14 14:28:33 +00:00
|
|
|
off_t getPosition() override { return absolute_position - available(); }
|
|
|
|
|
|
|
|
std::string getFileName() const override { return metadata.metadata_file_path; }
|
|
|
|
|
2020-01-27 18:44:30 +00:00
|
|
|
private:
|
|
|
|
std::unique_ptr<ReadBufferFromS3> initialize()
|
|
|
|
{
|
2020-02-14 14:28:33 +00:00
|
|
|
size_t offset = absolute_position;
|
2020-03-23 14:45:48 +00:00
|
|
|
for (size_t i = 0; i < metadata.s3_objects.size(); ++i)
|
2020-01-27 18:44:30 +00:00
|
|
|
{
|
|
|
|
current_buf_idx = i;
|
2020-03-23 14:45:48 +00:00
|
|
|
const auto & [path, size] = metadata.s3_objects[i];
|
2020-01-27 19:17:22 +00:00
|
|
|
if (size > offset)
|
|
|
|
{
|
2020-01-28 12:46:39 +00:00
|
|
|
auto buf = std::make_unique<ReadBufferFromS3>(client_ptr, bucket, path, buf_size);
|
2020-01-27 19:51:48 +00:00
|
|
|
buf->seek(offset, SEEK_SET);
|
|
|
|
return buf;
|
2020-01-27 18:44:30 +00:00
|
|
|
}
|
|
|
|
offset -= size;
|
|
|
|
}
|
|
|
|
return nullptr;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool nextImpl() override
|
|
|
|
{
|
2020-02-20 16:39:32 +00:00
|
|
|
/// Find first available buffer that fits to given offset.
|
|
|
|
if (!current_buf)
|
2020-01-27 18:44:30 +00:00
|
|
|
current_buf = initialize();
|
2020-01-18 23:18:23 +00:00
|
|
|
|
2020-02-20 16:39:32 +00:00
|
|
|
/// If current buffer has remaining data - use it.
|
2020-01-27 18:44:30 +00:00
|
|
|
if (current_buf && current_buf->next())
|
|
|
|
{
|
|
|
|
working_buffer = current_buf->buffer();
|
2020-02-14 14:28:33 +00:00
|
|
|
absolute_position += working_buffer.size();
|
2020-01-27 18:44:30 +00:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2020-02-20 16:39:32 +00:00
|
|
|
/// If there is no available buffers - nothing to read.
|
2020-03-23 14:45:48 +00:00
|
|
|
if (current_buf_idx + 1 >= metadata.s3_objects.size())
|
2020-01-27 18:44:30 +00:00
|
|
|
return false;
|
|
|
|
|
2020-01-28 12:46:39 +00:00
|
|
|
++current_buf_idx;
|
2020-03-23 14:45:48 +00:00
|
|
|
const auto & path = metadata.s3_objects[current_buf_idx].first;
|
2020-01-28 12:46:39 +00:00
|
|
|
current_buf = std::make_unique<ReadBufferFromS3>(client_ptr, bucket, path, buf_size);
|
2020-01-27 18:44:30 +00:00
|
|
|
current_buf->next();
|
|
|
|
working_buffer = current_buf->buffer();
|
2020-02-14 14:28:33 +00:00
|
|
|
absolute_position += working_buffer.size();
|
2020-01-27 18:44:30 +00:00
|
|
|
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
private:
|
|
|
|
std::shared_ptr<Aws::S3::S3Client> client_ptr;
|
|
|
|
const String & bucket;
|
|
|
|
Metadata metadata;
|
|
|
|
size_t buf_size;
|
|
|
|
|
2020-02-14 14:28:33 +00:00
|
|
|
size_t absolute_position = 0;
|
2020-03-23 14:45:48 +00:00
|
|
|
size_t current_buf_idx = 0;
|
2020-01-27 18:44:30 +00:00
|
|
|
std::unique_ptr<ReadBufferFromS3> current_buf;
|
|
|
|
};
|
|
|
|
|
2020-01-28 13:46:02 +00:00
|
|
|
/// Stores data in S3 and adds the object key (S3 path) and object size to metadata file on local FS.
|
2020-03-19 23:48:53 +00:00
|
|
|
class WriteIndirectBufferFromS3 final : public WriteBufferFromFileBase
|
2019-12-17 13:45:53 +00:00
|
|
|
{
|
|
|
|
public:
|
|
|
|
WriteIndirectBufferFromS3(
|
|
|
|
std::shared_ptr<Aws::S3::S3Client> & client_ptr_,
|
|
|
|
const String & bucket_,
|
2020-01-27 18:44:30 +00:00
|
|
|
Metadata metadata_,
|
2020-01-28 12:46:39 +00:00
|
|
|
const String & s3_path_,
|
2020-02-01 09:28:04 +00:00
|
|
|
size_t min_upload_part_size,
|
2019-12-17 13:45:53 +00:00
|
|
|
size_t buf_size_)
|
2020-02-20 16:39:32 +00:00
|
|
|
: WriteBufferFromFileBase(buf_size_, nullptr, 0)
|
|
|
|
, impl(WriteBufferFromS3(client_ptr_, bucket_, s3_path_, min_upload_part_size, buf_size_))
|
2020-01-27 18:44:30 +00:00
|
|
|
, metadata(std::move(metadata_))
|
2020-01-28 12:46:39 +00:00
|
|
|
, s3_path(s3_path_)
|
2019-12-17 13:45:53 +00:00
|
|
|
{
|
|
|
|
}
|
|
|
|
|
|
|
|
~WriteIndirectBufferFromS3() override
|
|
|
|
{
|
2020-01-20 15:57:13 +00:00
|
|
|
try
|
|
|
|
{
|
|
|
|
finalize();
|
|
|
|
}
|
|
|
|
catch (...)
|
|
|
|
{
|
|
|
|
tryLogCurrentException(__PRETTY_FUNCTION__);
|
|
|
|
}
|
2019-12-17 13:45:53 +00:00
|
|
|
}
|
|
|
|
|
2020-02-20 16:39:32 +00:00
|
|
|
void finalize() override
|
|
|
|
{
|
|
|
|
if (finalized)
|
|
|
|
return;
|
|
|
|
|
|
|
|
next();
|
|
|
|
impl.finalize();
|
|
|
|
|
|
|
|
metadata.addObject(s3_path, count());
|
|
|
|
metadata.save();
|
|
|
|
|
|
|
|
finalized = true;
|
|
|
|
}
|
|
|
|
|
2020-03-19 16:37:55 +00:00
|
|
|
void sync() override
|
|
|
|
{
|
|
|
|
if (finalized)
|
|
|
|
metadata.save(true);
|
|
|
|
}
|
|
|
|
|
2020-02-20 16:39:32 +00:00
|
|
|
std::string getFileName() const override { return metadata.metadata_file_path; }
|
|
|
|
|
|
|
|
private:
|
|
|
|
void nextImpl() override
|
|
|
|
{
|
|
|
|
/// Transfer current working buffer to WriteBufferFromS3.
|
|
|
|
impl.swap(*this);
|
|
|
|
|
|
|
|
/// Write actual data to S3.
|
|
|
|
impl.next();
|
|
|
|
|
|
|
|
/// Return back working buffer.
|
|
|
|
impl.swap(*this);
|
|
|
|
}
|
|
|
|
|
2019-12-17 13:45:53 +00:00
|
|
|
private:
|
2020-02-20 16:39:32 +00:00
|
|
|
WriteBufferFromS3 impl;
|
2019-12-17 13:45:53 +00:00
|
|
|
bool finalized = false;
|
2020-01-27 18:44:30 +00:00
|
|
|
Metadata metadata;
|
2020-01-28 12:46:39 +00:00
|
|
|
String s3_path;
|
2019-12-17 13:45:53 +00:00
|
|
|
};
|
|
|
|
}
|
|
|
|
|
2020-01-18 23:42:46 +00:00
|
|
|
|
2020-03-19 23:48:53 +00:00
|
|
|
class DiskS3DirectoryIterator final : public IDiskDirectoryIterator
|
2020-01-18 23:42:46 +00:00
|
|
|
{
|
|
|
|
public:
|
|
|
|
DiskS3DirectoryIterator(const String & full_path, const String & folder_path_) : iter(full_path), folder_path(folder_path_) {}
|
|
|
|
|
|
|
|
void next() override { ++iter; }
|
|
|
|
|
|
|
|
bool isValid() const override { return iter != Poco::DirectoryIterator(); }
|
|
|
|
|
|
|
|
String path() const override
|
|
|
|
{
|
|
|
|
if (iter->isDirectory())
|
|
|
|
return folder_path + iter.name() + '/';
|
|
|
|
else
|
|
|
|
return folder_path + iter.name();
|
|
|
|
}
|
|
|
|
|
2020-02-27 16:47:40 +00:00
|
|
|
String name() const override { return iter.name(); }
|
|
|
|
|
2020-01-18 23:42:46 +00:00
|
|
|
private:
|
|
|
|
Poco::DirectoryIterator iter;
|
|
|
|
String folder_path;
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
using DiskS3Ptr = std::shared_ptr<DiskS3>;
|
|
|
|
|
2020-03-19 23:48:53 +00:00
|
|
|
class DiskS3Reservation final : public IReservation
|
2020-01-18 23:42:46 +00:00
|
|
|
{
|
|
|
|
public:
|
|
|
|
DiskS3Reservation(const DiskS3Ptr & disk_, UInt64 size_)
|
|
|
|
: disk(disk_), size(size_), metric_increment(CurrentMetrics::DiskSpaceReservedForMerge, size_)
|
|
|
|
{
|
|
|
|
}
|
|
|
|
|
|
|
|
UInt64 getSize() const override { return size; }
|
|
|
|
|
|
|
|
DiskPtr getDisk() const override { return disk; }
|
|
|
|
|
|
|
|
void update(UInt64 new_size) override
|
|
|
|
{
|
|
|
|
std::lock_guard lock(disk->reservation_mutex);
|
|
|
|
disk->reserved_bytes -= size;
|
|
|
|
size = new_size;
|
|
|
|
disk->reserved_bytes += size;
|
|
|
|
}
|
|
|
|
|
|
|
|
~DiskS3Reservation() override;
|
|
|
|
|
|
|
|
private:
|
|
|
|
DiskS3Ptr disk;
|
|
|
|
UInt64 size;
|
|
|
|
CurrentMetrics::Increment metric_increment;
|
|
|
|
};
|
|
|
|
|
|
|
|
|
2020-02-14 14:28:33 +00:00
|
|
|
/// Stores the disk name, S3 client, bucket, root path for S3 objects,
/// local path for metadata files and minimal upload part size. No I/O is performed here.
DiskS3::DiskS3(
    String name_,
    std::shared_ptr<Aws::S3::S3Client> client_,
    String bucket_,
    String s3_root_path_,
    String metadata_path_,
    size_t min_upload_part_size_)
    : name(std::move(name_)),
      client(std::move(client_)),
      bucket(std::move(bucket_)),
      s3_root_path(std::move(s3_root_path_)),
      metadata_path(std::move(metadata_path_)),
      min_upload_part_size(min_upload_part_size_)
{
}
|
|
|
|
|
|
|
|
/// Returns a reservation for `bytes` bytes, or an empty pointer if tryReserve() refuses.
ReservationPtr DiskS3::reserve(UInt64 bytes)
{
    if (tryReserve(bytes))
        return std::make_unique<DiskS3Reservation>(std::static_pointer_cast<DiskS3>(shared_from_this()), bytes);

    return {};
}
|
|
|
|
|
|
|
|
bool DiskS3::exists(const String & path) const
|
|
|
|
{
|
|
|
|
return Poco::File(metadata_path + path).exists();
|
|
|
|
}
|
|
|
|
|
|
|
|
bool DiskS3::isFile(const String & path) const
|
|
|
|
{
|
2020-01-27 19:51:48 +00:00
|
|
|
return Poco::File(metadata_path + path).isFile();
|
2019-12-17 13:45:53 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
bool DiskS3::isDirectory(const String & path) const
|
|
|
|
{
|
|
|
|
return Poco::File(metadata_path + path).isDirectory();
|
|
|
|
}
|
|
|
|
|
|
|
|
size_t DiskS3::getFileSize(const String & path) const
|
|
|
|
{
|
2020-03-23 14:45:48 +00:00
|
|
|
Metadata metadata(metadata_path, path);
|
2020-01-27 18:44:30 +00:00
|
|
|
return metadata.total_size;
|
2019-12-17 13:45:53 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
void DiskS3::createDirectory(const String & path)
|
|
|
|
{
|
|
|
|
Poco::File(metadata_path + path).createDirectory();
|
|
|
|
}
|
|
|
|
|
|
|
|
void DiskS3::createDirectories(const String & path)
|
|
|
|
{
|
|
|
|
Poco::File(metadata_path + path).createDirectories();
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Returns an iterator over the metadata directory at `path`; paths it returns are prefixed with `path`.
DiskDirectoryIteratorPtr DiskS3::iterateDirectory(const String & path)
{
    const String full_path = metadata_path + path;
    return std::make_unique<DiskS3DirectoryIterator>(full_path, path);
}
|
|
|
|
|
2020-01-18 23:18:23 +00:00
|
|
|
void DiskS3::clearDirectory(const String & path)
|
|
|
|
{
|
|
|
|
for (auto it{iterateDirectory(path)}; it->isValid(); it->next())
|
|
|
|
if (isFile(it->path()))
|
|
|
|
remove(it->path());
|
|
|
|
}
|
|
|
|
|
2019-12-17 13:45:53 +00:00
|
|
|
void DiskS3::moveFile(const String & from_path, const String & to_path)
|
|
|
|
{
|
|
|
|
if (exists(to_path))
|
2020-01-27 18:44:30 +00:00
|
|
|
throw Exception("File already exists: " + to_path, ErrorCodes::FILE_ALREADY_EXISTS);
|
2019-12-17 13:45:53 +00:00
|
|
|
Poco::File(metadata_path + from_path).renameTo(metadata_path + to_path);
|
|
|
|
}
|
|
|
|
|
|
|
|
void DiskS3::replaceFile(const String & from_path, const String & to_path)
|
|
|
|
{
|
|
|
|
Poco::File from_file(metadata_path + from_path);
|
|
|
|
Poco::File to_file(metadata_path + to_path);
|
|
|
|
if (to_file.exists())
|
|
|
|
{
|
|
|
|
Poco::File tmp_file(metadata_path + to_path + ".old");
|
|
|
|
to_file.renameTo(tmp_file.path());
|
|
|
|
from_file.renameTo(metadata_path + to_path);
|
2020-01-18 23:18:23 +00:00
|
|
|
remove(to_path + ".old");
|
2019-12-17 13:45:53 +00:00
|
|
|
}
|
|
|
|
else
|
|
|
|
from_file.renameTo(to_file.path());
|
|
|
|
}
|
|
|
|
|
|
|
|
void DiskS3::copyFile(const String & from_path, const String & to_path)
|
|
|
|
{
|
|
|
|
if (exists(to_path))
|
2020-01-18 23:18:23 +00:00
|
|
|
remove(to_path);
|
2019-12-17 13:45:53 +00:00
|
|
|
|
2020-03-23 14:45:48 +00:00
|
|
|
Metadata from(metadata_path, from_path);
|
|
|
|
Metadata to(metadata_path, to_path, true);
|
2020-01-27 18:44:30 +00:00
|
|
|
|
2020-03-19 16:37:55 +00:00
|
|
|
for (const auto & [path, size] : from.s3_objects)
|
2020-01-27 18:44:30 +00:00
|
|
|
{
|
2020-01-28 12:46:39 +00:00
|
|
|
auto new_path = s3_root_path + getRandomName();
|
2020-01-27 18:44:30 +00:00
|
|
|
Aws::S3::Model::CopyObjectRequest req;
|
2020-03-19 16:37:55 +00:00
|
|
|
req.SetCopySource(bucket + "/" + path);
|
2020-01-27 18:44:30 +00:00
|
|
|
req.SetBucket(bucket);
|
2020-01-28 12:46:39 +00:00
|
|
|
req.SetKey(new_path);
|
2020-01-27 18:44:30 +00:00
|
|
|
throwIfError(client->CopyObject(req));
|
|
|
|
|
2020-01-28 13:01:08 +00:00
|
|
|
to.addObject(new_path, size);
|
2020-01-27 18:44:30 +00:00
|
|
|
}
|
2019-12-17 13:45:53 +00:00
|
|
|
|
2020-01-27 18:44:30 +00:00
|
|
|
to.save();
|
2019-12-17 13:45:53 +00:00
|
|
|
}
|
|
|
|
|
2020-02-20 16:39:32 +00:00
|
|
|
std::unique_ptr<ReadBufferFromFileBase> DiskS3::readFile(const String & path, size_t buf_size, size_t, size_t, size_t) const
|
2019-12-17 13:45:53 +00:00
|
|
|
{
|
2020-03-23 14:45:48 +00:00
|
|
|
Metadata metadata(metadata_path, path);
|
2020-01-27 18:44:30 +00:00
|
|
|
|
|
|
|
LOG_DEBUG(
|
|
|
|
&Logger::get("DiskS3"),
|
2020-03-23 14:45:48 +00:00
|
|
|
"Read from file by path: " << backQuote(metadata_path + path) << " Existing S3 objects: " << metadata.s3_objects.size());
|
2020-01-27 18:44:30 +00:00
|
|
|
|
|
|
|
return std::make_unique<ReadIndirectBufferFromS3>(client, bucket, metadata, buf_size);
|
2019-12-17 13:45:53 +00:00
|
|
|
}
|
|
|
|
|
2020-02-20 16:39:32 +00:00
|
|
|
std::unique_ptr<WriteBufferFromFileBase> DiskS3::writeFile(const String & path, size_t buf_size, WriteMode mode, size_t, size_t)
|
2019-12-17 13:45:53 +00:00
|
|
|
{
|
2020-01-27 18:44:30 +00:00
|
|
|
bool exist = exists(path);
|
2020-02-20 16:39:32 +00:00
|
|
|
/// Path to store new S3 object.
|
2020-01-28 12:46:39 +00:00
|
|
|
auto s3_path = s3_root_path + getRandomName();
|
2020-01-27 18:44:30 +00:00
|
|
|
if (!exist || mode == WriteMode::Rewrite)
|
2019-12-17 13:45:53 +00:00
|
|
|
{
|
2020-02-20 16:39:32 +00:00
|
|
|
/// If metadata file exists - remove and create new.
|
2020-01-27 18:44:30 +00:00
|
|
|
if (exist)
|
|
|
|
remove(path);
|
|
|
|
|
2020-03-23 14:45:48 +00:00
|
|
|
Metadata metadata(metadata_path, path, true);
|
2020-02-20 16:39:32 +00:00
|
|
|
/// Save empty metadata to disk to have ability to get file size while buffer is not finalized.
|
2020-01-27 18:44:30 +00:00
|
|
|
metadata.save();
|
|
|
|
|
2020-01-28 13:01:08 +00:00
|
|
|
LOG_DEBUG(&Logger::get("DiskS3"), "Write to file by path: " << backQuote(metadata_path + path) << " New S3 path: " << s3_path);
|
2020-01-27 18:44:30 +00:00
|
|
|
|
2020-02-02 00:54:50 +00:00
|
|
|
return std::make_unique<WriteIndirectBufferFromS3>(client, bucket, metadata, s3_path, min_upload_part_size, buf_size);
|
2019-12-17 13:45:53 +00:00
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
2020-03-23 14:45:48 +00:00
|
|
|
Metadata metadata(metadata_path, path);
|
2020-01-27 18:44:30 +00:00
|
|
|
|
|
|
|
LOG_DEBUG(
|
|
|
|
&Logger::get("DiskS3"),
|
2020-01-28 12:46:39 +00:00
|
|
|
"Append to file by path: " << backQuote(metadata_path + path) << " New S3 path: " << s3_path
|
2020-03-23 14:45:48 +00:00
|
|
|
<< " Existing S3 objects: " << metadata.s3_objects.size());
|
2020-01-27 18:44:30 +00:00
|
|
|
|
2020-02-02 00:54:50 +00:00
|
|
|
return std::make_unique<WriteIndirectBufferFromS3>(client, bucket, metadata, s3_path, min_upload_part_size, buf_size);
|
2019-12-17 13:45:53 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-01-18 23:18:23 +00:00
|
|
|
void DiskS3::remove(const String & path)
|
2020-01-15 14:15:21 +00:00
|
|
|
{
|
2020-01-27 18:44:30 +00:00
|
|
|
LOG_DEBUG(&Logger::get("DiskS3"), "Remove file by path: " << backQuote(metadata_path + path));
|
|
|
|
|
2020-01-15 14:15:21 +00:00
|
|
|
Poco::File file(metadata_path + path);
|
|
|
|
if (file.isFile())
|
|
|
|
{
|
2020-03-23 14:45:48 +00:00
|
|
|
Metadata metadata(metadata_path, path);
|
2020-01-27 18:44:30 +00:00
|
|
|
|
2020-03-23 14:45:48 +00:00
|
|
|
/// If there is no references - delete content from S3.
|
|
|
|
if (metadata.ref_count == 0)
|
|
|
|
{
|
|
|
|
file.remove();
|
|
|
|
for (const auto & [s3_object_path, _] : metadata.s3_objects)
|
|
|
|
{
|
|
|
|
/// TODO: Make operation idempotent. Do not throw exception if key is already deleted.
|
|
|
|
Aws::S3::Model::DeleteObjectRequest request;
|
|
|
|
request.SetBucket(bucket);
|
|
|
|
request.SetKey(s3_object_path);
|
|
|
|
throwIfError(client->DeleteObject(request));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else /// In other case decrement number of references, save metadata and delete file.
|
|
|
|
{
|
|
|
|
--metadata.ref_count;
|
|
|
|
metadata.save();
|
|
|
|
file.remove();
|
2020-01-27 18:44:30 +00:00
|
|
|
}
|
2020-01-18 23:18:23 +00:00
|
|
|
}
|
2020-03-23 14:45:48 +00:00
|
|
|
else
|
|
|
|
file.remove();
|
2020-01-18 23:18:23 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
void DiskS3::removeRecursive(const String & path)
|
|
|
|
{
|
2020-01-28 13:01:08 +00:00
|
|
|
checkStackSize(); /// This is needed to prevent stack overflow in case of cyclic symlinks.
|
2020-01-15 14:15:21 +00:00
|
|
|
|
2020-01-18 23:18:23 +00:00
|
|
|
Poco::File file(metadata_path + path);
|
|
|
|
if (file.isFile())
|
|
|
|
{
|
2020-02-27 16:47:40 +00:00
|
|
|
remove(path);
|
2020-01-15 14:15:21 +00:00
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
2020-01-18 23:18:23 +00:00
|
|
|
for (auto it{iterateDirectory(path)}; it->isValid(); it->next())
|
|
|
|
removeRecursive(it->path());
|
2020-01-27 18:44:30 +00:00
|
|
|
file.remove();
|
2020-01-15 14:15:21 +00:00
|
|
|
}
|
2019-12-17 13:45:53 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/// Tries to reserve `bytes` bytes under the reservation mutex.
/// A zero-byte request always succeeds; otherwise the request succeeds only if
/// the available space minus already reserved bytes is enough.
/// On success the reservation counter (and reserved bytes, if any) is increased.
bool DiskS3::tryReserve(UInt64 bytes)
{
    std::lock_guard lock(reservation_mutex);

    if (bytes == 0)
    {
        LOG_DEBUG(&Logger::get("DiskS3"), "Reserving 0 bytes on s3 disk " << backQuote(name));
        ++reservation_count;
        return true;
    }

    auto available_space = getAvailableSpace();
    /// std::min keeps the subtraction from going below zero when more is reserved than available.
    UInt64 unreserved_space = available_space - std::min(available_space, reserved_bytes);

    if (unreserved_space < bytes)
        return false;

    LOG_DEBUG(
        &Logger::get("DiskS3"),
        "Reserving " << formatReadableSizeWithBinarySuffix(bytes) << " on disk " << backQuote(name) << ", having unreserved "
                     << formatReadableSizeWithBinarySuffix(unreserved_space) << ".");
    ++reservation_count;
    reserved_bytes += bytes;
    return true;
}
|
|
|
|
|
2020-02-28 17:14:55 +00:00
|
|
|
void DiskS3::listFiles(const String & path, std::vector<String> & file_names)
|
|
|
|
{
|
|
|
|
for (auto it = iterateDirectory(path); it->isValid(); it->next())
|
|
|
|
file_names.push_back(it->name());
|
|
|
|
}
|
|
|
|
|
2020-03-05 14:02:15 +00:00
|
|
|
/// Sets the modification time of the local metadata entry at `path`.
void DiskS3::setLastModified(const String & path, const Poco::Timestamp & timestamp)
{
    Poco::File metadata_file(metadata_path + path);
    metadata_file.setLastModified(timestamp);
}
|
|
|
|
|
|
|
|
/// Returns the modification time of the local metadata entry at `path`.
Poco::Timestamp DiskS3::getLastModified(const String & path)
{
    const Poco::File metadata_file(metadata_path + path);
    return metadata_file.getLastModified();
}
|
|
|
|
|
2020-03-19 16:37:55 +00:00
|
|
|
void DiskS3::createHardLink(const String & src_path, const String & dst_path)
|
|
|
|
{
|
2020-03-23 14:45:48 +00:00
|
|
|
/// Increment number of references.
|
|
|
|
Metadata src(metadata_path, src_path);
|
|
|
|
++src.ref_count;
|
|
|
|
src.save();
|
|
|
|
|
|
|
|
/// Create FS hardlink to metadata file.
|
|
|
|
DB::createHardLink(metadata_path + src_path, metadata_path + dst_path);
|
2020-03-19 16:37:55 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
void DiskS3::createFile(const String & path)
|
|
|
|
{
|
|
|
|
/// Create empty metadata file.
|
2020-03-23 14:45:48 +00:00
|
|
|
Metadata metadata(metadata_path, path, true);
|
2020-03-19 16:37:55 +00:00
|
|
|
metadata.save();
|
|
|
|
}
|
|
|
|
|
|
|
|
void DiskS3::setReadOnly(const String & path)
|
|
|
|
{
|
|
|
|
Poco::File(metadata_path + path).setReadOnly(true);
|
|
|
|
}
|
2020-01-18 23:42:46 +00:00
|
|
|
|
2019-12-17 13:45:53 +00:00
|
|
|
/// Returns the reserved bytes to the disk and decrements its reservation counter under the reservation mutex.
/// Unbalanced counters are reported to the log instead of being wrapped around;
/// any exception is logged and not propagated out of the destructor.
DiskS3Reservation::~DiskS3Reservation()
{
    try
    {
        std::lock_guard lock(disk->reservation_mutex);

        /// Errors are reported under the "DiskS3" logger, like everywhere else in this file.
        if (disk->reserved_bytes < size)
        {
            /// Less is accounted on the disk than this reservation holds - reset instead of underflow.
            disk->reserved_bytes = 0;
            LOG_ERROR(&Logger::get("DiskS3"), "Unbalanced reservations size for disk '" + disk->getName() + "'.");
        }
        else
        {
            disk->reserved_bytes -= size;
        }

        if (disk->reservation_count == 0)
            LOG_ERROR(&Logger::get("DiskS3"), "Unbalanced reservation count for disk '" + disk->getName() + "'.");
        else
            --disk->reservation_count;
    }
    catch (...)
    {
        tryLogCurrentException(__PRETTY_FUNCTION__);
    }
}
|
|
|
|
|
2020-03-18 03:27:32 +00:00
|
|
|
namespace
|
|
|
|
{
|
|
|
|
|
|
|
|
/// Writes 4 bytes ("test") to the file "test_acl" on the disk, rewriting it if it exists.
void checkWriteAccess(IDisk & disk)
{
    auto out = disk.writeFile("test_acl", DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Rewrite);
    out->write("test", 4);
}
|
|
|
|
|
2020-03-18 03:27:32 +00:00
|
|
|
void checkReadAccess(const String & disk_name, IDisk & disk)
|
2020-01-28 12:46:39 +00:00
|
|
|
{
|
2020-03-18 03:27:32 +00:00
|
|
|
auto file = disk.readFile("test_acl", DBMS_DEFAULT_BUFFER_SIZE);
|
2020-01-28 13:01:08 +00:00
|
|
|
String buf(4, '0');
|
|
|
|
file->readStrict(buf.data(), 4);
|
|
|
|
if (buf != "test")
|
|
|
|
throw Exception("No read access to S3 bucket in disk " + disk_name, ErrorCodes::PATH_ACCESS_DENIED);
|
2020-01-28 12:46:39 +00:00
|
|
|
}
|
|
|
|
|
2020-03-18 03:27:32 +00:00
|
|
|
/// Removes the file "test_acl" from the disk.
void checkRemoveAccess(IDisk & disk)
{
    const String probe_file = "test_acl";
    disk.remove(probe_file);
}
|
|
|
|
|
2020-01-28 12:46:39 +00:00
|
|
|
}
|
|
|
|
|
2019-12-17 13:45:53 +00:00
|
|
|
/// Registers the creator of disks of type "s3" in the factory.
/// The creator reads `endpoint`, `access_key_id` and `secret_access_key` from the configuration under `config_prefix`,
/// creates the local directory for metadata files and checks write, read and remove access before returning the disk.
/// Throws LOGICAL_ERROR if the key part of the endpoint is empty or does not end with '/'.
void registerDiskS3(DiskFactory & factory)
{
    auto creator = [](const String & name,
                      const Poco::Util::AbstractConfiguration & config,
                      const String & config_prefix,
                      const Context & context) -> DiskPtr {
        /// Local directory where metadata files of this disk are stored.
        Poco::File disk{context.getPath() + "disks/" + name};
        disk.createDirectories();

        S3::URI uri(Poco::URI(config.getString(config_prefix + ".endpoint")));
        auto client = S3::ClientFactory::instance().create(
            uri.endpoint,
            config.getString(config_prefix + ".access_key_id", ""),
            config.getString(config_prefix + ".secret_access_key", ""));

        /// Emptiness is checked first: calling back() on an empty string is undefined behavior.
        if (uri.key.empty() || uri.key.back() != '/')
            throw Exception("S3 path must ends with '/', but '" + uri.key + "' doesn't.", ErrorCodes::LOGICAL_ERROR);

        String metadata_path = context.getPath() + "disks/" + name + "/";

        auto s3disk
            = std::make_shared<DiskS3>(name, client, uri.bucket, uri.key, metadata_path, context.getSettingsRef().s3_min_upload_part_size);

        /// This code is used only to check access to the corresponding disk.
        checkWriteAccess(*s3disk);
        checkReadAccess(name, *s3disk);
        checkRemoveAccess(*s3disk);

        return s3disk;
    };
    factory.registerDiskType("s3", creator);
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
#endif
|