ClickHouse/src/IO/WriteBufferFromS3.h

84 lines
2.2 KiB
C++
Raw Normal View History

2019-05-31 10:58:43 +00:00
#pragma once
2019-12-06 14:37:21 +00:00
#include <Common/config.h>
#if USE_AWS_S3
2020-01-28 13:05:37 +00:00
# include <map>
# include <memory>
# include <optional>
# include <vector>
2021-10-02 07:13:14 +00:00
# include <base/logger_useful.h>
# include <base/types.h>
2020-01-28 13:05:37 +00:00
# include <IO/BufferWithOwnMemory.h>
# include <IO/WriteBuffer.h>
2021-10-27 23:10:39 +00:00
# include <aws/core/utils/memory/stl/AWSStringStream.h>
2019-05-31 10:58:43 +00:00
2019-12-11 14:21:48 +00:00
/// Forward declaration only: this header refers to the client solely through
/// std::shared_ptr<Aws::S3::S3Client>, so the complete type is not required here.
namespace Aws::S3
{
class S3Client;
}
2019-05-31 10:58:43 +00:00
namespace DB
{
/**
 * Buffer to write data to an S3 object with the specified bucket and key.
 * If the size of the data written to the buffer is less than 'max_single_part_upload_size', the write is performed using a single-part upload.
 * Otherwise a multipart upload is used:
 * the data is divided into chunks with size greater than 'minimum_upload_part_size'. The last chunk can be smaller than this threshold.
 * Each chunk is written as a part to S3.
 */
2019-06-17 00:06:14 +00:00
class WriteBufferFromS3 : public BufferWithOwnMemory<WriteBuffer>
{
private:
    /// Target bucket and object key, as passed to the constructor.
    String bucket;
    String key;

    /// Optional key/value metadata passed to the constructor (std::nullopt by default).
    /// NOTE(review): presumably attached to the uploaded object - confirm in the implementation file.
    std::optional<std::map<String, String>> object_metadata;

    /// S3 client used for the upload; ownership is shared with the caller.
    std::shared_ptr<Aws::S3::S3Client> client_ptr;

    /// Size thresholds passed to the constructor.
    size_t minimum_upload_part_size;
    size_t max_single_part_upload_size;

    /// Buffer to accumulate data.
    std::shared_ptr<Aws::StringStream> temporary_buffer;

    /// Explicitly zero-initialized so the member never holds an indeterminate value,
    /// regardless of what the constructor defined elsewhere does.
    /// NOTE(review): presumably the number of bytes accumulated for the current part - confirm.
    size_t last_part_size = 0;

    /// Upload in S3 is made in parts.
    /// We initiate upload, then upload each part and get ETag as a response, and then finish upload with listing all our parts.
    String multipart_upload_id;
    std::vector<String> part_tags;

    Poco::Logger * log = &Poco::Logger::get("WriteBufferFromS3");

public:
    /**
     * @param client_ptr_ S3 client to use; shared ownership.
     * @param bucket_ Name of the target bucket.
     * @param key_ Key of the target object.
     * @param minimum_upload_part_size_ Lower bound for the size of a multipart chunk.
     * @param max_single_part_upload_size_ Threshold on the written data size that selects
     *        between single-part and multipart upload.
     * @param object_metadata_ Optional metadata map; defaults to std::nullopt.
     * @param buffer_size_ Size of the working buffer; defaults to DBMS_DEFAULT_BUFFER_SIZE.
     */
    explicit WriteBufferFromS3(
        std::shared_ptr<Aws::S3::S3Client> client_ptr_,
        const String & bucket_,
        const String & key_,
        size_t minimum_upload_part_size_,
        size_t max_single_part_upload_size_,
        std::optional<std::map<String, String>> object_metadata_ = std::nullopt,
        size_t buffer_size_ = DBMS_DEFAULT_BUFFER_SIZE);

    void nextImpl() override;

    /// Receives response from the server after sending all data.
    void finalize() override;

private:
    /// NOTE(review): presumably set once finalization has run, to avoid doing it twice - confirm.
    bool finalized = false;

    /// Helpers defined in the implementation file.
    /// NOTE(review): going by their names they map onto the upload steps described above
    /// (initiate, upload a part, finish with the list of parts, or a single-part upload) - confirm.
    void allocateBuffer();
    void createMultipartUpload();
    void writePart();
    void completeMultipartUpload();
    void makeSinglepartUpload();

    void finalizeImpl();
};
}
2019-12-06 14:37:21 +00:00
#endif