Fix initialization of s3 request settings.

This commit is contained in:
Vitaly Baranov 2022-12-09 16:21:23 +01:00
parent 517e84cfb9
commit b91af1b650
6 changed files with 61 additions and 28 deletions

View File

@ -223,18 +223,21 @@ void BackupWriterS3::copyObjectMultipartImpl(
for (size_t part_number = 1; position < size; ++part_number)
{
/// Check that part number is not too big.
if (part_number > request_settings.max_part_number)
{
throw Exception(
ErrorCodes::INVALID_CONFIG_PARAMETER,
"Part number {} became too big while writing {} bytes to S3. Check min_upload_part_size = {}, max_upload_part_size = {}, "
"upload_part_size_multiply_factor = {}, upload_part_size_multiply_parts_count_threshold = {}",
part_number, size, request_settings.min_upload_part_size, request_settings.max_upload_part_size,
request_settings.upload_part_size_multiply_factor, request_settings.upload_part_size_multiply_parts_count_threshold);
"Part number exceeded {} while writing {} bytes to S3. Check min_upload_part_size = {}, max_upload_part_size = {}, "
"upload_part_size_multiply_factor = {}, upload_part_size_multiply_parts_count_threshold = {}, max_single_operation_copy_size = {}",
request_settings.max_part_number, size, request_settings.min_upload_part_size, request_settings.max_upload_part_size,
request_settings.upload_part_size_multiply_factor, request_settings.upload_part_size_multiply_parts_count_threshold,
request_settings.max_single_operation_copy_size);
}
size_t next_position = std::min(position + upload_part_size, size);
/// Make a copy request to copy a part.
Aws::S3::Model::UploadPartCopyRequest part_request;
part_request.SetCopySource(src_bucket + "/" + src_key);
part_request.SetBucket(dst_bucket);
@ -261,6 +264,7 @@ void BackupWriterS3::copyObjectMultipartImpl(
position = next_position;
/// Maybe increase `upload_part_size` (we need to increase it sometimes to keep `part_number` less than or equal to `max_part_number`).
if (part_number % request_settings.upload_part_size_multiply_parts_count_threshold == 0)
{
upload_part_size *= request_settings.upload_part_size_multiply_factor;

View File

@ -136,7 +136,7 @@ void KeeperSnapshotManagerS3::uploadSnapshotImpl(const std::string & snapshot_pa
return;
S3Settings::RequestSettings request_settings_1;
request_settings_1.upload_part_size_multiply_parts_count_threshold = 10000;
request_settings_1.setEmptyFieldsByDefault();
const auto create_writer = [&](const auto & key)
{

View File

@ -37,8 +37,10 @@ std::unique_ptr<S3ObjectStorageSettings> getSettings(const Poco::Util::AbstractC
S3Settings::RequestSettings request_settings;
request_settings.max_single_read_retries = config.getUInt64(config_prefix + ".s3_max_single_read_retries", settings.s3_max_single_read_retries);
request_settings.min_upload_part_size = config.getUInt64(config_prefix + ".s3_min_upload_part_size", settings.s3_min_upload_part_size);
request_settings.max_upload_part_size = config.getUInt64(config_prefix + ".s3_max_upload_part_size", S3Settings::RequestSettings::DEFAULT_MAX_UPLOAD_PART_SIZE);
request_settings.upload_part_size_multiply_factor = config.getUInt64(config_prefix + ".s3_upload_part_size_multiply_factor", settings.s3_upload_part_size_multiply_factor);
request_settings.upload_part_size_multiply_parts_count_threshold = config.getUInt64(config_prefix + ".s3_upload_part_size_multiply_parts_count_threshold", settings.s3_upload_part_size_multiply_parts_count_threshold);
request_settings.max_part_number = config.getUInt64(config_prefix + ".s3_max_part_number", S3Settings::RequestSettings::DEFAULT_MAX_PART_NUMBER);
request_settings.max_single_part_upload_size = config.getUInt64(config_prefix + ".s3_max_single_part_upload_size", settings.s3_max_single_part_upload_size);
request_settings.check_objects_after_upload = config.getUInt64(config_prefix + ".s3_check_objects_after_upload", settings.s3_check_objects_after_upload);
request_settings.max_unexpected_write_error_retries = config.getUInt64(config_prefix + ".s3_max_unexpected_write_error_retries", settings.s3_max_unexpected_write_error_retries);

View File

@ -308,16 +308,20 @@ void WriteBufferFromS3::writePart()
void WriteBufferFromS3::fillUploadRequest(Aws::S3::Model::UploadPartRequest & req)
{
if (++part_number > request_settings.max_part_number)
/// Increase part number.
++part_number;
if (!multipart_upload_id.empty() && (part_number > request_settings.max_part_number))
{
throw Exception(
ErrorCodes::INVALID_CONFIG_PARAMETER,
"Part number {} became too big while writing {} bytes to S3. Check min_upload_part_size = {}, max_upload_part_size = {}, "
"upload_part_size_multiply_factor = {}, upload_part_size_multiply_parts_count_threshold = {}",
part_number, count(), request_settings.min_upload_part_size, request_settings.max_upload_part_size,
request_settings.upload_part_size_multiply_factor, request_settings.upload_part_size_multiply_parts_count_threshold);
"Part number exceeded {} while writing {} bytes to S3. Check min_upload_part_size = {}, max_upload_part_size = {}, "
"upload_part_size_multiply_factor = {}, upload_part_size_multiply_parts_count_threshold = {}, max_single_part_upload_size = {}",
request_settings.max_part_number, count(), request_settings.min_upload_part_size, request_settings.max_upload_part_size,
request_settings.upload_part_size_multiply_factor, request_settings.upload_part_size_multiply_parts_count_threshold,
request_settings.max_single_part_upload_size);
}
/// Setup request.
req.SetBucket(bucket);
req.SetKey(key);
req.SetPartNumber(static_cast<int>(part_number));
@ -328,7 +332,8 @@ void WriteBufferFromS3::fillUploadRequest(Aws::S3::Model::UploadPartRequest & re
/// If we don't do it, AWS SDK can mistakenly set it to application/xml, see https://github.com/aws/aws-sdk-cpp/issues/1840
req.SetContentType("binary/octet-stream");
if (part_number % request_settings.upload_part_size_multiply_parts_count_threshold == 0)
/// Maybe increase `upload_part_size` (we need to increase it sometimes to keep `part_number` less than or equal to `max_part_number`).
if (!multipart_upload_id.empty() && (part_number % request_settings.upload_part_size_multiply_parts_count_threshold == 0))
{
upload_part_size *= request_settings.upload_part_size_multiply_factor;
upload_part_size = std::min(upload_part_size, request_settings.max_upload_part_size);

View File

@ -6,26 +6,12 @@
#include <Common/Exception.h>
#include <Common/Throttler.h>
#include <Interpreters/Context.h>
#include <base/unit.h>
#include <boost/algorithm/string/predicate.hpp>
namespace DB
{
namespace
{
/// An object up to 5 GB can be copied in a single atomic operation.
constexpr UInt64 DEFAULT_MAX_SINGLE_OPERATION_COPY_SIZE = 5_GiB;
/// The maximum size of an uploaded part.
constexpr UInt64 DEFAULT_MAX_UPLOAD_PART_SIZE = 5_GiB;
/// The maximum part number
constexpr UInt64 DEFAULT_MAX_PART_NUMBER = 10000;
}
void StorageS3Settings::loadFromConfig(const String & config_elem, const Poco::Util::AbstractConfiguration & config, const Settings & settings)
{
std::lock_guard lock(mutex);
@ -64,12 +50,12 @@ void StorageS3Settings::loadFromConfig(const String & config_elem, const Poco::U
S3Settings::RequestSettings request_settings;
request_settings.max_single_read_retries = get_uint_for_key(key, "max_single_read_retries", true, settings.s3_max_single_read_retries);
request_settings.min_upload_part_size = get_uint_for_key(key, "min_upload_part_size", true, settings.s3_min_upload_part_size);
request_settings.max_upload_part_size = get_uint_for_key(key, "max_upload_part_size", true, DEFAULT_MAX_UPLOAD_PART_SIZE);
request_settings.max_upload_part_size = get_uint_for_key(key, "max_upload_part_size", true, S3Settings::RequestSettings::DEFAULT_MAX_UPLOAD_PART_SIZE);
request_settings.upload_part_size_multiply_factor = get_uint_for_key(key, "upload_part_size_multiply_factor", true, settings.s3_upload_part_size_multiply_factor);
request_settings.upload_part_size_multiply_parts_count_threshold = get_uint_for_key(key, "upload_part_size_multiply_parts_count_threshold", true, settings.s3_upload_part_size_multiply_parts_count_threshold);
request_settings.max_part_number = get_uint_for_key(key, "max_part_number", true, DEFAULT_MAX_PART_NUMBER);
request_settings.max_part_number = get_uint_for_key(key, "max_part_number", true, S3Settings::RequestSettings::DEFAULT_MAX_PART_NUMBER);
request_settings.max_single_part_upload_size = get_uint_for_key(key, "max_single_part_upload_size", true, settings.s3_max_single_part_upload_size);
request_settings.max_single_operation_copy_size = get_uint_for_key(key, "max_single_operation_copy_size", true, DEFAULT_MAX_SINGLE_OPERATION_COPY_SIZE);
request_settings.max_single_operation_copy_size = get_uint_for_key(key, "max_single_operation_copy_size", true, S3Settings::RequestSettings::DEFAULT_MAX_SINGLE_OPERATION_COPY_SIZE);
request_settings.max_connections = get_uint_for_key(key, "max_connections", true, settings.s3_max_connections);
request_settings.check_objects_after_upload = get_bool_for_key(key, "check_objects_after_upload", true, false);

View File

@ -60,6 +60,18 @@ struct S3Settings
&& put_request_throttler == other.put_request_throttler;
}
static const constexpr UInt64 DEFAULT_SINGLE_READ_RETRIES = 4;
static const constexpr UInt64 DEFAULT_MIN_UPLOAD_PART_SIZE = 16 * 1024 * 1024;
static const constexpr UInt64 DEFAULT_MAX_UPLOAD_PART_SIZE = 5ULL * 1024 * 1024 * 1024;
static const constexpr UInt64 DEFAULT_UPLOAD_PART_SIZE_MULTIPLY_FACTOR = 2;
static const constexpr UInt64 DEFAULT_UPLOAD_PART_SIZE_MULTIPLY_PARTS_COUNT_THRESHOLD = 500;
static const constexpr UInt64 DEFAULT_MAX_PART_NUMBER = 10000;
static const constexpr UInt64 DEFAULT_MAX_SINGLE_PART_UPLOAD_SIZE = 32 * 1024 * 1024;
static const constexpr UInt64 DEFAULT_MAX_SINGLE_OPERATION_COPY_SIZE = 5ULL * 1024 * 1024 * 1024;
static const constexpr UInt64 DEFAULT_MAX_CONNECTIONS = 1024;
static const constexpr UInt64 DEFAULT_MAX_UNEXPECTED_WRITE_ERRORS_RETRIES = 4;
void setEmptyFieldsByDefault();
void updateFromSettingsIfEmpty(const Settings & settings);
};
@ -85,4 +97,28 @@ private:
std::map<const String, const S3Settings> s3_settings;
};
/// Replaces every request setting that is still zero with its built-in `DEFAULT_*` constant.
/// Fields that already hold a non-zero value are left untouched.
/// Note that a field explicitly set to 0 is indistinguishable from an unset one here and is overwritten too.
inline void S3Settings::RequestSettings::setEmptyFieldsByDefault()
{
    /// Assigns `default_value` to `field` only when `field` currently evaluates to false (i.e. is zero).
    const auto use_default_if_zero = [](auto & field, const auto default_value)
    {
        if (!field)
            field = default_value;
    };

    use_default_if_zero(max_single_read_retries, DEFAULT_SINGLE_READ_RETRIES);
    use_default_if_zero(min_upload_part_size, DEFAULT_MIN_UPLOAD_PART_SIZE);
    use_default_if_zero(max_upload_part_size, DEFAULT_MAX_UPLOAD_PART_SIZE);
    use_default_if_zero(upload_part_size_multiply_factor, DEFAULT_UPLOAD_PART_SIZE_MULTIPLY_FACTOR);
    use_default_if_zero(upload_part_size_multiply_parts_count_threshold, DEFAULT_UPLOAD_PART_SIZE_MULTIPLY_PARTS_COUNT_THRESHOLD);
    use_default_if_zero(max_part_number, DEFAULT_MAX_PART_NUMBER);
    use_default_if_zero(max_single_part_upload_size, DEFAULT_MAX_SINGLE_PART_UPLOAD_SIZE);
    use_default_if_zero(max_single_operation_copy_size, DEFAULT_MAX_SINGLE_OPERATION_COPY_SIZE);
    use_default_if_zero(max_connections, DEFAULT_MAX_CONNECTIONS);
    use_default_if_zero(max_unexpected_write_error_retries, DEFAULT_MAX_UNEXPECTED_WRITE_ERRORS_RETRIES);
}
}