diff --git a/src/Backups/BackupIO_S3.cpp b/src/Backups/BackupIO_S3.cpp
index 5e3440fa8ea..35f63405e43 100644
--- a/src/Backups/BackupIO_S3.cpp
+++ b/src/Backups/BackupIO_S3.cpp
@@ -223,18 +223,21 @@ void BackupWriterS3::copyObjectMultipartImpl(
 
     for (size_t part_number = 1; position < size; ++part_number)
     {
+        /// Check that the part number is not too big.
         if (part_number > request_settings.max_part_number)
         {
             throw Exception(
                 ErrorCodes::INVALID_CONFIG_PARAMETER,
-                "Part number {} became too big while writing {} bytes to S3. Check min_upload_part_size = {}, max_upload_part_size = {}, "
-                "upload_part_size_multiply_factor = {}, upload_part_size_multiply_parts_count_threshold = {}",
-                part_number, size, request_settings.min_upload_part_size, request_settings.max_upload_part_size,
-                request_settings.upload_part_size_multiply_factor, request_settings.upload_part_size_multiply_parts_count_threshold);
+                "Part number exceeded {} while writing {} bytes to S3. Check min_upload_part_size = {}, max_upload_part_size = {}, "
+                "upload_part_size_multiply_factor = {}, upload_part_size_multiply_parts_count_threshold = {}, max_single_operation_copy_size = {}",
+                request_settings.max_part_number, size, request_settings.min_upload_part_size, request_settings.max_upload_part_size,
+                request_settings.upload_part_size_multiply_factor, request_settings.upload_part_size_multiply_parts_count_threshold,
+                request_settings.max_single_operation_copy_size);
         }
 
         size_t next_position = std::min(position + upload_part_size, size);
 
+        /// Make a copy request to copy a part.
         Aws::S3::Model::UploadPartCopyRequest part_request;
         part_request.SetCopySource(src_bucket + "/" + src_key);
         part_request.SetBucket(dst_bucket);
@@ -261,6 +264,7 @@ void BackupWriterS3::copyObjectMultipartImpl(
 
         position = next_position;
 
+        /// Maybe increase `upload_part_size` (we need to increase it sometimes to keep `part_number` less than or equal to `max_part_number`).
         if (part_number % request_settings.upload_part_size_multiply_parts_count_threshold == 0)
         {
             upload_part_size *= request_settings.upload_part_size_multiply_factor;
diff --git a/src/Coordination/KeeperSnapshotManagerS3.cpp b/src/Coordination/KeeperSnapshotManagerS3.cpp
index 75acc7ecb8b..02451ac36de 100644
--- a/src/Coordination/KeeperSnapshotManagerS3.cpp
+++ b/src/Coordination/KeeperSnapshotManagerS3.cpp
@@ -136,7 +136,7 @@ void KeeperSnapshotManagerS3::uploadSnapshotImpl(const std::string & snapshot_pa
         return;
 
     S3Settings::RequestSettings request_settings_1;
-    request_settings_1.upload_part_size_multiply_parts_count_threshold = 10000;
+    request_settings_1.setEmptyFieldsByDefault();
 
     const auto create_writer = [&](const auto & key)
     {
diff --git a/src/Disks/ObjectStorages/S3/diskSettings.cpp b/src/Disks/ObjectStorages/S3/diskSettings.cpp
index ee6b798629c..da3a2ae710e 100644
--- a/src/Disks/ObjectStorages/S3/diskSettings.cpp
+++ b/src/Disks/ObjectStorages/S3/diskSettings.cpp
@@ -37,8 +37,10 @@ std::unique_ptr<S3ObjectStorageSettings> getSettings(const Poco::Util::AbstractC
     S3Settings::RequestSettings request_settings;
     request_settings.max_single_read_retries = config.getUInt64(config_prefix + ".s3_max_single_read_retries", settings.s3_max_single_read_retries);
     request_settings.min_upload_part_size = config.getUInt64(config_prefix + ".s3_min_upload_part_size", settings.s3_min_upload_part_size);
+    request_settings.max_upload_part_size = config.getUInt64(config_prefix + ".s3_max_upload_part_size", S3Settings::RequestSettings::DEFAULT_MAX_UPLOAD_PART_SIZE);
     request_settings.upload_part_size_multiply_factor = config.getUInt64(config_prefix + ".s3_upload_part_size_multiply_factor", settings.s3_upload_part_size_multiply_factor);
     request_settings.upload_part_size_multiply_parts_count_threshold = config.getUInt64(config_prefix + ".s3_upload_part_size_multiply_parts_count_threshold", settings.s3_upload_part_size_multiply_parts_count_threshold);
+    request_settings.max_part_number = config.getUInt64(config_prefix + ".s3_max_part_number", S3Settings::RequestSettings::DEFAULT_MAX_PART_NUMBER);
     request_settings.max_single_part_upload_size = config.getUInt64(config_prefix + ".s3_max_single_part_upload_size", settings.s3_max_single_part_upload_size);
     request_settings.check_objects_after_upload = config.getUInt64(config_prefix + ".s3_check_objects_after_upload", settings.s3_check_objects_after_upload);
     request_settings.max_unexpected_write_error_retries = config.getUInt64(config_prefix + ".s3_max_unexpected_write_error_retries", settings.s3_max_unexpected_write_error_retries);
diff --git a/src/IO/WriteBufferFromS3.cpp b/src/IO/WriteBufferFromS3.cpp
index 868dc89ca7e..1cd6a8ab64e 100644
--- a/src/IO/WriteBufferFromS3.cpp
+++ b/src/IO/WriteBufferFromS3.cpp
@@ -308,16 +308,20 @@ void WriteBufferFromS3::writePart()
 
 void WriteBufferFromS3::fillUploadRequest(Aws::S3::Model::UploadPartRequest & req)
 {
-    if (++part_number > request_settings.max_part_number)
+    /// Increase the part number.
+    ++part_number;
+    if (!multipart_upload_id.empty() && (part_number > request_settings.max_part_number))
     {
         throw Exception(
             ErrorCodes::INVALID_CONFIG_PARAMETER,
-            "Part number {} became too big while writing {} bytes to S3. Check min_upload_part_size = {}, max_upload_part_size = {}, "
-            "upload_part_size_multiply_factor = {}, upload_part_size_multiply_parts_count_threshold = {}",
-            part_number, count(), request_settings.min_upload_part_size, request_settings.max_upload_part_size,
-            request_settings.upload_part_size_multiply_factor, request_settings.upload_part_size_multiply_parts_count_threshold);
+            "Part number exceeded {} while writing {} bytes to S3. Check min_upload_part_size = {}, max_upload_part_size = {}, "
+            "upload_part_size_multiply_factor = {}, upload_part_size_multiply_parts_count_threshold = {}, max_single_part_upload_size = {}",
+            request_settings.max_part_number, count(), request_settings.min_upload_part_size, request_settings.max_upload_part_size,
+            request_settings.upload_part_size_multiply_factor, request_settings.upload_part_size_multiply_parts_count_threshold,
+            request_settings.max_single_part_upload_size);
     }
 
+    /// Set up the request.
     req.SetBucket(bucket);
     req.SetKey(key);
     req.SetPartNumber(static_cast<int>(part_number));
@@ -328,7 +332,8 @@ void WriteBufferFromS3::fillUploadRequest(Aws::S3::Model::UploadPartRequest & re
     /// If we don't do it, AWS SDK can mistakenly set it to application/xml, see https://github.com/aws/aws-sdk-cpp/issues/1840
     req.SetContentType("binary/octet-stream");
 
-    if (part_number % request_settings.upload_part_size_multiply_parts_count_threshold == 0)
+    /// Maybe increase `upload_part_size` (we need to increase it sometimes to keep `part_number` less than or equal to `max_part_number`).
+    if (!multipart_upload_id.empty() && (part_number % request_settings.upload_part_size_multiply_parts_count_threshold == 0))
     {
         upload_part_size *= request_settings.upload_part_size_multiply_factor;
         upload_part_size = std::min(upload_part_size, request_settings.max_upload_part_size);
diff --git a/src/Storages/StorageS3Settings.cpp b/src/Storages/StorageS3Settings.cpp
index 282af0f1c39..f96b4f4509b 100644
--- a/src/Storages/StorageS3Settings.cpp
+++ b/src/Storages/StorageS3Settings.cpp
@@ -6,26 +6,12 @@
 #include <...>
 #include <...>
 #include <...>
-#include <base/unit.h>
 #include <...>
 
 
 namespace DB
 {
 
-namespace
-{
-    /// An object up to 5 GB can be copied in a single atomic operation.
-    constexpr UInt64 DEFAULT_MAX_SINGLE_OPERATION_COPY_SIZE = 5_GiB;
-
-    /// The maximum size of an uploaded part.
-    constexpr UInt64 DEFAULT_MAX_UPLOAD_PART_SIZE = 5_GiB;
-
-    /// The maximum part number
-    constexpr UInt64 DEFAULT_MAX_PART_NUMBER = 10000;
-}
-
-
 void StorageS3Settings::loadFromConfig(const String & config_elem, const Poco::Util::AbstractConfiguration & config, const Settings & settings)
 {
     std::lock_guard lock(mutex);
@@ -64,12 +50,12 @@ void StorageS3Settings::loadFromConfig(const String & config_elem, const Poco::U
 
             S3Settings::RequestSettings request_settings;
             request_settings.max_single_read_retries = get_uint_for_key(key, "max_single_read_retries", true, settings.s3_max_single_read_retries);
             request_settings.min_upload_part_size = get_uint_for_key(key, "min_upload_part_size", true, settings.s3_min_upload_part_size);
-            request_settings.max_upload_part_size = get_uint_for_key(key, "max_upload_part_size", true, DEFAULT_MAX_UPLOAD_PART_SIZE);
+            request_settings.max_upload_part_size = get_uint_for_key(key, "max_upload_part_size", true, S3Settings::RequestSettings::DEFAULT_MAX_UPLOAD_PART_SIZE);
             request_settings.upload_part_size_multiply_factor = get_uint_for_key(key, "upload_part_size_multiply_factor", true, settings.s3_upload_part_size_multiply_factor);
             request_settings.upload_part_size_multiply_parts_count_threshold = get_uint_for_key(key, "upload_part_size_multiply_parts_count_threshold", true, settings.s3_upload_part_size_multiply_parts_count_threshold);
-            request_settings.max_part_number = get_uint_for_key(key, "max_part_number", true, DEFAULT_MAX_PART_NUMBER);
+            request_settings.max_part_number = get_uint_for_key(key, "max_part_number", true, S3Settings::RequestSettings::DEFAULT_MAX_PART_NUMBER);
             request_settings.max_single_part_upload_size = get_uint_for_key(key, "max_single_part_upload_size", true, settings.s3_max_single_part_upload_size);
-            request_settings.max_single_operation_copy_size = get_uint_for_key(key, "max_single_operation_copy_size", true, DEFAULT_MAX_SINGLE_OPERATION_COPY_SIZE);
+            request_settings.max_single_operation_copy_size = get_uint_for_key(key, "max_single_operation_copy_size", true, S3Settings::RequestSettings::DEFAULT_MAX_SINGLE_OPERATION_COPY_SIZE);
             request_settings.max_connections = get_uint_for_key(key, "max_connections", true, settings.s3_max_connections);
             request_settings.check_objects_after_upload = get_bool_for_key(key, "check_objects_after_upload", true, false);
diff --git a/src/Storages/StorageS3Settings.h b/src/Storages/StorageS3Settings.h
index bed92b8da25..bf04dbe3a61 100644
--- a/src/Storages/StorageS3Settings.h
+++ b/src/Storages/StorageS3Settings.h
@@ -60,6 +60,18 @@ struct S3Settings
                 && put_request_throttler == other.put_request_throttler;
         }
 
+        static const constexpr UInt64 DEFAULT_SINGLE_READ_RETRIES = 4;
+        static const constexpr UInt64 DEFAULT_MIN_UPLOAD_PART_SIZE = 16 * 1024 * 1024;
+        static const constexpr UInt64 DEFAULT_MAX_UPLOAD_PART_SIZE = 5ULL * 1024 * 1024 * 1024;
+        static const constexpr UInt64 DEFAULT_UPLOAD_PART_SIZE_MULTIPLY_FACTOR = 2;
+        static const constexpr UInt64 DEFAULT_UPLOAD_PART_SIZE_MULTIPLY_PARTS_COUNT_THRESHOLD = 500;
+        static const constexpr UInt64 DEFAULT_MAX_PART_NUMBER = 10000;
+        static const constexpr UInt64 DEFAULT_MAX_SINGLE_PART_UPLOAD_SIZE = 32 * 1024 * 1024;
+        static const constexpr UInt64 DEFAULT_MAX_SINGLE_OPERATION_COPY_SIZE = 5ULL * 1024 * 1024 * 1024;
+        static const constexpr UInt64 DEFAULT_MAX_CONNECTIONS = 1024;
+        static const constexpr UInt64 DEFAULT_MAX_UNEXPECTED_WRITE_ERRORS_RETRIES = 4;
+
+        void setEmptyFieldsByDefault();
         void updateFromSettingsIfEmpty(const Settings & settings);
     };
 
@@ -85,4 +97,28 @@ private:
     std::map<const String, const S3Settings> s3_settings;
 };
 
+inline void S3Settings::RequestSettings::setEmptyFieldsByDefault()
+{
+    if (!max_single_read_retries)
+        max_single_read_retries = DEFAULT_SINGLE_READ_RETRIES;
+    if (!min_upload_part_size)
+        min_upload_part_size = DEFAULT_MIN_UPLOAD_PART_SIZE;
+    if (!max_upload_part_size)
+        max_upload_part_size = DEFAULT_MAX_UPLOAD_PART_SIZE;
+    if (!upload_part_size_multiply_factor)
+        upload_part_size_multiply_factor = DEFAULT_UPLOAD_PART_SIZE_MULTIPLY_FACTOR;
+    if (!upload_part_size_multiply_parts_count_threshold)
+        upload_part_size_multiply_parts_count_threshold = DEFAULT_UPLOAD_PART_SIZE_MULTIPLY_PARTS_COUNT_THRESHOLD;
+    if (!max_part_number)
+        max_part_number = DEFAULT_MAX_PART_NUMBER;
+    if (!max_single_part_upload_size)
+        max_single_part_upload_size = DEFAULT_MAX_SINGLE_PART_UPLOAD_SIZE;
+    if (!max_single_operation_copy_size)
+        max_single_operation_copy_size = DEFAULT_MAX_SINGLE_OPERATION_COPY_SIZE;
+    if (!max_connections)
+        max_connections = DEFAULT_MAX_CONNECTIONS;
+    if (!max_unexpected_write_error_retries)
+        max_unexpected_write_error_retries = DEFAULT_MAX_UNEXPECTED_WRITE_ERRORS_RETRIES;
+}
+
 }
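
A note on how the new defaults bound upload sizes: the growth rule the patch guards with `max_part_number` starts at `min_upload_part_size`, multiplies the part size by `upload_part_size_multiply_factor` every `upload_part_size_multiply_parts_count_threshold` parts, and clamps it to `max_upload_part_size`. The standalone sketch below is not part of the patch; it only reuses the default values added to `StorageS3Settings.h`, and `main()` plus the printout are illustrative. It replays that rule to show the largest object a multipart upload can reach before the new "Part number exceeded" error fires:

```cpp
#include <algorithm>
#include <cstdint>
#include <iostream>

// Standalone sketch: replays the part-size growth rule from
// WriteBufferFromS3::fillUploadRequest using the default values this patch
// adds to S3Settings::RequestSettings.
int main()
{
    const uint64_t min_upload_part_size = 16ULL * 1024 * 1024;        // DEFAULT_MIN_UPLOAD_PART_SIZE
    const uint64_t max_upload_part_size = 5ULL * 1024 * 1024 * 1024;  // DEFAULT_MAX_UPLOAD_PART_SIZE
    const uint64_t multiply_factor = 2;                               // DEFAULT_UPLOAD_PART_SIZE_MULTIPLY_FACTOR
    const uint64_t parts_count_threshold = 500;                       // DEFAULT_UPLOAD_PART_SIZE_MULTIPLY_PARTS_COUNT_THRESHOLD
    const uint64_t max_part_number = 10000;                           // DEFAULT_MAX_PART_NUMBER

    uint64_t upload_part_size = min_upload_part_size;
    uint64_t total_bytes = 0;

    for (uint64_t part_number = 1; part_number <= max_part_number; ++part_number)
    {
        total_bytes += upload_part_size;

        // Same rule as in the patch: every `parts_count_threshold` parts,
        // multiply the part size and clamp it to `max_upload_part_size`.
        if (part_number % parts_count_threshold == 0)
            upload_part_size = std::min(upload_part_size * multiply_factor, max_upload_part_size);
    }

    std::cout << "Max object size with default settings: "
              << total_bytes / (1024.0 * 1024 * 1024 * 1024) << " TiB\n";
}
```

With the defaults (16 MiB starting part size, doubling every 500 parts, 5 GiB per-part cap, 10000 parts), this prints roughly 30.75 TiB. Uploads that would need more parts than that hit the `INVALID_CONFIG_PARAMETER` exception, which is why the extended error message now lists every setting that influences the ceiling.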