2020-06-01 17:16:09 +00:00
|
|
|
#include <Storages/StorageS3Settings.h>
|
|
|
|
|
2022-09-15 07:45:28 +00:00
|
|
|
#include <IO/S3Common.h>
|
|
|
|
|
2020-06-01 17:16:09 +00:00
|
|
|
#include <Poco/Util/AbstractConfiguration.h>
|
2022-12-17 14:09:53 +00:00
|
|
|
#include <Storages/NamedCollections/NamedCollections.h>
|
2020-06-01 17:16:09 +00:00
|
|
|
#include <Common/Exception.h>
|
2022-11-17 16:35:04 +00:00
|
|
|
#include <Common/Throttler.h>
|
2022-04-03 22:33:59 +00:00
|
|
|
#include <Interpreters/Context.h>
|
2021-01-07 03:42:39 +00:00
|
|
|
#include <boost/algorithm/string/predicate.hpp>
|
|
|
|
|
2020-06-01 17:16:09 +00:00
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
2022-12-17 14:09:53 +00:00
|
|
|
/// Error codes referenced in this file. The constant is only declared here (extern);
/// it is thrown by PartUploadSettings::validate() when a limit is out of range.
namespace ErrorCodes
|
2020-06-01 17:16:09 +00:00
|
|
|
{
|
2022-12-17 14:09:53 +00:00
|
|
|
extern const int INVALID_SETTING_VALUE;
|
|
|
|
}
|
2020-06-01 17:16:09 +00:00
|
|
|
|
2022-12-17 14:09:53 +00:00
|
|
|
/// Builds part-upload settings from `settings` alone.
/// Starts from the default-constructed state, then copies every s3_* value handled by
/// updateFromSettingsImpl() unconditionally and validates the outcome.
/// Throws whatever validate() throws if the resulting combination is invalid.
S3Settings::RequestSettings::PartUploadSettings::PartUploadSettings(const Settings & settings)
    : PartUploadSettings()
{
    /// `false`: take every value, not only the ones explicitly changed by the user.
    constexpr bool only_changed = false;
    updateFromSettingsImpl(settings, only_changed);

    validate();
}
|
|
|
|
|
|
|
|
/// Builds part-upload settings for the configuration section `key`.
/// The object is first initialised (and validated) from `settings` by the delegated
/// constructor; each value present under `key` in `config` then overrides it, with the
/// current member value used as the fallback when the config entry is absent.
/// The final combination is validated again.
S3Settings::RequestSettings::PartUploadSettings::PartUploadSettings(
    const Poco::Util::AbstractConfiguration & config,
    const String & key,
    const Settings & settings)
    : PartUploadSettings(settings)
{
    /// Reads `key + suffix` from the config, falling back to the value already stored.
    const auto read_or_keep = [&config, &key](const char * suffix, auto current_value)
    {
        return config.getUInt64(key + suffix, current_value);
    };

    min_upload_part_size = read_or_keep(".min_upload_part_size", min_upload_part_size);
    max_upload_part_size = read_or_keep(".max_upload_part_size", max_upload_part_size);
    upload_part_size_multiply_factor = read_or_keep(".upload_part_size_multiply_factor", upload_part_size_multiply_factor);
    upload_part_size_multiply_parts_count_threshold
        = read_or_keep(".upload_part_size_multiply_parts_count_threshold", upload_part_size_multiply_parts_count_threshold);
    max_part_number = read_or_keep(".max_part_number", max_part_number);
    max_single_part_upload_size = read_or_keep(".max_single_part_upload_size", max_single_part_upload_size);
    max_single_operation_copy_size = read_or_keep(".max_single_operation_copy_size", max_single_operation_copy_size);

    validate();
}
|
|
|
|
|
2022-12-17 16:02:34 +00:00
|
|
|
void S3Settings::RequestSettings::PartUploadSettings::updateFromSettingsImpl(const Settings & settings, bool if_changed)
|
|
|
|
{
|
|
|
|
if (!if_changed || settings.s3_min_upload_part_size.changed)
|
|
|
|
min_upload_part_size = settings.s3_min_upload_part_size;
|
|
|
|
|
|
|
|
if (!if_changed || settings.s3_upload_part_size_multiply_factor.changed)
|
|
|
|
upload_part_size_multiply_factor = settings.s3_upload_part_size_multiply_factor;
|
|
|
|
|
|
|
|
if (!if_changed || settings.s3_upload_part_size_multiply_parts_count_threshold.changed)
|
|
|
|
upload_part_size_multiply_parts_count_threshold = settings.s3_upload_part_size_multiply_parts_count_threshold;
|
|
|
|
|
|
|
|
if (!if_changed || settings.s3_max_single_part_upload_size.changed)
|
|
|
|
max_single_part_upload_size = settings.s3_max_single_part_upload_size;
|
|
|
|
}
|
|
|
|
|
2022-12-17 14:09:53 +00:00
|
|
|
void S3Settings::RequestSettings::PartUploadSettings::validate()
|
|
|
|
{
|
|
|
|
static constexpr size_t min_upload_part_size_limit = 5 * 1024 * 1024;
|
|
|
|
if (min_upload_part_size < min_upload_part_size_limit)
|
|
|
|
throw Exception(
|
|
|
|
ErrorCodes::INVALID_SETTING_VALUE,
|
|
|
|
"Setting min_upload_part_size has invalid value {} which is less than the s3 API limit {}",
|
|
|
|
ReadableSize(min_upload_part_size), ReadableSize(min_upload_part_size_limit));
|
|
|
|
|
|
|
|
static constexpr size_t max_upload_part_size_limit = 5ull * 1024 * 1024 * 1024;
|
|
|
|
if (max_upload_part_size > max_upload_part_size_limit)
|
|
|
|
throw Exception(
|
|
|
|
ErrorCodes::INVALID_SETTING_VALUE,
|
|
|
|
"Setting max_upload_part_size has invalid value {} which is grater than the s3 API limit {}",
|
|
|
|
ReadableSize(max_upload_part_size), ReadableSize(max_upload_part_size_limit));
|
|
|
|
|
|
|
|
if (max_single_part_upload_size > max_upload_part_size_limit)
|
|
|
|
throw Exception(
|
|
|
|
ErrorCodes::INVALID_SETTING_VALUE,
|
|
|
|
"Setting max_single_part_upload_size has invalid value {} which is grater than the s3 API limit {}",
|
|
|
|
ReadableSize(max_single_part_upload_size), ReadableSize(max_upload_part_size_limit));
|
|
|
|
|
|
|
|
if (max_single_operation_copy_size > max_upload_part_size_limit)
|
|
|
|
throw Exception(
|
|
|
|
ErrorCodes::INVALID_SETTING_VALUE,
|
|
|
|
"Setting max_single_operation_copy_size has invalid value {} which is grater than the s3 API limit {}",
|
|
|
|
ReadableSize(max_single_operation_copy_size), ReadableSize(max_upload_part_size_limit));
|
|
|
|
|
|
|
|
if (max_upload_part_size < min_upload_part_size)
|
|
|
|
throw Exception(
|
|
|
|
ErrorCodes::INVALID_SETTING_VALUE,
|
|
|
|
"Setting max_upload_part_size ({}) can't be less than setting min_upload_part_size {}",
|
|
|
|
ReadableSize(max_upload_part_size), ReadableSize(min_upload_part_size));
|
|
|
|
|
|
|
|
if (!upload_part_size_multiply_factor)
|
|
|
|
throw Exception(
|
|
|
|
ErrorCodes::INVALID_SETTING_VALUE,
|
|
|
|
"Setting upload_part_size_multiply_factor cannot be zero",
|
|
|
|
upload_part_size_multiply_factor);
|
|
|
|
|
|
|
|
if (!upload_part_size_multiply_parts_count_threshold)
|
|
|
|
throw Exception(
|
|
|
|
ErrorCodes::INVALID_SETTING_VALUE,
|
|
|
|
"Setting upload_part_size_multiply_parts_count_threshold cannot be zero",
|
|
|
|
upload_part_size_multiply_parts_count_threshold);
|
|
|
|
|
|
|
|
if (!max_part_number)
|
|
|
|
throw Exception(
|
|
|
|
ErrorCodes::INVALID_SETTING_VALUE,
|
|
|
|
"Setting max_part_number cannot be zero",
|
|
|
|
max_part_number);
|
|
|
|
|
|
|
|
static constexpr size_t max_part_number_limit = 10000;
|
|
|
|
if (max_part_number > max_part_number_limit)
|
|
|
|
throw Exception(
|
|
|
|
ErrorCodes::INVALID_SETTING_VALUE,
|
|
|
|
"Setting max_part_number has invalid value {} which is grater than the s3 API limit {}",
|
|
|
|
ReadableSize(max_part_number), ReadableSize(max_part_number_limit));
|
|
|
|
|
|
|
|
size_t maybe_overflow;
|
|
|
|
if (common::mulOverflow(max_upload_part_size, upload_part_size_multiply_factor, maybe_overflow))
|
|
|
|
throw Exception(
|
|
|
|
ErrorCodes::INVALID_SETTING_VALUE,
|
|
|
|
"Setting upload_part_size_multiply_factor is too big ({}). Multiplication to max_upload_part_size ({}) will cause integer overflow",
|
|
|
|
ReadableSize(max_part_number), ReadableSize(max_part_number_limit));
|
|
|
|
|
|
|
|
/// TODO: it's possible to set too small limits. We can check that max possible object size is not too small.
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Builds request settings from `settings` alone.
/// The nested upload settings are constructed from the same `settings`; the remaining
/// request-level fields are then copied unconditionally by updateFromSettingsImpl().
S3Settings::RequestSettings::RequestSettings(const Settings & settings)
    : upload_settings(settings)
{
    /// `false`: copy every value regardless of its `changed` flag.
    constexpr bool only_changed = false;
    updateFromSettingsImpl(settings, only_changed);
}
|
|
|
|
|
|
|
|
void S3Settings::RequestSettings::updateFromSettingsImpl(const Settings & settings, bool if_changed)
|
|
|
|
{
|
|
|
|
if (!if_changed || settings.s3_max_single_read_retries.changed)
|
|
|
|
max_single_read_retries = settings.s3_max_single_read_retries;
|
|
|
|
|
|
|
|
if (!if_changed || settings.s3_max_connections.changed)
|
|
|
|
max_connections = settings.s3_max_connections;
|
|
|
|
|
|
|
|
if (!if_changed || settings.s3_check_objects_after_upload.changed)
|
|
|
|
check_objects_after_upload = settings.s3_check_objects_after_upload;
|
|
|
|
|
|
|
|
if (!if_changed || settings.s3_max_unexpected_write_error_retries.changed)
|
|
|
|
max_unexpected_write_error_retries = settings.s3_max_unexpected_write_error_retries;
|
|
|
|
|
|
|
|
if ((!if_changed || settings.s3_max_get_rps.changed || settings.s3_max_get_burst.changed) && settings.s3_max_get_rps)
|
2022-12-17 14:09:53 +00:00
|
|
|
get_request_throttler = std::make_shared<Throttler>(
|
|
|
|
settings.s3_max_get_rps, settings.s3_max_get_burst ? settings.s3_max_get_burst : Throttler::default_burst_seconds * settings.s3_max_get_rps);
|
2022-12-17 16:02:34 +00:00
|
|
|
|
|
|
|
if ((!if_changed || settings.s3_max_put_rps.changed || settings.s3_max_put_burst.changed) && settings.s3_max_put_rps)
|
2022-12-17 14:09:53 +00:00
|
|
|
put_request_throttler = std::make_shared<Throttler>(
|
|
|
|
settings.s3_max_put_rps, settings.s3_max_put_burst ? settings.s3_max_put_burst : Throttler::default_burst_seconds * settings.s3_max_put_rps);
|
|
|
|
}
|
|
|
|
|
2022-12-17 16:02:34 +00:00
|
|
|
void S3Settings::RequestSettings::updateFromSettings(const Settings & settings)
|
|
|
|
{
|
|
|
|
updateFromSettingsImpl(settings, true);
|
|
|
|
upload_settings.updateFromSettings(settings);
|
|
|
|
}
|
|
|
|
|
2022-12-17 14:09:53 +00:00
|
|
|
/// Builds request settings for the configuration section `key`.
/// Every value is read from `config` under `key`; when an entry is absent the corresponding
/// value from `settings` is used as the fallback. The nested upload settings are built from
/// the same three arguments.
/// GET/PUT throttlers are created only when the effective rps value is non-zero.
S3Settings::RequestSettings::RequestSettings(
    const Poco::Util::AbstractConfiguration & config,
    const String & key,
    const Settings & settings)
    : upload_settings(config, key, settings)
{
    max_single_read_retries = config.getUInt64(key + ".max_single_read_retries", settings.s3_max_single_read_retries);
    max_connections = config.getUInt64(key + ".max_connections", settings.s3_max_connections);

    /// Fall back to the user/profile setting (not a hard-coded `false`), like every other field here.
    check_objects_after_upload = config.getBool(key + ".check_objects_after_upload", settings.s3_check_objects_after_upload);

    /// Keep this constructor in sync with updateFromSettingsImpl(), which also maintains this field.
    max_unexpected_write_error_retries
        = config.getUInt64(key + ".max_unexpected_write_error_retries", settings.s3_max_unexpected_write_error_retries);

    /// NOTE: it would be better to reuse old throttlers to avoid losing token bucket state on every config reload,
    /// which could lead to exceeding limit for short time. But it is good enough unless very high `burst` values are used.
    if (UInt64 max_get_rps = config.getUInt64(key + ".max_get_rps", settings.s3_max_get_rps))
    {
        /// Default burst: the explicit setting if non-zero, otherwise derived from the effective rps.
        size_t default_max_get_burst = settings.s3_max_get_burst
            ? settings.s3_max_get_burst
            : (Throttler::default_burst_seconds * max_get_rps);

        size_t max_get_burst = config.getUInt64(key + ".max_get_burst", default_max_get_burst);

        get_request_throttler = std::make_shared<Throttler>(max_get_rps, max_get_burst);
    }

    if (UInt64 max_put_rps = config.getUInt64(key + ".max_put_rps", settings.s3_max_put_rps))
    {
        size_t default_max_put_burst = settings.s3_max_put_burst
            ? settings.s3_max_put_burst
            : (Throttler::default_burst_seconds * max_put_rps);

        size_t max_put_burst = config.getUInt64(key + ".max_put_burst", default_max_put_burst);

        put_request_throttler = std::make_shared<Throttler>(max_put_rps, max_put_burst);
    }
}
|
|
|
|
|
|
|
|
/// Rebuilds the per-endpoint settings map from the `config_elem` section of `config`.
/// The existing map is cleared first (under the mutex); if the section does not exist the
/// map stays empty. Each child key that has an `.endpoint` entry produces one map entry
/// keyed by that endpoint string; children without `.endpoint` are ignored.
/// `settings` supplies fallbacks for request settings not present in the config.
void StorageS3Settings::loadFromConfig(const String & config_elem, const Poco::Util::AbstractConfiguration & config, const Settings & settings)
{
    std::lock_guard lock(mutex);
    s3_settings.clear();

    if (!config.has(config_elem))
        return;

    Poco::Util::AbstractConfiguration::Keys config_keys;
    config.keys(config_elem, config_keys);

    for (const String & key : config_keys)
    {
        const String section = config_elem + "." + key;
        const String endpoint_entry = section + ".endpoint";

        if (!config.has(endpoint_entry))
            continue;

        auto endpoint = config.getString(endpoint_entry);
        auto auth_settings = S3::AuthSettings::loadFromConfig(section, config);
        S3Settings::RequestSettings request_settings(config, section, settings);

        /// emplace() keeps the first entry if the same endpoint appears more than once.
        s3_settings.emplace(endpoint, S3Settings{std::move(auth_settings), std::move(request_settings)});
    }
}
|
|
|
|
|
2022-04-03 22:33:59 +00:00
|
|
|
/// Returns a copy of the settings registered for a stored endpoint that is a prefix of `endpoint`.
/// Stored keys are scanned backwards, starting with the last key that is not greater than
/// `endpoint`; the first key that `endpoint` starts with wins.
/// If no stored key is a prefix of `endpoint`, a default-constructed S3Settings is returned.
S3Settings StorageS3Settings::getSettings(const String & endpoint) const
{
    std::lock_guard lock(mutex);

    /// Linear time algorithm may be replaced with logarithmic with prefix tree map.
    auto candidate = s3_settings.upper_bound(endpoint);
    while (candidate != s3_settings.begin())
    {
        --candidate;
        if (boost::algorithm::starts_with(endpoint, candidate->first))
            return candidate->second;
    }

    return {};
}
|
|
|
|
|
|
|
|
}
|