2020-06-01 17:16:09 +00:00
|
|
|
#pragma once
|
|
|
|
|
|
|
|
#include <map>
|
|
|
|
#include <memory>
|
|
|
|
#include <mutex>
|
2020-12-10 09:19:42 +00:00
|
|
|
#include <optional>
|
2020-09-15 09:55:57 +00:00
|
|
|
#include <vector>
|
2021-10-02 07:13:14 +00:00
|
|
|
#include <base/types.h>
|
2022-04-03 22:33:59 +00:00
|
|
|
#include <Interpreters/Context_fwd.h>
|
2022-11-17 16:35:04 +00:00
|
|
|
#include <Common/Throttler_fwd.h>
|
2020-06-01 17:16:09 +00:00
|
|
|
|
2022-09-15 07:45:28 +00:00
|
|
|
#include <IO/S3Common.h>
|
|
|
|
|
2020-06-01 17:16:09 +00:00
|
|
|
/// Forward declaration only: this header refers to the Poco configuration type
/// by const reference, so the full Poco header is not needed here.
namespace Poco::Util
{
class AbstractConfiguration;
}
|
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
2022-04-03 22:33:59 +00:00
|
|
|
/// Forward declarations: both types are used below only by const reference.
struct Settings;
class NamedCollection;
|
2022-04-03 22:33:59 +00:00
|
|
|
|
|
|
|
struct S3Settings
|
2020-06-01 17:16:09 +00:00
|
|
|
{
|
2022-11-17 16:35:04 +00:00
|
|
|
struct RequestSettings
|
2021-03-04 15:56:55 +00:00
|
|
|
{
|
2022-12-17 14:09:53 +00:00
|
|
|
struct PartUploadSettings
|
|
|
|
{
|
|
|
|
size_t min_upload_part_size = 16 * 1024 * 1024;
|
|
|
|
size_t max_upload_part_size = 5ULL * 1024 * 1024 * 1024;
|
|
|
|
size_t upload_part_size_multiply_factor = 2;
|
|
|
|
size_t upload_part_size_multiply_parts_count_threshold = 500;
|
|
|
|
size_t max_part_number = 10000;
|
|
|
|
size_t max_single_part_upload_size = 32 * 1024 * 1024;
|
|
|
|
size_t max_single_operation_copy_size = 5ULL * 1024 * 1024 * 1024;
|
2023-01-04 16:23:22 +00:00
|
|
|
String storage_class_name;
|
2022-12-17 14:09:53 +00:00
|
|
|
|
2022-12-17 16:02:34 +00:00
|
|
|
void updateFromSettings(const Settings & settings) { updateFromSettingsImpl(settings, true); }
|
|
|
|
void validate();
|
|
|
|
|
2022-12-17 14:09:53 +00:00
|
|
|
private:
|
|
|
|
PartUploadSettings() = default;
|
|
|
|
explicit PartUploadSettings(const Settings & settings);
|
|
|
|
explicit PartUploadSettings(const NamedCollection & collection);
|
2022-12-18 12:31:17 +00:00
|
|
|
PartUploadSettings(
|
|
|
|
const Poco::Util::AbstractConfiguration & config,
|
|
|
|
const String & config_prefix,
|
|
|
|
const Settings & settings,
|
|
|
|
String setting_name_prefix = {});
|
2022-12-17 14:09:53 +00:00
|
|
|
|
2022-12-17 16:02:34 +00:00
|
|
|
void updateFromSettingsImpl(const Settings & settings, bool if_changed);
|
2022-12-17 14:09:53 +00:00
|
|
|
|
|
|
|
friend struct RequestSettings;
|
|
|
|
};
|
|
|
|
|
|
|
|
private:
|
|
|
|
PartUploadSettings upload_settings = {};
|
|
|
|
|
|
|
|
public:
|
|
|
|
size_t max_single_read_retries = 4;
|
|
|
|
size_t max_connections = 1024;
|
2022-08-23 13:12:09 +00:00
|
|
|
bool check_objects_after_upload = false;
|
2022-12-17 14:09:53 +00:00
|
|
|
size_t max_unexpected_write_error_retries = 4;
|
2023-01-20 19:10:23 +00:00
|
|
|
size_t list_object_keys_size = 1000;
|
2022-11-17 16:35:04 +00:00
|
|
|
ThrottlerPtr get_request_throttler;
|
|
|
|
ThrottlerPtr put_request_throttler;
|
2023-01-27 14:07:14 +00:00
|
|
|
|
|
|
|
/// If this is set to false then `S3::getObjectSize()` will use `GetObjectAttributes` request instead of `HeadObject`.
|
|
|
|
/// Details: `HeadObject` request never returns a response body (even if there is an error) however
|
|
|
|
/// if the request was sent without specifying a region in the endpoint (i.e. for example "https://test.s3.amazonaws.com/mydata.csv"
|
|
|
|
/// instead of "https://test.s3-us-west-2.amazonaws.com/mydata.csv") then that response body is one of the main ways to determine
|
|
|
|
/// the correct region and try to repeat the request again with the correct region.
|
|
|
|
/// For any other request type (`GetObject`, `ListObjects`, etc.) AWS SDK does that because they have response bodies, but for `HeadObject`
|
|
|
|
/// there is no response body so this way doesn't work. That's why it's better to use `GetObjectAttributes` requests sometimes.
|
|
|
|
/// See https://github.com/aws/aws-sdk-cpp/issues/1558 and also the function S3ErrorMarshaller::ExtractRegion() for more information.
|
|
|
|
bool allow_head_object_request = true;
|
2022-04-06 20:27:38 +00:00
|
|
|
|
2023-02-04 14:11:14 +00:00
|
|
|
bool throw_on_zero_files_match = false;
|
2023-02-03 04:27:13 +00:00
|
|
|
|
2022-12-17 14:09:53 +00:00
|
|
|
const PartUploadSettings & getUploadSettings() const { return upload_settings; }
|
2022-04-06 20:27:38 +00:00
|
|
|
|
2022-11-17 16:35:04 +00:00
|
|
|
RequestSettings() = default;
|
|
|
|
explicit RequestSettings(const Settings & settings);
|
2022-12-17 14:09:53 +00:00
|
|
|
explicit RequestSettings(const NamedCollection & collection);
|
2022-12-18 12:31:17 +00:00
|
|
|
|
|
|
|
/// What's the setting_name_prefix, and why do we need it?
|
|
|
|
/// There are (at least) two config sections where s3 settings can be specified:
|
|
|
|
/// * settings for s3 disk (clickhouse/storage_configuration/disks)
|
|
|
|
/// * settings for s3 storage (clickhouse/s3), which are also used for backups
|
|
|
|
/// Even though settings are the same, in case of s3 disk they are prefixed with "s3_"
|
|
|
|
/// ("s3_max_single_part_upload_size"), but in case of s3 storage they are not
|
|
|
|
/// ( "max_single_part_upload_size"). Why this happened is a complete mystery to me.
|
|
|
|
RequestSettings(
|
|
|
|
const Poco::Util::AbstractConfiguration & config,
|
|
|
|
const String & config_prefix,
|
|
|
|
const Settings & settings,
|
|
|
|
String setting_name_prefix = {});
|
2022-12-17 16:02:34 +00:00
|
|
|
|
|
|
|
void updateFromSettings(const Settings & settings);
|
|
|
|
|
|
|
|
private:
|
|
|
|
void updateFromSettingsImpl(const Settings & settings, bool if_changed);
|
2022-04-03 22:33:59 +00:00
|
|
|
};
|
|
|
|
|
2022-09-15 07:45:28 +00:00
|
|
|
S3::AuthSettings auth_settings;
|
2022-11-17 16:35:04 +00:00
|
|
|
RequestSettings request_settings;
|
2020-06-01 17:16:09 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
/// Settings for the StorageS3.
|
|
|
|
class StorageS3Settings
|
|
|
|
{
|
|
|
|
public:
|
2022-04-03 22:33:59 +00:00
|
|
|
void loadFromConfig(const String & config_elem, const Poco::Util::AbstractConfiguration & config, const Settings & settings);
|
2020-06-01 17:16:09 +00:00
|
|
|
|
2022-04-03 22:33:59 +00:00
|
|
|
S3Settings getSettings(const String & endpoint) const;
|
2020-06-01 17:16:09 +00:00
|
|
|
|
|
|
|
private:
|
|
|
|
mutable std::mutex mutex;
|
2022-04-03 22:33:59 +00:00
|
|
|
std::map<const String, const S3Settings> s3_settings;
|
2020-06-01 17:16:09 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
}
|