ClickHouse/src/Storages/StorageS3Settings.h

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

123 lines
4.7 KiB
C++
Raw Normal View History

#pragma once
#include <map>
#include <memory>
#include <mutex>
#include <optional>
2020-09-15 09:55:57 +00:00
#include <vector>
2021-10-02 07:13:14 +00:00
#include <base/types.h>
2022-04-03 22:33:59 +00:00
#include <Interpreters/Context_fwd.h>
#include <Common/Throttler_fwd.h>
#include <IO/S3Common.h>
/// Forward declaration: this header only refers to the Poco configuration type by const reference.
namespace Poco::Util { class AbstractConfiguration; }
namespace DB
{
/// Forward declarations: both types are only used by const reference in the declarations of this header.
struct Settings;
class NamedCollection;
struct S3Settings
{
struct RequestSettings
{
2022-12-17 14:09:53 +00:00
struct PartUploadSettings
{
size_t min_upload_part_size = 16 * 1024 * 1024;
size_t max_upload_part_size = 5ULL * 1024 * 1024 * 1024;
size_t upload_part_size_multiply_factor = 2;
size_t upload_part_size_multiply_parts_count_threshold = 500;
size_t max_part_number = 10000;
size_t max_single_part_upload_size = 32 * 1024 * 1024;
size_t max_single_operation_copy_size = 5ULL * 1024 * 1024 * 1024;
2023-01-04 16:23:22 +00:00
String storage_class_name;
2022-12-17 14:09:53 +00:00
2022-12-17 16:02:34 +00:00
void updateFromSettings(const Settings & settings) { updateFromSettingsImpl(settings, true); }
void validate();
2022-12-17 14:09:53 +00:00
private:
PartUploadSettings() = default;
explicit PartUploadSettings(const Settings & settings);
explicit PartUploadSettings(const NamedCollection & collection);
2022-12-18 12:31:17 +00:00
PartUploadSettings(
const Poco::Util::AbstractConfiguration & config,
const String & config_prefix,
const Settings & settings,
String setting_name_prefix = {});
2022-12-17 14:09:53 +00:00
2022-12-17 16:02:34 +00:00
void updateFromSettingsImpl(const Settings & settings, bool if_changed);
2022-12-17 14:09:53 +00:00
friend struct RequestSettings;
};
private:
PartUploadSettings upload_settings = {};
public:
size_t max_single_read_retries = 4;
size_t max_connections = 1024;
bool check_objects_after_upload = false;
2022-12-17 14:09:53 +00:00
size_t max_unexpected_write_error_retries = 4;
size_t list_object_keys_size = 1000;
ThrottlerPtr get_request_throttler;
ThrottlerPtr put_request_throttler;
/// If this is set to false then `S3::getObjectSize()` will use `GetObjectAttributes` request instead of `HeadObject`.
/// Details: `HeadObject` request never returns a response body (even if there is an error) however
/// if the request was sent without specifying a region in the endpoint (i.e. for example "https://test.s3.amazonaws.com/mydata.csv"
/// instead of "https://test.s3-us-west-2.amazonaws.com/mydata.csv") then that response body is one of the main ways to determine
/// the correct region and try to repeat the request again with the correct region.
/// For any other request type (`GetObject`, `ListObjects`, etc.) AWS SDK does that because they have response bodies, but for `HeadObject`
/// there is no response body so this way doesn't work. That's why it's better to use `GetObjectAttributes` requests sometimes.
/// See https://github.com/aws/aws-sdk-cpp/issues/1558 and also the function S3ErrorMarshaller::ExtractRegion() for more information.
bool allow_head_object_request = true;
2022-04-06 20:27:38 +00:00
2022-12-17 14:09:53 +00:00
const PartUploadSettings & getUploadSettings() const { return upload_settings; }
2022-04-06 20:27:38 +00:00
RequestSettings() = default;
explicit RequestSettings(const Settings & settings);
2022-12-17 14:09:53 +00:00
explicit RequestSettings(const NamedCollection & collection);
2022-12-18 12:31:17 +00:00
/// What's the setting_name_prefix, and why do we need it?
/// There are (at least) two config sections where s3 settings can be specified:
/// * settings for s3 disk (clickhouse/storage_configuration/disks)
/// * settings for s3 storage (clickhouse/s3), which are also used for backups
/// Even though settings are the same, in case of s3 disk they are prefixed with "s3_"
/// ("s3_max_single_part_upload_size"), but in case of s3 storage they are not
/// ( "max_single_part_upload_size"). Why this happened is a complete mystery to me.
RequestSettings(
const Poco::Util::AbstractConfiguration & config,
const String & config_prefix,
const Settings & settings,
String setting_name_prefix = {});
2022-12-17 16:02:34 +00:00
void updateFromSettings(const Settings & settings);
private:
void updateFromSettingsImpl(const Settings & settings, bool if_changed);
2022-04-03 22:33:59 +00:00
};
S3::AuthSettings auth_settings;
RequestSettings request_settings;
};
/// Settings for the StorageS3.
class StorageS3Settings
{
public:
2022-04-03 22:33:59 +00:00
void loadFromConfig(const String & config_elem, const Poco::Util::AbstractConfiguration & config, const Settings & settings);
2022-04-03 22:33:59 +00:00
S3Settings getSettings(const String & endpoint) const;
private:
mutable std::mutex mutex;
2022-04-03 22:33:59 +00:00
std::map<const String, const S3Settings> s3_settings;
};
}