2019-12-06 14:37:21 +00:00
|
|
|
#include <Common/config.h>

#if USE_AWS_S3

# include <IO/S3Common.h>

# include <IO/WriteBufferFromString.h>
# include <Storages/StorageS3Settings.h>

# include <aws/core/auth/AWSCredentialsProvider.h>
# include <aws/core/auth/AWSCredentialsProviderChain.h>
# include <aws/core/auth/STSCredentialsProvider.h>
# include <aws/core/client/DefaultRetryStrategy.h>
# include <aws/core/platform/Environment.h>
# include <aws/core/utils/logging/LogMacros.h>
# include <aws/core/utils/logging/LogSystemInterface.h>
# include <aws/s3/S3Client.h>
# include <aws/core/http/HttpClientFactory.h>
# include <IO/S3/PocoHTTPClientFactory.h>
# include <IO/S3/PocoHTTPClient.h>
# include <Poco/URI.h>
# include <re2/re2.h>
# include <boost/algorithm/string/case_conv.hpp>
# include <common/logger_useful.h>

# include <cstdarg>
# include <cstdio>
# include <string>
|
2019-11-05 07:54:13 +00:00
|
|
|
|
2019-12-17 13:50:39 +00:00
|
|
|
namespace
|
|
|
|
{
|
2020-07-15 14:16:45 +00:00
|
|
|
|
|
|
|
/// Pairs of {tag passed to the AWS logging callbacks, name of the Poco logger used for that tag}.
/// The first row is also the one whose logger is used as the fallback for tags not listed here.
/// Both the strings and the pointers are constant: this is a read-only lookup table.
const char * const S3_LOGGER_TAG_NAMES[][2] = {
    {"AWSClient", "AWSClient"},
    {"AWSAuthV4Signer", "AWSClient (AWSAuthV4Signer)"},
};
|
|
|
|
|
2020-05-30 21:57:37 +00:00
|
|
|
/// Translates an AWS SDK log level into the matching (DB::LogsLevel, Poco priority) pair.
/// The returned reference points into a function-local static table.
/// A level missing from the table makes `at()` throw std::out_of_range.
const std::pair<DB::LogsLevel, Poco::Message::Priority> & convertLogLevel(Aws::Utils::Logging::LogLevel log_level)
{
    using AwsLevel = Aws::Utils::Logging::LogLevel;
    using Translated = std::pair<DB::LogsLevel, Poco::Message::Priority>;

    static const std::unordered_map<AwsLevel, Translated> translation_table{
        {AwsLevel::Off, Translated{DB::LogsLevel::none, Poco::Message::PRIO_FATAL}},
        {AwsLevel::Fatal, Translated{DB::LogsLevel::error, Poco::Message::PRIO_FATAL}},
        {AwsLevel::Error, Translated{DB::LogsLevel::error, Poco::Message::PRIO_ERROR}},
        {AwsLevel::Warn, Translated{DB::LogsLevel::warning, Poco::Message::PRIO_WARNING}},
        {AwsLevel::Info, Translated{DB::LogsLevel::information, Poco::Message::PRIO_INFORMATION}},
        /// Both Debug and Trace are reported at trace level.
        {AwsLevel::Debug, Translated{DB::LogsLevel::trace, Poco::Message::PRIO_TRACE}},
        {AwsLevel::Trace, Translated{DB::LogsLevel::trace, Poco::Message::PRIO_TRACE}},
    };

    return translation_table.at(log_level);
}
|
|
|
|
|
2020-01-10 09:24:05 +00:00
|
|
|
class AWSLogger final : public Aws::Utils::Logging::LogSystemInterface
|
2019-12-17 13:50:39 +00:00
|
|
|
{
|
|
|
|
public:
|
2020-07-15 14:16:45 +00:00
|
|
|
AWSLogger()
|
|
|
|
{
|
|
|
|
for (auto [tag, name] : S3_LOGGER_TAG_NAMES)
|
|
|
|
tag_loggers[tag] = &Poco::Logger::get(name);
|
|
|
|
|
|
|
|
default_logger = tag_loggers[S3_LOGGER_TAG_NAMES[0][0]];
|
|
|
|
}
|
|
|
|
|
2019-12-17 13:50:39 +00:00
|
|
|
~AWSLogger() final = default;
|
|
|
|
|
|
|
|
Aws::Utils::Logging::LogLevel GetLogLevel() const final { return Aws::Utils::Logging::LogLevel::Trace; }
|
|
|
|
|
2020-03-18 03:27:32 +00:00
|
|
|
void Log(Aws::Utils::Logging::LogLevel log_level, const char * tag, const char * format_str, ...) final // NOLINT
|
2019-12-17 13:50:39 +00:00
|
|
|
{
|
2020-07-15 14:16:45 +00:00
|
|
|
callLogImpl(log_level, tag, format_str); /// FIXME. Variadic arguments?
|
2019-12-17 13:50:39 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
void LogStream(Aws::Utils::Logging::LogLevel log_level, const char * tag, const Aws::OStringStream & message_stream) final
|
2020-07-15 14:16:45 +00:00
|
|
|
{
|
|
|
|
callLogImpl(log_level, tag, message_stream.str().c_str());
|
|
|
|
}
|
|
|
|
|
|
|
|
void callLogImpl(Aws::Utils::Logging::LogLevel log_level, const char * tag, const char * message)
|
2019-12-17 13:50:39 +00:00
|
|
|
{
|
2020-04-22 05:39:31 +00:00
|
|
|
const auto & [level, prio] = convertLogLevel(log_level);
|
2020-07-15 14:16:45 +00:00
|
|
|
if (tag_loggers.count(tag) > 0)
|
|
|
|
{
|
|
|
|
LOG_IMPL(tag_loggers[tag], level, prio, "{}", message);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
LOG_IMPL(default_logger, level, prio, "{}: {}", tag, message);
|
|
|
|
}
|
2019-12-17 13:50:39 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
void Flush() final {}
|
|
|
|
|
|
|
|
private:
|
2020-07-15 14:16:45 +00:00
|
|
|
Poco::Logger * default_logger;
|
|
|
|
std::unordered_map<String, Poco::Logger *> tag_loggers;
|
2019-12-17 13:50:39 +00:00
|
|
|
};
|
2020-06-01 17:16:09 +00:00
|
|
|
|
2020-12-10 09:19:42 +00:00
|
|
|
class S3CredentialsProviderChain : public Aws::Auth::AWSCredentialsProviderChain
|
|
|
|
{
|
|
|
|
public:
|
|
|
|
explicit S3CredentialsProviderChain(const DB::S3::PocoHTTPClientConfiguration & configuration, const Aws::Auth::AWSCredentials & credentials, bool use_environment_credentials)
|
|
|
|
{
|
|
|
|
if (use_environment_credentials)
|
|
|
|
{
|
|
|
|
const DB::RemoteHostFilter & remote_host_filter = configuration.remote_host_filter;
|
|
|
|
const unsigned int s3_max_redirects = configuration.s3_max_redirects;
|
|
|
|
|
|
|
|
static const char AWS_ECS_CONTAINER_CREDENTIALS_RELATIVE_URI[] = "AWS_CONTAINER_CREDENTIALS_RELATIVE_URI";
|
|
|
|
static const char AWS_ECS_CONTAINER_CREDENTIALS_FULL_URI[] = "AWS_CONTAINER_CREDENTIALS_FULL_URI";
|
|
|
|
static const char AWS_ECS_CONTAINER_AUTHORIZATION_TOKEN[] = "AWS_CONTAINER_AUTHORIZATION_TOKEN";
|
|
|
|
static const char AWS_EC2_METADATA_DISABLED[] = "AWS_EC2_METADATA_DISABLED";
|
|
|
|
|
|
|
|
auto * logger = &Poco::Logger::get("S3CredentialsProviderChain");
|
|
|
|
|
|
|
|
/// The only difference from DefaultAWSCredentialsProviderChain::DefaultAWSCredentialsProviderChain()
|
|
|
|
/// is that this chain uses custom ClientConfiguration.
|
|
|
|
|
|
|
|
AddProvider(std::make_shared<Aws::Auth::EnvironmentAWSCredentialsProvider>());
|
|
|
|
AddProvider(std::make_shared<Aws::Auth::ProfileConfigFileAWSCredentialsProvider>());
|
|
|
|
AddProvider(std::make_shared<Aws::Auth::STSAssumeRoleWebIdentityCredentialsProvider>());
|
|
|
|
|
|
|
|
/// ECS TaskRole Credentials only available when ENVIRONMENT VARIABLE is set.
|
|
|
|
const auto relative_uri = Aws::Environment::GetEnv(AWS_ECS_CONTAINER_CREDENTIALS_RELATIVE_URI);
|
|
|
|
LOG_DEBUG(logger, "The environment variable value {} is {}", AWS_ECS_CONTAINER_CREDENTIALS_RELATIVE_URI,
|
|
|
|
relative_uri);
|
|
|
|
|
|
|
|
const auto absolute_uri = Aws::Environment::GetEnv(AWS_ECS_CONTAINER_CREDENTIALS_FULL_URI);
|
|
|
|
LOG_DEBUG(logger, "The environment variable value {} is {}", AWS_ECS_CONTAINER_CREDENTIALS_FULL_URI,
|
|
|
|
absolute_uri);
|
|
|
|
|
|
|
|
const auto ec2_metadata_disabled = Aws::Environment::GetEnv(AWS_EC2_METADATA_DISABLED);
|
|
|
|
LOG_DEBUG(logger, "The environment variable value {} is {}", AWS_EC2_METADATA_DISABLED,
|
|
|
|
ec2_metadata_disabled);
|
|
|
|
|
|
|
|
if (!relative_uri.empty())
|
|
|
|
{
|
|
|
|
AddProvider(std::make_shared<Aws::Auth::TaskRoleCredentialsProvider>(relative_uri.c_str()));
|
|
|
|
LOG_INFO(logger, "Added ECS metadata service credentials provider with relative path: [{}] to the provider chain.",
|
|
|
|
relative_uri);
|
|
|
|
}
|
|
|
|
else if (!absolute_uri.empty())
|
|
|
|
{
|
|
|
|
const auto token = Aws::Environment::GetEnv(AWS_ECS_CONTAINER_AUTHORIZATION_TOKEN);
|
|
|
|
AddProvider(std::make_shared<Aws::Auth::TaskRoleCredentialsProvider>(absolute_uri.c_str(), token.c_str()));
|
|
|
|
|
|
|
|
/// DO NOT log the value of the authorization token for security purposes.
|
|
|
|
LOG_INFO(logger, "Added ECS credentials provider with URI: [{}] to the provider chain with a{} authorization token.",
|
|
|
|
absolute_uri, token.empty() ? "n empty" : " non-empty");
|
|
|
|
}
|
|
|
|
else if (Aws::Utils::StringUtils::ToLower(ec2_metadata_disabled.c_str()) != "true")
|
|
|
|
{
|
|
|
|
Aws::Client::ClientConfiguration aws_client_configuration;
|
|
|
|
|
|
|
|
/// See MakeDefaultHttpResourceClientConfiguration().
|
|
|
|
/// This is part of EC2 metadata client, but unfortunately it can't be accessed from outside
|
|
|
|
/// of contrib/aws/aws-cpp-sdk-core/source/internal/AWSHttpResourceClient.cpp
|
|
|
|
aws_client_configuration.maxConnections = 2;
|
|
|
|
aws_client_configuration.scheme = Aws::Http::Scheme::HTTP;
|
|
|
|
|
|
|
|
/// Explicitly set the proxy settings to empty/zero to avoid relying on defaults that could potentially change
|
|
|
|
/// in the future.
|
|
|
|
aws_client_configuration.proxyHost = "";
|
|
|
|
aws_client_configuration.proxyUserName = "";
|
|
|
|
aws_client_configuration.proxyPassword = "";
|
|
|
|
aws_client_configuration.proxyPort = 0;
|
|
|
|
|
|
|
|
/// EC2MetadataService throttles by delaying the response so the service client should set a large read timeout.
|
|
|
|
/// EC2MetadataService delay is in order of seconds so it only make sense to retry after a couple of seconds.
|
|
|
|
aws_client_configuration.connectTimeoutMs = 1000;
|
|
|
|
aws_client_configuration.requestTimeoutMs = 1000;
|
|
|
|
aws_client_configuration.retryStrategy = std::make_shared<Aws::Client::DefaultRetryStrategy>(1, 1000);
|
|
|
|
|
|
|
|
DB::S3::PocoHTTPClientConfiguration client_configuration(aws_client_configuration, remote_host_filter, s3_max_redirects);
|
|
|
|
auto ec2_metadata_client = std::make_shared<Aws::Internal::EC2MetadataClient>(client_configuration);
|
|
|
|
auto config_loader = std::make_shared<Aws::Config::EC2InstanceProfileConfigLoader>(ec2_metadata_client);
|
|
|
|
|
|
|
|
AddProvider(std::make_shared<Aws::Auth::InstanceProfileCredentialsProvider>(config_loader));
|
|
|
|
LOG_INFO(logger, "Added EC2 metadata service credentials provider to the provider chain.");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
AddProvider(std::make_shared<Aws::Auth::SimpleAWSCredentialsProvider>(credentials));
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
2020-06-01 17:16:09 +00:00
|
|
|
class S3AuthSigner : public Aws::Client::AWSAuthV4Signer
|
|
|
|
{
|
|
|
|
public:
|
|
|
|
S3AuthSigner(
|
2020-06-04 13:48:20 +00:00
|
|
|
const Aws::Client::ClientConfiguration & client_configuration,
|
2020-06-01 17:16:09 +00:00
|
|
|
const Aws::Auth::AWSCredentials & credentials,
|
2020-12-10 09:19:42 +00:00
|
|
|
const DB::HeaderCollection & headers_,
|
|
|
|
bool use_environment_credentials)
|
2020-06-01 17:16:09 +00:00
|
|
|
: Aws::Client::AWSAuthV4Signer(
|
2020-12-10 09:19:42 +00:00
|
|
|
std::make_shared<S3CredentialsProviderChain>(
|
|
|
|
static_cast<const DB::S3::PocoHTTPClientConfiguration &>(client_configuration),
|
|
|
|
credentials,
|
|
|
|
use_environment_credentials),
|
2020-06-01 17:16:09 +00:00
|
|
|
"s3",
|
2020-06-04 13:48:20 +00:00
|
|
|
client_configuration.region,
|
2020-06-01 17:16:09 +00:00
|
|
|
Aws::Client::AWSAuthV4Signer::PayloadSigningPolicy::Never,
|
|
|
|
false)
|
|
|
|
, headers(headers_)
|
|
|
|
{
|
|
|
|
}
|
|
|
|
|
2020-06-04 13:48:20 +00:00
|
|
|
bool SignRequest(Aws::Http::HttpRequest & request, const char * region, bool sign_body) const override
|
2020-06-01 17:16:09 +00:00
|
|
|
{
|
2020-06-04 13:48:20 +00:00
|
|
|
auto result = Aws::Client::AWSAuthV4Signer::SignRequest(request, region, sign_body);
|
2020-06-01 17:16:09 +00:00
|
|
|
for (const auto & header : headers)
|
|
|
|
request.SetHeaderValue(header.name, header.value);
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
2021-01-26 07:49:16 +00:00
|
|
|
bool SignRequest(Aws::Http::HttpRequest & request, const char * region, const char * service_name, bool sign_body) const override
|
|
|
|
{
|
|
|
|
auto result = Aws::Client::AWSAuthV4Signer::SignRequest(request, region, service_name, sign_body);
|
|
|
|
for (const auto & header : headers)
|
|
|
|
request.SetHeaderValue(header.name, header.value);
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool PresignRequest(
|
|
|
|
Aws::Http::HttpRequest & request,
|
|
|
|
const char * region,
|
|
|
|
long long expiration_time_sec) const override // NOLINT
|
|
|
|
{
|
|
|
|
auto result = Aws::Client::AWSAuthV4Signer::PresignRequest(request, region, expiration_time_sec);
|
|
|
|
for (const auto & header : headers)
|
|
|
|
request.SetHeaderValue(header.name, header.value);
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
2020-06-01 17:16:09 +00:00
|
|
|
bool PresignRequest(
|
|
|
|
Aws::Http::HttpRequest & request,
|
|
|
|
const char * region,
|
2021-01-26 07:49:16 +00:00
|
|
|
const char * service_name,
|
2020-06-04 13:48:20 +00:00
|
|
|
long long expiration_time_sec) const override // NOLINT
|
2020-06-01 17:16:09 +00:00
|
|
|
{
|
2021-01-26 07:49:16 +00:00
|
|
|
auto result = Aws::Client::AWSAuthV4Signer::PresignRequest(request, region, service_name, expiration_time_sec);
|
2020-06-01 17:16:09 +00:00
|
|
|
for (const auto & header : headers)
|
|
|
|
request.SetHeaderValue(header.name, header.value);
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
|
|
|
private:
|
|
|
|
const DB::HeaderCollection headers;
|
|
|
|
};
|
2020-07-15 14:16:45 +00:00
|
|
|
|
2019-12-17 13:50:39 +00:00
|
|
|
}
|
|
|
|
|
2020-07-15 14:16:45 +00:00
|
|
|
|
2019-12-06 15:14:39 +00:00
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
namespace ErrorCodes
|
|
|
|
{
|
2019-12-03 16:23:24 +00:00
|
|
|
extern const int BAD_ARGUMENTS;
|
2019-12-03 01:22:25 +00:00
|
|
|
}
|
|
|
|
|
2019-12-06 15:14:39 +00:00
|
|
|
namespace S3
|
|
|
|
{
|
2019-12-06 14:48:56 +00:00
|
|
|
/// Initializes the AWS SDK with default options, then installs the Poco-backed logger
/// and the Poco-based HTTP client factory.
ClientFactory::ClientFactory()
{
    aws_options = Aws::SDKOptions{};
    Aws::InitAPI(aws_options);

    /// Route SDK log output through AWSLogger.
    Aws::Utils::Logging::InitializeAWSLogging(
        std::make_shared<AWSLogger>());

    /// Make the SDK create its HTTP clients through PocoHTTPClientFactory.
    Aws::Http::SetHttpClientFactory(
        std::make_shared<PocoHTTPClientFactory>());
}
|
|
|
|
|
2019-12-06 14:48:56 +00:00
|
|
|
/// Tears down what the constructor set up: logging is shut down first,
/// then the SDK itself with the same options that were passed to InitAPI.
ClientFactory::~ClientFactory()
{
    Aws::Utils::Logging::ShutdownAWSLogging();

    Aws::ShutdownAPI(aws_options);
}
|
2019-11-05 07:54:13 +00:00
|
|
|
|
2019-12-10 23:03:45 +00:00
|
|
|
/// Returns the process-wide factory, constructed on first use as a function-local static.
ClientFactory & ClientFactory::instance()
{
    static ClientFactory factory;
    return factory;
}
|
2019-12-03 16:23:24 +00:00
|
|
|
|
2020-05-02 22:34:50 +00:00
|
|
|
/// This method is not static because it requires ClientFactory to be initialized.
|
|
|
|
std::shared_ptr<Aws::S3::S3Client> ClientFactory::create( // NOLINT
|
2019-12-06 14:48:56 +00:00
|
|
|
const String & endpoint,
|
2020-05-20 13:47:53 +00:00
|
|
|
bool is_virtual_hosted_style,
|
2019-12-06 14:48:56 +00:00
|
|
|
const String & access_key_id,
|
2020-08-11 06:09:08 +00:00
|
|
|
const String & secret_access_key,
|
2020-12-10 09:19:42 +00:00
|
|
|
bool use_environment_credentials,
|
2020-11-13 16:31:51 +00:00
|
|
|
const RemoteHostFilter & remote_host_filter,
|
2020-11-23 11:02:17 +00:00
|
|
|
unsigned int s3_max_redirects)
|
2019-12-06 14:48:56 +00:00
|
|
|
{
|
2019-12-06 14:37:21 +00:00
|
|
|
Aws::Client::ClientConfiguration cfg;
|
2020-05-19 10:00:40 +00:00
|
|
|
|
2019-12-06 14:37:21 +00:00
|
|
|
if (!endpoint.empty())
|
|
|
|
cfg.endpointOverride = endpoint;
|
|
|
|
|
2020-12-10 09:19:42 +00:00
|
|
|
return create(cfg,
|
|
|
|
is_virtual_hosted_style,
|
|
|
|
access_key_id,
|
|
|
|
secret_access_key,
|
|
|
|
use_environment_credentials,
|
|
|
|
remote_host_filter,
|
|
|
|
s3_max_redirects);
|
2020-04-28 15:28:31 +00:00
|
|
|
}
|
|
|
|
|
2020-05-02 22:34:50 +00:00
|
|
|
std::shared_ptr<Aws::S3::S3Client> ClientFactory::create( // NOLINT
|
2020-12-03 04:18:12 +00:00
|
|
|
const Aws::Client::ClientConfiguration & cfg,
|
2020-05-20 13:47:53 +00:00
|
|
|
bool is_virtual_hosted_style,
|
2020-04-28 15:28:31 +00:00
|
|
|
const String & access_key_id,
|
2020-08-11 06:09:08 +00:00
|
|
|
const String & secret_access_key,
|
2020-12-10 09:19:42 +00:00
|
|
|
bool use_environment_credentials,
|
2020-11-13 16:31:51 +00:00
|
|
|
const RemoteHostFilter & remote_host_filter,
|
2020-11-23 11:02:17 +00:00
|
|
|
unsigned int s3_max_redirects)
|
2020-04-28 15:28:31 +00:00
|
|
|
{
|
2020-01-18 23:18:23 +00:00
|
|
|
Aws::Auth::AWSCredentials credentials(access_key_id, secret_access_key);
|
2019-12-06 14:37:21 +00:00
|
|
|
|
2020-11-23 11:02:17 +00:00
|
|
|
PocoHTTPClientConfiguration client_configuration(cfg, remote_host_filter, s3_max_redirects);
|
2020-05-20 13:47:53 +00:00
|
|
|
|
2020-10-06 08:20:47 +00:00
|
|
|
client_configuration.updateSchemeAndRegion();
|
2020-05-20 13:47:53 +00:00
|
|
|
|
2019-12-06 14:37:21 +00:00
|
|
|
return std::make_shared<Aws::S3::S3Client>(
|
2020-12-10 09:19:42 +00:00
|
|
|
std::make_shared<S3CredentialsProviderChain>(
|
|
|
|
client_configuration,
|
|
|
|
credentials,
|
|
|
|
use_environment_credentials), // AWS credentials provider.
|
2020-05-20 13:47:53 +00:00
|
|
|
std::move(client_configuration), // Client configuration.
|
2020-04-28 15:28:31 +00:00
|
|
|
Aws::Client::AWSAuthV4Signer::PayloadSigningPolicy::Never, // Sign policy.
|
2020-05-20 13:47:53 +00:00
|
|
|
is_virtual_hosted_style || cfg.endpointOverride.empty() // Use virtual addressing if endpoint is not specified.
|
2019-12-06 14:37:21 +00:00
|
|
|
);
|
|
|
|
}
|
2019-11-05 07:54:13 +00:00
|
|
|
|
2020-06-01 17:16:09 +00:00
|
|
|
std::shared_ptr<Aws::S3::S3Client> ClientFactory::create( // NOLINT
|
2020-12-03 04:18:12 +00:00
|
|
|
const Aws::Client::ClientConfiguration & cfg,
|
2020-06-01 17:16:09 +00:00
|
|
|
bool is_virtual_hosted_style,
|
|
|
|
const String & access_key_id,
|
|
|
|
const String & secret_access_key,
|
2020-08-11 06:09:08 +00:00
|
|
|
HeaderCollection headers,
|
2020-12-10 09:19:42 +00:00
|
|
|
bool use_environment_credentials,
|
2020-11-13 16:31:51 +00:00
|
|
|
const RemoteHostFilter & remote_host_filter,
|
2020-11-23 11:02:17 +00:00
|
|
|
unsigned int s3_max_redirects)
|
2020-06-01 17:16:09 +00:00
|
|
|
{
|
2020-12-03 04:18:12 +00:00
|
|
|
PocoHTTPClientConfiguration client_configuration(cfg, remote_host_filter, s3_max_redirects);
|
2020-10-06 08:20:47 +00:00
|
|
|
|
|
|
|
client_configuration.updateSchemeAndRegion();
|
2020-06-01 17:16:09 +00:00
|
|
|
|
|
|
|
Aws::Auth::AWSCredentials credentials(access_key_id, secret_access_key);
|
2020-12-10 09:19:42 +00:00
|
|
|
|
|
|
|
auto auth_signer = std::make_shared<S3AuthSigner>(client_configuration, std::move(credentials), std::move(headers), use_environment_credentials);
|
2020-06-01 17:16:09 +00:00
|
|
|
return std::make_shared<Aws::S3::S3Client>(
|
2020-12-10 09:19:42 +00:00
|
|
|
std::move(auth_signer),
|
2020-10-06 08:20:47 +00:00
|
|
|
std::move(client_configuration), // Client configuration.
|
|
|
|
is_virtual_hosted_style || client_configuration.endpointOverride.empty() // Use virtual addressing only if endpoint is not specified.
|
2020-06-01 17:16:09 +00:00
|
|
|
);
|
|
|
|
}
|
|
|
|
|
2020-04-29 08:45:13 +00:00
|
|
|
/// Parses an S3 (or COS) URL into endpoint, bucket, key and storage name.
///
/// Two layouts are recognized, tried in this order:
///  - virtual hosted style, where the bucket is part of the authority;
///  - path style, where the bucket is the first path segment.
/// Throws Exception with BAD_ARGUMENTS if the host is empty, neither layout matches,
/// the bucket name length is outside [3, 63], or the key is empty.
URI::URI(const Poco::URI & uri_)
{
    /// Case when bucket name represented in domain name of S3 URL.
    /// E.g. (https://bucket-name.s3.Region.amazonaws.com/key)
    /// https://docs.aws.amazon.com/AmazonS3/latest/dev/VirtualHosting.html#virtual-hosted-style-access
    static const RE2 virtual_hosted_style_pattern(R"((.+)\.(s3|cos)([.\-][a-z0-9\-.:]+))");

    /// Case when bucket name and key represented in path of S3 URL.
    /// E.g. (https://s3.Region.amazonaws.com/bucket-name/key)
    /// https://docs.aws.amazon.com/AmazonS3/latest/dev/VirtualHosting.html#path-style-access
    static const RE2 path_style_pattern("^/([^/]*)/(.*)");

    static constexpr auto S3 = "S3";
    static constexpr auto COSN = "COSN";
    static constexpr auto COS = "COS";

    uri = uri_;
    storage_name = S3;

    if (uri.getHost().empty())
        throw Exception("Host is empty in S3 URI: " + uri.toString(), ErrorCodes::BAD_ARGUMENTS);

    /// S3 specification requires at least 3 and at most 63 characters in bucket name.
    /// https://docs.aws.amazon.com/awscloudtrail/latest/userguide/cloudtrail-s3-bucket-naming-requirements.html
    const auto bucket_length_is_valid = [this] { return bucket.length() >= 3 && bucket.length() <= 63; };
    const auto key_is_missing = [this] { return key.empty() || key == "/"; };

    String service_name;
    String authority_suffix;

    if (re2::RE2::FullMatch(uri.getAuthority(), virtual_hosted_style_pattern, &bucket, &service_name, &authority_suffix))
    {
        is_virtual_hosted_style = true;
        /// The endpoint is the authority without the leading "<bucket>." part.
        endpoint = uri.getScheme() + "://" + service_name + authority_suffix;

        if (!bucket_length_is_valid())
            throw Exception(
                "Bucket name length is out of bounds in virtual hosted style S3 URI: " + bucket + " (" + uri.toString() + ")", ErrorCodes::BAD_ARGUMENTS);

        /// Remove leading '/' from path to extract key.
        const auto & path = uri.getPath();
        if (!path.empty())
            key = path.substr(1);

        if (key_is_missing())
            throw Exception("Key name is empty in virtual hosted style S3 URI: " + key + " (" + uri.toString() + ")", ErrorCodes::BAD_ARGUMENTS);

        boost::to_upper(service_name);
        const bool is_s3 = (service_name == S3);
        if (!is_s3 && service_name != COS)
            throw Exception("Object storage system name is unrecognized in virtual hosted style S3 URI: " + service_name + " (" + uri.toString() + ")", ErrorCodes::BAD_ARGUMENTS);

        /// Anything that is not S3 at this point is COS, which is reported under the COSN name.
        storage_name = is_s3 ? S3 : COSN;
        return;
    }

    if (!re2::RE2::PartialMatch(uri.getPath(), path_style_pattern, &bucket, &key))
        throw Exception("Bucket or key name are invalid in S3 URI: " + uri.toString(), ErrorCodes::BAD_ARGUMENTS);

    is_virtual_hosted_style = false;
    endpoint = uri.getScheme() + "://" + uri.getAuthority();

    if (!bucket_length_is_valid())
        throw Exception(
            "Bucket name length is out of bounds in path style S3 URI: " + bucket + " (" + uri.toString() + ")", ErrorCodes::BAD_ARGUMENTS);

    if (key_is_missing())
        throw Exception("Key name is empty in path style S3 URI: " + key + " (" + uri.toString() + ")", ErrorCodes::BAD_ARGUMENTS);
}
|
2019-11-05 07:54:13 +00:00
|
|
|
}
|
|
|
|
|
2019-12-01 11:24:55 +00:00
|
|
|
}
|
2019-12-06 14:37:21 +00:00
|
|
|
|
2019-12-09 12:36:06 +00:00
|
|
|
#endif
|