ClickHouse/src/IO/S3Common.cpp

260 lines
10 KiB
C++
Raw Normal View History

2019-12-06 14:37:21 +00:00
#include <Common/config.h>
#if USE_AWS_S3
# include <IO/S3Common.h>
# include <IO/WriteBufferFromString.h>
# include <Storages/StorageS3Settings.h>
2019-11-05 07:54:13 +00:00
# include <aws/core/auth/AWSCredentialsProvider.h>
# include <aws/core/utils/logging/LogMacros.h>
# include <aws/core/utils/logging/LogSystemInterface.h>
# include <aws/s3/S3Client.h>
2020-05-27 15:02:25 +00:00
# include <aws/core/http/HttpClientFactory.h>
2020-05-31 22:25:37 +00:00
# include <IO/S3/PocoHTTPClientFactory.h>
2020-06-08 20:17:39 +00:00
# include <IO/S3/PocoHTTPClientFactory.cpp>
2020-05-31 22:25:37 +00:00
# include <IO/S3/PocoHTTPClient.h>
2020-06-08 20:17:39 +00:00
# include <IO/S3/PocoHTTPClient.cpp>
2020-05-19 10:00:40 +00:00
# include <boost/algorithm/string.hpp>
# include <Poco/URI.h>
# include <re2/re2.h>
# include <common/logger_useful.h>
# include <cstdarg>
# include <cstdio>
# include <string>
2019-11-05 07:54:13 +00:00
namespace
{
2020-05-30 21:57:37 +00:00
/// Translates an AWS SDK log level into the corresponding pair of
/// (ClickHouse logs level, Poco message priority).
/// The lookup uses unordered_map::at, so a level missing from the table raises std::out_of_range.
const std::pair<DB::LogsLevel, Poco::Message::Priority> & convertLogLevel(Aws::Utils::Logging::LogLevel log_level)
{
    using AwsLevel = Aws::Utils::Logging::LogLevel;
    using Target = std::pair<DB::LogsLevel, Poco::Message::Priority>;

    /// Built once on first use and shared by all callers.
    static const std::unordered_map<AwsLevel, Target> aws_to_clickhouse = {
        {AwsLevel::Off, {DB::LogsLevel::none, Poco::Message::PRIO_FATAL}},
        {AwsLevel::Fatal, {DB::LogsLevel::error, Poco::Message::PRIO_FATAL}},
        {AwsLevel::Error, {DB::LogsLevel::error, Poco::Message::PRIO_ERROR}},
        {AwsLevel::Warn, {DB::LogsLevel::warning, Poco::Message::PRIO_WARNING}},
        {AwsLevel::Info, {DB::LogsLevel::information, Poco::Message::PRIO_INFORMATION}},
        {AwsLevel::Debug, {DB::LogsLevel::debug, Poco::Message::PRIO_DEBUG}},
        {AwsLevel::Trace, {DB::LogsLevel::trace, Poco::Message::PRIO_TRACE}},
    };

    return aws_to_clickhouse.at(log_level);
}
class AWSLogger final : public Aws::Utils::Logging::LogSystemInterface
{
public:
~AWSLogger() final = default;
Aws::Utils::Logging::LogLevel GetLogLevel() const final { return Aws::Utils::Logging::LogLevel::Trace; }
2020-03-18 03:27:32 +00:00
void Log(Aws::Utils::Logging::LogLevel log_level, const char * tag, const char * format_str, ...) final // NOLINT
{
2020-04-22 05:39:31 +00:00
const auto & [level, prio] = convertLogLevel(log_level);
2020-05-23 22:24:01 +00:00
LOG_IMPL(log, level, prio, "{}: {}", tag, format_str);
}
void LogStream(Aws::Utils::Logging::LogLevel log_level, const char * tag, const Aws::OStringStream & message_stream) final
{
2020-04-22 05:39:31 +00:00
const auto & [level, prio] = convertLogLevel(log_level);
2020-05-23 22:24:01 +00:00
LOG_IMPL(log, level, prio, "{}: {}", tag, message_stream.str());
}
void Flush() final {}
private:
2019-12-17 15:38:50 +00:00
Poco::Logger * log = &Poco::Logger::get("AWSClient");
};
class S3AuthSigner : public Aws::Client::AWSAuthV4Signer
{
public:
S3AuthSigner(
2020-06-04 13:48:20 +00:00
const Aws::Client::ClientConfiguration & client_configuration,
const Aws::Auth::AWSCredentials & credentials,
const DB::HeaderCollection & headers_)
: Aws::Client::AWSAuthV4Signer(
std::make_shared<Aws::Auth::SimpleAWSCredentialsProvider>(credentials),
"s3",
2020-06-04 13:48:20 +00:00
client_configuration.region,
Aws::Client::AWSAuthV4Signer::PayloadSigningPolicy::Never,
false)
, headers(headers_)
{
}
2020-06-04 13:48:20 +00:00
bool SignRequest(Aws::Http::HttpRequest & request, const char * region, bool sign_body) const override
{
2020-06-04 13:48:20 +00:00
auto result = Aws::Client::AWSAuthV4Signer::SignRequest(request, region, sign_body);
for (const auto & header : headers)
request.SetHeaderValue(header.name, header.value);
return result;
}
bool PresignRequest(
Aws::Http::HttpRequest & request,
const char * region,
const char * serviceName,
2020-06-04 13:48:20 +00:00
long long expiration_time_sec) const override // NOLINT
{
2020-06-04 13:48:20 +00:00
auto result = Aws::Client::AWSAuthV4Signer::PresignRequest(request, region, serviceName, expiration_time_sec);
for (const auto & header : headers)
request.SetHeaderValue(header.name, header.value);
return result;
}
private:
const DB::HeaderCollection headers;
};
}
2019-12-06 15:14:39 +00:00
namespace DB
{
namespace ErrorCodes
{
    /// Declared here, defined elsewhere; used below when an S3 URI cannot be parsed.
    extern const int BAD_ARGUMENTS;
}
2019-12-06 15:14:39 +00:00
namespace S3
{
2019-12-06 14:48:56 +00:00
/// Initializes the AWS SDK with default options, then installs the ClickHouse logger adapter
/// and the Poco-based HTTP client factory. The SDK is initialized before either hook is installed.
ClientFactory::ClientFactory()
{
    aws_options = Aws::SDKOptions{};
    Aws::InitAPI(aws_options);

    auto aws_logger = std::make_shared<AWSLogger>();
    Aws::Utils::Logging::InitializeAWSLogging(aws_logger);

    auto http_client_factory = std::make_shared<PocoHTTPClientFactory>();
    Aws::Http::SetHttpClientFactory(http_client_factory);
}
2019-12-06 14:48:56 +00:00
/// Tears down what the constructor set up, in reverse order:
/// AWS logging is shut down first, then the SDK itself.
ClientFactory::~ClientFactory()
{
    Aws::Utils::Logging::ShutdownAWSLogging();
    Aws::ShutdownAPI(aws_options);
}
2019-11-05 07:54:13 +00:00
2019-12-10 23:03:45 +00:00
/// Returns the single process-wide factory. It is constructed on first call
/// (which initializes the AWS SDK, see the constructor) and lives until program exit.
ClientFactory & ClientFactory::instance()
{
    static ClientFactory factory;
    return factory;
}
2019-12-03 16:23:24 +00:00
2020-05-02 22:34:50 +00:00
/// This method is not static because it requires ClientFactory to be initialized.
std::shared_ptr<Aws::S3::S3Client> ClientFactory::create( // NOLINT
2019-12-06 14:48:56 +00:00
const String & endpoint,
bool is_virtual_hosted_style,
2019-12-06 14:48:56 +00:00
const String & access_key_id,
const String & secret_access_key)
{
2019-12-06 14:37:21 +00:00
Aws::Client::ClientConfiguration cfg;
2020-05-19 10:00:40 +00:00
2019-12-06 14:37:21 +00:00
if (!endpoint.empty())
cfg.endpointOverride = endpoint;
return create(cfg, is_virtual_hosted_style, access_key_id, secret_access_key);
}
2020-05-02 22:34:50 +00:00
/// Builds an S3 client from a caller-supplied configuration and static credentials.
/// The passed configuration is not modified: a private copy is adjusted instead.
/// When an endpoint override is set, the copy gets
///  - the HTTP scheme, if the endpoint URI uses "http";
///  - the region, if the endpoint host looks like "s3.<region>.amazonaws." or "s3-<region>.amazonaws.".
std::shared_ptr<Aws::S3::S3Client> ClientFactory::create( // NOLINT
    Aws::Client::ClientConfiguration & cfg,
    bool is_virtual_hosted_style,
    const String & access_key_id,
    const String & secret_access_key)
{
    /// Use virtual addressing if endpoint is not specified.
    const bool use_virtual_addressing = is_virtual_hosted_style || cfg.endpointOverride.empty();

    Aws::Client::ClientConfiguration adjusted_configuration = cfg;

    if (!adjusted_configuration.endpointOverride.empty())
    {
        static const RE2 region_pattern(R"(^s3[.\-]([a-z0-9\-]+)\.amazonaws\.)");

        Poco::URI endpoint_uri(adjusted_configuration.endpointOverride);
        if (endpoint_uri.getScheme() == "http")
            adjusted_configuration.scheme = Aws::Http::Scheme::HTTP;

        String detected_region;
        if (re2::RE2::PartialMatch(endpoint_uri.getHost(), region_pattern, &detected_region))
        {
            boost::algorithm::to_lower(detected_region);
            adjusted_configuration.region = detected_region;
        }
    }

    Aws::Auth::AWSCredentials credentials(access_key_id, secret_access_key);

    return std::make_shared<Aws::S3::S3Client>(
        credentials, // Aws credentials.
        std::move(adjusted_configuration), // Client configuration.
        Aws::Client::AWSAuthV4Signer::PayloadSigningPolicy::Never, // Sign policy.
        use_virtual_addressing);
}
2019-11-05 07:54:13 +00:00
std::shared_ptr<Aws::S3::S3Client> ClientFactory::create( // NOLINT
const String & endpoint,
bool is_virtual_hosted_style,
const String & access_key_id,
const String & secret_access_key,
HeaderCollection headers)
{
Aws::Client::ClientConfiguration cfg;
if (!endpoint.empty())
cfg.endpointOverride = endpoint;
Aws::Auth::AWSCredentials credentials(access_key_id, secret_access_key);
return std::make_shared<Aws::S3::S3Client>(
std::make_shared<S3AuthSigner>(cfg, std::move(credentials), std::move(headers)),
std::move(cfg), // Client configuration.
is_virtual_hosted_style || cfg.endpointOverride.empty() // Use virtual addressing only if endpoint is not specified.
);
}
2020-04-29 08:45:13 +00:00
/// Parses an S3 URL into endpoint, bucket and key.
/// Two layouts are recognized, tried in this order:
///  - virtual hosted style: the bucket is part of the authority, the key is the path;
///  - path style: the first path segment is the bucket, the rest is the key.
/// Throws Exception with BAD_ARGUMENTS if the host is empty, the bucket name length is outside [3, 63],
/// the key is empty, or neither layout matches.
URI::URI(const Poco::URI & uri_)
{
    /// Case when bucket name represented in domain name of S3 URL.
    /// E.g. (https://bucket-name.s3.Region.amazonaws.com/key)
    /// https://docs.aws.amazon.com/AmazonS3/latest/dev/VirtualHosting.html#virtual-hosted-style-access
    static const RE2 virtual_hosted_style_pattern(R"((.+)\.(s3[.\-][a-z0-9\-.:]+))");

    /// Case when bucket name and key represented in path of S3 URL.
    /// E.g. (https://s3.Region.amazonaws.com/bucket-name/key)
    /// https://docs.aws.amazon.com/AmazonS3/latest/dev/VirtualHosting.html#path-style-access
    static const RE2 path_style_pattern("^/([^/]*)/(.*)");

    uri = uri_;

    if (uri.getHost().empty())
        throw Exception("Host is empty in S3 URI: " + uri.toString(), ErrorCodes::BAD_ARGUMENTS);

    String endpoint_authority_from_uri;

    if (re2::RE2::FullMatch(uri.getAuthority(), virtual_hosted_style_pattern, &bucket, &endpoint_authority_from_uri))
    {
        is_virtual_hosted_style = true;
        endpoint = uri.getScheme() + "://" + endpoint_authority_from_uri;

        /// S3 specification requires at least 3 and at most 63 characters in bucket name.
        /// https://docs.aws.amazon.com/awscloudtrail/latest/userguide/cloudtrail-s3-bucket-naming-requirements.html
        if (bucket.length() < 3 || bucket.length() > 63)
            throw Exception(
                "Bucket name length is out of bounds in virtual hosted style S3 URI: " + bucket + " (" + uri.toString() + ")", ErrorCodes::BAD_ARGUMENTS);

        /// Remove leading '/' from path to extract key.
        /// The path may be empty (URL without any path); substr(1) on an empty string would throw
        /// std::out_of_range, so treat that case as an empty key and report it as BAD_ARGUMENTS below.
        const auto & path = uri.getPath();
        key = path.empty() ? String() : path.substr(1);
        if (key.empty() || key == "/")
            throw Exception("Key name is empty in virtual hosted style S3 URI: " + key + " (" + uri.toString() + ")", ErrorCodes::BAD_ARGUMENTS);
    }
    else if (re2::RE2::PartialMatch(uri.getPath(), path_style_pattern, &bucket, &key))
    {
        is_virtual_hosted_style = false;
        endpoint = uri.getScheme() + "://" + uri.getAuthority();

        /// S3 specification requires at least 3 and at most 63 characters in bucket name.
        /// https://docs.aws.amazon.com/awscloudtrail/latest/userguide/cloudtrail-s3-bucket-naming-requirements.html
        if (bucket.length() < 3 || bucket.length() > 63)
            throw Exception(
                "Bucket name length is out of bounds in path style S3 URI: " + bucket + " (" + uri.toString() + ")", ErrorCodes::BAD_ARGUMENTS);

        if (key.empty() || key == "/")
            throw Exception("Key name is empty in path style S3 URI: " + key + " (" + uri.toString() + ")", ErrorCodes::BAD_ARGUMENTS);
    }
    else
        throw Exception("Bucket or key name are invalid in S3 URI: " + uri.toString(), ErrorCodes::BAD_ARGUMENTS);
}
2019-11-05 07:54:13 +00:00
}
}
2019-12-06 14:37:21 +00:00
#endif