ClickHouse/dbms/IO/S3Common.cpp

155 lines
4.9 KiB
C++
Raw Normal View History

2019-12-06 14:37:21 +00:00
#include <Common/config.h>
#if USE_AWS_S3
2019-11-05 07:54:13 +00:00
#include <IO/S3Common.h>
2019-12-03 01:22:25 +00:00
#include <IO/WriteBufferFromString.h>
2019-11-05 07:54:13 +00:00
2019-12-11 14:21:48 +00:00
#include <cstdarg>
#include <cstdio>
#include <regex>
#include <aws/s3/S3Client.h>
2019-12-03 16:23:24 +00:00
#include <aws/core/auth/AWSCredentialsProvider.h>
#include <aws/core/utils/logging/LogSystemInterface.h>
#include <aws/core/utils/logging/LogMacros.h>
#include <common/logger_useful.h>
2019-11-05 07:54:13 +00:00
namespace
{
2019-12-17 15:38:50 +00:00
/// Translates an AWS SDK log level into the (LogsLevel, Message::Priority) pair used when writing to the log.
/// The lookup uses unordered_map::at, so a level missing from the table results in std::out_of_range.
const std::pair<LogsLevel, Message::Priority> & convertLogLevel(Aws::Utils::Logging::LogLevel log_level)
{
    using AwsLevel = Aws::Utils::Logging::LogLevel;
    using LevelAndPriority = std::pair<LogsLevel, Message::Priority>;

    /// Built once on first use; the returned reference stays valid for the lifetime of the program.
    static const std::unordered_map<AwsLevel, LevelAndPriority> level_table =
    {
        {AwsLevel::Off,   {LogsLevel::none,        Message::PRIO_FATAL}},
        {AwsLevel::Fatal, {LogsLevel::error,       Message::PRIO_FATAL}},
        {AwsLevel::Error, {LogsLevel::error,       Message::PRIO_ERROR}},
        {AwsLevel::Warn,  {LogsLevel::warning,     Message::PRIO_WARNING}},
        {AwsLevel::Info,  {LogsLevel::information, Message::PRIO_INFORMATION}},
        {AwsLevel::Debug, {LogsLevel::debug,       Message::PRIO_DEBUG}},
        {AwsLevel::Trace, {LogsLevel::trace,       Message::PRIO_TRACE}},
    };

    return level_table.at(log_level);
}
class AWSLogger final : public Aws::Utils::Logging::LogSystemInterface
{
public:
~AWSLogger() final = default;
Aws::Utils::Logging::LogLevel GetLogLevel() const final { return Aws::Utils::Logging::LogLevel::Trace; }
2020-03-18 03:27:32 +00:00
void Log(Aws::Utils::Logging::LogLevel log_level, const char * tag, const char * format_str, ...) final // NOLINT
{
2019-12-17 15:38:50 +00:00
auto & [level, prio] = convertLogLevel(log_level);
LOG_SIMPLE(log, std::string(tag) + ": " + format_str, level, prio);
}
void LogStream(Aws::Utils::Logging::LogLevel log_level, const char * tag, const Aws::OStringStream & message_stream) final
{
2019-12-17 15:38:50 +00:00
auto & [level, prio] = convertLogLevel(log_level);
LOG_SIMPLE(log, std::string(tag) + ": " + message_stream.str(), level, prio);
}
void Flush() final {}
private:
2019-12-17 15:38:50 +00:00
Poco::Logger * log = &Poco::Logger::get("AWSClient");
};
}
2019-12-06 15:14:39 +00:00
namespace DB
{
2019-11-05 07:54:13 +00:00
2019-12-06 15:14:39 +00:00
namespace ErrorCodes
{
2019-12-03 16:23:24 +00:00
extern const int BAD_ARGUMENTS;
2019-12-03 01:22:25 +00:00
}
2019-12-06 15:14:39 +00:00
namespace S3
{
2019-12-06 14:48:56 +00:00
/// Initializes the AWS SDK with default options and installs AWSLogger as the SDK log sink.
ClientFactory::ClientFactory()
{
    aws_options = Aws::SDKOptions{};
    Aws::InitAPI(aws_options);

    std::shared_ptr<Aws::Utils::Logging::LogSystemInterface> sdk_logger = std::make_shared<AWSLogger>();
    Aws::Utils::Logging::InitializeAWSLogging(sdk_logger);
}
2019-12-06 14:48:56 +00:00
/// Undoes the constructor in reverse order: SDK logging is shut down first, then the SDK itself.
ClientFactory::~ClientFactory()
{
    namespace AwsLogging = Aws::Utils::Logging;

    AwsLogging::ShutdownAWSLogging();
    Aws::ShutdownAPI(aws_options);
}
2019-11-05 07:54:13 +00:00
2019-12-10 23:03:45 +00:00
/// Returns the single process-wide factory, constructed on the first call.
ClientFactory & ClientFactory::instance()
{
    static ClientFactory factory;
    return factory;
}
2019-12-03 16:23:24 +00:00
2020-03-18 00:57:00 +00:00
std::shared_ptr<Aws::S3::S3Client> ClientFactory::create( // NOLINT
2019-12-06 14:48:56 +00:00
const String & endpoint,
const String & access_key_id,
const String & secret_access_key)
{
2019-12-06 14:37:21 +00:00
Aws::Client::ClientConfiguration cfg;
if (!endpoint.empty())
cfg.endpointOverride = endpoint;
2020-01-18 23:18:23 +00:00
Aws::Auth::AWSCredentials credentials(access_key_id, secret_access_key);
2019-12-06 14:37:21 +00:00
return std::make_shared<Aws::S3::S3Client>(
2020-01-18 23:18:23 +00:00
credentials, // Aws credentials.
2019-12-06 14:37:21 +00:00
std::move(cfg), // Client configuration.
Aws::Client::AWSAuthV4Signer::PayloadSigningPolicy::Never, // Sign policy.
endpoint.empty() // Use virtual addressing only if endpoint is not specified.
);
}
2019-11-05 07:54:13 +00:00
/** Splits an S3 location into endpoint, bucket and key.
  *
  * Two forms are accepted:
  *  - s3://bucket/key            - bucket is the URI authority, key is the path without its leading slash,
  *                                 endpoint is left untouched;
  *  - scheme://host/bucket/key   - endpoint is "scheme://authority", bucket is the first non-empty
  *                                 path segment that is followed by a slash, key is everything after that slash.
  *
  * Throws Exception with BAD_ARGUMENTS when the host, the bucket or the key is missing.
  */
URI::URI(const Poco::URI & uri_)
{
    uri = uri_;

    const auto & scheme = uri.getScheme();
    const auto & path = uri.getPath();

    /// s3://*
    if (scheme == "s3" || scheme == "S3")
    {
        bucket = uri.getAuthority();
        if (bucket.empty())
            throw Exception ("Invalid S3 URI: no bucket: " + uri.toString(), ErrorCodes::BAD_ARGUMENTS);

        /// s3://bucket or s3://bucket/
        if (path.length() <= 1)
            throw Exception ("Invalid S3 URI: no key: " + uri.toString(), ErrorCodes::BAD_ARGUMENTS);

        key = path.substr(1);
        return;
    }

    if (uri.getHost().empty())
        throw Exception("Invalid S3 URI: no host: " + uri.toString(), ErrorCodes::BAD_ARGUMENTS);

    endpoint = scheme + "://" + uri.getAuthority();

    /// Parse bucket and key from the path without std::regex:
    /// skip leading slashes, take everything up to the next slash as the bucket and the remainder as the key.
    /// The key is taken verbatim, so line terminators inside it no longer cut it short.
    const auto bucket_begin = path.find_first_not_of('/');
    const auto bucket_end = (bucket_begin == std::string::npos) ? std::string::npos : path.find('/', bucket_begin);

    /// Either the path holds nothing but slashes, or the only segment is not followed by a slash.
    if (bucket_end == std::string::npos)
        throw Exception("Invalid S3 URI: no bucket or key: " + uri.toString(), ErrorCodes::BAD_ARGUMENTS);

    /// bucket_begin points at a non-slash character, so the bucket is never empty here.
    bucket = path.substr(bucket_begin, bucket_end - bucket_begin);

    key = path.substr(bucket_end + 1);
    if (key.empty())
        throw Exception ("Invalid S3 URI: no key: " + uri.toString(), ErrorCodes::BAD_ARGUMENTS);
}
2019-11-05 07:54:13 +00:00
}
}
2019-12-06 14:37:21 +00:00
#endif