mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-26 09:32:01 +00:00
Merge pull request #49390 from ClickHouse/fix-gcs-headers
Properly fix GCS when HMAC is used
This commit is contained in:
commit
212c57c034
@ -112,6 +112,22 @@ std::unique_ptr<Client> Client::create(const Client & other)
|
||||
return std::unique_ptr<Client>(new Client(other));
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
ProviderType deduceProviderType(const std::string & url)
|
||||
{
|
||||
if (url.find(".amazonaws.com") != std::string::npos)
|
||||
return ProviderType::AWS;
|
||||
|
||||
if (url.find("storage.googleapis.com") != std::string::npos)
|
||||
return ProviderType::GCS;
|
||||
|
||||
return ProviderType::UNKNOWN;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
Client::Client(
|
||||
size_t max_redirects_,
|
||||
ServerSideEncryptionKMSConfig sse_kms_config_,
|
||||
@ -128,9 +144,28 @@ Client::Client(
|
||||
endpoint_provider->GetBuiltInParameters().GetParameter("Region").GetString(explicit_region);
|
||||
endpoint_provider->GetBuiltInParameters().GetParameter("Endpoint").GetString(initial_endpoint);
|
||||
|
||||
provider_type = getProviderTypeFromURL(initial_endpoint);
|
||||
provider_type = deduceProviderType(initial_endpoint);
|
||||
LOG_TRACE(log, "Provider type: {}", toString(provider_type));
|
||||
|
||||
if (provider_type == ProviderType::GCS)
|
||||
{
|
||||
/// GCS can operate in 2 modes for header and query params names:
|
||||
/// - with both x-amz and x-goog prefixes allowed (but cannot mix different prefixes in same request)
|
||||
/// - only with x-goog prefix
|
||||
/// first mode is allowed only with HMAC (or unsigned requests) so when we
|
||||
/// find credential keys we can simply behave as the underlying storage is S3
|
||||
/// otherwise, we need to be aware we are making requests to GCS
|
||||
/// and replace all headers with a valid prefix when needed
|
||||
if (credentials_provider)
|
||||
{
|
||||
auto credentials = credentials_provider->GetAWSCredentials();
|
||||
if (credentials.IsEmpty())
|
||||
api_mode = ApiMode::GCS;
|
||||
}
|
||||
}
|
||||
|
||||
LOG_TRACE(log, "API mode: {}", toString(api_mode));
|
||||
|
||||
detect_region = provider_type == ProviderType::AWS && explicit_region == Aws::Region::AWS_GLOBAL;
|
||||
|
||||
cache = std::make_shared<ClientCache>();
|
||||
@ -208,7 +243,7 @@ Model::HeadObjectOutcome Client::HeadObject(const HeadObjectRequest & request) c
|
||||
{
|
||||
const auto & bucket = request.GetBucket();
|
||||
|
||||
request.setProviderType(provider_type);
|
||||
request.setApiMode(api_mode);
|
||||
|
||||
if (auto region = getRegionForBucket(bucket); !region.empty())
|
||||
{
|
||||
@ -348,7 +383,7 @@ std::invoke_result_t<RequestFn, RequestType>
|
||||
Client::doRequest(const RequestType & request, RequestFn request_fn) const
|
||||
{
|
||||
const auto & bucket = request.GetBucket();
|
||||
request.setProviderType(provider_type);
|
||||
request.setApiMode(api_mode);
|
||||
|
||||
if (auto region = getRegionForBucket(bucket); !region.empty())
|
||||
{
|
||||
@ -421,9 +456,23 @@ Client::doRequest(const RequestType & request, RequestFn request_fn) const
|
||||
throw Exception(ErrorCodes::TOO_MANY_REDIRECTS, "Too many redirects");
|
||||
}
|
||||
|
||||
ProviderType Client::getProviderType() const
|
||||
bool Client::supportsMultiPartCopy() const
|
||||
{
|
||||
return provider_type;
|
||||
return provider_type != ProviderType::GCS;
|
||||
}
|
||||
|
||||
/// Lets the base S3 client build the HTTP request and then, only when this
/// client works in GCS API mode, removes the SDK-specific headers listed below.
/// In any other API mode the request is left exactly as the base client built it.
void Client::BuildHttpRequest(const Aws::AmazonWebServiceRequest& request,
                              const std::shared_ptr<Aws::Http::HttpRequest>& httpRequest) const
{
    Aws::S3::S3Client::BuildHttpRequest(request, httpRequest);

    if (api_mode != ApiMode::GCS)
        return;

    /// some GCS requests don't like S3 specific headers that the client sets
    static constexpr const char * sdk_specific_headers[] = {
        "x-amz-api-version",
        "amz-sdk-invocation-id",
        "amz-sdk-request",
    };

    for (const char * header_name : sdk_specific_headers)
        httpRequest->DeleteHeader(header_name);
}
|
||||
|
||||
std::string Client::getRegionForBucket(const std::string & bucket, bool force_detect) const
|
||||
|
@ -190,7 +190,10 @@ public:
|
||||
using Aws::S3::S3Client::EnableRequestProcessing;
|
||||
using Aws::S3::S3Client::DisableRequestProcessing;
|
||||
|
||||
ProviderType getProviderType() const;
|
||||
void BuildHttpRequest(const Aws::AmazonWebServiceRequest& request,
|
||||
const std::shared_ptr<Aws::Http::HttpRequest>& httpRequest) const override;
|
||||
|
||||
bool supportsMultiPartCopy() const;
|
||||
private:
|
||||
Client(size_t max_redirects_,
|
||||
ServerSideEncryptionKMSConfig sse_kms_config_,
|
||||
@ -238,7 +241,12 @@ private:
|
||||
std::string explicit_region;
|
||||
mutable bool detect_region = true;
|
||||
|
||||
/// provider type can determine if some functionality is supported
|
||||
/// but for same provider, we would need to generate different headers depending on the
|
||||
/// mode
|
||||
/// E.g. GCS can work in AWS mode in some cases and accept headers with x-amz prefix
|
||||
ProviderType provider_type{ProviderType::UNKNOWN};
|
||||
ApiMode api_mode{ApiMode::AWS};
|
||||
|
||||
mutable std::shared_ptr<ClientCache> cache;
|
||||
|
||||
|
@ -260,17 +260,6 @@ void PocoHTTPClient::makeRequestInternal(
|
||||
Poco::Logger * log = &Poco::Logger::get("AWSClient");
|
||||
|
||||
auto uri = request.GetUri().GetURIString();
|
||||
#if 0
|
||||
auto provider_type = getProviderTypeFromURL(uri);
|
||||
|
||||
if (provider_type == ProviderType::GCS)
|
||||
{
|
||||
/// some GCS requests don't like S3 specific headers that the client sets
|
||||
request.DeleteHeader("x-amz-api-version");
|
||||
request.DeleteHeader("amz-sdk-invocation-id");
|
||||
request.DeleteHeader("amz-sdk-request");
|
||||
}
|
||||
#endif
|
||||
|
||||
if (enable_s3_requests_logging)
|
||||
LOG_TEST(log, "Make request to: {}", uri);
|
||||
|
@ -22,20 +22,17 @@ std::string_view toString(ProviderType provider_type)
|
||||
}
|
||||
}
|
||||
|
||||
bool supportsMultiPartCopy(ProviderType provider_type)
|
||||
std::string_view toString(ApiMode api_mode)
|
||||
{
|
||||
return provider_type != ProviderType::GCS;
|
||||
}
|
||||
using enum ApiMode;
|
||||
|
||||
ProviderType getProviderTypeFromURL(const std::string & url)
|
||||
{
|
||||
if (url.find(".amazonaws.com") != std::string::npos)
|
||||
return ProviderType::AWS;
|
||||
|
||||
if (url.find("storage.googleapis.com") != std::string::npos)
|
||||
return ProviderType::GCS;
|
||||
|
||||
return ProviderType::UNKNOWN;
|
||||
switch (api_mode)
|
||||
{
|
||||
case AWS:
|
||||
return "AWS";
|
||||
case GCS:
|
||||
return "GCS";
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -10,6 +10,11 @@
|
||||
namespace DB::S3
|
||||
{
|
||||
|
||||
/// Provider type defines the platform containing the object
|
||||
/// we are trying to access
|
||||
/// This information is useful for determining general support for
|
||||
/// some feature like multipart copy which is currently supported by AWS
|
||||
/// but not by GCS
|
||||
enum class ProviderType : uint8_t
|
||||
{
|
||||
AWS,
|
||||
@ -19,9 +24,20 @@ enum class ProviderType : uint8_t
|
||||
|
||||
std::string_view toString(ProviderType provider_type);
|
||||
|
||||
bool supportsMultiPartCopy(ProviderType provider_type);
|
||||
/// Flavour of the XML API used when talking to the storage.
/// It often matches the provider type, but the two are tracked separately:
/// GCS, for example, can be addressed either in an AWS compatible mode
/// (headers starting with x-amz are accepted) or in its native mode
/// (only headers starting with x-goog are accepted).
/// Some features enforce the native GCS mode, so both modes have to be supported.
enum class ApiMode : uint8_t
{
    /// AWS compatible mode: headers use the x-amz prefix.
    AWS,
    /// GCS mode: headers use the x-goog prefix only.
    GCS,
};
|
||||
|
||||
ProviderType getProviderTypeFromURL(const std::string & url);
|
||||
std::string_view toString(ApiMode api_mode);
|
||||
|
||||
}
|
||||
|
||||
|
@ -10,7 +10,7 @@ namespace DB::S3
|
||||
Aws::Http::HeaderValueCollection CopyObjectRequest::GetRequestSpecificHeaders() const
|
||||
{
|
||||
auto headers = Model::CopyObjectRequest::GetRequestSpecificHeaders();
|
||||
if (provider_type != ProviderType::GCS)
|
||||
if (api_mode != ApiMode::GCS)
|
||||
return headers;
|
||||
|
||||
/// GCS supports same headers as S3 but with a prefix x-goog instead of x-amz
|
||||
|
@ -62,15 +62,15 @@ public:
|
||||
return uri_override;
|
||||
}
|
||||
|
||||
void setProviderType(ProviderType provider_type_) const
|
||||
void setApiMode(ApiMode api_mode_) const
|
||||
{
|
||||
provider_type = provider_type_;
|
||||
api_mode = api_mode_;
|
||||
}
|
||||
|
||||
protected:
|
||||
mutable std::string region_override;
|
||||
mutable std::optional<S3::URI> uri_override;
|
||||
mutable ProviderType provider_type{ProviderType::UNKNOWN};
|
||||
mutable ApiMode api_mode{ApiMode::AWS};
|
||||
};
|
||||
|
||||
class CopyObjectRequest : public ExtendedRequest<Model::CopyObjectRequest>
|
||||
|
@ -595,7 +595,7 @@ namespace
|
||||
, src_key(src_key_)
|
||||
, offset(src_offset_)
|
||||
, size(src_size_)
|
||||
, supports_multipart_copy(S3::supportsMultiPartCopy(client_ptr_->getProviderType()))
|
||||
, supports_multipart_copy(client_ptr_->supportsMultiPartCopy())
|
||||
{
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user