Mirror of https://github.com/ClickHouse/ClickHouse.git

Merge pull request #37659 from frew/master
Support `batch_delete` capability for GCS

Commit 06d94a4dde
src/Disks/ObjectStorages/S3/S3Capabilities.cpp (new file, 15 lines)

@@ -0,0 +1,15 @@
+#include <Disks/ObjectStorages/S3/S3Capabilities.h>
+
+namespace DB
+{
+
+S3Capabilities getCapabilitiesFromConfig(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix)
+{
+    return S3Capabilities
+    {
+        .support_batch_delete = config.getBool(config_prefix + ".support_batch_delete", true),
+        .support_proxy = config.getBool(config_prefix + ".support_proxy", config.has(config_prefix + ".proxy")),
+    };
+}
+
+}
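
The new getCapabilitiesFromConfig() reads both flags from the disk's configuration subtree: `support_batch_delete` defaults to true, and `support_proxy` defaults to whether a `proxy` element is present. A minimal sketch of exercising it, assuming an in-memory Poco XML config and a hypothetical disk named `gcs` (none of this scaffolding is part of the commit):

#include <sstream>

#include <Poco/AutoPtr.h>
#include <Poco/Util/XMLConfiguration.h>

#include <Disks/ObjectStorages/S3/S3Capabilities.h>

int main()
{
    // Hypothetical disk subtree; only the keys matter, not the disk name.
    std::istringstream xml(
        "<clickhouse><storage_configuration><disks><gcs>"
        "<type>s3</type>"
        "<support_batch_delete>false</support_batch_delete>"
        "</gcs></disks></storage_configuration></clickhouse>");

    Poco::AutoPtr<Poco::Util::XMLConfiguration> config(new Poco::Util::XMLConfiguration(xml));

    /// Same prefix shape that registerDiskS3 passes for a disk named "gcs".
    auto caps = DB::getCapabilitiesFromConfig(*config, "storage_configuration.disks.gcs");

    /// caps.support_batch_delete == false: removeObject()/removeObjects() will
    /// issue per-object DeleteObject requests instead of DeleteObjects batches.
    /// caps.support_proxy == false: no <proxy> element and no explicit override.
    return caps.support_batch_delete ? 1 : 0;
}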
src/Disks/ObjectStorages/S3/S3Capabilities.h (new file, 27 lines)

@@ -0,0 +1,27 @@
+#pragma once
+
+#include <string>
+#include <Poco/Util/AbstractConfiguration.h>
+
+namespace DB
+{
+
+/// Features supported/unsupported by different S3 implementations.
+/// Only useful for implementations that are almost compatible with AWS S3.
+struct S3Capabilities
+{
+    /// Google's S3 implementation (GCS) doesn't support batch delete.
+    /// TODO: possibly we have to use the Google SDK https://github.com/googleapis/google-cloud-cpp/tree/main/google/cloud/storage
+    /// because it looks like it's missing a lot of features, e.g.:
+    /// 1) batch delete
+    /// 2) list_v2
+    /// 3) multipart upload works differently
+    bool support_batch_delete{true};
+
+    /// The Y.Cloud S3 implementation supports a proxy for the connection.
+    bool support_proxy{false};
+};
+
+S3Capabilities getCapabilitiesFromConfig(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix);
+
+}
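
The in-class initializers mean an unconfigured endpoint behaves like AWS S3 itself. A small sketch of the resulting defaults and a GCS-style override (hypothetical values, not from the commit):

#include <Disks/ObjectStorages/S3/S3Capabilities.h>

int main()
{
    /// Defaults model AWS S3: batch delete available, no proxy.
    DB::S3Capabilities aws_like{};

    /// GCS-flavoured override (hypothetical): keep everything else, but make
    /// the object storage layer avoid the unsupported DeleteObjects call.
    DB::S3Capabilities gcs_like{.support_batch_delete = false};

    return (aws_like.support_batch_delete && !gcs_like.support_batch_delete) ? 0 : 1;
}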
src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp

@@ -17,6 +17,7 @@
 #include <aws/s3/model/CopyObjectRequest.h>
 #include <aws/s3/model/ListObjectsV2Request.h>
 #include <aws/s3/model/HeadObjectRequest.h>
+#include <aws/s3/model/DeleteObjectRequest.h>
 #include <aws/s3/model/DeleteObjectsRequest.h>
 #include <aws/s3/model/CreateMultipartUploadRequest.h>
 #include <aws/s3/model/CompleteMultipartUploadRequest.h>
@@ -213,18 +214,34 @@ void S3ObjectStorage::listPrefix(const std::string & path, BlobsPathToSize & chi
 void S3ObjectStorage::removeObject(const std::string & path)
 {
     auto client_ptr = client.get();
-    Aws::S3::Model::ObjectIdentifier obj;
-    obj.SetKey(path);
     auto settings_ptr = s3_settings.get();
 
-    Aws::S3::Model::Delete delkeys;
-    delkeys.SetObjects({obj});
+    // If the endpoint doesn't support batch deletes, use a single delete request.
+    // This allows us to work with GCS, which doesn't support DeleteObjects.
+    if (!s3_capabilities.support_batch_delete)
+    {
+        Aws::S3::Model::DeleteObjectRequest request;
+        request.SetBucket(bucket);
+        request.SetKey(path);
+        auto outcome = client_ptr->DeleteObject(request);
 
-    Aws::S3::Model::DeleteObjectsRequest request;
-    request.SetBucket(bucket);
-    request.SetDelete(delkeys);
-    auto outcome = client_ptr->DeleteObjects(request);
+        throwIfError(outcome);
+    }
+    else
+    {
+        /// TODO: For AWS we prefer to use a multi-object operation even for a single object;
+        /// maybe we shouldn't?
+        Aws::S3::Model::ObjectIdentifier obj;
+        obj.SetKey(path);
+        Aws::S3::Model::Delete delkeys;
+        delkeys.SetObjects({obj});
+        Aws::S3::Model::DeleteObjectsRequest request;
+        request.SetBucket(bucket);
+        request.SetDelete(delkeys);
+        auto outcome = client_ptr->DeleteObjects(request);
 
-    throwIfError(outcome);
+        throwIfError(outcome);
+    }
 }
 
 void S3ObjectStorage::removeObjects(const std::vector<std::string> & paths)
@@ -235,31 +252,39 @@ void S3ObjectStorage::removeObjects(const std::vector<std::string> & paths)
     auto client_ptr = client.get();
     auto settings_ptr = s3_settings.get();
 
-    size_t chunk_size_limit = settings_ptr->objects_chunk_size_to_delete;
-    size_t current_position = 0;
-
-    while (current_position < paths.size())
+    if (!s3_capabilities.support_batch_delete)
     {
-        std::vector<Aws::S3::Model::ObjectIdentifier> current_chunk;
-        String keys;
-        for (; current_position < paths.size() && current_chunk.size() < chunk_size_limit; ++current_position)
+        for (const auto & path : paths)
+            removeObject(path);
+    }
+    else
+    {
+        size_t chunk_size_limit = settings_ptr->objects_chunk_size_to_delete;
+        size_t current_position = 0;
+
+        while (current_position < paths.size())
         {
-            Aws::S3::Model::ObjectIdentifier obj;
-            obj.SetKey(paths[current_position]);
-            current_chunk.push_back(obj);
+            std::vector<Aws::S3::Model::ObjectIdentifier> current_chunk;
+            String keys;
+            for (; current_position < paths.size() && current_chunk.size() < chunk_size_limit; ++current_position)
+            {
+                Aws::S3::Model::ObjectIdentifier obj;
+                obj.SetKey(paths[current_position]);
+                current_chunk.push_back(obj);
 
-            if (!keys.empty())
-                keys += ", ";
-            keys += paths[current_position];
-        }
+                if (!keys.empty())
+                    keys += ", ";
+                keys += paths[current_position];
+            }
 
-        Aws::S3::Model::Delete delkeys;
-        delkeys.SetObjects(current_chunk);
-        Aws::S3::Model::DeleteObjectsRequest request;
-        request.SetBucket(bucket);
-        request.SetDelete(delkeys);
-        auto outcome = client_ptr->DeleteObjects(request);
-        throwIfError(outcome);
-    }
+            Aws::S3::Model::Delete delkeys;
+            delkeys.SetObjects(current_chunk);
+            Aws::S3::Model::DeleteObjectsRequest request;
+            request.SetBucket(bucket);
+            request.SetDelete(delkeys);
+            auto outcome = client_ptr->DeleteObjects(request);
+            throwIfError(outcome);
+        }
+    }
 }
 
@@ -493,7 +518,7 @@ std::unique_ptr<IObjectStorage> S3ObjectStorage::cloneObjectStorage(const std::s
     return std::make_unique<S3ObjectStorage>(
         nullptr, getClient(config, config_prefix, context),
         getSettings(config, config_prefix, context),
-        version_id, new_namespace);
+        version_id, s3_capabilities, new_namespace);
 }
 
 }
src/Disks/ObjectStorages/S3/S3ObjectStorage.h

@@ -5,6 +5,7 @@
 #if USE_AWS_S3
 
 #include <Disks/ObjectStorages/IObjectStorage.h>
+#include <Disks/ObjectStorages/S3/S3Capabilities.h>
 #include <memory>
 #include <aws/s3/S3Client.h>
 #include <aws/s3/model/HeadObjectResult.h>

@@ -46,11 +47,13 @@ public:
         std::unique_ptr<Aws::S3::S3Client> && client_,
         std::unique_ptr<S3ObjectStorageSettings> && s3_settings_,
         String version_id_,
+        const S3Capabilities & s3_capabilities_,
         String bucket_)
         : IObjectStorage(std::move(cache_))
         , bucket(bucket_)
         , client(std::move(client_))
         , s3_settings(std::move(s3_settings_))
+        , s3_capabilities(s3_capabilities_)
         , version_id(std::move(version_id_))
     {}

@@ -129,6 +132,7 @@ private:
     MultiVersion<Aws::S3::S3Client> client;
     MultiVersion<S3ObjectStorageSettings> s3_settings;
+    const S3Capabilities s3_capabilities;
 
     const String version_id;
 };
src/Disks/ObjectStorages/S3/registerDiskS3.cpp

@@ -89,11 +89,12 @@ void registerDiskS3(DiskFactory & factory)
     auto metadata_storage = std::make_shared<MetadataStorageFromDisk>(metadata_disk, uri.key);
 
     FileCachePtr cache = getCachePtrForDisk(name, config, config_prefix, context);
+    S3Capabilities s3_capabilities = getCapabilitiesFromConfig(config, config_prefix);
 
     ObjectStoragePtr s3_storage = std::make_unique<S3ObjectStorage>(
         std::move(cache), getClient(config, config_prefix, context),
         getSettings(config, config_prefix, context),
-        uri.version_id, uri.bucket);
+        uri.version_id, s3_capabilities, uri.bucket);
 
     bool send_metadata = config.getBool(config_prefix + ".send_metadata", false);
     uint64_t copy_thread_pool_size = config.getUInt(config_prefix + ".thread_pool_size", 16);
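
Since registerDiskS3 now derives the capabilities from the same config_prefix as the rest of the disk settings, an operator can opt a GCS-backed disk out of batch deletes directly in the server configuration. A sketch of what that might look like (the disk name and endpoint are illustrative, not from the commit):

<clickhouse>
    <storage_configuration>
        <disks>
            <gcs> <!-- illustrative disk name -->
                <type>s3</type>
                <endpoint>https://storage.googleapis.com/my-bucket/clickhouse/</endpoint>
                <!-- GCS has no DeleteObjects, so disable batching; removeObject()
                     and removeObjects() fall back to single DeleteObject calls -->
                <support_batch_delete>false</support_batch_delete>
            </gcs>
        </disks>
    </storage_configuration>
</clickhouse>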