Use managed identity for backups IO in Azure Blob Storage

Also adds an option to prevent ClickHouse from trying to create a container that does not exist,
because creating a container requires a role assignment at the storage account level.
This commit is contained in:
Dani Pozo 2024-03-22 17:58:07 +01:00
parent 9082a018a5
commit 2736b4ef64
10 changed files with 50 additions and 27 deletions

View File

@ -87,6 +87,7 @@ The BACKUP and RESTORE statements take a list of DATABASE and TABLE names, a des
- `structure_only`: if enabled, allows to only backup or restore the CREATE statements without the data of tables - `structure_only`: if enabled, allows to only backup or restore the CREATE statements without the data of tables
- `storage_policy`: storage policy for the tables being restored. See [Using Multiple Block Devices for Data Storage](../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes). This setting is only applicable to the `RESTORE` command. The specified storage policy applies only to tables with an engine from the `MergeTree` family. - `storage_policy`: storage policy for the tables being restored. See [Using Multiple Block Devices for Data Storage](../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes). This setting is only applicable to the `RESTORE` command. The specified storage policy applies only to tables with an engine from the `MergeTree` family.
- `s3_storage_class`: the storage class used for S3 backup. For example, `STANDARD` - `s3_storage_class`: the storage class used for S3 backup. For example, `STANDARD`
- `azure_attempt_to_create_container`: when using Azure Blob Storage, whether ClickHouse will attempt to create the specified container if it doesn't exist. Default: true.
### Usage examples ### Usage examples

View File

@ -40,6 +40,7 @@ public:
bool deduplicate_files = true; bool deduplicate_files = true;
bool allow_s3_native_copy = true; bool allow_s3_native_copy = true;
bool use_same_s3_credentials_for_base_backup = false; bool use_same_s3_credentials_for_base_backup = false;
bool azure_attempt_to_create_container = true;
ReadSettings read_settings; ReadSettings read_settings;
WriteSettings write_settings; WriteSettings write_settings;
}; };

View File

@ -140,12 +140,13 @@ BackupWriterAzureBlobStorage::BackupWriterAzureBlobStorage(
StorageAzureBlob::Configuration configuration_, StorageAzureBlob::Configuration configuration_,
const ReadSettings & read_settings_, const ReadSettings & read_settings_,
const WriteSettings & write_settings_, const WriteSettings & write_settings_,
const ContextPtr & context_) const ContextPtr & context_,
bool attempt_to_create_container)
: BackupWriterDefault(read_settings_, write_settings_, getLogger("BackupWriterAzureBlobStorage")) : BackupWriterDefault(read_settings_, write_settings_, getLogger("BackupWriterAzureBlobStorage"))
, data_source_description{DataSourceType::ObjectStorage, ObjectStorageType::Azure, MetadataStorageType::None, configuration_.container, false, false} , data_source_description{DataSourceType::ObjectStorage, ObjectStorageType::Azure, MetadataStorageType::None, configuration_.container, false, false}
, configuration(configuration_) , configuration(configuration_)
{ {
auto client_ptr = StorageAzureBlob::createClient(configuration, /* is_read_only */ false); auto client_ptr = StorageAzureBlob::createClient(configuration, /* is_read_only */ false, attempt_to_create_container);
object_storage = std::make_unique<AzureObjectStorage>("BackupWriterAzureBlobStorage", object_storage = std::make_unique<AzureObjectStorage>("BackupWriterAzureBlobStorage",
std::move(client_ptr), std::move(client_ptr),
StorageAzureBlob::createSettings(context_), StorageAzureBlob::createSettings(context_),

View File

@ -37,7 +37,7 @@ private:
class BackupWriterAzureBlobStorage : public BackupWriterDefault class BackupWriterAzureBlobStorage : public BackupWriterDefault
{ {
public: public:
BackupWriterAzureBlobStorage(StorageAzureBlob::Configuration configuration_, const ReadSettings & read_settings_, const WriteSettings & write_settings_, const ContextPtr & context_); BackupWriterAzureBlobStorage(StorageAzureBlob::Configuration configuration_, const ReadSettings & read_settings_, const WriteSettings & write_settings_, const ContextPtr & context_, bool attempt_to_create_container);
~BackupWriterAzureBlobStorage() override; ~BackupWriterAzureBlobStorage() override;
bool fileExists(const String & file_name) override; bool fileExists(const String & file_name) override;

View File

@ -28,6 +28,7 @@ namespace ErrorCodes
M(Bool, deduplicate_files) \ M(Bool, deduplicate_files) \
M(Bool, allow_s3_native_copy) \ M(Bool, allow_s3_native_copy) \
M(Bool, use_same_s3_credentials_for_base_backup) \ M(Bool, use_same_s3_credentials_for_base_backup) \
M(Bool, azure_attempt_to_create_container) \
M(Bool, read_from_filesystem_cache) \ M(Bool, read_from_filesystem_cache) \
M(UInt64, shard_num) \ M(UInt64, shard_num) \
M(UInt64, replica_num) \ M(UInt64, replica_num) \

View File

@ -47,6 +47,9 @@ struct BackupSettings
/// Whether base backup to S3 should inherit credentials from the BACKUP query. /// Whether base backup to S3 should inherit credentials from the BACKUP query.
bool use_same_s3_credentials_for_base_backup = false; bool use_same_s3_credentials_for_base_backup = false;
/// Whether a new Azure container should be created if it does not exist (requires permissions at storage account level)
bool azure_attempt_to_create_container = true;
/// Allow to use the filesystem cache in passive mode - benefit from the existing cache entries, /// Allow to use the filesystem cache in passive mode - benefit from the existing cache entries,
/// but don't put more entries into the cache. /// but don't put more entries into the cache.
bool read_from_filesystem_cache = true; bool read_from_filesystem_cache = true;

View File

@ -597,6 +597,7 @@ void BackupsWorker::doBackup(
backup_create_params.deduplicate_files = backup_settings.deduplicate_files; backup_create_params.deduplicate_files = backup_settings.deduplicate_files;
backup_create_params.allow_s3_native_copy = backup_settings.allow_s3_native_copy; backup_create_params.allow_s3_native_copy = backup_settings.allow_s3_native_copy;
backup_create_params.use_same_s3_credentials_for_base_backup = backup_settings.use_same_s3_credentials_for_base_backup; backup_create_params.use_same_s3_credentials_for_base_backup = backup_settings.use_same_s3_credentials_for_base_backup;
backup_create_params.azure_attempt_to_create_container = backup_settings.azure_attempt_to_create_container;
backup_create_params.read_settings = getReadSettingsForBackup(context, backup_settings); backup_create_params.read_settings = getReadSettingsForBackup(context, backup_settings);
backup_create_params.write_settings = getWriteSettingsForBackup(context); backup_create_params.write_settings = getWriteSettingsForBackup(context);
backup = BackupFactory::instance().createBackup(backup_create_params); backup = BackupFactory::instance().createBackup(backup_create_params);

View File

@ -86,7 +86,7 @@ void registerBackupEngineAzureBlobStorage(BackupFactory & factory)
if (args.size() == 3) if (args.size() == 3)
{ {
configuration.connection_url = args[0].safeGet<String>(); configuration.connection_url = args[0].safeGet<String>();
configuration.is_connection_string = true; configuration.is_connection_string = !configuration.connection_url.starts_with("http");
configuration.container = args[1].safeGet<String>(); configuration.container = args[1].safeGet<String>();
configuration.blob_path = args[2].safeGet<String>(); configuration.blob_path = args[2].safeGet<String>();
@ -147,7 +147,8 @@ void registerBackupEngineAzureBlobStorage(BackupFactory & factory)
auto writer = std::make_shared<BackupWriterAzureBlobStorage>(configuration, auto writer = std::make_shared<BackupWriterAzureBlobStorage>(configuration,
params.read_settings, params.read_settings,
params.write_settings, params.write_settings,
params.context); params.context,
params.azure_attempt_to_create_container);
return std::make_unique<BackupImpl>( return std::make_unique<BackupImpl>(
params.backup_info, params.backup_info,

View File

@ -17,6 +17,7 @@
#include <DataTypes/DataTypesNumber.h> #include <DataTypes/DataTypesNumber.h>
#include <azure/storage/common/storage_credential.hpp> #include <azure/storage/common/storage_credential.hpp>
#include <azure/identity/managed_identity_credential.hpp>
#include <Processors/Transforms/AddingDefaultsTransform.h> #include <Processors/Transforms/AddingDefaultsTransform.h>
#include <Processors/Transforms/ExtractColumnsTransform.h> #include <Processors/Transforms/ExtractColumnsTransform.h>
#include <Processors/Formats/IOutputFormat.h> #include <Processors/Formats/IOutputFormat.h>
@ -336,33 +337,37 @@ static bool containerExists(std::unique_ptr<BlobServiceClient> &blob_service_cli
return false; return false;
} }
AzureClientPtr StorageAzureBlob::createClient(StorageAzureBlob::Configuration configuration, bool is_read_only) AzureClientPtr StorageAzureBlob::createClient(StorageAzureBlob::Configuration configuration, bool is_read_only, bool attempt_to_create_container)
{ {
AzureClientPtr result; AzureClientPtr result;
if (configuration.is_connection_string) if (configuration.is_connection_string)
{ {
std::shared_ptr<Azure::Identity::ManagedIdentityCredential> managed_identity_credential = std::make_shared<Azure::Identity::ManagedIdentityCredential>();
std::unique_ptr<BlobServiceClient> blob_service_client = std::make_unique<BlobServiceClient>(BlobServiceClient::CreateFromConnectionString(configuration.connection_url)); std::unique_ptr<BlobServiceClient> blob_service_client = std::make_unique<BlobServiceClient>(BlobServiceClient::CreateFromConnectionString(configuration.connection_url));
result = std::make_unique<BlobContainerClient>(BlobContainerClient::CreateFromConnectionString(configuration.connection_url, configuration.container)); result = std::make_unique<BlobContainerClient>(BlobContainerClient::CreateFromConnectionString(configuration.connection_url, configuration.container));
bool container_exists = containerExists(blob_service_client,configuration.container);
if (!container_exists) if (attempt_to_create_container)
{ {
if (is_read_only) bool container_exists = containerExists(blob_service_client,configuration.container);
throw Exception( if (!container_exists)
ErrorCodes::DATABASE_ACCESS_DENIED, {
"AzureBlobStorage container does not exist '{}'", if (is_read_only)
configuration.container); throw Exception(
ErrorCodes::DATABASE_ACCESS_DENIED,
"AzureBlobStorage container does not exist '{}'",
configuration.container);
try try
{
result->CreateIfNotExists();
} catch (const Azure::Storage::StorageException & e)
{
if (!(e.StatusCode == Azure::Core::Http::HttpStatusCode::Conflict
&& e.ReasonPhrase == "The specified container already exists."))
{ {
throw; result->CreateIfNotExists();
} catch (const Azure::Storage::StorageException & e)
{
if (!(e.StatusCode == Azure::Core::Http::HttpStatusCode::Conflict
&& e.ReasonPhrase == "The specified container already exists."))
{
throw;
}
} }
} }
} }
@ -377,17 +382,17 @@ AzureClientPtr StorageAzureBlob::createClient(StorageAzureBlob::Configuration co
} }
std::unique_ptr<BlobServiceClient> blob_service_client; std::unique_ptr<BlobServiceClient> blob_service_client;
std::shared_ptr<Azure::Identity::ManagedIdentityCredential> managed_identity_credential;
if (storage_shared_key_credential) if (storage_shared_key_credential)
{ {
blob_service_client = std::make_unique<BlobServiceClient>(configuration.connection_url, storage_shared_key_credential); blob_service_client = std::make_unique<BlobServiceClient>(configuration.connection_url, storage_shared_key_credential);
} }
else else
{ {
blob_service_client = std::make_unique<BlobServiceClient>(configuration.connection_url); managed_identity_credential = std::make_shared<Azure::Identity::ManagedIdentityCredential>();
blob_service_client = std::make_unique<BlobServiceClient>(configuration.connection_url, managed_identity_credential);
} }
bool container_exists = containerExists(blob_service_client,configuration.container);
std::string final_url; std::string final_url;
size_t pos = configuration.connection_url.find('?'); size_t pos = configuration.connection_url.find('?');
if (pos != std::string::npos) if (pos != std::string::npos)
@ -400,12 +405,21 @@ AzureClientPtr StorageAzureBlob::createClient(StorageAzureBlob::Configuration co
final_url final_url
= configuration.connection_url + (configuration.connection_url.back() == '/' ? "" : "/") + configuration.container; = configuration.connection_url + (configuration.connection_url.back() == '/' ? "" : "/") + configuration.container;
if (!attempt_to_create_container)
{
if (storage_shared_key_credential)
return std::make_unique<BlobContainerClient>(final_url, storage_shared_key_credential);
else
return std::make_unique<BlobContainerClient>(final_url, managed_identity_credential);
}
bool container_exists = containerExists(blob_service_client,configuration.container);
if (container_exists) if (container_exists)
{ {
if (storage_shared_key_credential) if (storage_shared_key_credential)
result = std::make_unique<BlobContainerClient>(final_url, storage_shared_key_credential); result = std::make_unique<BlobContainerClient>(final_url, storage_shared_key_credential);
else else
result = std::make_unique<BlobContainerClient>(final_url); result = std::make_unique<BlobContainerClient>(final_url, managed_identity_credential);
} }
else else
{ {
@ -425,7 +439,7 @@ AzureClientPtr StorageAzureBlob::createClient(StorageAzureBlob::Configuration co
if (storage_shared_key_credential) if (storage_shared_key_credential)
result = std::make_unique<BlobContainerClient>(final_url, storage_shared_key_credential); result = std::make_unique<BlobContainerClient>(final_url, storage_shared_key_credential);
else else
result = std::make_unique<BlobContainerClient>(final_url); result = std::make_unique<BlobContainerClient>(final_url, managed_identity_credential);
} }
else else
{ {

View File

@ -69,7 +69,7 @@ public:
ASTPtr partition_by_); ASTPtr partition_by_);
static StorageAzureBlob::Configuration getConfiguration(ASTs & engine_args, const ContextPtr & local_context); static StorageAzureBlob::Configuration getConfiguration(ASTs & engine_args, const ContextPtr & local_context);
static AzureClientPtr createClient(StorageAzureBlob::Configuration configuration, bool is_read_only); static AzureClientPtr createClient(StorageAzureBlob::Configuration configuration, bool is_read_only, bool attempt_to_create_container = true);
static AzureObjectStorage::SettingsPtr createSettings(const ContextPtr & local_context); static AzureObjectStorage::SettingsPtr createSettings(const ContextPtr & local_context);