Merge pull request #61785 from danipozo/azure-use-managed-identity-for-backups

Use managed identity for backups IO in Azure Blob Storage
This commit is contained in:
Alexey Milovidov 2024-03-24 20:31:12 +03:00 committed by GitHub
commit 717603802d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
10 changed files with 51 additions and 27 deletions

View File

@ -87,6 +87,7 @@ The BACKUP and RESTORE statements take a list of DATABASE and TABLE names, a des
- `structure_only`: if enabled, only the CREATE statements are backed up or restored, without the tables' data
- `storage_policy`: storage policy for the tables being restored. See [Using Multiple Block Devices for Data Storage](../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes). This setting is only applicable to the `RESTORE` command. The specified storage policy applies only to tables with an engine from the `MergeTree` family.
- `s3_storage_class`: the storage class used for S3 backup. For example, `STANDARD`
- `azure_attempt_to_create_container`: when using Azure Blob Storage, whether to attempt to create the specified container if it doesn't exist. Default: true.
### Usage examples

View File

@ -40,6 +40,7 @@ public:
bool deduplicate_files = true;
bool allow_s3_native_copy = true;
bool use_same_s3_credentials_for_base_backup = false;
bool azure_attempt_to_create_container = true;
ReadSettings read_settings;
WriteSettings write_settings;
};

View File

@ -140,12 +140,13 @@ BackupWriterAzureBlobStorage::BackupWriterAzureBlobStorage(
StorageAzureBlob::Configuration configuration_,
const ReadSettings & read_settings_,
const WriteSettings & write_settings_,
const ContextPtr & context_)
const ContextPtr & context_,
bool attempt_to_create_container)
: BackupWriterDefault(read_settings_, write_settings_, getLogger("BackupWriterAzureBlobStorage"))
, data_source_description{DataSourceType::ObjectStorage, ObjectStorageType::Azure, MetadataStorageType::None, configuration_.container, false, false}
, configuration(configuration_)
{
auto client_ptr = StorageAzureBlob::createClient(configuration, /* is_read_only */ false);
auto client_ptr = StorageAzureBlob::createClient(configuration, /* is_read_only */ false, attempt_to_create_container);
object_storage = std::make_unique<AzureObjectStorage>("BackupWriterAzureBlobStorage",
std::move(client_ptr),
StorageAzureBlob::createSettings(context_),

View File

@ -37,7 +37,7 @@ private:
class BackupWriterAzureBlobStorage : public BackupWriterDefault
{
public:
BackupWriterAzureBlobStorage(StorageAzureBlob::Configuration configuration_, const ReadSettings & read_settings_, const WriteSettings & write_settings_, const ContextPtr & context_);
BackupWriterAzureBlobStorage(StorageAzureBlob::Configuration configuration_, const ReadSettings & read_settings_, const WriteSettings & write_settings_, const ContextPtr & context_, bool attempt_to_create_container);
~BackupWriterAzureBlobStorage() override;
bool fileExists(const String & file_name) override;

View File

@ -28,6 +28,7 @@ namespace ErrorCodes
M(Bool, deduplicate_files) \
M(Bool, allow_s3_native_copy) \
M(Bool, use_same_s3_credentials_for_base_backup) \
M(Bool, azure_attempt_to_create_container) \
M(Bool, read_from_filesystem_cache) \
M(UInt64, shard_num) \
M(UInt64, replica_num) \

View File

@ -47,6 +47,9 @@ struct BackupSettings
/// Whether base backup to S3 should inherit credentials from the BACKUP query.
bool use_same_s3_credentials_for_base_backup = false;
/// Whether a new Azure container should be created if it does not exist (requires permissions at storage account level)
bool azure_attempt_to_create_container = true;
/// Allow to use the filesystem cache in passive mode - benefit from the existing cache entries,
/// but don't put more entries into the cache.
bool read_from_filesystem_cache = true;

View File

@ -597,6 +597,7 @@ void BackupsWorker::doBackup(
backup_create_params.deduplicate_files = backup_settings.deduplicate_files;
backup_create_params.allow_s3_native_copy = backup_settings.allow_s3_native_copy;
backup_create_params.use_same_s3_credentials_for_base_backup = backup_settings.use_same_s3_credentials_for_base_backup;
backup_create_params.azure_attempt_to_create_container = backup_settings.azure_attempt_to_create_container;
backup_create_params.read_settings = getReadSettingsForBackup(context, backup_settings);
backup_create_params.write_settings = getWriteSettingsForBackup(context);
backup = BackupFactory::instance().createBackup(backup_create_params);

View File

@ -86,7 +86,7 @@ void registerBackupEngineAzureBlobStorage(BackupFactory & factory)
if (args.size() == 3)
{
configuration.connection_url = args[0].safeGet<String>();
configuration.is_connection_string = true;
configuration.is_connection_string = !configuration.connection_url.starts_with("http");
configuration.container = args[1].safeGet<String>();
configuration.blob_path = args[2].safeGet<String>();
@ -147,7 +147,8 @@ void registerBackupEngineAzureBlobStorage(BackupFactory & factory)
auto writer = std::make_shared<BackupWriterAzureBlobStorage>(configuration,
params.read_settings,
params.write_settings,
params.context);
params.context,
params.azure_attempt_to_create_container);
return std::make_unique<BackupImpl>(
params.backup_info,

View File

@ -17,6 +17,7 @@
#include <DataTypes/DataTypesNumber.h>
#include <azure/storage/common/storage_credential.hpp>
#include <azure/identity/managed_identity_credential.hpp>
#include <Processors/Transforms/AddingDefaultsTransform.h>
#include <Processors/Transforms/ExtractColumnsTransform.h>
#include <Processors/Formats/IOutputFormat.h>
@ -336,16 +337,19 @@ static bool containerExists(std::unique_ptr<BlobServiceClient> &blob_service_cli
return false;
}
AzureClientPtr StorageAzureBlob::createClient(StorageAzureBlob::Configuration configuration, bool is_read_only)
AzureClientPtr StorageAzureBlob::createClient(StorageAzureBlob::Configuration configuration, bool is_read_only, bool attempt_to_create_container)
{
AzureClientPtr result;
if (configuration.is_connection_string)
{
std::shared_ptr<Azure::Identity::ManagedIdentityCredential> managed_identity_credential = std::make_shared<Azure::Identity::ManagedIdentityCredential>();
std::unique_ptr<BlobServiceClient> blob_service_client = std::make_unique<BlobServiceClient>(BlobServiceClient::CreateFromConnectionString(configuration.connection_url));
result = std::make_unique<BlobContainerClient>(BlobContainerClient::CreateFromConnectionString(configuration.connection_url, configuration.container));
bool container_exists = containerExists(blob_service_client,configuration.container);
if (attempt_to_create_container)
{
bool container_exists = containerExists(blob_service_client,configuration.container);
if (!container_exists)
{
if (is_read_only)
@ -357,7 +361,8 @@ AzureClientPtr StorageAzureBlob::createClient(StorageAzureBlob::Configuration co
try
{
result->CreateIfNotExists();
} catch (const Azure::Storage::StorageException & e)
}
catch (const Azure::Storage::StorageException & e)
{
if (!(e.StatusCode == Azure::Core::Http::HttpStatusCode::Conflict
&& e.ReasonPhrase == "The specified container already exists."))
@ -367,6 +372,7 @@ AzureClientPtr StorageAzureBlob::createClient(StorageAzureBlob::Configuration co
}
}
}
}
else
{
std::shared_ptr<Azure::Storage::StorageSharedKeyCredential> storage_shared_key_credential;
@ -377,17 +383,17 @@ AzureClientPtr StorageAzureBlob::createClient(StorageAzureBlob::Configuration co
}
std::unique_ptr<BlobServiceClient> blob_service_client;
std::shared_ptr<Azure::Identity::ManagedIdentityCredential> managed_identity_credential;
if (storage_shared_key_credential)
{
blob_service_client = std::make_unique<BlobServiceClient>(configuration.connection_url, storage_shared_key_credential);
}
else
{
blob_service_client = std::make_unique<BlobServiceClient>(configuration.connection_url);
managed_identity_credential = std::make_shared<Azure::Identity::ManagedIdentityCredential>();
blob_service_client = std::make_unique<BlobServiceClient>(configuration.connection_url, managed_identity_credential);
}
bool container_exists = containerExists(blob_service_client,configuration.container);
std::string final_url;
size_t pos = configuration.connection_url.find('?');
if (pos != std::string::npos)
@ -400,12 +406,21 @@ AzureClientPtr StorageAzureBlob::createClient(StorageAzureBlob::Configuration co
final_url
= configuration.connection_url + (configuration.connection_url.back() == '/' ? "" : "/") + configuration.container;
if (!attempt_to_create_container)
{
if (storage_shared_key_credential)
return std::make_unique<BlobContainerClient>(final_url, storage_shared_key_credential);
else
return std::make_unique<BlobContainerClient>(final_url, managed_identity_credential);
}
bool container_exists = containerExists(blob_service_client,configuration.container);
if (container_exists)
{
if (storage_shared_key_credential)
result = std::make_unique<BlobContainerClient>(final_url, storage_shared_key_credential);
else
result = std::make_unique<BlobContainerClient>(final_url);
result = std::make_unique<BlobContainerClient>(final_url, managed_identity_credential);
}
else
{
@ -425,7 +440,7 @@ AzureClientPtr StorageAzureBlob::createClient(StorageAzureBlob::Configuration co
if (storage_shared_key_credential)
result = std::make_unique<BlobContainerClient>(final_url, storage_shared_key_credential);
else
result = std::make_unique<BlobContainerClient>(final_url);
result = std::make_unique<BlobContainerClient>(final_url, managed_identity_credential);
}
else
{

View File

@ -69,7 +69,7 @@ public:
ASTPtr partition_by_);
static StorageAzureBlob::Configuration getConfiguration(ASTs & engine_args, const ContextPtr & local_context);
static AzureClientPtr createClient(StorageAzureBlob::Configuration configuration, bool is_read_only);
static AzureClientPtr createClient(StorageAzureBlob::Configuration configuration, bool is_read_only, bool attempt_to_create_container = true);
static AzureObjectStorage::SettingsPtr createSettings(const ContextPtr & local_context);