From 2736b4ef6436b86d82fb8439c82b1b0fcbab1213 Mon Sep 17 00:00:00 2001 From: Dani Pozo Date: Fri, 22 Mar 2024 17:58:07 +0100 Subject: [PATCH 1/2] Use managed identity for backups IO in Azure Blob Storage Also adds option to prevent ClickHouse from trying to create a non-existing container, which requires a role assignment at the storage account level. --- docs/en/operations/backup.md | 1 + src/Backups/BackupFactory.h | 1 + src/Backups/BackupIO_AzureBlobStorage.cpp | 5 +- src/Backups/BackupIO_AzureBlobStorage.h | 2 +- src/Backups/BackupSettings.cpp | 1 + src/Backups/BackupSettings.h | 3 + src/Backups/BackupsWorker.cpp | 1 + .../registerBackupEngineAzureBlobStorage.cpp | 5 +- src/Storages/StorageAzureBlob.cpp | 56 ++++++++++++------- src/Storages/StorageAzureBlob.h | 2 +- 10 files changed, 50 insertions(+), 27 deletions(-) diff --git a/docs/en/operations/backup.md b/docs/en/operations/backup.md index 8639af468c2..2ba50b39934 100644 --- a/docs/en/operations/backup.md +++ b/docs/en/operations/backup.md @@ -87,6 +87,7 @@ The BACKUP and RESTORE statements take a list of DATABASE and TABLE names, a des - `structure_only`: if enabled, allows to only backup or restore the CREATE statements without the data of tables - `storage_policy`: storage policy for the tables being restored. See [Using Multiple Block Devices for Data Storage](../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes). This setting is only applicable to the `RESTORE` command. The specified storage policy applies only to tables with an engine from the `MergeTree` family. - `s3_storage_class`: the storage class used for S3 backup. For example, `STANDARD` + - `azure_attempt_to_create_container`: when using Azure Blob Storage, whether the specified container will try to be created if it doesn't exist. Default: true. ### Usage examples diff --git a/src/Backups/BackupFactory.h b/src/Backups/BackupFactory.h index f0c64486da4..4e752508577 100644 --- a/src/Backups/BackupFactory.h +++ b/src/Backups/BackupFactory.h @@ -40,6 +40,7 @@ public: bool deduplicate_files = true; bool allow_s3_native_copy = true; bool use_same_s3_credentials_for_base_backup = false; + bool azure_attempt_to_create_container = true; ReadSettings read_settings; WriteSettings write_settings; }; diff --git a/src/Backups/BackupIO_AzureBlobStorage.cpp b/src/Backups/BackupIO_AzureBlobStorage.cpp index b3b92323109..7f3ee6b42a7 100644 --- a/src/Backups/BackupIO_AzureBlobStorage.cpp +++ b/src/Backups/BackupIO_AzureBlobStorage.cpp @@ -140,12 +140,13 @@ BackupWriterAzureBlobStorage::BackupWriterAzureBlobStorage( StorageAzureBlob::Configuration configuration_, const ReadSettings & read_settings_, const WriteSettings & write_settings_, - const ContextPtr & context_) + const ContextPtr & context_, + bool attempt_to_create_container) : BackupWriterDefault(read_settings_, write_settings_, getLogger("BackupWriterAzureBlobStorage")) , data_source_description{DataSourceType::ObjectStorage, ObjectStorageType::Azure, MetadataStorageType::None, configuration_.container, false, false} , configuration(configuration_) { - auto client_ptr = StorageAzureBlob::createClient(configuration, /* is_read_only */ false); + auto client_ptr = StorageAzureBlob::createClient(configuration, /* is_read_only */ false, attempt_to_create_container); object_storage = std::make_unique("BackupWriterAzureBlobStorage", std::move(client_ptr), StorageAzureBlob::createSettings(context_), diff --git a/src/Backups/BackupIO_AzureBlobStorage.h b/src/Backups/BackupIO_AzureBlobStorage.h index 95325044a62..f0b9aace4d4 100644 --- a/src/Backups/BackupIO_AzureBlobStorage.h +++ b/src/Backups/BackupIO_AzureBlobStorage.h @@ -37,7 +37,7 @@ private: class BackupWriterAzureBlobStorage : public BackupWriterDefault { public: - BackupWriterAzureBlobStorage(StorageAzureBlob::Configuration configuration_, const ReadSettings & read_settings_, const WriteSettings & write_settings_, const ContextPtr & context_); + BackupWriterAzureBlobStorage(StorageAzureBlob::Configuration configuration_, const ReadSettings & read_settings_, const WriteSettings & write_settings_, const ContextPtr & context_, bool attempt_to_create_container); ~BackupWriterAzureBlobStorage() override; bool fileExists(const String & file_name) override; diff --git a/src/Backups/BackupSettings.cpp b/src/Backups/BackupSettings.cpp index 68d825e9468..468e5651274 100644 --- a/src/Backups/BackupSettings.cpp +++ b/src/Backups/BackupSettings.cpp @@ -28,6 +28,7 @@ namespace ErrorCodes M(Bool, deduplicate_files) \ M(Bool, allow_s3_native_copy) \ M(Bool, use_same_s3_credentials_for_base_backup) \ + M(Bool, azure_attempt_to_create_container) \ M(Bool, read_from_filesystem_cache) \ M(UInt64, shard_num) \ M(UInt64, replica_num) \ diff --git a/src/Backups/BackupSettings.h b/src/Backups/BackupSettings.h index 10181ea464a..13709ca11c6 100644 --- a/src/Backups/BackupSettings.h +++ b/src/Backups/BackupSettings.h @@ -47,6 +47,9 @@ struct BackupSettings /// Whether base backup to S3 should inherit credentials from the BACKUP query. bool use_same_s3_credentials_for_base_backup = false; + /// Whether a new Azure container should be created if it does not exist (requires permissions at storage account level) + bool azure_attempt_to_create_container = true; + /// Allow to use the filesystem cache in passive mode - benefit from the existing cache entries, /// but don't put more entries into the cache. bool read_from_filesystem_cache = true; diff --git a/src/Backups/BackupsWorker.cpp b/src/Backups/BackupsWorker.cpp index d0853300edb..96fe770227c 100644 --- a/src/Backups/BackupsWorker.cpp +++ b/src/Backups/BackupsWorker.cpp @@ -597,6 +597,7 @@ void BackupsWorker::doBackup( backup_create_params.deduplicate_files = backup_settings.deduplicate_files; backup_create_params.allow_s3_native_copy = backup_settings.allow_s3_native_copy; backup_create_params.use_same_s3_credentials_for_base_backup = backup_settings.use_same_s3_credentials_for_base_backup; + backup_create_params.azure_attempt_to_create_container = backup_settings.azure_attempt_to_create_container; backup_create_params.read_settings = getReadSettingsForBackup(context, backup_settings); backup_create_params.write_settings = getWriteSettingsForBackup(context); backup = BackupFactory::instance().createBackup(backup_create_params); diff --git a/src/Backups/registerBackupEngineAzureBlobStorage.cpp b/src/Backups/registerBackupEngineAzureBlobStorage.cpp index 48f66569304..dbe5b555c31 100644 --- a/src/Backups/registerBackupEngineAzureBlobStorage.cpp +++ b/src/Backups/registerBackupEngineAzureBlobStorage.cpp @@ -86,7 +86,7 @@ void registerBackupEngineAzureBlobStorage(BackupFactory & factory) if (args.size() == 3) { configuration.connection_url = args[0].safeGet(); - configuration.is_connection_string = true; + configuration.is_connection_string = !configuration.connection_url.starts_with("http"); configuration.container = args[1].safeGet(); configuration.blob_path = args[2].safeGet(); @@ -147,7 +147,8 @@ void registerBackupEngineAzureBlobStorage(BackupFactory & factory) auto writer = std::make_shared(configuration, params.read_settings, params.write_settings, - params.context); + params.context, + params.azure_attempt_to_create_container); return std::make_unique( params.backup_info, diff --git a/src/Storages/StorageAzureBlob.cpp b/src/Storages/StorageAzureBlob.cpp index 6367cb0b077..289bd4643c4 100644 --- a/src/Storages/StorageAzureBlob.cpp +++ b/src/Storages/StorageAzureBlob.cpp @@ -17,6 +17,7 @@ #include #include +#include #include #include #include @@ -336,33 +337,37 @@ static bool containerExists(std::unique_ptr &blob_service_cli return false; } -AzureClientPtr StorageAzureBlob::createClient(StorageAzureBlob::Configuration configuration, bool is_read_only) +AzureClientPtr StorageAzureBlob::createClient(StorageAzureBlob::Configuration configuration, bool is_read_only, bool attempt_to_create_container) { AzureClientPtr result; if (configuration.is_connection_string) { + std::shared_ptr managed_identity_credential = std::make_shared(); std::unique_ptr blob_service_client = std::make_unique(BlobServiceClient::CreateFromConnectionString(configuration.connection_url)); result = std::make_unique(BlobContainerClient::CreateFromConnectionString(configuration.connection_url, configuration.container)); - bool container_exists = containerExists(blob_service_client,configuration.container); - if (!container_exists) + if (attempt_to_create_container) { - if (is_read_only) - throw Exception( - ErrorCodes::DATABASE_ACCESS_DENIED, - "AzureBlobStorage container does not exist '{}'", - configuration.container); + bool container_exists = containerExists(blob_service_client,configuration.container); + if (!container_exists) + { + if (is_read_only) + throw Exception( + ErrorCodes::DATABASE_ACCESS_DENIED, + "AzureBlobStorage container does not exist '{}'", + configuration.container); - try - { - result->CreateIfNotExists(); - } catch (const Azure::Storage::StorageException & e) - { - if (!(e.StatusCode == Azure::Core::Http::HttpStatusCode::Conflict - && e.ReasonPhrase == "The specified container already exists.")) + try { - throw; + result->CreateIfNotExists(); + } catch (const Azure::Storage::StorageException & e) + { + if (!(e.StatusCode == Azure::Core::Http::HttpStatusCode::Conflict + && e.ReasonPhrase == "The specified container already exists.")) + { + throw; + } } } } @@ -377,17 +382,17 @@ AzureClientPtr StorageAzureBlob::createClient(StorageAzureBlob::Configuration co } std::unique_ptr blob_service_client; + std::shared_ptr managed_identity_credential; if (storage_shared_key_credential) { blob_service_client = std::make_unique(configuration.connection_url, storage_shared_key_credential); } else { - blob_service_client = std::make_unique(configuration.connection_url); + managed_identity_credential = std::make_shared(); + blob_service_client = std::make_unique(configuration.connection_url, managed_identity_credential); } - bool container_exists = containerExists(blob_service_client,configuration.container); - std::string final_url; size_t pos = configuration.connection_url.find('?'); if (pos != std::string::npos) @@ -400,12 +405,21 @@ AzureClientPtr StorageAzureBlob::createClient(StorageAzureBlob::Configuration co final_url = configuration.connection_url + (configuration.connection_url.back() == '/' ? "" : "/") + configuration.container; + if (!attempt_to_create_container) + { + if (storage_shared_key_credential) + return std::make_unique(final_url, storage_shared_key_credential); + else + return std::make_unique(final_url, managed_identity_credential); + } + + bool container_exists = containerExists(blob_service_client,configuration.container); if (container_exists) { if (storage_shared_key_credential) result = std::make_unique(final_url, storage_shared_key_credential); else - result = std::make_unique(final_url); + result = std::make_unique(final_url, managed_identity_credential); } else { @@ -425,7 +439,7 @@ AzureClientPtr StorageAzureBlob::createClient(StorageAzureBlob::Configuration co if (storage_shared_key_credential) result = std::make_unique(final_url, storage_shared_key_credential); else - result = std::make_unique(final_url); + result = std::make_unique(final_url, managed_identity_credential); } else { diff --git a/src/Storages/StorageAzureBlob.h b/src/Storages/StorageAzureBlob.h index 63fd489dcaf..27ac7a5c368 100644 --- a/src/Storages/StorageAzureBlob.h +++ b/src/Storages/StorageAzureBlob.h @@ -69,7 +69,7 @@ public: ASTPtr partition_by_); static StorageAzureBlob::Configuration getConfiguration(ASTs & engine_args, const ContextPtr & local_context); - static AzureClientPtr createClient(StorageAzureBlob::Configuration configuration, bool is_read_only); + static AzureClientPtr createClient(StorageAzureBlob::Configuration configuration, bool is_read_only, bool attempt_to_create_container = true); static AzureObjectStorage::SettingsPtr createSettings(const ContextPtr & local_context); From aeefba57b513a52527010fd14b94a4f911ab69a5 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 23 Mar 2024 22:49:23 +0300 Subject: [PATCH 2/2] Update StorageAzureBlob.cpp --- src/Storages/StorageAzureBlob.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Storages/StorageAzureBlob.cpp b/src/Storages/StorageAzureBlob.cpp index 289bd4643c4..306a5eac8e5 100644 --- a/src/Storages/StorageAzureBlob.cpp +++ b/src/Storages/StorageAzureBlob.cpp @@ -361,7 +361,8 @@ AzureClientPtr StorageAzureBlob::createClient(StorageAzureBlob::Configuration co try { result->CreateIfNotExists(); - } catch (const Azure::Storage::StorageException & e) + } + catch (const Azure::Storage::StorageException & e) { if (!(e.StatusCode == Azure::Core::Http::HttpStatusCode::Conflict && e.ReasonPhrase == "The specified container already exists."))