Updated docs and addressed comments

This commit is contained in:
Smita Kulkarni 2024-02-28 11:25:35 +01:00
parent ca05557659
commit a46d7c9191
5 changed files with 90 additions and 39 deletions

View File

@ -1236,9 +1236,9 @@ Configuration markup:
```
Connection parameters:
* `endpoint` — AzureBlobStorage endpoint URL with container & prefix. Optionally can contain account_name if the authentication method used needs it. (http://azurite1:{port}/[account_name]{container_name}/{data_prefix}) or these parameters can be provided separately using storage_account_url, account_name & container. For specifying prefix, endpoint should be used.
* `endpoint_contains_account_name` - This flag is used to specify if endpoint contains account_name as it is only needed for certain authentication methods.
* `storage_account_url` - **Required**, Azure Blob Storage account URL, like `http://account.blob.core.windows.net` or `http://azurite1:10000/devstoreaccount1`.
* `endpoint` — Azure Blob Storage endpoint URL including the container and an optional prefix, in the form `http://account.blob.core.windows.net:{port}/[account_name/]{container_name}/{data_prefix}`. The `account_name` segment is included only if the authentication method used needs it. Alternatively, these parameters can be provided separately using `storage_account_url`, `account_name` and `container_name`. To specify a prefix, `endpoint` should be used.
* `endpoint_contains_account_name` - Specifies whether `endpoint` contains the account name, which is only needed for certain authentication methods. Default: `false`.
* `storage_account_url` - Azure Blob Storage account URL, like `http://account.blob.core.windows.net` or `http://azurite1:10000/devstoreaccount1`. Required if `endpoint` is not specified.
* `container_name` - Target container name, defaults to `default-container`.
* `container_already_exists` - If set to `false`, a new container `container_name` is created in the storage account, if set to `true`, disk connects to the container directly, and if left unset, disk connects to the account, checks if the container `container_name` exists, and creates it if it doesn't exist yet.

View File

@ -60,8 +60,8 @@ AzureBlobStorageEndpoint processAzureBlobStorageEndpoint(const Poco::Util::Abstr
String endpoint = config.getString(config_prefix + ".endpoint");
/// For some authentication methods account name is not present in the endpoint
/// 'endpoint_contains_account_name' bool is used to understand how to split the endpoint (default : true)
bool endpoint_contains_account_name = config.getBool(config_prefix + ".endpoint_contains_account_name", true);
/// 'endpoint_contains_account_name' bool is used to understand how to split the endpoint (default : false)
bool endpoint_contains_account_name = config.getBool(config_prefix + ".endpoint_contains_account_name", false);
size_t pos = endpoint.find("//");
if (pos == std::string::npos)
@ -76,48 +76,41 @@ AzureBlobStorageEndpoint processAzureBlobStorageEndpoint(const Poco::Util::Abstr
storage_url = endpoint.substr(0,acc_pos_begin);
size_t acc_pos_end = endpoint.find('/',acc_pos_begin+1);
if (acc_pos_end != std::string::npos)
if (acc_pos_end == std::string::npos)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected container_name in endpoint");
account_name = endpoint.substr(acc_pos_begin+1,(acc_pos_end-acc_pos_begin)-1);
size_t cont_pos_end = endpoint.find('/', acc_pos_end+1);
if (cont_pos_end != std::string::npos)
{
account_name = endpoint.substr(acc_pos_begin+1,(acc_pos_end-acc_pos_begin)-1);
size_t cont_pos_end = endpoint.find('/', acc_pos_end+1);
if (cont_pos_end != std::string::npos)
{
container_name = endpoint.substr(acc_pos_end+1,(cont_pos_end-acc_pos_end)-1);
prefix = endpoint.substr(cont_pos_end+1);
}
else
{
container_name = endpoint.substr(acc_pos_end+1);
}
container_name = endpoint.substr(acc_pos_end+1,(cont_pos_end-acc_pos_end)-1);
prefix = endpoint.substr(cont_pos_end+1);
}
else
{
account_name = endpoint.substr(acc_pos_begin+1);
container_name = endpoint.substr(acc_pos_end+1);
}
}
else
{
size_t cont_pos_begin = endpoint.find('/', pos+2);
if (cont_pos_begin != std::string::npos)
{
storage_url = endpoint.substr(0,cont_pos_begin);
size_t cont_pos_end = endpoint.find('/',cont_pos_begin+1);
if (cont_pos_end != std::string::npos)
{
container_name = endpoint.substr(cont_pos_begin+1,(cont_pos_end-cont_pos_begin)-1);
prefix = endpoint.substr(cont_pos_end+1);
}
else
{
container_name = endpoint.substr(cont_pos_begin+1);
}
if (cont_pos_begin == std::string::npos)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected container_name in endpoint");
storage_url = endpoint.substr(0,cont_pos_begin);
size_t cont_pos_end = endpoint.find('/',cont_pos_begin+1);
if (cont_pos_end != std::string::npos)
{
container_name = endpoint.substr(cont_pos_begin+1,(cont_pos_end-cont_pos_begin)-1);
prefix = endpoint.substr(cont_pos_end+1);
}
else
{
storage_url = endpoint;
container_name = endpoint.substr(cont_pos_begin+1);
}
}
}

View File

@ -93,11 +93,11 @@ AzureObjectStorage::AzureObjectStorage(
const String & name_,
AzureClientPtr && client_,
SettingsPtr && settings_,
const String & container_)
const String & object_namespace_)
: name(name_)
, client(std::move(client_))
, settings(std::move(settings_))
, container(container_)
, object_namespace(object_namespace_)
, log(getLogger("AzureObjectStorage"))
{
}
@ -379,7 +379,7 @@ std::unique_ptr<IObjectStorage> AzureObjectStorage::cloneObjectStorage(const std
name,
getAzureBlobContainerClient(config, config_prefix),
getAzureBlobStorageSettings(config, config_prefix, context),
container
object_namespace
);
}

View File

@ -130,7 +130,7 @@ public:
const std::string & config_prefix,
ContextPtr context) override;
String getObjectsNamespace() const override { return container ; }
String getObjectsNamespace() const override { return object_namespace ; }
std::unique_ptr<IObjectStorage> cloneObjectStorage(
const std::string & new_namespace,
@ -154,7 +154,7 @@ private:
/// client used to access the files in the Blob Storage cloud
MultiVersion<Azure::Storage::Blobs::BlobContainerClient> client;
MultiVersion<AzureObjectStorageSettings> settings;
const String container;
const String object_namespace; /// container + prefix
LoggerPtr log;
};

View File

@ -623,6 +623,7 @@ def test_endpoint(cluster):
SETTINGS disk = disk(
type = azure_blob_storage,
endpoint = 'http://azurite1:{port}/{account_name}/{container_name}/{data_prefix}',
endpoint_contains_account_name = 'true',
account_name = 'devstoreaccount1',
account_key = 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==',
container_already_exists = 1,
@ -652,6 +653,7 @@ def test_endpoint_new_container(cluster):
SETTINGS disk = disk(
type = azure_blob_storage,
endpoint = 'http://azurite1:{port}/{account_name}/{container_name}/{data_prefix}',
endpoint_contains_account_name = 'true',
account_name = 'devstoreaccount1',
account_key = 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==',
skip_access_check = 0);
@ -679,6 +681,7 @@ def test_endpoint_without_prefix(cluster):
SETTINGS disk = disk(
type = azure_blob_storage,
endpoint = 'http://azurite1:{port}/{account_name}/{container_name}',
endpoint_contains_account_name = 'true',
account_name = 'devstoreaccount1',
account_key = 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==',
skip_access_check = 0);
@ -688,3 +691,58 @@ def test_endpoint_without_prefix(cluster):
)
assert 10 == int(node.query("SELECT count() FROM test"))
def test_endpoint_error_check(cluster):
    """Check that incomplete disk endpoints are reported with a clear message.

    Three CREATE TABLE statements are sent through ``azure_query`` with
    ``expect_error="true"``; for each one the value returned by
    ``azure_query`` must contain the expected error text.
    """
    instance = cluster.instances[NODE_NAME]
    account_name = "devstoreaccount1"
    port = cluster.azurite_port

    # Endpoint stops after the account name: no container segment follows.
    create_without_container = f"""
DROP TABLE IF EXISTS test SYNC;
CREATE TABLE test (a Int32)
ENGINE = MergeTree() ORDER BY tuple()
SETTINGS disk = disk(
type = azure_blob_storage,
endpoint = 'http://azurite1:{port}/{account_name}',
endpoint_contains_account_name = 'true',
account_name = 'devstoreaccount1',
account_key = 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==',
skip_access_check = 0);
"""
    failure = azure_query(instance, create_without_container, expect_error="true")
    assert "Expected container_name in endpoint" in failure

    # Flag says the endpoint carries an account name, but the URL has no path at all.
    create_without_account = f"""
DROP TABLE IF EXISTS test SYNC;
CREATE TABLE test (a Int32)
ENGINE = MergeTree() ORDER BY tuple()
SETTINGS disk = disk(
type = azure_blob_storage,
endpoint = 'http://azurite1:{port}',
endpoint_contains_account_name = 'true',
account_name = 'devstoreaccount1',
account_key = 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==',
skip_access_check = 0);
"""
    failure = azure_query(instance, create_without_account, expect_error="true")
    assert "Expected account_name in endpoint" in failure

    # Flag omitted: same bare URL, and the container name is what is reported missing.
    create_bare_endpoint = f"""
DROP TABLE IF EXISTS test SYNC;
CREATE TABLE test (a Int32)
ENGINE = MergeTree() ORDER BY tuple()
SETTINGS disk = disk(
type = azure_blob_storage,
endpoint = 'http://azurite1:{port}',
account_name = 'devstoreaccount1',
account_key = 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==',
skip_access_check = 0);
"""
    failure = azure_query(instance, create_bare_endpoint, expect_error="true")
    assert "Expected container_name in endpoint" in failure