Merge pull request #62447 from ClickHouse/Fix_azure_restore

Fix backup restore path for AzureBlobStorage
This commit is contained in:
SmitaRKulkarni 2024-04-12 08:54:09 +00:00 committed by GitHub
commit 09cbd7e8be
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 74 additions and 86 deletions

View File

@@ -50,44 +50,20 @@ BackupReaderAzureBlobStorage::~BackupReaderAzureBlobStorage() = default;
bool BackupReaderAzureBlobStorage::fileExists(const String & file_name)
{
String key;
if (startsWith(file_name, "."))
{
key= configuration.blob_path + file_name;
}
else
{
key = file_name;
}
String key = fs::path(configuration.blob_path) / file_name;
return object_storage->exists(StoredObject(key));
}
UInt64 BackupReaderAzureBlobStorage::getFileSize(const String & file_name)
{
String key;
if (startsWith(file_name, "."))
{
key= configuration.blob_path + file_name;
}
else
{
key = file_name;
}
String key = fs::path(configuration.blob_path) / file_name;
ObjectMetadata object_metadata = object_storage->getObjectMetadata(key);
return object_metadata.size_bytes;
}
std::unique_ptr<SeekableReadBuffer> BackupReaderAzureBlobStorage::readFile(const String & file_name)
{
String key;
if (startsWith(file_name, "."))
{
key= configuration.blob_path + file_name;
}
else
{
key = file_name;
}
String key = fs::path(configuration.blob_path) / file_name;
return std::make_unique<ReadBufferFromAzureBlobStorage>(
client, key, read_settings, settings->max_single_read_retries,
settings->max_single_download_retries);
@@ -194,7 +170,7 @@ void BackupWriterAzureBlobStorage::copyFile(const String & destination, const St
client,
client,
configuration.container,
fs::path(source),
fs::path(configuration.blob_path)/ source,
0,
size,
/* dest_container */ configuration.container,
@@ -207,7 +183,7 @@ void BackupWriterAzureBlobStorage::copyFile(const String & destination, const St
void BackupWriterAzureBlobStorage::copyDataToFile(const String & path_in_backup, const CreateReadBufferFunction & create_read_buffer, UInt64 start_pos, UInt64 length)
{
copyDataToAzureBlobStorageFile(create_read_buffer, start_pos, length, client, configuration.container, path_in_backup, settings,
copyDataToAzureBlobStorageFile(create_read_buffer, start_pos, length, client, configuration.container, fs::path(configuration.blob_path) / path_in_backup, settings,
threadPoolCallbackRunner<void>(getBackupsIOThreadPool().get(), "BackupWRAzure"));
}
@@ -215,29 +191,13 @@ BackupWriterAzureBlobStorage::~BackupWriterAzureBlobStorage() = default;
bool BackupWriterAzureBlobStorage::fileExists(const String & file_name)
{
String key;
if (startsWith(file_name, "."))
{
key= configuration.blob_path + file_name;
}
else
{
key = file_name;
}
String key = fs::path(configuration.blob_path) / file_name;
return object_storage->exists(StoredObject(key));
}
UInt64 BackupWriterAzureBlobStorage::getFileSize(const String & file_name)
{
String key;
if (startsWith(file_name, "."))
{
key= configuration.blob_path + file_name;
}
else
{
key = file_name;
}
String key = fs::path(configuration.blob_path) / file_name;
RelativePathsWithMetadata children;
object_storage->listObjects(key,children,/*max_keys*/0);
if (children.empty())
@@ -247,16 +207,7 @@ UInt64 BackupWriterAzureBlobStorage::getFileSize(const String & file_name)
std::unique_ptr<ReadBuffer> BackupWriterAzureBlobStorage::readFile(const String & file_name, size_t /*expected_file_size*/)
{
String key;
if (startsWith(file_name, "."))
{
key= configuration.blob_path + file_name;
}
else
{
key = file_name;
}
String key = fs::path(configuration.blob_path) / file_name;
return std::make_unique<ReadBufferFromAzureBlobStorage>(
client, key, read_settings, settings->max_single_read_retries,
settings->max_single_download_retries);
@@ -264,15 +215,7 @@ std::unique_ptr<ReadBuffer> BackupWriterAzureBlobStorage::readFile(const String
std::unique_ptr<WriteBuffer> BackupWriterAzureBlobStorage::writeFile(const String & file_name)
{
String key;
if (startsWith(file_name, "."))
{
key= configuration.blob_path + file_name;
}
else
{
key = file_name;
}
String key = fs::path(configuration.blob_path) / file_name;
return std::make_unique<WriteBufferFromAzureBlobStorage>(
client,
key,
@@ -283,15 +226,7 @@ std::unique_ptr<WriteBuffer> BackupWriterAzureBlobStorage::writeFile(const Strin
void BackupWriterAzureBlobStorage::removeFile(const String & file_name)
{
String key;
if (startsWith(file_name, "."))
{
key= configuration.blob_path + file_name;
}
else
{
key = file_name;
}
String key = fs::path(configuration.blob_path) / file_name;
StoredObject object(key);
object_storage->removeObjectIfExists(object);
}
@@ -300,7 +235,7 @@ void BackupWriterAzureBlobStorage::removeFiles(const Strings & file_names)
{
StoredObjects objects;
for (const auto & file_name : file_names)
objects.emplace_back(file_name);
objects.emplace_back(fs::path(configuration.blob_path) / file_name);
object_storage->removeObjectsIfExist(objects);
@@ -310,7 +245,7 @@ void BackupWriterAzureBlobStorage::removeFilesBatch(const Strings & file_names)
{
StoredObjects objects;
for (const auto & file_name : file_names)
objects.emplace_back(file_name);
objects.emplace_back(fs::path(configuration.blob_path) / file_name);
object_storage->removeObjectsIfExist(objects);
}

View File

@@ -41,6 +41,38 @@ def generate_cluster_def(port):
<account_key>Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==</account_key>
</azure_conf2>
</named_collections>
<storage_configuration>
<disks>
<blob_storage_disk>
<type>azure_blob_storage</type>
<storage_account_url>http://azurite1:{port}/devstoreaccount1</storage_account_url>
<container_name>cont</container_name>
<skip_access_check>false</skip_access_check>
<account_name>devstoreaccount1</account_name>
<account_key>Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==</account_key>
<max_single_part_upload_size>100000</max_single_part_upload_size>
<min_upload_part_size>100000</min_upload_part_size>
<max_single_download_retries>10</max_single_download_retries>
<max_single_read_retries>10</max_single_read_retries>
</blob_storage_disk>
<hdd>
<type>local</type>
<path>/</path>
</hdd>
</disks>
<policies>
<blob_storage_policy>
<volumes>
<main>
<disk>blob_storage_disk</disk>
</main>
<external>
<disk>hdd</disk>
</external>
</volumes>
</blob_storage_policy>
</policies>
</storage_configuration>
</clickhouse>
"""
)
@@ -169,12 +201,12 @@ def test_backup_restore(cluster):
print(get_azure_file_content("test_simple_write_c.csv", port))
assert get_azure_file_content("test_simple_write_c.csv", port) == '1,"a"\n'
backup_destination = f"AzureBlobStorage('{cluster.env_variables['AZURITE_CONNECTION_STRING']}', 'cont', 'test_simple_write_c_backup.csv')"
backup_destination = f"AzureBlobStorage('{cluster.env_variables['AZURITE_CONNECTION_STRING']}', 'cont', 'test_simple_write_c_backup')"
azure_query(
node,
f"BACKUP TABLE test_simple_write_connection_string TO {backup_destination}",
)
print(get_azure_file_content("test_simple_write_c_backup.csv.backup", port))
print(get_azure_file_content("test_simple_write_c_backup/.backup", port))
azure_query(
node,
f"RESTORE TABLE test_simple_write_connection_string AS test_simple_write_connection_string_restored FROM {backup_destination};",
@@ -195,7 +227,7 @@ def test_backup_restore_diff_container(cluster):
azure_query(
node, f"INSERT INTO test_simple_write_connection_string_cont1 VALUES (1, 'a')"
)
backup_destination = f"AzureBlobStorage('{cluster.env_variables['AZURITE_CONNECTION_STRING']}', 'cont1', 'test_simple_write_c_backup_cont1.csv')"
backup_destination = f"AzureBlobStorage('{cluster.env_variables['AZURITE_CONNECTION_STRING']}', 'cont1', 'test_simple_write_c_backup_cont1')"
azure_query(
node,
f"BACKUP TABLE test_simple_write_connection_string_cont1 TO {backup_destination}",
@@ -223,14 +255,12 @@ def test_backup_restore_with_named_collection_azure_conf1(cluster):
print(get_azure_file_content("test_simple_write.csv", port))
assert get_azure_file_content("test_simple_write.csv", port) == '1,"a"\n'
backup_destination = (
f"AzureBlobStorage(azure_conf1, 'test_simple_write_nc_backup.csv')"
)
backup_destination = f"AzureBlobStorage(azure_conf1, 'test_simple_write_nc_backup')"
azure_query(
node,
f"BACKUP TABLE test_write_connection_string TO {backup_destination}",
)
print(get_azure_file_content("test_simple_write_nc_backup.csv.backup", port))
print(get_azure_file_content("test_simple_write_nc_backup/.backup", port))
azure_query(
node,
f"RESTORE TABLE test_write_connection_string AS test_write_connection_string_restored FROM {backup_destination};",
@@ -253,13 +283,13 @@ def test_backup_restore_with_named_collection_azure_conf2(cluster):
assert get_azure_file_content("test_simple_write_2.csv", port) == '1,"a"\n'
backup_destination = (
f"AzureBlobStorage(azure_conf2, 'test_simple_write_nc_backup_2.csv')"
f"AzureBlobStorage(azure_conf2, 'test_simple_write_nc_backup_2')"
)
azure_query(
node,
f"BACKUP TABLE test_write_connection_string_2 TO {backup_destination}",
)
print(get_azure_file_content("test_simple_write_nc_backup_2.csv.backup", port))
print(get_azure_file_content("test_simple_write_nc_backup_2/.backup", port))
azure_query(
node,
f"RESTORE TABLE test_write_connection_string_2 AS test_write_connection_string_restored_2 FROM {backup_destination};",
@@ -268,3 +298,26 @@ def test_backup_restore_with_named_collection_azure_conf2(cluster):
azure_query(node, f"SELECT * from test_write_connection_string_restored_2")
== "1\ta\n"
)
def test_backup_restore_on_merge_tree(cluster):
node = cluster.instances["node"]
port = cluster.env_variables["AZURITE_PORT"]
azure_query(
node,
f"CREATE TABLE test_simple_merge_tree(key UInt64, data String) Engine = MergeTree() ORDER BY tuple() SETTINGS storage_policy='blob_storage_policy'",
)
azure_query(node, f"INSERT INTO test_simple_merge_tree VALUES (1, 'a')")
backup_destination = f"AzureBlobStorage('{cluster.env_variables['AZURITE_CONNECTION_STRING']}', 'cont', 'test_simple_merge_tree_backup')"
azure_query(
node,
f"BACKUP TABLE test_simple_merge_tree TO {backup_destination}",
)
azure_query(
node,
f"RESTORE TABLE test_simple_merge_tree AS test_simple_merge_tree_restored FROM {backup_destination};",
)
assert (
azure_query(node, f"SELECT * from test_simple_merge_tree_restored") == "1\ta\n"
)