ClickHouse/tests/integration/test_backup_restore_s3/test.py
Azat Khuzhin 8830f0608d Support BACKUP to S3 with as-is path/data structure
Right now backup to S3 does not make a lot of sense, since:
- it has random names, and decoding them either
  - requires metadata from the local disk (/var/lib/disks/DISK/BACKUP_NAME)
  - or requires send_metadata (but it is tricky even with that)

So this patch adds a simpler interface for S3. It is only suitable for
BACKUP/RESTORE, so don't try to use it for the MergeTree engine.

It is done by adding a separate disk type, `s3_plain`, that:
- does not support any extended features, like renames/hardlinks/attrs/...
  (so basically everything that MergeTree requires)
- supports only write/read/unlink/list of files

Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
2022-10-14 12:59:25 +02:00

72 lines
1.9 KiB
Python

#!/usr/bin/env python3
# pylint: disable=unused-argument
import pytest
from helpers.cluster import ClickHouseCluster
# Cluster object shared by every test in this module; it is started and shut
# down by the module-scoped `start_cluster` fixture.
cluster = ClickHouseCluster(__file__)
# The single instance all queries are sent to.
# NOTE(review): configs/storage_conf.xml presumably declares the `s3` and
# `s3_plain` disks/policies referenced by the test parameters, and
# with_minio=True presumably provides the S3-compatible backend for them --
# confirm against the config file and helpers.cluster.
node = cluster.add_instance(
    "node",
    main_configs=["configs/storage_conf.xml"],
    with_minio=True,
)
@pytest.fixture(scope="module")
def start_cluster():
    """Start the module-level cluster once and hand it to the tests.

    Yields:
        The started ``cluster`` object.

    The cluster is shut down when the module's tests are done, and also when
    ``cluster.start()`` itself raises, because the shutdown lives in
    ``finally``.
    """
    try:
        # Bring the cluster up before the first test that requests the fixture.
        cluster.start()

        yield cluster
    finally:
        # Always tear down, regardless of how the tests (or startup) ended.
        cluster.shutdown()
@pytest.mark.parametrize(
    "storage_policy,to_disk",
    [
        pytest.param("default", "default", id="from_local_to_local"),
        pytest.param("s3", "default", id="from_s3_to_local"),
        pytest.param("default", "s3", id="from_local_to_s3"),
        pytest.param("s3", "s3_plain", id="from_s3_to_s3_plain"),
        pytest.param("default", "s3_plain", id="from_local_to_s3_plain"),
    ],
)
def test_backup_restore(start_cluster, storage_policy, to_disk):
    """Back up a MergeTree table to a disk, restore it, and compare contents.

    Args:
        start_cluster: Module fixture; requested only so the cluster is
            running, not used directly.
        storage_policy: Storage policy of the source table ``data``.
        to_disk: Name of the disk the backup is written to and restored from.

    The whole scenario is sent as one multi-statement query; a mismatch makes
    ``throwIf`` raise on the server, which fails ``node.query``.
    """
    # One backup name per (policy, disk) combination, so the parametrized
    # cases do not write to the same backup path.
    backup_name = f"{storage_policy}_{to_disk}"

    # Both `data` and `data_restored` are dropped up front: if an earlier case
    # failed between RESTORE and the final DROPs, a leftover `data_restored`
    # would otherwise make every following RESTORE fail.
    #
    # The comparison uses count() + sum(sipHash64(*)) rather than
    # groupArray(tuple(*)): groupArray does not guarantee element order, so
    # comparing the arrays directly depends on the read order of both tables.
    # The row-hash sum is order-independent (equality up to hash collisions).
    node.query(
        f"""
    DROP TABLE IF EXISTS data NO DELAY;
    DROP TABLE IF EXISTS data_restored NO DELAY;
    CREATE TABLE data (key Int, value String, array Array(String)) Engine=MergeTree() ORDER BY tuple() SETTINGS storage_policy='{storage_policy}';
    INSERT INTO data SELECT * FROM generateRandom('key Int, value String, array Array(String)') LIMIT 1000;
    BACKUP TABLE data TO Disk('{to_disk}', '{backup_name}');
    RESTORE TABLE data AS data_restored FROM Disk('{to_disk}', '{backup_name}');
    SELECT throwIf(
        (SELECT count(), sum(sipHash64(*)) FROM data) !=
        (SELECT count(), sum(sipHash64(*)) FROM data_restored),
        'Data does not matched after BACKUP/RESTORE'
    );
    DROP TABLE data NO DELAY;
    DROP TABLE data_restored NO DELAY;
    """
    )