mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-15 03:53:41 +00:00
8830f0608d
Right now backup to S3 does not make a lot of sense, since: - it has random names, and decoding them requires either metadata from the local disk (/var/lib/disks/DISK/BACKUP_NAME) - or send_metadata (but it is tricky even with that). So this patch adds a simpler interface for S3; it is only suitable for BACKUP/RESTORE, so don't try to use it for the MergeTree engine. This is done by adding a separate disk - `s3_plain` - that: - does not support any extended features, like renames/hardlinks/attrs/... (so basically everything that MergeTree requires) - can only write/read/unlink/list files. Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
72 lines
1.9 KiB
Python
72 lines
1.9 KiB
Python
#!/usr/bin/env python3
|
|
# pylint: disable=unused-argument
|
|
|
|
import pytest
|
|
from helpers.cluster import ClickHouseCluster
|
|
|
|
# Test cluster for this module, keyed by this file's path.
cluster = ClickHouseCluster(__file__)

# Single instance used by every test below. MinIO is requested for the
# cluster; storage_conf.xml presumably declares the `s3` / `s3_plain` disks
# and the `s3` storage policy the tests refer to (verify against the config).
node = cluster.add_instance(
    "node",
    with_minio=True,
    main_configs=["configs/storage_conf.xml"],
)
|
|
|
|
|
|
@pytest.fixture(scope="module")
def start_cluster():
    """Start the module's cluster once and hand it to the tests.

    The fixture is module-scoped, so all tests in this file share the same
    running cluster. Shutdown happens in ``finally``, which means it runs
    both after the last test finishes and when ``cluster.start()`` itself
    raises.

    Yields:
        The started module-level ``cluster`` object.
    """
    try:
        cluster.start()
        yield cluster
    finally:
        # Always tear down, even on a failed or partial startup.
        cluster.shutdown()
|
|
|
|
|
|
@pytest.mark.parametrize(
    "storage_policy,to_disk",
    [
        pytest.param("default", "default", id="from_local_to_local"),
        pytest.param("s3", "default", id="from_s3_to_local"),
        pytest.param("default", "s3", id="from_local_to_s3"),
        pytest.param("s3", "s3_plain", id="from_s3_to_s3_plain"),
        pytest.param("default", "s3_plain", id="from_local_to_s3_plain"),
    ],
)
def test_backup_restore(start_cluster, storage_policy, to_disk):
    """Round-trip a table through BACKUP/RESTORE and compare the contents.

    Creates a MergeTree table ``data`` on ``storage_policy``, fills it with
    1000 random rows, backs it up to disk ``to_disk``, restores the backup as
    ``data_restored`` and makes the server throw if the two tables differ.

    Args:
        start_cluster: Fixture that keeps the cluster running; only requested
            for its side effect, the value itself is not used here.
        storage_policy: Storage policy of the source table.
        to_disk: Name of the disk the backup is written to and restored from.
    """
    # One backup name per parameter combination, so cases sharing a
    # destination disk do not collide.
    backup_name = f"{storage_policy}_{to_disk}"

    # All cases run against the same node and reuse the same table names.
    # Both tables are therefore dropped up front: a case that failed midway
    # (e.g. at throwIf) skips its trailing DROPs, and a leftover
    # `data_restored` would otherwise break RESTORE in every later case.
    #
    # NOTE(review): groupArray(tuple(*)) compares rows positionally, so the
    # check assumes both tables are read in the same order - confirm this
    # holds for multi-part / multi-threaded reads.
    node.query(
        f"""
    DROP TABLE IF EXISTS data NO DELAY;
    DROP TABLE IF EXISTS data_restored NO DELAY;
    CREATE TABLE data (key Int, value String, array Array(String)) Engine=MergeTree() ORDER BY tuple() SETTINGS storage_policy='{storage_policy}';
    INSERT INTO data SELECT * FROM generateRandom('key Int, value String, array Array(String)') LIMIT 1000;
    BACKUP TABLE data TO Disk('{to_disk}', '{backup_name}');
    RESTORE TABLE data AS data_restored FROM Disk('{to_disk}', '{backup_name}');
    SELECT throwIf(
        (SELECT groupArray(tuple(*)) FROM data) !=
        (SELECT groupArray(tuple(*)) FROM data_restored),
        'Data does not matched after BACKUP/RESTORE'
    );
    DROP TABLE data NO DELAY;
    DROP TABLE data_restored NO DELAY;
    """
    )
|