2024-04-02 13:28:57 +00:00
|
|
|
from typing import Dict
|
2022-10-11 15:28:20 +00:00
|
|
|
import pytest
|
|
|
|
from helpers.cluster import ClickHouseCluster
|
2023-05-16 19:42:56 +00:00
|
|
|
from helpers.test_tools import TSV
|
2023-06-28 08:52:00 +00:00
|
|
|
import uuid
|
2024-05-21 11:21:53 +00:00
|
|
|
import os
|
2023-05-16 19:42:56 +00:00
|
|
|
|
2022-10-11 15:28:20 +00:00
|
|
|
|
2024-05-21 11:21:53 +00:00
|
|
|
# Absolute path of the "configs" directory located next to this test module.
CONFIG_DIR = os.path.join(os.path.dirname(os.path.realpath(__file__)), "configs")

# Single-node cluster shared by every test in this module.
cluster = ClickHouseCluster(__file__)
node = cluster.add_instance(
    "node",
    main_configs=[
        "configs/disk_s3.xml",
        "configs/named_collection_s3_backups.xml",
        "configs/s3_settings.xml",
        "configs/blob_log.xml",
        "configs/remote_servers.xml",
    ],
    user_configs=[
        "configs/zookeeper_retries.xml",
    ],
    with_minio=True,
    with_zookeeper=True,
    # setup_minio_users() stops and restarts ClickHouse inside the container;
    # presumably stay_alive is what keeps the container up across that restart.
    stay_alive=True,
)
|
|
|
|
|
|
|
|
|
2024-05-21 11:21:53 +00:00
|
|
|
def setup_minio_users():
    """Create restricted MinIO users and enable the restricted-user disk config.

    For each (user, bucket) pair the function registers the ``root`` alias in
    ``mc``, writes a bucket policy file into the MinIO container, adds the
    user, creates a ``<bucket>only`` policy from that file and attaches it to
    the user. Afterwards ClickHouse is stopped, ``disk_s3_restricted_user.xml``
    is copied into ``config.d`` and ClickHouse is started again.
    """
    # create 2 extra users with restricted access
    # miniorestricted1 - full access to bucket 'root', no access to other buckets
    # miniorestricted2 - full access to bucket 'root2', no access to other buckets
    # storage policy 'policy_s3_restricted' defines a policy for storing files inside bucket 'root' using 'miniorestricted1' user
    for user, bucket in [("miniorestricted1", "root"), ("miniorestricted2", "root2")]:
        print(
            cluster.exec_in_container(
                cluster.minio_docker_id,
                [
                    "mc",
                    "alias",
                    "set",
                    "root",
                    "http://minio1:9001",
                    "minio",
                    "minio123",
                ],
            )
        )
        # Doubled braces are literal JSON braces; only {bucket} is interpolated.
        policy = f"""
{{
  "Version": "2012-10-17",
  "Statement": [
    {{
      "Effect": "Allow",
      "Principal": {{
        "AWS": [
          "*"
        ]
      }},
      "Action": [
        "s3:GetBucketLocation",
        "s3:ListBucket",
        "s3:ListBucketMultipartUploads"
      ],
      "Resource": [
        "arn:aws:s3:::{bucket}"
      ]
    }},
    {{
      "Effect": "Allow",
      "Principal": {{
        "AWS": [
          "*"
        ]
      }},
      "Action": [
        "s3:AbortMultipartUpload",
        "s3:DeleteObject",
        "s3:GetObject",
        "s3:ListMultipartUploadParts",
        "s3:PutObject"
      ],
      "Resource": [
        "arn:aws:s3:::{bucket}/*"
      ]
    }}
  ]
}}"""

        # The here-document has no closing EOL marker: the policy text runs to
        # the end of the command string.
        cluster.exec_in_container(
            cluster.minio_docker_id,
            ["bash", "-c", f"cat >/tmp/{bucket}_policy.json <<EOL{policy}"],
        )
        cluster.exec_in_container(
            cluster.minio_docker_id, ["cat", f"/tmp/{bucket}_policy.json"]
        )
        print(
            cluster.exec_in_container(
                cluster.minio_docker_id,
                ["mc", "admin", "user", "add", "root", user, "minio123"],
            )
        )
        print(
            cluster.exec_in_container(
                cluster.minio_docker_id,
                [
                    "mc",
                    "admin",
                    "policy",
                    "create",
                    "root",
                    f"{bucket}only",
                    f"/tmp/{bucket}_policy.json",
                ],
            )
        )
        print(
            cluster.exec_in_container(
                cluster.minio_docker_id,
                [
                    "mc",
                    "admin",
                    "policy",
                    "attach",
                    "root",
                    f"{bucket}only",
                    "--user",
                    user,
                ],
            )
        )

    # The restricted-user disk config is installed only after the MinIO users
    # exist, then the server is restarted so it is loaded.
    node.stop_clickhouse()
    node.copy_file_to_container(
        os.path.join(CONFIG_DIR, "disk_s3_restricted_user.xml"),
        "/etc/clickhouse-server/config.d/disk_s3_restricted_user.xml",
    )
    node.start_clickhouse()
|
|
|
|
|
2024-05-21 11:36:57 +00:00
|
|
|
|
2022-10-12 14:58:13 +00:00
|
|
|
@pytest.fixture(scope="module", autouse=True)
def start_cluster():
    """Module-scoped fixture: start the cluster, prepare MinIO users, shut down at the end."""
    try:
        cluster.start()
        setup_minio_users()
        yield
    finally:
        # Runs even when start-up or MinIO setup fails.
        cluster.shutdown()
|
|
|
|
|
|
|
|
|
2022-10-12 14:58:13 +00:00
|
|
|
# Number of backup names handed out so far by new_backup_name().
backup_id_counter = 0


def new_backup_name():
    """Return a fresh backup name of the form ``backup<N>``, with N starting at 1."""
    global backup_id_counter
    backup_id_counter += 1
    return f"backup{backup_id_counter}"
|
|
|
|
|
|
|
|
|
2023-06-28 08:52:00 +00:00
|
|
|
def get_events_for_query(query_id: str) -> Dict[str, int]:
    """Return the ProfileEvents recorded in ``system.query_log`` for a query.

    Logs are flushed first, then every (event name, value) pair stored for
    ``query_id`` is collected into a dict with integer values.

    NOTE: the returned dict also carries the key ``"query_id"`` holding the
    query id itself (a ``str``), which the ``Dict[str, int]`` annotation does
    not reflect.
    """
    events = TSV(
        node.query(
            f"""
            SYSTEM FLUSH LOGS;

            WITH arrayJoin(ProfileEvents) as pe
            SELECT pe.1, pe.2
            FROM system.query_log
            WHERE query_id = '{query_id}'
            """
        )
    )
    result = {
        event: int(value)
        for event, value in [line.split("\t") for line in events.lines]
    }
    result["query_id"] = query_id
    return result
|
2023-05-16 19:42:56 +00:00
|
|
|
|
|
|
|
|
2023-07-23 09:24:01 +00:00
|
|
|
def format_settings(settings):
    """Render a mapping as a SQL ``SETTINGS k=v,...`` clause.

    Returns an empty string when ``settings`` is ``None`` or empty. Values are
    interpolated with ``str()`` formatting and are not quoted.
    """
    if not settings:
        return ""
    return "SETTINGS " + ",".join(f"{k}={v}" for k, v in settings.items())
|
|
|
|
|
|
|
|
|
2023-05-16 19:42:56 +00:00
|
|
|
def check_backup_and_restore(
    storage_policy,
    backup_destination,
    size=1000,
    backup_settings=None,
    restore_settings=None,
    insert_settings=None,
    optimize_table=True,
):
    """Back up a freshly filled table, restore it under another name and compare.

    Args:
        storage_policy: storage policy used for the source table ``data``.
        backup_destination: SQL expression naming the backup target, e.g.
            ``Disk(...)`` or ``S3(...)``; interpolated verbatim into the queries.
        size: number of random rows inserted into ``data``.
        backup_settings: optional mapping rendered as SETTINGS of BACKUP.
        restore_settings: optional mapping rendered as SETTINGS of RESTORE.
        insert_settings: optional mapping rendered as SETTINGS of the INSERT.
        optimize_table: run ``OPTIMIZE TABLE data FINAL`` after the insert.

    Returns:
        A two-element list ``[backup_events, restore_events]`` as produced by
        ``get_events_for_query`` for the BACKUP and RESTORE queries.

    Both ``data`` and ``data_restored`` are dropped on exit, also on failure.
    """
    optimize_table_query = "OPTIMIZE TABLE data FINAL;" if optimize_table else ""

    node.query(
        f"""
        DROP TABLE IF EXISTS data SYNC;
        CREATE TABLE data (key Int, value String, array Array(String)) Engine=MergeTree() ORDER BY tuple() SETTINGS storage_policy='{storage_policy}';
        INSERT INTO data SELECT * FROM generateRandom('key Int, value String, array Array(String)') LIMIT {size} {format_settings(insert_settings)};
        {optimize_table_query}
        """
    )

    try:
        # Explicit query ids let the profile events be looked up afterwards.
        backup_query_id = uuid.uuid4().hex
        node.query(
            f"BACKUP TABLE data TO {backup_destination} {format_settings(backup_settings)}",
            query_id=backup_query_id,
        )
        restore_query_id = uuid.uuid4().hex
        node.query(
            f"""
            RESTORE TABLE data AS data_restored FROM {backup_destination} {format_settings(restore_settings)};
            """,
            query_id=restore_query_id,
        )
        # Row count plus a sum of per-row hashes must agree between the tables.
        node.query(
            """
            SELECT throwIf(
                (SELECT count(), sum(sipHash64(*)) FROM data) !=
                (SELECT count(), sum(sipHash64(*)) FROM data_restored),
                'Data does not matched after BACKUP/RESTORE'
            );
            """
        )
        return [
            get_events_for_query(backup_query_id),
            get_events_for_query(restore_query_id),
        ]
    finally:
        node.query(
            """
            DROP TABLE data SYNC;
            DROP TABLE IF EXISTS data_restored SYNC;
            """
        )
|
2022-10-12 14:58:13 +00:00
|
|
|
|
|
|
|
|
2023-11-07 10:03:57 +00:00
|
|
|
def check_system_tables(backup_query_id=None):
    """Verify ``system.disks`` and, optionally, ``system.blob_storage_log``.

    The rows of ``system.disks`` must be exactly the expected set of
    (name, type, object_storage_type, metadata_type) tuples. When
    ``backup_query_id`` is given, at least one error-free ``Upload`` event must
    be logged for that query in ``system.blob_storage_log``.

    Raises:
        AssertionError: when a check fails.
    """
    disks = [
        tuple(disk.split("\t"))
        for disk in node.query(
            "SELECT name, type, object_storage_type, metadata_type FROM system.disks"
        ).split("\n")
        if disk
    ]
    expected_disks = (
        ("default", "Local", "None", "None"),
        ("disk_s3", "ObjectStorage", "S3", "Local"),
        ("disk_s3_cache", "ObjectStorage", "S3", "Local"),
        ("disk_s3_other_bucket", "ObjectStorage", "S3", "Local"),
        ("disk_s3_plain", "ObjectStorage", "S3", "Plain"),
        ("disk_s3_restricted_user", "ObjectStorage", "S3", "Local"),
        ("local", "Local", "None", "None"),
    )
    # Same length plus every expected disk present means the sets are equal.
    assert len(expected_disks) == len(
        disks
    ), f"Expected {len(expected_disks)} disks, got {disks}"
    for expected_disk in expected_disks:
        if expected_disk not in disks:
            raise AssertionError(f"Missed {expected_disk} in {disks}")

    if backup_query_id is not None:
        blob_storage_log = node.query(
            f"SELECT count() FROM system.blob_storage_log WHERE query_id = '{backup_query_id}' AND error = '' AND event_type = 'Upload'"
        ).strip()
        # On failure the whole log table is dumped as the assertion message.
        assert int(blob_storage_log) >= 1, node.query(
            "SELECT * FROM system.blob_storage_log FORMAT PrettyCompactMonoBlock"
        )
|
|
|
|
|
2022-11-02 10:04:58 +00:00
|
|
|
|
2022-10-11 15:28:20 +00:00
|
|
|
@pytest.mark.parametrize(
    "storage_policy, to_disk",
    [
        pytest.param(
            "default",
            "default",
            id="from_local_to_local",
        ),
        pytest.param(
            "policy_s3",
            "default",
            id="from_s3_to_local",
        ),
        pytest.param(
            "default",
            "disk_s3",
            id="from_local_to_s3",
        ),
        pytest.param(
            "policy_s3",
            "disk_s3_plain",
            id="from_s3_to_s3_plain",
        ),
        pytest.param(
            "default",
            "disk_s3_plain",
            id="from_local_to_s3_plain",
        ),
    ],
)
def test_backup_to_disk(storage_policy, to_disk):
    """Backup/restore round trip with a ``Disk(...)`` destination for each policy/disk pair."""
    backup_name = new_backup_name()
    backup_destination = f"Disk('{to_disk}', '{backup_name}')"
    check_backup_and_restore(storage_policy, backup_destination)
|
|
|
|
|
|
|
|
|
2023-11-14 13:47:17 +00:00
|
|
|
@pytest.mark.parametrize(
    "storage_policy, to_disk",
    [
        pytest.param(
            "policy_s3",
            "disk_s3_other_bucket",
            id="from_s3_to_s3",
        ),
        pytest.param(
            "policy_s3_other_bucket",
            "disk_s3",
            id="from_s3_to_s3_other_bucket",
        ),
    ],
)
def test_backup_from_s3_to_s3_disk_native_copy(storage_policy, to_disk):
    """Backup/restore between two S3 disks must record ``S3CopyObject`` events both ways."""
    backup_name = new_backup_name()
    backup_destination = f"Disk('{to_disk}', '{backup_name}')"
    (backup_events, restore_events) = check_backup_and_restore(
        storage_policy, backup_destination
    )

    assert backup_events["S3CopyObject"] > 0
    assert restore_events["S3CopyObject"] > 0
|
|
|
|
|
|
|
|
|
2022-10-12 14:58:13 +00:00
|
|
|
def test_backup_to_s3():
    """Back up a table on the default policy to an ``S3(...)`` URL, then check system tables."""
    storage_policy = "default"
    backup_name = new_backup_name()
    backup_destination = (
        f"S3('http://minio1:9001/root/data/backups/{backup_name}', 'minio', 'minio123')"
    )
    (backup_events, _) = check_backup_and_restore(storage_policy, backup_destination)
    check_system_tables(backup_events["query_id"])
|
2022-10-12 14:58:13 +00:00
|
|
|
|
|
|
|
|
|
|
|
def test_backup_to_s3_named_collection():
    """Backup/restore round trip addressing S3 through ``named_collection_s3_backups``."""
    storage_policy = "default"
    backup_name = new_backup_name()
    backup_destination = f"S3(named_collection_s3_backups, '{backup_name}')"
    check_backup_and_restore(storage_policy, backup_destination)
|
|
|
|
|
|
|
|
|
2023-01-10 00:32:37 +00:00
|
|
|
def test_backup_to_s3_multipart():
    """Back up 1,000,000 rows to S3 and verify the multipart upload and byte accounting.

    Checks, in order: the server log reports a completed multipart upload,
    ``system.blob_storage_log`` holds at least one error-free event of each
    multipart type for the BACKUP query, and the S3 read/write profile events
    agree with ``total_size`` from ``system.backups``.
    """
    storage_policy = "default"
    backup_name = new_backup_name()
    backup_destination = f"S3('http://minio1:9001/root/data/backups/multipart/{backup_name}', 'minio', 'minio123')"
    (backup_events, restore_events) = check_backup_and_restore(
        storage_policy,
        backup_destination,
        size=1000000,
    )
    assert node.contains_in_log(
        f"copyDataToS3File: Multipart upload has completed. Bucket: root, Key: data/backups/multipart/{backup_name}"
    )

    backup_query_id = backup_events["query_id"]
    # The product of the three counts is >= 1 only when each event type occurs.
    blob_storage_log = node.query(
        f"SELECT countIf(event_type == 'MultiPartUploadCreate') * countIf(event_type == 'MultiPartUploadComplete') * countIf(event_type == 'MultiPartUploadWrite') "
        f"FROM system.blob_storage_log WHERE query_id = '{backup_query_id}' AND error = ''"
    ).strip()
    assert int(blob_storage_log) >= 1, node.query(
        "SELECT * FROM system.blob_storage_log FORMAT PrettyCompactMonoBlock"
    )

    objects = node.cluster.minio_client.list_objects(
        "root", f"data/backups/multipart/{backup_name}/"
    )
    # Size of the first listed object whose name contains ".backup"; 0 if none.
    backup_meta_size = 0
    for obj in objects:
        if ".backup" in obj.object_name:
            backup_meta_size = obj.size
            break
    backup_total_size = int(
        node.query(
            f"SELECT sum(total_size) FROM system.backups WHERE status = 'BACKUP_CREATED' AND name like '%{backup_name}%'"
        ).strip()
    )
    restore_total_size = int(
        node.query(
            f"SELECT sum(total_size) FROM system.backups WHERE status = 'RESTORED' AND name like '%{backup_name}%'"
        ).strip()
    )
    # backup
    # NOTE: ~35 bytes is used by .lock file, so set up 100 bytes to avoid flaky test
    assert (
        abs(
            backup_total_size
            - (backup_events["WriteBufferFromS3Bytes"] - backup_meta_size)
        )
        < 100
    )
    assert backup_events["WriteBufferFromS3Microseconds"] > 0
    assert "WriteBufferFromS3RequestsErrors" not in backup_events
    # restore
    assert (
        restore_events["ReadBufferFromS3Bytes"] - backup_meta_size == restore_total_size
    )
    assert restore_events["ReadBufferFromS3Microseconds"] > 0
    assert "ReadBufferFromS3RequestsErrors" not in restore_events
|
|
|
|
|
2023-01-10 00:32:37 +00:00
|
|
|
|
2022-10-12 14:58:13 +00:00
|
|
|
def test_backup_to_s3_native_copy():
    """Backup from ``policy_s3`` to an S3 URL must use single-operation S3 copies."""
    storage_policy = "policy_s3"
    backup_name = new_backup_name()
    backup_destination = (
        f"S3('http://minio1:9001/root/data/backups/{backup_name}', 'minio', 'minio123')"
    )
    (backup_events, restore_events) = check_backup_and_restore(
        storage_policy, backup_destination
    )
    # single part upload
    assert backup_events["S3CopyObject"] > 0
    assert restore_events["S3CopyObject"] > 0
    assert node.contains_in_log(
        f"copyS3File: Single operation copy has completed. Bucket: root, Key: data/backups/{backup_name}"
    )
|
2022-10-12 14:58:13 +00:00
|
|
|
|
|
|
|
|
2022-10-31 23:01:27 +00:00
|
|
|
def test_backup_to_s3_native_copy_other_bucket():
    """Same as the native-copy test, with the source table on ``policy_s3_other_bucket``."""
    storage_policy = "policy_s3_other_bucket"
    backup_name = new_backup_name()
    backup_destination = (
        f"S3('http://minio1:9001/root/data/backups/{backup_name}', 'minio', 'minio123')"
    )
    (backup_events, restore_events) = check_backup_and_restore(
        storage_policy, backup_destination
    )
    # single part upload
    assert backup_events["S3CopyObject"] > 0
    assert restore_events["S3CopyObject"] > 0
    assert node.contains_in_log(
        f"copyS3File: Single operation copy has completed. Bucket: root, Key: data/backups/{backup_name}"
    )
|
2022-10-31 23:01:27 +00:00
|
|
|
|
|
|
|
|
2023-01-10 00:32:37 +00:00
|
|
|
def test_backup_to_s3_native_copy_multipart():
    """Backup of 1,000,000 rows from ``policy_s3`` must go through multipart S3 copies."""
    storage_policy = "policy_s3"
    backup_name = new_backup_name()
    backup_destination = f"S3('http://minio1:9001/root/data/backups/multipart/{backup_name}', 'minio', 'minio123')"
    (backup_events, restore_events) = check_backup_and_restore(
        storage_policy, backup_destination, size=1000000
    )
    # multi part upload
    assert backup_events["S3CreateMultipartUpload"] > 0
    assert restore_events["S3CreateMultipartUpload"] > 0
    assert node.contains_in_log(
        f"copyS3File: Multipart upload has completed. Bucket: root, Key: data/backups/multipart/{backup_name}/"
    )
|
2023-05-10 09:52:09 +00:00
|
|
|
|
|
|
|
|
|
|
|
def test_incremental_backup_append_table_def():
    """An incremental backup taken after a table-setting change must restore that change.

    A base backup is taken, ``parts_to_throw_insert`` is modified, an
    incremental backup is taken on top of the base one, and the table is
    restored from the incremental backup. Both the data and the modified
    setting must be present afterwards.
    """
    backup_name = f"S3('http://minio1:9001/root/data/backups/{new_backup_name()}', 'minio', 'minio123')"

    node.query(
        "CREATE TABLE data (x UInt32, y String) Engine=MergeTree() ORDER BY y PARTITION BY x%10 SETTINGS storage_policy='policy_s3'"
    )

    try:
        node.query("INSERT INTO data SELECT number, toString(number) FROM numbers(100)")
        assert node.query("SELECT count(), sum(x) FROM data") == "100\t4950\n"

        node.query(f"BACKUP TABLE data TO {backup_name}")

        node.query("ALTER TABLE data MODIFY SETTING parts_to_throw_insert=100")

        incremental_backup_name = f"S3('http://minio1:9001/root/data/backups/{new_backup_name()}', 'minio', 'minio123')"

        node.query(
            f"BACKUP TABLE data TO {incremental_backup_name} SETTINGS base_backup = {backup_name}"
        )

        node.query("DROP TABLE data")
        node.query(f"RESTORE TABLE data FROM {incremental_backup_name}")

        assert node.query("SELECT count(), sum(x) FROM data") == "100\t4950\n"
        assert "parts_to_throw_insert = 100" in node.query("SHOW CREATE TABLE data")
    finally:
        # Without this the restored table stays behind and a repeated run
        # fails on CREATE TABLE.
        node.query("DROP TABLE IF EXISTS data SYNC")
|
2023-07-23 09:24:01 +00:00
|
|
|
|
|
|
|
|
|
|
|
@pytest.mark.parametrize(
    "in_cache_initially, allow_backup_read_cache, allow_s3_native_copy",
    [
        (False, True, False),
        (True, False, False),
        (True, True, False),
        (True, True, True),
    ],
)
def test_backup_with_fs_cache(
    in_cache_initially, allow_backup_read_cache, allow_s3_native_copy
):
    """Check filesystem-cache profile events of BACKUP and RESTORE on ``policy_s3_cache``.

    Args:
        in_cache_initially: enable the cache on write operations for the INSERT.
        allow_backup_read_cache: value of ``read_from_filesystem_cache`` for BACKUP.
        allow_s3_native_copy: value of ``allow_s3_native_copy`` for BACKUP and RESTORE.
    """
    storage_policy = "policy_s3_cache"

    backup_name = new_backup_name()
    backup_destination = (
        f"S3('http://minio1:9001/root/data/backups/{backup_name}', 'minio', 'minio123')"
    )

    insert_settings = {
        "enable_filesystem_cache_on_write_operations": int(in_cache_initially)
    }

    backup_settings = {
        "read_from_filesystem_cache": int(allow_backup_read_cache),
        "allow_s3_native_copy": int(allow_s3_native_copy),
    }

    restore_settings = {"allow_s3_native_copy": int(allow_s3_native_copy)}

    backup_events, restore_events = check_backup_and_restore(
        storage_policy,
        backup_destination,
        size=10,
        insert_settings=insert_settings,
        optimize_table=False,
        backup_settings=backup_settings,
        restore_settings=restore_settings,
    )

    # BACKUP never updates the filesystem cache but it may read it if the `read_from_filesystem_cache` setting allows that.
    if allow_backup_read_cache and in_cache_initially:
        assert backup_events["CachedReadBufferReadFromCacheBytes"] > 0
        assert "CachedReadBufferReadFromSourceBytes" not in backup_events
    elif allow_backup_read_cache:
        assert "CachedReadBufferReadFromCacheBytes" not in backup_events
        assert backup_events["CachedReadBufferReadFromSourceBytes"] > 0
    else:
        assert "CachedReadBufferReadFromCacheBytes" not in backup_events
        assert "CachedReadBufferReadFromSourceBytes" not in backup_events

    assert "CachedReadBufferCacheWriteBytes" not in backup_events
    assert "CachedWriteBufferCacheWriteBytes" not in backup_events

    # RESTORE doesn't use the filesystem cache during write operations.
    # However while attaching parts it may use the cache while reading such files as "columns.txt" or "checksums.txt" or "primary.idx",
    # see IMergeTreeDataPart::loadColumnsChecksumsIndexes()
    if "CachedReadBufferReadFromSourceBytes" in restore_events:
        assert (
            restore_events["CachedReadBufferReadFromSourceBytes"]
            == restore_events["CachedReadBufferCacheWriteBytes"]
        )

    assert "CachedReadBufferReadFromCacheBytes" not in restore_events

    # "format_version.txt" is written when a table is created,
    # see MergeTreeData::initializeDirectoriesAndFormatVersion()
    if "CachedWriteBufferCacheWriteBytes" in restore_events:
        assert restore_events["CachedWriteBufferCacheWriteBytes"] <= 1
|
2023-12-23 16:51:49 +00:00
|
|
|
|
|
|
|
|
|
|
|
def test_backup_to_zip():
    """Backup/restore round trip with a ``.zip`` archive stored on S3."""
    storage_policy = "default"
    backup_name = new_backup_name()
    backup_destination = f"S3('http://minio1:9001/root/data/backups/{backup_name}.zip', 'minio', 'minio123')"
    check_backup_and_restore(storage_policy, backup_destination)
|
2024-02-19 15:11:29 +00:00
|
|
|
|
2024-02-14 13:31:26 +00:00
|
|
|
|
2024-01-19 15:05:36 +00:00
|
|
|
def test_backup_to_tar():
    """Backup/restore round trip with a ``.tar`` archive stored on S3."""
    storage_policy = "default"
    backup_name = new_backup_name()
    backup_destination = f"S3('http://minio1:9001/root/data/backups/{backup_name}.tar', 'minio', 'minio123')"
    check_backup_and_restore(storage_policy, backup_destination)
|
2024-02-19 15:11:29 +00:00
|
|
|
|
2024-02-20 23:34:02 +00:00
|
|
|
|
|
|
|
def test_backup_to_tar_gz():
    """Backup/restore round trip with a ``.tar.gz`` archive stored on S3."""
    storage_policy = "default"
    backup_name = new_backup_name()
    backup_destination = f"S3('http://minio1:9001/root/data/backups/{backup_name}.tar.gz', 'minio', 'minio123')"
    check_backup_and_restore(storage_policy, backup_destination)
|
|
|
|
|
|
|
|
|
|
|
|
def test_backup_to_tar_bz2():
    """Backup/restore round trip with a ``.tar.bz2`` archive stored on S3."""
    storage_policy = "default"
    backup_name = new_backup_name()
    backup_destination = f"S3('http://minio1:9001/root/data/backups/{backup_name}.tar.bz2', 'minio', 'minio123')"
    check_backup_and_restore(storage_policy, backup_destination)
|
|
|
|
|
|
|
|
|
|
|
|
def test_backup_to_tar_lzma():
    """Backup/restore round trip with a ``.tar.lzma`` archive stored on S3."""
    storage_policy = "default"
    backup_name = new_backup_name()
    backup_destination = f"S3('http://minio1:9001/root/data/backups/{backup_name}.tar.lzma', 'minio', 'minio123')"
    check_backup_and_restore(storage_policy, backup_destination)
|
|
|
|
|
|
|
|
|
2024-02-21 23:35:30 +00:00
|
|
|
def test_backup_to_tar_zst():
    """Backup/restore round trip with a ``.tar.zst`` archive stored on S3."""
    storage_policy = "default"
    backup_name = new_backup_name()
    backup_destination = f"S3('http://minio1:9001/root/data/backups/{backup_name}.tar.zst', 'minio', 'minio123')"
    check_backup_and_restore(storage_policy, backup_destination)
|
|
|
|
|
|
|
|
|
|
|
|
def test_backup_to_tar_xz():
    """Backup/restore round trip with a ``.tar.xz`` archive stored on S3."""
    storage_policy = "default"
    backup_name = new_backup_name()
    backup_destination = f"S3('http://minio1:9001/root/data/backups/{backup_name}.tar.xz', 'minio', 'minio123')"
    check_backup_and_restore(storage_policy, backup_destination)
|
|
|
|
|
|
|
|
|
2024-02-19 15:11:29 +00:00
|
|
|
def test_user_specific_auth(start_cluster):
    """Check that S3 access under ``backups/limited/`` depends on the querying user.

    Three users are created with the same grants. BACKUP, RESTORE and
    ``s3``/``s3Cluster`` reads that pass no credentials in the query must fail
    for the default user and ``regularuser`` and succeed for ``superuser1`` /
    ``superuser2``.
    NOTE(review): the per-user S3 credentials are presumably supplied by the
    server configs; that is not visible in this file.
    """

    def create_user(user):
        node.query(f"CREATE USER {user}")
        node.query(f"GRANT CURRENT GRANTS ON *.* TO {user}")

    users = ("superuser1", "superuser2", "regularuser")

    try:
        for user in users:
            create_user(user)

        node.query("CREATE TABLE specific_auth (col UInt64) ENGINE=Memory")

        # No user= argument: the query runs as the default user.
        assert "Access" in node.query_and_get_error(
            "BACKUP TABLE specific_auth TO S3('http://minio1:9001/root/data/backups/limited/backup1.zip')"
        )
        assert "Access" in node.query_and_get_error(
            "BACKUP TABLE specific_auth TO S3('http://minio1:9001/root/data/backups/limited/backup1.zip')",
            user="regularuser",
        )

        node.query(
            "BACKUP TABLE specific_auth TO S3('http://minio1:9001/root/data/backups/limited/backup1.zip')",
            user="superuser1",
        )
        node.query(
            "RESTORE TABLE specific_auth FROM S3('http://minio1:9001/root/data/backups/limited/backup1.zip')",
            user="superuser1",
        )

        node.query(
            "BACKUP TABLE specific_auth TO S3('http://minio1:9001/root/data/backups/limited/backup2.zip')",
            user="superuser2",
        )
        node.query(
            "RESTORE TABLE specific_auth FROM S3('http://minio1:9001/root/data/backups/limited/backup2.zip')",
            user="superuser2",
        )

        assert "Access" in node.query_and_get_error(
            "RESTORE TABLE specific_auth FROM S3('http://minio1:9001/root/data/backups/limited/backup1.zip')",
            user="regularuser",
        )

        assert "HTTP response code: 403" in node.query_and_get_error(
            "SELECT * FROM s3('http://minio1:9001/root/data/backups/limited/backup1.zip', 'RawBLOB')",
            user="regularuser",
        )

        node.query(
            "SELECT * FROM s3('http://minio1:9001/root/data/backups/limited/backup1.zip', 'RawBLOB')",
            user="superuser1",
        )

        # Same checks through the ON CLUSTER / s3Cluster code paths.
        assert "Access Denied" in node.query_and_get_error(
            "BACKUP TABLE specific_auth ON CLUSTER 'cluster' TO S3('http://minio1:9001/root/data/backups/limited/backup3/')",
            user="regularuser",
        )

        node.query(
            "BACKUP TABLE specific_auth ON CLUSTER 'cluster' TO S3('http://minio1:9001/root/data/backups/limited/backup3/')",
            user="superuser1",
        )

        assert "Access Denied" in node.query_and_get_error(
            "RESTORE TABLE specific_auth ON CLUSTER 'cluster' FROM S3('http://minio1:9001/root/data/backups/limited/backup3/')",
            user="regularuser",
        )

        node.query(
            "RESTORE TABLE specific_auth ON CLUSTER 'cluster' FROM S3('http://minio1:9001/root/data/backups/limited/backup3/')",
            user="superuser1",
        )

        assert "Access Denied" in node.query_and_get_error(
            "SELECT * FROM s3('http://minio1:9001/root/data/backups/limited/backup3/*', 'RawBLOB')",
            user="regularuser",
        )

        node.query(
            "SELECT * FROM s3('http://minio1:9001/root/data/backups/limited/backup3/*', 'RawBLOB')",
            user="superuser1",
        )

        assert "Access Denied" in node.query_and_get_error(
            "SELECT * FROM s3Cluster(cluster, 'http://minio1:9001/root/data/backups/limited/backup3/*', 'RawBLOB')",
            user="regularuser",
        )
    finally:
        # The table is created without a database qualifier, so it is dropped
        # the same way; the users are removed so the test can be run again.
        node.query("DROP TABLE IF EXISTS specific_auth")
        node.query(f"DROP USER IF EXISTS {', '.join(users)}")
|
2024-05-21 11:21:53 +00:00
|
|
|
|
|
|
|
|
|
|
|
def test_backup_to_s3_different_credentials():
    """Back up from ``policy_s3_restricted`` to bucket ``root2`` as ``miniorestricted2``.

    The round trip is executed twice, with ``allow_s3_native_copy`` disabled
    and then enabled, and the system tables are checked after each run.
    """
    storage_policy = "policy_s3_restricted"

    def check_backup_restore(allow_s3_native_copy):
        backup_name = new_backup_name()
        backup_destination = f"S3('http://minio1:9001/root2/data/backups/{backup_name}', 'miniorestricted2', 'minio123')"
        # Rendered as 0/1, the same way test_backup_with_fs_cache passes it.
        settings = {"allow_s3_native_copy": int(allow_s3_native_copy)}
        (backup_events, _) = check_backup_and_restore(
            storage_policy,
            backup_destination,
            backup_settings=settings,
            restore_settings=settings,
        )
        check_system_tables(backup_events["query_id"])

    check_backup_restore(False)
    check_backup_restore(True)
|