Merge pull request #53150 from ClickHouse/Improvements_to_backup_restore_concurrency_test_follow

Added helper functions (wait_for_fail_backup, wait_for_fail_restore) to the test that disallows concurrent backups/restores, replacing the duplicated inline wait-for-failure logic.
Alexey Milovidov 2023-08-12 03:23:21 +03:00 committed by GitHub
commit a85c2285f8

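For orientation: the diff below factors the duplicated status-polling code out of the four tests into two helpers, wait_for_fail_backup and wait_for_fail_restore. A minimal sketch of the resulting call pattern on the backup side, using the names that appear in the diff (check_concurrent_backup_result is a hypothetical wrapper added here purely for illustration; the committed tests inline this logic):

def check_concurrent_backup_result(node, id, backup_name, error):
    # Illustrative sketch only. `id` is the id of the async backup started first;
    # `error` is the error text of the second, synchronous BACKUP (empty if it was accepted).
    expected_errors = [
        "Concurrent backups not supported",
        f"Backup {backup_name} already exists",
    ]
    if not error:
        # The second backup was accepted, so one of the two must eventually fail;
        # wait_for_fail_backup (added in this diff) polls system.backups and
        # asserts that the failure reason is one of the expected errors.
        wait_for_fail_backup(node, id, backup_name)
    else:
        # The second backup failed synchronously; its error must be an expected one.
        assert any(expected_error in error for expected_error in expected_errors)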

@@ -5,6 +5,7 @@ import time
import concurrent
from helpers.cluster import ClickHouseCluster
from helpers.test_tools import TSV, assert_eq_with_retry
import re
cluster = ClickHouseCluster(__file__)
@@ -110,6 +111,73 @@ def create_and_fill_table():
nodes[i].query(f"INSERT INTO tbl SELECT number FROM numbers(40000000)")
def wait_for_fail_backup(node, backup_id, backup_name):
expected_errors = [
"Concurrent backups not supported",
f"Backup {backup_name} already exists",
]
status = node.query(
f"SELECT status FROM system.backups WHERE id == '{backup_id}'"
).rstrip("\n")
# It is possible that the second backup was picked up first, and then the async backup is the one that fails
if status == "BACKUP_FAILED":
error = node.query(
f"SELECT error FROM system.backups WHERE id == '{backup_id}'"
).rstrip("\n")
assert any([expected_error in error for expected_error in expected_errors])
return
elif status == "CREATING_BACKUP":
assert_eq_with_retry(
node,
f"SELECT status FROM system.backups WHERE id = '{backup_id}'",
"BACKUP_FAILED",
sleep_time=2,
retry_count=50,
)
error = node.query(
f"SELECT error FROM system.backups WHERE id == '{backup_id}'"
).rstrip("\n")
# Escape backup_name in case it contains regex metacharacters
assert re.search(f"Backup {re.escape(backup_name)} already exists", error)
return
else:
assert False, "Concurrent backups both passed, when one is expected to fail"
def wait_for_fail_restore(node, restore_id):
expected_errors = [
"Concurrent restores not supported",
"Cannot restore the table default.tbl because it already contains some data",
]
status = node.query(
f"SELECT status FROM system.backups WHERE id == '{restore_id}'"
).rstrip("\n")
# It is possible that the second restore was picked up first, and then the async restore is the one that fails
if status == "RESTORE_FAILED":
error = node.query(
f"SELECT error FROM system.backups WHERE id == '{restore_id}'"
).rstrip("\n")
assert any([expected_error in error for expected_error in expected_errors])
return
elif status == "RESTORING":
assert_eq_with_retry(
node,
f"SELECT status FROM system.backups WHERE id = '{backup_id}'",
"RESTORE_FAILED",
sleep_time=2,
retry_count=50,
)
error = node.query(
f"SELECT error FROM system.backups WHERE id == '{backup_id}'"
).rstrip("\n")
assert re.search(
"Cannot restore the table default.tbl because it already contains some data",
error,
)
return
else:
assert False, "Concurrent restores both passed, when one is expected to fail"
# All the tests run concurrent backups/restores with the same backup name.
# The same behaviour holds for different backup names too. Since the concurrency
# check comes before the backup-name check, separate tests are not added for different names.
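A rough guide to how those two checks map onto the error messages accepted by the tests and helpers above (the mapping is inferred from the error strings; it is not spelled out in the diff, and EXPECTED_FAILURE_REASONS below is purely illustrative):

# Inferred, illustrative mapping only; not part of the committed test.
EXPECTED_FAILURE_REASONS = {
    "Concurrent backups not supported": "second operation rejected by the concurrency check",
    "Backup <backup_name> already exists": "rejected by the backup-name check (same-name case only)",
}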
@@ -137,33 +205,13 @@ def test_concurrent_backups_on_same_node():
f"BACKUP TABLE tbl ON CLUSTER 'cluster' TO {backup_name}"
)
if not error:
status = (
nodes[0]
.query(f"SELECT status FROM system.backups WHERE id == '{id}'")
.rstrip("\n")
)
# It is possible that the second backup was picked up first, and then the async backup
if status == "BACKUP_FAILED":
return
elif status == "CREATING_BACKUP":
assert_eq_with_retry(
nodes[0],
f"SELECT status FROM system.backups WHERE id = '{id}'",
"BACKUP_FAILED",
sleep_time=2,
retry_count=50,
)
return
else:
raise Exception(
"Concurrent backups both passed, when one is expected to fail"
)
expected_errors = [
"Concurrent backups not supported",
f"Backup {backup_name} already exists",
]
if not error:
wait_for_fail_backup(nodes[0], id, backup_name)
assert any([expected_error in error for expected_error in expected_errors])
assert_eq_with_retry(
@@ -207,33 +255,14 @@ def test_concurrent_backups_on_different_nodes():
f"BACKUP TABLE tbl ON CLUSTER 'cluster' TO {backup_name}"
)
if not error:
status = (
nodes[1]
.query(f"SELECT status FROM system.backups WHERE id == '{id}'")
.rstrip("\n")
)
# It is possible that the second backup was picked up first, and then the async backup
if status == "BACKUP_FAILED":
return
elif status == "CREATING_BACKUP":
assert_eq_with_retry(
nodes[1],
f"SELECT status FROM system.backups WHERE id = '{id}'",
"BACKUP_FAILED",
sleep_time=2,
retry_count=50,
)
return
else:
raise Exception(
"Concurrent backups both passed, when one is expected to fail"
)
expected_errors = [
"Concurrent backups not supported",
f"Backup {backup_name} already exists",
]
if not error:
wait_for_fail_backup(nodes[1], id, backup_name)
assert any([expected_error in error for expected_error in expected_errors])
assert_eq_with_retry(
@@ -276,33 +305,14 @@ def test_concurrent_restores_on_same_node():
f"RESTORE TABLE tbl ON CLUSTER 'cluster' FROM {backup_name}"
)
if not error:
status = (
nodes[0]
.query(f"SELECT status FROM system.backups WHERE id == '{restore_id}'")
.rstrip("\n")
)
# It is possible that the second backup was picked up first, and then the async backup
if status == "RESTORE_FAILED":
return
elif status == "RESTORING":
assert_eq_with_retry(
nodes[0],
f"SELECT status FROM system.backups WHERE id == '{restore_id}'",
"RESTORE_FAILED",
sleep_time=2,
retry_count=50,
)
return
else:
raise Exception(
"Concurrent restores both passed, when one is expected to fail"
)
expected_errors = [
"Concurrent restores not supported",
"Cannot restore the table default.tbl because it already contains some data",
]
if not error:
wait_for_fail_restore(nodes[0], restore_id)
assert any([expected_error in error for expected_error in expected_errors])
assert_eq_with_retry(
@@ -345,33 +355,14 @@ def test_concurrent_restores_on_different_node():
f"RESTORE TABLE tbl ON CLUSTER 'cluster' FROM {backup_name}"
)
if not error:
status = (
nodes[0]
.query(f"SELECT status FROM system.backups WHERE id == '{restore_id}'")
.rstrip("\n")
)
# It is possible that the second backup was picked up first, and then the async backup
if status == "RESTORE_FAILED":
return
elif status == "RESTORING":
assert_eq_with_retry(
nodes[0],
f"SELECT status FROM system.backups WHERE id == '{restore_id}'",
"RESTORE_FAILED",
sleep_time=2,
retry_count=50,
)
return
else:
raise Exception(
"Concurrent restores both passed, when one is expected to fail"
)
expected_errors = [
"Concurrent restores not supported",
"Cannot restore the table default.tbl because it already contains some data",
]
if not error:
wait_for_fail_restore(nodes[0], restore_id)
assert any([expected_error in error for expected_error in expected_errors])
assert_eq_with_retry(