from multiprocessing.dummy import Pool
from time import sleep

import pytest
from kazoo.client import KazooClient
from minio.deleteobjects import DeleteObject

from helpers import keeper_utils
from helpers.cluster import ClickHouseCluster
from helpers.retry_decorator import retry

# from kazoo.protocol.serialization import Connect, read_buffer, write_buffer

cluster = ClickHouseCluster(__file__)
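
# Three Keeper nodes form a single ensemble; with_minio=True attaches the MinIO
# container that serves as the S3 endpoint for snapshot uploads.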
node1 = cluster.add_instance(
    "node1",
    main_configs=["configs/keeper_config1.xml"],
    stay_alive=True,
    with_minio=True,
)
node2 = cluster.add_instance(
    "node2",
    main_configs=["configs/keeper_config2.xml"],
    stay_alive=True,
    with_minio=True,
)
node3 = cluster.add_instance(
    "node3",
    main_configs=["configs/keeper_config3.xml"],
    stay_alive=True,
    with_minio=True,
)


@pytest.fixture(scope="module")
def started_cluster():
    try:
        cluster.start()

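        # Bucket that the Keeper configs upload snapshots into; the test below
        # checks the objects that appear here.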
        cluster.minio_client.make_bucket("snapshots")

        yield cluster

    finally:
        cluster.shutdown()


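# Plain ZooKeeper client connected to the Keeper port (9181) of the given node.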
def get_fake_zk(nodename, timeout=30.0):
    _fake_zk_instance = KazooClient(
        hosts=cluster.get_instance_ip(nodename) + ":9181", timeout=timeout
    )
    _fake_zk_instance.start()
    return _fake_zk_instance


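# Best-effort shutdown of a Kazoo client; errors during cleanup are ignored.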
def destroy_zk_client(zk):
    try:
        if zk:
            zk.stop()
            zk.close()
    except Exception:
        pass


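# Poll until the node's Keeper accepts requests; the for/else raises if it
# never becomes ready.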
def wait_node(node):
    for _ in range(100):
        zk = None
        try:
            zk = get_fake_zk(node.name, timeout=30.0)
            zk.sync("/")
            print("node", node.name, "ready")
            break
        except Exception as ex:
            sleep(0.2)
            print("Waiting until", node.name, "is ready, exception:", ex)
        finally:
            destroy_zk_client(zk)
    else:
        raise Exception(f"Can't wait for node {node.name} to become ready")


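# Remove the node's local coordination state (Raft logs and snapshots) inside
# the container.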
def delete_keeper_snapshots_logs(nodex):
    nodex.exec_in_container(
        [
            "bash",
            "-c",
            "rm -rf /var/lib/clickhouse/coordination/log /var/lib/clickhouse/coordination/snapshots",
        ]
    )


def test_s3_upload(started_cluster):
    node1_zk = get_fake_zk(node1.name)

    # snapshot_distance is set to 50 in the Keeper configs,
    # so a snapshot should be created after every 50 requests
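    # (In keeper_config*.xml this presumably corresponds to something like
    # <keeper_server><coordination_settings><snapshot_distance>50</snapshot_distance>
    # ...</coordination_settings></keeper_server>; the config files themselves
    # are not shown here.)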
    for _ in range(210):
        node1_zk.create("/test", sequence=True)

    def get_saved_snapshots():
        return [
            obj.object_name
            for obj in list(cluster.minio_client.list_objects("snapshots"))
        ]

    def delete_s3_snapshots():
        snapshots = cluster.minio_client.list_objects("snapshots")
        for s in snapshots:
            cluster.minio_client.remove_object("snapshots", s.object_name)

    # Keeper sends snapshots asynchronously, hence we need to retry.
    def _check_snapshots():
        assert set(get_saved_snapshots()) == set(
            [
                "snapshot_50.bin.zstd",
                "snapshot_100.bin.zstd",
                "snapshot_150.bin.zstd",
                "snapshot_200.bin.zstd",
            ]
        )

    retry(AssertionError, retries=10, delay=2, jitter=0, backoff=1)(_check_snapshots)

    destroy_zk_client(node1_zk)
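    # Hard-kill the node we have been writing through so the remaining two
    # nodes elect a new leader.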
    node1.stop_clickhouse(kill=True)

    # wait for a new leader to be picked and check that it continues
    # uploading snapshots
    wait_node(node2)
    node2_zk = get_fake_zk(node2.name)
    for _ in range(200):
        node2_zk.create("/test", sequence=True)

    def _check_snapshots_without_quorum():
        assert len(get_saved_snapshots()) > 4

    retry(AssertionError, retries=10, delay=2, jitter=0, backoff=1)(
        _check_snapshots_without_quorum
    )

    _check_snapshots_without_quorum()

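    # At least one of the surviving nodes should have logged a successful
    # snapshot upload to S3.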
    success_upload_message = "Successfully uploaded"
    assert node2.contains_in_log(success_upload_message) or node3.contains_in_log(
        success_upload_message
    )

    destroy_zk_client(node2_zk)
    node2.stop_clickhouse()
    delete_keeper_snapshots_logs(node2)
    node3.stop_clickhouse()
    delete_keeper_snapshots_logs(node3)
    delete_keeper_snapshots_logs(node1)
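    # Restart all three nodes in parallel; their local coordination state has
    # just been wiped.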
    p = Pool(3)
    waiters = []

    def start_clickhouse(node):
        node.start_clickhouse()

    waiters.append(p.apply_async(start_clickhouse, args=(node1,)))
    waiters.append(p.apply_async(start_clickhouse, args=(node2,)))
    waiters.append(p.apply_async(start_clickhouse, args=(node3,)))

    delete_s3_snapshots()  # for next iteration

    for waiter in waiters:
        waiter.wait()

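    # Make sure every node is back up and accepting Keeper connections.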
    keeper_utils.wait_until_connected(cluster, node1)
    keeper_utils.wait_until_connected(cluster, node2)
    keeper_utils.wait_until_connected(cluster, node3)