ClickHouse/tests/integration/test_keeper_snapshot_small_distance/test.py

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

223 lines
6.3 KiB
Python
Raw Normal View History

#!/usr/bin/env python3
2024-09-27 10:19:39 +00:00
import os
import random
import string
import time
2024-09-27 10:19:39 +00:00
from multiprocessing.dummy import Pool
import pytest
from kazoo.client import KazooClient, KazooRetry
from kazoo.handlers.threading import KazooTimeoutError
import helpers.keeper_utils as keeper_utils
from helpers.cluster import ClickHouseCluster
# Single cluster object shared by every helper and test in this module.
cluster = ClickHouseCluster(__file__)

# Three instances, each with its own keeper config file.
# NOTE(review): stay_alive=True is presumably what lets the helpers below stop
# and start the ClickHouse server inside a running container - confirm against
# helpers.cluster.
node1 = cluster.add_instance(
    "node1",
    main_configs=["configs/keeper_config1.xml"],
    stay_alive=True,
)
node2 = cluster.add_instance(
    "node2",
    main_configs=["configs/keeper_config2.xml"],
    stay_alive=True,
)
node3 = cluster.add_instance(
    "node3",
    main_configs=["configs/keeper_config3.xml"],
    stay_alive=True,
)
def start_zookeeper(node):
    """Run ``zkServer.sh start`` inside ``node``'s container."""
    start_cmd = "/opt/zookeeper/bin/zkServer.sh start"
    node.exec_in_container(["bash", "-c", start_cmd])
def stop_zookeeper(node):
    """Stop ZooKeeper inside ``node``'s container and wait for it to exit.

    Runs ``zkServer.sh stop`` and then polls ``node.get_process_pid("zookeeper")``
    every 0.2 seconds until it returns ``None``.

    Raises:
        Exception: if a "zookeeper" process is still reported more than
            60 seconds after the stop command was issued.
    """
    node.exec_in_container(["bash", "-c", "/opt/zookeeper/bin/zkServer.sh stop"])

    # A monotonic clock keeps the 60 second budget immune to wall-clock jumps.
    deadline = time.monotonic() + 60
    # Identity comparison: only a literal None means "no such process".
    while node.get_process_pid("zookeeper") is not None:
        if time.monotonic() > deadline:
            raise Exception("Failed to stop ZooKeeper in 60 secs")
        time.sleep(0.2)
2023-01-07 17:08:25 +00:00
def generate_zk_snapshot(node):
    """Restart ZooKeeper on ``node`` until its newest snapshot can be read back.

    Each attempt stops ZooKeeper, starts it, waits two seconds and stops it
    again.  It then looks up the most recently modified ``snapshot.*`` file
    under ``/zookeeper/version-2`` and runs ``zkSnapShotToolkit.sh`` on it.
    The function returns as soon as the toolkit call does not raise.

    Raises:
        Exception: if none of the 100 attempts yields a readable snapshot.
    """
    # Prints the path of the snapshot file with the latest modification time.
    find_latest_cmd = "find /zookeeper/version-2 -name 'snapshot.*' -printf '%T@ %p\n' | sort -n | awk 'END {print $2}'"

    for _attempt in range(100):
        stop_zookeeper(node)
        start_zookeeper(node)
        time.sleep(2)
        stop_zookeeper(node)

        # get last snapshot
        listing = node.exec_in_container(["bash", "-c", find_latest_cmd])
        last_snapshot = listing.strip()
        print(f"Latest snapshot: {last_snapshot}")

        # verify last snapshot
        # zkSnapShotToolkit is a tool to inspect generated snapshots - if it's
        # broken, an exception is thrown
        verify_cmd = f"/opt/zookeeper/bin/zkSnapShotToolkit.sh {last_snapshot}"
        try:
            node.exec_in_container(["bash", "-c", verify_cmd])
        except Exception as err:
            print(f"Got error while reading snapshot: {err}")
        else:
            return

    raise Exception("Failed to generate a ZooKeeper snapshot")
def clear_zookeeper(node):
    """Remove everything under ``/zookeeper`` inside ``node``'s container."""
    wipe_cmd = "rm -fr /zookeeper/*"
    node.exec_in_container(["bash", "-c", wipe_cmd])
def restart_and_clear_zookeeper(node):
    """Stop ZooKeeper on ``node``, wipe its data directory, then start it again."""
    for step in (stop_zookeeper, clear_zookeeper, start_zookeeper):
        step(node)
def restart_zookeeper(node):
    """Stop ZooKeeper on ``node`` and start it again, keeping its data."""
    for step in (stop_zookeeper, start_zookeeper):
        step(node)
def clear_clickhouse_data(node):
    """Delete the coordination logs and snapshots stored in ``node``'s container."""
    purge_cmd = "rm -fr /var/lib/clickhouse/coordination/logs/* /var/lib/clickhouse/coordination/snapshots/*"
    node.exec_in_container(["bash", "-c", purge_cmd])
def convert_zookeeper_data(node):
    """Convert ``node``'s ZooKeeper data with ``clickhouse keeper-converter``.

    Returns:
        The snapshots directory joined with the stripped output of ``ls`` on
        that directory, i.e. the path of the converted snapshot.
        NOTE(review): this is a single valid path only if the directory holds
        exactly one entry after conversion - confirm callers clear it first.
    """
    snapshots_dir = "/var/lib/clickhouse/coordination/snapshots"

    # Archive the raw ZooKeeper data before converting it.
    # NOTE(review): presumably kept so the input can be inspected after a
    # failure - confirm.
    archive_cmd = "tar -cvzf /var/lib/clickhouse/zk-data.tar.gz /zookeeper/version-2"
    node.exec_in_container(["bash", "-c", archive_cmd])

    cmd = "/usr/bin/clickhouse keeper-converter --zookeeper-logs-dir /zookeeper/version-2/ --zookeeper-snapshots-dir /zookeeper/version-2/ --output-dir /var/lib/clickhouse/coordination/snapshots"
    node.exec_in_container(["bash", "-c", cmd])

    listing = node.exec_in_container(
        ["bash", "-c", "ls /var/lib/clickhouse/coordination/snapshots"]
    )
    return os.path.join(snapshots_dir, listing.strip())
def stop_clickhouse(node):
    """Stop the ClickHouse server on ``node`` (delegates to ``node.stop_clickhouse()``)."""
    node.stop_clickhouse()
def start_clickhouse(node):
    """Start the ClickHouse server on ``node`` and wait in ``keeper_utils``.

    NOTE(review): ``keeper_utils.wait_until_connected`` presumably blocks until
    the node's Keeper accepts connections - confirm against helpers.keeper_utils.
    """
    node.start_clickhouse()
    keeper_utils.wait_until_connected(cluster, node)
@pytest.fixture(scope="module")
def started_cluster():
    """Module-scoped fixture: start the cluster, yield it, always shut it down."""
    try:
        cluster.start()
        yield cluster
    finally:
        # Runs even when cluster.start() itself raised.
        cluster.shutdown()
def get_fake_zk(node, timeout=30.0):
    """Return a started ``KazooClient`` connected to ``node`` on port 9181.

    NOTE(review): 9181 is presumably the ClickHouse Keeper client port
    configured in ``configs/keeper_config*.xml`` - confirm.

    The caller is responsible for stopping and closing the returned client.
    """
    keeper_address = cluster.get_instance_ip(node.name) + ":9181"
    client = KazooClient(hosts=keeper_address, timeout=timeout)
    client.start()
    return client
def get_genuine_zk(node, timeout=30.0):
    """Return a started ``KazooClient`` connected to ``node`` on port 2181.

    Up to 100 connection attempts are made.  When an attempt ends with
    ``KazooTimeoutError``, ZooKeeper on ``node`` is restarted before the next
    attempt.

    Raises:
        KazooTimeoutError: if the last attempt also times out.

    The caller is responsible for stopping and closing the returned client.
    """
    max_attempts = 100
    for attempt in range(1, max_attempts + 1):
        try:
            client = KazooClient(
                hosts=cluster.get_instance_ip(node.name) + ":2181",
                timeout=timeout,
                connection_retry=KazooRetry(max_tries=20),
            )
            client.start()
            return client
        except KazooTimeoutError:
            # Out of attempts: let the timeout propagate to the caller.
            if attempt == max_attempts:
                raise

            print(
                "Failed to connect to ZK cluster because of timeout. Restarting cluster and trying again."
            )
            time.sleep(0.2)
            restart_zookeeper(node)
def test_snapshot_and_load(started_cluster):
    """Load a converted ZooKeeper snapshot into all three nodes and write to them.

    Steps:
      1. Reset ZooKeeper on node1 and stop/clear ClickHouse on every node.
      2. Create 1000 znodes in ZooKeeper and force a readable snapshot.
      3. Convert the ZooKeeper data on node1 and copy the resulting snapshot
         to node2 and node3.
      4. Start ClickHouse on all nodes in parallel.
      5. Create 1000 more znodes through randomly chosen nodes (individual
         failures are only printed), then one final znode that must succeed.

    Every Kazoo client that was opened is stopped and closed in ``finally``.
    """
    all_nodes = [node1, node2, node3]
    genuine_connection = None
    fake_zks = []

    try:
        restart_and_clear_zookeeper(node1)
        genuine_connection = get_genuine_zk(node1)

        for node in all_nodes:
            print("Stop and clear", node.name, "with dockerid", node.docker_id)
            stop_clickhouse(node)
            clear_clickhouse_data(node)

        for i in range(1000):
            genuine_connection.create("/test" + str(i), b"data")

        print("Data loaded to zookeeper")

        generate_zk_snapshot(node1)

        print("Data copied to node1")
        resulted_path = convert_zookeeper_data(node1)
        print("Resulted path", resulted_path)
        for node in [node2, node3]:
            print("Copy snapshot from", node1.name, "to", node.name)
            cluster.copy_file_from_container_to_container(
                node1, resulted_path, node, "/var/lib/clickhouse/coordination/snapshots"
            )

        print("Starting clickhouses")

        # map() blocks until every node has started and re-raises the first
        # worker exception, so a failed start is reported here instead of being
        # silently dropped.  The with-block releases the pool's threads.
        with Pool(3) as pool:
            pool.map(start_clickhouse, all_nodes)

        print("Loading additional data")
        # Append clients one by one: if a later connection fails, the clients
        # that did connect are already in fake_zks and get closed in finally.
        for node in all_nodes:
            fake_zks.append(get_fake_zk(node))

        for i in range(1000):
            fake_zk = random.choice(fake_zks)
            try:
                fake_zk.create("/test" + str(i + 1000), b"data")
            except Exception as ex:
                # Best effort: a single failed write must not fail the test.
                print("Got exception:" + str(ex))

        print("Final")
        fake_zks[0].create("/test10000", b"data")
    finally:
        for zk in fake_zks:
            if zk:
                zk.stop()
                zk.close()
        if genuine_connection:
            genuine_connection.stop()
            genuine_connection.close()