2021-08-18 13:34:57 +00:00
|
|
|
#!/usr/bin/env python3
|
|
|
|
##!/usr/bin/env python3
|
|
|
|
import pytest
|
|
|
|
from helpers.cluster import ClickHouseCluster
|
2022-09-06 10:58:14 +00:00
|
|
|
import helpers.keeper_utils as keeper_utils
|
2021-08-18 13:34:57 +00:00
|
|
|
from multiprocessing.dummy import Pool
|
2023-01-02 15:24:57 +00:00
|
|
|
from kazoo.client import KazooClient, KazooRetry
|
|
|
|
from kazoo.handlers.threading import KazooTimeoutError
|
2021-08-18 13:34:57 +00:00
|
|
|
import random
|
|
|
|
import string
|
|
|
|
import os
|
|
|
|
import time
|
|
|
|
|
|
|
|
cluster = ClickHouseCluster(__file__)

# (instance name, keeper config) for every instance of the test cluster.
_KEEPER_INSTANCES = (
    ("node1", "configs/keeper_config1.xml"),
    ("node2", "configs/keeper_config2.xml"),
    ("node3", "configs/keeper_config3.xml"),
)

# Instances are registered in declaration order; stay_alive=True is passed
# for each of them, exactly as with three explicit add_instance() calls.
node1, node2, node3 = [
    cluster.add_instance(instance_name, main_configs=[config_path], stay_alive=True)
    for instance_name, config_path in _KEEPER_INSTANCES
]
|
|
|
|
|
2021-08-18 13:34:57 +00:00
|
|
|
|
|
|
|
def start_zookeeper(node):
    """Run ``zkServer.sh start`` inside the container of ``node``."""
    start_command = ["bash", "-c", "/opt/zookeeper/bin/zkServer.sh start"]
    node.exec_in_container(start_command)
|
2022-03-22 16:39:58 +00:00
|
|
|
|
2021-08-18 13:34:57 +00:00
|
|
|
|
|
|
|
def stop_zookeeper(node):
    """Run ``zkServer.sh stop`` in the container of ``node`` and wait for exit.

    After issuing the stop command, ``node.get_process_pid("zookeeper")`` is
    polled every 0.2 seconds until it returns ``None``.

    Raises:
        Exception: if a "zookeeper" process is still reported after 60 seconds.
    """
    node.exec_in_container(["bash", "-c", "/opt/zookeeper/bin/zkServer.sh stop"])

    # A monotonic clock keeps the deadline immune to wall-clock adjustments.
    deadline = time.monotonic() + 60
    while node.get_process_pid("zookeeper") is not None:
        if time.monotonic() > deadline:
            raise Exception("Failed to stop ZooKeeper in 60 secs")
        time.sleep(0.2)
|
2022-03-22 16:39:58 +00:00
|
|
|
|
2021-08-18 13:34:57 +00:00
|
|
|
|
2023-01-07 17:08:25 +00:00
|
|
|
def generate_zk_snapshot(node):
    """Restart ZooKeeper on ``node`` until its newest snapshot is readable.

    Up to 100 attempts are made. Each attempt stops ZooKeeper, starts it,
    sleeps 2 seconds, stops it again, then runs ``zkSnapShotToolkit.sh`` on
    the most recently modified ``snapshot.*`` file in /zookeeper/version-2.
    The function returns as soon as the toolkit call succeeds.

    Raises:
        Exception: if none of the attempts passed the toolkit check.
    """
    find_newest_snapshot = [
        "bash",
        "-c",
        "find /zookeeper/version-2 -name 'snapshot.*' -printf '%T@ %p\n' | sort -n | awk 'END {print $2}'",
    ]

    for _attempt in range(100):
        stop_zookeeper(node)
        start_zookeeper(node)
        time.sleep(2)
        stop_zookeeper(node)

        # Snapshots are sorted by modification time; awk keeps the last path.
        newest_snapshot = node.exec_in_container(find_newest_snapshot).strip()
        print(f"Latest snapshot: {newest_snapshot}")

        try:
            # The toolkit inspects a snapshot file; an exception from this
            # call is treated as "snapshot is broken" and triggers a retry.
            node.exec_in_container(
                [
                    "bash",
                    "-c",
                    f"/opt/zookeeper/bin/zkSnapShotToolkit.sh {newest_snapshot}",
                ]
            )
        except Exception as err:
            print(f"Got error while reading snapshot: {err}")
        else:
            return

    raise Exception("Failed to generate a ZooKeeper snapshot")
|
|
|
|
|
|
|
|
|
2021-08-18 13:34:57 +00:00
|
|
|
def clear_zookeeper(node):
    """Remove everything under /zookeeper inside the container of ``node``."""
    wipe_command = ["bash", "-c", "rm -fr /zookeeper/*"]
    node.exec_in_container(wipe_command)
|
|
|
|
|
2021-08-18 13:34:57 +00:00
|
|
|
|
|
|
|
def restart_and_clear_zookeeper(node):
    """Stop ZooKeeper on ``node``, wipe /zookeeper, then start it again."""
    # The order matters: data is removed only while the server is stopped.
    for step in (stop_zookeeper, clear_zookeeper, start_zookeeper):
        step(node)
|
|
|
|
|
2022-03-22 16:39:58 +00:00
|
|
|
|
2023-01-02 15:24:57 +00:00
|
|
|
def restart_zookeeper(node):
    """Stop ZooKeeper on ``node`` and start it again, keeping its data."""
    for step in (stop_zookeeper, start_zookeeper):
        step(node)
|
|
|
|
|
|
|
|
|
2021-08-18 13:34:57 +00:00
|
|
|
def clear_clickhouse_data(node):
    """Delete the coordination logs and snapshots stored on ``node``."""
    wipe_command = "rm -fr /var/lib/clickhouse/coordination/logs/* /var/lib/clickhouse/coordination/snapshots/*"
    node.exec_in_container(["bash", "-c", wipe_command])
|
|
|
|
|
2021-08-18 13:34:57 +00:00
|
|
|
|
|
|
|
def convert_zookeeper_data(node):
    """Convert the ZooKeeper data on ``node`` with ``clickhouse keeper-converter``.

    /zookeeper/version-2 is first archived to /var/lib/clickhouse/zk-data.tar.gz,
    then the converter is run with /var/lib/clickhouse/coordination/snapshots
    as its output directory.

    Returns:
        The output directory joined with the stripped output of ``ls`` on it.
        NOTE(review): this is a single usable path only when the directory
        holds exactly one entry - confirm callers clear it beforehand.
    """

    def run_in_bash(command):
        return node.exec_in_container(["bash", "-c", command])

    # Archive the raw ZooKeeper data before converting it.
    run_in_bash("tar -cvzf /var/lib/clickhouse/zk-data.tar.gz /zookeeper/version-2")

    run_in_bash(
        "/usr/bin/clickhouse keeper-converter --zookeeper-logs-dir /zookeeper/version-2/ --zookeeper-snapshots-dir /zookeeper/version-2/ --output-dir /var/lib/clickhouse/coordination/snapshots"
    )

    listing = run_in_bash("ls /var/lib/clickhouse/coordination/snapshots")
    return os.path.join("/var/lib/clickhouse/coordination/snapshots", listing.strip())
|
|
|
|
|
2021-08-18 13:34:57 +00:00
|
|
|
|
|
|
|
def stop_clickhouse(node):
    """Stop ClickHouse on ``node`` by delegating to ``node.stop_clickhouse()``."""
    node.stop_clickhouse()
|
|
|
|
|
2022-03-22 16:39:58 +00:00
|
|
|
|
2021-08-18 13:34:57 +00:00
|
|
|
def start_clickhouse(node):
    """Start ClickHouse on ``node`` and wait until Keeper reports a connection.

    The wait is delegated to ``keeper_utils.wait_until_connected``.
    """
    node.start_clickhouse()
    keeper_utils.wait_until_connected(cluster, node)
|
2021-08-18 13:34:57 +00:00
|
|
|
|
2022-03-22 16:39:58 +00:00
|
|
|
|
2021-08-18 13:34:57 +00:00
|
|
|
@pytest.fixture(scope="module")
def started_cluster():
    """Module-scoped fixture: start the cluster, yield it, then shut it down."""
    try:
        cluster.start()
        yield cluster
    finally:
        # Executed on regular teardown as well as when start() or a test raised.
        cluster.shutdown()
|
|
|
|
|
2022-03-22 16:39:58 +00:00
|
|
|
|
2021-08-18 13:34:57 +00:00
|
|
|
def get_fake_zk(node, timeout=30.0):
    """Return a started ``KazooClient`` connected to port 9181 of ``node``.

    Port 9181 is presumably the ClickHouse Keeper endpoint of the instance
    (the "fake" ZooKeeper) - confirm against the keeper configs.

    Args:
        node: instance whose IP is looked up through ``cluster``.
        timeout: passed to ``KazooClient`` as ``timeout``.
    """
    keeper_address = cluster.get_instance_ip(node.name) + ":9181"
    client = KazooClient(hosts=keeper_address, timeout=timeout)
    client.start()
    return client
|
|
|
|
|
2022-03-22 16:39:58 +00:00
|
|
|
|
2021-08-18 13:34:57 +00:00
|
|
|
def get_genuine_zk(node, timeout=30.0):
    """Return a started ``KazooClient`` connected to port 2181 of ``node``.

    Up to 100 connection attempts are made. After a ``KazooTimeoutError`` the
    function sleeps 0.2 seconds and restarts ZooKeeper on ``node`` before the
    next attempt; the error of the last attempt is re-raised.

    Args:
        node: instance whose IP is looked up through ``cluster``.
        timeout: passed to ``KazooClient`` as ``timeout``.

    Raises:
        KazooTimeoutError: if the final attempt also timed out.
    """
    max_attempts = 100
    for attempt in range(1, max_attempts + 1):
        try:
            client = KazooClient(
                hosts=cluster.get_instance_ip(node.name) + ":2181",
                timeout=timeout,
                connection_retry=KazooRetry(max_tries=20),
            )
            client.start()
        except KazooTimeoutError:
            # Out of attempts: let the caller see the timeout.
            if attempt == max_attempts:
                raise

            print(
                "Failed to connect to ZK cluster because of timeout. Restarting cluster and trying again."
            )
            time.sleep(0.2)
            restart_zookeeper(node)
        else:
            return client
|
2021-08-18 13:34:57 +00:00
|
|
|
|
|
|
|
|
|
|
|
def test_snapshot_and_load(started_cluster):
    """Convert a ZooKeeper snapshot and load it into the three-node cluster.

    Steps:
      1. Reset ZooKeeper on node1 and connect to it on port 2181.
      2. Stop ClickHouse on all nodes and clear their coordination data.
      3. Create /test0 .. /test999 in ZooKeeper and generate a snapshot.
      4. Convert the ZooKeeper data on node1 and copy the resulting path to
         node2 and node3.
      5. Start ClickHouse on all nodes in parallel.
      6. Create /test1000 .. /test1999 through randomly chosen port-9181
         clients (failures are only printed), then /test10000 through the
         first client (a failure here fails the test).
    """
    genuine_connection = None
    fake_zks = []

    try:
        restart_and_clear_zookeeper(node1)
        genuine_connection = get_genuine_zk(node1)

        for node in [node1, node2, node3]:
            print("Stop and clear", node.name, "with dockerid", node.docker_id)
            stop_clickhouse(node)
            clear_clickhouse_data(node)

        for i in range(1000):
            genuine_connection.create("/test" + str(i), b"data")

        print("Data loaded to zookeeper")

        generate_zk_snapshot(node1)

        print("Data copied to node1")
        resulted_path = convert_zookeeper_data(node1)
        print("Resulted path", resulted_path)
        for node in [node2, node3]:
            print("Copy snapshot from", node1.name, "to", node.name)
            cluster.copy_file_from_container_to_container(
                node1, resulted_path, node, "/var/lib/clickhouse/coordination/snapshots"
            )

        print("Starting clickhouses")

        # The context manager releases the worker threads once all three
        # start-ups have finished; previously the pool was never closed.
        with Pool(3) as pool:
            result = pool.map_async(start_clickhouse, [node1, node2, node3])
            # NOTE(review): wait() does not re-raise exceptions raised by
            # start_clickhouse in a worker; switch to result.get() if start-up
            # failures should fail the test at this point.
            result.wait()

        print("Loading additional data")
        fake_zks = [get_fake_zk(node) for node in [node1, node2, node3]]
        for i in range(1000):
            fake_zk = random.choice(fake_zks)
            try:
                fake_zk.create("/test" + str(i + 1000), b"data")
            except Exception as ex:
                # Best effort: individual failures are reported, not fatal.
                print("Got exception:" + str(ex))

        print("Final")
        fake_zks[0].create("/test10000", b"data")
    finally:
        # Kazoo clients have to be terminated explicitly with stop(); left
        # running, their connection threads keep logging after pytest has
        # closed the output stream ("--- Logging error ---"), which leads to
        # an incorrectly parsed test report.
        for zk in fake_zks:
            if zk:
                zk.stop()
                zk.close()
        if genuine_connection:
            genuine_connection.stop()
            genuine_connection.close()
|