Try restarting zookeeper if connection failed (#44834)

This commit is contained in:
Antonio Andelic 2023-01-02 16:24:57 +01:00 committed by GitHub
parent ebbd47012e
commit 509fd873ec
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -4,7 +4,8 @@ import pytest
from helpers.cluster import ClickHouseCluster
import helpers.keeper_utils as keeper_utils
from multiprocessing.dummy import Pool
from kazoo.client import KazooClient, KazooState
from kazoo.client import KazooClient, KazooRetry
from kazoo.handlers.threading import KazooTimeoutError
import random
import string
import os
@ -40,6 +41,11 @@ def restart_and_clear_zookeeper(node):
start_zookeeper(node)
def restart_zookeeper(node):
    """Restart ZooKeeper on ``node``.

    Calls ``stop_zookeeper(node)`` followed by ``start_zookeeper(node)``;
    nothing else is done between or after the two calls.
    """
    # Order matters: the service must be stopped before it is started again.
    stop_zookeeper(node)
    start_zookeeper(node)
def clear_clickhouse_data(node):
node.exec_in_container(
[
@ -104,11 +110,25 @@ def get_fake_zk(node, timeout=30.0):
def get_genuine_zk(node, timeout=30.0):
    """Return a started ``KazooClient`` connected to ``node`` on port 2181.

    Every attempt builds a fresh client, configured with a
    ``KazooRetry(max_tries=20)`` connection retry policy, and starts it.
    When starting raises ``KazooTimeoutError``, ZooKeeper on ``node`` is
    restarted through ``restart_zookeeper`` and another attempt is made,
    up to ``CONNECTION_RETRIES`` attempts in total.

    Args:
        node: cluster instance; ``node.name`` is used to look up its IP.
        timeout: forwarded to ``KazooClient`` as its ``timeout`` argument.

    Returns:
        The started ``KazooClient`` instance.

    Raises:
        KazooTimeoutError: re-raised when the final attempt also times out.
    """
    CONNECTION_RETRIES = 100
    for i in range(CONNECTION_RETRIES):
        try:
            _genuine_zk_instance = KazooClient(
                hosts=cluster.get_instance_ip(node.name) + ":2181",
                timeout=timeout,
                connection_retry=KazooRetry(max_tries=20),
            )
            _genuine_zk_instance.start()
            return _genuine_zk_instance
        except KazooTimeoutError:
            # Out of attempts: surface the timeout to the caller instead of
            # restarting ZooKeeper one more time for nothing.
            if i == CONNECTION_RETRIES - 1:
                raise

            print(
                "Failed to connect to ZK cluster because of timeout. Restarting cluster and trying again."
            )
            # Brief pause before bouncing the service and retrying.
            time.sleep(0.2)
            restart_zookeeper(node)
def test_snapshot_and_load(started_cluster):