Merge pull request #24205 from ClickHouse/integration-server-restart-increase-tiomeout

Increase timeout for server restart in integration tests.
This commit is contained in:
Nikolai Kochetov 2021-05-19 13:38:51 +03:00 committed by GitHub
commit c6800c67f5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -443,8 +443,7 @@ class ClickHouseCluster:
run_and_check(self.base_cmd + ["up", "--force-recreate", "--no-deps", "-d", node.name])
node.ip_address = self.get_instance_ip(node.name)
node.client = Client(node.ip_address, command=self.client_bin_path)
start_deadline = time.time() + 20.0 # seconds
node.wait_for_start(start_deadline)
node.wait_for_start(start_timeout=20.0, connection_timeout=600.0) # seconds
return node
def get_instance_ip(self, instance_name):
@ -802,13 +801,13 @@ class ClickHouseCluster:
subprocess_check_call(clickhouse_start_cmd)
print("ClickHouse instance created")
start_deadline = time.time() + 20.0 # seconds
start_timeout = 20.0 # seconds
for instance in self.instances.values():
instance.docker_client = self.docker_client
instance.ip_address = self.get_instance_ip(instance.name)
print("Waiting for ClickHouse start...")
instance.wait_for_start(start_deadline)
instance.wait_for_start(start_timeout)
print("ClickHouse started")
instance.client = Client(instance.ip_address, command=self.client_bin_path)
@ -1245,32 +1244,54 @@ class ClickHouseInstance:
def start(self):
self.get_docker_handle().start()
def wait_for_start(self, deadline=None, timeout=None):
start_time = time.time()
def wait_for_start(self, start_timeout=None, connection_timeout=None):
if timeout is not None:
deadline = start_time + timeout
if start_timeout is None or start_timeout <= 0:
raise Exception("Invalid timeout: {}".format(start_timeout))
if connection_timeout is not None and connection_timeout < start_timeout:
raise Exception("Connection timeout {} should be grater then start timeout {}"
.format(connection_timeout, start_timeout))
start_time = time.time()
prev_rows_in_log = 0
def has_new_rows_in_log():
nonlocal prev_rows_in_log
try:
rows_in_log = int(self.count_in_log(".*").strip())
res = rows_in_log > prev_rows_in_log
prev_rows_in_log = rows_in_log
return res
except ValueError:
return False
while True:
handle = self.get_docker_handle()
status = handle.status
if status == 'exited':
raise Exception(
"Instance `{}' failed to start. Container status: {}, logs: {}".format(self.name, status,
handle.logs().decode('utf-8')))
raise Exception("Instance `{}' failed to start. Container status: {}, logs: {}"
.format(self.name, status, handle.logs().decode('utf-8')))
deadline = start_time + start_timeout
# It is possible that server starts slowly.
# If container is running, and there is some progress in log, check connection_timeout.
if connection_timeout and status == 'running' and has_new_rows_in_log():
deadline = start_time + connection_timeout
current_time = time.time()
time_left = deadline - current_time
if deadline is not None and current_time >= deadline:
if current_time >= deadline:
raise Exception("Timed out while waiting for instance `{}' with ip address {} to start. "
"Container status: {}, logs: {}".format(self.name, self.ip_address, status,
handle.logs().decode('utf-8')))
socket_timeout = min(start_timeout, deadline - current_time)
# Repeatedly poll the instance address until there is something that listens there.
# Usually it means that ClickHouse is ready to accept queries.
try:
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
sock.settimeout(time_left)
sock.settimeout(socket_timeout)
sock.connect((self.ip_address, 9000))
return
except socket.timeout: