#!/usr/bin/env python3
"""Integration test: replicated fetches must respect the HTTP connection and
receive timeout settings instead of stalling forever when a replica is slow.

Scenario: node2 fetches parts from node1 through an artificial network delay;
the test waits until the replication queue records the expected timeout
exceptions for both the connection phase and the receive phase.
"""
import random
import string
import time

import pytest
from helpers.cluster import ClickHouseCluster
from helpers.network import PartitionManager

cluster = ClickHouseCluster(__file__)
node1 = cluster.add_instance(
    'node1', with_zookeeper=True, main_configs=['configs/server.xml'])
node2 = cluster.add_instance(
    'node2', with_zookeeper=True, main_configs=['configs/server.xml'])


@pytest.fixture(scope="module")
def started_cluster():
    """Start the two-node cluster once per module and always shut it down."""
    try:
        cluster.start()
        yield cluster
    finally:
        cluster.shutdown()


def get_random_string(length):
    """Return a random string of `length` uppercase letters and digits.

    `random` (not `secrets`) is fine here — this is only test payload data.
    """
    return ''.join(
        random.choice(string.ascii_uppercase + string.digits)
        for _ in range(length))


def test_no_stall(started_cluster):
    for instance in started_cluster.instances.values():
        instance.query("""
            CREATE TABLE t (key UInt64, data String)
            ENGINE = ReplicatedMergeTree('/clickhouse/test/t', '{instance}')
            ORDER BY tuple()
            PARTITION BY key""")

    # Pause fetches on node2 until the test data is prepared on node1.
    # (The original comment said "node3", but there is no node3 in this test.)
    node2.query("SYSTEM STOP FETCHES t")

    # ~100 MB per partition so the fetch is slow enough to hit timeouts.
    node1.query("INSERT INTO t SELECT 1, '{}' FROM numbers(500)".format(
        get_random_string(104857)))
    node1.query("INSERT INTO t SELECT 2, '{}' FROM numbers(500)".format(
        get_random_string(104857)))

    with PartitionManager() as pm:
        pm.add_network_delay(node1, 2000)
        node2.query("SYSTEM START FETCHES t")

        # Wait for connection-timeout exceptions to confirm that the
        # connection timeout is actually triggered.
        while True:
            conn_timeout_exceptions = int(node2.query(
                """
                SELECT count()
                FROM system.replication_queue
                WHERE last_exception LIKE '%connect timed out%'
                """).strip())
            if conn_timeout_exceptions >= 2:
                break
            time.sleep(0.1)

        print("Connection timeouts tested!")

        # Increase connection timeout and wait for receive timeouts instead.
        node2.query("""
            ALTER TABLE t
            MODIFY SETTING replicated_fetches_http_connection_timeout = 30,
                           replicated_fetches_http_receive_timeout = 1""")

        while True:
            timeout_exceptions = int(node2.query(
                """
                SELECT count()
                FROM system.replication_queue
                WHERE last_exception LIKE '%Timeout%'
                  AND last_exception NOT LIKE '%connect timed out%'
                """).strip())
            if timeout_exceptions >= 2:
                break
            time.sleep(0.1)

    for instance in started_cluster.instances.values():
        # Workaround for DROP TABLE not finishing if it is started while
        # the table is readonly.
        instance.query("SYSTEM RESTART REPLICA t")
        # Cleanup data directory from test results archive.
        instance.query("DROP TABLE t SYNC")