# Source: ClickHouse/tests/integration/test_hedged_requests/test.py (Python, 321 lines, 11 KiB)
import os
import sys
import time
import pytest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from helpers.cluster import ClickHouseCluster
from helpers.network import PartitionManager
2021-02-26 15:53:40 +00:00
from helpers.test_tools import TSV
2021-01-19 19:21:06 +00:00
cluster = ClickHouseCluster(__file__)

# Instance handles keyed by name. Every slot starts as None and is filled in
# by the started_cluster fixture. 'node' is kept as the last key.
NODES = dict.fromkeys(['node_{}'.format(i) for i in (1, 2, 3)])
NODES['node'] = None

# Value the tests plug into sleep_in_send_tables_status / sleep_in_send_data
# (presumably seconds - see the timing comment in check_query).
sleep_time = 30
2021-01-19 19:21:06 +00:00
@pytest.fixture(scope="module")
def started_cluster():
    """Start the test cluster once per module and create the test tables.

    Registers the 'node' instance and the three 'node_N' instances, starts
    the cluster, creates a ``replicated`` table on every instance and a
    ``distributed`` table on 'node', then inserts 100 rows through the
    distributed table.

    Yields:
        The started ``cluster`` object.  The cluster is shut down when the
        fixture is torn down, even if setup fails after ``cluster.start()``.
    """
    NODES['node'] = cluster.add_instance(
        'node', with_zookeeper=True, stay_alive=True,
        main_configs=['configs/remote_servers.xml'], user_configs=['configs/users.xml'])

    # Only values of existing keys are replaced, so iterating NODES here is safe.
    for name in NODES:
        if name != 'node':
            NODES[name] = cluster.add_instance(name, with_zookeeper=True, user_configs=['configs/users1.xml'])

    try:
        cluster.start()

        # The dict is not mutated in this loop, so no defensive copy is needed.
        for node_id, node in NODES.items():
            # The instance name doubles as the replica name in the table definition.
            node.query('''CREATE TABLE replicated (id UInt32, date Date) ENGINE =
            ReplicatedMergeTree('/clickhouse/tables/replicated', '{}') ORDER BY id PARTITION BY toYYYYMM(date)'''.format(node_id))

        NODES['node'].query('''CREATE TABLE distributed (id UInt32, date Date) ENGINE =
            Distributed('test_cluster', 'default', 'replicated')''')

        NODES['node'].query("INSERT INTO distributed select number, toDate(number) from numbers(100);")

        yield cluster

    finally:
        cluster.shutdown()
2021-02-15 13:21:36 +00:00
# Template for the users.d profile override written to the replicas by the
# tests.  Fill it with str.format(sleep_in_send_tables_status=...,
# sleep_in_send_data=...).
config = '''<yandex>
    <profiles>
        <default>
            <sleep_in_send_tables_status>{sleep_in_send_tables_status}</sleep_in_send_tables_status>
            <sleep_in_send_data>{sleep_in_send_data}</sleep_in_send_data>
        </default>
    </profiles>
</yandex>'''
2021-02-26 15:53:40 +00:00
def check_query(expected_replica, receive_timeout=300):
    """Restart 'node' and check which replica answers a short SELECT, and how fast.

    Args:
        expected_replica: host name that the first result row must report.
        receive_timeout: value substituted into the query's
            ``receive_timeout`` setting.

    Raises:
        AssertionError: if the result is not ``<expected_replica>\\t0`` or the
            query took 10 seconds or longer.
    """
    NODES['node'].restart_clickhouse()

    # Without hedged requests select query will last more than 30 seconds,
    # with hedged requests it will last just around 1-2 second

    # Use a monotonic clock: wall-clock time can be adjusted while the query
    # runs, which would distort the measured duration.
    start = time.monotonic()
    result = NODES['node'].query(
        "SELECT hostName(), id FROM distributed ORDER BY id LIMIT 1 SETTINGS receive_timeout={}".format(receive_timeout))
    query_time = time.monotonic() - start

    assert TSV(result) == TSV(expected_replica + "\t0")

    print("Query time:", query_time)
    assert query_time < 10, "query took {:.2f}s, expected less than 10s".format(query_time)
2021-02-15 13:21:36 +00:00
2021-03-02 14:05:33 +00:00
def check_settings(node_name, sleep_in_send_tables_status, sleep_in_send_data):
    """Poll a node until both sleep settings report the expected values.

    Reads ``sleep_in_send_tables_status`` and ``sleep_in_send_data`` from
    ``system.settings`` over HTTP, up to 1000 times with a 0.1 second pause
    between attempts.

    Args:
        node_name: key of the instance in ``NODES``.
        sleep_in_send_tables_status: expected integer value of that setting.
        sleep_in_send_data: expected integer value of that setting.

    Raises:
        AssertionError: if the values never matched within 1000 attempts.
    """
    node = NODES[node_name]
    remaining = 1000
    while remaining > 0:
        status_value = node.http_query("SELECT value FROM system.settings WHERE name='sleep_in_send_tables_status'")
        data_value = node.http_query("SELECT value FROM system.settings WHERE name='sleep_in_send_data'")
        if int(status_value) == sleep_in_send_tables_status and int(data_value) == sleep_in_send_data:
            return
        time.sleep(0.1)
        remaining -= 1

    # Reached only when every attempt was used up without a match.
    assert remaining > 0
2021-02-26 15:53:40 +00:00
def test_stuck_replica(started_cluster):
    """With node_1's container paused, node_2 must answer and node_1 is counted slow once.

    Checks that ``slowdowns_count`` for node_1 is 1 after the first query and
    still 1 after a second query that is again answered by node_2.
    """
    slowdowns_query = "SELECT slowdowns_count FROM system.clusters WHERE cluster='test_cluster' and host_name='node_1'"

    cluster.pause_container("node_1")
    try:
        check_query(expected_replica="node_2")

        result = NODES['node'].query(slowdowns_query)
        assert TSV(result) == TSV("1")

        result = NODES['node'].query("SELECT hostName(), id FROM distributed ORDER BY id LIMIT 1")
        assert TSV(result) == TSV("node_2\t0")

        # Check that we didn't choose node_1 first again and slowdowns_count didn't increase.
        result = NODES['node'].query(slowdowns_query)
        assert TSV(result) == TSV("1")
    finally:
        # Resume node_1 even when an assertion fails; the cluster is shared
        # by every test in this module, so a paused container would leak
        # into the tests that run afterwards.
        cluster.unpause_container("node_1")
2021-03-02 22:24:41 +00:00
def test_long_query(started_cluster):
    """Run a slow aggregate over ``distributed`` and expect node_1 to return max(id) 99.

    After the check, 100 rows are inserted through the distributed table again.
    """
    initiator = NODES['node']

    # Restart to reset pool states.
    initiator.restart_clickhouse()

    slow_select = "select hostName(), max(id + sleep(1.5)) from distributed settings max_block_size = 1, max_threads = 1;"
    assert TSV(initiator.query(slow_select)) == TSV("node_1\t99")

    initiator.query("INSERT INTO distributed select number, toDate(number) from numbers(100);")
2021-02-15 13:21:36 +00:00
def test_send_table_status_sleep(started_cluster):
    """node_1 gets sleep_in_send_tables_status=sleep_time; node_2 is expected to answer."""
    users_config = '/etc/clickhouse-server/users.d/users1.xml'
    # (replica, sleep_in_send_tables_status, sleep_in_send_data)
    delays = (
        ('node_1', sleep_time, 0),
        ('node_2', 0, 0),
        ('node_3', 0, 0),
    )

    # Rewrite the profile on every replica first ...
    for replica, status_delay, data_delay in delays:
        NODES[replica].replace_config(
            users_config,
            config.format(sleep_in_send_tables_status=status_delay, sleep_in_send_data=data_delay))

    # ... then wait until each replica reports the new values.
    for replica, status_delay, data_delay in delays:
        check_settings(replica, status_delay, data_delay)

    check_query(expected_replica="node_2")
2021-02-15 13:21:36 +00:00
2021-03-02 14:05:33 +00:00
2021-02-15 13:21:36 +00:00
def test_send_table_status_sleep2(started_cluster):
    """node_1 and node_2 get sleep_in_send_tables_status=sleep_time; node_3 is expected to answer."""
    users_config = '/etc/clickhouse-server/users.d/users1.xml'
    # (replica, sleep_in_send_tables_status, sleep_in_send_data)
    delays = (
        ('node_1', sleep_time, 0),
        ('node_2', sleep_time, 0),
        ('node_3', 0, 0),
    )

    # Rewrite the profile on every replica first ...
    for replica, status_delay, data_delay in delays:
        NODES[replica].replace_config(
            users_config,
            config.format(sleep_in_send_tables_status=status_delay, sleep_in_send_data=data_delay))

    # ... then wait until each replica reports the new values.
    for replica, status_delay, data_delay in delays:
        check_settings(replica, status_delay, data_delay)

    check_query(expected_replica="node_3")
2021-02-15 13:21:36 +00:00
2021-02-17 17:34:52 +00:00
2021-02-15 13:21:36 +00:00
def test_send_data(started_cluster):
    """node_1 gets sleep_in_send_data=sleep_time; node_2 is expected to answer."""
    users_config = '/etc/clickhouse-server/users.d/users1.xml'
    # (replica, sleep_in_send_tables_status, sleep_in_send_data)
    delays = (
        ('node_1', 0, sleep_time),
        ('node_2', 0, 0),
        ('node_3', 0, 0),
    )

    # Rewrite the profile on every replica first ...
    for replica, status_delay, data_delay in delays:
        NODES[replica].replace_config(
            users_config,
            config.format(sleep_in_send_tables_status=status_delay, sleep_in_send_data=data_delay))

    # ... then wait until each replica reports the new values.
    for replica, status_delay, data_delay in delays:
        check_settings(replica, status_delay, data_delay)

    check_query(expected_replica="node_2")
2021-02-15 13:21:36 +00:00
def test_send_data2(started_cluster):
    """node_1 and node_2 get sleep_in_send_data=sleep_time; node_3 is expected to answer."""
    users_config = '/etc/clickhouse-server/users.d/users1.xml'
    # (replica, sleep_in_send_tables_status, sleep_in_send_data)
    delays = (
        ('node_1', 0, sleep_time),
        ('node_2', 0, sleep_time),
        ('node_3', 0, 0),
    )

    # Rewrite the profile on every replica first ...
    for replica, status_delay, data_delay in delays:
        NODES[replica].replace_config(
            users_config,
            config.format(sleep_in_send_tables_status=status_delay, sleep_in_send_data=data_delay))

    # ... then wait until each replica reports the new values.
    for replica, status_delay, data_delay in delays:
        check_settings(replica, status_delay, data_delay)

    check_query(expected_replica="node_3")
2021-02-15 13:21:36 +00:00
def test_combination1(started_cluster):
    """node_1 sleeps in send-tables-status, node_2 in send-data; node_3 is expected to answer."""
    users_config = '/etc/clickhouse-server/users.d/users1.xml'
    # (replica, sleep_in_send_tables_status, sleep_in_send_data)
    delays = (
        ('node_1', sleep_time, 0),
        ('node_2', 0, sleep_time),
        ('node_3', 0, 0),
    )

    # Rewrite the profile on every replica first ...
    for replica, status_delay, data_delay in delays:
        NODES[replica].replace_config(
            users_config,
            config.format(sleep_in_send_tables_status=status_delay, sleep_in_send_data=data_delay))

    # ... then wait until each replica reports the new values.
    for replica, status_delay, data_delay in delays:
        check_settings(replica, status_delay, data_delay)

    check_query(expected_replica="node_3")
2021-02-15 13:21:36 +00:00
def test_combination2(started_cluster):
    """node_1 sleeps in send-data, node_2 in send-tables-status; node_3 is expected to answer."""
    users_config = '/etc/clickhouse-server/users.d/users1.xml'
    # (replica, sleep_in_send_tables_status, sleep_in_send_data)
    delays = (
        ('node_1', 0, sleep_time),
        ('node_2', sleep_time, 0),
        ('node_3', 0, 0),
    )

    # Rewrite the profile on every replica first ...
    for replica, status_delay, data_delay in delays:
        NODES[replica].replace_config(
            users_config,
            config.format(sleep_in_send_tables_status=status_delay, sleep_in_send_data=data_delay))

    # ... then wait until each replica reports the new values.
    for replica, status_delay, data_delay in delays:
        check_settings(replica, status_delay, data_delay)

    check_query(expected_replica="node_3")
2021-02-15 13:21:36 +00:00
def test_combination3(started_cluster):
    """node_1 and node_3 sleep in send-data, node_2 has a send-tables-status sleep of 1; node_2 is expected to answer."""
    users_config = '/etc/clickhouse-server/users.d/users1.xml'
    # (replica, sleep_in_send_tables_status, sleep_in_send_data)
    delays = (
        ('node_1', 0, sleep_time),
        ('node_2', 1, 0),
        ('node_3', 0, sleep_time),
    )

    # Rewrite the profile on every replica first ...
    for replica, status_delay, data_delay in delays:
        NODES[replica].replace_config(
            users_config,
            config.format(sleep_in_send_tables_status=status_delay, sleep_in_send_data=data_delay))

    # ... then wait until each replica reports the new values.
    for replica, status_delay, data_delay in delays:
        check_settings(replica, status_delay, data_delay)

    check_query(expected_replica="node_2")
2021-02-15 13:21:36 +00:00
def test_combination4(started_cluster):
    """Send-tables-status sleeps of 1/1/2 plus a send-data sleep on node_1; node_2 is expected to answer."""
    users_config = '/etc/clickhouse-server/users.d/users1.xml'
    # (replica, sleep_in_send_tables_status, sleep_in_send_data)
    delays = (
        ('node_1', 1, sleep_time),
        ('node_2', 1, 0),
        ('node_3', 2, 0),
    )

    # Rewrite the profile on every replica first ...
    for replica, status_delay, data_delay in delays:
        NODES[replica].replace_config(
            users_config,
            config.format(sleep_in_send_tables_status=status_delay, sleep_in_send_data=data_delay))

    # ... then wait until each replica reports the new values.
    for replica, status_delay, data_delay in delays:
        check_settings(replica, status_delay, data_delay)

    check_query(expected_replica="node_2")
def test_receive_timeout1(started_cluster):
    """Query with receive_timeout=2 while node_1 and node_2 sleep 3 in send-tables-status; node_3 is expected to answer."""
    # Check the situation when first two replicas get receive timeout
    # in establishing connection, but the third replica is ok.
    users_config = '/etc/clickhouse-server/users.d/users1.xml'
    # (replica, sleep_in_send_tables_status, sleep_in_send_data)
    delays = (
        ('node_1', 3, 0),
        ('node_2', 3, 0),
        ('node_3', 0, 1),
    )

    # Rewrite the profile on every replica first ...
    for replica, status_delay, data_delay in delays:
        NODES[replica].replace_config(
            users_config,
            config.format(sleep_in_send_tables_status=status_delay, sleep_in_send_data=data_delay))

    # ... then wait until each replica reports the new values.
    for replica, status_delay, data_delay in delays:
        check_settings(replica, status_delay, data_delay)

    check_query(expected_replica="node_3", receive_timeout=2)
def test_receive_timeout2(started_cluster):
    """Query with receive_timeout=3 while node_1 sleeps 4 in send-data; node_2 is expected to answer."""
    # Check the situation when first replica get receive timeout
    # in packet receiving but there are replicas in process of
    # connection establishing.
    users_config = '/etc/clickhouse-server/users.d/users1.xml'
    # (replica, sleep_in_send_tables_status, sleep_in_send_data)
    delays = (
        ('node_1', 0, 4),
        ('node_2', 2, 0),
        ('node_3', 2, 0),
    )

    # Rewrite the profile on every replica first ...
    for replica, status_delay, data_delay in delays:
        NODES[replica].replace_config(
            users_config,
            config.format(sleep_in_send_tables_status=status_delay, sleep_in_send_data=data_delay))

    # ... then wait until each replica reports the new values.
    for replica, status_delay, data_delay in delays:
        check_settings(replica, status_delay, data_delay)

    check_query(expected_replica="node_2", receive_timeout=3)
2021-01-19 19:21:06 +00:00