ClickHouse/tests/integration/test_hedged_requests_parallel/test.py

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

237 lines
7.0 KiB
Python
Raw Normal View History

2021-02-02 12:14:31 +00:00
import os
import sys
import time
import pytest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from helpers.cluster import ClickHouseCluster
cluster = ClickHouseCluster(__file__)
2021-02-15 13:21:36 +00:00
# Replica instances are registered lazily by the started_cluster fixture;
# "node" is the initiator, node_1..node_4 are the remote replicas.
NODES = {f"node_{i}": None for i in (1, 2, 3, 4)}
NODES["node"] = None

# Sleep time in milliseconds.
sleep_time = 30000
2021-02-02 12:14:31 +00:00
2021-02-02 12:14:31 +00:00
@pytest.fixture(scope="module")
def started_cluster():
    """Start a cluster of one initiator plus four replicas.

    The initiator ("node") gets the remote_servers/users configs and a
    Distributed table over 'test_cluster'; every replica gets its own
    users1.xml (later rewritten by update_configs) plus a local MergeTree
    table pre-filled with 100 rows.

    Yields the running ClickHouseCluster; shuts it down on teardown.
    """
    cluster = ClickHouseCluster(__file__)
    NODES["node"] = cluster.add_instance(
        "node",
        stay_alive=True,
        main_configs=["configs/remote_servers.xml"],
        user_configs=["configs/users.xml"],
    )

    for name in NODES:
        if name != "node":
            NODES[name] = cluster.add_instance(
                name, user_configs=["configs/users1.xml"]
            )

    try:
        cluster.start()

        # Create and populate the local table on every instance
        # (the initiator included, although queries go through 'distributed').
        for node in NODES.values():
            node.query(
                """CREATE TABLE test_hedged (id UInt32, date Date) ENGINE =
MergeTree() ORDER BY id PARTITION BY toYYYYMM(date)"""
            )

            node.query(
                "INSERT INTO test_hedged SELECT number, toDateTime(number) FROM numbers(100)"
            )

        NODES["node"].query(
            """CREATE TABLE distributed (id UInt32, date Date) ENGINE =
Distributed('test_cluster', 'default', 'test_hedged')"""
        )

        yield cluster
    finally:
        cluster.shutdown()
2021-02-15 13:21:36 +00:00
config = """<clickhouse>
2021-02-15 13:21:36 +00:00
<profiles>
<default>
2021-03-22 19:18:06 +00:00
<sleep_in_send_tables_status_ms>{sleep_in_send_tables_status_ms}</sleep_in_send_tables_status_ms>
<sleep_in_send_data_ms>{sleep_in_send_data_ms}</sleep_in_send_data_ms>
2021-02-15 13:21:36 +00:00
</default>
</profiles>
</clickhouse>"""
2021-02-15 13:21:36 +00:00
QUERY_1 = "SELECT count() FROM distributed"
QUERY_2 = "SELECT * FROM distributed"


def check_query(query=QUERY_1):
    """Run *query* on the initiator after a restart and assert it is fast.

    Without hedged requests the select would last more than 30 seconds
    (the replicas sleep via the injected settings); with hedged requests
    it finishes in roughly 1-2 seconds.
    """
    initiator = NODES["node"]
    initiator.restart_clickhouse()

    started_at = time.time()
    initiator.query(query)
    elapsed = time.time() - started_at

    print("Query time:", elapsed)

    assert elapsed < 5
2021-03-22 19:18:06 +00:00
def check_settings(node_name, sleep_in_send_tables_status_ms, sleep_in_send_data_ms):
    """Poll *node_name* until both injected sleep settings match the expected values.

    replace_config() is not applied instantly, so poll system.settings
    (1000 attempts, 0.1 s apart, i.e. up to ~100 s) until the new profile
    is visible.

    Raises AssertionError (with a diagnostic message) if the settings
    never converge.
    """
    node = NODES[node_name]
    for _ in range(1000):
        status_ms = node.http_query(
            "SELECT value FROM system.settings WHERE name='sleep_in_send_tables_status_ms'"
        )
        data_ms = node.http_query(
            "SELECT value FROM system.settings WHERE name='sleep_in_send_data_ms'"
        )
        if (
            int(status_ms) == sleep_in_send_tables_status_ms
            and int(data_ms) == sleep_in_send_data_ms
        ):
            return
        time.sleep(0.1)
    raise AssertionError(
        f"settings on {node_name} did not become "
        f"({sleep_in_send_tables_status_ms}, {sleep_in_send_data_ms})"
    )
def check_changing_replica_events(expected_count):
    """Assert that at least *expected_count* replica switches were recorded."""
    events = NODES["node"].query(
        "SELECT value FROM system.events WHERE event='HedgedRequestsChangeReplica'"
    )

    # If server load is high we can see more than expected
    # replica change events, but never less than expected
    assert int(events) >= expected_count
2021-03-22 19:18:06 +00:00
def update_configs(
    node_1_sleep_in_send_tables_status=0,
    node_1_sleep_in_send_data=0,
    node_2_sleep_in_send_tables_status=0,
    node_2_sleep_in_send_data=0,
    node_3_sleep_in_send_tables_status=0,
    node_3_sleep_in_send_data=0,
    node_4_sleep_in_send_tables_status=0,
    node_4_sleep_in_send_data=0,
):
    """Push per-replica fault-injection delays (ms) and wait until applied.

    Each replica's users1.xml is rewritten from the `config` template with
    its sleep_in_send_tables_status_ms / sleep_in_send_data_ms values; then
    check_settings() blocks until every replica reports the new values.
    """
    delays = [
        ("node_1", node_1_sleep_in_send_tables_status, node_1_sleep_in_send_data),
        ("node_2", node_2_sleep_in_send_tables_status, node_2_sleep_in_send_data),
        ("node_3", node_3_sleep_in_send_tables_status, node_3_sleep_in_send_data),
        ("node_4", node_4_sleep_in_send_tables_status, node_4_sleep_in_send_data),
    ]

    # Push all configs first, then verify: matches the original ordering and
    # lets the replicas reload in parallel.
    for name, tables_status_ms, data_ms in delays:
        NODES[name].replace_config(
            "/etc/clickhouse-server/users.d/users1.xml",
            config.format(
                sleep_in_send_tables_status_ms=tables_status_ms,
                sleep_in_send_data_ms=data_ms,
            ),
        )

    for name, tables_status_ms, data_ms in delays:
        check_settings(name, tables_status_ms, data_ms)
2021-03-02 14:05:33 +00:00
2021-03-22 19:18:06 +00:00
def test_send_table_status_sleep(started_cluster):
    """Delay TablesStatus on the first two replicas: the initiator must
    switch away from both, producing at least two replica-change events."""
    initiator = NODES["node"]
    if initiator.is_built_with_thread_sanitizer():
        pytest.skip("Hedged requests don't work under Thread Sanitizer")

    update_configs(
        node_1_sleep_in_send_tables_status=sleep_time,
        node_2_sleep_in_send_tables_status=sleep_time,
    )

    check_query()
    check_changing_replica_events(2)
2021-02-15 13:21:36 +00:00
2021-03-22 19:18:06 +00:00
def test_send_data(started_cluster):
    """Delay data packets on the first two replicas: the initiator must
    switch away from both, producing at least two replica-change events."""
    initiator = NODES["node"]
    if initiator.is_built_with_thread_sanitizer():
        pytest.skip("Hedged requests don't work under Thread Sanitizer")

    update_configs(
        node_1_sleep_in_send_data=sleep_time,
        node_2_sleep_in_send_data=sleep_time,
    )

    check_query()
    check_changing_replica_events(2)
2021-02-15 13:21:36 +00:00
2021-03-02 14:05:33 +00:00
2021-03-22 19:18:06 +00:00
def test_combination1(started_cluster):
    """Mix short TablesStatus delays (nodes 1-2) with a long data delay
    (node 3): expect at least three replica-change events."""
    initiator = NODES["node"]
    if initiator.is_built_with_thread_sanitizer():
        pytest.skip("Hedged requests don't work under Thread Sanitizer")

    update_configs(
        node_1_sleep_in_send_tables_status=1000,
        node_2_sleep_in_send_tables_status=1000,
        node_3_sleep_in_send_data=sleep_time,
    )

    check_query()
    check_changing_replica_events(3)
2021-02-15 13:21:36 +00:00
2021-02-17 17:34:52 +00:00
def test_combination2(started_cluster):
    """Alternate long data delays (nodes 1, 3) with short TablesStatus
    delays (nodes 2, 4): expect at least four replica-change events."""
    initiator = NODES["node"]
    if initiator.is_built_with_thread_sanitizer():
        pytest.skip("Hedged requests don't work under Thread Sanitizer")

    update_configs(
        node_1_sleep_in_send_data=sleep_time,
        node_2_sleep_in_send_tables_status=1000,
        node_3_sleep_in_send_data=sleep_time,
        node_4_sleep_in_send_tables_status=1000,
    )

    check_query()
    check_changing_replica_events(4)
def test_query_with_no_data_to_sample(started_cluster):
    """Delay data packets on the first two replicas and run a plain
    SELECT * (QUERY_2)."""
    initiator = NODES["node"]
    if initiator.is_built_with_thread_sanitizer():
        pytest.skip("Hedged requests don't work under Thread Sanitizer")

    update_configs(
        node_1_sleep_in_send_data=sleep_time,
        node_2_sleep_in_send_data=sleep_time,
    )

    # When there is no way to sample data, the whole query will be performed by
    # the first replica and the second replica will just send EndOfStream,
    # so we will change only the first replica here.
    check_query(query=QUERY_2)
    check_changing_replica_events(1)