Enable hedged requests integration tests with tsan, use max_distributed_connections=1 to fix possible flakiness

This commit is contained in:
avogar 2023-09-06 17:15:23 +00:00
parent 47701d690a
commit e44580fa47

View File

@ -203,9 +203,6 @@ def update_configs(
def test_stuck_replica(started_cluster): def test_stuck_replica(started_cluster):
if NODES["node"].is_built_with_thread_sanitizer():
pytest.skip("Hedged requests don't work under Thread Sanitizer")
update_configs() update_configs()
cluster.pause_container("node_1") cluster.pause_container("node_1")
@ -236,16 +233,13 @@ def test_stuck_replica(started_cluster):
def test_long_query(started_cluster): def test_long_query(started_cluster):
if NODES["node"].is_built_with_thread_sanitizer():
pytest.skip("Hedged requests don't work under Thread Sanitizer")
update_configs() update_configs()
# Restart to reset pool states. # Restart to reset pool states.
NODES["node"].restart_clickhouse() NODES["node"].restart_clickhouse()
result = NODES["node"].query( result = NODES["node"].query(
"select hostName(), max(id + sleep(1.5)) from distributed settings max_block_size = 1, max_threads = 1;" "select hostName(), max(id + sleep(1.5)) from distributed settings max_block_size = 1, max_threads = 1, max_distributed_connections = 1;"
) )
assert TSV(result) == TSV("node_1\t99") assert TSV(result) == TSV("node_1\t99")
@ -255,18 +249,12 @@ def test_long_query(started_cluster):
def test_send_table_status_sleep(started_cluster): def test_send_table_status_sleep(started_cluster):
if NODES["node"].is_built_with_thread_sanitizer():
pytest.skip("Hedged requests don't work under Thread Sanitizer")
update_configs(node_1_sleep_in_send_tables_status=sleep_time) update_configs(node_1_sleep_in_send_tables_status=sleep_time)
check_query(expected_replica="node_2") check_query(expected_replica="node_2")
check_changing_replica_events(1) check_changing_replica_events(1)
def test_send_table_status_sleep2(started_cluster): def test_send_table_status_sleep2(started_cluster):
if NODES["node"].is_built_with_thread_sanitizer():
pytest.skip("Hedged requests don't work under Thread Sanitizer")
update_configs( update_configs(
node_1_sleep_in_send_tables_status=sleep_time, node_1_sleep_in_send_tables_status=sleep_time,
node_2_sleep_in_send_tables_status=sleep_time, node_2_sleep_in_send_tables_status=sleep_time,
@ -276,18 +264,12 @@ def test_send_table_status_sleep2(started_cluster):
def test_send_data(started_cluster): def test_send_data(started_cluster):
if NODES["node"].is_built_with_thread_sanitizer():
pytest.skip("Hedged requests don't work under Thread Sanitizer")
update_configs(node_1_sleep_in_send_data=sleep_time) update_configs(node_1_sleep_in_send_data=sleep_time)
check_query(expected_replica="node_2") check_query(expected_replica="node_2")
check_changing_replica_events(1) check_changing_replica_events(1)
def test_send_data2(started_cluster): def test_send_data2(started_cluster):
if NODES["node"].is_built_with_thread_sanitizer():
pytest.skip("Hedged requests don't work under Thread Sanitizer")
update_configs( update_configs(
node_1_sleep_in_send_data=sleep_time, node_2_sleep_in_send_data=sleep_time node_1_sleep_in_send_data=sleep_time, node_2_sleep_in_send_data=sleep_time
) )
@ -296,9 +278,6 @@ def test_send_data2(started_cluster):
def test_combination1(started_cluster): def test_combination1(started_cluster):
if NODES["node"].is_built_with_thread_sanitizer():
pytest.skip("Hedged requests don't work under Thread Sanitizer")
update_configs( update_configs(
node_1_sleep_in_send_tables_status=sleep_time, node_1_sleep_in_send_tables_status=sleep_time,
node_2_sleep_in_send_data=sleep_time, node_2_sleep_in_send_data=sleep_time,
@ -308,9 +287,6 @@ def test_combination1(started_cluster):
def test_combination2(started_cluster): def test_combination2(started_cluster):
if NODES["node"].is_built_with_thread_sanitizer():
pytest.skip("Hedged requests don't work under Thread Sanitizer")
update_configs( update_configs(
node_1_sleep_in_send_data=sleep_time, node_1_sleep_in_send_data=sleep_time,
node_2_sleep_in_send_tables_status=sleep_time, node_2_sleep_in_send_tables_status=sleep_time,
@ -320,9 +296,6 @@ def test_combination2(started_cluster):
def test_combination3(started_cluster): def test_combination3(started_cluster):
if NODES["node"].is_built_with_thread_sanitizer():
pytest.skip("Hedged requests don't work under Thread Sanitizer")
update_configs( update_configs(
node_1_sleep_in_send_data=sleep_time, node_1_sleep_in_send_data=sleep_time,
node_2_sleep_in_send_tables_status=1000, node_2_sleep_in_send_tables_status=1000,
@ -333,9 +306,6 @@ def test_combination3(started_cluster):
def test_combination4(started_cluster): def test_combination4(started_cluster):
if NODES["node"].is_built_with_thread_sanitizer():
pytest.skip("Hedged requests don't work under Thread Sanitizer")
update_configs( update_configs(
node_1_sleep_in_send_tables_status=1000, node_1_sleep_in_send_tables_status=1000,
node_1_sleep_in_send_data=sleep_time, node_1_sleep_in_send_data=sleep_time,
@ -347,9 +317,6 @@ def test_combination4(started_cluster):
def test_receive_timeout1(started_cluster): def test_receive_timeout1(started_cluster):
if NODES["node"].is_built_with_thread_sanitizer():
pytest.skip("Hedged requests don't work under Thread Sanitizer")
# Check the situation when first two replicas get receive timeout # Check the situation when first two replicas get receive timeout
# in establishing connection, but the third replica is ok. # in establishing connection, but the third replica is ok.
update_configs( update_configs(
@ -362,9 +329,6 @@ def test_receive_timeout1(started_cluster):
def test_receive_timeout2(started_cluster): def test_receive_timeout2(started_cluster):
if NODES["node"].is_built_with_thread_sanitizer():
pytest.skip("Hedged requests don't work under Thread Sanitizer")
# Check the situation when the first replica gets a receive timeout # Check the situation when the first replica gets a receive timeout
# in packet receiving but there are replicas in process of # in packet receiving but there are replicas in process of
# connection establishing. # connection establishing.
@ -378,9 +342,6 @@ def test_receive_timeout2(started_cluster):
def test_initial_receive_timeout(started_cluster): def test_initial_receive_timeout(started_cluster):
if NODES["node"].is_built_with_thread_sanitizer():
pytest.skip("Hedged requests don't work under Thread Sanitizer")
# Check the situation when replicas don't respond after # Check the situation when replicas don't respond after
# receiving query (so, no packets were sent to the initiator) # receiving query (so, no packets were sent to the initiator)
update_configs( update_configs(
@ -399,9 +360,6 @@ def test_initial_receive_timeout(started_cluster):
def test_async_connect(started_cluster): def test_async_connect(started_cluster):
if NODES["node"].is_built_with_thread_sanitizer():
pytest.skip("Hedged requests don't work under Thread Sanitizer")
update_configs() update_configs()
NODES["node"].restart_clickhouse() NODES["node"].restart_clickhouse()
@ -414,7 +372,7 @@ def test_async_connect(started_cluster):
) )
NODES["node"].query( NODES["node"].query(
"SELECT hostName(), id FROM distributed_connect ORDER BY id LIMIT 1 SETTINGS prefer_localhost_replica = 0, connect_timeout_with_failover_ms=5000, async_query_sending_for_remote=0, max_threads=1" "SELECT hostName(), id FROM distributed_connect ORDER BY id LIMIT 1 SETTINGS prefer_localhost_replica = 0, connect_timeout_with_failover_ms=5000, async_query_sending_for_remote=0, max_threads=1, max_distributed_connections=1"
) )
check_changing_replica_events(2) check_changing_replica_events(2)
check_if_query_sending_was_not_suspended() check_if_query_sending_was_not_suspended()
@ -423,7 +381,7 @@ def test_async_connect(started_cluster):
NODES["node"].restart_clickhouse() NODES["node"].restart_clickhouse()
NODES["node"].query( NODES["node"].query(
"SELECT hostName(), id FROM distributed_connect ORDER BY id LIMIT 1 SETTINGS prefer_localhost_replica = 0, connect_timeout_with_failover_ms=5000, async_query_sending_for_remote=1, max_threads=1" "SELECT hostName(), id FROM distributed_connect ORDER BY id LIMIT 1 SETTINGS prefer_localhost_replica = 0, connect_timeout_with_failover_ms=5000, async_query_sending_for_remote=1, max_threads=1, max_distributed_connections=1"
) )
check_changing_replica_events(2) check_changing_replica_events(2)
check_if_query_sending_was_suspended() check_if_query_sending_was_suspended()
@ -432,9 +390,6 @@ def test_async_connect(started_cluster):
def test_async_query_sending(started_cluster): def test_async_query_sending(started_cluster):
if NODES["node"].is_built_with_thread_sanitizer():
pytest.skip("Hedged requests don't work under Thread Sanitizer")
update_configs( update_configs(
node_1_sleep_after_receiving_query=5000, node_1_sleep_after_receiving_query=5000,
node_2_sleep_after_receiving_query=5000, node_2_sleep_after_receiving_query=5000,
@ -459,13 +414,13 @@ def test_async_query_sending(started_cluster):
NODES["node"].query( NODES["node"].query(
"SELECT hostName(), id FROM distributed_query_sending ORDER BY id LIMIT 1 SETTINGS" "SELECT hostName(), id FROM distributed_query_sending ORDER BY id LIMIT 1 SETTINGS"
" prefer_localhost_replica = 0, async_query_sending_for_remote=0, max_threads = 1" " prefer_localhost_replica = 0, async_query_sending_for_remote=0, max_threads = 1, max_distributed_connections=1"
) )
check_if_query_sending_was_not_suspended() check_if_query_sending_was_not_suspended()
NODES["node"].query( NODES["node"].query(
"SELECT hostName(), id FROM distributed_query_sending ORDER BY id LIMIT 1 SETTINGS" "SELECT hostName(), id FROM distributed_query_sending ORDER BY id LIMIT 1 SETTINGS"
" prefer_localhost_replica = 0, async_query_sending_for_remote=1, max_threads = 1" " prefer_localhost_replica = 0, async_query_sending_for_remote=1, max_threads = 1, max_distributed_connections=1"
) )
check_if_query_sending_was_suspended() check_if_query_sending_was_suspended()