tests: fix 01563_distributed_query_finish flakiness (due to system.*_log_sender)

From the CI logs [1], while this test was executing, the server reported:

    2024.07.05 19:29:45.856853 [ 1328 ] {} <Error> system.zookeeper_log_sender.DistributedInsertQueue.default: Code: 210. DB::NetException: Connection reset by peer, while writing to socket (172.17.0.2:38546 -> 3.16.142.177:9440): While sending /var/lib/clickhouse/store/aa8/aa8f6e66-486b-4dc3-85a1-4941e69cb99f/shard1_replica1/447.bin. (NETWORK_ERROR), Stack trace (when copying this message, always include the lines below):

  [1]: https://s3.amazonaws.com/clickhouse-test-reports/66162/daae5d4d4661c780b6368950ec484415ca3a0492/stateless_tests__aarch64_.html

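So let's add retries: rerun the query (up to 100 attempts) until the NETWORK_ERROR counter in system.errors does not change between the snapshots taken before and after the query.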

Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
This commit is contained in:
Azat Khuzhin 2024-07-06 07:42:15 +02:00
parent 77e7850d3c
commit ffd6bf28b1
2 changed files with 17 additions and 13 deletions

View File

@ -1,2 +1 @@
1,0
NETWORK_ERROR=0

View File

@ -19,20 +19,25 @@ create table dist_01247 as data_01247 engine=Distributed(test_cluster_two_shards
select * from dist_01247 format Null;
EOL
network_errors_before=$($CLICKHOUSE_CLIENT -q "SELECT value FROM system.errors WHERE name = 'NETWORK_ERROR'")
# NOTE: it is possible to get NETWORK_ERROR even with no-parallel, at least due to system.*_log_sender sending to the cloud
for ((i = 0; i < 100; ++i)); do
network_errors_before=$($CLICKHOUSE_CLIENT -q "SELECT value FROM system.errors WHERE name = 'NETWORK_ERROR'")
opts=(
"--max_distributed_connections=1"
"--optimize_skip_unused_shards=1"
"--optimize_distributed_group_by_sharding_key=1"
"--prefer_localhost_replica=0"
)
$CLICKHOUSE_CLIENT "${opts[@]}" --format CSV -nm <<EOL
select count(), * from dist_01247 group by number order by number limit 1;
EOL
opts=(
"--max_distributed_connections=1"
"--optimize_skip_unused_shards=1"
"--optimize_distributed_group_by_sharding_key=1"
"--prefer_localhost_replica=0"
)
$CLICKHOUSE_CLIENT "${opts[@]}" --format CSV -nm -q "select count(), * from dist_01247 group by number order by number limit 1 format Null"
# expect zero new network errors
network_errors_after=$($CLICKHOUSE_CLIENT -q "SELECT value FROM system.errors WHERE name = 'NETWORK_ERROR'")
# expect zero new network errors
network_errors_after=$($CLICKHOUSE_CLIENT -q "SELECT value FROM system.errors WHERE name = 'NETWORK_ERROR'")
if [[ $((network_errors_after-network_errors_before)) -eq 0 ]]; then
break
fi
done
echo NETWORK_ERROR=$(( network_errors_after-network_errors_before ))
$CLICKHOUSE_CLIENT -q "drop table data_01247"