Merge pull request #40001 from ClickHouse/replication_lib_sh_avoid_timeouts

Try to avoid timeouts when checking for replication consistency
This commit is contained in:
Alexander Tokmakov 2022-08-10 14:07:04 +03:00 committed by GitHub
commit 465cc7807a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -25,13 +25,18 @@ function try_sync_replicas()
done
done
i=0
for t in "${tables_arr[@]}"
do
# The replication log may be large, so increase the timeout.
$CLICKHOUSE_CLIENT --receive_timeout 400 -q "SYSTEM SYNC REPLICA $t" || $CLICKHOUSE_CLIENT -q \
"select 'sync failed, queue:', * from system.replication_queue where database=currentDatabase() and table='$t' order by database, table, node_name" &
$CLICKHOUSE_CLIENT --receive_timeout 300 -q "SYSTEM SYNC REPLICA $t" || ($CLICKHOUSE_CLIENT -q \
"select 'sync failed, queue:', * from system.replication_queue where database=currentDatabase() and table='$t' order by database, table, node_name" && exit 1) &
pids[${i}]=$!
i=$((i + 1))
done
for pid in ${pids[*]}; do
wait $pid || (echo "Failed to sync some replicas" && exit 1)
done
wait
echo "Replication did not hang: synced all replicas of $table_name_prefix"
}
@ -73,7 +78,7 @@ function check_replication_consistency()
# SYNC REPLICA is not enough if some MUTATE_PARTs are not assigned yet
wait_for_all_mutations "$table_name_prefix%"
try_sync_replicas "$table_name_prefix"
try_sync_replicas "$table_name_prefix" || exit 1
res=$($CLICKHOUSE_CLIENT -q \
"SELECT