From 12b422f82cf2bf99023617e240bc1a5d3861918a Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Tue, 10 Aug 2021 16:34:40 +0300 Subject: [PATCH 01/15] check replication consistency after 993-like tests --- ...ts_race_condition_zookeeper_long.reference | 2 ++ ...tem_parts_race_condition_zookeeper_long.sh | 23 ++++++------ ...ts_race_condition_drop_zookeeper.reference | 2 ++ ...tem_parts_race_condition_drop_zookeeper.sh | 4 +++ ..._alter_add_drop_column_zookeeper.reference | 2 ++ ...arallel_alter_add_drop_column_zookeeper.sh | 4 +++ ...llel_alter_modify_zookeeper_long.reference | 2 ++ ...79_parallel_alter_modify_zookeeper_long.sh | 4 +++ .../01154_move_partition_long.reference | 4 ++- .../0_stateless/01154_move_partition_long.sh | 11 +++--- ...utations_kill_many_replicas_long.reference | 2 ++ ...alter_mutations_kill_many_replicas_long.sh | 10 +++--- ...and_normal_merges_zookeeper_long.reference | 2 ++ ...nt_ttl_and_normal_merges_zookeeper_long.sh | 7 ++-- .../0_stateless/mergetree_mutations.lib | 20 +++++++++++ tests/queries/0_stateless/replication.lib | 35 +++++++++++++++++++ 16 files changed, 108 insertions(+), 26 deletions(-) create mode 100755 tests/queries/0_stateless/replication.lib diff --git a/tests/queries/0_stateless/00992_system_parts_race_condition_zookeeper_long.reference b/tests/queries/0_stateless/00992_system_parts_race_condition_zookeeper_long.reference index e69de29bb2d..8a6b9c4f877 100644 --- a/tests/queries/0_stateless/00992_system_parts_race_condition_zookeeper_long.reference +++ b/tests/queries/0_stateless/00992_system_parts_race_condition_zookeeper_long.reference @@ -0,0 +1,2 @@ +Replication did not hang: synced all replicas of alter_table +0 1 diff --git a/tests/queries/0_stateless/00992_system_parts_race_condition_zookeeper_long.sh b/tests/queries/0_stateless/00992_system_parts_race_condition_zookeeper_long.sh index 793fc8e9575..19f72120912 100755 --- a/tests/queries/0_stateless/00992_system_parts_race_condition_zookeeper_long.sh +++ b/tests/queries/0_stateless/00992_system_parts_race_condition_zookeeper_long.sh @@ -3,15 +3,17 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh +# shellcheck source=./replication.lib +. "$CURDIR"/replication.lib set -e $CLICKHOUSE_CLIENT -n -q " - DROP TABLE IF EXISTS alter_table; - DROP TABLE IF EXISTS alter_table2; + DROP TABLE IF EXISTS alter_table0; + DROP TABLE IF EXISTS alter_table1; - CREATE TABLE alter_table (a UInt8, b Int16, c Float32, d String, e Array(UInt8), f Nullable(UUID), g Tuple(UInt8, UInt16)) ENGINE = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/alter_table', 'r1') ORDER BY a PARTITION BY b % 10 SETTINGS old_parts_lifetime = 1, cleanup_delay_period = 1, cleanup_delay_period_random_add = 0; - CREATE TABLE alter_table2 (a UInt8, b Int16, c Float32, d String, e Array(UInt8), f Nullable(UUID), g Tuple(UInt8, UInt16)) ENGINE = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/alter_table', 'r2') ORDER BY a PARTITION BY b % 10 SETTINGS old_parts_lifetime = 1, cleanup_delay_period = 1, cleanup_delay_period_random_add = 0 + CREATE TABLE alter_table0 (a UInt8, b Int16, c Float32, d String, e Array(UInt8), f Nullable(UUID), g Tuple(UInt8, UInt16)) ENGINE = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/alter_table', 'r1') ORDER BY a PARTITION BY b % 10 SETTINGS old_parts_lifetime = 1, cleanup_delay_period = 1, cleanup_delay_period_random_add = 0; + CREATE TABLE alter_table1 (a UInt8, b Int16, c Float32, d String, e Array(UInt8), f Nullable(UUID), g Tuple(UInt8, UInt16)) ENGINE = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/alter_table', 'r2') ORDER BY a PARTITION BY b % 10 SETTINGS old_parts_lifetime = 1, cleanup_delay_period = 1, cleanup_delay_period_random_add = 0 " function thread1() @@ -22,22 +24,22 @@ function thread1() function thread2() { - while true; do $CLICKHOUSE_CLIENT -n --query "ALTER TABLE alter_table ADD COLUMN h String DEFAULT '0'; ALTER TABLE alter_table MODIFY COLUMN h UInt64; ALTER TABLE alter_table DROP COLUMN h;"; done + while true; do $CLICKHOUSE_CLIENT -n --query "ALTER TABLE alter_table0 ADD COLUMN h String DEFAULT '0'; ALTER TABLE alter_table0 MODIFY COLUMN h UInt64; ALTER TABLE alter_table0 DROP COLUMN h;"; done } function thread3() { - while true; do $CLICKHOUSE_CLIENT -q "INSERT INTO alter_table SELECT rand(1), rand(2), 1 / rand(3), toString(rand(4)), [rand(5), rand(6)], rand(7) % 2 ? NULL : generateUUIDv4(), (rand(8), rand(9)) FROM numbers(100000)"; done + while true; do $CLICKHOUSE_CLIENT -q "INSERT INTO alter_table0 SELECT rand(1), rand(2), 1 / rand(3), toString(rand(4)), [rand(5), rand(6)], rand(7) % 2 ? NULL : generateUUIDv4(), (rand(8), rand(9)) FROM numbers(100000)"; done } function thread4() { - while true; do $CLICKHOUSE_CLIENT -q "OPTIMIZE TABLE alter_table FINAL"; done + while true; do $CLICKHOUSE_CLIENT -q "OPTIMIZE TABLE alter_table0 FINAL"; done } function thread5() { - while true; do $CLICKHOUSE_CLIENT -q "ALTER TABLE alter_table DELETE WHERE cityHash64(a,b,c,d,e,g) % 1048576 < 524288"; done + while true; do $CLICKHOUSE_CLIENT -q "ALTER TABLE alter_table0 DELETE WHERE cityHash64(a,b,c,d,e,g) % 1048576 < 524288"; done } # https://stackoverflow.com/questions/9954794/execute-a-shell-function-with-timeout @@ -74,8 +76,9 @@ timeout $TIMEOUT bash -c thread4 2> /dev/null & timeout $TIMEOUT bash -c thread5 2> /dev/null & wait +check_replication_consistency "alter_table" "count(), sum(a), sum(b), round(sum(c))" -$CLICKHOUSE_CLIENT -n -q "DROP TABLE alter_table;" & -$CLICKHOUSE_CLIENT -n -q "DROP TABLE alter_table2;" & +$CLICKHOUSE_CLIENT -n -q "DROP TABLE alter_table0;" & +$CLICKHOUSE_CLIENT -n -q "DROP TABLE alter_table1;" & wait diff --git a/tests/queries/0_stateless/00993_system_parts_race_condition_drop_zookeeper.reference b/tests/queries/0_stateless/00993_system_parts_race_condition_drop_zookeeper.reference index e69de29bb2d..0d13bb62797 100644 --- a/tests/queries/0_stateless/00993_system_parts_race_condition_drop_zookeeper.reference +++ b/tests/queries/0_stateless/00993_system_parts_race_condition_drop_zookeeper.reference @@ -0,0 +1,2 @@ +Replication did not hang: synced all replicas of alter_table_ +0 1 diff --git a/tests/queries/0_stateless/00993_system_parts_race_condition_drop_zookeeper.sh b/tests/queries/0_stateless/00993_system_parts_race_condition_drop_zookeeper.sh index 32fe31f68c6..bdad08fb0e1 100755 --- a/tests/queries/0_stateless/00993_system_parts_race_condition_drop_zookeeper.sh +++ b/tests/queries/0_stateless/00993_system_parts_race_condition_drop_zookeeper.sh @@ -3,6 +3,8 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh +# shellcheck source=./replication.lib +. "$CURDIR"/replication.lib set -e @@ -99,6 +101,8 @@ timeout $TIMEOUT bash -c thread6 2>&1 | grep "was not completely removed from Zo wait +check_replication_consistency "alter_table_" "count(), sum(a), sum(b), round(sum(c))" + for i in {0..9}; do $CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS alter_table_$i" 2>&1 | grep "was not completely removed from ZooKeeper" & done diff --git a/tests/queries/0_stateless/01079_parallel_alter_add_drop_column_zookeeper.reference b/tests/queries/0_stateless/01079_parallel_alter_add_drop_column_zookeeper.reference index af33a5bfc3f..34a89ec4d07 100644 --- a/tests/queries/0_stateless/01079_parallel_alter_add_drop_column_zookeeper.reference +++ b/tests/queries/0_stateless/01079_parallel_alter_add_drop_column_zookeeper.reference @@ -1,6 +1,8 @@ Starting alters Finishing alters Equal number of columns +Replication did not hang: synced all replicas of concurrent_alter_add_drop_ +0 1 0 0 0 diff --git a/tests/queries/0_stateless/01079_parallel_alter_add_drop_column_zookeeper.sh b/tests/queries/0_stateless/01079_parallel_alter_add_drop_column_zookeeper.sh index fd0b53cf122..4b67a03760b 100755 --- a/tests/queries/0_stateless/01079_parallel_alter_add_drop_column_zookeeper.sh +++ b/tests/queries/0_stateless/01079_parallel_alter_add_drop_column_zookeeper.sh @@ -3,6 +3,8 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh +# shellcheck source=./replication.lib +. "$CURDIR"/replication.lib REPLICAS=3 @@ -101,6 +103,8 @@ while [[ $(timeout 120 ${CLICKHOUSE_CLIENT} --query "ALTER TABLE concurrent_alte sleep 1 done +check_replication_consistency "concurrent_alter_add_drop_" "count(), sum(key), sum(cityHash64(value0))" + for i in $(seq $REPLICAS); do $CLICKHOUSE_CLIENT --query "SYSTEM SYNC REPLICA concurrent_alter_add_drop_$i" $CLICKHOUSE_CLIENT --query "SELECT COUNT() FROM system.mutations WHERE is_done = 0 and table = 'concurrent_alter_add_drop_$i'" diff --git a/tests/queries/0_stateless/01079_parallel_alter_modify_zookeeper_long.reference b/tests/queries/0_stateless/01079_parallel_alter_modify_zookeeper_long.reference index ff9c6824f00..15223e4fd99 100644 --- a/tests/queries/0_stateless/01079_parallel_alter_modify_zookeeper_long.reference +++ b/tests/queries/0_stateless/01079_parallel_alter_modify_zookeeper_long.reference @@ -5,6 +5,8 @@ 1725 Starting alters Finishing alters +Replication did not hang: synced all replicas of concurrent_alter_mt_ +0 1 1 0 1 diff --git a/tests/queries/0_stateless/01079_parallel_alter_modify_zookeeper_long.sh b/tests/queries/0_stateless/01079_parallel_alter_modify_zookeeper_long.sh index 37d880bdce7..acbb01a1c68 100755 --- a/tests/queries/0_stateless/01079_parallel_alter_modify_zookeeper_long.sh +++ b/tests/queries/0_stateless/01079_parallel_alter_modify_zookeeper_long.sh @@ -3,6 +3,8 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh +# shellcheck source=./replication.lib +. "$CURDIR"/replication.lib REPLICAS=5 @@ -112,6 +114,8 @@ while [[ $(timeout 120 ${CLICKHOUSE_CLIENT} --query "ALTER TABLE concurrent_alte sleep 1 done +check_replication_consistency "concurrent_alter_mt_" "count(), sum(key), sum(cityHash64(value1)), sum(cityHash64(value2))" + for i in $(seq $REPLICAS); do $CLICKHOUSE_CLIENT --query "SYSTEM SYNC REPLICA concurrent_alter_mt_$i" $CLICKHOUSE_CLIENT --query "SELECT SUM(toUInt64(value1)) > $INITIAL_SUM FROM concurrent_alter_mt_$i" diff --git a/tests/queries/0_stateless/01154_move_partition_long.reference b/tests/queries/0_stateless/01154_move_partition_long.reference index c6d9204ed02..40aaa81456a 100644 --- a/tests/queries/0_stateless/01154_move_partition_long.reference +++ b/tests/queries/0_stateless/01154_move_partition_long.reference @@ -1 +1,3 @@ -Replication did not hang +Replication did not hang: synced all replicas of dst_ +0 1 +Replication did not hang: synced all replicas of src_ diff --git a/tests/queries/0_stateless/01154_move_partition_long.sh b/tests/queries/0_stateless/01154_move_partition_long.sh index 1b5985b9942..541550160f2 100755 --- a/tests/queries/0_stateless/01154_move_partition_long.sh +++ b/tests/queries/0_stateless/01154_move_partition_long.sh @@ -3,6 +3,8 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh +# shellcheck source=./replication.lib +. "$CURDIR"/replication.lib declare -A engines engines[0]="MergeTree" @@ -116,13 +118,8 @@ timeout $TIMEOUT bash -c optimize_thread & timeout $TIMEOUT bash -c drop_part_thread & wait -for ((i=0; i<16; i++)) do - # The size of log is big, so increase timeout. - $CLICKHOUSE_CLIENT --receive_timeout 600 -q "SYSTEM SYNC REPLICA dst_$i" & - $CLICKHOUSE_CLIENT --receive_timeout 600 -q "SYSTEM SYNC REPLICA src_$i" 2>/dev/null & -done -wait -echo "Replication did not hang" +check_replication_consistency "dst_" "count(), sum(p), sum(k), sum(v)" +try_sync_replicas "src_" for ((i=0; i<16; i++)) do $CLICKHOUSE_CLIENT -q "DROP TABLE dst_$i" 2>&1| grep -Fv "is already started to be removing" & diff --git a/tests/queries/0_stateless/01593_concurrent_alter_mutations_kill_many_replicas_long.reference b/tests/queries/0_stateless/01593_concurrent_alter_mutations_kill_many_replicas_long.reference index f7c65e36be4..5a3c0201732 100644 --- a/tests/queries/0_stateless/01593_concurrent_alter_mutations_kill_many_replicas_long.reference +++ b/tests/queries/0_stateless/01593_concurrent_alter_mutations_kill_many_replicas_long.reference @@ -14,3 +14,5 @@ CREATE TABLE default.concurrent_kill_4\n(\n `key` UInt64,\n `value` Int64\ Metadata version on replica 5 equal with first replica, OK CREATE TABLE default.concurrent_kill_5\n(\n `key` UInt64,\n `value` Int64\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/01593_concurrent_alter_mutations_kill_many_replicas_long_default/{shard}\', \'{replica}5\')\nORDER BY key\nSETTINGS max_replicated_mutations_in_queue = 1000, number_of_free_entries_in_pool_to_execute_mutation = 0, max_replicated_merges_in_queue = 1000, index_granularity = 8192 499999500000 +Replication did not hang: synced all replicas of concurrent_kill_ +0 1 diff --git a/tests/queries/0_stateless/01593_concurrent_alter_mutations_kill_many_replicas_long.sh b/tests/queries/0_stateless/01593_concurrent_alter_mutations_kill_many_replicas_long.sh index e263750c431..bb04facba15 100755 --- a/tests/queries/0_stateless/01593_concurrent_alter_mutations_kill_many_replicas_long.sh +++ b/tests/queries/0_stateless/01593_concurrent_alter_mutations_kill_many_replicas_long.sh @@ -3,6 +3,8 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh +# shellcheck source=./replication.lib +. "$CURDIR"/replication.lib REPLICAS=5 @@ -59,10 +61,6 @@ timeout $TIMEOUT bash -c kill_mutation_thread 2> /dev/null & wait -for i in $(seq $REPLICAS); do - $CLICKHOUSE_CLIENT --query "SYSTEM SYNC REPLICA concurrent_kill_$i" -done - # with timeout alter query can be not finished yet, so to execute new alter # we use retries counter=0 @@ -80,7 +78,7 @@ while true; do done -metadata_version=$($CLICKHOUSE_CLIENT --query "SELECT value FROM system.zookeeper WHERE path = '/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/s1/replicas/r1$i/' and name = 'metadata_version'") +metadata_version=$($CLICKHOUSE_CLIENT --query "SELECT value FROM system.zookeeper WHERE path = '/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/s1/replicas/r11/' and name = 'metadata_version'") for i in $(seq $REPLICAS); do replica_metadata_version=$($CLICKHOUSE_CLIENT --query "SELECT value FROM system.zookeeper WHERE path = '/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/s1/replicas/r1$i/' and name = 'metadata_version'") @@ -95,6 +93,8 @@ done $CLICKHOUSE_CLIENT --query "SELECT sum(value) FROM concurrent_kill_1" +check_replication_consistency "concurrent_kill_" "count(), sum(key), sum(cityHash64(value))" + for i in $(seq $REPLICAS); do $CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS concurrent_kill_$i" done diff --git a/tests/queries/0_stateless/01921_concurrent_ttl_and_normal_merges_zookeeper_long.reference b/tests/queries/0_stateless/01921_concurrent_ttl_and_normal_merges_zookeeper_long.reference index d00491fd7e5..25e14257d8d 100644 --- a/tests/queries/0_stateless/01921_concurrent_ttl_and_normal_merges_zookeeper_long.reference +++ b/tests/queries/0_stateless/01921_concurrent_ttl_and_normal_merges_zookeeper_long.reference @@ -1 +1,3 @@ +Replication did not hang: synced all replicas of ttl_table +0 1 1 diff --git a/tests/queries/0_stateless/01921_concurrent_ttl_and_normal_merges_zookeeper_long.sh b/tests/queries/0_stateless/01921_concurrent_ttl_and_normal_merges_zookeeper_long.sh index 13086879e0d..3daab1e9fdd 100755 --- a/tests/queries/0_stateless/01921_concurrent_ttl_and_normal_merges_zookeeper_long.sh +++ b/tests/queries/0_stateless/01921_concurrent_ttl_and_normal_merges_zookeeper_long.sh @@ -3,6 +3,8 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh +# shellcheck source=./replication.lib +. "$CURDIR"/replication.lib NUM_REPLICAS=5 @@ -59,13 +61,12 @@ timeout $TIMEOUT bash -c optimize_thread 2> /dev/null & wait -for i in $(seq 1 $NUM_REPLICAS); do - $CLICKHOUSE_CLIENT --query "SYSTEM SYNC REPLICA ttl_table$i" -done +check_replication_consistency "ttl_table" "count(), sum(toUInt64(key))" $CLICKHOUSE_CLIENT --query "SELECT * FROM system.replication_queue where table like 'ttl_table%' and database = '${CLICKHOUSE_DATABASE}' and type='MERGE_PARTS' and last_exception != '' FORMAT Vertical" $CLICKHOUSE_CLIENT --query "SELECT COUNT() > 0 FROM system.part_log where table like 'ttl_table%' and database = '${CLICKHOUSE_DATABASE}'" + for i in $(seq 1 $NUM_REPLICAS); do $CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS ttl_table$i" & done diff --git a/tests/queries/0_stateless/mergetree_mutations.lib b/tests/queries/0_stateless/mergetree_mutations.lib index d10ac883764..ffd8dce78fd 100644 --- a/tests/queries/0_stateless/mergetree_mutations.lib +++ b/tests/queries/0_stateless/mergetree_mutations.lib @@ -20,3 +20,23 @@ function wait_for_mutation() done } + +function wait_for_all_mutations() +{ + local table=$1 + local database=$2 + database=${database:="${CLICKHOUSE_DATABASE}"} + + for i in {1..200} + do + sleep 1 + if [[ $(${CLICKHOUSE_CLIENT} --query="SELECT coalesce(minOrNull(is_done), 1) FROM system.mutations WHERE database='$database' AND table like '$table'") -eq 1 ]]; then + break + fi + + if [[ $i -eq 200 ]]; then + echo "Timed out while waiting for mutation to execute!" | tee /dev/stderr + fi + + done +} diff --git a/tests/queries/0_stateless/replication.lib b/tests/queries/0_stateless/replication.lib new file mode 100755 index 00000000000..84224ab4a4d --- /dev/null +++ b/tests/queries/0_stateless/replication.lib @@ -0,0 +1,35 @@ +#!/usr/bin/env bash +# shellcheck source=./mergetree_mutations.lib +. "$CURDIR"/mergetree_mutations.lib + +function try_sync_replicas +{ + readarray -t tables_arr < <(${CLICKHOUSE_CLIENT} --query="SELECT name FROM system.tables WHERE database=currentDatabase() AND name like '$1%' AND engine like '%Replicated%'") + for t in "${tables_arr[@]}" + do + # The size of log may be big, so increase timeout. + $CLICKHOUSE_CLIENT --receive_timeout 300 -q "SYSTEM SYNC REPLICA $t" & + done + wait + echo "Replication did not hang: synced all replicas of $1" +} + +function check_replication_consistency() +{ + try_sync_replicas "$1" + + # SYNC REPLICA is not enough if some MUTATE_PARTs are not assigned yet + # TODO maybe just kill all mutations? + wait_for_all_mutations "$1%" + + $CLICKHOUSE_CLIENT -q \ + "SELECT + throwIf((countDistinct(data) AS c) != 1, 'Replicas have diverged'), c + FROM + ( + SELECT _table, ($2) AS data + FROM merge(currentDatabase(), '$1') GROUP BY _table + )" || $CLICKHOUSE_CLIENT -q \ + "select _table, $2, arraySort(groupArrayDistinct(_part)) from merge(currentDatabase(), '$1') group by _table" | tee /dev/stderr +} + From 35f1caddcb80b8a1bcadb48dbc93670367e23510 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Wed, 11 Aug 2021 18:24:47 +0300 Subject: [PATCH 02/15] kill mutations before syncing --- ...nt_ttl_and_normal_merges_zookeeper_long.sh | 4 +++- tests/queries/0_stateless/replication.lib | 20 ++++++++++++++----- 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/tests/queries/0_stateless/01921_concurrent_ttl_and_normal_merges_zookeeper_long.sh b/tests/queries/0_stateless/01921_concurrent_ttl_and_normal_merges_zookeeper_long.sh index 3daab1e9fdd..6a0fa192321 100755 --- a/tests/queries/0_stateless/01921_concurrent_ttl_and_normal_merges_zookeeper_long.sh +++ b/tests/queries/0_stateless/01921_concurrent_ttl_and_normal_merges_zookeeper_long.sh @@ -60,7 +60,9 @@ timeout $TIMEOUT bash -c optimize_thread 2> /dev/null & timeout $TIMEOUT bash -c optimize_thread 2> /dev/null & wait - +for i in $(seq 1 $NUM_REPLICAS); do + $CLICKHOUSE_CLIENT --query "SYSTEM STOP TTL MERGES ttl_table$i" & +done check_replication_consistency "ttl_table" "count(), sum(toUInt64(key))" $CLICKHOUSE_CLIENT --query "SELECT * FROM system.replication_queue where table like 'ttl_table%' and database = '${CLICKHOUSE_DATABASE}' and type='MERGE_PARTS' and last_exception != '' FORMAT Vertical" diff --git a/tests/queries/0_stateless/replication.lib b/tests/queries/0_stateless/replication.lib index 84224ab4a4d..7dbb988ec61 100755 --- a/tests/queries/0_stateless/replication.lib +++ b/tests/queries/0_stateless/replication.lib @@ -2,7 +2,7 @@ # shellcheck source=./mergetree_mutations.lib . "$CURDIR"/mergetree_mutations.lib -function try_sync_replicas +function try_sync_replicas() { readarray -t tables_arr < <(${CLICKHOUSE_CLIENT} --query="SELECT name FROM system.tables WHERE database=currentDatabase() AND name like '$1%' AND engine like '%Replicated%'") for t in "${tables_arr[@]}" @@ -16,12 +16,14 @@ function try_sync_replicas function check_replication_consistency() { - try_sync_replicas "$1" + # Forcefully cancel mutations to avoid waiting for them to finish + ${CLICKHOUSE_CLIENT} --query="KILL MUTATION WHERE database=currentDatabase() AND table like '$1%'" > /dev/null # SYNC REPLICA is not enough if some MUTATE_PARTs are not assigned yet - # TODO maybe just kill all mutations? wait_for_all_mutations "$1%" + try_sync_replicas "$1" + $CLICKHOUSE_CLIENT -q \ "SELECT throwIf((countDistinct(data) AS c) != 1, 'Replicas have diverged'), c @@ -29,7 +31,15 @@ function check_replication_consistency() ( SELECT _table, ($2) AS data FROM merge(currentDatabase(), '$1') GROUP BY _table - )" || $CLICKHOUSE_CLIENT -q \ - "select _table, $2, arraySort(groupArrayDistinct(_part)) from merge(currentDatabase(), '$1') group by _table" | tee /dev/stderr + )" + res=$? + if ! [ $res -eq 0 ]; then + echo "Replicas have diverged" | tee /dev/stderr + $CLICKHOUSE_CLIENT -q "select _table, $2, arraySort(groupArrayDistinct(_part)) from merge(currentDatabase(), '$1') group by _table" | tee /dev/stderr + $CLICKHOUSE_CLIENT -q "select * from system.replication_queue where database=currentDatabase() and table like '$1%'" | tee /dev/stderr + $CLICKHOUSE_CLIENT -q "select * from system.mutations where database=currentDatabase() and table like '$1%'" | tee /dev/stderr + $CLICKHOUSE_CLIENT -q "select * from system.parts where database=currentDatabase() and table like '$1%'" | tee /dev/stderr + fi + } From a2b2e8cb3fbe1767763b5c7cd60a2ae701f66815 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Wed, 11 Aug 2021 21:02:35 +0300 Subject: [PATCH 03/15] fix --- tests/queries/0_stateless/mergetree_mutations.lib | 2 +- tests/queries/0_stateless/replication.lib | 10 +++++----- tests/queries/skip_list.json | 1 + 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/tests/queries/0_stateless/mergetree_mutations.lib b/tests/queries/0_stateless/mergetree_mutations.lib index ffd8dce78fd..bcaee740c1f 100644 --- a/tests/queries/0_stateless/mergetree_mutations.lib +++ b/tests/queries/0_stateless/mergetree_mutations.lib @@ -35,7 +35,7 @@ function wait_for_all_mutations() fi if [[ $i -eq 200 ]]; then - echo "Timed out while waiting for mutation to execute!" | tee /dev/stderr + echo "Timed out while waiting for mutation to execute!" | tee >(cat >&2) fi done diff --git a/tests/queries/0_stateless/replication.lib b/tests/queries/0_stateless/replication.lib index 7dbb988ec61..d3d93070663 100755 --- a/tests/queries/0_stateless/replication.lib +++ b/tests/queries/0_stateless/replication.lib @@ -34,11 +34,11 @@ function check_replication_consistency() )" res=$? if ! [ $res -eq 0 ]; then - echo "Replicas have diverged" | tee /dev/stderr - $CLICKHOUSE_CLIENT -q "select _table, $2, arraySort(groupArrayDistinct(_part)) from merge(currentDatabase(), '$1') group by _table" | tee /dev/stderr - $CLICKHOUSE_CLIENT -q "select * from system.replication_queue where database=currentDatabase() and table like '$1%'" | tee /dev/stderr - $CLICKHOUSE_CLIENT -q "select * from system.mutations where database=currentDatabase() and table like '$1%'" | tee /dev/stderr - $CLICKHOUSE_CLIENT -q "select * from system.parts where database=currentDatabase() and table like '$1%'" | tee /dev/stderr + echo "Replicas have diverged" | tee >(cat >&2) + $CLICKHOUSE_CLIENT -q "select _table, $2, arraySort(groupArrayDistinct(_part)) from merge(currentDatabase(), '$1') group by _table" | tee >(cat >&2) + $CLICKHOUSE_CLIENT -q "select * from system.replication_queue where database=currentDatabase() and table like '$1%'" | tee >(cat >&2) + $CLICKHOUSE_CLIENT -q "select * from system.mutations where database=currentDatabase() and table like '$1%'" | tee >(cat >&2) + $CLICKHOUSE_CLIENT -q "select * from system.parts where database=currentDatabase() and table like '$1%'" | tee >(cat >&2) fi } diff --git a/tests/queries/skip_list.json b/tests/queries/skip_list.json index 5078dc9a256..84ec61d8281 100644 --- a/tests/queries/skip_list.json +++ b/tests/queries/skip_list.json @@ -321,6 +321,7 @@ "01076_parallel_alter_replicated_zookeeper", "01079_parallel_alter_add_drop_column_zookeeper", "01079_parallel_alter_detach_table_zookeeper", + "01079_parallel_alter_modify_zookeeper_long", "01080_check_for_error_incorrect_size_of_nested_column", "01083_expressions_in_engine_arguments", "01084_regexp_empty", From 0256e313b3ab4cd248086bd6dd17bdce2dd70d52 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Thu, 12 Aug 2021 00:29:37 +0300 Subject: [PATCH 04/15] fix --- tests/clickhouse-test | 1 + tests/queries/0_stateless/mergetree_mutations.lib | 2 +- tests/queries/0_stateless/replication.lib | 12 ++++++------ 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index f6833cfbd09..8ed9ac7c302 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -641,6 +641,7 @@ def run_tests_array(all_tests_with_params): status += print_test_time(total_time) status += " - having stderror:\n{}\n".format( '\n'.join(stderr.split('\n')[:100])) + status += "\nstdout:\n{}\n".format(stdout) status += 'Database: ' + testcase_args.testcase_database elif 'Exception' in stdout: failures += 1 diff --git a/tests/queries/0_stateless/mergetree_mutations.lib b/tests/queries/0_stateless/mergetree_mutations.lib index bcaee740c1f..7d02f9f1b41 100644 --- a/tests/queries/0_stateless/mergetree_mutations.lib +++ b/tests/queries/0_stateless/mergetree_mutations.lib @@ -35,7 +35,7 @@ function wait_for_all_mutations() fi if [[ $i -eq 200 ]]; then - echo "Timed out while waiting for mutation to execute!" | tee >(cat >&2) + echo "Timed out while waiting for mutation to execute!" fi done diff --git a/tests/queries/0_stateless/replication.lib b/tests/queries/0_stateless/replication.lib index d3d93070663..d67dd3721e6 100755 --- a/tests/queries/0_stateless/replication.lib +++ b/tests/queries/0_stateless/replication.lib @@ -33,12 +33,12 @@ function check_replication_consistency() FROM merge(currentDatabase(), '$1') GROUP BY _table )" res=$? - if ! [ $res -eq 0 ]; then - echo "Replicas have diverged" | tee >(cat >&2) - $CLICKHOUSE_CLIENT -q "select _table, $2, arraySort(groupArrayDistinct(_part)) from merge(currentDatabase(), '$1') group by _table" | tee >(cat >&2) - $CLICKHOUSE_CLIENT -q "select * from system.replication_queue where database=currentDatabase() and table like '$1%'" | tee >(cat >&2) - $CLICKHOUSE_CLIENT -q "select * from system.mutations where database=currentDatabase() and table like '$1%'" | tee >(cat >&2) - $CLICKHOUSE_CLIENT -q "select * from system.parts where database=currentDatabase() and table like '$1%'" | tee >(cat >&2) + if [ $res -ne 0 ]; then + echo "Replicas have diverged" + $CLICKHOUSE_CLIENT -q "select _table, $2, arraySort(groupArrayDistinct(_part)) from merge(currentDatabase(), '$1') group by _table" + $CLICKHOUSE_CLIENT -q "select * from system.replication_queue where database=currentDatabase() and table like '$1%'" + $CLICKHOUSE_CLIENT -q "select * from system.mutations where database=currentDatabase() and table like '$1%'" + $CLICKHOUSE_CLIENT -q "select * from system.parts where database=currentDatabase() and table like '$1%'" fi } From 9dd742cc29cc1743c55b107f37f677cc164e187f Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Thu, 12 Aug 2021 13:39:06 +0300 Subject: [PATCH 05/15] more debug info --- tests/clickhouse-test | 1 + ...ts_race_condition_zookeeper_long.reference | 2 +- ...ts_race_condition_drop_zookeeper.reference | 2 +- ..._alter_add_drop_column_zookeeper.reference | 2 +- ...llel_alter_modify_zookeeper_long.reference | 2 +- .../01154_move_partition_long.reference | 2 +- ...utations_kill_many_replicas_long.reference | 2 +- ...and_normal_merges_zookeeper_long.reference | 2 +- tests/queries/0_stateless/replication.lib | 25 +++++++++++-------- 9 files changed, 22 insertions(+), 18 deletions(-) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 8ed9ac7c302..dcb64a819e4 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -632,6 +632,7 @@ def run_tests_array(all_tests_with_params): open(stdout_file).read().split('\n')[:100]) status += '\n' + status += "\nstdout:\n{}\n".format(stdout) status += 'Database: ' + testcase_args.testcase_database elif stderr: diff --git a/tests/queries/0_stateless/00992_system_parts_race_condition_zookeeper_long.reference b/tests/queries/0_stateless/00992_system_parts_race_condition_zookeeper_long.reference index 8a6b9c4f877..c3165c3d6ef 100644 --- a/tests/queries/0_stateless/00992_system_parts_race_condition_zookeeper_long.reference +++ b/tests/queries/0_stateless/00992_system_parts_race_condition_zookeeper_long.reference @@ -1,2 +1,2 @@ Replication did not hang: synced all replicas of alter_table -0 1 +Consistency: 1 diff --git a/tests/queries/0_stateless/00993_system_parts_race_condition_drop_zookeeper.reference b/tests/queries/0_stateless/00993_system_parts_race_condition_drop_zookeeper.reference index 0d13bb62797..6e705f05f04 100644 --- a/tests/queries/0_stateless/00993_system_parts_race_condition_drop_zookeeper.reference +++ b/tests/queries/0_stateless/00993_system_parts_race_condition_drop_zookeeper.reference @@ -1,2 +1,2 @@ Replication did not hang: synced all replicas of alter_table_ -0 1 +Consistency: 1 diff --git a/tests/queries/0_stateless/01079_parallel_alter_add_drop_column_zookeeper.reference b/tests/queries/0_stateless/01079_parallel_alter_add_drop_column_zookeeper.reference index 34a89ec4d07..4b640354c1b 100644 --- a/tests/queries/0_stateless/01079_parallel_alter_add_drop_column_zookeeper.reference +++ b/tests/queries/0_stateless/01079_parallel_alter_add_drop_column_zookeeper.reference @@ -2,7 +2,7 @@ Starting alters Finishing alters Equal number of columns Replication did not hang: synced all replicas of concurrent_alter_add_drop_ -0 1 +Consistency: 1 0 0 0 diff --git a/tests/queries/0_stateless/01079_parallel_alter_modify_zookeeper_long.reference b/tests/queries/0_stateless/01079_parallel_alter_modify_zookeeper_long.reference index 15223e4fd99..435b1b1f1ae 100644 --- a/tests/queries/0_stateless/01079_parallel_alter_modify_zookeeper_long.reference +++ b/tests/queries/0_stateless/01079_parallel_alter_modify_zookeeper_long.reference @@ -6,7 +6,7 @@ Starting alters Finishing alters Replication did not hang: synced all replicas of concurrent_alter_mt_ -0 1 +Consistency: 1 1 0 1 diff --git a/tests/queries/0_stateless/01154_move_partition_long.reference b/tests/queries/0_stateless/01154_move_partition_long.reference index 40aaa81456a..37f0181524e 100644 --- a/tests/queries/0_stateless/01154_move_partition_long.reference +++ b/tests/queries/0_stateless/01154_move_partition_long.reference @@ -1,3 +1,3 @@ Replication did not hang: synced all replicas of dst_ -0 1 +Consistency: 1 Replication did not hang: synced all replicas of src_ diff --git a/tests/queries/0_stateless/01593_concurrent_alter_mutations_kill_many_replicas_long.reference b/tests/queries/0_stateless/01593_concurrent_alter_mutations_kill_many_replicas_long.reference index 5a3c0201732..c68053e8270 100644 --- a/tests/queries/0_stateless/01593_concurrent_alter_mutations_kill_many_replicas_long.reference +++ b/tests/queries/0_stateless/01593_concurrent_alter_mutations_kill_many_replicas_long.reference @@ -15,4 +15,4 @@ Metadata version on replica 5 equal with first replica, OK CREATE TABLE default.concurrent_kill_5\n(\n `key` UInt64,\n `value` Int64\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/01593_concurrent_alter_mutations_kill_many_replicas_long_default/{shard}\', \'{replica}5\')\nORDER BY key\nSETTINGS max_replicated_mutations_in_queue = 1000, number_of_free_entries_in_pool_to_execute_mutation = 0, max_replicated_merges_in_queue = 1000, index_granularity = 8192 499999500000 Replication did not hang: synced all replicas of concurrent_kill_ -0 1 +Consistency: 1 diff --git a/tests/queries/0_stateless/01921_concurrent_ttl_and_normal_merges_zookeeper_long.reference b/tests/queries/0_stateless/01921_concurrent_ttl_and_normal_merges_zookeeper_long.reference index 25e14257d8d..e5a8ecd20b4 100644 --- a/tests/queries/0_stateless/01921_concurrent_ttl_and_normal_merges_zookeeper_long.reference +++ b/tests/queries/0_stateless/01921_concurrent_ttl_and_normal_merges_zookeeper_long.reference @@ -1,3 +1,3 @@ Replication did not hang: synced all replicas of ttl_table -0 1 +Consistency: 1 1 diff --git a/tests/queries/0_stateless/replication.lib b/tests/queries/0_stateless/replication.lib index d67dd3721e6..af5375fb235 100755 --- a/tests/queries/0_stateless/replication.lib +++ b/tests/queries/0_stateless/replication.lib @@ -8,7 +8,8 @@ function try_sync_replicas() for t in "${tables_arr[@]}" do # The size of log may be big, so increase timeout. - $CLICKHOUSE_CLIENT --receive_timeout 300 -q "SYSTEM SYNC REPLICA $t" & + $CLICKHOUSE_CLIENT --receive_timeout 400 -q "SYSTEM SYNC REPLICA $t" || $CLICKHOUSE_CLIENT -q \ + "select 'sync failed, queue:', * from system.replication_queue where database=currentDatabase() and table='$t'" & done wait echo "Replication did not hang: synced all replicas of $1" @@ -24,21 +25,23 @@ function check_replication_consistency() try_sync_replicas "$1" - $CLICKHOUSE_CLIENT -q \ + res=$($CLICKHOUSE_CLIENT -q \ "SELECT - throwIf((countDistinct(data) AS c) != 1, 'Replicas have diverged'), c + countDistinct(data) FROM ( SELECT _table, ($2) AS data FROM merge(currentDatabase(), '$1') GROUP BY _table - )" - res=$? - if [ $res -ne 0 ]; then - echo "Replicas have diverged" - $CLICKHOUSE_CLIENT -q "select _table, $2, arraySort(groupArrayDistinct(_part)) from merge(currentDatabase(), '$1') group by _table" - $CLICKHOUSE_CLIENT -q "select * from system.replication_queue where database=currentDatabase() and table like '$1%'" - $CLICKHOUSE_CLIENT -q "select * from system.mutations where database=currentDatabase() and table like '$1%'" - $CLICKHOUSE_CLIENT -q "select * from system.parts where database=currentDatabase() and table like '$1%'" + )") + + echo "Consistency: $res" + if [ $res -ne 1 ]; then + echo "Replicas have diverged:" + $CLICKHOUSE_CLIENT -q "select 'data', _table, $2, arraySort(groupArrayDistinct(_part)) from merge(currentDatabase(), '$1') group by _table" + $CLICKHOUSE_CLIENT -q "select 'queue', * from system.replication_queue where database=currentDatabase() and table like '$1%'" + $CLICKHOUSE_CLIENT -q "select 'mutations', * from system.mutations where database=currentDatabase() and table like '$1%'" + $CLICKHOUSE_CLIENT -q "select 'parts', * from system.parts where database=currentDatabase() and table like '$1%'" + echo "Good luck with debugging..." fi } From 95eeeb53d640b0bbe15bc0fda68ddaa8c6c79b61 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Thu, 12 Aug 2021 18:58:49 +0300 Subject: [PATCH 06/15] fix --- .../ReplicatedMergeTreePartCheckThread.cpp | 1 + .../MergeTree/ReplicatedMergeTreeQueue.cpp | 3 ++ src/Storages/StorageReplicatedMergeTree.cpp | 38 ++++++++++--------- tests/queries/0_stateless/replication.lib | 2 +- 4 files changed, 26 insertions(+), 18 deletions(-) diff --git a/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp index 35a011a4a58..797d0570fbc 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp @@ -145,6 +145,7 @@ ReplicatedMergeTreePartCheckThread::MissingPartSearchResult ReplicatedMergeTreeP if (found_part_with_the_same_min_block && found_part_with_the_same_max_block) { + /// FIXME It may never appear LOG_WARNING(log, "Found parts with the same min block and with the same max block as the missing part {}. Hoping that it will eventually appear as a result of a merge.", part_name); return MissingPartSearchResult::FoundAndDontNeedFetch; } diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp index ea5f7cfc36a..277d887a46e 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp @@ -1488,6 +1488,9 @@ MutationCommands ReplicatedMergeTreeQueue::getMutationCommands( /// to allow recovering from a mutation that cannot be executed. This way you can delete the mutation entry /// from /mutations in ZK and the replicas will simply skip the mutation. + /// NOTE: However, it's quite dangerous to skip MUTATE_PART. Replicas may diverge if one of them have executed part mutation, + /// and then mutation was killed before execution of MUTATE_PART on remaining replicas. + if (part->info.getDataVersion() > desired_mutation_version) { LOG_WARNING(log, "Data version of part {} is already greater than desired mutation version {}", part->name, desired_mutation_version); diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 150a71a09e5..194d81ba553 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -141,6 +141,7 @@ namespace ErrorCodes extern const int DUPLICATE_DATA_PART; extern const int BAD_ARGUMENTS; extern const int CONCURRENT_ACCESS_NOT_SUPPORTED; + extern const int CHECKSUM_DOESNT_MATCH; } namespace ActionLocks @@ -1314,32 +1315,35 @@ void StorageReplicatedMergeTree::checkPartChecksumsAndAddCommitOps(const zkutil: } ReplicatedMergeTreePartHeader replica_part_header; - if (!part_zk_str.empty()) - replica_part_header = ReplicatedMergeTreePartHeader::fromString(part_zk_str); - else + if (part_zk_str.empty()) { - Coordination::Stat columns_stat_before, columns_stat_after; String columns_str; String checksums_str; - /// Let's check that the node's version with the columns did not change while we were reading the checksums. - /// This ensures that the columns and the checksum refer to the same - if (!zookeeper->tryGet(fs::path(current_part_path) / "columns", columns_str, &columns_stat_before) || - !zookeeper->tryGet(fs::path(current_part_path) / "checksums", checksums_str) || - !zookeeper->exists(fs::path(current_part_path) / "columns", &columns_stat_after) || - columns_stat_before.version != columns_stat_after.version) + if (zookeeper->tryGet(fs::path(current_part_path) / "columns", columns_str) && + zookeeper->tryGet(fs::path(current_part_path) / "checksums", checksums_str)) { - LOG_INFO(log, "Not checking checksums of part {} with replica {} because part changed while we were reading its checksums", part_name, replica); + replica_part_header = ReplicatedMergeTreePartHeader::fromColumnsAndChecksumsZNodes(columns_str, checksums_str); + } + else + { + if (zookeeper->exists(current_part_path)) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Part {} has empty header and does not have columns and checksums. " + "Looks like a bug.", current_part_path); + LOG_INFO(log, "Not checking checksums of part {} with replica {} because part was removed from ZooKeeper", part_name, replica); continue; } - - replica_part_header = ReplicatedMergeTreePartHeader::fromColumnsAndChecksumsZNodes( - columns_str, checksums_str); + } + else + { + replica_part_header = ReplicatedMergeTreePartHeader::fromString(part_zk_str); } if (replica_part_header.getColumnsHash() != local_part_header.getColumnsHash()) { - LOG_INFO(log, "Not checking checksums of part {} with replica {} because columns are different", part_name, replica); - continue; + /// Either it's a bug or ZooKeeper contains broken data. + /// TODO Fix KILL MUTATION and replace CHECKSUM_DOESNT_MATCH with LOGICAL_ERROR + /// (some replicas may skip killed mutation even if it was executed on other replicas) + throw Exception(ErrorCodes::CHECKSUM_DOESNT_MATCH, "Part {} from {} has different columns hash", part_name, replica); } replica_part_header.getChecksums().checkEqual(local_part_header.getChecksums(), true); @@ -6058,7 +6062,7 @@ CancellationCode StorageReplicatedMergeTree::killMutation(const String & mutatio zkutil::ZooKeeperPtr zookeeper = getZooKeeper(); - LOG_TRACE(log, "Killing mutation {}", mutation_id); + LOG_INFO(log, "Killing mutation {}", mutation_id); auto mutation_entry = queue.removeMutation(zookeeper, mutation_id); if (!mutation_entry) diff --git a/tests/queries/0_stateless/replication.lib b/tests/queries/0_stateless/replication.lib index af5375fb235..2992094b5d3 100755 --- a/tests/queries/0_stateless/replication.lib +++ b/tests/queries/0_stateless/replication.lib @@ -27,7 +27,7 @@ function check_replication_consistency() res=$($CLICKHOUSE_CLIENT -q \ "SELECT - countDistinct(data) + if((countDistinct(data) as c) == 0, 1, c) FROM ( SELECT _table, ($2) AS data From 285a5848b51a58ac71fae56d515af6cf712ab253 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Fri, 13 Aug 2021 14:27:55 +0300 Subject: [PATCH 07/15] fix --- tests/config/config.d/merge_tree.xml | 5 +++++ tests/config/install.sh | 1 + .../01921_concurrent_ttl_and_normal_merges_zookeeper_long.sh | 3 ++- 3 files changed, 8 insertions(+), 1 deletion(-) create mode 100644 tests/config/config.d/merge_tree.xml diff --git a/tests/config/config.d/merge_tree.xml b/tests/config/config.d/merge_tree.xml new file mode 100644 index 00000000000..35af1fa65eb --- /dev/null +++ b/tests/config/config.d/merge_tree.xml @@ -0,0 +1,5 @@ + + + 8 + + diff --git a/tests/config/install.sh b/tests/config/install.sh index 571dff34018..e46ac62606b 100755 --- a/tests/config/install.sh +++ b/tests/config/install.sh @@ -31,6 +31,7 @@ ln -sf $SRC_PATH/config.d/max_concurrent_queries.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/test_cluster_with_incorrect_pw.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/keeper_port.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/logging_no_rotate.xml $DEST_SERVER_PATH/config.d/ +ln -sf $SRC_PATH/config.d/merge_tree.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/tcp_with_proxy.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/top_level_domains_lists.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/top_level_domains_path.xml $DEST_SERVER_PATH/config.d/ diff --git a/tests/queries/0_stateless/01921_concurrent_ttl_and_normal_merges_zookeeper_long.sh b/tests/queries/0_stateless/01921_concurrent_ttl_and_normal_merges_zookeeper_long.sh index 6a0fa192321..80022bd472d 100755 --- a/tests/queries/0_stateless/01921_concurrent_ttl_and_normal_merges_zookeeper_long.sh +++ b/tests/queries/0_stateless/01921_concurrent_ttl_and_normal_merges_zookeeper_long.sh @@ -61,7 +61,8 @@ timeout $TIMEOUT bash -c optimize_thread 2> /dev/null & wait for i in $(seq 1 $NUM_REPLICAS); do - $CLICKHOUSE_CLIENT --query "SYSTEM STOP TTL MERGES ttl_table$i" & + # disable ttl merges before checking consistency + $CLICKHOUSE_CLIENT --query "ALTER TABLE ttl_table$i MODIFY SETTING max_replicated_merges_with_ttl_in_queue=0" done check_replication_consistency "ttl_table" "count(), sum(toUInt64(key))" From e824d96c3b3d9c579f2f280bdd80c482cf81264b Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Mon, 16 Aug 2021 00:42:10 +0300 Subject: [PATCH 08/15] fix --- src/Storages/StorageReplicatedMergeTree.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 194d81ba553..b94908bca0a 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -2141,6 +2141,8 @@ bool StorageReplicatedMergeTree::executeFetch(LogEntry & entry) if (!parts_for_merge.empty() && replica.empty()) { LOG_INFO(log, "No active replica has part {}. Will fetch merged part instead.", entry.new_part_name); + /// We should enqueue it for check, because merged part may never appear if source part is lost + enqueuePartForCheck(entry.new_part_name); return false; } From d9bd4675376d2d66af83fa066c96d832d370db68 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Mon, 16 Aug 2021 12:18:52 +0300 Subject: [PATCH 09/15] fix --- src/Storages/StorageReplicatedMergeTree.cpp | 4 +-- tests/queries/0_stateless/replication.lib | 32 ++++++++++++++++----- 2 files changed, 27 insertions(+), 9 deletions(-) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index b94908bca0a..4f5d635e6ea 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -5466,9 +5466,9 @@ bool StorageReplicatedMergeTree::waitForTableReplicaToProcessLogEntry( const auto & stop_waiting = [&]() { - bool stop_waiting_itself = waiting_itself && (partial_shutdown_called || is_dropped); + bool stop_waiting_itself = waiting_itself && partial_shutdown_called; bool stop_waiting_non_active = !wait_for_non_active && !getZooKeeper()->exists(fs::path(table_zookeeper_path) / "replicas" / replica / "is_active"); - return stop_waiting_itself || stop_waiting_non_active; + return is_dropped || stop_waiting_itself || stop_waiting_non_active; }; /// Don't recheck ZooKeeper too often diff --git a/tests/queries/0_stateless/replication.lib b/tests/queries/0_stateless/replication.lib index 2992094b5d3..15af1dbd6c8 100755 --- a/tests/queries/0_stateless/replication.lib +++ b/tests/queries/0_stateless/replication.lib @@ -4,12 +4,30 @@ function try_sync_replicas() { - readarray -t tables_arr < <(${CLICKHOUSE_CLIENT} --query="SELECT name FROM system.tables WHERE database=currentDatabase() AND name like '$1%' AND engine like '%Replicated%'") + readarray -t empty_partitions_arr < <(${CLICKHOUSE_CLIENT} -q \ + "SELECT DISTINCT substr(new_part_name, 1, position(new_part_name, '_') - 1) AS partition_id + FROM system.replication_queue + WHERE (database = currentDatabase()) AND (table LIKE '$1%') AND (last_exception LIKE '%No active replica has part%') AND (partition_id NOT IN ( + SELECT partition_id + FROM system.parts + WHERE (database = currentDatabase()) AND (table LIKE '$1%') + ))") + readarray -t tables_arr < <(${CLICKHOUSE_CLIENT} -q "SELECT name FROM system.tables WHERE database=currentDatabase() AND name like '$1%' AND engine like '%Replicated%'") + + for t in "${tables_arr[@]}" + do + for p in "${empty_partitions_arr[@]}" + do + # Avoid "Empty part ... is not created instead of lost part because there are no parts in partition" + $CLICKHOUSE_CLIENT -q "ALTER TABLE $t DROP PARTITION ID '$p'" 2>/dev/null + done + done + for t in "${tables_arr[@]}" do # The size of log may be big, so increase timeout. $CLICKHOUSE_CLIENT --receive_timeout 400 -q "SYSTEM SYNC REPLICA $t" || $CLICKHOUSE_CLIENT -q \ - "select 'sync failed, queue:', * from system.replication_queue where database=currentDatabase() and table='$t'" & + "select 'sync failed, queue:', * from system.replication_queue where database=currentDatabase() and table='$t' order by database, table, node_name" & done wait echo "Replication did not hang: synced all replicas of $1" @@ -18,7 +36,7 @@ function try_sync_replicas() function check_replication_consistency() { # Forcefully cancel mutations to avoid waiting for them to finish - ${CLICKHOUSE_CLIENT} --query="KILL MUTATION WHERE database=currentDatabase() AND table like '$1%'" > /dev/null + ${CLICKHOUSE_CLIENT} -q "KILL MUTATION WHERE database=currentDatabase() AND table like '$1%'" > /dev/null # SYNC REPLICA is not enough if some MUTATE_PARTs are not assigned yet wait_for_all_mutations "$1%" @@ -37,10 +55,10 @@ function check_replication_consistency() echo "Consistency: $res" if [ $res -ne 1 ]; then echo "Replicas have diverged:" - $CLICKHOUSE_CLIENT -q "select 'data', _table, $2, arraySort(groupArrayDistinct(_part)) from merge(currentDatabase(), '$1') group by _table" - $CLICKHOUSE_CLIENT -q "select 'queue', * from system.replication_queue where database=currentDatabase() and table like '$1%'" - $CLICKHOUSE_CLIENT -q "select 'mutations', * from system.mutations where database=currentDatabase() and table like '$1%'" - $CLICKHOUSE_CLIENT -q "select 'parts', * from system.parts where database=currentDatabase() and table like '$1%'" + $CLICKHOUSE_CLIENT -q "select 'data', _table, $2, arraySort(groupArrayDistinct(_part)) from merge(currentDatabase(), '$1') group by _table order by _table" + $CLICKHOUSE_CLIENT -q "select 'queue', * from system.replication_queue where database=currentDatabase() and table like '$1%' order by database, table, node_name" + $CLICKHOUSE_CLIENT -q "select 'mutations', * from system.mutations where database=currentDatabase() and table like '$1%' order by database, table, mutation_id" + $CLICKHOUSE_CLIENT -q "select 'parts', * from system.parts where database=currentDatabase() and table like '$1%' order by database, table, name" echo "Good luck with debugging..." fi From b8d9bc862d6b102bb05d105bea558d7fb3b89509 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Mon, 16 Aug 2021 15:36:12 +0300 Subject: [PATCH 10/15] fix --- tests/queries/0_stateless/replication.lib | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/queries/0_stateless/replication.lib b/tests/queries/0_stateless/replication.lib index 15af1dbd6c8..54c5f3c2faf 100755 --- a/tests/queries/0_stateless/replication.lib +++ b/tests/queries/0_stateless/replication.lib @@ -35,6 +35,10 @@ function try_sync_replicas() function check_replication_consistency() { + # Trigger pullLogsToQueue(...) and updateMutations(...) on some replica to make it pull all mutations, so it will be possible to kill them + some_table=$($CLICKHOUSE_CLIENT -q "SELECT name FROM system.tables WHERE database=currentDatabase() AND name like '$1%' LIMIT 1") + $CLICKHOUSE_CLIENT --receive_timeout 3 -q "SYSTEM SYNC REPLICA $some_table" 2>/dev/null + # Forcefully cancel mutations to avoid waiting for them to finish ${CLICKHOUSE_CLIENT} -q "KILL MUTATION WHERE database=currentDatabase() AND table like '$1%'" > /dev/null From 3c8611a5220fbe23a907253a9e3005e9f527eb07 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Mon, 16 Aug 2021 15:51:04 +0300 Subject: [PATCH 11/15] fix --- tests/queries/0_stateless/replication.lib | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/replication.lib b/tests/queries/0_stateless/replication.lib index 54c5f3c2faf..77b09dee1e0 100755 --- a/tests/queries/0_stateless/replication.lib +++ b/tests/queries/0_stateless/replication.lib @@ -37,7 +37,7 @@ function check_replication_consistency() { # Trigger pullLogsToQueue(...) and updateMutations(...) on some replica to make it pull all mutations, so it will be possible to kill them some_table=$($CLICKHOUSE_CLIENT -q "SELECT name FROM system.tables WHERE database=currentDatabase() AND name like '$1%' LIMIT 1") - $CLICKHOUSE_CLIENT --receive_timeout 3 -q "SYSTEM SYNC REPLICA $some_table" 2>/dev/null + $CLICKHOUSE_CLIENT --receive_timeout 3 -q "SYSTEM SYNC REPLICA $some_table" 1>/dev/null 2>/dev/null ||: # Forcefully cancel mutations to avoid waiting for them to finish ${CLICKHOUSE_CLIENT} -q "KILL MUTATION WHERE database=currentDatabase() AND table like '$1%'" > /dev/null From d50c5e3c32a0df3185e0965bd11421b95671a579 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Tue, 17 Aug 2021 15:01:51 +0300 Subject: [PATCH 12/15] fix sync replica --- .../MergeTree/ReplicatedMergeTreeQueue.cpp | 14 +++++++++-- .../MergeTree/ReplicatedMergeTreeQueue.h | 11 ++++++++- .../ReplicatedMergeTreeRestartingThread.cpp | 23 ++++++++++++++++--- src/Storages/StorageReplicatedMergeTree.cpp | 9 ++++---- 4 files changed, 46 insertions(+), 11 deletions(-) diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp index 3f3a7f19f72..3a9f477597d 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp @@ -23,6 +23,7 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; extern const int UNEXPECTED_NODE_IN_ZOOKEEPER; extern const int ABORTED; + extern const int READONLY; } @@ -472,9 +473,18 @@ bool ReplicatedMergeTreeQueue::removeFailedQuorumPart(const MergeTreePartInfo & return virtual_parts.remove(part_info); } -int32_t ReplicatedMergeTreeQueue::pullLogsToQueue(zkutil::ZooKeeperPtr zookeeper, Coordination::WatchCallback watch_callback) +int32_t ReplicatedMergeTreeQueue::pullLogsToQueue(zkutil::ZooKeeperPtr zookeeper, Coordination::WatchCallback watch_callback, PullLogsReason reason) { std::lock_guard lock(pull_logs_to_queue_mutex); + if (storage.is_readonly && reason != LOAD) + { + /// Pulling logs when replica is readonly may cause obscure bugs, allow it on replica startup only + if (reason == SYNC) + throw Exception(ErrorCodes::READONLY, "Cannot SYNC REPLICA, because replica is readonly"); + + throw Exception(ErrorCodes::LOGICAL_ERROR, "Some background task ({}) tried to pull logs on readonly replica, it's a bug", reason); + } + if (pull_log_blocker.isCancelled()) throw Exception("Log pulling is cancelled", ErrorCodes::ABORTED); @@ -1834,7 +1844,7 @@ ReplicatedMergeTreeMergePredicate::ReplicatedMergeTreeMergePredicate( } } - merges_version = queue_.pullLogsToQueue(zookeeper); + merges_version = queue_.pullLogsToQueue(zookeeper, {}, ReplicatedMergeTreeQueue::MERGE_PREDICATE); { /// We avoid returning here a version to be used in a lightweight transaction. diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h index e49d80fc832..57e1e658665 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h @@ -294,13 +294,22 @@ public: bool removeFailedQuorumPart(const MergeTreePartInfo & part_info); + enum PullLogsReason + { + LOAD, + UPDATE, + MERGE_PREDICATE, + SYNC, + OTHER, + }; + /** Copy the new entries from the shared log to the queue of this replica. Set the log_pointer to the appropriate value. * If watch_callback is not empty, will call it when new entries appear in the log. * If there were new entries, notifies storage.queue_task_handle. * Additionally loads mutations (so that the set of mutations is always more recent than the queue). * Return the version of "logs" node (that is updated for every merge/mutation/... added to the log) */ - int32_t pullLogsToQueue(zkutil::ZooKeeperPtr zookeeper, Coordination::WatchCallback watch_callback = {}); + int32_t pullLogsToQueue(zkutil::ZooKeeperPtr zookeeper, Coordination::WatchCallback watch_callback = {}, PullLogsReason reason = OTHER); /// Load new mutation entries. If something new is loaded, schedule storage.merge_selecting_task. /// If watch_callback is not empty, will call it when new mutations appear in ZK. diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp index 25f25480549..a7bb56f1955 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp @@ -25,6 +25,8 @@ namespace DB namespace ErrorCodes { extern const int REPLICA_IS_ALREADY_ACTIVE; + extern const int REPLICA_STATUS_CHANGED; + } namespace @@ -55,6 +57,7 @@ void ReplicatedMergeTreeRestartingThread::run() if (need_stop) return; + bool reschedule_now = false; try { if (first_time || readonly_mode_was_set || storage.getZooKeeper()->expired()) @@ -131,15 +134,29 @@ void ReplicatedMergeTreeRestartingThread::run() first_time = false; } } - catch (...) + catch (const Exception & e) { /// We couldn't activate table let's set it into readonly mode setReadonly(); + partialShutdown(); + storage.startup_event.set(); + tryLogCurrentException(log, __PRETTY_FUNCTION__); + + if (e.code() == ErrorCodes::REPLICA_STATUS_CHANGED) + reschedule_now = true; + } + catch (...) + { + setReadonly(); + partialShutdown(); storage.startup_event.set(); tryLogCurrentException(log, __PRETTY_FUNCTION__); } - task->scheduleAfter(check_period_ms); + if (reschedule_now) + task->schedule(); + else + task->scheduleAfter(check_period_ms); } @@ -159,7 +176,7 @@ bool ReplicatedMergeTreeRestartingThread::tryStartup() /// pullLogsToQueue() after we mark replica 'is_active' (and after we repair if it was lost); /// because cleanup_thread doesn't delete log_pointer of active replicas. - storage.queue.pullLogsToQueue(zookeeper); + storage.queue.pullLogsToQueue(zookeeper, {}, ReplicatedMergeTreeQueue::LOAD); storage.queue.removeCurrentPartsFromMutations(); storage.last_queue_update_finish_time.store(time(nullptr)); diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 4f5d635e6ea..bdec69095ce 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -3089,7 +3089,7 @@ void StorageReplicatedMergeTree::queueUpdatingTask() } try { - queue.pullLogsToQueue(getZooKeeper(), queue_updating_task->getWatchCallback()); + queue.pullLogsToQueue(getZooKeeper(), queue_updating_task->getWatchCallback(), ReplicatedMergeTreeQueue::UPDATE); last_queue_update_finish_time.store(time(nullptr)); queue_update_in_progress = false; } @@ -4325,11 +4325,9 @@ void StorageReplicatedMergeTree::startup() restarting_thread.start(); /// Wait while restarting_thread initializes LeaderElection (and so on) or makes first attempt to do it + /// TODO Do we still need startup_event? startup_event.wait(); - /// If we don't separate create/start steps, race condition will happen - /// between the assignment of queue_task_handle and queueTask that use the queue_task_handle. - background_executor.start(); startBackgroundMovesIfNeeded(); part_moves_between_shards_orchestrator.start(); @@ -6970,7 +6968,7 @@ bool StorageReplicatedMergeTree::waitForShrinkingQueueSize(size_t queue_size, UI Stopwatch watch; /// Let's fetch new log entries firstly - queue.pullLogsToQueue(getZooKeeper()); + queue.pullLogsToQueue(getZooKeeper(), {}, ReplicatedMergeTreeQueue::SYNC); /// This is significant, because the execution of this task could be delayed at BackgroundPool. /// And we force it to be executed. @@ -7208,6 +7206,7 @@ MutationCommands StorageReplicatedMergeTree::getFirstAlterMutationCommandsForPar void StorageReplicatedMergeTree::startBackgroundMovesIfNeeded() { + /// FIXME is it related to replication somehow? If it is we should start it from RestartingThread only if (areBackgroundMovesNeeded()) background_moves_executor.start(); } From 1f283aeb1f02d24af58692ecc486c09bba41c5e9 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Tue, 17 Aug 2021 21:10:08 +0300 Subject: [PATCH 13/15] fix --- .../MergeTree/ReplicatedMergeTreeQueue.cpp | 22 ++++++++++++------- tests/queries/0_stateless/replication.lib | 4 +++- 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp index 3a9f477597d..c71a79d2009 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp @@ -476,13 +476,10 @@ bool ReplicatedMergeTreeQueue::removeFailedQuorumPart(const MergeTreePartInfo & int32_t ReplicatedMergeTreeQueue::pullLogsToQueue(zkutil::ZooKeeperPtr zookeeper, Coordination::WatchCallback watch_callback, PullLogsReason reason) { std::lock_guard lock(pull_logs_to_queue_mutex); - if (storage.is_readonly && reason != LOAD) + if (storage.is_readonly && reason == SYNC) { - /// Pulling logs when replica is readonly may cause obscure bugs, allow it on replica startup only - if (reason == SYNC) - throw Exception(ErrorCodes::READONLY, "Cannot SYNC REPLICA, because replica is readonly"); - - throw Exception(ErrorCodes::LOGICAL_ERROR, "Some background task ({}) tried to pull logs on readonly replica, it's a bug", reason); + throw Exception(ErrorCodes::READONLY, "Cannot SYNC REPLICA, because replica is readonly"); + /// TODO throw logical error for other reasons (except LOAD) } if (pull_log_blocker.isCancelled()) @@ -724,13 +721,22 @@ void ReplicatedMergeTreeQueue::updateMutations(zkutil::ZooKeeperPtr zookeeper, C std::vector> futures; for (const String & entry : entries_to_load) - futures.emplace_back(zookeeper->asyncGet(fs::path(zookeeper_path) / "mutations" / entry)); + futures.emplace_back(zookeeper->asyncTryGet(fs::path(zookeeper_path) / "mutations" / entry)); std::vector new_mutations; for (size_t i = 0; i < entries_to_load.size(); ++i) { + auto maybe_response = futures[i].get(); + if (maybe_response.error != Coordination::Error::ZOK) + { + assert(maybe_response.error == Coordination::Error::ZNONODE); + /// It's ok if it happened on server startup or table creation and replica loads all mutation entries. + /// It's also ok if mutation was killed. + LOG_WARNING(log, "Cannot get mutation node {} ({}), probably it was concurrently removed", entries_to_load[i], maybe_response.error); + continue; + } new_mutations.push_back(std::make_shared( - ReplicatedMergeTreeMutationEntry::parse(futures[i].get().data, entries_to_load[i]))); + ReplicatedMergeTreeMutationEntry::parse(maybe_response.data, entries_to_load[i]))); } bool some_mutations_are_probably_done = false; diff --git a/tests/queries/0_stateless/replication.lib b/tests/queries/0_stateless/replication.lib index 77b09dee1e0..053e512747f 100755 --- a/tests/queries/0_stateless/replication.lib +++ b/tests/queries/0_stateless/replication.lib @@ -36,7 +36,9 @@ function try_sync_replicas() function check_replication_consistency() { # Trigger pullLogsToQueue(...) and updateMutations(...) on some replica to make it pull all mutations, so it will be possible to kill them - some_table=$($CLICKHOUSE_CLIENT -q "SELECT name FROM system.tables WHERE database=currentDatabase() AND name like '$1%' LIMIT 1") + some_table=$($CLICKHOUSE_CLIENT -q "SELECT name FROM system.tables WHERE database=currentDatabase() AND name like '$1%' ORDER BY rand() LIMIT 1") + $CLICKHOUSE_CLIENT --receive_timeout 3 -q "SYSTEM SYNC REPLICA $some_table" 1>/dev/null 2>/dev/null ||: + some_table=$($CLICKHOUSE_CLIENT -q "SELECT name FROM system.tables WHERE database=currentDatabase() AND name like '$1%' ORDER BY rand() LIMIT 1") $CLICKHOUSE_CLIENT --receive_timeout 3 -q "SYSTEM SYNC REPLICA $some_table" 1>/dev/null 2>/dev/null ||: # Forcefully cancel mutations to avoid waiting for them to finish From 697f2bcedbb0fe62fffe11eca83dc39fb2f79f53 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Mon, 23 Aug 2021 22:28:30 +0300 Subject: [PATCH 14/15] fix --- tests/queries/0_stateless/replication.lib | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tests/queries/0_stateless/replication.lib b/tests/queries/0_stateless/replication.lib index 053e512747f..62417822c6b 100755 --- a/tests/queries/0_stateless/replication.lib +++ b/tests/queries/0_stateless/replication.lib @@ -35,6 +35,16 @@ function try_sync_replicas() function check_replication_consistency() { + # Do not check anything if all replicas are readonly, + # because is this case all replicas are probably lost (it may happen and it's not a bug) + res=$($CLICKHOUSE_CLIENT -q "SELECT count() - sum(is_readonly) FROM system.replicas WHERE database=currentDatabase() AND table LIKE '$1%'") + if [ $res -eq 0 ]; then + # Print dummy lines + echo "Replication did not hang: synced all replicas of $1" + echo "Consistency: 1" + return 0 + fi + # Trigger pullLogsToQueue(...) and updateMutations(...) on some replica to make it pull all mutations, so it will be possible to kill them some_table=$($CLICKHOUSE_CLIENT -q "SELECT name FROM system.tables WHERE database=currentDatabase() AND name like '$1%' ORDER BY rand() LIMIT 1") $CLICKHOUSE_CLIENT --receive_timeout 3 -q "SYSTEM SYNC REPLICA $some_table" 1>/dev/null 2>/dev/null ||: From 4a86deaa7de1e0e4dbbddbb3ac8b1f9e4c1c33a8 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Tue, 24 Aug 2021 18:56:32 +0300 Subject: [PATCH 15/15] fix --- tests/queries/0_stateless/replication.lib | 39 +++++++++++++---------- 1 file changed, 22 insertions(+), 17 deletions(-) diff --git a/tests/queries/0_stateless/replication.lib b/tests/queries/0_stateless/replication.lib index 62417822c6b..8fe300b59e8 100755 --- a/tests/queries/0_stateless/replication.lib +++ b/tests/queries/0_stateless/replication.lib @@ -4,15 +4,17 @@ function try_sync_replicas() { + table_name_prefix=$1 + readarray -t empty_partitions_arr < <(${CLICKHOUSE_CLIENT} -q \ "SELECT DISTINCT substr(new_part_name, 1, position(new_part_name, '_') - 1) AS partition_id FROM system.replication_queue - WHERE (database = currentDatabase()) AND (table LIKE '$1%') AND (last_exception LIKE '%No active replica has part%') AND (partition_id NOT IN ( + WHERE (database = currentDatabase()) AND (table LIKE '$table_name_prefix%') AND (last_exception LIKE '%No active replica has part%') AND (partition_id NOT IN ( SELECT partition_id FROM system.parts - WHERE (database = currentDatabase()) AND (table LIKE '$1%') + WHERE (database = currentDatabase()) AND (table LIKE '$table_name_prefix%') ))") - readarray -t tables_arr < <(${CLICKHOUSE_CLIENT} -q "SELECT name FROM system.tables WHERE database=currentDatabase() AND name like '$1%' AND engine like '%Replicated%'") + readarray -t tables_arr < <(${CLICKHOUSE_CLIENT} -q "SELECT name FROM system.tables WHERE database=currentDatabase() AND name like '$table_name_prefix%' AND engine like '%Replicated%'") for t in "${tables_arr[@]}" do @@ -30,51 +32,54 @@ function try_sync_replicas() "select 'sync failed, queue:', * from system.replication_queue where database=currentDatabase() and table='$t' order by database, table, node_name" & done wait - echo "Replication did not hang: synced all replicas of $1" + echo "Replication did not hang: synced all replicas of $table_name_prefix" } function check_replication_consistency() { + table_name_prefix=$1 + check_query_part=$2 + # Do not check anything if all replicas are readonly, # because is this case all replicas are probably lost (it may happen and it's not a bug) - res=$($CLICKHOUSE_CLIENT -q "SELECT count() - sum(is_readonly) FROM system.replicas WHERE database=currentDatabase() AND table LIKE '$1%'") + res=$($CLICKHOUSE_CLIENT -q "SELECT count() - sum(is_readonly) FROM system.replicas WHERE database=currentDatabase() AND table LIKE '$table_name_prefix%'") if [ $res -eq 0 ]; then # Print dummy lines - echo "Replication did not hang: synced all replicas of $1" + echo "Replication did not hang: synced all replicas of $table_name_prefix" echo "Consistency: 1" return 0 fi # Trigger pullLogsToQueue(...) and updateMutations(...) on some replica to make it pull all mutations, so it will be possible to kill them - some_table=$($CLICKHOUSE_CLIENT -q "SELECT name FROM system.tables WHERE database=currentDatabase() AND name like '$1%' ORDER BY rand() LIMIT 1") + some_table=$($CLICKHOUSE_CLIENT -q "SELECT name FROM system.tables WHERE database=currentDatabase() AND name like '$table_name_prefix%' ORDER BY rand() LIMIT 1") $CLICKHOUSE_CLIENT --receive_timeout 3 -q "SYSTEM SYNC REPLICA $some_table" 1>/dev/null 2>/dev/null ||: - some_table=$($CLICKHOUSE_CLIENT -q "SELECT name FROM system.tables WHERE database=currentDatabase() AND name like '$1%' ORDER BY rand() LIMIT 1") + some_table=$($CLICKHOUSE_CLIENT -q "SELECT name FROM system.tables WHERE database=currentDatabase() AND name like '$table_name_prefix%' ORDER BY rand() LIMIT 1") $CLICKHOUSE_CLIENT --receive_timeout 3 -q "SYSTEM SYNC REPLICA $some_table" 1>/dev/null 2>/dev/null ||: # Forcefully cancel mutations to avoid waiting for them to finish - ${CLICKHOUSE_CLIENT} -q "KILL MUTATION WHERE database=currentDatabase() AND table like '$1%'" > /dev/null + ${CLICKHOUSE_CLIENT} -q "KILL MUTATION WHERE database=currentDatabase() AND table like '$table_name_prefix%'" > /dev/null # SYNC REPLICA is not enough if some MUTATE_PARTs are not assigned yet - wait_for_all_mutations "$1%" + wait_for_all_mutations "$table_name_prefix%" - try_sync_replicas "$1" + try_sync_replicas "$table_name_prefix" res=$($CLICKHOUSE_CLIENT -q \ "SELECT if((countDistinct(data) as c) == 0, 1, c) FROM ( - SELECT _table, ($2) AS data - FROM merge(currentDatabase(), '$1') GROUP BY _table + SELECT _table, ($check_query_part) AS data + FROM merge(currentDatabase(), '$table_name_prefix') GROUP BY _table )") echo "Consistency: $res" if [ $res -ne 1 ]; then echo "Replicas have diverged:" - $CLICKHOUSE_CLIENT -q "select 'data', _table, $2, arraySort(groupArrayDistinct(_part)) from merge(currentDatabase(), '$1') group by _table order by _table" - $CLICKHOUSE_CLIENT -q "select 'queue', * from system.replication_queue where database=currentDatabase() and table like '$1%' order by database, table, node_name" - $CLICKHOUSE_CLIENT -q "select 'mutations', * from system.mutations where database=currentDatabase() and table like '$1%' order by database, table, mutation_id" - $CLICKHOUSE_CLIENT -q "select 'parts', * from system.parts where database=currentDatabase() and table like '$1%' order by database, table, name" + $CLICKHOUSE_CLIENT -q "select 'data', _table, $check_query_part, arraySort(groupArrayDistinct(_part)) from merge(currentDatabase(), '$table_name_prefix') group by _table order by _table" + $CLICKHOUSE_CLIENT -q "select 'queue', * from system.replication_queue where database=currentDatabase() and table like '$table_name_prefix%' order by database, table, node_name" + $CLICKHOUSE_CLIENT -q "select 'mutations', * from system.mutations where database=currentDatabase() and table like '$table_name_prefix%' order by database, table, mutation_id" + $CLICKHOUSE_CLIENT -q "select 'parts', * from system.parts where database=currentDatabase() and table like '$table_name_prefix%' order by database, table, name" echo "Good luck with debugging..." fi