mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-23 08:02:02 +00:00
check replication consistency after 993-like tests
This commit is contained in:
parent
84d936355a
commit
12b422f82c
@ -0,0 +1,2 @@
|
||||
Replication did not hang: synced all replicas of alter_table
|
||||
0 1
|
@ -3,15 +3,17 @@
|
||||
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
|
||||
# shellcheck source=../shell_config.sh
|
||||
. "$CURDIR"/../shell_config.sh
|
||||
# shellcheck source=./replication.lib
|
||||
. "$CURDIR"/replication.lib
|
||||
|
||||
set -e
|
||||
|
||||
$CLICKHOUSE_CLIENT -n -q "
|
||||
DROP TABLE IF EXISTS alter_table;
|
||||
DROP TABLE IF EXISTS alter_table2;
|
||||
DROP TABLE IF EXISTS alter_table0;
|
||||
DROP TABLE IF EXISTS alter_table1;
|
||||
|
||||
CREATE TABLE alter_table (a UInt8, b Int16, c Float32, d String, e Array(UInt8), f Nullable(UUID), g Tuple(UInt8, UInt16)) ENGINE = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/alter_table', 'r1') ORDER BY a PARTITION BY b % 10 SETTINGS old_parts_lifetime = 1, cleanup_delay_period = 1, cleanup_delay_period_random_add = 0;
|
||||
CREATE TABLE alter_table2 (a UInt8, b Int16, c Float32, d String, e Array(UInt8), f Nullable(UUID), g Tuple(UInt8, UInt16)) ENGINE = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/alter_table', 'r2') ORDER BY a PARTITION BY b % 10 SETTINGS old_parts_lifetime = 1, cleanup_delay_period = 1, cleanup_delay_period_random_add = 0
|
||||
CREATE TABLE alter_table0 (a UInt8, b Int16, c Float32, d String, e Array(UInt8), f Nullable(UUID), g Tuple(UInt8, UInt16)) ENGINE = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/alter_table', 'r1') ORDER BY a PARTITION BY b % 10 SETTINGS old_parts_lifetime = 1, cleanup_delay_period = 1, cleanup_delay_period_random_add = 0;
|
||||
CREATE TABLE alter_table1 (a UInt8, b Int16, c Float32, d String, e Array(UInt8), f Nullable(UUID), g Tuple(UInt8, UInt16)) ENGINE = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/alter_table', 'r2') ORDER BY a PARTITION BY b % 10 SETTINGS old_parts_lifetime = 1, cleanup_delay_period = 1, cleanup_delay_period_random_add = 0
|
||||
"
|
||||
|
||||
function thread1()
|
||||
@ -22,22 +24,22 @@ function thread1()
|
||||
|
||||
function thread2()
|
||||
{
|
||||
while true; do $CLICKHOUSE_CLIENT -n --query "ALTER TABLE alter_table ADD COLUMN h String DEFAULT '0'; ALTER TABLE alter_table MODIFY COLUMN h UInt64; ALTER TABLE alter_table DROP COLUMN h;"; done
|
||||
while true; do $CLICKHOUSE_CLIENT -n --query "ALTER TABLE alter_table0 ADD COLUMN h String DEFAULT '0'; ALTER TABLE alter_table0 MODIFY COLUMN h UInt64; ALTER TABLE alter_table0 DROP COLUMN h;"; done
|
||||
}
|
||||
|
||||
function thread3()
|
||||
{
|
||||
while true; do $CLICKHOUSE_CLIENT -q "INSERT INTO alter_table SELECT rand(1), rand(2), 1 / rand(3), toString(rand(4)), [rand(5), rand(6)], rand(7) % 2 ? NULL : generateUUIDv4(), (rand(8), rand(9)) FROM numbers(100000)"; done
|
||||
while true; do $CLICKHOUSE_CLIENT -q "INSERT INTO alter_table0 SELECT rand(1), rand(2), 1 / rand(3), toString(rand(4)), [rand(5), rand(6)], rand(7) % 2 ? NULL : generateUUIDv4(), (rand(8), rand(9)) FROM numbers(100000)"; done
|
||||
}
|
||||
|
||||
function thread4()
|
||||
{
|
||||
while true; do $CLICKHOUSE_CLIENT -q "OPTIMIZE TABLE alter_table FINAL"; done
|
||||
while true; do $CLICKHOUSE_CLIENT -q "OPTIMIZE TABLE alter_table0 FINAL"; done
|
||||
}
|
||||
|
||||
function thread5()
|
||||
{
|
||||
while true; do $CLICKHOUSE_CLIENT -q "ALTER TABLE alter_table DELETE WHERE cityHash64(a,b,c,d,e,g) % 1048576 < 524288"; done
|
||||
while true; do $CLICKHOUSE_CLIENT -q "ALTER TABLE alter_table0 DELETE WHERE cityHash64(a,b,c,d,e,g) % 1048576 < 524288"; done
|
||||
}
|
||||
|
||||
# https://stackoverflow.com/questions/9954794/execute-a-shell-function-with-timeout
|
||||
@ -74,8 +76,9 @@ timeout $TIMEOUT bash -c thread4 2> /dev/null &
|
||||
timeout $TIMEOUT bash -c thread5 2> /dev/null &
|
||||
|
||||
wait
|
||||
check_replication_consistency "alter_table" "count(), sum(a), sum(b), round(sum(c))"
|
||||
|
||||
$CLICKHOUSE_CLIENT -n -q "DROP TABLE alter_table;" &
|
||||
$CLICKHOUSE_CLIENT -n -q "DROP TABLE alter_table2;" &
|
||||
$CLICKHOUSE_CLIENT -n -q "DROP TABLE alter_table0;" &
|
||||
$CLICKHOUSE_CLIENT -n -q "DROP TABLE alter_table1;" &
|
||||
|
||||
wait
|
||||
|
@ -0,0 +1,2 @@
|
||||
Replication did not hang: synced all replicas of alter_table_
|
||||
0 1
|
@ -3,6 +3,8 @@
|
||||
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
|
||||
# shellcheck source=../shell_config.sh
|
||||
. "$CURDIR"/../shell_config.sh
|
||||
# shellcheck source=./replication.lib
|
||||
. "$CURDIR"/replication.lib
|
||||
|
||||
set -e
|
||||
|
||||
@ -99,6 +101,8 @@ timeout $TIMEOUT bash -c thread6 2>&1 | grep "was not completely removed from Zo
|
||||
|
||||
wait
|
||||
|
||||
check_replication_consistency "alter_table_" "count(), sum(a), sum(b), round(sum(c))"
|
||||
|
||||
for i in {0..9}; do
|
||||
$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS alter_table_$i" 2>&1 | grep "was not completely removed from ZooKeeper" &
|
||||
done
|
||||
|
@ -1,6 +1,8 @@
|
||||
Starting alters
|
||||
Finishing alters
|
||||
Equal number of columns
|
||||
Replication did not hang: synced all replicas of concurrent_alter_add_drop_
|
||||
0 1
|
||||
0
|
||||
0
|
||||
0
|
||||
|
@ -3,6 +3,8 @@
|
||||
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
|
||||
# shellcheck source=../shell_config.sh
|
||||
. "$CURDIR"/../shell_config.sh
|
||||
# shellcheck source=./replication.lib
|
||||
. "$CURDIR"/replication.lib
|
||||
|
||||
REPLICAS=3
|
||||
|
||||
@ -101,6 +103,8 @@ while [[ $(timeout 120 ${CLICKHOUSE_CLIENT} --query "ALTER TABLE concurrent_alte
|
||||
sleep 1
|
||||
done
|
||||
|
||||
check_replication_consistency "concurrent_alter_add_drop_" "count(), sum(key), sum(cityHash64(value0))"
|
||||
|
||||
for i in $(seq $REPLICAS); do
|
||||
$CLICKHOUSE_CLIENT --query "SYSTEM SYNC REPLICA concurrent_alter_add_drop_$i"
|
||||
$CLICKHOUSE_CLIENT --query "SELECT COUNT() FROM system.mutations WHERE is_done = 0 and table = 'concurrent_alter_add_drop_$i'"
|
||||
|
@ -5,6 +5,8 @@
|
||||
1725
|
||||
Starting alters
|
||||
Finishing alters
|
||||
Replication did not hang: synced all replicas of concurrent_alter_mt_
|
||||
0 1
|
||||
1
|
||||
0
|
||||
1
|
||||
|
@ -3,6 +3,8 @@
|
||||
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
|
||||
# shellcheck source=../shell_config.sh
|
||||
. "$CURDIR"/../shell_config.sh
|
||||
# shellcheck source=./replication.lib
|
||||
. "$CURDIR"/replication.lib
|
||||
|
||||
REPLICAS=5
|
||||
|
||||
@ -112,6 +114,8 @@ while [[ $(timeout 120 ${CLICKHOUSE_CLIENT} --query "ALTER TABLE concurrent_alte
|
||||
sleep 1
|
||||
done
|
||||
|
||||
check_replication_consistency "concurrent_alter_mt_" "count(), sum(key), sum(cityHash64(value1)), sum(cityHash64(value2))"
|
||||
|
||||
for i in $(seq $REPLICAS); do
|
||||
$CLICKHOUSE_CLIENT --query "SYSTEM SYNC REPLICA concurrent_alter_mt_$i"
|
||||
$CLICKHOUSE_CLIENT --query "SELECT SUM(toUInt64(value1)) > $INITIAL_SUM FROM concurrent_alter_mt_$i"
|
||||
|
@ -1 +1,3 @@
|
||||
Replication did not hang
|
||||
Replication did not hang: synced all replicas of dst_
|
||||
0 1
|
||||
Replication did not hang: synced all replicas of src_
|
||||
|
@ -3,6 +3,8 @@
|
||||
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
|
||||
# shellcheck source=../shell_config.sh
|
||||
. "$CURDIR"/../shell_config.sh
|
||||
# shellcheck source=./replication.lib
|
||||
. "$CURDIR"/replication.lib
|
||||
|
||||
declare -A engines
|
||||
engines[0]="MergeTree"
|
||||
@ -116,13 +118,8 @@ timeout $TIMEOUT bash -c optimize_thread &
|
||||
timeout $TIMEOUT bash -c drop_part_thread &
|
||||
wait
|
||||
|
||||
for ((i=0; i<16; i++)) do
|
||||
# The size of log is big, so increase timeout.
|
||||
$CLICKHOUSE_CLIENT --receive_timeout 600 -q "SYSTEM SYNC REPLICA dst_$i" &
|
||||
$CLICKHOUSE_CLIENT --receive_timeout 600 -q "SYSTEM SYNC REPLICA src_$i" 2>/dev/null &
|
||||
done
|
||||
wait
|
||||
echo "Replication did not hang"
|
||||
check_replication_consistency "dst_" "count(), sum(p), sum(k), sum(v)"
|
||||
try_sync_replicas "src_"
|
||||
|
||||
for ((i=0; i<16; i++)) do
|
||||
$CLICKHOUSE_CLIENT -q "DROP TABLE dst_$i" 2>&1| grep -Fv "is already started to be removing" &
|
||||
|
@ -14,3 +14,5 @@ CREATE TABLE default.concurrent_kill_4\n(\n `key` UInt64,\n `value` Int64\
|
||||
Metadata version on replica 5 equal with first replica, OK
|
||||
CREATE TABLE default.concurrent_kill_5\n(\n `key` UInt64,\n `value` Int64\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/01593_concurrent_alter_mutations_kill_many_replicas_long_default/{shard}\', \'{replica}5\')\nORDER BY key\nSETTINGS max_replicated_mutations_in_queue = 1000, number_of_free_entries_in_pool_to_execute_mutation = 0, max_replicated_merges_in_queue = 1000, index_granularity = 8192
|
||||
499999500000
|
||||
Replication did not hang: synced all replicas of concurrent_kill_
|
||||
0 1
|
||||
|
@ -3,6 +3,8 @@
|
||||
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
|
||||
# shellcheck source=../shell_config.sh
|
||||
. "$CURDIR"/../shell_config.sh
|
||||
# shellcheck source=./replication.lib
|
||||
. "$CURDIR"/replication.lib
|
||||
|
||||
REPLICAS=5
|
||||
|
||||
@ -59,10 +61,6 @@ timeout $TIMEOUT bash -c kill_mutation_thread 2> /dev/null &
|
||||
|
||||
wait
|
||||
|
||||
for i in $(seq $REPLICAS); do
|
||||
$CLICKHOUSE_CLIENT --query "SYSTEM SYNC REPLICA concurrent_kill_$i"
|
||||
done
|
||||
|
||||
# with timeout alter query can be not finished yet, so to execute new alter
|
||||
# we use retries
|
||||
counter=0
|
||||
@ -80,7 +78,7 @@ while true; do
|
||||
done
|
||||
|
||||
|
||||
metadata_version=$($CLICKHOUSE_CLIENT --query "SELECT value FROM system.zookeeper WHERE path = '/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/s1/replicas/r1$i/' and name = 'metadata_version'")
|
||||
metadata_version=$($CLICKHOUSE_CLIENT --query "SELECT value FROM system.zookeeper WHERE path = '/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/s1/replicas/r11/' and name = 'metadata_version'")
|
||||
for i in $(seq $REPLICAS); do
|
||||
replica_metadata_version=$($CLICKHOUSE_CLIENT --query "SELECT value FROM system.zookeeper WHERE path = '/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/s1/replicas/r1$i/' and name = 'metadata_version'")
|
||||
|
||||
@ -95,6 +93,8 @@ done
|
||||
|
||||
$CLICKHOUSE_CLIENT --query "SELECT sum(value) FROM concurrent_kill_1"
|
||||
|
||||
check_replication_consistency "concurrent_kill_" "count(), sum(key), sum(cityHash64(value))"
|
||||
|
||||
for i in $(seq $REPLICAS); do
|
||||
$CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS concurrent_kill_$i"
|
||||
done
|
||||
|
@ -1 +1,3 @@
|
||||
Replication did not hang: synced all replicas of ttl_table
|
||||
0 1
|
||||
1
|
||||
|
@ -3,6 +3,8 @@
|
||||
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
|
||||
# shellcheck source=../shell_config.sh
|
||||
. "$CURDIR"/../shell_config.sh
|
||||
# shellcheck source=./replication.lib
|
||||
. "$CURDIR"/replication.lib
|
||||
|
||||
NUM_REPLICAS=5
|
||||
|
||||
@ -59,13 +61,12 @@ timeout $TIMEOUT bash -c optimize_thread 2> /dev/null &
|
||||
|
||||
wait
|
||||
|
||||
for i in $(seq 1 $NUM_REPLICAS); do
|
||||
$CLICKHOUSE_CLIENT --query "SYSTEM SYNC REPLICA ttl_table$i"
|
||||
done
|
||||
check_replication_consistency "ttl_table" "count(), sum(toUInt64(key))"
|
||||
|
||||
$CLICKHOUSE_CLIENT --query "SELECT * FROM system.replication_queue where table like 'ttl_table%' and database = '${CLICKHOUSE_DATABASE}' and type='MERGE_PARTS' and last_exception != '' FORMAT Vertical"
|
||||
$CLICKHOUSE_CLIENT --query "SELECT COUNT() > 0 FROM system.part_log where table like 'ttl_table%' and database = '${CLICKHOUSE_DATABASE}'"
|
||||
|
||||
|
||||
for i in $(seq 1 $NUM_REPLICAS); do
|
||||
$CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS ttl_table$i" &
|
||||
done
|
||||
|
@ -20,3 +20,23 @@ function wait_for_mutation()
|
||||
|
||||
done
|
||||
}
|
||||
|
||||
# Waits until every mutation on the matching tables has finished.
#
# Arguments:
#   $1 - table name pattern, inserted verbatim into `table like '<pattern>'`
#   $2 - database name (optional, defaults to $CLICKHOUSE_DATABASE)
# Globals:
#   CLICKHOUSE_CLIENT (read), CLICKHOUSE_DATABASE (read)
# Outputs:
#   After 200 unsuccessful polls (one per second) prints a timeout message
#   to both stdout and stderr. The function itself still returns normally.
function wait_for_all_mutations()
{
    local table=$1
    local database=${2:-${CLICKHOUSE_DATABASE}}
    # This file is sourced into test scripts, many of which use `i` as their
    # own loop variable, so the counter must not leak out of the function.
    local i

    for i in {1..200}
    do
        sleep 1
        # coalesce(minOrNull(is_done), 1) yields 1 when there are no matching
        # mutations at all, so "nothing to wait for" counts as finished.
        if [[ $(${CLICKHOUSE_CLIENT} --query="SELECT coalesce(minOrNull(is_done), 1) FROM system.mutations WHERE database='$database' AND table like '$table'") -eq 1 ]]; then
            break
        fi

        if [[ $i -eq 200 ]]; then
            echo "Timed out while waiting for mutation to execute!" | tee /dev/stderr
        fi

    done
}
|
||||
|
35
tests/queries/0_stateless/replication.lib
Executable file
@ -0,0 +1,35 @@
|
||||
#!/usr/bin/env bash
|
||||
# shellcheck source=./mergetree_mutations.lib
|
||||
. "$CURDIR"/mergetree_mutations.lib
|
||||
|
||||
# Best-effort synchronisation of all Replicated* tables in the current
# database whose name starts with the given prefix.
#
# Arguments:
#   $1 - table name prefix (matched with `name like '<prefix>%'`)
# Globals:
#   CLICKHOUSE_CLIENT (read)
# Outputs:
#   Always prints "Replication did not hang: synced all replicas of <prefix>"
#   once every SYNC REPLICA query has returned. Failures of individual
#   queries are not turned into a non-zero exit status ("try" semantics).
function try_sync_replicas
{
    # Everything is local: this file is sourced, so helper variables must not
    # clobber same-named variables of the calling test script.
    local -a tables_arr=()
    local -a sync_pids=()
    local t pid

    readarray -t tables_arr < <(${CLICKHOUSE_CLIENT} --query="SELECT name FROM system.tables WHERE database=currentDatabase() AND name like '$1%' AND engine like '%Replicated%'")
    for t in "${tables_arr[@]}"
    do
        # The size of log may be big, so increase timeout.
        $CLICKHOUSE_CLIENT --receive_timeout 300 -q "SYSTEM SYNC REPLICA $t" &
        sync_pids+=("$!")
    done

    # Wait only for the jobs started above; a bare `wait` would also block on
    # unrelated background jobs of the caller. `|| true` keeps the function
    # best-effort even when the caller runs under `set -e`.
    for pid in "${sync_pids[@]}"
    do
        wait "$pid" || true
    done
    echo "Replication did not hang: synced all replicas of $1"
}
|
||||
|
||||
# Verifies that all replicas of a table family hold the same data.
#
# Arguments:
#   $1 - table name prefix. Passed as-is to try_sync_replicas, as "<prefix>%"
#        to wait_for_all_mutations, and as the table-name regexp of merge().
#   $2 - comma-separated aggregate expressions evaluated per table; the
#        resulting tuples are compared across tables.
# Globals:
#   CLICKHOUSE_CLIENT (read)
# Outputs:
#   On success prints the result row of the check query. If that query fails
#   (throwIf raises 'Replicas have diverged', or any other client error),
#   prints a per-table breakdown to both stdout and stderr instead.
function check_replication_consistency()
{
    local table_name_prefix=$1
    local check_query_part=$2

    try_sync_replicas "$table_name_prefix"

    # SYNC REPLICA is not enough if some MUTATE_PARTs are not assigned yet
    # TODO maybe just kill all mutations?
    wait_for_all_mutations "$table_name_prefix%"

    # The diagnostic query runs only when the check query fails; only the
    # diagnostic output is duplicated to stderr through tee.
    if ! $CLICKHOUSE_CLIENT -q \
    "SELECT
throwIf((countDistinct(data) AS c) != 1, 'Replicas have diverged'), c
FROM
(
SELECT _table, ($check_query_part) AS data
FROM merge(currentDatabase(), '$table_name_prefix') GROUP BY _table
)"; then
        $CLICKHOUSE_CLIENT -q \
        "select _table, $check_query_part, arraySort(groupArrayDistinct(_part)) from merge(currentDatabase(), '$table_name_prefix') group by _table" | tee /dev/stderr
    fi
}
|
||||
|
Loading…
Reference in New Issue
Block a user