mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-12-16 11:22:12 +00:00
81da52bdf4
In case of replicated database system stop pulling replication log for rmt2 should be done on all replicas, otherwise some replica may merge the part and all other replicas may fetch it. Also, since SYSTEM STOP PULLING REPLICATION LOG does not waits for the current pull, let's trigger log pull explicitly to provide at least some guarantee that replication log pulling had been stopped, otherwise race is possible [1]. [1]: https://s3.amazonaws.com/clickhouse-test-reports/57155/f68717ccd0a07a499911c9b0db7537ae8205e41b/stateless_tests_flaky_check__asan_.html Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
83 lines
3.3 KiB
Bash
Executable File
83 lines
3.3 KiB
Bash
Executable File
#!/usr/bin/env bash
|
|
# Tags: no-ordinary-database
|
|
# Tag no-ordinary-database: requires UUID
|
|
|
|
CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
|
|
# shellcheck source=../shell_config.sh
|
|
. "$CUR_DIR"/../shell_config.sh
|
|
|
|
# Test that retriable errors during merges/mutations
|
|
# (i.e. "No active replica has part X or covering part")
|
|
# does not appears as errors (level=Error), only as info message (level=Information).
|
|
|
|
cluster=default
|
|
if [[ $($CLICKHOUSE_CLIENT -q "select count()>0 from system.clusters where cluster = 'test_cluster_database_replicated'") = 1 ]]; then
|
|
cluster=test_cluster_database_replicated
|
|
fi
|
|
|
|
$CLICKHOUSE_CLIENT -nm --distributed_ddl_output_mode=none -q "
|
|
drop table if exists rmt1;
|
|
drop table if exists rmt2;
|
|
|
|
create table rmt1 (key Int) engine=ReplicatedMergeTree('/clickhouse/{database}', '1') order by key settings always_fetch_merged_part=1;
|
|
create table rmt2 (key Int) engine=ReplicatedMergeTree('/clickhouse/{database}', '2') order by key settings always_fetch_merged_part=0;
|
|
|
|
insert into rmt1 values (1);
|
|
insert into rmt1 values (2);
|
|
|
|
system sync replica rmt1;
|
|
-- SYSTEM STOP PULLING REPLICATION LOG does not waits for the current pull,
|
|
-- trigger it explicitly to 'avoid race' (though proper way will be to wait
|
|
-- for current pull in the StorageReplicatedMergeTree::getActionLock())
|
|
system sync replica rmt2;
|
|
-- NOTE: CLICKHOUSE_DATABASE is required
|
|
system stop pulling replication log on cluster $cluster $CLICKHOUSE_DATABASE.rmt2;
|
|
optimize table rmt1 final settings alter_sync=0, optimize_throw_if_noop=1;
|
|
" || exit 1
|
|
|
|
table_uuid=$($CLICKHOUSE_CLIENT -q "select uuid from system.tables where database = currentDatabase() and table = 'rmt1'")
|
|
if [[ -z $table_uuid ]]; then
|
|
echo "Table does not have UUID" >&2
|
|
exit 1
|
|
fi
|
|
|
|
# NOTE: that part name can be different from all_0_1_1, in case of ZooKeeper retries
|
|
part_name='%'
|
|
|
|
# wait while there be at least one 'No active replica has part all_0_1_1 or covering part' in logs
|
|
for _ in {0..50}; do
|
|
no_active_repilica_messages=$($CLICKHOUSE_CLIENT -nm -q "
|
|
system flush logs;
|
|
|
|
select count()
|
|
from system.text_log
|
|
where
|
|
event_date >= yesterday() and event_time >= now() - 600 and
|
|
(
|
|
(logger_name = 'MergeTreeBackgroundExecutor' and message like '%{$table_uuid::$part_name}%No active replica has part $part_name or covering part%') or
|
|
(logger_name like '$table_uuid::$part_name (MergeFromLogEntryTask)' and message like '%No active replica has part $part_name or covering part%')
|
|
);
|
|
")
|
|
if [[ $no_active_repilica_messages -gt 0 ]]; then
|
|
break
|
|
fi
|
|
# too frequent "system flush logs" causes troubles
|
|
sleep 1
|
|
done
|
|
|
|
$CLICKHOUSE_CLIENT -nm -q "
|
|
system start pulling replication log rmt2;
|
|
system flush logs;
|
|
|
|
select
|
|
level, count() > 0
|
|
from system.text_log
|
|
where
|
|
event_date >= yesterday() and event_time >= now() - 600 and
|
|
(
|
|
(logger_name = 'MergeTreeBackgroundExecutor' and message like '%{$table_uuid::$part_name}%No active replica has part $part_name or covering part%') or
|
|
(logger_name like '$table_uuid::$part_name (MergeFromLogEntryTask)' and message like '%No active replica has part $part_name or covering part%')
|
|
)
|
|
group by level;
|
|
"
|