2019-08-16 16:14:08 +00:00
import pytest
from helpers . cluster import ClickHouseCluster
from helpers . network import PartitionManager
from helpers . test_tools import assert_eq_with_retry
def fill_nodes(nodes, shard):
    """Create the ``test`` database and the replicated ``test_table`` on each node.

    Args:
        nodes: iterable of node objects exposing ``query(sql)`` and ``name``.
        shard: value substituted for ``{shard}`` in the table's ZooKeeper path.

    Each node receives the same DDL, with ``{replica}`` filled in from the
    node's own ``name``.
    """
    # The replicated-log settings are kept deliberately small
    # (min_replicated_logs_to_keep = 3, max_replicated_logs_to_keep = 5) with
    # cleanup_delay_period = 0, as spelled out in the SETTINGS clause below.
    ddl_template = '''
CREATE DATABASE test ;
CREATE TABLE test_table ( date Date , id UInt32 )
ENGINE = ReplicatedMergeTree ( ' /clickhouse/tables/test {shard} /replicated ' , ' {replica} ' )
ORDER BY id PARTITION BY toYYYYMM ( date )
SETTINGS min_replicated_logs_to_keep = 3 , max_replicated_logs_to_keep = 5 , cleanup_delay_period = 0 , cleanup_delay_period_random_add = 0 ;
'''
    for replica_node in nodes:
        ddl = ddl_template.format(shard=shard, replica=replica_node.name)
        replica_node.query(ddl)
2019-08-16 16:14:08 +00:00
# Module-level cluster shared by the fixture and the test below: two instances
# that use the same remote_servers config and both run with ZooKeeper enabled.
cluster = ClickHouseCluster(__file__)
node1 = cluster.add_instance(
    ' node1 ',
    main_configs=[' configs/remote_servers.xml '],
    with_zookeeper=True,
)
node2 = cluster.add_instance(
    ' node2 ',
    main_configs=[' configs/remote_servers.xml '],
    with_zookeeper=True,
)
2020-09-16 04:26:10 +00:00
2019-08-16 16:14:08 +00:00
@pytest.fixture(scope="module")
def start_cluster():
    """Start the cluster, create the test table on both nodes, and yield the cluster.

    Yields:
        The module-level ``cluster`` object, after ``cluster.start()`` and
        ``fill_nodes([node1, node2], 1)`` have completed.

    Raises:
        Exception: any error raised during startup or table creation is
            printed and then re-raised, so the real cause is reported instead
            of being swallowed.

    The cluster is always shut down in the ``finally`` clause, whether setup
    succeeded or failed.
    """
    try:
        cluster.start()
        fill_nodes([node1, node2], 1)
        yield cluster
    except Exception as ex:
        # Print for quick visibility in the test log, then propagate: if the
        # error were swallowed here the generator would finish without
        # yielding and the original failure would be hidden.
        print(ex)
        raise
    finally:
        cluster.shutdown()
def test_inconsistent_parts_if_drop_while_replica_not_active(start_cluster):
    """Check recovery of a replica that was cut off while a partition was dropped.

    Steps, in order:
      1. Insert rows through node1 and wait until node2 has the same count.
      2. Partition node1 from node2, insert more rows on node2 only, then cut
         node1 off from ZooKeeper as well.
      3. Drop the partition on node2 and insert again, so that the DROP_RANGE
         entry is removed from the replication log and node1 becomes lost.
      4. Heal the network and verify that node1 is cloned from node2 and that
         both replication queues drain (ignoring MERGE_PARTS entries).
    """
    count_query = " SELECT count(*) FROM test_table "
    insert_template = " INSERT INTO test_table VALUES ( ' 2019-08-16 ' , {} ) "
    is_lost_query = " SELECT value FROM system.zookeeper WHERE path= ' /clickhouse/tables/test1/replicated/replicas/node1 ' AND name= ' is_lost ' "
    pending_queue_query = " SELECT count() FROM system.replication_queue WHERE type != ' MERGE_PARTS ' "

    with PartitionManager() as network:
        # Seed data through the first replica and wait for the second to catch up.
        for row_id in range(10):
            node1.query(insert_template.format(row_id))
        expected_count = node1.query(count_query)
        assert_eq_with_retry(node2, count_query, expected_count)

        # Cut the first replica off from the second one (and, later, from zk).
        network.partition_instances(node1, node2)

        # These parts exist on the second replica only; they will be dropped.
        for row_id in range(10, 20):
            node2.query(insert_template.format(row_id))

        network.drop_instance_zk_connections(node1)

        # Remove every part on the second replica.
        node2.query_with_retry(" ALTER TABLE test_table DROP PARTITION 201908 ")
        assert_eq_with_retry(node2, count_query, " 0 ")

        # Further inserts on the second replica: DROP_RANGE will be removed
        # from the replication log and the first replica will be lost.
        for row_id in range(20, 40):
            node2.query(insert_template.format(row_id))

        assert_eq_with_retry(node2, is_lost_query, " 1 ")
        node2.wait_for_log_line(" Will mark replica node1 as lost ")

        # Restore connectivity; the first replica will be cloned from the second.
        network.heal_all()
        node2.wait_for_log_line(" Sending part ")
        expected_count = node2.query(count_query)
        assert_eq_with_retry(node1, count_query, expected_count)

        # Make sure the replica really was cloned.
        assert node1.contains_in_log(" Will mimic node2 ")

        # Two possibilities:
        # - No merge happened on node2, so node1 cloned the 2 parts.
        # - A merge was in progress: node1 may have cloned the merged part
        #   while the original 2 parts are still in its replication queue
        #   until they are discarded with a message like:
        #   `Skipping action for part 201908_40_40_0 because part 201908_21_40_4 already exists.`
        #
        # Either way, the replication queue should be empty after a short while.
        for replica in (node1, node2):
            assert_eq_with_retry(replica, pending_queue_query, " 0 ")