ClickHouse/tests/integration/test_consistent_parts_after_clone_replica/test.py

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

116 lines
4.1 KiB
Python
Raw Normal View History

2019-08-16 16:14:08 +00:00
import pytest
from helpers.cluster import ClickHouseCluster
from helpers.network import PartitionManager
from helpers.test_tools import assert_eq_with_retry
2019-08-16 16:14:08 +00:00
def fill_nodes(nodes, shard):
    """Create the ``test`` database and the replicated ``test_table`` on each node.

    Args:
        nodes: iterable of instances; each must provide ``query()`` and ``name``.
        shard: value substituted for ``{shard}`` in the ZooKeeper path
            ``/clickhouse/tables/test{shard}/replicated``.

    Every node uses its own ``name`` as the replica name.
    """
    # The SQL text is kept flush-left on purpose: it is sent to the server
    # verbatim, so its content must not pick up Python indentation.
    create_statements = """
CREATE DATABASE test;
CREATE TABLE test_table(date Date, id UInt32)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/test{shard}/replicated', '{replica}')
ORDER BY id PARTITION BY toYYYYMM(date)
SETTINGS min_replicated_logs_to_keep=3, max_replicated_logs_to_keep=5, cleanup_delay_period=0,
cleanup_delay_period_random_add=0, cleanup_thread_preferred_points_per_iteration=0;
"""
    for instance in nodes:
        instance.query(
            create_statements.format(shard=shard, replica=instance.name)
        )
2019-08-16 16:14:08 +00:00
# Test cluster with two instances, both created with the same
# remote_servers config and with_zookeeper=True.
cluster = ClickHouseCluster(__file__)
node1, node2 = (
    cluster.add_instance(
        instance_name,
        main_configs=["configs/remote_servers.xml"],
        with_zookeeper=True,
    )
    for instance_name in ("node1", "node2")
)
2019-08-16 16:14:08 +00:00
2019-08-16 16:14:08 +00:00
@pytest.fixture(scope="module")
def start_cluster():
    """Module-scoped fixture that starts the cluster and creates the test tables.

    Starts ``cluster``, runs ``fill_nodes`` on ``node1`` and ``node2`` for
    shard 1, and yields the cluster to the tests.

    Yields:
        The started ``cluster`` object.

    Raises:
        Exception: any error raised during start-up or table creation is
            printed and then re-raised, so the real cause is reported.

    The cluster is always shut down in ``finally``, whether set-up succeeded
    or not.
    """
    try:
        cluster.start()
        fill_nodes([node1, node2], 1)
        yield cluster
    except Exception as ex:
        # Print for the test log, then propagate. Swallowing the error here
        # would end the generator without yielding and hide the real failure
        # behind a generic fixture error.
        print(ex)
        raise
    finally:
        cluster.shutdown()
def test_inconsistent_parts_if_drop_while_replica_not_active(start_cluster):
    """Check that a replica cut off during a DROP PARTITION is cloned consistently.

    Scenario:
      1. Insert rows through node1 and wait until node2 has the same count.
      2. Partition node1 from node2, insert more rows on node2 only, then
         cut node1 off from ZooKeeper as well.
      3. Drop the partition on node2 and insert again, until node1 is flagged
         ``is_lost`` in ZooKeeper.
      4. Heal the network and verify node1 reaches the same row count as
         node2, logged that it mimics node2, and that both replication queues
         drain (ignoring MERGE_PARTS entries).
    """
    insert_template = "INSERT INTO test_table VALUES ('2019-08-16', {})"
    row_count_query = "SELECT count(*) FROM test_table"
    pending_queue_query = (
        "SELECT count() FROM system.replication_queue WHERE type != 'MERGE_PARTS'"
    )

    with PartitionManager() as partition_manager:
        # insert into all replicas
        for row_id in range(10):
            node1.query(insert_template.format(row_id))
        expected_rows = node1.query(row_count_query)
        assert_eq_with_retry(node2, row_count_query, expected_rows)

        # partition the first replica from the second one and (later) from zk
        partition_manager.partition_instances(node1, node2)

        # insert some parts on the second replica only, we will drop these parts
        for row_id in range(10, 20):
            node2.query(insert_template.format(row_id))

        partition_manager.drop_instance_zk_connections(
            node1, action="REJECT --reject-with tcp-reset"
        )

        # drop all parts on the second replica
        node2.query_with_retry("ALTER TABLE test_table DROP PARTITION 201908")
        assert_eq_with_retry(node2, row_count_query, "0")

        # insert into the second replica
        # DROP_RANGE will be removed from the replication log and the first replica will be lost
        for row_id in range(20, 40):
            node2.query(insert_template.format(row_id))

        is_lost_query = "SELECT value FROM system.zookeeper WHERE path='/clickhouse/tables/test1/replicated/replicas/node1' AND name='is_lost'"
        assert_eq_with_retry(node2, is_lost_query, "1", retry_count=40)

        node2.wait_for_log_line("Will mark replica node1 as lost")

        # the first replica will be cloned from the second
        partition_manager.heal_all()
        node2.wait_for_log_line("Sending part")
        rows_on_source = node2.query(row_count_query)
        assert_eq_with_retry(node1, row_count_query, rows_on_source)

        # ensure replica was cloned
        assert node1.contains_in_log("Will mimic node2")

        # 2 options:
        # - There wasn't a merge in node2. Then node1 should have cloned the 2 parts
        # - There was a merge in progress. node1 might have cloned the new part but still has the original 2 parts
        #   in the replication queue until they are finally discarded with a message like:
        #   `Skipping action for part 201908_40_40_0 because part 201908_21_40_4 already exists.`
        #
        # In any case after a short while the replication queue should be empty
        for replica in (node1, node2):
            assert_eq_with_retry(replica, pending_queue_query, "0")