ClickHouse/tests/integration/test_recovery_replica/test.py

165 lines
6.7 KiB
Python
Raw Normal View History

2018-08-29 10:04:41 +00:00
import time
import pytest
2018-08-29 10:04:41 +00:00
from helpers.cluster import ClickHouseCluster
from helpers.test_tools import assert_eq_with_retry
2018-08-29 10:04:41 +00:00
2021-04-28 17:49:27 +00:00
# SETTINGS clause appended verbatim to every CREATE TABLE statement in this
# file. NOTE(review): presumably the small replication-log limits and zero
# cleanup delay make a detached replica fall behind the log quickly - confirm
# against the ClickHouse MergeTree settings documentation.
SETTINGS = "SETTINGS " + ", ".join(
    (
        "min_replicated_logs_to_keep=3",
        "max_replicated_logs_to_keep=5",
        "cleanup_delay_period=0",
        "cleanup_delay_period_random_add=0",
    )
)
2021-04-28 17:49:27 +00:00
def fill_nodes(nodes):
    """Create the replicated ``test_table`` on every node in *nodes*.

    Each node runs the same CREATE TABLE statement; only the replica name
    differs (taken from ``node.name``). The shared module-level ``SETTINGS``
    clause is appended to the statement.

    Args:
        nodes: iterable of cluster instances exposing ``query()`` and ``name``.
    """
    for node in nodes:
        # The statement must contain nothing but SQL: any stray text inside
        # the literal would be sent to the server as part of the query.
        node.query(
            '''
                CREATE TABLE test_table(date Date, id UInt32)
                ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/replicated', '{replica}') ORDER BY id PARTITION BY toYYYYMM(date)
                {settings};
            '''.format(replica=node.name, settings=SETTINGS))
2018-08-29 10:04:41 +00:00
# One cluster per test module, made of three instances that are all started
# with ZooKeeper enabled.
cluster = ClickHouseCluster(__file__)

# Instances are registered in name order; `nodes` keeps them together for the
# helpers that iterate over every replica.
nodes = [
    cluster.add_instance(instance_name, with_zookeeper=True)
    for instance_name in ('node1', 'node2', 'node3')
]
node1, node2, node3 = nodes
2018-08-29 10:04:41 +00:00
2021-04-28 17:49:27 +00:00
def sync_replicas(table):
    """Run ``SYSTEM SYNC REPLICA <table>`` on every node in ``nodes``.

    Args:
        table: name of the replicated table to synchronise.
    """
    statement = "SYSTEM SYNC REPLICA {}".format(table)
    for replica in nodes:
        replica.query(statement)
2018-08-29 10:04:41 +00:00
@pytest.fixture(scope="module")
def start_cluster():
    """Module-scoped fixture: start the cluster and create ``test_table``.

    Yields:
        The started ``cluster`` object.

    Raises:
        Exception: any error raised while starting the cluster or creating
            the tables is printed and then propagated, so pytest reports the
            real cause instead of a fixture that never yielded.

    The cluster is shut down in all cases, whether start-up failed or the
    tests have finished.
    """
    try:
        cluster.start()
        fill_nodes(nodes)
        yield cluster
    except Exception as ex:
        # Keep the diagnostic output, but do not swallow the failure: a
        # generator fixture that ends without yielding only produces an
        # unrelated "did not yield a value" error.
        print(ex)
        raise
    finally:
        cluster.shutdown()
2018-08-29 13:47:48 +00:00
def test_recovery(start_cluster):
    """Detach ``test_table`` on node2, write on node1, then re-attach.

    Steps:
      1. Insert one row through node1 and sync all replicas.
      2. Detach the table on node2 and insert ten more rows through node1.
      3. Re-attach on node2 (with retries) and wait until its row count
         matches node1.
      4. Expect node1 or node3 to have logged that node2 was marked as lost.
      5. After a final sync every node must report 11 rows whose ids sum
         to 55 for ``date = toDate(1)``.
    """
    node1.query("INSERT INTO test_table VALUES (1, 0)")
    sync_replicas("test_table")
    node2.query("DETACH TABLE test_table")

    for row_id in range(1, 11):
        node1.query("INSERT INTO test_table VALUES (1, {})".format(row_id))

    def node2_has_rows(x):
        # The ATTACH result itself is ignored; only the table content matters.
        return len(node2.query("select * from test_table")) > 0

    node2.query_with_retry("ATTACH TABLE test_table",
                           check_callback=node2_has_rows)

    expected_count = node1.query("SELECT count(*) FROM test_table")
    assert_eq_with_retry(node2, "SELECT count(*) FROM test_table", expected_count)

    lost_marker = "Will mark replica node2 as lost"
    assert any(witness.contains_in_log(lost_marker) for witness in (node1, node3))

    sync_replicas("test_table")
    totals_query = "SELECT count(), sum(id) FROM test_table WHERE date=toDate(1)"
    for replica in nodes:
        assert replica.query(totals_query) == "11\t55\n"
2020-10-06 20:05:28 +00:00
def test_choose_source_replica(start_cluster):
    """Check which replica node2 copies from when it is re-attached.

    node2 is detached and fetches are stopped on node1 while node3 receives
    ten inserts. After node2 is re-attached and fetches are resumed, all
    replicas must agree on the row count, node1 or node3 must have logged
    that node2 was marked as lost, and node2's log must contain
    "Will mimic node3".
    """
    node3.query("INSERT INTO test_table VALUES (2, 0)")
    sync_replicas("test_table")
    node2.query("DETACH TABLE test_table")

    # With fetches stopped, node1 accumulates many queue entries, so node2
    # is expected to clone node3 instead.
    node1.query("SYSTEM STOP FETCHES test_table")

    for row_id in range(1, 11):
        node3.query("INSERT INTO test_table VALUES (2, {})".format(row_id))

    def node2_has_rows(x):
        # The ATTACH result itself is ignored; only the table content matters.
        return len(node2.query("select * from test_table")) > 0

    node2.query_with_retry("ATTACH TABLE test_table",
                           check_callback=node2_has_rows)

    node1.query("SYSTEM START FETCHES test_table")
    for replica in (node1, node2):
        replica.query("SYSTEM SYNC REPLICA test_table")

    count_query = "SELECT count(*) FROM test_table"
    for replica in (node1, node2):
        assert replica.query(count_query) == node3.query(count_query)

    lost_marker = "Will mark replica node2 as lost"
    assert any(witness.contains_in_log(lost_marker) for witness in (node1, node3))
    assert node2.contains_in_log("Will mimic node3")

    sync_replicas("test_table")
    totals_query = "SELECT count(), sum(id) FROM test_table WHERE date=toDate(2)"
    for replica in nodes:
        assert replica.query(totals_query) == "11\t55\n"
def test_update_metadata(start_cluster):
    """Check that a re-attached replica picks up schema changes it missed.

    A dedicated ``update_metadata`` table is created on every node. Three
    scenarios follow, each detaching the table on node2, altering the table
    elsewhere, re-attaching and comparing ``DESC TABLE`` output and
    aggregate values across nodes:
      1. ADD COLUMN (marked "alter without mutation" by the original author).
      2. DROP COLUMN (marked "alter with mutation"), followed by an ALTER
         issued on the re-attached replica itself.
      3. Several alters interleaved with inserts through node3.
    """

    def assert_node2_marked_lost():
        # NOTE(review): the same message is also expected by the tests on
        # test_table, so this check may already be satisfied by earlier log
        # lines - confirm whether that is intended.
        lost_marker = "Will mark replica node2 as lost"
        assert node1.contains_in_log(lost_marker) or node3.contains_in_log(lost_marker)

    def assert_same_structure():
        for other in (node2, node3):
            assert node1.query("DESC TABLE update_metadata") == other.query("DESC TABLE update_metadata")

    def assert_on_all_nodes(select_query, expected):
        for replica in nodes:
            assert replica.query(select_query) == expected

    for node in nodes:
        node.query(
            '''
                CREATE TABLE update_metadata(key UInt32)
                ENGINE = ReplicatedMergeTree('/test/update_metadata', '{replica}') ORDER BY key PARTITION BY key % 10
                {settings};
            '''.format(replica=node.name, settings=SETTINGS))

    for key in range(1, 11):
        node1.query("INSERT INTO update_metadata VALUES ({})".format(key))

    # --- Scenario 1: alter without mutation ---
    node2.query("DETACH TABLE update_metadata")
    node1.query("ALTER TABLE update_metadata ADD COLUMN col1 UInt32")

    for step in range(1, 11):
        tens = step * 10
        node1.query("INSERT INTO update_metadata VALUES ({}, {})".format(tens, tens))

    assert_node2_marked_lost()

    node2.query("ATTACH TABLE update_metadata")
    sync_replicas("update_metadata")
    assert_same_structure()
    assert_on_all_nodes(
        "SELECT count(), sum(key), sum(col1) FROM update_metadata",
        "20\t605\t550\n")

    # --- Scenario 2: alter with mutation ---
    node2.query("DETACH TABLE update_metadata")
    node1.query("ALTER TABLE update_metadata DROP COLUMN col1")

    for step in range(1, 11):
        node1.query("INSERT INTO update_metadata VALUES ({})".format(step * 100))

    assert_node2_marked_lost()

    node2.query("ATTACH TABLE update_metadata")
    sync_replicas("update_metadata")
    assert_same_structure()

    # Check that it's possible to execute an alter on the cloned replica.
    node2.query("ALTER TABLE update_metadata ADD COLUMN col1 UInt32")
    sync_replicas("update_metadata")
    assert_on_all_nodes(
        "SELECT count(), sum(key), sum(col1) FROM update_metadata",
        "30\t6105\t0\n")

    # --- Scenario 3: more complex case with multiple alters ---
    node2.query("TRUNCATE TABLE update_metadata")
    for key in range(1, 11):
        node1.query("INSERT INTO update_metadata VALUES ({}, {})".format(key, key))

    # The following alters hang because of "No active replica has part ... or covering part"
    # node2.query("SYSTEM STOP REPLICATED SENDS update_metadata")
    # node2.query("INSERT INTO update_metadata VALUES (42, 42)") # this part will be lost
    node2.query("DETACH TABLE update_metadata")

    node1.query("ALTER TABLE update_metadata MODIFY COLUMN col1 String")
    node1.query("ALTER TABLE update_metadata ADD COLUMN col2 INT")

    for step in range(1, 11):
        tens = step * 10
        node3.query("INSERT INTO update_metadata VALUES ({}, '{}', {})".format(tens, tens, tens))

    node1.query("ALTER TABLE update_metadata DROP COLUMN col1")
    node1.query("ALTER TABLE update_metadata ADD COLUMN col3 Date")

    node2.query("ATTACH TABLE update_metadata")
    sync_replicas("update_metadata")
    assert_same_structure()
    assert_on_all_nodes(
        "SELECT count(), sum(key), sum(col2) FROM update_metadata",
        "20\t605\t550\n")