From 5281314ac0b5d68ae87004cc429c5e6b4c0dc242 Mon Sep 17 00:00:00 2001
From: Mike Kot
Date: Mon, 1 Mar 2021 16:42:31 +0300
Subject: [PATCH] Finished the test draft for ATTACH PARTITION; extracted the
 part data corruption function into a helper.

---
 .../helpers/corrupt_part_data_on_disk.py      |  7 ++
 .../configs/remote_servers.xml                | 21 ++++
 .../test_attach_without_fetching/test.py      | 97 ++++++++++++++-----
 .../test_broken_part_during_merge/test.py     | 10 +-
 4 files changed, 103 insertions(+), 32 deletions(-)
 create mode 100644 tests/integration/helpers/corrupt_part_data_on_disk.py
 create mode 100644 tests/integration/test_attach_without_fetching/configs/remote_servers.xml

diff --git a/tests/integration/helpers/corrupt_part_data_on_disk.py b/tests/integration/helpers/corrupt_part_data_on_disk.py
new file mode 100644
index 00000000000..c60a55d12d3
--- /dev/null
+++ b/tests/integration/helpers/corrupt_part_data_on_disk.py
@@ -0,0 +1,7 @@
+def corrupt_part_data_on_disk(node, table, part_name, is_detached=False):
+    parts_table = "system.detached_parts" if is_detached else "system.parts"
+    part_path = node.query(
+        "SELECT path FROM " + parts_table + " WHERE table = '{}' and name = '{}'".format(table, part_name)).strip()
+    node.exec_in_container(['bash', '-c',
+                            'cd {p} && ls *.bin | head -n 1 | xargs -I{{}} sh -c \'echo "1" >> $1\' -- {{}}'.format(
+                                p=part_path)], privileged=True)
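Note for reviewers: the helper "breaks" a part by appending one byte to the first .bin file it finds, so the file no longer matches the hash recorded in checksums.txt. As a sanity check, a caller could confirm the server actually notices the mismatch; a minimal sketch (assert_corruption_is_detected is hypothetical, not part of the patch, and assumes CHECK TABLE's default single-value result mode, check_query_single_value_result=1):

    from helpers.corrupt_part_data_on_disk import corrupt_part_data_on_disk

    def assert_corruption_is_detected(node, table, part_name):
        # Break an active part on disk...
        corrupt_part_data_on_disk(node, table, part_name)
        # ...and expect server-side verification to fail: CHECK TABLE returns
        # "0" when any part's checksums do not match.
        assert node.query("CHECK TABLE {}".format(table)).strip() == "0"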
diff --git a/tests/integration/test_attach_without_fetching/configs/remote_servers.xml b/tests/integration/test_attach_without_fetching/configs/remote_servers.xml
new file mode 100644
index 00000000000..7978f921b2e
--- /dev/null
+++ b/tests/integration/test_attach_without_fetching/configs/remote_servers.xml
@@ -0,0 +1,21 @@
+<yandex>
+    <remote_servers>
+        <test_cluster>
+            <shard>
+                <internal_replication>true</internal_replication>
+                <replica>
+                    <host>replica1</host>
+                    <port>9000</port>
+                </replica>
+                <replica>
+                    <host>replica2</host>
+                    <port>9000</port>
+                </replica>
+                <replica>
+                    <host>replica3</host>
+                    <port>9000</port>
+                </replica>
+            </shard>
+        </test_cluster>
+    </remote_servers>
+</yandex>
diff --git a/tests/integration/test_attach_without_fetching/test.py b/tests/integration/test_attach_without_fetching/test.py
index d712cf5d2e3..a5e759c7fd6 100644
--- a/tests/integration/test_attach_without_fetching/test.py
+++ b/tests/integration/test_attach_without_fetching/test.py
@@ -1,49 +1,98 @@
+import time
 import pytest
+
 from helpers.cluster import ClickHouseCluster
+from helpers.cluster import ClickHouseKiller
+from helpers.test_tools import assert_eq_with_retry
+from helpers.network import PartitionManager
+from helpers.corrupt_part_data_on_disk import corrupt_part_data_on_disk
+
+def fill_node(node):
+    node.query(
+        '''
+        CREATE TABLE test(n UInt32)
+        ENGINE = ReplicatedMergeTree('/clickhouse/tables/test', '{replica}')
+        ORDER BY n PARTITION BY n % 10;
+        '''.format(replica=node.name))
 
 cluster = ClickHouseCluster(__file__)
+configs = ["configs/remote_servers.xml"]
 
-node1 = cluster.add_instance('node1')
-
+node_1 = cluster.add_instance('replica1', with_zookeeper=True, main_configs=configs)
+node_2 = cluster.add_instance('replica2', with_zookeeper=True, main_configs=configs)
+node_3 = cluster.add_instance('replica3', with_zookeeper=True, main_configs=configs)
 
 @pytest.fixture(scope="module")
 def start_cluster():
     try:
         cluster.start()
-
+        fill_node(node_1)
+        fill_node(node_2)
+        # the third node is filled after the DETACH query
         yield cluster
+
+    except Exception as ex:
+        print(ex)
+
     finally:
         cluster.shutdown()
 
+def check_data(nodes, detached_parts):
+    for node in nodes:
+        for i in range(10):
+            assert node.query("SELECT count() FROM test WHERE n % 10 == " + str(i)) == \
+                ("0\n" if i in detached_parts else "10\n")
+
+        assert node.query("SELECT count() FROM system.parts WHERE table='test'") == \
+            str(10 - len(detached_parts)) + "\n"
+
+        assert node.query("SELECT count() FROM system.detached_parts WHERE table='test'") == \
+            str(len(detached_parts)) + "\n"
+
-# Check that ALTER TABLE ATTACH PARTITION does not fetch data from other replicas if it's present in the
-# detached/ folder
+# 1. Check that ALTER TABLE ATTACH PARTITION does not fetch data from other replicas if it's present in the
+#    detached/ folder.
+# 2. Check that ALTER TABLE ATTACH PARTITION downloads the data from other replicas if the detached/ folder
+#    does not contain the part with the correct checksums.
 def test_attach_without_fetching(start_cluster):
-    node1.query(
-        "CREATE TABLE test (date Date, key Int32, value String) Engine=MergeTree ORDER BY key PARTITION by date")
+    # 0. Insert data on two replicas
+    node_1.query("INSERT INTO test SELECT * FROM numbers(100)")
 
-    node1.query("INSERT INTO test SELECT toDate('2019-10-01'), number, toString(number) FROM numbers(100)")
+    check_data([node_1, node_2], detached_parts=[])
 
-    assert node1.query("SELECT COUNT() FROM test WHERE key % 10 == 0") == "10\n"
+    # 1. Detach the first three partitions on the replicas
 
-    node1.query("ALTER TABLE test DETACH PARTITION '2019-10-01'")
+    # This partition will be fetched from other replicas as it will be missing from the detached/ folder
+    node_1.query("ALTER TABLE test DETACH PARTITION '0'")
+    # This partition will be fetched from other replicas as the checksums won't match (we'll manually break the data).
+    node_1.query("ALTER TABLE test DETACH PARTITION '1'")
+    # This partition will be copied locally and attached without fetching
+    node_1.query("ALTER TABLE test DETACH PARTITION '2'")
 
-    assert node1.query("SELECT COUNT() FROM test WHERE key % 10 == 0") == "0\n"
-    assert node1.query("SELECT COUNT() FROM test") == "0\n"
+    check_data([node_1, node_2], detached_parts=[0, 1, 2])
 
-    # Break the network in the partition manager
-    # The data is not removed from detached/ so it's ok
+    # 2. Create the third replica
+    fill_node(node_3)
 
-    # to be sure output not empty
-    node1.exec_in_container(
-        ['bash', '-c', 'find /var/lib/clickhouse/data/default/test/detached -name "checksums.txt" | grep -e ".*" '],
-        privileged=True, user='root')
+    # 3. Attach the first partition and check that it has been fetched correctly
+    node_3.query("ALTER TABLE test ATTACH PARTITION '0'")
+    check_data([node_1, node_2, node_3], detached_parts=[1, 2])
 
-    node1.exec_in_container(
-        ['bash', '-c', 'find /var/lib/clickhouse/data/default/test/detached -name "checksums.txt" -delete'],
-        privileged=True, user='root')
+    # 4. Fetch the second partition to the third replica, break the data to corrupt the checksums,
+    #    then attach it and check that it was also fetched correctly.
+    node_3.query("ALTER TABLE test FETCH PARTITION '1' FROM '/clickhouse/tables/test'")
+    corrupt_part_data_on_disk(node_3, 'test', '1_0_0_0', is_detached=True)
+    node_3.query("ALTER TABLE test ATTACH PARTITION '1'")
 
-    node1.query("ALTER TABLE test ATTACH PARTITION '2019-10-01'")
+    check_data([node_1, node_2, node_3], detached_parts=[2])
 
-    assert node1.query("SELECT COUNT() FROM test WHERE key % 10 == 0") == "10\n"
-    assert node1.query("SELECT COUNT() FROM test") == "100\n"
+    # 5. Fetch the third partition to the third replica, break the network so that the replica won't be able to
+    #    download the data, then attach the partition (and check that it has been attached from the local data)
+    node_3.query("ALTER TABLE test FETCH PARTITION '2' FROM '/clickhouse/tables/test'")
+
+    with PartitionManager() as pm:
+        pm.partition_instances(node_1, node_3)
+        pm.partition_instances(node_2, node_3)
+
+        node_3.query("ALTER TABLE test ATTACH PARTITION '2'")
+
+        check_data([node_1, node_2, node_3], detached_parts=[])
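A note on naming: the ALTER TABLE ... DETACH/ATTACH/FETCH PARTITION statements above take a partition (here the value of n % 10), while corrupt_part_data_on_disk takes a part name such as '1_0_0_0' (partition ID, minimum block number, maximum block number, level). Separately, assert_eq_with_retry is imported but not used yet; if the count checks ever race with replication between the replicas, check_data's plain asserts could poll instead. A rough sketch of that option (check_part_counts_with_retry is hypothetical; the helper's (instance, query, expectation) signature comes from helpers/test_tools.py):

    from helpers.test_tools import assert_eq_with_retry

    def check_part_counts_with_retry(node, detached_parts):
        # Poll until the active/detached part counts settle at the expected values.
        assert_eq_with_retry(node, "SELECT count() FROM system.parts WHERE table='test'",
                             str(10 - len(detached_parts)))
        assert_eq_with_retry(node, "SELECT count() FROM system.detached_parts WHERE table='test'",
                             str(len(detached_parts)))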
diff --git a/tests/integration/test_broken_part_during_merge/test.py b/tests/integration/test_broken_part_during_merge/test.py
index 33719166f4a..910dbc1d1a9 100644
--- a/tests/integration/test_broken_part_during_merge/test.py
+++ b/tests/integration/test_broken_part_during_merge/test.py
@@ -3,6 +3,7 @@ import pytest
 from helpers.cluster import ClickHouseCluster
 from multiprocessing.dummy import Pool
 from helpers.network import PartitionManager
+from helpers.corrupt_part_data_on_disk import corrupt_part_data_on_disk
 import time
 
 cluster = ClickHouseCluster(__file__)
@@ -25,13 +26,6 @@ def started_cluster():
     finally:
         cluster.shutdown()
 
-def corrupt_data_part_on_disk(node, table, part_name):
-    part_path = node.query(
-        "SELECT path FROM system.parts WHERE table = '{}' and name = '{}'".format(table, part_name)).strip()
-    node.exec_in_container(['bash', '-c',
-                            'cd {p} && ls *.bin | head -n 1 | xargs -I{{}} sh -c \'echo "1" >> $1\' -- {{}}'.format(
-                                p=part_path)], privileged=True)
-
 def test_merge_and_part_corruption(started_cluster):
     node1.query("SYSTEM STOP REPLICATION QUEUES replicated_mt")
@@ -43,7 +37,7 @@ def test_merge_and_part_corruption(started_cluster):
     # Need to corrupt "border part" (left or right). If we will corrupt something in the middle
     # clickhouse will not consider merge as broken, because we have parts with the same min and max
     # block numbers.
-    corrupt_data_part_on_disk(node1, 'replicated_mt', 'all_3_3_0')
+    corrupt_part_data_on_disk(node1, 'replicated_mt', 'all_3_3_0')
 
     with Pool(1) as p:
         def optimize_with_delay(x):
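For reference, the xargs pipeline shared by both tests appends one byte to the first *.bin file in the part directory. A pure-Python equivalent, for illustration only (corrupt_first_bin_file is hypothetical; the real helper must run inside the server's container via exec_in_container, which is why it shells out):

    import os

    def corrupt_first_bin_file(part_path):
        # Mirror of the shell pipeline: pick the first .bin file and append a
        # byte, which invalidates the checksum recorded in checksums.txt.
        bin_files = sorted(f for f in os.listdir(part_path) if f.endswith('.bin'))
        with open(os.path.join(part_path, bin_files[0]), 'ab') as f:
            f.write(b'1')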