ClickHouse/tests/integration/test_attach_without_fetching/test.py
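"""Integration test for ALTER TABLE ATTACH PART|PARTITION on ReplicatedMergeTree:
a part that is present in the local detached/ folder with matching checksums should
be attached locally, and fetched from another replica only when the local copy is
missing or broken (see the summary comment above the test function below)."""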


import time
import pytest
import logging
from helpers.cluster import ClickHouseCluster
from helpers.test_tools import assert_eq_with_retry
from helpers.network import PartitionManager
from helpers.corrupt_part_data_on_disk import corrupt_part_data_by_path
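

# Creates the replicated `test` table on the given node. CREATE ... IF NOT EXISTS keeps
# the call idempotent, so it can be retried and reused when the third replica joins later.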
def fill_node(node):
    node.query_with_retry(
        """
            CREATE TABLE IF NOT EXISTS test(n UInt32)
            ENGINE = ReplicatedMergeTree('/clickhouse/tables/test', '{replica}')
            ORDER BY n PARTITION BY n % 10 SETTINGS cleanup_delay_period=1, cleanup_delay_period_random_add=1, max_cleanup_delay_period=1;
        """.format(
            replica=node.name
        )
    )


cluster = ClickHouseCluster(__file__)
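
# All three replicas share the same ZooKeeper table path. replica3 is registered here,
# but its table is created only after the DETACH queries (see fill_node(node_3) in the test).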
node_1 = cluster.add_instance("replica1", with_zookeeper=True)
node_2 = cluster.add_instance("replica2", with_zookeeper=True)
node_3 = cluster.add_instance("replica3", with_zookeeper=True)


@pytest.fixture(scope="module")
def start_cluster():
    try:
        cluster.start()

        fill_node(node_1)
        fill_node(node_2)
        # the third node is filled after the DETACH query

        yield cluster

    except Exception as ex:
        print(ex)

    finally:
        cluster.shutdown()
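

# Prints each node's replication queue and syncs the replica, then checks that exactly
# the non-detached partitions are present (10 rows each) and that all replicas return
# identical data.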
def check_data(nodes, detached_parts):
    for node in nodes:
        print(
            "> Replication queue for",
            node.name,
            "\n> table\treplica_name\tsource_replica\ttype\tposition\n",
            node.query_with_retry(
                "SELECT table, replica_name, source_replica, type, position FROM system.replication_queue"
            ),
        )

        node.query_with_retry("SYSTEM SYNC REPLICA test")

    for node in nodes:
        print("> Checking data integrity for", node.name)

        for i in range(10):
            assert_eq_with_retry(
                node,
                "SELECT count() FROM test WHERE n % 10 == " + str(i),
                "0\n" if i in detached_parts else "10\n",
            )

        assert_eq_with_retry(
            node,
            "SELECT count() FROM system.parts WHERE table='test'",
            str(10 - len(detached_parts)) + "\n",
        )

        res: str = node.query("SELECT * FROM test ORDER BY n")

        for other in nodes:
            if other != node:
                logging.debug(
                    f"> Checking data consistency, {other.name} vs {node.name}"
                )
                assert_eq_with_retry(other, "SELECT * FROM test ORDER BY n", res)


# 1. Check that ALTER TABLE ATTACH PART|PARTITION does not fetch data from other replicas if it's present in the
# detached/ folder.
# 2. Check that ALTER TABLE ATTACH PART|PARTITION downloads the data from other replicas if the detached/ folder
# does not contain the part with the correct checksums.
def test_attach_without_fetching(start_cluster):
    # Note here requests are used for both PARTITION and PART. This is done for better test diversity.
    # The partition and part are used interchangeably, which is not true in most cases.

    # 0. Insert data on two replicas
    node_1.query("INSERT INTO test SELECT * FROM numbers(100)")

    check_data([node_1, node_2], detached_parts=[])

    # 1.
    # This part will be attached back from the local detached/ folder on the replicas that
    # still have it, and fetched from other replicas where it is missing.
    node_1.query("ALTER TABLE test DETACH PART '0_0_0_0'")
    # This partition will be just fetched from other replicas as the checksums won't match
    # (we'll manually break the data).
    node_1.query("ALTER TABLE test DETACH PARTITION 1")
    # This partition will be just fetched from other replicas as the part data will be corrupted with one of the
    # files missing.
    node_1.query("ALTER TABLE test DETACH PARTITION 2")

    check_data([node_1, node_2], detached_parts=[0, 1, 2])

    # 2. Create the third replica
    fill_node(node_3)

    # 3. Break the part data on the second node by removing some of the part's files.
    # Replica 3 should download the data from replica 1 as there is no local data.
    # Replica 2 should also download the data from 1 as the checksums won't match.
    logging.debug("Checking attach with corrupted part data with files missing")
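
    # Log which .bin files are about to be removed from node_2's detached copy of the part.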
    to_delete = node_2.exec_in_container(
        [
            "bash",
            "-c",
            "cd {p} && ls *.bin".format(
                p="/var/lib/clickhouse/data/default/test/detached/2_0_0_0"
            ),
        ],
        privileged=True,
    )
    logging.debug(f"Before deleting: {to_delete}")

    node_2.exec_in_container(
        [
            "bash",
            "-c",
            "cd {p} && rm -fr *.bin".format(
                p="/var/lib/clickhouse/data/default/test/detached/2_0_0_0"
            ),
        ],
        privileged=True,
    )
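
    # The ATTACH issued on node_1 is replicated: node_2 and node_3 have to fetch
    # partition 2 over the network, since their local copies are broken (node_2)
    # or absent (node_3).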
node_1.query("ALTER TABLE test ATTACH PARTITION 2")
check_data([node_1, node_2, node_3], detached_parts=[0, 1])

    # 4. Break the part data on the second node to corrupt the checksums, this time keeping all of the files in place.
    # Replica 3 should download the data from replica 1 as there is no local data.
    # Replica 2 should also download the data from 1 as the checksums won't match.
    print("Checking attach with corrupted part data with all of the files present")

    corrupt_part_data_by_path(
        node_2, "/var/lib/clickhouse/data/default/test/detached/1_0_0_0"
    )

    node_1.query("ALTER TABLE test ATTACH PARTITION 1")
    check_data([node_1, node_2, node_3], detached_parts=[0])

    # 5. Attach the first part and check if it has been fetched correctly.
    # Replica 2 should attach the local data from detached/.
    # Replica 3 should download the data from replica 2 as there is no local data and other connections are broken.
    print("Checking attach with valid checksums")

    with PartitionManager() as pm:
        # If something goes wrong and replica 2 wants to fetch data, the test will fail.
        pm.partition_instances(node_2, node_1, action="REJECT --reject-with tcp-reset")
        pm.partition_instances(node_1, node_3, action="REJECT --reject-with tcp-reset")

        node_1.query("ALTER TABLE test ATTACH PART '0_0_0_0'")

        check_data([node_1, node_2, node_3], detached_parts=[])