mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-12-10 00:12:27 +00:00
4c391f8e99
* initial commit: add setting and stub * typo * added test stub * fix * wip merging new integration test and code proto * adding steps interpreters * adding firstly proposed solution (moving parts etc) * added checking zookeeper path existence * fixing the include * fixing and sorting includes * fixing outdated struct * fix the name * added ast ptr as level of indirection * fix ref * updating the changes * working on test stub * fix iterator -> reference * revert rocksdb submodule update * fixed show privileges test * updated the test stub * replaced rand() with thread_local_rng(), updated the tests updated the test fixed test config path test fix removed error messages fixed the test updated the test fixed string literal fixed literal typo: = * fixed the empty replica error message * updated the test and the code with logs * updated the possible test cases, updated * added the code/test milestone comments * updated the test (added more testcases) * replaced native assert with CH one * individual replicas recursive delete fix * updated the AS db.name AST * two small logging fixes * manually generated AST fixes * Updated the test, added the possible algo change * Some thoughts about optimizing the solution: ALTER MOVE PARTITION .. TO TABLE -> move to detached/ + ALTER ... ATTACH * fix * Removed the replica sync in test as it's invalid * Some test tweaks * tmp * Rewrote the algo by using the executeQuery instead of hand-crafting the ASTPtr. Two questions still active. * tr: logging active parts * Extracted the parts moving algo into a separate helper function * Fixed the test data and the queries slightly * Replaced query to system.parts to direct invocation, started building the test that breaks on various parts. 
* Added the case for tables when at least one replica is alive * Updated the test to test replicas restoration by detaching/attaching * Altered the test to check restoration without replica restart * Added the tables swap in the start if the server failed last time * Hotfix when only /replicas/replica... path was deleted * Restore ZK paths while creating a replicated MergeTree table * Updated the docs, fixed the algo for individual replicas restoration case * Initial parts table storage fix, tests sync fix * Reverted individual replica restoration to general algo * Slightly optimised getDataParts * Trying another solution with parts detaching * Rewrote algo without any steps, added ON CLUSTER support * Attaching parts from other replica on restoration * Getting part checksums from ZK * Removed ON CLUSTER, finished working solution * Multiple small changes after review * Fixing parallel test * Supporting rewritten form on cluster * Test fix * Moar logging * Using source replica as checksum provider * improve test, remove some code from parser * Trying solution with move to detached + forget * Moving all parts (not only Committed) to detached * Edited docs for RESTORE REPLICA * Re-merging * minor fixes Co-authored-by: Alexander Tokmakov <avtokmakov@yandex-team.ru>
132 lines
5.6 KiB
Python
132 lines
5.6 KiB
Python
import time
|
|
import pytest
|
|
import logging
|
|
|
|
from helpers.cluster import ClickHouseCluster
|
|
from helpers.test_tools import assert_eq_with_retry
|
|
from helpers.network import PartitionManager
|
|
from helpers.corrupt_part_data_on_disk import corrupt_part_data_by_path
|
|
|
|
def fill_node(node):
    """Create the replicated ``test`` table on *node* unless it already exists.

    The table is registered under the ZooKeeper path ``/clickhouse/tables/test``
    and the node's own name is substituted as the replica name. It is ordered by
    ``n`` and partitioned by ``n % 10``.
    """
    create_table_template = '''
        CREATE TABLE IF NOT EXISTS test(n UInt32)
        ENGINE = ReplicatedMergeTree('/clickhouse/tables/test', '{replica}')
        ORDER BY n PARTITION BY n % 10;
        '''
    node.query_with_retry(create_table_template.format(replica=node.name))
|
|
|
|
# The test cluster and its three instances; every instance is added with ZooKeeper enabled.
cluster = ClickHouseCluster(__file__)

node_1, node_2, node_3 = (
    cluster.add_instance(instance_name, with_zookeeper=True)
    for instance_name in ('replica1', 'replica2', 'replica3')
)
|
|
|
|
@pytest.fixture(scope="module")
def start_cluster():
    """Module-scoped fixture: start the cluster and create the table on the first two nodes.

    Yields the started ``cluster``. The cluster is shut down when the module's
    tests are finished, and also when the set-up itself fails.

    Raises:
        Exception: any error raised while starting the cluster or creating the
            tables is printed and then re-raised, so that pytest reports the real
            cause instead of a fixture that ended without yielding.
    """
    try:
        cluster.start()
        fill_node(node_1)
        fill_node(node_2)
        # the third node is filled after the DETACH query
        yield cluster

    except Exception as ex:
        print(ex)
        # Do not swallow the failure: without the re-raise the generator would
        # finish without yielding and the original error would be hidden.
        raise

    finally:
        cluster.shutdown()
|
|
|
|
def check_data(nodes, detached_parts):
    """Verify the contents of the ``test`` table on every node in *nodes*.

    For each node this prints its replication queue, runs ``SYSTEM SYNC REPLICA``,
    and then asserts that:

    * every partition ``n % 10 == i`` holds 10 rows, or 0 rows when ``i`` is
      listed in *detached_parts*;
    * ``system.parts`` reports ``10 - len(detached_parts)`` rows for the table;
    * every other node returns exactly the same table contents as this node.

    Args:
        nodes: instances to check; each one is also compared against all others.
        detached_parts: partition ids (values of ``n % 10``) expected to be absent.
    """
    for replica in nodes:
        queue_dump = replica.query_with_retry(
            "SELECT table, replica_name, source_replica, type, position FROM system.replication_queue")
        print("> Replication queue for", replica.name,
              "\n> table\treplica_name\tsource_replica\ttype\tposition\n", queue_dump)

        replica.query_with_retry("SYSTEM SYNC REPLICA test")

        print("> Checking data integrity for", replica.name)

        for partition_id in range(10):
            if partition_id in detached_parts:
                expected_rows = "0\n"
            else:
                expected_rows = "10\n"

            assert_eq_with_retry(
                replica, "SELECT count() FROM test WHERE n % 10 == " + str(partition_id), expected_rows)

        expected_parts = str(10 - len(detached_parts)) + "\n"
        assert_eq_with_retry(replica, "SELECT count() FROM system.parts WHERE table='test'", expected_parts)

        # Contents of this replica serve as the reference for all the other ones.
        reference: str = replica.query("SELECT * FROM test ORDER BY n")

        for peer in nodes:
            if peer != replica:
                logging.debug(f"> Checking data consistency, {peer.name} vs {replica.name}")
                assert_eq_with_retry(peer, "SELECT * FROM test ORDER BY n", reference)
|
|
|
|
|
|
# The test below covers two scenarios:
# 1. ALTER TABLE ATTACH PART|PARTITION does not fetch data from other replicas if it is present in the
#    detached/ folder.
# 2. ALTER TABLE ATTACH PART|PARTITION downloads the data from other replicas if the detached/ folder
#    does not contain the part with the correct checksums.
def test_attach_without_fetching(start_cluster):
    # Note: both the PARTITION and the PART forms of the queries are used here, for better test diversity.
    # The two are treated as interchangeable, which is not true in most cases.

    # 0. Insert data on two replicas.
    node_1.query("INSERT INTO test SELECT * FROM numbers(100)")

    check_data([node_1, node_2], detached_parts=[])

    # 1. Detach one part and two partitions.
    # Part 0_0_0_0 is left intact in detached/, it is used for the "valid checksums" case (step 5).
    node_1.query("ALTER TABLE test DETACH PART '0_0_0_0'")
    # Partition 1 will have to be fetched from another replica: its data is manually corrupted in step 4,
    # so the checksums won't match.
    node_1.query("ALTER TABLE test DETACH PARTITION 1")
    # Partition 2 will have to be fetched from another replica: some of its files are removed in step 3.
    node_1.query("ALTER TABLE test DETACH PARTITION 2")

    check_data([node_1, node_2], detached_parts=[0, 1, 2])

    # 2. Create the third replica.
    fill_node(node_3)

    # 3. Remove the *.bin files of the detached part on the second node, so that its data is incomplete.
    # Replica 3 should download the data from replica 1 as there is no local data.
    # Replica 2 should also download the data from replica 1 as the checksums won't match.
    logging.debug("Checking attach with corrupted part data with files missing")

    incomplete_part_dir = "/var/lib/clickhouse/data/default/test/detached/2_0_0_0"

    bin_files = node_2.exec_in_container(
        ['bash', '-c', 'cd {p} && ls *.bin'.format(p=incomplete_part_dir)], privileged=True)
    logging.debug(f"Before deleting: {bin_files}")

    node_2.exec_in_container(
        ['bash', '-c', 'cd {p} && rm -fr *.bin'.format(p=incomplete_part_dir)], privileged=True)

    node_1.query("ALTER TABLE test ATTACH PARTITION 2")
    check_data([node_1, node_2, node_3], detached_parts=[0, 1])

    # 4. Corrupt the data of the detached part on the second node, keeping all of its files in place.
    # Replica 3 should download the data from replica 1 as there is no local data.
    # Replica 2 should also download the data from replica 1 as the checksums won't match.
    print("Checking attach with corrupted part data with all of the files present")

    corrupt_part_data_by_path(node_2, "/var/lib/clickhouse/data/default/test/detached/1_0_0_0")

    node_1.query("ALTER TABLE test ATTACH PARTITION 1")
    check_data([node_1, node_2, node_3], detached_parts=[0])

    # 5. Attach the first part and check that it has been obtained correctly.
    # Replica 2 should attach the local data from detached/.
    # Replica 3 should download the data from replica 2 as there is no local data and other connections are broken.
    print("Checking attach with valid checksums")

    with PartitionManager() as pm:
        # If something goes wrong and replica 2 wants to fetch data, the test will fail.
        pm.partition_instances(node_2, node_1, action='REJECT --reject-with tcp-reset')
        pm.partition_instances(node_1, node_3, action='REJECT --reject-with tcp-reset')

        node_1.query("ALTER TABLE test ATTACH PART '0_0_0_0'")

        # The check runs while the connections above are still broken.
        check_data([node_1, node_2, node_3], detached_parts=[])
|