ClickHouse/tests/integration/test_quorum_inserts/test.py
2024-11-06 15:55:41 +00:00

434 lines
15 KiB
Python

import concurrent
import time
import uuid
import pytest
from helpers.cluster import ClickHouseCluster
from helpers.network import PartitionManager
from helpers.test_tools import TSV
cluster = ClickHouseCluster(__file__)
zero = cluster.add_instance(
"zero",
user_configs=["configs/users.d/settings.xml"],
main_configs=["configs/config.d/remote_servers.xml"],
macros={"cluster": "anime", "shard": "0", "replica": "zero"},
with_zookeeper=True,
)
first = cluster.add_instance(
"first",
user_configs=["configs/users.d/settings.xml"],
main_configs=["configs/config.d/remote_servers.xml"],
macros={"cluster": "anime", "shard": "0", "replica": "first"},
with_zookeeper=True,
)
second = cluster.add_instance(
"second",
user_configs=["configs/users.d/settings.xml"],
main_configs=["configs/config.d/remote_servers.xml"],
macros={"cluster": "anime", "shard": "0", "replica": "second"},
with_zookeeper=True,
)
@pytest.fixture(scope="module")
def started_cluster():
global cluster
try:
cluster.start()
yield cluster
finally:
cluster.shutdown()
def test_simple_add_replica(started_cluster):
table_name = "test_simple_" + uuid.uuid4().hex
create_query = (
f"CREATE TABLE {table_name} "
"(a Int8, d Date) "
"Engine = ReplicatedMergeTree('/clickhouse/tables/{shard}/{table}', '{replica}') "
"PARTITION BY d ORDER BY a"
)
zero.query(create_query)
first.query(create_query)
first.query(f"SYSTEM STOP FETCHES {table_name}")
zero.query(
f"INSERT INTO {table_name} VALUES (1, '2011-01-01')",
settings={"insert_quorum": 1},
)
assert "1\t2011-01-01\n" == zero.query(f"SELECT * from {table_name}")
assert "" == first.query(f"SELECT * from {table_name}")
first.query(f"SYSTEM START FETCHES {table_name}")
first.query(f"SYSTEM SYNC REPLICA {table_name}", timeout=20)
assert "1\t2011-01-01\n" == zero.query(f"SELECT * from {table_name}")
assert "1\t2011-01-01\n" == first.query(f"SELECT * from {table_name}")
second.query(create_query)
second.query(f"SYSTEM SYNC REPLICA {table_name}", timeout=20)
assert "1\t2011-01-01\n" == zero.query(f"SELECT * from {table_name}")
assert "1\t2011-01-01\n" == first.query(f"SELECT * from {table_name}")
assert "1\t2011-01-01\n" == second.query(f"SELECT * from {table_name}")
zero.query(f"DROP TABLE IF EXISTS {table_name} ON CLUSTER cluster")
def test_drop_replica_and_achieve_quorum(started_cluster):
table_name = "test_drop_replica_and_achieve_quorum_" + uuid.uuid4().hex
create_query = (
f"CREATE TABLE {table_name} "
"(a Int8, d Date) "
"Engine = ReplicatedMergeTree('/clickhouse/tables/{shard}/{table}', '{replica}') "
"PARTITION BY d ORDER BY a"
)
print("Create Replicated table with two replicas")
zero.query(create_query)
first.query(create_query)
print("Stop fetches on one replica. Since that, it will be isolated.")
first.query(f"SYSTEM STOP FETCHES {table_name}")
print("Insert to other replica. This query will fail.")
quorum_timeout = zero.query_and_get_error(
f"INSERT INTO {table_name}(a,d) VALUES (1, '2011-01-01')",
settings={"insert_quorum_timeout": 5000},
)
assert "Timeout while waiting for quorum" in quorum_timeout, "Query must fail."
assert TSV("1\t2011-01-01\n") == TSV(
zero.query(
f"SELECT * FROM {table_name}",
settings={"select_sequential_consistency": 0},
)
)
assert TSV("") == TSV(
zero.query(
f"SELECT * FROM {table_name}",
settings={"select_sequential_consistency": 1},
)
)
# TODO:(Mikhaylov) begin; maybe delete this lines. I want clickhouse to fetch parts and update quorum.
print("START FETCHES first replica")
first.query(f"SYSTEM START FETCHES {table_name}")
print("SYNC first replica")
first.query(f"SYSTEM SYNC REPLICA {table_name}", timeout=20)
# TODO:(Mikhaylov) end
print("Add second replica")
second.query(create_query)
print("SYNC second replica")
second.query(f"SYSTEM SYNC REPLICA {table_name}", timeout=20)
print("Quorum for previous insert achieved.")
assert TSV("1\t2011-01-01\n") == TSV(
second.query(
f"SELECT * FROM {table_name}",
settings={"select_sequential_consistency": 1},
)
)
@pytest.mark.parametrize(("add_new_data"), [False, True])
def test_insert_quorum_with_drop_partition(started_cluster, add_new_data):
# use different table names for easier disambiguation in logs between runs (you may also check uuid though, but not always convenient)
table_name = (
"test_quorum_insert_with_drop_partition_new_data"
if add_new_data
else "test_quorum_insert_with_drop_partition"
) + uuid.uuid4().hex
zero.query(f"DROP TABLE IF EXISTS {table_name} ON CLUSTER cluster")
create_query = (
f"CREATE TABLE {table_name} ON CLUSTER cluster "
"(a Int8, d Date) "
"Engine = ReplicatedMergeTree "
"PARTITION BY d ORDER BY a "
)
print("Create Replicated table with three replicas")
zero.query(create_query)
print(f"Stop fetches for {table_name} at first replica.")
first.query(f"SYSTEM STOP FETCHES {table_name}")
print("Insert with quorum. (zero and second)")
zero.query(f"INSERT INTO {table_name}(a,d) VALUES(1, '2011-01-01')")
print("Drop partition.")
zero.query(f"ALTER TABLE {table_name} DROP PARTITION '2011-01-01'")
if add_new_data:
print("Insert to deleted partition")
zero.query(f"INSERT INTO {table_name}(a,d) VALUES(2, '2011-01-01')")
print(f"Resume fetches for {table_name} at first replica.")
first.query(f"SYSTEM START FETCHES {table_name}")
print("Sync first replica with others.")
first.query(f"SYSTEM SYNC REPLICA {table_name}")
assert "20110101" not in first.query(
f"""
WITH (SELECT toString(uuid) FROM system.tables WHERE name = '{table_name}') AS uuid,
'/clickhouse/tables/' || uuid || '/0/quorum/last_part' AS p
SELECT * FROM system.zookeeper WHERE path = p FORMAT Vertical
"""
)
# Sync second replica not to have `REPLICA_IS_NOT_IN_QUORUM` error
second.query(f"SYSTEM SYNC REPLICA {table_name}")
print("Select from updated partition.")
if add_new_data:
assert TSV("2\t2011-01-01\n") == TSV(zero.query(f"SELECT * FROM {table_name}"))
assert TSV("2\t2011-01-01\n") == TSV(
second.query(f"SELECT * FROM {table_name}")
)
else:
assert TSV("") == TSV(zero.query(f"SELECT * FROM {table_name}"))
assert TSV("") == TSV(second.query(f"SELECT * FROM {table_name}"))
zero.query(f"DROP TABLE IF EXISTS {table_name} ON CLUSTER cluster")
@pytest.mark.parametrize(("add_new_data"), [False, True])
def test_insert_quorum_with_move_partition(started_cluster, add_new_data):
# use different table names for easier disambiguation in logs between runs (you may also check uuid though, but not always convenient)
source_table_name = (
"test_insert_quorum_with_move_partition_source_new_data"
if add_new_data
else "test_insert_quorum_with_move_partition_source"
) + uuid.uuid4().hex
destination_table_name = (
"test_insert_quorum_with_move_partition_destination_new_data"
if add_new_data
else "test_insert_quorum_with_move_partition_destination"
) + uuid.uuid4().hex
zero.query(f"DROP TABLE IF EXISTS {source_table_name} ON CLUSTER cluster")
zero.query(f"DROP TABLE IF EXISTS {destination_table_name} ON CLUSTER cluster")
create_source = (
f"CREATE TABLE {source_table_name} ON CLUSTER cluster "
"(a Int8, d Date) "
"Engine = ReplicatedMergeTree "
"PARTITION BY d ORDER BY a "
)
create_destination = (
f"CREATE TABLE {destination_table_name} ON CLUSTER cluster "
"(a Int8, d Date) "
"Engine = ReplicatedMergeTree "
"PARTITION BY d ORDER BY a "
)
print("Create source Replicated table with three replicas")
zero.query(create_source)
print("Create destination Replicated table with three replicas")
zero.query(create_destination)
print(f"Stop fetches for {source_table_name} at first replica.")
first.query(f"SYSTEM STOP FETCHES {source_table_name}")
print("Insert with quorum. (zero and second)")
zero.query(f"INSERT INTO {source_table_name}(a,d) VALUES(1, '2011-01-01')")
print("Drop partition.")
zero.query(
f"ALTER TABLE {source_table_name} MOVE PARTITION '2011-01-01' TO TABLE {destination_table_name}"
)
if add_new_data:
print("Insert to deleted partition")
zero.query(f"INSERT INTO {source_table_name}(a,d) VALUES(2, '2011-01-01')")
print(f"Resume fetches for {source_table_name} at first replica.")
first.query(f"SYSTEM START FETCHES {source_table_name}")
print("Sync first replica with others.")
first.query(f"SYSTEM SYNC REPLICA {source_table_name}")
assert "20110101" not in first.query(
f"""
WITH (SELECT toString(uuid) FROM system.tables WHERE name = '{source_table_name}') AS uuid,
'/clickhouse/tables/' || uuid || '/0/quorum/last_part' AS p
SELECT * FROM system.zookeeper WHERE path = p FORMAT Vertical
"""
)
# Sync second replica not to have `REPLICA_IS_NOT_IN_QUORUM` error
second.query(f"SYSTEM SYNC REPLICA {source_table_name}")
print("Select from updated partition.")
if add_new_data:
assert TSV("2\t2011-01-01\n") == TSV(
zero.query(f"SELECT * FROM {source_table_name}")
)
assert TSV("2\t2011-01-01\n") == TSV(
second.query(f"SELECT * FROM {source_table_name}")
)
else:
assert TSV("") == TSV(zero.query(f"SELECT * FROM {source_table_name}"))
assert TSV("") == TSV(second.query(f"SELECT * FROM {source_table_name}"))
zero.query(f"DROP TABLE IF EXISTS {source_table_name} ON CLUSTER cluster")
zero.query(f"DROP TABLE IF EXISTS {destination_table_name} ON CLUSTER cluster")
def test_insert_quorum_with_ttl(started_cluster):
table_name = "test_insert_quorum_with_ttl_" + uuid.uuid4().hex
create_query = (
f"CREATE TABLE {table_name} "
"(a Int8, d Date) "
"Engine = ReplicatedMergeTree('/clickhouse/tables/{table}', '{replica}') "
"PARTITION BY d ORDER BY a "
"TTL d + INTERVAL 5 second DELETE WHERE toYear(d) = 2011 "
"SETTINGS merge_with_ttl_timeout=2 "
)
print("Create Replicated table with two replicas")
zero.query(create_query)
first.query(create_query)
print(f"Stop fetches for {table_name} at first replica.")
first.query(f"SYSTEM STOP FETCHES {table_name}")
print("Insert should fail since it can not reach the quorum.")
quorum_timeout = zero.query_and_get_error(
f"INSERT INTO {table_name}(a,d) VALUES(1, '2011-01-01')",
settings={"insert_quorum_timeout": 5000},
)
assert "Timeout while waiting for quorum" in quorum_timeout, "Query must fail."
print(
"Wait 10 seconds and TTL merge have to be executed. But it won't delete data."
)
time.sleep(10)
assert TSV("1\t2011-01-01\n") == TSV(
zero.query(
f"SELECT * FROM {table_name}",
settings={"select_sequential_consistency": 0},
)
)
print(f"Resume fetches for {table_name} at first replica.")
first.query(f"SYSTEM START FETCHES {table_name}")
print("Sync first replica.")
first.query(f"SYSTEM SYNC REPLICA {table_name}")
zero.query(
f"INSERT INTO {table_name}(a,d) VALUES(1, '2011-01-01')",
settings={"insert_quorum_timeout": 5000},
)
print("Inserts should resume.")
zero.query(f"INSERT INTO {table_name}(a, d) VALUES(2, '2012-02-02')")
first.query(f"OPTIMIZE TABLE {table_name}")
first.query(f"SYSTEM SYNC REPLICA {table_name}")
zero.query(f"SYSTEM SYNC REPLICA {table_name}")
assert TSV("2\t2012-02-02\n") == TSV(
first.query(
f"SELECT * FROM {table_name}",
settings={"select_sequential_consistency": 0},
)
)
assert TSV("2\t2012-02-02\n") == TSV(
first.query(
f"SELECT * FROM {table_name}",
settings={"select_sequential_consistency": 1},
)
)
zero.query(f"DROP TABLE IF EXISTS {table_name} ON CLUSTER cluster")
def test_insert_quorum_with_keeper_loss_connection(started_cluster):
table_name = "test_insert_quorum_with_keeper_loss_" + uuid.uuid4().hex
create_query = (
f"CREATE TABLE {table_name} "
"(a Int8, d Date) "
"Engine = ReplicatedMergeTree('/clickhouse/tables/{table}', '{replica}') "
"ORDER BY a "
)
zero.query(create_query)
first.query(create_query)
first.query(f"SYSTEM STOP FETCHES {table_name}")
zero.query("SYSTEM ENABLE FAILPOINT replicated_merge_tree_commit_zk_fail_after_op")
zero.query("SYSTEM ENABLE FAILPOINT replicated_merge_tree_insert_retry_pause")
with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
insert_future = executor.submit(
lambda: zero.query(
f"INSERT INTO {table_name}(a,d) VALUES(1, '2011-01-01')",
settings={"insert_quorum_timeout": 150000},
)
)
with PartitionManager() as pm:
pm.drop_instance_zk_connections(zero)
retries = 0
zk = cluster.get_kazoo_client("zoo1")
while True:
if (
zk.exists(
f"/clickhouse/tables/{table_name}/replicas/zero/is_active"
)
is None
):
break
print("replica is still active")
time.sleep(1)
retries += 1
if retries == 120:
raise Exception("Can not wait cluster replica inactive")
first.query("SYSTEM ENABLE FAILPOINT finish_set_quorum_failed_parts")
quorum_fail_future = executor.submit(
lambda: first.query(
"SYSTEM WAIT FAILPOINT finish_set_quorum_failed_parts", timeout=300
)
)
first.query(f"SYSTEM START FETCHES {table_name}")
concurrent.futures.wait([quorum_fail_future])
assert quorum_fail_future.exception() is None
zero.query("SYSTEM ENABLE FAILPOINT finish_clean_quorum_failed_parts")
clean_quorum_fail_parts_future = executor.submit(
lambda: first.query(
"SYSTEM WAIT FAILPOINT finish_clean_quorum_failed_parts",
timeout=300,
)
)
pm.restore_instance_zk_connections(zero)
concurrent.futures.wait([clean_quorum_fail_parts_future])
assert clean_quorum_fail_parts_future.exception() is None
zero.query(
"SYSTEM DISABLE FAILPOINT replicated_merge_tree_insert_retry_pause"
)
concurrent.futures.wait([insert_future])
assert insert_future.exception() is not None
assert not zero.contains_in_log("LOGICAL_ERROR")
assert zero.contains_in_log(
"fails to commit and will not retry or clean garbage"
)