ClickHouse/tests/integration/test_refreshable_mv/test.py
Michael Kolupaev ab06938478
Merge pull request #70460 from ClickHouse/dep
Fix refreshable MV in system database breaking server startup
2024-10-08 21:17:12 +00:00

220 lines
8.1 KiB
Python

import os
import re
import shutil
import threading
import time
from random import randint
import pytest
from helpers.cluster import ClickHouseCluster
from helpers.network import PartitionManager
from helpers.test_tools import assert_eq_with_retry, assert_logs_contain
test_recover_staled_replica_run = 1
cluster = ClickHouseCluster(__file__)
node1 = cluster.add_instance(
"node1",
main_configs=["configs/config.xml"],
user_configs=["configs/users.xml"],
with_zookeeper=True,
macros={"shard": "shard1", "replica": "1"},
stay_alive=True,
)
node2 = cluster.add_instance(
"node2",
main_configs=["configs/config.xml"],
user_configs=["configs/users.xml"],
with_zookeeper=True,
macros={"shard": "shard1", "replica": "2"},
)
nodes = [node1, node2]
@pytest.fixture(scope="module")
def started_cluster():
try:
cluster.start()
yield cluster
finally:
cluster.shutdown()
def test_refreshable_mv_in_replicated_db(started_cluster):
for node in nodes:
node.query(
"create database re engine = Replicated('/test/re', 'shard1', '{replica}');"
)
# Table engine check.
assert "BAD_ARGUMENTS" in node1.query_and_get_error(
"create materialized view re.a refresh every 1 second (x Int64) engine Memory as select 1 as x"
)
# Basic refreshing.
node1.query(
"create materialized view re.a refresh every 1 second (x Int64) engine ReplicatedMergeTree order by x as select number*10 as x from numbers(2)"
)
node1.query("system sync database replica re")
for node in nodes:
node.query("system wait view re.a")
assert node.query("select * from re.a order by all") == "0\n10\n"
assert (
node.query(
"select database, view, last_success_time != 0, last_refresh_time != 0, last_refresh_replica in ('1','2'), exception from system.view_refreshes"
)
== "re\ta\t1\t1\t1\t\n"
)
# Append mode, with and without coordination.
for coordinated in [True, False]:
name = "append" if coordinated else "append_uncoordinated"
refresh_settings = "" if coordinated else " settings all_replicas = 1"
node2.query(
f"create materialized view re.{name} refresh every 1 year{refresh_settings} append (x Int64) engine ReplicatedMergeTree order by x as select rand() as x"
)
# Stop the clocks.
for node in nodes:
node.query(
f"system test view re.{name} set fake time '2040-01-01 00:00:01'"
)
# Wait for quiescence.
for node in nodes:
# Wait twice to make sure we wait for a refresh that started after we adjusted the clock.
# Otherwise another refresh may start right after (because clock moved far forward).
node.query(
f"system wait view re.{name}; system refresh view re.{name}; system wait view re.{name};"
)
rows_before = int(nodes[randint(0, 1)].query(f"select count() from re.{name}"))
# Advance the clocks.
for node in nodes:
node.query(
f"system test view re.{name} set fake time '2041-01-01 00:00:01'"
)
# Wait for refresh.
for node in nodes:
assert_eq_with_retry(
node,
f"select status, last_success_time from system.view_refreshes where view = '{name}'",
"Scheduled\t2041-01-01 00:00:01",
)
node.query(f"system wait view re.{name}")
# Check results.
rows_after = int(nodes[randint(0, 1)].query(f"select count() from re.{name}"))
expected = 1 if coordinated else 2
assert rows_after - rows_before == expected
# Uncoordinated append to unreplicated table.
node1.query(
"create materialized view re.unreplicated_uncoordinated refresh every 1 second settings all_replicas = 1 append (x String) engine Memory as select 1 as x"
)
node2.query("system sync database replica re")
for node in nodes:
node.query("system wait view re.unreplicated_uncoordinated")
assert (
node.query("select distinct x from re.unreplicated_uncoordinated") == "1\n"
)
# Rename.
node2.query(
"create materialized view re.c refresh every 1 year (x Int64) engine ReplicatedMergeTree order by x empty as select rand() as x"
)
node1.query("system sync database replica re")
node1.query("rename table re.c to re.d")
node1.query(
"alter table re.d modify query select number + sleepEachRow(1) as x from numbers(5) settings max_block_size = 1"
)
# Rename while refreshing.
node1.query("system refresh view re.d")
assert_eq_with_retry(
node2,
"select status from system.view_refreshes where view = 'd'",
"RunningOnAnotherReplica",
)
node2.query("rename table re.d to re.e")
node1.query("system wait view re.e")
assert node1.query("select * from re.e order by x") == "0\n1\n2\n3\n4\n"
# A view that will be stuck refreshing until dropped.
node1.query(
"create materialized view re.f refresh every 1 second (x Int64) engine ReplicatedMergeTree order by x as select sleepEachRow(1) as x from numbers(1000000) settings max_block_size = 1"
)
assert_eq_with_retry(
node2,
"select status in ('Running', 'RunningOnAnotherReplica') from system.view_refreshes where view = 'f'",
"1",
)
# Locate coordination znodes.
znode_exists = (
lambda uuid: nodes[randint(0, 1)].query(
f"select count() from system.zookeeper where path = '/clickhouse/tables/{uuid}' and name = 'shard1'"
)
== "1\n"
)
tables = []
for row in node1.query(
"select table, uuid from system.tables where database = 're'"
).split("\n")[:-1]:
name, uuid = row.split("\t")
print(f"found table {name} {uuid}")
if name.startswith(".") or name.startswith("_tmp_replace_"):
continue
coordinated = not name.endswith("uncoordinated")
tables.append((name, uuid, coordinated))
assert coordinated == znode_exists(uuid)
assert sorted([name for (name, _, _) in tables]) == [
"a",
"append",
"append_uncoordinated",
"e",
"f",
"unreplicated_uncoordinated",
]
# Drop all tables and check that coordination znodes were deleted.
for name, uuid, coordinated in tables:
maybe_sync = " sync" if randint(0, 1) == 0 else ""
nodes[randint(0, 1)].query(f"drop table re.{name}{maybe_sync}")
# TODO: After https://github.com/ClickHouse/ClickHouse/issues/61065 is done (for MVs, not ReplicatedMergeTree), check the parent znode instead.
assert not znode_exists(uuid)
# A little stress test dropping MV while it's refreshing, hoping to hit various cases where the
# drop happens while creating/exchanging/dropping the inner table.
for i in range(20):
maybe_empty = " empty" if randint(0, 2) == 0 else ""
nodes[randint(0, 1)].query(
f"create materialized view re.g refresh every 1 second (x Int64) engine ReplicatedMergeTree order by x{maybe_empty} as select 1 as x"
)
r = randint(0, 5)
if r == 0:
pass
elif r == 1:
time.sleep(randint(0, 100) / 1000)
else:
time.sleep(randint(900, 1100) / 1000)
nodes[randint(0, 1)].query("drop table re.g")
# Check that inner and temp tables were dropped.
for node in nodes:
assert node.query("show tables from re") == ""
node1.query("drop database re sync")
node2.query("drop database re sync")
def test_refreshable_mv_in_system_db(started_cluster):
node1.query(
"create materialized view system.a refresh every 1 second (x Int64) engine Memory as select number+1 as x from numbers(2);"
"system refresh view system.a;"
)
node1.restart_clickhouse()
node1.query("system refresh view system.a")
assert node1.query("select count(), sum(x) from system.a") == "2\t3\n"
node1.query("drop table system.a")