import os
import re
import shutil
import threading
import time
from random import randint

import pytest

from helpers.cluster import ClickHouseCluster
from helpers.network import PartitionManager
from helpers.test_tools import assert_eq_with_retry, assert_logs_contain

test_recover_staled_replica_run = 1

cluster = ClickHouseCluster(__file__)
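
# Two replicas of the same shard ("shard1"): objects created in a Replicated
# database on one node are replicated to the other, and coordinated refreshes
# run on only one replica per refresh period.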
node1 = cluster.add_instance(
    "node1",
    main_configs=["configs/config.xml"],
    user_configs=["configs/users.xml"],
    with_zookeeper=True,
    macros={"shard": "shard1", "replica": "1"},
    stay_alive=True,
)
node2 = cluster.add_instance(
    "node2",
    main_configs=["configs/config.xml"],
    user_configs=["configs/users.xml"],
    with_zookeeper=True,
    macros={"shard": "shard1", "replica": "2"},
)
nodes = [node1, node2]


@pytest.fixture(scope="module")
def started_cluster():
    try:
        cluster.start()
        yield cluster
    finally:
        cluster.shutdown()
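

# Covers refreshable materialized views in a Replicated database: table engine
# validation, refresh coordination across replicas, append mode, renames while
# a refresh is running, and removal of coordination znodes on drop.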
def test_refreshable_mv_in_replicated_db(started_cluster):
    for node in nodes:
        node.query(
            "create database re engine = Replicated('/test/re', 'shard1', '{replica}');"
        )

    # Table engine check.
    assert "BAD_ARGUMENTS" in node1.query_and_get_error(
        "create materialized view re.a refresh every 1 second (x Int64) engine Memory as select 1 as x"
    )

    # Basic refreshing.
    node1.query(
        "create materialized view re.a refresh every 1 second (x Int64) engine ReplicatedMergeTree order by x as select number*10 as x from numbers(2)"
    )
    node1.query("system sync database replica re")
    for node in nodes:
        node.query("system wait view re.a")
        assert node.query("select * from re.a order by all") == "0\n10\n"
        assert (
            node.query(
                "select database, view, last_success_time != 0, last_refresh_time != 0, last_refresh_replica in ('1', '2'), exception from system.view_refreshes"
            )
            == "re\ta\t1\t1\t1\t\n"
        )

    # Append mode, with and without coordination.
    for coordinated in [True, False]:
        name = "append" if coordinated else "append_uncoordinated"
        refresh_settings = "" if coordinated else " settings all_replicas = 1"
        node2.query(
            f"create materialized view re.{name} refresh every 1 year{refresh_settings} append (x Int64) engine ReplicatedMergeTree order by x as select rand() as x"
        )
        # Stop the clocks.
        for node in nodes:
            node.query(
                f"system test view re.{name} set fake time '2040-01-01 00:00:01'"
            )
        # Wait for quiescence.
        for node in nodes:
            # Wait twice to make sure we wait for a refresh that started after we adjusted
            # the clock. Otherwise another refresh may start right after (because the
            # clock moved far forward).
            node.query(
                f"system wait view re.{name}; system refresh view re.{name}; system wait view re.{name};"
            )
        rows_before = int(nodes[randint(0, 1)].query(f"select count() from re.{name}"))
        # Advance the clocks.
        for node in nodes:
            node.query(
                f"system test view re.{name} set fake time '2041-01-01 00:00:01'"
            )
        # Wait for refresh.
        for node in nodes:
            assert_eq_with_retry(
                node,
                f"select status, last_success_time from system.view_refreshes where view = '{name}'",
                "Scheduled\t2041-01-01 00:00:01",
            )
            node.query(f"system wait view re.{name}")
        # Check results. With coordination exactly one replica appends a row per
        # period; without coordination each of the two replicas appends its own.
        node = nodes[randint(0, 1)]
        node.query(f"system sync replica re.{name}")
        rows_after = int(node.query(f"select count() from re.{name}"))
        expected = 1 if coordinated else 2
        assert rows_after - rows_before == expected

    # Uncoordinated append to unreplicated table.
    node1.query(
        "create materialized view re.unreplicated_uncoordinated refresh every 1 second settings all_replicas = 1 append (x String) engine Memory as select 1 as x"
    )
    node2.query("system sync database replica re")
    for node in nodes:
        node.query("system wait view re.unreplicated_uncoordinated")
        assert (
            node.query("select distinct x from re.unreplicated_uncoordinated") == "1\n"
        )

    # Rename.
    node2.query(
        "create materialized view re.c refresh every 1 year (x Int64) engine ReplicatedMergeTree order by x empty as select rand() as x"
    )
    node1.query("system sync database replica re")
    node1.query("rename table re.c to re.d")
    node1.query(
        "alter table re.d modify query select number + sleepEachRow(1) as x from numbers(5) settings max_block_size = 1"
    )
    # Rename while refreshing.
    node1.query("system refresh view re.d")
    assert_eq_with_retry(
        node2,
        "select status from system.view_refreshes where view = 'd'",
        "RunningOnAnotherReplica",
    )
    node2.query("rename table re.d to re.e")
    node1.query("system wait view re.e")
    assert node1.query("select * from re.e order by x") == "0\n1\n2\n3\n4\n"

    # A view that will be stuck refreshing until dropped.
    node1.query(
        "create materialized view re.f refresh every 1 second (x Int64) engine ReplicatedMergeTree order by x as select sleepEachRow(1) as x from numbers(1000000) settings max_block_size = 1"
    )
    assert_eq_with_retry(
        node2,
        "select status in ('Running', 'RunningOnAnotherReplica') from system.view_refreshes where view = 'f'",
        "1",
    )
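    # re.f stays mid-refresh, so dropping it later exercises dropping a view
    # while a refresh is in flight on some replica.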

    # Locate coordination znodes.
    znode_exists = (
        lambda uuid: nodes[randint(0, 1)].query(
            f"select count() from system.zookeeper where path = '/clickhouse/tables/{uuid}' and name = 'shard1'"
        )
        == "1\n"
    )
    tables = []
    for row in node1.query(
        "select table, uuid from system.tables where database = 're'"
    ).split("\n")[:-1]:
        name, uuid = row.split("\t")
        print(f"found table {name} {uuid}")
        # Skip inner tables (names starting with ".") and temporary tables left
        # behind by refreshes.
        if name.startswith(".") or name.startswith("_tmp_replace_"):
            continue
        coordinated = not name.endswith("uncoordinated")
        tables.append((name, uuid, coordinated))
        assert coordinated == znode_exists(uuid)
    assert sorted([name for (name, _, _) in tables]) == [
        "a",
        "append",
        "append_uncoordinated",
        "e",
        "f",
        "unreplicated_uncoordinated",
    ]

    # Drop all tables and check that coordination znodes were deleted.
    for name, uuid, coordinated in tables:
        maybe_sync = " sync" if randint(0, 1) == 0 else ""
        nodes[randint(0, 1)].query(f"drop table re.{name}{maybe_sync}")
        # TODO: After https://github.com/ClickHouse/ClickHouse/issues/61065 is done (for MVs, not ReplicatedMergeTree), check the parent znode instead.
        assert not znode_exists(uuid)

    # A little stress test dropping the MV while it's refreshing, hoping to hit various
    # cases where the drop happens while creating/exchanging/dropping the inner table.
    for i in range(20):
        maybe_empty = " empty" if randint(0, 2) == 0 else ""
        nodes[randint(0, 1)].query(
            f"create materialized view re.g refresh every 1 second (x Int64) engine ReplicatedMergeTree order by x{maybe_empty} as select 1 as x"
        )
        # Drop immediately, after a short delay, or around the 1-second refresh boundary.
        r = randint(0, 5)
        if r == 0:
            pass
        elif r == 1:
            time.sleep(randint(0, 100) / 1000)
        else:
            time.sleep(randint(900, 1100) / 1000)
        nodes[randint(0, 1)].query("drop table re.g")

    # Check that inner and temp tables were dropped.
    for node in nodes:
        assert node.query("show tables from re") == ""

    node1.query("drop database re sync")
    node2.query("drop database re sync")
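

# Refreshable MVs are also allowed in the predefined "system" database; check
# that such a view survives a server restart.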
def test_refreshable_mv_in_system_db(started_cluster):
    node1.query(
        "create materialized view system.a refresh every 1 second (x Int64) engine Memory as select number+1 as x from numbers(2);"
        "system refresh view system.a;"
    )

    node1.restart_clickhouse()
    node1.query("system refresh view system.a")
    assert node1.query("select count(), sum(x) from system.a") == "2\t3\n"

    node1.query("drop table system.a")