ClickHouse/tests/integration/test_system_merges/test.py

import threading
import time

import pytest
from helpers.cluster import ClickHouseCluster

# Cluster definition shared by every test in this module.
cluster = ClickHouseCluster(__file__)

# Both instances load the same logging config and are started with ZooKeeper;
# they differ only in the value of the `replica` macro.
node1 = cluster.add_instance(
    'node1',
    main_configs=['configs/logs_config.xml'],
    with_zookeeper=True,
    macros={"shard": 0, "replica": 1},
)

node2 = cluster.add_instance(
    'node2',
    main_configs=['configs/logs_config.xml'],
    with_zookeeper=True,
    macros={"shard": 0, "replica": 2},
)


@pytest.fixture(scope="module")
def started_cluster():
    """Start the cluster once per module and create the `test` database on both nodes.

    Yields the started cluster; the cluster is shut down when the module's
    tests are done, or if start-up / database creation fails.
    """
    try:
        cluster.start()
        # The Ordinary engine is requested explicitly: paths differ with Atomic,
        # and the tests compare part paths against fixed expected values.
        for instance in (node1, node2):
            instance.query('CREATE DATABASE test ENGINE=Ordinary')
        yield cluster
    finally:
        cluster.shutdown()


def split_tsv(data):
    """Parse tab-separated text into a list of rows.

    Each line of `data` becomes one list of string fields, split on tab
    characters. Empty input yields an empty list.
    """
    lines = data.splitlines()
    return [line.split("\t") for line in lines]


@pytest.mark.parametrize("replicated", [
    "",
    "replicated"
])
def test_merge_simple(started_cluster, replicated):
    """Check that a running merge is reported in `system.merges` and disappears afterwards.

    Three single-row parts are inserted through node1, then OPTIMIZE is started
    in a background thread. While it runs, `system.merges` on the checked node
    must contain exactly one row describing the merge of the three parts; after
    the merge has finished the table must have no rows for this table.

    :param started_cluster: module fixture that guarantees the cluster is running.
    :param replicated: empty string for a plain MergeTree table on node1, any
        non-empty value for a ReplicatedMergeTree table on node1 and node2.
    """
    clickhouse_path = "/var/lib/clickhouse"
    db_name = "test"
    table_name = "merge_simple"
    name = db_name + "." + table_name
    table_path = "data/" + db_name + "/" + table_name
    nodes = [node1, node2] if replicated else [node1]
    engine = "ReplicatedMergeTree('/clickhouse/test_merge_simple', '{replica}')" if replicated else "MergeTree()"
    # In the replicated case the merge is observed on node2, while the
    # INSERTs and the OPTIMIZE are sent to node1.
    node_check = nodes[-1]
    # Expected part names are derived from this first block number.
    starting_block = 0 if replicated else 1
    # Threads started by the test; joined in `finally` so that the table is
    # never dropped while a background query may still be running.
    background_threads = []

    try:
        for node in nodes:
            # NOTE(review): sleep(2) in the sorting key presumably slows the merge
            # down enough for it to be visible in system.merges - confirm.
            node.query("""
                CREATE TABLE {name}
                (
                    `a` Int64
                )
                ENGINE = {engine}
                ORDER BY sleep(2)
            """.format(engine=engine, name=name))

        node1.query("INSERT INTO {name} VALUES (1)".format(name=name))
        node1.query("INSERT INTO {name} VALUES (2)".format(name=name))
        node1.query("INSERT INTO {name} VALUES (3)".format(name=name))

        parts = ["all_{}_{}_0".format(x, x) for x in range(starting_block, starting_block + 3)]
        result_part = "all_{}_{}_1".format(starting_block, starting_block + 2)

        def optimize():
            node1.query("OPTIMIZE TABLE {name}".format(name=name))

        # Ensures at least 5 seconds pass between starting the merge and the
        # final emptiness check, even if OPTIMIZE on node1 returns earlier.
        wait = threading.Thread(target=time.sleep, args=(5,))
        wait.start()
        background_threads.append(wait)
        t = threading.Thread(target=optimize)
        t.start()
        background_threads.append(t)

        # Give the merge time to start before looking at system.merges.
        time.sleep(1)
        assert split_tsv(node_check.query("""
            SELECT database, table, num_parts, source_part_names, source_part_paths, result_part_name, result_part_path, partition_id, is_mutation
                FROM system.merges
                WHERE table = '{name}'
        """.format(name=table_name))) == [
            [
                db_name,
                table_name,
                "3",
                "['{}','{}','{}']".format(*parts),
                "['{clickhouse}/{table_path}/{}/','{clickhouse}/{table_path}/{}/','{clickhouse}/{table_path}/{}/']".format(
                    *parts, clickhouse=clickhouse_path, table_path=table_path),
                result_part,
                "{clickhouse}/{table_path}/{}/".format(result_part, clickhouse=clickhouse_path, table_path=table_path),
                "all",
                "0"
            ]
        ]
        t.join()
        wait.join()

        assert node_check.query("SELECT * FROM system.merges WHERE table = '{name}'".format(name=table_name)) == ""

    finally:
        # Joining an already finished thread is a no-op, so this only matters
        # when an assertion above failed before the explicit joins.
        for thread in background_threads:
            thread.join()
        # IF EXISTS keeps the cleanup from raising (and hiding the original
        # error) when the table was never created on some node.
        for node in nodes:
            node.query("DROP TABLE IF EXISTS {name}".format(name=name))


@pytest.mark.parametrize("replicated", [
    "",
    "replicated"
])
def test_mutation_simple(started_cluster, replicated):
    """Check that a running mutation is reported in `system.merges` and disappears afterwards.

    One single-row part is inserted through node1, then an ALTER ... UPDATE is
    started in a background thread. While it runs, `system.merges` on the
    checked node must contain exactly one row with `is_mutation = 1` for the
    part; after the ALTER has returned the table must have no rows for this table.

    :param started_cluster: module fixture that guarantees the cluster is running.
    :param replicated: empty string for a plain MergeTree table on node1, any
        non-empty value for a ReplicatedMergeTree table on node1 and node2.
    """
    clickhouse_path = "/var/lib/clickhouse"
    db_name = "test"
    table_name = "mutation_simple"
    name = db_name + "." + table_name
    table_path = "data/" + db_name + "/" + table_name
    nodes = [node1, node2] if replicated else [node1]
    engine = "ReplicatedMergeTree('/clickhouse/test_mutation_simple', '{replica}')" if replicated else "MergeTree()"
    # In the replicated case the mutation is observed on node2, while the
    # INSERT and the ALTER are sent to node1.
    node_check = nodes[-1]
    # Expected part names are derived from this first block number.
    starting_block = 0 if replicated else 1
    # Set once the ALTER thread has been started; joined in `finally` so that
    # the table is never dropped while the query may still be running.
    alter_thread = None

    try:
        for node in nodes:
            node.query("""
                CREATE TABLE {name}
                (
                    `a` Int64
                )
                ENGINE = {engine}
                ORDER BY tuple()
            """.format(engine=engine, name=name))

        node1.query("INSERT INTO {name} VALUES (1)".format(name=name))
        part = "all_{}_{}_0".format(starting_block, starting_block)
        # The mutated part keeps the block range and level of the source part
        # and gets `starting_block + 1` as the trailing component of its name.
        result_part = "all_{}_{}_0_{}".format(starting_block, starting_block, starting_block + 1)

        def alter():
            # sleep(2) in the WHERE clause keeps the mutation running long enough
            # to be observed; mutations_sync=1 makes the query wait for the
            # mutation on node1 instead of returning immediately.
            node1.query("ALTER TABLE {name} UPDATE a = 42 WHERE sleep(2) OR 1".format(name=name), settings={
                'mutations_sync': 1,
            })

        alter_thread = threading.Thread(target=alter)
        alter_thread.start()

        # Give the mutation time to start before looking at system.merges.
        time.sleep(1)
        assert split_tsv(node_check.query("""
            SELECT database, table, num_parts, source_part_names, source_part_paths, result_part_name, result_part_path, partition_id, is_mutation
                FROM system.merges
                WHERE table = '{name}'
        """.format(name=table_name))) == [
            [
                db_name,
                table_name,
                "1",
                "['{}']".format(part),
                "['{clickhouse}/{table_path}/{}/']".format(part, clickhouse=clickhouse_path, table_path=table_path),
                result_part,
                "{clickhouse}/{table_path}/{}/".format(result_part, clickhouse=clickhouse_path, table_path=table_path),
                "all",
                "1"
            ],
        ]
        alter_thread.join()

        assert node_check.query("SELECT * FROM system.merges WHERE table = '{name}'".format(name=table_name)) == ""

    finally:
        # Joining an already finished thread is a no-op, so this only matters
        # when an assertion above failed before the explicit join.
        if alter_thread is not None:
            alter_thread.join()
        # IF EXISTS keeps the cleanup from raising (and hiding the original
        # error) when the table was never created on some node.
        for node in nodes:
            node.query("DROP TABLE IF EXISTS {name}".format(name=name))
Added simple non-replicated tests for `system.merges`. 2019-12-07 08:40:49 +00:00			`import threading`
			`import time`
Format and optimize imports in integration test files This PR formats all the `*.py` files found under the `tests/integration` folder. It also reorders the imports and cleans up a bunch of unused imports. The formatting also takes care of other things like wrapping lines and fixing spaces and indents such that the tests look more readable. 2020-09-16 04:26:10 +00:00
			`import pytest`
Added simple non-replicated tests for `system.merges`. 2019-12-07 08:40:49 +00:00			`from helpers.cluster import ClickHouseCluster`

			`cluster = ClickHouseCluster(__file__)`

			`node1 = cluster.add_instance('node1',`
Format and optimize imports in integration test files This PR formats all the `*.py` files found under the `tests/integration` folder. It also reorders the imports and cleans up a bunch of unused imports. The formatting also takes care of other things like wrapping lines and fixing spaces and indents such that the tests look more readable. 2020-09-16 04:26:10 +00:00			`main_configs=['configs/logs_config.xml'],`
			`with_zookeeper=True,`
			`macros={"shard": 0, "replica": 1})`
Added simple non-replicated tests for `system.merges`. 2019-12-07 08:40:49 +00:00
			`node2 = cluster.add_instance('node2',`
Format and optimize imports in integration test files This PR formats all the `*.py` files found under the `tests/integration` folder. It also reorders the imports and cleans up a bunch of unused imports. The formatting also takes care of other things like wrapping lines and fixing spaces and indents such that the tests look more readable. 2020-09-16 04:26:10 +00:00			`main_configs=['configs/logs_config.xml'],`
			`with_zookeeper=True,`
			`macros={"shard": 0, "replica": 2})`
Added simple non-replicated tests for `system.merges`. 2019-12-07 08:40:49 +00:00

			`@pytest.fixture(scope="module")`
			`def started_cluster():`
			`try:`
			`cluster.start()`
enable more tests 2020-09-22 11:56:40 +00:00			`node1.query('CREATE DATABASE test ENGINE=Ordinary') # Different paths with Atomic`
fix some tests 2020-01-28 19:39:52 +00:00			`node2.query('CREATE DATABASE test ENGINE=Ordinary')`
Added simple non-replicated tests for `system.merges`. 2019-12-07 08:40:49 +00:00			`yield cluster`

			`finally:`
			`cluster.shutdown()`


			`def split_tsv(data):`
Format and optimize imports in integration test files This PR formats all the `*.py` files found under the `tests/integration` folder. It also reorders the imports and cleans up a bunch of unused imports. The formatting also takes care of other things like wrapping lines and fixing spaces and indents such that the tests look more readable. 2020-09-16 04:26:10 +00:00			`return [x.split("\t") for x in data.splitlines()]`
Added simple non-replicated tests for `system.merges`. 2019-12-07 08:40:49 +00:00

Added replicated tests for `system.merges`. 2019-12-07 10:15:01 +00:00			`@pytest.mark.parametrize("replicated", [`
			`"",`
			`"replicated"`
			`])`
			`def test_merge_simple(started_cluster, replicated):`
Added simple non-replicated tests for `system.merges`. 2019-12-07 08:40:49 +00:00			`try:`
			`clickhouse_path = "/var/lib/clickhouse"`
fix some tests 2020-01-28 19:39:52 +00:00			`db_name = "test"`
			`table_name = "merge_simple"`
			`name = db_name + "." + table_name`
			`table_path = "data/" + db_name + "/" + table_name`
Added replicated tests for `system.merges`. 2019-12-07 10:15:01 +00:00			`nodes = [node1, node2] if replicated else [node1]`
Fixed `test_system_merges` to correctly check replicated node set. 2019-12-07 10:35:13 +00:00			`engine = "ReplicatedMergeTree('/clickhouse/test_merge_simple', '{replica}')" if replicated else "MergeTree()"`
Added replicated tests for `system.merges`. 2019-12-07 10:15:01 +00:00			`node_check = nodes[-1]`
Fixed `test_system_merges/test.py::test_merge_simple[replicated]` test. 2019-12-07 19:02:01 +00:00			`starting_block = 0 if replicated else 1`
Added replicated tests for `system.merges`. 2019-12-07 10:15:01 +00:00
Fixed `test_system_merges` to correctly check replicated node set. 2019-12-07 10:35:13 +00:00			`for node in nodes:`
Added replicated tests for `system.merges`. 2019-12-07 10:15:01 +00:00			`node.query("""`
			`CREATE TABLE {name}`
			`(`
			`` `a` Int64 ``
			`)`
			`ENGINE = {engine}`
			`ORDER BY sleep(2)`
			`""".format(engine=engine, name=name))`
Added simple non-replicated tests for `system.merges`. 2019-12-07 08:40:49 +00:00
			`node1.query("INSERT INTO {name} VALUES (1)".format(name=name))`
			`node1.query("INSERT INTO {name} VALUES (2)".format(name=name))`
			`node1.query("INSERT INTO {name} VALUES (3)".format(name=name))`

Format and optimize imports in integration test files This PR formats all the `*.py` files found under the `tests/integration` folder. It also reorders the imports and cleans up a bunch of unused imports. The formatting also takes care of other things like wrapping lines and fixing spaces and indents such that the tests look more readable. 2020-09-16 04:26:10 +00:00			`parts = ["all_{}_{}_0".format(x, x) for x in range(starting_block, starting_block + 3)]`
			`result_part = "all_{}_{}_1".format(starting_block, starting_block + 2)`
Fixed `test_system_merges/test.py::test_merge_simple[replicated]` test. 2019-12-07 19:02:01 +00:00
Added simple non-replicated tests for `system.merges`. 2019-12-07 08:40:49 +00:00			`def optimize():`
			`node1.query("OPTIMIZE TABLE {name}".format(name=name))`

Fixed `test_system_merges` to correctly check replicated node set. 2019-12-07 10:35:13 +00:00			`wait = threading.Thread(target=time.sleep, args=(5,))`
			`wait.start()`
Added simple non-replicated tests for `system.merges`. 2019-12-07 08:40:49 +00:00			`t = threading.Thread(target=optimize)`
			`t.start()`

			`time.sleep(1)`
Added replicated tests for `system.merges`. 2019-12-07 10:15:01 +00:00			`assert split_tsv(node_check.query("""`
Added simple non-replicated tests for `system.merges`. 2019-12-07 08:40:49 +00:00			`SELECT database, table, num_parts, source_part_names, source_part_paths, result_part_name, result_part_path, partition_id, is_mutation`
			`FROM system.merges`
			`WHERE table = '{name}'`
fix some tests 2020-01-28 19:39:52 +00:00			`""".format(name=table_name))) == [`
Added simple non-replicated tests for `system.merges`. 2019-12-07 08:40:49 +00:00			`[`
fix some tests 2020-01-28 19:39:52 +00:00			`db_name,`
			`table_name,`
Added simple non-replicated tests for `system.merges`. 2019-12-07 08:40:49 +00:00			`"3",`
Fixed `test_system_merges/test.py::test_merge_simple[replicated]` test. 2019-12-07 19:02:01 +00:00			`"['{}','{}','{}']".format(*parts),`
Format and optimize imports in integration test files This PR formats all the `*.py` files found under the `tests/integration` folder. It also reorders the imports and cleans up a bunch of unused imports. The formatting also takes care of other things like wrapping lines and fixing spaces and indents such that the tests look more readable. 2020-09-16 04:26:10 +00:00			`"['{clickhouse}/{table_path}/{}/','{clickhouse}/{table_path}/{}/','{clickhouse}/{table_path}/{}/']".format(`
			`*parts, clickhouse=clickhouse_path, table_path=table_path),`
Fixed `test_system_merges/test.py::test_merge_simple[replicated]` test. 2019-12-07 19:02:01 +00:00			`result_part,`
fix some tests 2020-01-28 19:39:52 +00:00			`"{clickhouse}/{table_path}/{}/".format(result_part, clickhouse=clickhouse_path, table_path=table_path),`
Added simple non-replicated tests for `system.merges`. 2019-12-07 08:40:49 +00:00			`"all",`
			`"0"`
			`]`
			`]`
			`t.join()`
Fixed `test_system_merges` to correctly check replicated node set. 2019-12-07 10:35:13 +00:00			`wait.join()`
Added simple non-replicated tests for `system.merges`. 2019-12-07 08:40:49 +00:00
fix some tests 2020-01-28 19:39:52 +00:00			`assert node_check.query("SELECT * FROM system.merges WHERE table = '{name}'".format(name=table_name)) == ""`
Added simple non-replicated tests for `system.merges`. 2019-12-07 08:40:49 +00:00
			`finally:`
Added replicated tests for `system.merges`. 2019-12-07 10:15:01 +00:00			`for node in nodes:`
			`node.query("DROP TABLE {name}".format(name=name))`
Added simple non-replicated tests for `system.merges`. 2019-12-07 08:40:49 +00:00

Added replicated tests for `system.merges`. 2019-12-07 10:15:01 +00:00			`@pytest.mark.parametrize("replicated", [`
			`"",`
			`"replicated"`
			`])`
			`def test_mutation_simple(started_cluster, replicated):`
Added simple non-replicated tests for `system.merges`. 2019-12-07 08:40:49 +00:00			`try:`
			`clickhouse_path = "/var/lib/clickhouse"`
fix some tests 2020-01-28 19:39:52 +00:00			`db_name = "test"`
			`table_name = "mutation_simple"`
			`name = db_name + "." + table_name`
			`table_path = "data/" + db_name + "/" + table_name`
Added replicated tests for `system.merges`. 2019-12-07 10:15:01 +00:00			`nodes = [node1, node2] if replicated else [node1]`
Fixed `test_system_merges/test.py::test_mutation_simple[replicated]` test. 2019-12-07 19:23:25 +00:00			`engine = "ReplicatedMergeTree('/clickhouse/test_mutation_simple', '{replica}')" if replicated else "MergeTree()"`
Added replicated tests for `system.merges`. 2019-12-07 10:15:01 +00:00			`node_check = nodes[-1]`
Fixed `test_system_merges/test.py::test_mutation_simple[replicated]` test. 2019-12-07 19:23:25 +00:00			`starting_block = 0 if replicated else 1`
Added replicated tests for `system.merges`. 2019-12-07 10:15:01 +00:00
Fixed `test_system_merges` to correctly check replicated node set. 2019-12-07 10:35:13 +00:00			`for node in nodes:`
Added replicated tests for `system.merges`. 2019-12-07 10:15:01 +00:00			`node.query("""`
			`CREATE TABLE {name}`
			`(`
			`` `a` Int64 ``
			`)`
			`ENGINE = {engine}`
			`ORDER BY tuple()`
			`""".format(engine=engine, name=name))`
Added simple non-replicated tests for `system.merges`. 2019-12-07 08:40:49 +00:00
			`node1.query("INSERT INTO {name} VALUES (1)".format(name=name))`
Fixed `test_system_merges/test.py::test_mutation_simple[replicated]` test. 2019-12-07 19:23:25 +00:00			`part = "all_{}_{}_0".format(starting_block, starting_block)`
Format and optimize imports in integration test files This PR formats all the `*.py` files found under the `tests/integration` folder. It also reorders the imports and cleans up a bunch of unused imports. The formatting also takes care of other things like wrapping lines and fixing spaces and indents such that the tests look more readable. 2020-09-16 04:26:10 +00:00			`result_part = "all_{}_{}_0_{}".format(starting_block, starting_block, starting_block + 1)`
Added simple non-replicated tests for `system.merges`. 2019-12-07 08:40:49 +00:00
			`def alter():`
Fix test_system_merges by using mutations_sync=1. After early_constant_folding started to ignore not only ignore() but all functions with isSuitableForConstantFolding() == false, this test executes more sleep(2) calls: - MergeTreeDataSelectExecutor::readFromParts -> DB::KeyCondition::KeyCondition - MergeTreeDataMergerMutator::mutatePartToTemporaryPart -> DB::isStorageTouchedByMutations -> FilterTransform::transform - MergeTreeDataMergerMutator::mutatePartToTemporaryPart -> DB::MergeTreeDataMergerMutator::mutateAllPartColumns -> FilterTransform::transform. Previously the expression was optimized to 0 during WHERE analysis. 2021-02-11 19:21:46 +00:00			`node1.query("ALTER TABLE {name} UPDATE a = 42 WHERE sleep(2) OR 1".format(name=name), settings={`
			`'mutations_sync': 1,`
			`})`
Added simple non-replicated tests for `system.merges`. 2019-12-07 08:40:49 +00:00
			`t = threading.Thread(target=alter)`
			`t.start()`

			`time.sleep(1)`
Added replicated tests for `system.merges`. 2019-12-07 10:15:01 +00:00			`assert split_tsv(node_check.query("""`
Added simple non-replicated tests for `system.merges`. 2019-12-07 08:40:49 +00:00			`SELECT database, table, num_parts, source_part_names, source_part_paths, result_part_name, result_part_path, partition_id, is_mutation`
			`FROM system.merges`
			`WHERE table = '{name}'`
fix some tests 2020-01-28 19:39:52 +00:00			`""".format(name=table_name))) == [`
Added simple non-replicated tests for `system.merges`. 2019-12-07 08:40:49 +00:00			`[`
fix some tests 2020-01-28 19:39:52 +00:00			`db_name,`
			`table_name,`
Added simple non-replicated tests for `system.merges`. 2019-12-07 08:40:49 +00:00			`"1",`
Fixed `test_system_merges/test.py::test_mutation_simple[replicated]` test. 2019-12-07 19:23:25 +00:00			`"['{}']".format(part),`
fix some tests 2020-01-28 19:39:52 +00:00			`"['{clickhouse}/{table_path}/{}/']".format(part, clickhouse=clickhouse_path, table_path=table_path),`
Fixed `test_system_merges/test.py::test_mutation_simple[replicated]` test. 2019-12-07 19:23:25 +00:00			`result_part,`
fix some tests 2020-01-28 19:39:52 +00:00			`"{clickhouse}/{table_path}/{}/".format(result_part, clickhouse=clickhouse_path, table_path=table_path),`
Added simple non-replicated tests for `system.merges`. 2019-12-07 08:40:49 +00:00			`"all",`
			`"1"`
			`],`
			`]`
			`t.join()`

fix some tests 2020-01-28 19:39:52 +00:00			`assert node_check.query("SELECT * FROM system.merges WHERE table = '{name}'".format(name=table_name)) == ""`
Added simple non-replicated tests for `system.merges`. 2019-12-07 08:40:49 +00:00
			`finally:`
Added replicated tests for `system.merges`. 2019-12-07 10:15:01 +00:00			`for node in nodes:`
			`node.query("DROP TABLE {name}".format(name=name))`