ClickHouse/tests/integration/test_disk_over_web_server/test.py

import pytest
from helpers.cluster import ClickHouseCluster
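
# Integration tests for serving MergeTree data over HTTP: node1 writes tables and
# uploads them to nginx with `clickhouse static-files-disk-uploader`; the other
# nodes attach them read-only by UUID via the 'web' and 'cached_web' storage policies.
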
uuids = []


@pytest.fixture(scope="module")
def cluster():
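    """Start a cluster with an nginx server, create and fill data0..data2 on node1,
    upload their data to http://nginx:80/test1 with clickhouse static-files-disk-uploader,
    and record the table UUIDs for the tests below."""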
    try:
        cluster = ClickHouseCluster(__file__)
        cluster.add_instance(
            "node1", main_configs=["configs/storage_conf.xml"], with_nginx=True
        )
        cluster.add_instance(
            "node2", main_configs=["configs/storage_conf_web.xml"], with_nginx=True
        )
        cluster.add_instance(
            "node3", main_configs=["configs/storage_conf_web.xml"], with_nginx=True
        )

        cluster.start()

        node1 = cluster.instances["node1"]
        expected = ""
        global uuids
        for i in range(3):
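            # min_bytes_for_wide_part=1 keeps every non-empty part in the wide
            # format (one file per column); presumably so the uploaded disk
            # layout is fully exercised.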
            node1.query(
                f"CREATE TABLE data{i} (id Int32) ENGINE = MergeTree() ORDER BY id SETTINGS storage_policy = 'def', min_bytes_for_wide_part=1;"
            )

            for _ in range(10):
                node1.query(
                    f"INSERT INTO data{i} SELECT number FROM numbers(500000 * {i+1})"
                )

            expected = node1.query(f"SELECT * FROM data{i} ORDER BY id")

            metadata_path = node1.query(
                f"SELECT data_paths FROM system.tables WHERE name='data{i}'"
            )
            metadata_path = metadata_path[
                metadata_path.find("/") : metadata_path.rfind("/") + 1
            ]
            print(f"Metadata: {metadata_path}")
            node1.exec_in_container(
                [
                    "bash",
                    "-c",
                    "/usr/bin/clickhouse static-files-disk-uploader --test-mode --url http://nginx:80/test1 --metadata-path {}".format(
                        metadata_path
                    ),
                ],
                user="root",
            )
            parts = metadata_path.split("/")
            uuids.append(parts[3])
            print(f"UUID: {parts[3]}")

        yield cluster

    finally:
        cluster.shutdown()


@pytest.mark.parametrize("node_name", ["node2"])
def test_usage(cluster, node_name):
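    """Attach each uploaded table by its original UUID using the 'web' storage
    policy and check that node2 reads the same data that node1 holds in data0..data2."""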
    node1 = cluster.instances["node1"]
    node2 = cluster.instances[node_name]
    global uuids
    assert len(uuids) == 3
    for i in range(3):
        node2.query(
            """
            ATTACH TABLE test{} UUID '{}'
            (id Int32) ENGINE = MergeTree() ORDER BY id
            SETTINGS storage_policy = 'web';
            """.format(
                i, uuids[i]
            )
        )

        result = node2.query("SELECT * FROM test{} settings max_threads=20".format(i))

        result = node2.query("SELECT count() FROM test{}".format(i))
        assert int(result) == 5000000 * (i + 1)

        result = node2.query(
            "SELECT id FROM test{} WHERE id % 56 = 3 ORDER BY id".format(i)
        )
        assert result == node1.query(
            "SELECT id FROM data{} WHERE id % 56 = 3 ORDER BY id".format(i)
        )

        result = node2.query(
            "SELECT id FROM test{} WHERE id > 789999 AND id < 999999 ORDER BY id".format(
                i
            )
        )
        assert result == node1.query(
            "SELECT id FROM data{} WHERE id > 789999 AND id < 999999 ORDER BY id".format(
                i
            )
        )

        node2.query("DROP TABLE test{} SYNC".format(i))
        print(f"Ok {i}")


def test_incorrect_usage(cluster):
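    """Tables attached on the 'web' disk are read-only: ALTER and TRUNCATE must be rejected."""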
    node1 = cluster.instances["node1"]
    node2 = cluster.instances["node3"]
    global uuids
    node2.query(
        """
        ATTACH TABLE test0 UUID '{}'
        (id Int32) ENGINE = MergeTree() ORDER BY id
        SETTINGS storage_policy = 'web';
        """.format(
            uuids[0]
        )
    )

    result = node2.query("SELECT count() FROM test0")
    assert int(result) == 5000000

    result = node2.query_and_get_error("ALTER TABLE test0 ADD COLUMN col1 Int32 first")
    assert "Table is read-only" in result

    result = node2.query_and_get_error("TRUNCATE TABLE test0")
    assert "Table is read-only" in result

    node2.query("DROP TABLE test0 SYNC")


@pytest.mark.parametrize("node_name", ["node2"])
def test_cache(cluster, node_name):
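    """Repeat the test_usage checks through the 'cached_web' policy and verify
    that reads populate the filesystem cache."""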
    node1 = cluster.instances["node1"]
    node2 = cluster.instances[node_name]
    global uuids
    assert len(uuids) == 3
    for i in range(3):
        node2.query(
            """
            ATTACH TABLE test{} UUID '{}'
            (id Int32) ENGINE = MergeTree() ORDER BY id
            SETTINGS storage_policy = 'cached_web';
            """.format(
                i, uuids[i]
            )
        )
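
        # The filesystem cache must start empty; reading the table through the
        # cached_web policy should then populate it.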
        result = node2.query(
            """
            SYSTEM DROP FILESYSTEM CACHE;
            SELECT count() FROM system.filesystem_cache;
            """
        )
        assert int(result) == 0

        result = node2.query("SELECT * FROM test{} settings max_threads=20".format(i))

        result = node2.query(
            """
            SELECT count() FROM system.filesystem_cache;
            """
        )
        assert int(result) > 0

        result = node2.query("SELECT count() FROM test{}".format(i))
        assert int(result) == 5000000 * (i + 1)

        result = node2.query(
            "SELECT id FROM test{} WHERE id % 56 = 3 ORDER BY id".format(i)
        )
        assert result == node1.query(
            "SELECT id FROM data{} WHERE id % 56 = 3 ORDER BY id".format(i)
        )
        result = node2.query(
            "SELECT id FROM test{} WHERE id > 789999 AND id < 999999 ORDER BY id".format(
                i
            )
        )
        assert result == node1.query(
            "SELECT id FROM data{} WHERE id > 789999 AND id < 999999 ORDER BY id".format(
                i
            )
        )

        node2.query("DROP TABLE test{} SYNC".format(i))
        print(f"Ok {i}")