ClickHouse/tests/integration/test_limited_replicated_fetches/test.py

#!/usr/bin/env python3

import os
import random
import string
import time

import pytest

from helpers.cluster import ClickHouseCluster
from helpers.network import PartitionManager

# Shared two-instance topology for this module: both instances load the same
# custom settings file and request ZooKeeper.
cluster = ClickHouseCluster(__file__)
SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
node1, node2 = [
    cluster.add_instance(
        instance_name,
        main_configs=["configs/custom_settings.xml"],
        with_zookeeper=True,
    )
    for instance_name in ("node1", "node2")
]

# Upper bound on simultaneous fetches that the test asserts against.
# NOTE(review): presumably mirrors the fetch pool size configured in
# configs/custom_settings.xml -- confirm against that file.
MAX_THREADS_FOR_FETCH = 3


@pytest.fixture(scope="module")
def started_cluster():
    """Start the module-level cluster, yield it to tests, then shut it down.

    The fixture is module-scoped, so the cluster is started once and shared
    by every test in this file that requests it.
    """
    try:
        cluster.start()

        yield cluster

    finally:
        # Runs on normal teardown and also when start() itself raised.
        cluster.shutdown()


def get_random_string(length):
    """Return a random string of ``length`` characters drawn from A-Z and 0-9."""
    alphabet = string.ascii_uppercase + string.digits
    picked = [random.choice(alphabet) for _ in range(length)]
    return "".join(picked)


def test_limited_fetches(started_cluster):
    """
    Check that node2 uses all fetch threads available to it and never runs
    more than MAX_THREADS_FOR_FETCH fetches at the same time.

    node1 is filled with several single-key partitions while fetches are
    stopped on node2. Fetches are then re-enabled with a network delay on
    node1, and system.replicated_fetches / system.metrics on node2 are polled
    to record how many fetches are in flight at each sample.
    """
    # One INSERT per key; the polling loop below stops once this many
    # distinct parts have been observed and nothing is in flight any more.
    partitions_count = 6
    create_query = (
        "CREATE TABLE t (key UInt64, data String) "
        "ENGINE = ReplicatedMergeTree('/clickhouse/test/t', '{}') "
        "ORDER BY tuple() PARTITION BY key"
    )

    fetches_result = []  # non-empty snapshots of system.replicated_fetches
    background_fetches_metric = []  # BackgroundFetchesPoolTask at every poll
    fetched_parts = set()  # every part name ever seen in flight

    try:
        node1.query(create_query.format("1"))
        node2.query(create_query.format("2"))

        with PartitionManager() as pm:
            # Hold fetches back so all parts are queued before any starts.
            node2.query("SYSTEM STOP FETCHES t")
            for key in range(1, partitions_count + 1):
                node1.query(
                    "INSERT INTO t SELECT {}, '{}' FROM numbers(5000)".format(
                        key, get_random_string(104857)
                    )
                )
            # Presumably slows transfers enough for in-flight fetches to be
            # caught by the polling below -- confirm the unit of `80` against
            # helpers.network.
            pm.add_network_delay(node1, 80)
            node2.query("SYSTEM START FETCHES t")

            for _ in range(1000):
                result = (
                    node2.query(
                        "SELECT result_part_name FROM system.replicated_fetches"
                    )
                    .strip()
                    .split()
                )
                background_fetches_metric.append(
                    int(
                        node2.query(
                            "select value from system.metrics where metric = 'BackgroundFetchesPoolTask'"
                        ).strip()
                    )
                )
                if result:
                    fetched_parts.update(result)
                    fetches_result.append(result)
                    print(result)
                    print(background_fetches_metric[-1])
                elif len(fetched_parts) == partitions_count:
                    # Nothing in flight and every part was seen: done.
                    break
                time.sleep(0.1)

        for concurrently_fetching_parts in fetches_result:
            assert (
                len(concurrently_fetching_parts) <= MAX_THREADS_FOR_FETCH
            ), "Found more than {} concurrently fetching parts: {}".format(
                MAX_THREADS_FOR_FETCH, ", ".join(concurrently_fetching_parts)
            )

        # default=0 turns "no fetch was ever observed" into a readable
        # assertion failure instead of a ValueError from max().
        assert (
            max((len(parts) for parts in fetches_result), default=0)
            == MAX_THREADS_FOR_FETCH
        ), "Strange, but we don't utilize max concurrent threads for fetches"
        assert (
            max(background_fetches_metric, default=0) == MAX_THREADS_FOR_FETCH
        ), "Just checking metric consistent with table"
    finally:
        # Always clean up: the cluster is module-scoped, so a leftover table
        # would break a re-run of this test in the same session.
        node1.query("DROP TABLE IF EXISTS t SYNC")
        node2.query("DROP TABLE IF EXISTS t SYNC")
Add simple integration test 2020-11-09 09:10:32 +00:00			`#!/usr/bin/env python3`

Automatic style fix 2024-09-27 10:19:39 +00:00			`import os`
			`import random`
			`import string`
Add simple integration test 2020-11-09 09:10:32 +00:00			`import time`
Automatic style fix 2024-09-27 10:19:39 +00:00
			`import pytest`

Add simple integration test 2020-11-09 09:10:32 +00:00			`from helpers.cluster import ClickHouseCluster`
			`from helpers.network import PartitionManager`

			`cluster = ClickHouseCluster(__file__)`
Update test.py 2021-04-10 20:16:27 +00:00			`SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))`
Apply black formatter to all *.py files in the repo 2022-03-22 16:39:58 +00:00			`node1 = cluster.add_instance(`
Fixed the restriction on maximum size of replicated fetches (#42090) 2022-10-06 11:18:46 +00:00			`"node1", main_configs=["configs/custom_settings.xml"], with_zookeeper=True`
Apply black formatter to all *.py files in the repo 2022-03-22 16:39:58 +00:00			`)`
			`node2 = cluster.add_instance(`
Fixed the restriction on maximum size of replicated fetches (#42090) 2022-10-06 11:18:46 +00:00			`"node2", main_configs=["configs/custom_settings.xml"], with_zookeeper=True`
Apply black formatter to all *.py files in the repo 2022-03-22 16:39:58 +00:00			`)`
Add simple integration test 2020-11-09 09:10:32 +00:00
Proper config spec 2021-04-12 08:06:36 +00:00			`MAX_THREADS_FOR_FETCH = 3`
Add simple integration test 2020-11-09 09:10:32 +00:00
Apply black formatter to all *.py files in the repo 2022-03-22 16:39:58 +00:00
Add simple integration test 2020-11-09 09:10:32 +00:00			`@pytest.fixture(scope="module")`
			`def started_cluster():`
			`try:`
			`cluster.start()`

			`yield cluster`

			`finally:`
			`cluster.shutdown()`


			`def get_random_string(length):`
Apply black formatter to all *.py files in the repo 2022-03-22 16:39:58 +00:00			`return "".join(`
			`random.choice(string.ascii_uppercase + string.digits) for _ in range(length)`
			`)`
Add simple integration test 2020-11-09 09:10:32 +00:00

			`def test_limited_fetches(started_cluster):`
Fix lost comment 2021-04-12 08:07:28 +00:00			`"""`
Apply black formatter to all *.py files in the repo 2022-03-22 16:39:58 +00:00			`Test checks that that we utilize all available threads for fetches`
Fix lost comment 2021-04-12 08:07:28 +00:00			`"""`
Apply black formatter to all *.py files in the repo 2022-03-22 16:39:58 +00:00			`node1.query(`
			`"CREATE TABLE t (key UInt64, data String) ENGINE = ReplicatedMergeTree('/clickhouse/test/t', '1') ORDER BY tuple() PARTITION BY key"`
			`)`
			`node2.query(`
			`"CREATE TABLE t (key UInt64, data String) ENGINE = ReplicatedMergeTree('/clickhouse/test/t', '2') ORDER BY tuple() PARTITION BY key"`
			`)`
Add simple integration test 2020-11-09 09:10:32 +00:00
			`with PartitionManager() as pm:`
			`node2.query("SYSTEM STOP FETCHES t")`
Apply black formatter to all *.py files in the repo 2022-03-22 16:39:58 +00:00			`node1.query(`
			`"INSERT INTO t SELECT 1, '{}' FROM numbers(5000)".format(`
			`get_random_string(104857)`
			`)`
			`)`
			`node1.query(`
			`"INSERT INTO t SELECT 2, '{}' FROM numbers(5000)".format(`
			`get_random_string(104857)`
			`)`
			`)`
			`node1.query(`
			`"INSERT INTO t SELECT 3, '{}' FROM numbers(5000)".format(`
			`get_random_string(104857)`
			`)`
			`)`
			`node1.query(`
			`"INSERT INTO t SELECT 4, '{}' FROM numbers(5000)".format(`
			`get_random_string(104857)`
			`)`
			`)`
			`node1.query(`
			`"INSERT INTO t SELECT 5, '{}' FROM numbers(5000)".format(`
			`get_random_string(104857)`
			`)`
			`)`
			`node1.query(`
			`"INSERT INTO t SELECT 6, '{}' FROM numbers(5000)".format(`
			`get_random_string(104857)`
			`)`
			`)`
Add simple integration test 2020-11-09 09:10:32 +00:00			`pm.add_network_delay(node1, 80)`
			`node2.query("SYSTEM START FETCHES t")`
			`fetches_result = []`
			`background_fetches_metric = []`
			`fetched_parts = set([])`
			`for _ in range(1000):`
Apply black formatter to all *.py files in the repo 2022-03-22 16:39:58 +00:00			`result = (`
			`node2.query("SELECT result_part_name FROM system.replicated_fetches")`
			`.strip()`
			`.split()`
			`)`
			`background_fetches_metric.append(`
			`int(`
			`node2.query(`
			`"select value from system.metrics where metric = 'BackgroundFetchesPoolTask'"`
			`).strip()`
			`)`
			`)`
Add simple integration test 2020-11-09 09:10:32 +00:00			`if not result:`
			`if len(fetched_parts) == 6:`
			`break`
			`time.sleep(0.1)`
			`else:`
			`for part in result:`
			`fetched_parts.add(part)`
			`fetches_result.append(result)`
			`print(fetches_result[-1])`
			`print(background_fetches_metric[-1])`
			`time.sleep(0.1)`

			`for concurrently_fetching_parts in fetches_result:`
Proper config spec 2021-04-12 08:06:36 +00:00			`if len(concurrently_fetching_parts) > MAX_THREADS_FOR_FETCH:`
Apply black formatter to all *.py files in the repo 2022-03-22 16:39:58 +00:00			`assert False, "Found more than {} concurrently fetching parts: {}".format(`
			`MAX_THREADS_FOR_FETCH, ", ".join(concurrently_fetching_parts)`
			`)`
Add simple integration test 2020-11-09 09:10:32 +00:00
Apply black formatter to all *.py files in the repo 2022-03-22 16:39:58 +00:00			`assert (`
			`max([len(parts) for parts in fetches_result]) == 3`
			`), "Strange, but we don't utilize max concurrent threads for fetches"`
			`assert (`
			`max(background_fetches_metric)`
			`) == 3, "Just checking metric consistent with table"`
Try to make test_dir.tar smaller 2021-01-29 16:57:52 +00:00
			`node1.query("DROP TABLE IF EXISTS t SYNC")`
Update test.py 2021-04-10 12:05:36 +00:00			`node2.query("DROP TABLE IF EXISTS t SYNC")`