# ClickHouse/tests/integration/test_asynchronous_metrics_pk_bytes_fields/test.py

import time

import pytest

from helpers.cluster import ClickHouseCluster

# Module-level cluster definition used by the fixture and the test below.
cluster = ClickHouseCluster(__file__)
# One instance named "node", started with an extra server config file.
# NOTE(review): judging by the file name, the config presumably overrides the
# asynchronous metrics update period — confirm against the file in configs/.
node = cluster.add_instance(
    "node",
    main_configs=["configs/asynchronous_metrics_update_period_s.xml"],
)


@pytest.fixture(scope="module")
def started_cluster():
    """Start the module-level ``cluster`` and yield it to the tests.

    The fixture is module-scoped, so the cluster is started once for the module.
    ``cluster.shutdown()`` sits in ``finally``: it runs when the fixture is torn
    down and also when ``cluster.start()`` itself raises.
    """
    try:
        cluster.start()
        yield cluster

    finally:
        # always attempt to shut the cluster down, whatever happened above
        cluster.shutdown()


def greater(a, b):
    """Return whether ``b`` is strictly greater than ``a``.

    Mind the argument order: ``a`` is the baseline and ``b`` is the value that is
    expected to have grown relative to it.
    """
    has_grown = b > a
    return has_grown


def lesser(a, b):
    """Return whether ``b`` is strictly less than ``a``.

    Mind the argument order: ``a`` is the baseline and ``b`` is the value that is
    expected to have shrunk relative to it.
    """
    has_shrunk = b < a
    return has_shrunk


def query_until_condition(a, b, condition, retries=20, timeout=60, delay=0.5):
    """Re-evaluate ``a`` and ``b`` until ``condition(a, b)`` holds or the budget runs out.

    Both operands are evaluated at least once. If the condition never becomes
    true, the most recently evaluated values are still returned, so the caller is
    expected to assert on the result itself.

    :param a: could be an input lambda that returns an int or just an int
    :param b: could be an input lambda that returns an int or just an int
    :param condition: lambda that returns a boolean after comparing a and b
    :param retries: number of times to retry until the condition is met
    :param timeout: time in seconds after which stop retrying
    :param delay: time in seconds between each retry
    :return: values of a and b (value post evaluation if lambda)
    """
    retries_done = 0
    # Measure elapsed time with a monotonic clock: unlike time.time() it cannot
    # jump when the system clock is adjusted, so the timeout neither fires early
    # nor gets extended by a wall-clock change.
    start_time = time.monotonic()
    while True:
        res1 = a() if callable(a) else a
        res2 = b() if callable(b) else b
        if condition(res1, res2):
            return res1, res2
        retries_done += 1
        if retries_done >= retries or (time.monotonic() - start_time) > timeout:
            # budget exhausted: hand back the last observed values unchanged
            return res1, res2
        time.sleep(delay)


def test_total_pk_bytes_in_memory_fields(started_cluster):
    """Check that the primary-key memory metrics follow the table's data volume.

    The test creates a MergeTree table with ``index_granularity=1`` and reads the
    ``TotalPrimaryKeyBytesInMemory`` and ``TotalPrimaryKeyBytesInMemoryAllocated``
    values from ``system.asynchronous_metrics``. Both are expected to grow after
    each of two inserts and to shrink after the table is truncated. Each
    comparison is retried through ``query_until_condition`` rather than read once.

    The table is dropped in ``finally`` so that a failed assertion does not leave
    ``test_pk_bytes`` behind on the module-scoped node.
    """
    query_create = """CREATE TABLE test_pk_bytes
    (
       a UInt64,
       b UInt64
    )
    Engine=MergeTree()
    ORDER BY a SETTINGS index_granularity=1"""
    query_pk_bytes = "SELECT value FROM system.asynchronous_metrics WHERE metric = 'TotalPrimaryKeyBytesInMemory';"
    query_pk_bytes_allocated = """SELECT value FROM system.asynchronous_metrics
                                  WHERE metric = 'TotalPrimaryKeyBytesInMemoryAllocated';"""

    # functions to query primary key bytes used and allocated in memory
    def res_pk_bytes():
        return int(node.query(query_pk_bytes).strip())

    def res_pk_bytes_allocated():
        return int(node.query(query_pk_bytes_allocated).strip())

    # created outside the try block: if creation fails there is nothing to drop
    node.query(query_create)

    try:
        # query for metrics before inserting anything into the table
        pk_bytes_before = res_pk_bytes()
        pk_bytes_allocated_before = res_pk_bytes_allocated()

        # insert data into the table and select
        node.query(
            """INSERT INTO test_pk_bytes SELECT number + 20, number * 20 from numbers(1000000)"""
        )

        # NOTE(review): the SELECT presumably makes the server touch the primary
        # index of the new part before the metrics are read — confirm.
        node.query("""SELECT * FROM test_pk_bytes where a > 1000000""")

        # query again after data insertion (make a reasonable amount of retries)
        # metrics should be greater after inserting data
        pk_bytes_before, pk_bytes_after = query_until_condition(
            pk_bytes_before, res_pk_bytes, condition=greater
        )
        assert pk_bytes_after > pk_bytes_before

        pk_bytes_allocated_before, pk_bytes_allocated_after = query_until_condition(
            pk_bytes_allocated_before, res_pk_bytes_allocated, condition=greater
        )
        assert pk_bytes_allocated_after > pk_bytes_allocated_before

        # insert some more data
        node.query(
            """INSERT INTO test_pk_bytes SELECT number + 100, number * 200 from numbers(1000000)"""
        )
        node.query("""SELECT * FROM test_pk_bytes""")

        # query again and compare the metrics.
        # metrics should be greater after inserting more data
        pk_bytes_after, pk_bytes_after_2 = query_until_condition(
            pk_bytes_after, res_pk_bytes, condition=greater
        )
        assert pk_bytes_after_2 > pk_bytes_after

        pk_bytes_allocated_after, pk_bytes_allocated_after_2 = query_until_condition(
            pk_bytes_allocated_after, res_pk_bytes_allocated, condition=greater
        )
        assert pk_bytes_allocated_after_2 > pk_bytes_allocated_after

        # drop all the data
        node.query("TRUNCATE table test_pk_bytes;")

        # query again and compare the metrics.
        # metrics should be lesser after dropping the data
        before_drop, after_drop = query_until_condition(
            pk_bytes_after_2, res_pk_bytes, condition=lesser
        )
        assert before_drop > after_drop

        before_drop, after_drop = query_until_condition(
            pk_bytes_allocated_after_2, res_pk_bytes_allocated, condition=lesser
        )
        assert before_drop > after_drop
    finally:
        # finally drop the table, also when one of the assertions above failed
        node.query("DROP table test_pk_bytes;")