fix test

2024-09-19 16:20:50 +00:00 · 2024-08-07 20:21:50 +01:00 · 2024-08-07 20:21:50 +01:00 · 1082792950
commit 1082792950
parent f54083a1d8
1 changed files with 45 additions and 53 deletions
--- a/tests/integration/test_async_metrics_in_cgroup/test.py
+++ b/tests/integration/test_async_metrics_in_cgroup/test.py
@ -1,11 +1,10 @@
 import pytest
-import subprocess
-import time

 from helpers.cluster import ClickHouseCluster

 cluster = ClickHouseCluster(__file__)
-node = cluster.add_instance("node")
+node1 = cluster.add_instance("node1", stay_alive=True)
+node2 = cluster.add_instance("node2", stay_alive=True)


@pytest.fixture(scope="module")
@ -17,61 +16,54 @@ def start_cluster():
        cluster.shutdown()


-def test_user_cpu_accounting(start_cluster):
-    if node.is_built_with_sanitizer():
-        pytest.skip("Disabled for sanitizers")
-
-    # check that our metrics sources actually exist
-    assert (
-        subprocess.Popen("test -f /sys/fs/cgroup/cpu.stat".split(" ")).wait() == 0
-        or subprocess.Popen(
-            "test -f /sys/fs/cgroup/cpuacct/cpuacct.stat".split(" ")
-        ).wait()
-        == 0
-    )
-
-    # first let's spawn some cpu-intensive process outside of the container and check that it doesn't accounted by ClickHouse server
-    proc = subprocess.Popen(
-        "openssl speed -multi 8".split(" "),
-        stdout=subprocess.DEVNULL,
-        stderr=subprocess.DEVNULL,
-    )
-
-    time.sleep(5)
-
-    metric = node.query(
-        """
-      SELECT max(value)
-        FROM (
-          SELECT toStartOfInterval(event_time, toIntervalSecond(1)) AS t, avg(value) AS value
-            FROM system.asynchronous_metric_log
-           WHERE event_time >= now() - 60 AND metric = 'OSUserTime'
-        GROUP BY t
-        )
-    """
-    ).strip("\n")
-
-    assert float(metric) < 2
-
-    proc.kill()
-
-    # then let's test that we will account cpu time spent by the server itself
+def run_cpu_intensive_task(node):
    node.query(
-        "SELECT cityHash64(*) FROM system.numbers_mt FORMAT Null SETTINGS max_execution_time=10",
+        "SELECT sum(*) FROM system.numbers_mt FORMAT Null SETTINGS max_execution_time=10",
        ignore_error=True,
    )

-    metric = node.query(
+
+def get_async_metric(node, metric):
+    node.query("SYSTEM FLUSH LOGS")
+    return node.query(
+        f"""
+        SELECT max(value)
+            FROM (
+            SELECT toStartOfInterval(event_time, toIntervalSecond(1)) AS t, avg(value) AS value
+                FROM system.asynchronous_metric_log
+            WHERE event_time >= now() - 60 AND metric = '{metric}'
+            GROUP BY t
+            )
+        SETTINGS max_threads = 1
        """
-      SELECT max(value)
-        FROM (
-          SELECT toStartOfInterval(event_time, toIntervalSecond(1)) AS t, avg(value) AS value
-            FROM system.asynchronous_metric_log
-           WHERE event_time >= now() - 60 AND metric = 'OSUserTime'
-        GROUP BY t
-        )
-    """
    ).strip("\n")

+
+def test_user_cpu_accounting(start_cluster):
+    if node1.is_built_with_sanitizer():
+        pytest.skip("Disabled for sanitizers")
+
+    # run the query on the other node; its CPU usage must not be attributed to node1
+    run_cpu_intensive_task(node2)
+
+    node1_cpu_time = get_async_metric(node1, "OSUserTime")
+    assert float(node1_cpu_time) < 2
+
+    # then check that CPU time spent by the server itself is accounted for
+    node2_cpu_time = get_async_metric(node2, "OSUserTime")
    # this check is really weak, but CI is tough place and we cannot guarantee that test process will get many cpu time
-    assert float(metric) > 1
+    assert float(node2_cpu_time) > 2
+
+
+def test_normalized_user_cpu(start_cluster):
+    if node1.is_built_with_sanitizer():
+        pytest.skip("Disabled for sanitizers")
+
+    # run the query on the other node; its CPU usage must not be attributed to node1
+    run_cpu_intensive_task(node2)
+
+    node1_cpu_time = get_async_metric(node1, "OSUserTimeNormalized")
+    assert float(node1_cpu_time) < 1.01
+
+    node2_cpu_time = get_async_metric(node2, "OSUserTimeNormalized")
+    assert float(node2_cpu_time) < 1.01