Make 01540_MemoryTracking an integration test

01540_MemoryTracking has been failing on CI for the following reasons:
- log_queries (fixed by adding log_queries=0)
- profilers (fixed by setting the query profiler periods to 0; both fixes are sketched below)
- what cannot be fixed via query settings is metric_log and the other system logs, so we need
  a separate instance with a separate configuration (sigh).
Azat Khuzhin 2020-10-24 01:50:41 +03:00
parent 3f594ed3ad
commit 6e5b04fad8
6 changed files with 99 additions and 98 deletions
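
As a sketch of what the first two bullets mean in practice (mirroring the settings used in the new test below; illustrative, not part of the diff):

# Per-query settings that remove the first two sources of MemoryTracking noise:
query_settings = {
    'log_queries': 0,                          # do not write these queries to system.query_log
    'query_profiler_real_time_period_ns': 0,   # disable the real-time sampling profiler
    'query_profiler_cpu_time_period_ns': 0,    # disable the CPU sampling profiler
}
# metric_log/trace_log/text_log and friends are flushed by background server threads and cannot
# be turned off per query, hence the dedicated instance with configs/no_system_log.xml below.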


@@ -270,8 +270,6 @@ TESTS_TO_SKIP=(
    # Look at DistributedFilesToInsert, so cannot run in parallel.
    01457_DistributedFilesToInsert
    # Looks at MemoryTracking
    01540_MemoryTracking
    01541_max_memory_usage_for_user
)


@@ -0,0 +1,7 @@
<yandex>
    <metric_log remove="remove"/>
    <query_masking_rules remove="remove"/>
    <query_thread_log remove="remove"/>
    <text_log remove="remove"/>
    <trace_log remove="remove"/>
</yandex>


@@ -0,0 +1,92 @@
# pylint: disable=unused-argument
# pylint: disable=redefined-outer-name
# pylint: disable=line-too-long

import logging

import pytest

from helpers.cluster import ClickHouseCluster

cluster = ClickHouseCluster(__file__)
node = cluster.add_instance('node', main_configs=['configs/no_system_log.xml'])

logging.getLogger().setLevel(logging.INFO)
logging.getLogger().addHandler(logging.StreamHandler())


@pytest.fixture(scope='module', autouse=True)
def start_cluster():
    try:
        cluster.start()
        yield cluster
    finally:
        cluster.shutdown()
query_settings = {
    'max_threads': 1,
    'query_profiler_real_time_period_ns': 0,
    'query_profiler_cpu_time_period_ns': 0,
    'log_queries': 0,
}
sample_query = "SELECT groupArray(repeat('a', 1000)) FROM numbers(10000) GROUP BY number%10 FORMAT JSON"


def query(*args, **kwargs):
    if 'settings' not in kwargs:
        kwargs['settings'] = query_settings
    else:
        kwargs['settings'].update(query_settings)
    return node.query(*args, **kwargs)


def http_query(*args, **kwargs):
    if 'params' not in kwargs:
        kwargs['params'] = query_settings
    else:
        kwargs['params'].update(query_settings)
    return node.http_query(*args, **kwargs)


def get_MemoryTracking():
    return int(http_query("SELECT value FROM system.metrics WHERE metric = 'MemoryTracking'"))
def check_memory(memory):
    # A minimum of 3 MemoryTracking changes is allowed, since:
    # - it is low enough that real drift is still detected (not so high that it hides inaccuracy)
    # - memory can go like X/X+N due to some background allocations
    # - memory can go like X/X+N/X, so at least 2 changes are expected anyway
    changes_allowed = 3
    # if the number of samples is large enough, allow 10% of them instead
    # (most of the time there will be only a few changes; 10% was chosen to avoid flakiness)
    changes_allowed_auto = int(len(memory) * 0.1)
    changes_allowed = max(changes_allowed_auto, changes_allowed)

    changed = len(set(memory))
    logging.info('Changes: allowed=%s, actual=%s, sample=%s',
                 changes_allowed, changed, len(memory))
    assert changed < changes_allowed
def test_http():
    memory = []
    memory.append(get_MemoryTracking())
    for _ in range(100):
        http_query(sample_query)
        memory.append(get_MemoryTracking())
    check_memory(memory)


def test_tcp_multiple_sessions():
    memory = []
    memory.append(get_MemoryTracking())
    for _ in range(100):
        query(sample_query)
        memory.append(get_MemoryTracking())
    check_memory(memory)


def test_tcp_single_session():
    memory = []
    memory.append(get_MemoryTracking())
    sample_queries = [
        sample_query,
        "SELECT metric, value FROM system.metrics WHERE metric = 'MemoryTracking'",
    ] * 100
    # run the whole batch in a single client session and read MemoryTracking
    # from the interleaved metric queries in the TSV output
    rows = query(';'.join(sample_queries))
    memory = rows.split('\n')
    memory = filter(lambda x: x.startswith('MemoryTracking'), memory)
    memory = map(lambda x: x.split('\t')[1], memory)
    memory = [*memory]
    check_memory(memory)
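
To make the check_memory() threshold concrete, here is an illustrative call with made-up values (not part of the test file): 101 samples in which the metric drifts to a second value and back yield 2 distinct values, well under the allowed max(int(101 * 0.1), 3) == 10.

# Illustration only, with made-up values: 101 samples, two distinct values observed.
memory_sample = [10_000_000] * 50 + [10_050_000] * 10 + [10_000_000] * 41
# changes_allowed = max(int(101 * 0.1), 3) == 10; changed = len(set(memory_sample)) == 2
check_memory(memory_sample)  # passes: 2 < 10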


@@ -1,4 +0,0 @@
HTTP
TCP_ONE_SESSION
TCP
OK


@ -1,92 +0,0 @@
#!/usr/bin/env bash

CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
. "$CURDIR"/../shell_config.sh
cd "$CURDIR" || exit 1

# Regression test for MemoryTracker drift.
#
# To make this test independent of:
# - MemoryTracking syncing with RSS (via AsynchronousMetrics.cpp)
# - various other allocations happening in parallel
# it does not measure the difference of MemoryTracking before and after all
# iterations; instead it samples MemoryTracking after each executed query and
# checks how many times the value has changed.
#
# TODO: Once #15236 is merged, add it to the "parallel" group in skip_list.json

query="SELECT groupArray(repeat('a', 1000)) FROM numbers(10000) GROUP BY number%10 FORMAT JSON"

function get_MemoryTracking()
{
    $CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL&query_profiler_real_time_period_ns=0&query_profiler_cpu_time_period_ns=0&log_queries=0" -d@- <<<"SELECT value FROM system.metrics WHERE metric = 'MemoryTracking'"
}
function test_execute_http()
{
    local i=$1 && shift
    for _ in $(seq 1 "$i"); do
        $CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL&max_threads=1&query_profiler_real_time_period_ns=0&query_profiler_cpu_time_period_ns=0&log_queries=0" -d@- <<<"$query" | grep -F DB::Exception:
        get_MemoryTracking >> 01540_MemoryTracking.memory.log
    done
}

function test_execute_tcp()
{
    # NOTE: slow in debug
    local i=$1 && shift
    for _ in $(seq 1 "$i"); do
        ${CLICKHOUSE_CLIENT} --max_threads=1 --query_profiler_real_time_period_ns=0 --query_profiler_cpu_time_period_ns=0 --log_queries=0 -q "$query" >/dev/null
        get_MemoryTracking >> 01540_MemoryTracking.memory.log
    done
}

function test_execute_tcp_one_session()
{
    local i=$1 && shift
    for _ in $(seq 1 "$i"); do
        echo "$query;"
        echo "SELECT metric, value FROM system.metrics WHERE metric = 'MemoryTracking';"
    done | ${CLICKHOUSE_CLIENT} -nm --max_threads=1 --query_profiler_real_time_period_ns=0 --query_profiler_cpu_time_period_ns=0 --log_queries=0 | {
        grep ^MemoryTracking | cut -f2 > 01540_MemoryTracking.memory.log
    }
}
# run the test and check MemoryTracking
function run_test()
{
    local iterations=$1 && shift
    local test_func=$1 && shift

    # A minimum of 3 MemoryTracking changes is allowed, since:
    # - it is low enough that real drift is still detected (not so high that it hides inaccuracy)
    # - memory can go like X/X+N due to some background allocations
    # - memory can go like X/X+N/X, so at least 2 changes are expected anyway
    local changes_allowed=3
    local changes_allowed_auto=$((iterations/6))
    if [[ $changes_allowed_auto -gt $changes_allowed ]]; then
        # if the number of iterations is large enough, allow iterations/6 of them instead
        changes_allowed=$changes_allowed_auto
    fi

    rm -f 01540_MemoryTracking.memory.log
    $test_func "$iterations"

    local changed
    changed="$(awk '{changed+=(prev && prev!=$0); prev=$0; } END {print changed}' 01540_MemoryTracking.memory.log)"
    if [[ $changed -gt $changes_allowed ]]; then
        echo "Test $test_func failed. MemoryTracking changed too frequently: $changed (allowed $changes_allowed)" >&2
    fi
}
# cleanup
trap 'rm -f 01540_MemoryTracking.memory.log' EXIT
echo 'HTTP'
run_test 100 test_execute_http
echo 'TCP_ONE_SESSION'
run_test 100 test_execute_tcp_one_session
echo 'TCP'
run_test 100 test_execute_tcp
echo 'OK'
exit 0
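
For reference, the awk one-liner in run_test() counts how many times consecutive samples differ; an equivalent in Python (illustrative only, not part of this commit, assuming non-zero samples as MemoryTracking values are) would be:

def count_changes(samples):
    # Count positions where a sample differs from the previous one,
    # mirroring: awk '{changed+=(prev && prev!=$0); prev=$0} END {print changed}'
    return sum(1 for prev, cur in zip(samples, samples[1:]) if cur != prev)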