ClickHouse/tests/integration/test_prometheus_endpoint/test.py
Azat Khuzhin 38ad4ef493 Fix timeout for prometheus exporter for HTTP/1.1 (due to keep-alive)
Before:

    $ time curl -s --http1.1 127.1:9363/metrics > /dev/null
    real    0m10.018s # default keep_alive_timeout is 10 seconds
    user    0m0.005s
    sys     0m0.001s

After:

    $ time curl -s --http1.1 127.1:9363/metrics > /dev/null
    real    0m0.008s
    user    0m0.006s
    sys     0m0.000s

If you look at test_prometheus_endpoint, you will see that it takes
more than 30 seconds (it obtains metrics 3 times); after this patch it
should finish more or less instantly.

Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
2024-01-19 13:29:17 +03:00

90 lines
2.4 KiB
Python

import re
import time
import pytest
import requests
from helpers.cluster import ClickHouseCluster
# Integration-test cluster object, constructed from this test file's path.
cluster = ClickHouseCluster(__file__)
# Single instance named "node", started with configs/prom_conf.xml as a main
# config (presumably what enables the Prometheus endpoint queried by the test
# below -- confirm against that config file).
node = cluster.add_instance("node", main_configs=["configs/prom_conf.xml"])
@pytest.fixture(scope="module")
def start_cluster():
    """Module-scoped fixture that starts the cluster and yields it to tests.

    The cluster is shut down when the fixture is torn down, and also when
    ``cluster.start()`` itself raises, because the shutdown call lives in the
    ``finally`` clause.
    """
    try:
        # Bring the cluster up once for every test in this module.
        cluster.start()
        yield cluster
    finally:
        # Runs on normal teardown and on start-up failure alike.
        cluster.shutdown()
def parse_response_line(line):
    """Parse one non-empty line of the ``/metrics`` response.

    Args:
        line: A single response line (the caller strips trailing whitespace
            and skips empty lines before calling this).

    Returns:
        An empty dict for ``# HELP`` / ``# TYPE`` comment lines; otherwise a
        one-item dict mapping the metric name to the signed leading integer
        digits of its value (a fractional part or exponent, if any, is
        ignored because the pattern is not anchored at the end).

    Raises:
        AssertionError: If the line does not start with an allowed prefix, or
            a metric line does not look like ``name[{labels}] value``.
    """
    allowed_prefixes = ("ClickHouse", "# HELP", "# TYPE")
    # str.startswith accepts a tuple, so a single call covers every prefix.
    assert line.startswith(allowed_prefixes), line

    if line.startswith("#"):
        return {}

    # Raw string: "\{" and "\d" are invalid escapes in a regular literal.
    # The value group captures the sign and *every* leading digit, so "123"
    # parses as 123 (not 1) and "-5" as -5 (not 5).
    match = re.match(r"^([a-zA-Z_:][a-zA-Z0-9_:]+)(\{.*\})? (-?\d+)", line)
    assert match, line
    name, _, val = match.groups()
    return {name: int(val)}
def get_and_check_metrics(retries):
    """Fetch ``/metrics`` from the node over HTTP and return the parsed metrics.

    Args:
        retries: Maximum number of retries after a failed attempt; the
            function sleeps 0.5 seconds before each retry.

    Returns:
        Dict built by merging the result of ``parse_response_line`` for every
        non-empty line of the response body.

    Raises:
        Exception: The error from the last attempt once ``retries`` is
            exhausted (request failure, timeout, or a non-200 status).
        AssertionError: If the content type is not ``text/plain`` or a line
            fails validation in ``parse_response_line``.
    """
    while True:
        try:
            response = requests.get(
                "http://{host}:{port}/metrics".format(host=node.ip_address, port=8001),
                allow_redirects=False,
                # less than default keep-alive timeout (10 seconds)
                timeout=5,
            )
            # Raises for 4xx/5xx responses.
            response.raise_for_status()
            # raise_for_status() is silent for statuses below 400 (e.g. a 3xx,
            # since redirects are not followed), so reject anything but 200
            # explicitly instead of parsing an unexpected body.
            if response.status_code != 200:
                raise requests.HTTPError(
                    "unexpected status code {}".format(response.status_code),
                    response=response,
                )
            break
        except Exception:
            # `Exception` rather than a bare `except`, so KeyboardInterrupt and
            # SystemExit are not swallowed by the retry loop.
            if retries <= 0:
                raise
            retries -= 1
            time.sleep(0.5)

    assert response.headers["content-type"].startswith("text/plain")

    results = {}
    for resp_line in response.text.split("\n"):
        resp_line = resp_line.rstrip()
        if not resp_line:
            continue
        results.update(parse_response_line(resp_line))
    return results
def test_prometheus_endpoint(start_cluster):
    """Check that the metrics endpoint reflects executed queries and errors.

    The test reads the query counter, runs three queries and expects the
    counter to grow by at least three, then triggers a failing query and
    expects the error metrics to be at least one.
    """
    query_metric = "ClickHouseProfileEvents_Query"

    # Baseline value of the query counter before running anything.
    prev_query_count = get_and_check_metrics(10)[query_metric]
    assert prev_query_count >= 0

    for statement in ("SELECT 1", "SELECT 2", "SELECT 3"):
        node.query(statement)

    after_queries = get_and_check_metrics(10)
    assert after_queries[query_metric] >= prev_query_count + 3

    # Force a query failure with a custom error code (presumably code 42 maps
    # to NUMBER_OF_ARGUMENTS_DOESNT_MATCH, the metric asserted below -- confirm).
    node.query_and_get_error(
        "SELECT throwIf(1, 'test', toInt16(42)) SETTINGS allow_custom_error_code_in_throwif=1"
    )

    after_error = get_and_check_metrics(10)
    assert after_error["ClickHouseErrorMetric_NUMBER_OF_ARGUMENTS_DOESNT_MATCH"] >= 1
    assert after_error["ClickHouseErrorMetric_ALL"] >= 1