ClickHouse/tests/ci/performance_comparison_check.py

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

292 lines
9.3 KiB
Python
Raw Normal View History

2021-11-16 11:16:10 +00:00
#!/usr/bin/env python3
import os
import logging
import sys
import json
import subprocess
import traceback
import re
2022-05-16 18:39:10 +00:00
from typing import Dict
2021-11-16 11:16:10 +00:00
from github import Github
from commit_status_helper import RerunHelper, get_commit, post_commit_status
from ci_config import CI_CONFIG
2022-05-16 18:39:10 +00:00
from docker_pull_helper import get_image_with_version
2022-08-11 13:01:32 +00:00
from env_helper import GITHUB_EVENT_PATH, GITHUB_RUN_URL, S3_BUILDS_BUCKET, S3_DOWNLOAD
2022-06-03 15:26:06 +00:00
from get_robot_token import get_best_robot_token, get_parameter_from_ssm
2021-11-16 11:16:10 +00:00
from pr_info import PRInfo
from s3_helper import S3Helper
2021-12-09 12:02:13 +00:00
from tee_popen import TeePopen
from clickhouse_helper import get_instance_type
from stopwatch import Stopwatch
2021-11-16 11:16:10 +00:00
IMAGE_NAME = "clickhouse/performance-comparison"
def get_run_command(
    check_start_time,
    check_name,
    workspace,
    result_path,
    repo_tests_path,
    pr_to_test,
    sha_to_test,
    additional_env,
    image,
):
    """Assemble the ``docker run`` command line for the performance image.

    The returned value is a single string. ``workspace``, ``result_path`` and
    ``repo_tests_path`` are bind-mounted at ``/workspace``, ``/output`` and
    ``/usr/share/clickhouse-test``. The check start time, the check name, the
    value returned by ``get_instance_type()``, the PR number and the commit
    SHA are passed as ``-e`` variables. ``additional_env`` is inserted
    verbatim after them, and ``image`` comes last.
    """
    instance_type = get_instance_type()

    env_flags = (
        f"-e CHECK_START_TIME='{check_start_time}'",
        f"-e CHECK_NAME='{check_name}'",
        f"-e INSTANCE_TYPE='{instance_type}'",
        f"-e PR_TO_TEST={pr_to_test}",
        f"-e SHA_TO_TEST={sha_to_test}",
    )

    # Every piece is separated by exactly one space; `additional_env` is
    # inserted as-is, so whatever leading whitespace it carries is preserved.
    command_parts = [
        "docker run --privileged",
        f"--volume={workspace}:/workspace",
        f"--volume={result_path}:/output",
        f"--volume={repo_tests_path}:/usr/share/clickhouse-test",
        "--cap-add syslog --cap-add sys_admin --cap-add sys_rawio",
        *env_flags,
        f"{additional_env}",
        f"{image}",
    ]
    return " ".join(command_parts)
2021-11-16 11:16:10 +00:00
2021-12-09 09:04:05 +00:00
class RamDrive:
    """Context manager that mounts a tmpfs at ``path`` and unmounts it on exit.

    ``path`` is the mount point (created if it does not exist) and ``size`` is
    the value given to the tmpfs ``size=`` mount option. Both ``mount`` and
    ``umount`` are run through ``sudo``.

    Raises ``subprocess.CalledProcessError`` when mounting or unmounting
    fails.
    """

    def __init__(self, path, size):
        self.path = path
        self.size = size

    def __enter__(self):
        # exist_ok avoids the check-then-create race of a separate exists() test.
        os.makedirs(self.path, exist_ok=True)
        # An argument list (no shell) keeps paths with spaces or shell
        # metacharacters intact instead of letting a shell re-parse them.
        subprocess.check_call(
            [
                "sudo",
                "mount",
                "-t",
                "tmpfs",
                "-o",
                f"rw,size={self.size}",
                "tmpfs",
                str(self.path),
            ]
        )
        # Return the instance so that `with RamDrive(...) as drive` is usable.
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        subprocess.check_call(["sudo", "umount", str(self.path)])
2021-12-09 09:04:05 +00:00
2021-11-16 11:16:10 +00:00
if __name__ == "__main__":
    # Script entry point: runs the performance comparison docker image for the
    # check named in argv[1], uploads the produced artifacts to S3 and posts
    # the resulting commit status.
    logging.basicConfig(level=logging.INFO)

    stopwatch = Stopwatch()

    # Working locations; each one can be overridden through the environment.
    temp_path = os.getenv("TEMP_PATH", os.path.abspath("."))
    repo_path = os.getenv("REPO_COPY", os.path.abspath("../../"))
    repo_tests_path = os.path.join(repo_path, "tests")
    ramdrive_path = os.getenv("RAMDRIVE_PATH", os.path.join(temp_path, "ramdrive"))
    # currently unused, doesn't make tests more stable
    ramdrive_size = os.getenv("RAMDRIVE_SIZE", "0G")
    reports_path = os.getenv("REPORTS_PATH", "./reports")

    check_name = sys.argv[1]
    required_build = CI_CONFIG.test_configs[check_name].required_build

    if not os.path.exists(temp_path):
        os.makedirs(temp_path)

    with open(GITHUB_EVENT_PATH, "r", encoding="utf-8") as event_file:
        event = json.load(event_file)

    gh = Github(get_best_robot_token(), per_page=100)
    pr_info = PRInfo(event)
    commit = get_commit(gh, pr_info.sha)

    # Extra `docker run` arguments are collected as fragments (each one starts
    # with a space) and concatenated once all of them are known.
    docker_env_parts = [
        f" -e S3_URL={S3_DOWNLOAD}/{S3_BUILDS_BUCKET}",
        f" -e BUILD_NAME={required_build}",
    ]

    # A PR number of 0 links to the commit page instead of a pull request.
    pr_link = (
        commit.html_url
        if pr_info.number == 0
        else f"https://github.com/ClickHouse/ClickHouse/pull/{pr_info.number}"
    )
    docker_env_parts.append(
        f' -e CHPC_ADD_REPORT_LINKS="<a href={GITHUB_RUN_URL}>'
        f'Job (actions)</a> <a href={pr_link}>Tested commit</a>"'
    )

    if "RUN_BY_HASH_TOTAL" in os.environ:
        run_by_hash_total = int(os.getenv("RUN_BY_HASH_TOTAL", "1"))
        run_by_hash_num = int(os.getenv("RUN_BY_HASH_NUM", "1"))
        docker_env_parts.append(
            f" -e CHPC_TEST_RUN_BY_HASH_TOTAL={run_by_hash_total}"
        )
        docker_env_parts.append(f" -e CHPC_TEST_RUN_BY_HASH_NUM={run_by_hash_num}")
        # The group number is shown 1-based in the check name.
        check_name_with_group = (
            f"{check_name} [{run_by_hash_num + 1}/{run_by_hash_total}]"
        )
    else:
        check_name_with_group = check_name

    # NOTE: the architecture is detected from the CHECK_NAME environment
    # variable, not from the `check_name` command line argument.
    is_aarch64 = "aarch64" in os.getenv("CHECK_NAME", "Performance Comparison").lower()
    if pr_info.number != 0 and is_aarch64 and "pr-performance" not in pr_info.labels:
        post_commit_status(
            commit,
            "success",
            GITHUB_RUN_URL,
            "Skipped, not labeled with 'pr-performance'",
            check_name_with_group,
            pr_info,
        )
        sys.exit(0)

    rerun_helper = RerunHelper(commit, check_name_with_group)
    if rerun_helper.is_already_finished_by_status():
        logging.info("Check is already finished according to github status, exiting")
        sys.exit(0)

    # Lower-case the check name and replace ' ', '(', ')', ',' and '/' by '_'.
    check_name_prefix = check_name_with_group.lower().translate(
        str.maketrans(" (),/", "_____")
    )

    docker_image = get_image_with_version(reports_path, IMAGE_NAME)

    # with RamDrive(ramdrive_path, ramdrive_size):
    result_path = ramdrive_path
    if not os.path.exists(result_path):
        os.makedirs(result_path)

    database_url = get_parameter_from_ssm("clickhouse-test-stat-url")
    database_username = get_parameter_from_ssm("clickhouse-test-stat-login")
    database_password = get_parameter_from_ssm("clickhouse-test-stat-password")

    env_extra = {
        "CLICKHOUSE_PERFORMANCE_COMPARISON_DATABASE_URL": f"{database_url}:9440",
        "CLICKHOUSE_PERFORMANCE_COMPARISON_DATABASE_USER": database_username,
        "CLICKHOUSE_PERFORMANCE_COMPARISON_DATABASE_USER_PASSWORD": database_password,
        "CLICKHOUSE_PERFORMANCE_COMPARISON_CHECK_NAME": check_name_with_group,
        "CLICKHOUSE_PERFORMANCE_COMPARISON_CHECK_NAME_PREFIX": check_name_prefix,
    }

    # Only the variable names go on the command line; their values are handed
    # over through the environment given to TeePopen below.
    docker_env_parts.extend(f" -e {name}" for name in env_extra)
    docker_env = "".join(docker_env_parts)

    # `result_path` is deliberately passed twice: once as the workspace and
    # once as the output directory.
    run_command = get_run_command(
        stopwatch.start_time_str,
        check_name,
        result_path,
        result_path,
        repo_tests_path,
        pr_info.number,
        pr_info.sha,
        docker_env,
        docker_image,
    )
    logging.info("Going to run command %s", run_command)

    run_log_path = os.path.join(temp_path, "run.log")
    compare_log_path = os.path.join(result_path, "compare.log")

    popen_env = os.environ.copy()
    popen_env.update(env_extra)
    with TeePopen(run_command, run_log_path, env=popen_env) as process:
        retcode = process.wait()
        # A failed run is only logged; artifacts are still uploaded afterwards.
        logging.info("Run successfully" if retcode == 0 else "Run failed")

    subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {temp_path}", shell=True)

    # Artifact name (also the S3 key suffix) -> local file to upload.
    paths = {
        "compare.log": compare_log_path,
        "output.7z": os.path.join(result_path, "output.7z"),
        "report.html": os.path.join(result_path, "report.html"),
        "all-queries.html": os.path.join(result_path, "all-queries.html"),
        "queries.rep": os.path.join(result_path, "queries.rep"),
        "all-query-metrics.tsv": os.path.join(
            result_path, "report/all-query-metrics.tsv"
        ),
        "run.log": run_log_path,
    }

    s3_prefix = f"{pr_info.number}/{pr_info.sha}/{check_name_prefix}/"
    s3_helper = S3Helper()
    uploaded: Dict[str, str] = {}
    for artifact_name, local_path in paths.items():
        try:
            uploaded[artifact_name] = s3_helper.upload_test_report_to_s3(
                local_path, s3_prefix + artifact_name
            )
        except Exception:
            # Best effort: an artifact that failed to upload gets an empty URL.
            uploaded[artifact_name] = ""
            traceback.print_exc()

    # Upload all images and flamegraphs to S3
    try:
        s3_helper.upload_test_folder_to_s3(
            os.path.join(result_path, "images"), s3_prefix + "images"
        )
    except Exception:
        traceback.print_exc()

    def too_many_slow(msg):
        """Return True when `msg` reports more than `threshold` slower queries."""
        # This threshold should be synchronized with the value in https://github.com/ClickHouse/ClickHouse/blob/master/docker/test/performance-comparison/report.py#L629
        threshold = 5
        found = re.search(r"(|.* )(\d+) slower.*", msg)
        if not found:
            return False
        return int(found.group(2).strip()) > threshold

    # Try to fetch status from the report.
    status = ""
    message = ""
    try:
        with open(
            os.path.join(result_path, "report.html"), "r", encoding="utf-8"
        ) as report_fd:
            report_text = report_fd.read()
            status_match = re.search("<!--[ ]*status:(.*)-->", report_text)
            message_match = re.search("<!--[ ]*message:(.*)-->", report_text)
        if status_match:
            status = status_match.group(1).strip()
        if message_match:
            message = message_match.group(1).strip()

        # TODO: Remove me, always green mode for the first time, unless errors
        lowered_message = message.lower()
        if "errors" in lowered_message or too_many_slow(lowered_message):
            status = "failure"
        else:
            status = "success"
        # TODO: Remove until here
    except Exception:
        traceback.print_exc()
        status = "failure"
        message = "Failed to parse the report."

    if not status:
        status = "failure"
        message = "No status in report."
    elif not message:
        status = "failure"
        message = "No message in report."

    # Link to the most informative artifact that was uploaded; fall back to
    # the job URL when none of them made it to S3.
    report_url = GITHUB_RUN_URL
    for preferred in ("report.html", "output.7z", "compare.log", "run.log"):
        if uploaded[preferred]:
            report_url = uploaded[preferred]
            break

    post_commit_status(
        commit, status, report_url, message, check_name_with_group, pr_info
    )

    # NOTE(review): with the "always green" override above, `status` only ever
    # ends up as "success" or "failure" here, so this exit looks unreachable
    # for now; confirm the intent before changing it.
    if status == "error":
        sys.exit(1)