#!/usr/bin/env python3
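"""Run the ClickHouse performance comparison check.

Builds a docker run command for the clickhouse/performance-comparison image,
executes it against the tested commit, extracts the status from the generated
report.html and publishes a JobReport.
"""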

import json
import logging
import os
import re
import subprocess
import sys
import traceback
from pathlib import Path

from github import Github

from build_download_helper import download_builds_filter
from ci_config import CI
from clickhouse_helper import get_instance_id, get_instance_type
from commit_status_helper import get_commit
from docker_images_helper import get_docker_image, pull_image
from env_helper import (
    GITHUB_EVENT_PATH,
    GITHUB_RUN_URL,
    REPO_COPY,
    REPORT_PATH,
    S3_BUILDS_BUCKET,
    S3_DOWNLOAD,
    TEMP_PATH,
)
from get_robot_token import get_best_robot_token, get_parameter_from_ssm
from pr_info import PRInfo
from report import FAILURE, SUCCESS, JobReport
from stopwatch import Stopwatch
from tee_popen import TeePopen

IMAGE_NAME = "clickhouse/performance-comparison"


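# Assemble the `docker run` command line that executes the comparison inside
# the performance-comparison image.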
def get_run_command(
    check_start_time,
    check_name,
    workspace,
    result_path,
    repo_tests_path,
    pr_to_test,
    sha_to_test,
    additional_env,
    image,
):
    instance_type = get_instance_type()
    instance_id = get_instance_id()

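    # Check metadata and runner information forwarded into the container.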
    envs = [
        f"-e CHECK_START_TIME='{check_start_time}'",
        f"-e CHECK_NAME='{check_name}'",
        f"-e INSTANCE_TYPE='{instance_type}'",
        f"-e INSTANCE_ID='{instance_id}'",
        f"-e PR_TO_TEST={pr_to_test}",
        f"-e SHA_TO_TEST={sha_to_test}",
    ]

    env_str = " ".join(envs)

    return (
        f"docker run --privileged --volume={workspace}:/workspace "
        f"--volume={result_path}:/output "
        f"--volume={repo_tests_path}:/usr/share/clickhouse-test "
        f"--volume={TEMP_PATH}:/artifacts "
        f"--cap-add syslog --cap-add sys_admin --cap-add sys_rawio "
        f"{env_str} {additional_env} "
        f"{image}"
    )


def main():
    logging.basicConfig(level=logging.INFO)

    stopwatch = Stopwatch()

    temp_path = Path(TEMP_PATH)
    temp_path.mkdir(parents=True, exist_ok=True)
    repo_tests_path = Path(REPO_COPY, "tests")

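    # The check name selects the job config and, through it, the build whose
    # performance artifact is tested.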
    check_name = sys.argv[1] if len(sys.argv) > 1 else os.getenv("CHECK_NAME")
    assert (
        check_name
    ), "Check name must be provided as an input arg or in CHECK_NAME env"
    required_build = CI.JOB_CONFIGS[check_name].get_required_build()

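    # Determine the tested PR and commit from the GitHub event payload.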
    with open(GITHUB_EVENT_PATH, "r", encoding="utf-8") as event_file:
        event = json.load(event_file)

    gh = Github(get_best_robot_token(), per_page=100)
    pr_info = PRInfo(event)
    commit = get_commit(gh, pr_info.sha)

    docker_env = ""

    docker_env += f" -e S3_URL={S3_DOWNLOAD}/{S3_BUILDS_BUCKET}"
    docker_env += f" -e BUILD_NAME={required_build}"

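    # Non-PR runs (pr_info.number == 0) link to the tested commit instead of a PR.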
    if pr_info.number == 0:
        pr_link = commit.html_url
    else:
        pr_link = f"https://github.com/ClickHouse/ClickHouse/pull/{pr_info.number}"

    docker_env += (
        f' -e CHPC_ADD_REPORT_LINKS="<a href={GITHUB_RUN_URL}>'
        f'Job (actions)</a> <a href={pr_link}>Tested commit</a>"'
    )

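    # For sharded runs, forward the shard number and total into the container
    # and reflect the shard in the check name.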
    if "RUN_BY_HASH_TOTAL" in os.environ:
        run_by_hash_total = int(os.getenv("RUN_BY_HASH_TOTAL", "1"))
        run_by_hash_num = int(os.getenv("RUN_BY_HASH_NUM", "1"))
        docker_env += (
            f" -e CHPC_TEST_RUN_BY_HASH_TOTAL={run_by_hash_total}"
            f" -e CHPC_TEST_RUN_BY_HASH_NUM={run_by_hash_num}"
        )
        check_name_with_group = (
            check_name + f" [{run_by_hash_num + 1}/{run_by_hash_total}]"
        )
    else:
        check_name_with_group = check_name

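    # On PRs, the aarch64 variant runs only when the 'pr-performance' label is set.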
    is_aarch64 = "aarch64" in os.getenv("CHECK_NAME", "Performance Comparison").lower()
    if pr_info.number != 0 and is_aarch64 and "pr-performance" not in pr_info.labels:
        print("Skipped, not labeled with 'pr-performance'")
        sys.exit(0)

    check_name_prefix = (
        check_name_with_group.lower()
        .replace(" ", "_")
        .replace("(", "_")
        .replace(")", "_")
        .replace(",", "_")
        .replace("/", "_")
    )

    docker_image = pull_image(get_docker_image(IMAGE_NAME))

    result_path = temp_path / "result"
    result_path.mkdir(parents=True, exist_ok=True)

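    # Credentials for the results database come from AWS SSM; they are passed to
    # the container through the process environment (popen_env below) rather than
    # being embedded in the command line.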
    database_url = get_parameter_from_ssm("clickhouse-test-stat-url")
    database_username = get_parameter_from_ssm("clickhouse-test-stat-login")
    database_password = get_parameter_from_ssm("clickhouse-test-stat-password")

    env_extra = {
        "CLICKHOUSE_PERFORMANCE_COMPARISON_DATABASE_URL": f"{database_url}:9440",
        "CLICKHOUSE_PERFORMANCE_COMPARISON_DATABASE_USER": database_username,
        "CLICKHOUSE_PERFORMANCE_COMPARISON_DATABASE_USER_PASSWORD": database_password,
        "CLICKHOUSE_PERFORMANCE_COMPARISON_CHECK_NAME": check_name_with_group,
        "CLICKHOUSE_PERFORMANCE_COMPARISON_CHECK_NAME_PREFIX": check_name_prefix,
    }

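    # Fetch the performance.tar.zst artifact produced by the required build.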
    download_builds_filter(
        check_name, REPORT_PATH, temp_path, lambda url: "performance.tar.zst" in url
    )
    assert os.path.exists(f"{TEMP_PATH}/performance.tar.zst"), "Perf artifact not found"

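    # Only the variable names are appended to the docker command (`-e NAME`);
    # docker takes their values from the environment, so the secrets do not end
    # up in the logged command line.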
    docker_env += "".join([f" -e {name}" for name in env_extra])

    run_command = get_run_command(
        stopwatch.start_time_str,
        check_name,
        result_path,
        result_path,
        repo_tests_path,
        pr_info.number,
        pr_info.sha,
        docker_env,
        docker_image,
    )
    logging.info("Going to run command %s", run_command)

    run_log_path = temp_path / "run.log"
    compare_log_path = result_path / "compare.log"

    popen_env = os.environ.copy()
    popen_env.update(env_extra)

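    # Run the comparison, teeing the container output into run.log.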
    with TeePopen(run_command, run_log_path, env=popen_env) as process:
        retcode = process.wait()
        if retcode == 0:
            logging.info("Run successfully")
        else:
            logging.info("Run failed")

    subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {temp_path}", shell=True)

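    # The report message mentions how many queries became slower; treat more
    # than `threshold` slowdowns as a failure.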
    def too_many_slow(msg):
        match = re.search(r"(|.* )(\d+) slower.*", msg)
        # This threshold should be synchronized with the value in
        # https://github.com/ClickHouse/ClickHouse/blob/master/docker/test/performance-comparison/report.py#L629
        threshold = 5
        return int(match.group(2).strip()) > threshold if match else False

    paths = {
        "compare.log": compare_log_path,
        "output.7z": result_path / "output.7z",
        "report.html": result_path / "report.html",
        "all-queries.html": result_path / "all-queries.html",
        "queries.rep": result_path / "queries.rep",
        "all-query-metrics.tsv": result_path / "report/all-query-metrics.tsv",
        "run.log": run_log_path,
    }

    # FIXME: where images come from? dir does not exist atm.
    image_files = (
        list((Path(result_path) / "images").iterdir())
        if (Path(result_path) / "images").exists()
        else []
    )

    # Try to fetch status from the report.
    status = ""
    message = ""
    try:
        with open(result_path / "report.html", "r", encoding="utf-8") as report_fd:
            report_text = report_fd.read()
            status_match = re.search("<!--[ ]*status:(.*)-->", report_text)
            message_match = re.search("<!--[ ]*message:(.*)-->", report_text)
        if status_match:
            status = status_match.group(1).strip()
        if message_match:
            message = message_match.group(1).strip()

        # TODO: Remove me, always green mode for the first time, unless errors
        status = SUCCESS
        if "errors" in message.lower() or too_many_slow(message.lower()):
            status = FAILURE
        # TODO: Remove until here
    except Exception:
        traceback.print_exc()
        status = FAILURE
        message = "Failed to parse the report."

    if not status:
        status = FAILURE
        message = "No status in report."
    elif not message:
        status = FAILURE
        message = "No message in report."

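    # Publish the job result; a non-success status also fails the script below.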
    JobReport(
        description=message,
        test_results=[],
        status=status,
        start_time=stopwatch.start_time_str,
        duration=stopwatch.duration_seconds,
        additional_files=[v for _, v in paths.items()] + image_files,
        check_name=check_name_with_group,
    ).dump()

    if status != SUCCESS:
        sys.exit(1)


if __name__ == "__main__":
    main()