Use a single BuildResult class across builds and report checks

Mikhail f. Shiryaev 2023-09-01 22:35:31 +02:00
parent 88664bef3f
commit 6fc73e0e1f
3 changed files with 295 additions and 268 deletions
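Taken together, the three diffs below move the build job and the report check onto one JSON-backed dataclass defined in report.py. As orientation only, here is a minimal, self-contained sketch of that round trip; the trimmed BuildResultSketch class, the temp directory, and the sample build/job names are illustrative stand-ins, not the committed code:

import json
import tempfile
from dataclasses import asdict, dataclass
from pathlib import Path
from typing import List

SUCCESS = "success"
ERROR = "error"


@dataclass
class BuildResultSketch:
    # Same field set as the committed BuildResult, minus the cached helpers
    build_name: str
    log_url: str
    build_urls: List[str]
    version: str
    status: str
    elapsed_seconds: int
    job_name: str

    @property
    def file_name(self) -> Path:
        return Path(f"build_report_{self.build_name}.json")

    def write_json(self, directory: Path) -> Path:
        # The build job writes the report; the report check reads it back
        path = directory / self.file_name
        path.write_text(json.dumps(asdict(self)), encoding="utf-8")
        return path

    @staticmethod
    def read_json(directory: Path, build_name: str) -> "BuildResultSketch":
        path = directory / f"build_report_{build_name}.json"
        try:
            data = json.loads(path.read_text(encoding="utf-8"))
        except FileNotFoundError:
            # Mirrors BuildResult.missing_result: a dummy ERROR record
            return BuildResultSketch(build_name, "", [], "missing", ERROR, 0, "missing")
        return BuildResultSketch(**data)


if __name__ == "__main__":
    tmp = Path(tempfile.mkdtemp())
    BuildResultSketch(
        "package_release",  # hypothetical build name
        "https://example.com/build.log",
        ["https://example.com/clickhouse.deb"],
        "23.9.1.1",
        SUCCESS,
        42,
        "BuilderDebRelease",  # hypothetical job name
    ).write_json(tmp)
    print(BuildResultSketch.read_json(tmp, "package_release"))
    print(BuildResultSketch.read_json(tmp, "package_missing"))  # dummy ERROR record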

tests/ci/build_check.py

@@ -1,10 +1,9 @@
#!/usr/bin/env python3
from pathlib import Path
from typing import List, Tuple
from typing import Tuple
import subprocess
import logging
import json
import os
import sys
import time
@@ -22,6 +21,7 @@ from env_helper import (
)
from git_helper import Git, git_runner
from pr_info import PRInfo
from report import BuildResult, FAILURE, StatusType, SUCCESS
from s3_helper import S3Helper
from tee_popen import TeePopen
from version_helper import (
@@ -98,7 +98,7 @@ def get_packager_cmd(
def build_clickhouse(
packager_cmd: str, logs_path: Path, build_output_path: Path
) -> Tuple[Path, bool]:
) -> Tuple[Path, StatusType]:
build_log_path = logs_path / BUILD_LOG_NAME
success = False
with TeePopen(packager_cmd, build_log_path) as process:
@@ -118,15 +118,16 @@ def build_clickhouse(
)
else:
logging.info("Build failed")
return build_log_path, success
return build_log_path, SUCCESS if success else FAILURE
def check_for_success_run(
s3_helper: S3Helper,
s3_prefix: str,
build_name: str,
build_config: BuildConfig,
version: ClickHouseVersion,
) -> None:
# TODO: Remove after S3 artifacts
# the final empty argument is necessary to distinguish build and build_suffix
logged_prefix = os.path.join(S3_BUILDS_BUCKET, s3_prefix, "")
logging.info("Checking for artifacts in %s", logged_prefix)
@@ -155,15 +156,16 @@ def check_for_success_run(
return
success = len(build_urls) > 0
create_json_artifact(
TEMP_PATH,
build_result = BuildResult(
build_name,
log_url,
build_urls,
build_config,
version.describe,
SUCCESS if success else FAILURE,
0,
success,
GITHUB_JOB,
)
build_result.write_json(Path(TEMP_PATH))
# Fail the build job if it did not succeed
if not success:
sys.exit(1)
@@ -171,36 +173,6 @@ def check_for_success_run(
sys.exit(0)
def create_json_artifact(
temp_path: str,
build_name: str,
log_url: str,
build_urls: List[str],
build_config: BuildConfig,
elapsed: int,
success: bool,
) -> None:
subprocess.check_call(
f"echo 'BUILD_URLS=build_urls_{build_name}' >> $GITHUB_ENV", shell=True
)
result = {
"log_url": log_url,
"build_urls": build_urls,
"build_config": build_config.__dict__,
"elapsed_seconds": elapsed,
"status": success,
"job_name": GITHUB_JOB,
}
json_name = "build_urls_" + build_name + ".json"
print(f"Dump json report {result} to {json_name} with env build_urls_{build_name}")
with open(os.path.join(temp_path, json_name), "w", encoding="utf-8") as build_links:
json.dump(result, build_links)
def get_release_or_pr(pr_info: PRInfo, version: ClickHouseVersion) -> Tuple[str, str]:
"Return prefixes for S3 artifacts paths"
# FIXME performance
@@ -269,7 +241,7 @@ def main():
# If this is rerun, then we try to find already created artifacts and just
# put them as github actions artifact (result)
check_for_success_run(s3_helper, s3_path_prefix, build_name, build_config)
check_for_success_run(s3_helper, s3_path_prefix, build_name, version)
docker_image = get_image_with_version(IMAGES_PATH, IMAGE_NAME)
image_version = docker_image.version
@@ -312,16 +284,17 @@ def main():
os.makedirs(logs_path, exist_ok=True)
start = time.time()
log_path, success = build_clickhouse(packager_cmd, logs_path, build_output_path)
log_path, build_status = build_clickhouse(
packager_cmd, logs_path, build_output_path
)
elapsed = int(time.time() - start)
subprocess.check_call(
f"sudo chown -R ubuntu:ubuntu {build_output_path}", shell=True
)
logging.info("Build finished with %s, log path %s", success, log_path)
if success:
logging.info("Build finished as %s, log path %s", build_status, log_path)
if build_status == SUCCESS:
cargo_cache.upload()
if not success:
else:
# We check if docker works, because if it's down, it's infrastructure
try:
subprocess.check_call("docker info", shell=True)
@@ -367,8 +340,20 @@ def main():
print(f"::notice ::Log URL: {log_url}")
create_json_artifact(
TEMP_PATH, build_name, log_url, build_urls, build_config, elapsed, success
build_result = BuildResult(
build_name,
log_url,
build_urls,
version.describe,
build_status,
elapsed,
GITHUB_JOB,
)
result_json_path = build_result.write_json(temp_path)
logging.info(
"Build result file %s is written, content:\n %s",
result_json_path,
result_json_path.read_text(encoding="utf-8"),
)
upload_master_static_binaries(pr_info, build_config, s3_helper, build_output_path)
@@ -449,7 +434,7 @@ FORMAT JSONCompactEachRow"""
prepared_events = prepare_tests_results_for_clickhouse(
pr_info,
[],
"success" if success else "failure",
build_status,
stopwatch.duration_seconds,
stopwatch.start_time_str,
log_url,
@@ -458,7 +443,7 @@ FORMAT JSONCompactEachRow"""
ch_helper.insert_events_into(db="default", table="checks", events=prepared_events)
# Fail the build job if it didn't succeed
if not success:
if build_status != SUCCESS:
sys.exit(1)
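The hunks above replace the boolean success plumbing: build_clickhouse now returns (Path, StatusType) and the exit code is decided by comparing against SUCCESS. A hedged sketch of that pattern; the constant values are copied from this diff, while the worst-first ordering in get_worst_status is an assumption about the helper that the next file imports from report.py:

from typing import Final, Iterable, Literal

ERROR: Final = "error"
FAILURE: Final = "failure"
PENDING: Final = "pending"
SUCCESS: Final = "success"
StatusType = Literal["error", "failure", "pending", "success"]


def get_worst_status(statuses: Iterable[str]) -> str:
    # Assumption: worst-first severity; only the four known values are handled,
    # the real helper lives in report.py
    order = (ERROR, FAILURE, PENDING, SUCCESS)
    worst = SUCCESS
    for status in statuses:
        if order.index(status) < order.index(worst):
            worst = status
    return worst


assert get_worst_status([SUCCESS, PENDING, SUCCESS]) == PENDING
assert get_worst_status([]) == SUCCESS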

tests/ci/build_report_check.py

@@ -6,19 +6,24 @@ import os
import sys
import atexit
from pathlib import Path
from typing import Dict, List, Tuple
from github import Github
from env_helper import (
GITHUB_JOB_URL,
GITHUB_REPOSITORY,
GITHUB_RUN_URL,
GITHUB_SERVER_URL,
REPORTS_PATH,
TEMP_PATH,
)
from report import create_build_html_report, BuildResult, BuildResults
from report import (
BuildResult,
ERROR,
PENDING,
SUCCESS,
create_build_html_report,
get_worst_status,
)
from s3_helper import S3Helper
from get_robot_token import get_best_robot_token
from pr_info import NeedsDataType, PRInfo
@@ -35,95 +40,18 @@ from ci_config import CI_CONFIG
NEEDS_DATA_PATH = os.getenv("NEEDS_DATA_PATH", "")
def group_by_artifacts(build_urls: List[str]) -> Dict[str, List[str]]:
groups = {
"apk": [],
"deb": [],
"binary": [],
"tgz": [],
"rpm": [],
"performance": [],
} # type: Dict[str, List[str]]
for url in build_urls:
if url.endswith("performance.tar.zst"):
groups["performance"].append(url)
elif (
url.endswith(".deb")
or url.endswith(".buildinfo")
or url.endswith(".changes")
or url.endswith(".tar.gz")
):
groups["deb"].append(url)
elif url.endswith(".apk"):
groups["apk"].append(url)
elif url.endswith(".rpm"):
groups["rpm"].append(url)
elif url.endswith(".tgz") or url.endswith(".tgz.sha512"):
groups["tgz"].append(url)
else:
groups["binary"].append(url)
return groups
def get_failed_report(
job_name: str,
) -> Tuple[BuildResults, List[List[str]], List[str]]:
message = f"{job_name} failed"
build_result = BuildResult(
compiler="unknown",
debug_build=False,
sanitizer="unknown",
status=message,
elapsed_seconds=0,
comment="",
)
return [build_result], [[""]], [GITHUB_RUN_URL]
def process_report(
build_report: dict,
) -> Tuple[BuildResults, List[List[str]], List[str]]:
build_config = build_report["build_config"]
build_result = BuildResult(
compiler=build_config["compiler"],
debug_build=build_config["debug_build"],
sanitizer=build_config["sanitizer"],
status="success" if build_report["status"] else "failure",
elapsed_seconds=build_report["elapsed_seconds"],
comment=build_config["comment"],
)
build_results = []
build_urls = []
build_logs_urls = []
urls_groups = group_by_artifacts(build_report["build_urls"])
found_group = False
for _, group_urls in urls_groups.items():
if group_urls:
build_results.append(build_result)
build_urls.append(group_urls)
build_logs_urls.append(build_report["log_url"])
found_group = True
# No one group of urls is found, a failed report
if not found_group:
build_results.append(build_result)
build_urls.append([""])
build_logs_urls.append(build_report["log_url"])
return build_results, build_urls, build_logs_urls
def get_build_name_from_file_name(file_name):
return file_name.replace("build_urls_", "").replace(".json", "")
def main():
logging.basicConfig(level=logging.INFO)
temp_path = Path(TEMP_PATH)
logging.info("Reports path %s", REPORTS_PATH)
temp_path.mkdir(parents=True, exist_ok=True)
logging.info("Reports path %s", REPORTS_PATH)
reports_path = Path(REPORTS_PATH)
logging.info(
"Reports found:\n %s",
"\n ".join(p.as_posix() for p in reports_path.rglob("*.json")),
)
build_check_name = sys.argv[1]
needs_data = {} # type: NeedsDataType
required_builds = 0
@@ -132,11 +60,11 @@ def main():
needs_data = json.load(file_handler)
required_builds = len(needs_data)
if needs_data and all(i["result"] == "skipped" for i in needs_data.values()):
logging.info("All builds are skipped, exiting")
sys.exit(0)
logging.info("The next builds are required: %s", ", ".join(needs_data))
if needs_data:
logging.info("The next builds are required: %s", ", ".join(needs_data))
if all(i["result"] == "skipped" for i in needs_data.values()):
logging.info("All builds are skipped, exiting")
sys.exit(0)
gh = Github(get_best_robot_token(), per_page=100)
pr_info = PRInfo()
@@ -153,73 +81,41 @@ def main():
required_builds = required_builds or len(builds_for_check)
# Collect reports from json artifacts
builds_report_map = {}
for root, _, files in os.walk(REPORTS_PATH):
for f in files:
if f.startswith("build_urls_") and f.endswith(".json"):
logging.info("Found build report json %s", f)
build_name = get_build_name_from_file_name(f)
if build_name in builds_for_check:
with open(os.path.join(root, f), "rb") as file_handler:
builds_report_map[build_name] = json.load(file_handler)
else:
logging.info(
"Skipping report %s for build %s, it's not in our reports list",
f,
build_name,
)
build_results = []
for build_name in builds_for_check:
report_name = BuildResult.get_report_name(build_name).stem
build_result = BuildResult.read_json(reports_path / report_name, build_name)
if build_result.is_missing:
logging.warning("Build results for %s are missing", build_name)
continue
build_results.append(build_result)
# Sort reports by config order
build_reports = [
builds_report_map[build_name]
for build_name in builds_for_check
if build_name in builds_report_map
# Collect missing reports for failed jobs
missing_job_names = [
name
for name in needs_data
if not any(1 for build_result in build_results if build_result.job_name == name)
]
some_builds_are_missing = len(build_reports) < required_builds
missing_build_names = []
if some_builds_are_missing:
logging.warning(
"Expected to get %s build results, got only %s",
required_builds,
len(build_reports),
)
missing_build_names = [
name
for name in needs_data
if not any(rep for rep in build_reports if rep["job_name"] == name)
]
else:
logging.info("Got exactly %s builds", len(builds_report_map))
# Group build artifacts by groups
build_results = [] # type: BuildResults
build_artifacts = [] # type: List[List[str]]
build_logs = [] # type: List[str]
for build_report in build_reports:
_build_results, build_artifacts_url, build_logs_url = process_report(
build_report
)
missing_builds = len(missing_job_names)
for job_name in reversed(missing_job_names):
build_result = BuildResult.missing_result("missing")
build_result.job_name = job_name
build_result.status = PENDING
logging.info(
"Got %s artifact groups for build report report", len(_build_results)
"There is missing report for %s, created a dummy result %s",
job_name,
build_result,
)
build_results.extend(_build_results)
build_artifacts.extend(build_artifacts_url)
build_logs.extend(build_logs_url)
build_results.insert(0, build_result)
for failed_job in missing_build_names:
_build_results, build_artifacts_url, build_logs_url = get_failed_report(
failed_job
)
build_results.extend(_build_results)
build_artifacts.extend(build_artifacts_url)
build_logs.extend(build_logs_url)
total_groups = len(build_results)
# Calculate artifact groups like packages and binaries
total_groups = sum(len(br.grouped_urls) for br in build_results)
ok_groups = sum(
len(br.grouped_urls) for br in build_results if br.status == SUCCESS
)
logging.info("Totally got %s artifact groups", total_groups)
if total_groups == 0:
logging.error("No success builds, failing check")
logging.error("No success builds, failing check without creating a status")
sys.exit(1)
s3_helper = S3Helper()
@@ -234,8 +130,6 @@ def main():
report = create_build_html_report(
build_check_name,
build_results,
build_logs,
build_artifacts,
task_url,
branch_url,
branch_name,
@@ -258,27 +152,20 @@ def main():
print(f"::notice ::Report url: {url}")
# Prepare a commit status
ok_groups = 0
summary_status = "success"
for build_result in build_results:
if build_result.status == "failure" and summary_status != "error":
summary_status = "failure"
if build_result.status == "error" or not build_result.status:
summary_status = "error"
if build_result.status == "success":
ok_groups += 1
summary_status = get_worst_status(br.status for br in build_results)
# Check if there are no builds at all, do not override bad status
if summary_status == "success":
if some_builds_are_missing:
summary_status = "pending"
if summary_status == SUCCESS:
if missing_builds:
summary_status = PENDING
elif ok_groups == 0:
summary_status = "error"
summary_status = ERROR
addition = ""
if some_builds_are_missing:
addition = f" ({len(build_reports)} of {required_builds} builds are OK)"
if missing_builds:
addition = (
f" ({required_builds - missing_builds} of {required_builds} builds are OK)"
)
description = format_description(
f"{ok_groups}/{total_groups} artifact groups are OK{addition}"
@@ -288,7 +175,7 @@ def main():
commit, summary_status, url, description, build_check_name, pr_info
)
if summary_status == "error":
if summary_status == ERROR:
sys.exit(1)
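The rewritten main() above backfills a dummy PENDING result for every required job whose report JSON never arrived, replacing the old get_failed_report path. A simplified, self-contained sketch of that backfill and of the artifact-group counting; the Stub class and the job names are stand-ins for this illustration:

from dataclasses import dataclass, field
from typing import List

PENDING = "pending"
SUCCESS = "success"


@dataclass
class Stub:
    """Stand-in for report.BuildResult with only the fields used here"""

    build_name: str
    status: str
    job_name: str
    grouped_urls: List[List[str]] = field(default_factory=lambda: [[]])


# Hypothetical inputs: two required jobs, only one report was found
needs_data = {"BuilderDebRelease": {}, "BuilderBinDarwin": {}}
build_results = [Stub("package_release", SUCCESS, "BuilderDebRelease")]

missing_job_names = [
    name
    for name in needs_data
    if not any(br.job_name == name for br in build_results)
]
for job_name in reversed(missing_job_names):
    dummy = Stub("missing", PENDING, job_name)
    build_results.insert(0, dummy)  # missing jobs float to the top of the report

total_groups = sum(len(br.grouped_urls) for br in build_results)
ok_groups = sum(len(br.grouped_urls) for br in build_results if br.status == SUCCESS)
print(f"{ok_groups}/{total_groups} artifact groups are OK "
      f"({len(build_results) - len(missing_job_names)} of {len(build_results)} builds are OK)")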

tests/ci/report.py

@@ -2,12 +2,19 @@
from ast import literal_eval
from dataclasses import dataclass
from pathlib import Path
from typing import Final, Iterable, List, Literal, Optional, Tuple
from typing import Dict, Final, Iterable, List, Literal, Optional, Tuple
from html import escape
import csv
import os
import datetime
import json
import logging
import os
from ci_config import BuildConfig, CI_CONFIG
from env_helper import get_job_id_url
logger = logging.getLogger(__name__)
ERROR: Final = "error"
FAILURE: Final = "failure"
@@ -281,12 +288,159 @@ def read_test_results(results_path: Path, with_raw_logs: bool = True) -> TestResults:
@dataclass
class BuildResult:
compiler: str
debug_build: bool
sanitizer: str
status: str
build_name: str
log_url: str
build_urls: List[str]
version: str
status: StatusType
elapsed_seconds: int
comment: str
job_name: str
_job_link: Optional[str] = None
_grouped_urls: Optional[List[List[str]]] = None
@property
def build_config(self) -> Optional[BuildConfig]:
return CI_CONFIG.build_config.get(self.build_name, None)
@property
def comment(self) -> str:
if self.build_config is None:
return self._wrong_config_message
return self.build_config.comment
@property
def compiler(self) -> str:
if self.build_config is None:
return self._wrong_config_message
return self.build_config.compiler
@property
def debug_build(self) -> bool:
if self.build_config is None:
return False
return self.build_config.debug_build
@property
def sanitizer(self) -> str:
if self.build_config is None:
return self._wrong_config_message
return self.build_config.sanitizer
@property
def grouped_urls(self) -> List[List[str]]:
"Combine and preserve build_urls by artifact types"
if self._grouped_urls is not None:
return self._grouped_urls
if not self.build_urls:
self._grouped_urls = [[]]
return self._grouped_urls
artifacts_groups = {
"apk": [],
"deb": [],
"binary": [],
"tgz": [],
"rpm": [],
"performance": [],
} # type: Dict[str, List[str]]
for url in self.build_urls:
if url.endswith("performance.tar.zst"):
artifacts_groups["performance"].append(url)
elif (
url.endswith(".deb")
or url.endswith(".buildinfo")
or url.endswith(".changes")
or url.endswith(".tar.gz")
):
artifacts_groups["deb"].append(url)
elif url.endswith(".apk"):
artifacts_groups["apk"].append(url)
elif url.endswith(".rpm"):
artifacts_groups["rpm"].append(url)
elif url.endswith(".tgz") or url.endswith(".tgz.sha512"):
artifacts_groups["tgz"].append(url)
else:
artifacts_groups["binary"].append(url)
self._grouped_urls = [urls for urls in artifacts_groups.values() if urls]
return self._grouped_urls
@property
def _wrong_config_message(self) -> str:
return "missing"
@property
def file_name(self) -> Path:
return self.get_report_name(self.build_name)
@property
def is_missing(self) -> bool:
"The report is created for missing json file"
return not (
self.log_url
or self.build_urls
or self.version != "missing"
or self.status != ERROR
)
@property
def job_link(self) -> str:
if self._job_link is not None:
return self._job_link
_, job_url = get_job_id_url(self.job_name)
self._job_link = f'<a href="{job_url}">{self.job_name}</a>'
return self._job_link
@staticmethod
def get_report_name(name: str) -> Path:
return Path(f"build_report_{name}.json")
@staticmethod
def read_json(directory: Path, build_name: str) -> "BuildResult":
path = directory / BuildResult.get_report_name(build_name)
try:
with open(path, "r", encoding="utf-8") as pf:
data = json.load(pf) # type: dict
except FileNotFoundError:
logger.warning(
"File %s for build named '%s' is not found", path, build_name
)
return BuildResult.missing_result(build_name)
return BuildResult(
data.get("build_name", build_name),
data.get("log_url", ""),
data.get("build_urls", []),
data.get("version", ""),
data.get("status", ERROR),
data.get("elapsed_seconds", 0),
data.get("job_name", ""),
)
@staticmethod
def missing_result(build_name: str) -> "BuildResult":
return BuildResult(build_name, "", [], "missing", ERROR, 0, "missing")
def write_json(self, directory: Path) -> Path:
path = directory / self.file_name
path.write_text(
json.dumps(
{
"build_name": self.build_name,
"log_url": self.log_url,
"build_urls": self.build_urls,
"version": self.version,
"status": self.status,
"elapsed_seconds": self.elapsed_seconds,
"job_name": self.job_name,
}
),
encoding="utf-8",
)
# TODO: remove after the artifacts are in S3 completely
env_path = Path(os.getenv("GITHUB_ENV", "/dev/null"))
with env_path.open("a", encoding="utf-8") as ef:
ef.write(f"BUILD_URLS={path.stem}")
return path
BuildResults = List[BuildResult]
@@ -476,8 +630,10 @@ HTML_BASE_BUILD_TEMPLATE = (
</p>
<table>
<tr>
<th>Config/job name</th>
<th>Compiler</th>
<th>Build type</th>
<th>Version</th>
<th>Sanitizer</th>
<th>Status</th>
<th>Build log</th>
@@ -497,60 +653,59 @@ LINK_TEMPLATE = '<a href="{url}">{text}</a>'
def create_build_html_report(
header: str,
build_results: BuildResults,
build_logs_urls: List[str],
artifact_urls_list: List[List[str]],
task_url: str,
branch_url: str,
branch_name: str,
commit_url: str,
) -> str:
rows = []
for build_result, build_log_url, artifact_urls in zip(
build_results, build_logs_urls, artifact_urls_list
):
row = ["<tr>"]
row.append(f"<td>{build_result.compiler}</td>")
if build_result.debug_build:
row.append("<td>debug</td>")
else:
row.append("<td>relwithdebuginfo</td>")
if build_result.sanitizer:
row.append(f"<td>{build_result.sanitizer}</td>")
else:
row.append("<td>none</td>")
for build_result in build_results:
for artifact_urls in build_result.grouped_urls:
row = ["<tr>"]
row.append(
f"<td>{build_result.build_name}<br/>{build_result.job_link}</td>"
)
row.append(f"<td>{build_result.compiler}</td>")
if build_result.debug_build:
row.append("<td>debug</td>")
else:
row.append("<td>relwithdebuginfo</td>")
row.append(f"<td>{build_result.version}</td>")
if build_result.sanitizer:
row.append(f"<td>{build_result.sanitizer}</td>")
else:
row.append("<td>none</td>")
if build_result.status:
style = _get_status_style(build_result.status)
row.append(f'<td style="{style}">{build_result.status}</td>')
else:
style = _get_status_style(ERROR)
row.append(f'<td style="{style}">error</td>')
if build_result.status:
style = _get_status_style(build_result.status)
row.append(f'<td style="{style}">{build_result.status}</td>')
else:
style = _get_status_style(ERROR)
row.append(f'<td style="{style}">error</td>')
row.append(f'<td><a href="{build_log_url}">link</a></td>')
row.append(f'<td><a href="{build_result.log_url}">link</a></td>')
if build_result.elapsed_seconds:
delta = datetime.timedelta(seconds=build_result.elapsed_seconds)
else:
delta = "unknown" # type: ignore
delta = "unknown"
if build_result.elapsed_seconds:
delta = str(datetime.timedelta(seconds=build_result.elapsed_seconds))
row.append(f"<td>{delta}</td>")
row.append(f"<td>{delta}</td>")
links = ""
link_separator = "<br/>"
if artifact_urls:
for artifact_url in artifact_urls:
links += LINK_TEMPLATE.format(
text=_get_html_url_name(artifact_url), url=artifact_url
)
links += link_separator
if links:
links = links[: -len(link_separator)]
row.append(f"<td>{links}</td>")
links = []
link_separator = "<br/>"
if artifact_urls:
for artifact_url in artifact_urls:
links.append(
LINK_TEMPLATE.format(
text=_get_html_url_name(artifact_url), url=artifact_url
)
)
row.append(f"<td>{link_separator.join(links)}</td>")
row.append(f"<td>{build_result.comment}</td>")
row.append(f"<td>{build_result.comment}</td>")
row.append("</tr>")
rows.append("".join(row))
row.append("</tr>")
rows.append("".join(row))
return HTML_BASE_BUILD_TEMPLATE.format(
title=_format_header(header, branch_name),
header=_format_header(header, branch_name, branch_url),
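
For completeness, a standalone re-statement of the grouped_urls bucketing that replaces the deleted group_by_artifacts helper and drives the per-group report rows above. The URL suffixes are copied from the diff; the tuple form of endswith is an editorial simplification of the chained or-checks:

from typing import Dict, List


def group_urls(build_urls: List[str]) -> List[List[str]]:
    # Bucket artifact URLs by type; keep only non-empty groups, in bucket order
    groups: Dict[str, List[str]] = {
        "apk": [], "deb": [], "binary": [], "tgz": [], "rpm": [], "performance": []
    }
    for url in build_urls:
        if url.endswith("performance.tar.zst"):
            groups["performance"].append(url)
        elif url.endswith((".deb", ".buildinfo", ".changes", ".tar.gz")):
            groups["deb"].append(url)
        elif url.endswith(".apk"):
            groups["apk"].append(url)
        elif url.endswith(".rpm"):
            groups["rpm"].append(url)
        elif url.endswith((".tgz", ".tgz.sha512")):
            groups["tgz"].append(url)
        else:
            groups["binary"].append(url)
    return [urls for urls in groups.values() if urls]


print(group_urls(["a.deb", "b.rpm", "clickhouse"]))  # [['a.deb'], ['clickhouse'], ['b.rpm']]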