# -*- coding: utf-8 -*-
from ast import literal_eval
from dataclasses import dataclass
from pathlib import Path
from typing import Dict, Final, Iterable, List, Literal, Optional, Tuple
from html import escape
import csv
import datetime
import json
import logging
import os

from build_download_helper import get_gh_api
from ci_config import BuildConfig, CI_CONFIG


logger = logging.getLogger(__name__)

ERROR: Final = "error"
FAILURE: Final = "failure"
PENDING: Final = "pending"
SUCCESS: Final = "success"

OK: Final = "OK"
FAIL: Final = "FAIL"

StatusType = Literal["error", "failure", "pending", "success"]
# The order of statuses from the worst to the best
_STATES = {ERROR: 0, FAILURE: 1, PENDING: 2, SUCCESS: 3}


def get_worst_status(statuses: Iterable[str]) -> str:
    """Return the worst of the given statuses.

    Statuses that are not keys of ``_STATES`` are ignored. An empty string is
    returned when no known status is present.
    """
    worst: Optional[str] = None
    for status in statuses:
        rank = _STATES.get(status)
        if rank is None:
            # Unknown status, it does not take part in the comparison
            continue
        if worst is None or rank < _STATES[worst]:
            worst = status
        if worst == ERROR:
            # Nothing can be worse than ERROR, no need to look further
            break
    return worst or ""


### BEST FRONTEND PRACTICES BELOW

# NOTE(review): the templates below contain no markup as seen here, although
# their names suggest HTML - verify that nothing was stripped from the file.
HEAD_HTML_TEMPLATE = """ {title}
🌚🌞

{header}

"""

FOOTER_HTML_TEMPLATE = """ """


HTML_BASE_TEST_TEMPLATE = (
    f"{HEAD_HTML_TEMPLATE}"
    """ {test_part} """
    f"{FOOTER_HTML_TEMPLATE}"
)

HTML_TEST_PART = """ {headers} {rows}
"""

BASE_HEADERS = ["Test name", "Test status"]


@dataclass
class TestResult:
    """A single test entry: its name, status and optional time and logs."""

    name: str
    status: str
    # the following fields are optional
    time: Optional[float] = None
    log_files: Optional[List[Path]] = None
    raw_logs: Optional[str] = None
    # the field for uploaded logs URLs
    log_urls: Optional[List[str]] = None

    def set_raw_logs(self, raw_logs: str) -> None:
        """Store the raw log text of the test."""
        self.raw_logs = raw_logs

    def set_log_files(self, log_files_literal: str) -> None:
        """Parse a Python list literal of paths and store it in `log_files`.

        Raises:
            ValueError: if the literal does not evaluate to a list.
            AssertionError: if one of the listed files does not exist.
        """
        files: List[Path] = []
        # Assigned before parsing, so `log_files` is a list (possibly partially
        # filled) even when one of the checks below fails
        self.log_files = files
        log_paths = literal_eval(log_files_literal)
        if not isinstance(log_paths, list):
            raise ValueError(
                f"Malformed input: must be a list literal: {log_files_literal}"
            )
        for log_path in log_paths:
            file = Path(log_path)
            # NOTE(review): the assert is skipped under `python -O`; it is kept
            # to preserve the exception type for existing callers
            assert file.exists(), file
            files.append(file)

    @staticmethod
    def create_check_timeout_expired(timeout: float) -> "TestResult":
        """Create a failed result that reports the expired check timeout."""
        return TestResult("Check timeout expired", FAIL, timeout)


TestResults = List[TestResult]


def read_test_results(results_path: Path, with_raw_logs: bool = True) -> TestResults:
    """Read test results from a tab-separated file.

    Every row is `name<TAB>status[<TAB>time[<TAB>logs]]`. When the row has
    exactly four columns and the 4th one is not empty, it is stored either as
    raw logs (`with_raw_logs=True`) or parsed as a list literal of log files.
    Completely empty rows are skipped.
    """
    results: TestResults = []
    with open(results_path, "r", encoding="utf-8") as descriptor:
        reader = csv.reader(descriptor, delimiter="\t")
        for line in reader:
            if not line:
                # csv.reader yields an empty list for a blank line, there is
                # neither a name nor a status to read from it
                continue
            name = line[0]
            status = line[1]
            time = None
            if len(line) >= 3 and line[2] and line[2] != "\\N":
                # The value can be empty, but when it's not,
                # it's the time spent on the test
                try:
                    time = float(line[2])
                except ValueError:
                    # A non-numeric value is ignored, the time stays unknown
                    pass

            result = TestResult(name, status, time)
            if len(line) == 4 and line[3]:
                # The value can be empty, but when it's not, the 4th value is
                # either raw logs or a pythonic list, e.g. ['file1', 'file2']
                if with_raw_logs:
                    # Python does not support TSV, so we unescape manually
                    result.set_raw_logs(
                        line[3].replace("\\t", "\t").replace("\\n", "\n")
                    )
                else:
                    result.set_log_files(line[3])

            results.append(result)

    return results


@dataclass
class BuildResult:
    """The outcome of one build, serializable to and from a JSON report."""

    build_name: str
    log_url: str
    build_urls: List[str]
    version: str
    status: StatusType
    elapsed_seconds: int
    job_api_url: str
    # Lazily calculated values, see the corresponding properties
    _job_name: Optional[str] = None
    _job_html_url: Optional[str] = None
    _job_html_link: Optional[str] = None
    _grouped_urls: Optional[List[List[str]]] = None

    @property
    def build_config(self) -> Optional[BuildConfig]:
        """The config registered in CI_CONFIG for `build_name`, or None."""
        return CI_CONFIG.build_config.get(self.build_name, None)

    @property
    def comment(self) -> str:
        """The comment of the build config, or "missing" without a config."""
        config = self.build_config
        if config is None:
            return self._wrong_config_message
        return config.comment

    @property
    def compiler(self) -> str:
        """The compiler of the build config, or "missing" without a config."""
        config = self.build_config
        if config is None:
            return self._wrong_config_message
        return config.compiler

    @property
    def debug_build(self) -> bool:
        """The debug flag of the build config, False without a config."""
        config = self.build_config
        if config is None:
            return False
        return config.debug_build

    @property
    def sanitizer(self) -> str:
        """The sanitizer of the build config, or "missing" without a config."""
        config = self.build_config
        if config is None:
            return self._wrong_config_message
        return config.sanitizer

    @property
    def grouped_urls(self) -> List[List[str]]:
        """Combine and preserve build_urls by artifact types.

        The result is cached. Empty groups are dropped; when there are no
        build URLs at all, the result is a list with one empty group.
        """
        if self._grouped_urls is not None:
            return self._grouped_urls
        if not self.build_urls:
            self._grouped_urls = [[]]
            return self._grouped_urls
        # The insertion order of the keys defines the order of the groups
        artifacts_groups: Dict[str, List[str]] = {
            "apk": [],
            "deb": [],
            "binary": [],
            "tgz": [],
            "rpm": [],
            "performance": [],
        }
        for url in self.build_urls:
            if url.endswith("performance.tar.zst"):
                group = "performance"
            elif url.endswith((".deb", ".buildinfo", ".changes", ".tar.gz")):
                group = "deb"
            elif url.endswith(".apk"):
                group = "apk"
            elif url.endswith(".rpm"):
                group = "rpm"
            elif url.endswith((".tgz", ".tgz.sha512")):
                group = "tgz"
            else:
                group = "binary"
            artifacts_groups[group].append(url)
        self._grouped_urls = [urls for urls in artifacts_groups.values() if urls]
        return self._grouped_urls

    @property
    def _wrong_config_message(self) -> str:
        """The placeholder used when the build has no config."""
        return "missing"

    @property
    def file_name(self) -> Path:
        """The name of the JSON report file for this build."""
        return self.get_report_name(self.build_name)

    @property
    def is_missing(self) -> bool:
        "The report is created for missing json file"
        return not (
            self.log_url
            or self.build_urls
            or self.version != "missing"
            or self.status != ERROR
        )

    @property
    def job_link(self) -> str:
        """The cached text of the job link, built from `job_name`."""
        if self._job_html_link is not None:
            return self._job_html_link
        # NOTE(review): only the job name is used here; verify whether
        # job_html_url was meant to be a part of the link
        self._job_html_link = f'{self.job_name}'
        return self._job_html_link

    @property
    def job_html_url(self) -> str:
        """The job HTML URL, fetched on the first access; "" if unknown."""
        if self._job_html_url is not None:
            return self._job_html_url
        self._set_properties()
        return self._job_html_url or ""

    @property
    def job_name(self) -> str:
        """The job name, fetched on the first access unless set manually."""
        if self._job_name is not None:
            return self._job_name
        self._set_properties()
        return self._job_name or ""

    @job_name.setter
    def job_name(self, job_name: str) -> None:
        self._job_name = job_name

    def _set_properties(self) -> None:
        """Fill `_job_name` and `_job_html_url` from `job_api_url`.

        The request is best-effort: on any failure it is logged and the
        fallback values ("unknown" name, empty URL) are used.
        """
        if all(p is not None for p in (self._job_name, self._job_html_url)):
            return
        job_data: dict = {}
        # quick check @self.job_api_url is valid url before request.
        # it's set to "missing" for dummy BuildResult
        if "http" in self.job_api_url:
            try:
                job_data = get_gh_api(self.job_api_url).json()
            except Exception as e:
                # Do not fail the report, but leave a trace of the problem
                logger.warning(
                    "Unable to get the job data from %s: %s", self.job_api_url, e
                )
        # job_name can be set manually
        self._job_name = self._job_name or job_data.get("name", "unknown")
        self._job_html_url = job_data.get("html_url", "")

    @staticmethod
    def get_report_name(name: str) -> Path:
        """Return the report file name for the build `name`."""
        return Path(f"build_report_{name}.json")

    @staticmethod
    def read_json(directory: Path, build_name: str) -> "BuildResult":
        """Load the build report of `build_name` from `directory`.

        When the file does not exist, a warning is logged and the result of
        `missing_result` is returned. Absent keys get default values.
        """
        path = directory / BuildResult.get_report_name(build_name)
        try:
            with open(path, "r", encoding="utf-8") as pf:
                data: dict = json.load(pf)
        except FileNotFoundError:
            logger.warning(
                "File %s for build named '%s' is not found", path, build_name
            )
            return BuildResult.missing_result(build_name)

        return BuildResult(
            data.get("build_name", build_name),
            data.get("log_url", ""),
            data.get("build_urls", []),
            data.get("version", ""),
            data.get("status", ERROR),
            data.get("elapsed_seconds", 0),
            data.get("job_api_url", ""),
        )

    @staticmethod
    def missing_result(build_name: str) -> "BuildResult":
        """Create the placeholder result for a build without a report."""
        return BuildResult(build_name, "", [], "missing", ERROR, 0, "missing")

    def write_json(self, directory: Path) -> Path:
        """Write the report to `directory` and return the path of the file.

        The stem of the report name is also appended as BUILD_URLS to the file
        named by the GITHUB_ENV environment variable (/dev/null if unset).
        """
        path = directory / self.file_name
        path.write_text(
            json.dumps(
                {
                    "build_name": self.build_name,
                    "log_url": self.log_url,
                    "build_urls": self.build_urls,
                    "version": self.version,
                    "status": self.status,
                    "elapsed_seconds": self.elapsed_seconds,
                    "job_api_url": self.job_api_url,
                }
            ),
            encoding="utf-8",
        )
        # TODO: remove after the artifacts are in S3 completely
        env_path = Path(os.getenv("GITHUB_ENV", "/dev/null"))
        with env_path.open("a", encoding="utf-8") as ef:
            # The file is line-based, so the entry is terminated with a newline
            # to keep anything appended later on a line of its own
            ef.write(f"BUILD_URLS={path.stem}\n")

        return path


BuildResults = List[BuildResult]


class ReportColorTheme:
    """Color triples used for report statuses: (ok, fail, other)."""

    class ReportColor:
        yellow = "#FFB400"
        red = "#F00"
        green = "#0A0"
        blue = "#00B4FF"

    default = (ReportColor.green, ReportColor.red, ReportColor.yellow)
    bugfixcheck = (ReportColor.yellow, ReportColor.blue, ReportColor.blue)


ColorTheme = Tuple[str, str, str]
def _format_header(
    header: str, branch_name: str, branch_url: Optional[str] = None
) -> str:
    """Build the report title: capitalized `header` followed by the branch.

    "ClickHouse" is prepended unless the header already mentions it.
    """
    # Only the first letter of every word is upper-cased, the rest of the word
    # is left untouched, so CI and SQLancer do not become Ci and Sqlancer
    capitalized = [word[0].upper() + word[1:] for word in header.split(" ") if word]
    title = " ".join(capitalized)
    for spelling in ("Clickhouse", "clickhouse"):
        title = title.replace(spelling, "ClickHouse")
    if "ClickHouse" not in title:
        title = f"ClickHouse {title}"
    if branch_url:
        # NOTE(review): this branch renders the same text as the one below and
        # does not use branch_url - verify whether a link was intended here
        return f'{title} for {branch_name}'
    return f"{title} for {branch_name}"


def _get_status_style(status: str, colortheme: Optional[ColorTheme] = None) -> str:
    """Return the CSS style for `status`: bold plus a color from the theme.

    The theme is an (ok, fail, other) triple; ReportColorTheme.default is used
    when `colortheme` is None.
    """
    palette = ReportColorTheme.default if colortheme is None else colortheme
    if status in (OK, SUCCESS, "PASSED"):
        position = 0
    elif status in (FAIL, FAILURE, ERROR, "FAILED", "Timeout", "NOT_FAILED"):
        position = 1
    else:
        position = 2
    return f"font-weight: bold;color: {palette[position]};"


def _get_html_url_name(url):
    """Return the display name for `url`, or None for an unsupported type.

    For a string it is the basename, for a tuple it is the second item; the
    "%2B" and "%20" sequences are replaced by "+" and " ".
    """
    if isinstance(url, str):
        encoded = os.path.basename(url)
    elif isinstance(url, tuple):
        encoded = url[1]
    else:
        return None
    return encoded.replace("%2B", "+").replace("%20", " ")


def _get_html_url(url):
    """Return the rendered name of `url`, or "" if the target or name is empty.

    `url` is either a string or a tuple, whose first item is the target and
    the second one is the name.
    """
    if isinstance(url, str):
        target = url
    elif isinstance(url, tuple):
        target = url[0]
    else:
        return ""
    text = _get_html_url_name(url)
    if target and text:
        return f"{text}"
    return ""


def create_test_html_report(
    header: str,
    test_results: TestResults,
    raw_log_url: str,
    task_url: str,
    job_url: str,
    branch_url: str,
    branch_name: str,
    commit_url: str,
    additional_urls: Optional[List[str]] = None,
    statuscolors: Optional[ColorTheme] = None,
) -> str:
    """Render the tests report from HTML_BASE_TEST_TEMPLATE.

    `test_results` is sorted in place, so that the entries with logs come
    first. The columns for the test time and the logs are added only when at
    least one of the results has them.
    """
    extra_urls = [] if additional_urls is None else additional_urls

    # NOTE(review): colspan, style and fail_id are calculated below but never
    # rendered - verify whether the markup using them is missing
    test_part = ""
    if test_results:

        def has_no_logs(result: TestResult) -> bool:
            return result.raw_logs is None and result.log_files is None

        # Display entries with logs at the top (they correspond to failed tests)
        test_results.sort(key=has_no_logs)

        rendered_rows = []
        num_fails = 0
        has_test_time = False
        has_log_urls = False
        for test_result in test_results:
            with_raw_logs = test_result.raw_logs is not None
            if test_result.log_files is not None:
                has_log_urls = True

            colspan = 2
            if with_raw_logs:
                cells = [""]
            else:
                cells = [""]
            cells.append(f"{test_result.name}")
            style = _get_status_style(test_result.status, colortheme=statuscolors)

            # Allow to quickly scroll to the first failure.
            fail_id = ""
            if test_result.status in ("FAIL", "NOT_FAILED"):
                num_fails += 1
                fail_id = f'id="fail{num_fails}" '
            cells.append(f"{test_result.status}")

            if test_result.time is not None:
                has_test_time = True
                cells.append(f"{test_result.time}")
                colspan += 1

            if test_result.log_urls is not None:
                has_log_urls = True
                cells.append(
                    "\n".join(_get_html_url(url) for url in test_result.log_urls)
                )
                colspan += 1

            cells.append("")
            rendered_rows.append("\n".join(cells))
            if with_raw_logs:
                # The raw logs are escaped and added as a separate row
                rendered_rows.append(f"\n{escape(test_result.raw_logs)}\n")

        columns = list(BASE_HEADERS)
        if has_test_time:
            columns.append("Test time, sec.")
        if has_log_urls:
            columns.append("Logs")

        test_part = HTML_TEST_PART.format(
            headers="".join(f"{column}" for column in columns),
            rows="".join(rendered_rows),
        )

    additional_html_urls = " ".join(
        _get_html_url(url) for url in sorted(extra_urls, key=_get_html_url_name)
    )

    # The name of the raw log is the basename of its URL without the query
    raw_log_name = os.path.basename(raw_log_url).split("?")[0]

    return HTML_BASE_TEST_TEMPLATE.format(
        title=_format_header(header, branch_name),
        header=_format_header(header, branch_name, branch_url),
        raw_log_name=raw_log_name,
        raw_log_url=raw_log_url,
        task_url=task_url,
        job_url=job_url,
        test_part=test_part,
        branch_name=branch_name,
        commit_url=commit_url,
        additional_urls=additional_html_urls,
    )


HTML_BASE_BUILD_TEMPLATE = (
    f"{HEAD_HTML_TEMPLATE}"
    """ {rows}
Config/job name Compiler Build type Version Sanitizer Status Build log Build time Artifacts Comment
"""
    f"{FOOTER_HTML_TEMPLATE}"
)

LINK_TEMPLATE = '{text}'


def create_build_html_report(
    header: str,
    build_results: BuildResults,
    task_url: str,
    branch_url: str,
    branch_name: str,
    commit_url: str,
) -> str:
    """Render the builds report from HTML_BASE_BUILD_TEMPLATE.

    One row is rendered for every group of artifacts of every build result.
    """
    rendered_rows = []
    for build_result in build_results:
        for artifact_urls in build_result.grouped_urls:
            cells = [
                "",
                f"{build_result.build_name}\n{build_result.job_link}",
                f"{build_result.compiler}",
                "debug" if build_result.debug_build else "relwithdebuginfo",
                f"{build_result.version}",
            ]
            if build_result.sanitizer:
                cells.append(f"{build_result.sanitizer}")
            else:
                cells.append("none")

            # NOTE(review): style is calculated but never rendered - verify
            # whether the markup using it is missing
            if build_result.status:
                style = _get_status_style(build_result.status)
                cells.append(f"{build_result.status}")
            else:
                style = _get_status_style(ERROR)
                cells.append("error")

            cells.append("link")

            if build_result.elapsed_seconds:
                elapsed = datetime.timedelta(seconds=build_result.elapsed_seconds)
                cells.append(str(elapsed))
            else:
                cells.append("unknown")

            links = [
                LINK_TEMPLATE.format(
                    text=_get_html_url_name(artifact_url), url=artifact_url
                )
                for artifact_url in artifact_urls or []
            ]
            cells.append("\n".join(links))

            comment = build_result.comment
            config = build_result.build_config
            if config is not None and config.sparse_checkout:
                comment += " (note: sparse checkout is used)"
            cells.append(f"{comment}")

            cells.append("")
            rendered_rows.append("".join(cells))

    return HTML_BASE_BUILD_TEMPLATE.format(
        title=_format_header(header, branch_name),
        header=_format_header(header, branch_name, branch_url),
        rows="".join(rendered_rows),
        task_url=task_url,
        branch_name=branch_name,
        commit_url=commit_url,
    )