From 3dedd8d76b1c3fdb533e9f47d537e4cdf369af5e Mon Sep 17 00:00:00 2001 From: Max K Date: Sat, 3 Aug 2024 10:40:12 +0200 Subject: [PATCH] CI: Minor refactoring in ci_utils --- tests/ci/bugfix_validate_check.py | 5 +- tests/ci/ci.py | 8 +- tests/ci/ci_cache.py | 4 +- tests/ci/ci_config.py | 7 +- tests/ci/ci_definitions.py | 22 ----- tests/ci/ci_settings.py | 9 +- tests/ci/ci_utils.py | 150 +++++------------------------ tests/ci/report.py | 3 +- tests/ci/run_check.py | 152 +++++++++++++++++++++++++++--- tests/ci/test_ci_config.py | 12 ++- 10 files changed, 186 insertions(+), 186 deletions(-) diff --git a/tests/ci/bugfix_validate_check.py b/tests/ci/bugfix_validate_check.py index 71b18572938..932d709a7b8 100644 --- a/tests/ci/bugfix_validate_check.py +++ b/tests/ci/bugfix_validate_check.py @@ -8,7 +8,6 @@ from pathlib import Path from typing import List, Sequence, Tuple from ci_config import CI -from ci_utils import normalize_string from env_helper import TEMP_PATH from functional_test_check import NO_CHANGES_MSG from report import ( @@ -142,7 +141,9 @@ def main(): for file in set(jr.additional_files): file_ = Path(file) file_name = file_.name - file_name = file_name.replace(".", "__" + normalize_string(job_id) + ".", 1) + file_name = file_name.replace( + ".", "__" + CI.Utils.normalize_string(job_id) + ".", 1 + ) file_ = file_.rename(file_.parent / file_name) additional_files.append(file_) diff --git a/tests/ci/ci.py b/tests/ci/ci.py index 2565c8944e4..e36f2904182 100644 --- a/tests/ci/ci.py +++ b/tests/ci/ci.py @@ -16,7 +16,7 @@ import upload_result_helper from build_check import get_release_or_pr from ci_config import CI from ci_metadata import CiMetadata -from ci_utils import GH, normalize_string, Utils +from ci_utils import GH, Utils from clickhouse_helper import ( CiLogsCredentials, ClickHouseHelper, @@ -296,7 +296,7 @@ def _pre_action(s3, job_name, batch, indata, pr_info): # do not set report prefix for scheduled or dispatched wf (in case it started from feature branch while # testing), otherwise reports won't be found if not (pr_info.is_scheduled or pr_info.is_dispatched): - report_prefix = normalize_string(pr_info.head_ref) + report_prefix = Utils.normalize_string(pr_info.head_ref) print( f"Use report prefix [{report_prefix}], pr_num [{pr_info.number}], head_ref [{pr_info.head_ref}]" ) @@ -718,7 +718,7 @@ def _upload_build_artifacts( ( get_release_or_pr(pr_info, get_version_from_repo())[1], pr_info.sha, - normalize_string(build_name), + Utils.normalize_string(build_name), "performance.tar.zst", ) ) @@ -1248,7 +1248,7 @@ def main() -> int: ( get_release_or_pr(pr_info, get_version_from_repo())[0], pr_info.sha, - normalize_string( + Utils.normalize_string( job_report.check_name or _get_ext_check_name(args.job_name) ), ) diff --git a/tests/ci/ci_cache.py b/tests/ci/ci_cache.py index 4846233ab03..a59fd3e5a29 100644 --- a/tests/ci/ci_cache.py +++ b/tests/ci/ci_cache.py @@ -7,7 +7,7 @@ from typing import Dict, Optional, Any, Union, Sequence, List, Set from ci_config import CI -from ci_utils import is_hex, GH +from ci_utils import Utils, GH from commit_status_helper import CommitStatusData from env_helper import ( TEMP_PATH, @@ -240,7 +240,7 @@ class CiCache: int(job_properties[-1]), ) - if not is_hex(job_digest): + if not Utils.is_hex(job_digest): print("ERROR: wrong record job digest") return None diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index c031ca9b805..ef48466e451 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -3,7 +3,7 @@ import re from argparse import ArgumentDefaultsHelpFormatter, ArgumentParser from typing import Dict, Optional, List -from ci_utils import normalize_string +from ci_utils import Utils from ci_definitions import * @@ -13,7 +13,6 @@ class CI: each config item in the below dicts should be an instance of JobConfig class or inherited from it """ - MAX_TOTAL_FAILURES_BEFORE_BLOCKING_CI = 5 MAX_TOTAL_FAILURES_PER_JOB_BEFORE_BLOCKING_CI = 2 # reimport types to CI class so that they visible as CI.* and mypy is happy @@ -37,9 +36,7 @@ class CI: from ci_utils import GH as GH from ci_utils import Shell as Shell from ci_definitions import Labels as Labels - from ci_definitions import TRUSTED_CONTRIBUTORS as TRUSTED_CONTRIBUTORS from ci_definitions import WorkFlowNames as WorkFlowNames - from ci_utils import CATEGORY_TO_LABEL as CATEGORY_TO_LABEL # Jobs that run for doc related updates _DOCS_CHECK_JOBS = [JobNames.DOCS_CHECK, JobNames.STYLE_CHECK] @@ -558,7 +555,7 @@ class CI: @classmethod def get_tag_config(cls, label_name: str) -> Optional[LabelConfig]: for label, config in cls.TAG_CONFIGS.items(): - if normalize_string(label_name) == normalize_string(label): + if Utils.normalize_string(label_name) == Utils.normalize_string(label): return config return None diff --git a/tests/ci/ci_definitions.py b/tests/ci/ci_definitions.py index de6791acda8..795bda3d4b0 100644 --- a/tests/ci/ci_definitions.py +++ b/tests/ci/ci_definitions.py @@ -32,28 +32,6 @@ class Labels: AUTO_BACKPORT = {"pr-critical-bugfix"} -TRUSTED_CONTRIBUTORS = { - e.lower() - for e in [ - "amosbird", - "azat", # SEMRush - "bharatnc", # Many contributions. - "cwurm", # ClickHouse, Inc - "den-crane", # Documentation contributor - "ildus", # adjust, ex-pgpro - "nvartolomei", # Seasoned contributor, CloudFlare - "taiyang-li", - "ucasFL", # Amos Bird's friend - "thomoco", # ClickHouse, Inc - "tonickkozlov", # Cloudflare - "tylerhannan", # ClickHouse, Inc - "tsolodov", # ClickHouse, Inc - "justindeguzman", # ClickHouse, Inc - "XuJia0210", # ClickHouse, Inc - ] -} - - class WorkflowStages(metaclass=WithIter): """ Stages of GitHUb actions workflow diff --git a/tests/ci/ci_settings.py b/tests/ci/ci_settings.py index d6e9765ceb7..05929179e06 100644 --- a/tests/ci/ci_settings.py +++ b/tests/ci/ci_settings.py @@ -2,7 +2,6 @@ import re from dataclasses import dataclass, asdict from typing import Optional, List, Dict, Any, Iterable -from ci_utils import normalize_string from ci_config import CI from git_helper import Runner as GitRunner, GIT_PREFIX from pr_info import PRInfo @@ -89,14 +88,14 @@ class CiSettings: if not res.include_keywords: res.include_keywords = [] res.include_keywords.append( - normalize_string(match.removeprefix("ci_include_")) + CI.Utils.normalize_string(match.removeprefix("ci_include_")) ) elif match.startswith("ci_exclude_"): if not res.exclude_keywords: res.exclude_keywords = [] keywords = match.removeprefix("ci_exclude_").split("|") res.exclude_keywords += [ - normalize_string(keyword) for keyword in keywords + CI.Utils.normalize_string(keyword) for keyword in keywords ] elif match == CI.Tags.NO_CI_CACHE: res.no_ci_cache = True @@ -163,7 +162,7 @@ class CiSettings: # do not exclude builds if self.exclude_keywords and not CI.is_build_job(job): for keyword in self.exclude_keywords: - if keyword in normalize_string(job): + if keyword in CI.Utils.normalize_string(job): print(f"Job [{job}] matches Exclude keyword [{keyword}] - deny") return False @@ -174,7 +173,7 @@ class CiSettings: # never exclude Style Check by include keywords return True for keyword in self.include_keywords: - if keyword in normalize_string(job): + if keyword in CI.Utils.normalize_string(job): print(f"Job [{job}] matches Include keyword [{keyword}] - pass") return True to_deny = True diff --git a/tests/ci/ci_utils.py b/tests/ci/ci_utils.py index dae1520afb6..067bedb19c3 100644 --- a/tests/ci/ci_utils.py +++ b/tests/ci/ci_utils.py @@ -6,7 +6,7 @@ import sys import time from contextlib import contextmanager from pathlib import Path -from typing import Any, Iterator, List, Union, Optional, Sequence, Tuple +from typing import Any, Iterator, List, Union, Optional, Sequence import requests @@ -20,41 +20,6 @@ class Envs: GITHUB_WORKFLOW = os.getenv("GITHUB_WORKFLOW", "") -LABEL_CATEGORIES = { - "pr-backward-incompatible": ["Backward Incompatible Change"], - "pr-bugfix": [ - "Bug Fix", - "Bug Fix (user-visible misbehavior in an official stable release)", - "Bug Fix (user-visible misbehaviour in official stable or prestable release)", - "Bug Fix (user-visible misbehavior in official stable or prestable release)", - ], - "pr-critical-bugfix": ["Critical Bug Fix (crash, LOGICAL_ERROR, data loss, RBAC)"], - "pr-build": [ - "Build/Testing/Packaging Improvement", - "Build Improvement", - "Build/Testing Improvement", - "Build", - "Packaging Improvement", - ], - "pr-documentation": [ - "Documentation (changelog entry is not required)", - "Documentation", - ], - "pr-feature": ["New Feature"], - "pr-improvement": ["Improvement"], - "pr-not-for-changelog": [ - "Not for changelog (changelog entry is not required)", - "Not for changelog", - ], - "pr-performance": ["Performance Improvement"], - "pr-ci": ["CI Fix or Improvement (changelog entry is not required)"], -} - -CATEGORY_TO_LABEL = { - c: lb for lb, categories in LABEL_CATEGORIES.items() for c in categories -} - - class WithIter(type): def __iter__(cls): return (v for k, v in cls.__dict__.items() if not k.startswith("_")) @@ -70,21 +35,6 @@ def cd(path: Union[Path, str]) -> Iterator[None]: os.chdir(oldpwd) -def is_hex(s): - try: - int(s, 16) - return True - except ValueError: - return False - - -def normalize_string(string: str) -> str: - res = string.lower() - for r in ((" ", "_"), ("(", "_"), (")", "_"), (",", "_"), ("/", "_"), ("-", "_")): - res = res.replace(*r) - return res - - class GH: class ActionsNames: RunConfig = "RunConfig" @@ -149,8 +99,8 @@ class GH: ) -> str: assert len(token) == 40 assert len(commit_sha) == 40 - assert is_hex(commit_sha) - assert not is_hex(token) + assert Utils.is_hex(commit_sha) + assert not Utils.is_hex(token) url = f"https://api.github.com/repos/{Envs.GITHUB_REPOSITORY}/commits/{commit_sha}/statuses?per_page={200}" headers = { "Authorization": f"token {token}", @@ -298,79 +248,23 @@ class Utils: Shell.check("sudo dmesg --clear", verbose=True) @staticmethod - def check_pr_description(pr_body: str, repo_name: str) -> Tuple[str, str]: - """The function checks the body to being properly formatted according to - .github/PULL_REQUEST_TEMPLATE.md, if the first returned string is not empty, - then there is an error.""" - lines = list(map(lambda x: x.strip(), pr_body.split("\n") if pr_body else [])) - lines = [re.sub(r"\s+", " ", line) for line in lines] + def is_hex(s): + try: + int(s, 16) + return True + except ValueError: + return False - # Check if body contains "Reverts ClickHouse/ClickHouse#36337" - if [ - True for line in lines if re.match(rf"\AReverts {repo_name}#[\d]+\Z", line) - ]: - return "", LABEL_CATEGORIES["pr-not-for-changelog"][0] - - category = "" - entry = "" - description_error = "" - - i = 0 - while i < len(lines): - if re.match(r"(?i)^[#>*_ ]*change\s*log\s*category", lines[i]): - i += 1 - if i >= len(lines): - break - # Can have one empty line between header and the category - # itself. Filter it out. - if not lines[i]: - i += 1 - if i >= len(lines): - break - category = re.sub(r"^[-*\s]*", "", lines[i]) - i += 1 - - # Should not have more than one category. Require empty line - # after the first found category. - if i >= len(lines): - break - if lines[i]: - second_category = re.sub(r"^[-*\s]*", "", lines[i]) - description_error = ( - "More than one changelog category specified: " - f"'{category}', '{second_category}'" - ) - return description_error, category - - elif re.match( - r"(?i)^[#>*_ ]*(short\s*description|change\s*log\s*entry)", lines[i] - ): - i += 1 - # Can have one empty line between header and the entry itself. - # Filter it out. - if i < len(lines) and not lines[i]: - i += 1 - # All following lines until empty one are the changelog entry. - entry_lines = [] - while i < len(lines) and lines[i]: - entry_lines.append(lines[i]) - i += 1 - entry = " ".join(entry_lines) - # Don't accept changelog entries like '...'. - entry = re.sub(r"[#>*_.\- ]", "", entry) - # Don't accept changelog entries like 'Close #12345'. - entry = re.sub(r"^[\w\-\s]{0,10}#?\d{5,6}\.?$", "", entry) - else: - i += 1 - - if not category: - description_error = "Changelog category is empty" - # Filter out the PR categories that are not for changelog. - elif "(changelog entry is not required)" in category: - pass # to not check the rest of the conditions - elif category not in CATEGORY_TO_LABEL: - description_error, category = f"Category '{category}' is not valid", "" - elif not entry: - description_error = f"Changelog entry required for category '{category}'" - - return description_error, category + @staticmethod + def normalize_string(string: str) -> str: + res = string.lower() + for r in ( + (" ", "_"), + ("(", "_"), + (")", "_"), + (",", "_"), + ("/", "_"), + ("-", "_"), + ): + res = res.replace(*r) + return res diff --git a/tests/ci/report.py b/tests/ci/report.py index f50ed4c1f85..f5571939d0b 100644 --- a/tests/ci/report.py +++ b/tests/ci/report.py @@ -22,7 +22,6 @@ from typing import ( from build_download_helper import get_gh_api from ci_config import CI -from ci_utils import normalize_string from env_helper import REPORT_PATH, GITHUB_WORKSPACE logger = logging.getLogger(__name__) @@ -622,7 +621,7 @@ class BuildResult: def write_json(self, directory: Union[Path, str] = REPORT_PATH) -> Path: path = Path(directory) / self.get_report_name( - self.build_name, self.pr_number or normalize_string(self.head_ref) + self.build_name, self.pr_number or CI.Utils.normalize_string(self.head_ref) ) path.write_text( json.dumps( diff --git a/tests/ci/run_check.py b/tests/ci/run_check.py index 0ad01e3accd..55a0c383812 100644 --- a/tests/ci/run_check.py +++ b/tests/ci/run_check.py @@ -1,5 +1,6 @@ #!/usr/bin/env python3 import logging +import re import sys from typing import Tuple @@ -16,7 +17,6 @@ from commit_status_helper import ( from env_helper import GITHUB_REPOSITORY, GITHUB_SERVER_URL from get_robot_token import get_best_robot_token from ci_config import CI -from ci_utils import Utils from pr_info import PRInfo from report import FAILURE, PENDING, SUCCESS, StatusType @@ -25,12 +25,144 @@ TRUSTED_ORG_IDS = { 54801242, # clickhouse } +TRUSTED_CONTRIBUTORS = { + e.lower() + for e in [ + "amosbird", + "azat", # SEMRush + "bharatnc", # Many contributions. + "cwurm", # ClickHouse, Inc + "den-crane", # Documentation contributor + "ildus", # adjust, ex-pgpro + "nvartolomei", # Seasoned contributor, CloudFlare + "taiyang-li", + "ucasFL", # Amos Bird's friend + "thomoco", # ClickHouse, Inc + "tonickkozlov", # Cloudflare + "tylerhannan", # ClickHouse, Inc + "tsolodov", # ClickHouse, Inc + "justindeguzman", # ClickHouse, Inc + "XuJia0210", # ClickHouse, Inc + ] +} + OK_SKIP_LABELS = {CI.Labels.RELEASE, CI.Labels.PR_BACKPORT, CI.Labels.PR_CHERRYPICK} PR_CHECK = "PR Check" +LABEL_CATEGORIES = { + "pr-backward-incompatible": ["Backward Incompatible Change"], + "pr-bugfix": [ + "Bug Fix", + "Bug Fix (user-visible misbehavior in an official stable release)", + "Bug Fix (user-visible misbehaviour in official stable or prestable release)", + "Bug Fix (user-visible misbehavior in official stable or prestable release)", + ], + "pr-critical-bugfix": ["Critical Bug Fix (crash, LOGICAL_ERROR, data loss, RBAC)"], + "pr-build": [ + "Build/Testing/Packaging Improvement", + "Build Improvement", + "Build/Testing Improvement", + "Build", + "Packaging Improvement", + ], + "pr-documentation": [ + "Documentation (changelog entry is not required)", + "Documentation", + ], + "pr-feature": ["New Feature"], + "pr-improvement": ["Improvement"], + "pr-not-for-changelog": [ + "Not for changelog (changelog entry is not required)", + "Not for changelog", + ], + "pr-performance": ["Performance Improvement"], + "pr-ci": ["CI Fix or Improvement (changelog entry is not required)"], +} + +CATEGORY_TO_LABEL = { + c: lb for lb, categories in LABEL_CATEGORIES.items() for c in categories +} + + +def check_pr_description(pr_body: str, repo_name: str) -> Tuple[str, str]: + """The function checks the body to being properly formatted according to + .github/PULL_REQUEST_TEMPLATE.md, if the first returned string is not empty, + then there is an error.""" + lines = list(map(lambda x: x.strip(), pr_body.split("\n") if pr_body else [])) + lines = [re.sub(r"\s+", " ", line) for line in lines] + + # Check if body contains "Reverts ClickHouse/ClickHouse#36337" + if [True for line in lines if re.match(rf"\AReverts {repo_name}#[\d]+\Z", line)]: + return "", LABEL_CATEGORIES["pr-not-for-changelog"][0] + + category = "" + entry = "" + description_error = "" + + i = 0 + while i < len(lines): + if re.match(r"(?i)^[#>*_ ]*change\s*log\s*category", lines[i]): + i += 1 + if i >= len(lines): + break + # Can have one empty line between header and the category + # itself. Filter it out. + if not lines[i]: + i += 1 + if i >= len(lines): + break + category = re.sub(r"^[-*\s]*", "", lines[i]) + i += 1 + + # Should not have more than one category. Require empty line + # after the first found category. + if i >= len(lines): + break + if lines[i]: + second_category = re.sub(r"^[-*\s]*", "", lines[i]) + description_error = ( + "More than one changelog category specified: " + f"'{category}', '{second_category}'" + ) + return description_error, category + + elif re.match( + r"(?i)^[#>*_ ]*(short\s*description|change\s*log\s*entry)", lines[i] + ): + i += 1 + # Can have one empty line between header and the entry itself. + # Filter it out. + if i < len(lines) and not lines[i]: + i += 1 + # All following lines until empty one are the changelog entry. + entry_lines = [] + while i < len(lines) and lines[i]: + entry_lines.append(lines[i]) + i += 1 + entry = " ".join(entry_lines) + # Don't accept changelog entries like '...'. + entry = re.sub(r"[#>*_.\- ]", "", entry) + # Don't accept changelog entries like 'Close #12345'. + entry = re.sub(r"^[\w\-\s]{0,10}#?\d{5,6}\.?$", "", entry) + else: + i += 1 + + if not category: + description_error = "Changelog category is empty" + # Filter out the PR categories that are not for changelog. + elif "(changelog entry is not required)" in category: + pass # to not check the rest of the conditions + elif category not in CATEGORY_TO_LABEL: + description_error, category = f"Category '{category}' is not valid", "" + elif not entry: + description_error = f"Changelog entry required for category '{category}'" + + return description_error, category + + def pr_is_by_trusted_user(pr_user_login, pr_user_orgs): - if pr_user_login.lower() in CI.TRUSTED_CONTRIBUTORS: + if pr_user_login.lower() in TRUSTED_CONTRIBUTORS: logging.info("User '%s' is trusted", pr_user_login) return True @@ -92,22 +224,20 @@ def main(): commit = get_commit(gh, pr_info.sha) status = SUCCESS # type: StatusType - description_error, category = Utils.check_pr_description( - pr_info.body, GITHUB_REPOSITORY - ) + description_error, category = check_pr_description(pr_info.body, GITHUB_REPOSITORY) pr_labels_to_add = [] pr_labels_to_remove = [] if ( - category in CI.CATEGORY_TO_LABEL - and CI.CATEGORY_TO_LABEL[category] not in pr_info.labels + category in CATEGORY_TO_LABEL + and CATEGORY_TO_LABEL[category] not in pr_info.labels ): - pr_labels_to_add.append(CI.CATEGORY_TO_LABEL[category]) + pr_labels_to_add.append(CATEGORY_TO_LABEL[category]) for label in pr_info.labels: if ( - label in CI.CATEGORY_TO_LABEL.values() - and category in CI.CATEGORY_TO_LABEL - and label != CI.CATEGORY_TO_LABEL[category] + label in CATEGORY_TO_LABEL.values() + and category in CATEGORY_TO_LABEL + and label != CATEGORY_TO_LABEL[category] ): pr_labels_to_remove.append(label) diff --git a/tests/ci/test_ci_config.py b/tests/ci/test_ci_config.py index f376a129e6f..6ffedfdecd4 100644 --- a/tests/ci/test_ci_config.py +++ b/tests/ci/test_ci_config.py @@ -9,7 +9,7 @@ from ci_settings import CiSettings from pr_info import PRInfo, EventType from s3_helper import S3Helper from ci_cache import CiCache -from ci_utils import normalize_string +from ci_utils import Utils _TEST_EVENT_JSON = {"dummy": "dummy"} @@ -55,7 +55,7 @@ class TestCIConfig(unittest.TestCase): if CI.JOB_CONFIGS[job].job_name_keyword: self.assertTrue( CI.JOB_CONFIGS[job].job_name_keyword.lower() - in normalize_string(job), + in Utils.normalize_string(job), f"Job [{job}] apparently uses wrong common config with job keyword [{CI.JOB_CONFIGS[job].job_name_keyword}]", ) @@ -291,7 +291,9 @@ class TestCIConfig(unittest.TestCase): assert tag_config set_jobs = tag_config.run_jobs for job in set_jobs: - if any(k in normalize_string(job) for k in settings.exclude_keywords): + if any( + k in Utils.normalize_string(job) for k in settings.exclude_keywords + ): continue expected_jobs_to_do.append(job) for job, config in CI.JOB_CONFIGS.items(): @@ -303,12 +305,12 @@ class TestCIConfig(unittest.TestCase): # expected to run all builds jobs expected_jobs_to_do.append(job) if not any( - keyword in normalize_string(job) + keyword in Utils.normalize_string(job) for keyword in settings.include_keywords ): continue if any( - keyword in normalize_string(job) + keyword in Utils.normalize_string(job) for keyword in settings.exclude_keywords ): continue