CI: Minor refactoring in ci_utils

2024-09-19 16:20:50 +00:00 · 2024-08-03 10:40:12 +02:00 · 2024-08-03 10:40:12 +02:00 · 3dedd8d76b
commit 3dedd8d76b
parent 7af5340e6e
10 changed files with 186 additions and 186 deletions
--- a/tests/ci/bugfix_validate_check.py
+++ b/tests/ci/bugfix_validate_check.py
@ -8,7 +8,6 @@ from pathlib import Path
 from typing import List, Sequence, Tuple

 from ci_config import CI
-from ci_utils import normalize_string
 from env_helper import TEMP_PATH
 from functional_test_check import NO_CHANGES_MSG
 from report import (
@ -142,7 +141,9 @@ def main():
        for file in set(jr.additional_files):
            file_ = Path(file)
            file_name = file_.name
-            file_name = file_name.replace(".", "__" + normalize_string(job_id) + ".", 1)
+            file_name = file_name.replace(
+                ".", "__" + CI.Utils.normalize_string(job_id) + ".", 1
+            )
            file_ = file_.rename(file_.parent / file_name)
            additional_files.append(file_)

--- a/tests/ci/ci.py
+++ b/tests/ci/ci.py
@ -16,7 +16,7 @@ import upload_result_helper
 from build_check import get_release_or_pr
 from ci_config import CI
 from ci_metadata import CiMetadata
-from ci_utils import GH, normalize_string, Utils
+from ci_utils import GH, Utils
 from clickhouse_helper import (
    CiLogsCredentials,
    ClickHouseHelper,
@ -296,7 +296,7 @@ def _pre_action(s3, job_name, batch, indata, pr_info):
        # do not set report prefix for scheduled or dispatched wf (in case it started from feature branch while
        #   testing), otherwise reports won't be found
        if not (pr_info.is_scheduled or pr_info.is_dispatched):
-            report_prefix = normalize_string(pr_info.head_ref)
+            report_prefix = Utils.normalize_string(pr_info.head_ref)
    print(
        f"Use report prefix [{report_prefix}], pr_num [{pr_info.number}], head_ref [{pr_info.head_ref}]"
    )
@ -718,7 +718,7 @@ def _upload_build_artifacts(
        (
            get_release_or_pr(pr_info, get_version_from_repo())[1],
            pr_info.sha,
-            normalize_string(build_name),
+            Utils.normalize_string(build_name),
            "performance.tar.zst",
        )
    )
@ -1248,7 +1248,7 @@ def main() -> int:
                    (
                        get_release_or_pr(pr_info, get_version_from_repo())[0],
                        pr_info.sha,
-                        normalize_string(
+                        Utils.normalize_string(
                            job_report.check_name or _get_ext_check_name(args.job_name)
                        ),
                    )
--- a/tests/ci/ci_cache.py
+++ b/tests/ci/ci_cache.py
@ -7,7 +7,7 @@ from typing import Dict, Optional, Any, Union, Sequence, List, Set

 from ci_config import CI

-from ci_utils import is_hex, GH
+from ci_utils import Utils, GH
 from commit_status_helper import CommitStatusData
 from env_helper import (
    TEMP_PATH,
@ -240,7 +240,7 @@ class CiCache:
            int(job_properties[-1]),
        )

-        if not is_hex(job_digest):
+        if not Utils.is_hex(job_digest):
            print("ERROR: wrong record job digest")
            return None

--- a/tests/ci/ci_config.py
+++ b/tests/ci/ci_config.py
@ -3,7 +3,7 @@ import re
 from argparse import ArgumentDefaultsHelpFormatter, ArgumentParser
 from typing import Dict, Optional, List

-from ci_utils import normalize_string
+from ci_utils import Utils
 from ci_definitions import *


@ -13,7 +13,6 @@ class CI:
    each config item in the below dicts should be an instance of JobConfig class or inherited from it
    """

-    MAX_TOTAL_FAILURES_BEFORE_BLOCKING_CI = 5
    MAX_TOTAL_FAILURES_PER_JOB_BEFORE_BLOCKING_CI = 2

    # reimport types to CI class so that they visible as CI.* and mypy is happy
@ -37,9 +36,7 @@ class CI:
    from ci_utils import GH as GH
    from ci_utils import Shell as Shell
    from ci_definitions import Labels as Labels
-    from ci_definitions import TRUSTED_CONTRIBUTORS as TRUSTED_CONTRIBUTORS
    from ci_definitions import WorkFlowNames as WorkFlowNames
-    from ci_utils import CATEGORY_TO_LABEL as CATEGORY_TO_LABEL

    # Jobs that run for doc related updates
    _DOCS_CHECK_JOBS = [JobNames.DOCS_CHECK, JobNames.STYLE_CHECK]
@ -558,7 +555,7 @@ class CI:
    @classmethod
    def get_tag_config(cls, label_name: str) -> Optional[LabelConfig]:
        for label, config in cls.TAG_CONFIGS.items():
-            if normalize_string(label_name) == normalize_string(label):
+            if Utils.normalize_string(label_name) == Utils.normalize_string(label):
                return config
        return None

--- a/tests/ci/ci_definitions.py
+++ b/tests/ci/ci_definitions.py
@ -32,28 +32,6 @@ class Labels:
    AUTO_BACKPORT = {"pr-critical-bugfix"}


-TRUSTED_CONTRIBUTORS = {
-    e.lower()
-    for e in [
-        "amosbird",
-        "azat",  # SEMRush
-        "bharatnc",  # Many contributions.
-        "cwurm",  # ClickHouse, Inc
-        "den-crane",  # Documentation contributor
-        "ildus",  # adjust, ex-pgpro
-        "nvartolomei",  # Seasoned contributor, CloudFlare
-        "taiyang-li",
-        "ucasFL",  # Amos Bird's friend
-        "thomoco",  # ClickHouse, Inc
-        "tonickkozlov",  # Cloudflare
-        "tylerhannan",  # ClickHouse, Inc
-        "tsolodov",  # ClickHouse, Inc
-        "justindeguzman",  # ClickHouse, Inc
-        "XuJia0210",  # ClickHouse, Inc
-    ]
-}
-
-
 class WorkflowStages(metaclass=WithIter):
    """
    Stages of GitHUb actions workflow
--- a/tests/ci/ci_settings.py
+++ b/tests/ci/ci_settings.py
@ -2,7 +2,6 @@ import re
 from dataclasses import dataclass, asdict
 from typing import Optional, List, Dict, Any, Iterable

-from ci_utils import normalize_string
 from ci_config import CI
 from git_helper import Runner as GitRunner, GIT_PREFIX
 from pr_info import PRInfo
@ -89,14 +88,14 @@ class CiSettings:
                if not res.include_keywords:
                    res.include_keywords = []
                res.include_keywords.append(
-                    normalize_string(match.removeprefix("ci_include_"))
+                    CI.Utils.normalize_string(match.removeprefix("ci_include_"))
                )
            elif match.startswith("ci_exclude_"):
                if not res.exclude_keywords:
                    res.exclude_keywords = []
                keywords = match.removeprefix("ci_exclude_").split("|")
                res.exclude_keywords += [
-                    normalize_string(keyword) for keyword in keywords
+                    CI.Utils.normalize_string(keyword) for keyword in keywords
                ]
            elif match == CI.Tags.NO_CI_CACHE:
                res.no_ci_cache = True
@ -163,7 +162,7 @@ class CiSettings:
        # do not exclude builds
        if self.exclude_keywords and not CI.is_build_job(job):
            for keyword in self.exclude_keywords:
-                if keyword in normalize_string(job):
+                if keyword in CI.Utils.normalize_string(job):
                    print(f"Job [{job}] matches Exclude keyword [{keyword}] - deny")
                    return False

@ -174,7 +173,7 @@ class CiSettings:
                # never exclude Style Check by include keywords
                return True
            for keyword in self.include_keywords:
-                if keyword in normalize_string(job):
+                if keyword in CI.Utils.normalize_string(job):
                    print(f"Job [{job}] matches Include keyword [{keyword}] - pass")
                    return True
            to_deny = True
--- a/tests/ci/ci_utils.py
+++ b/tests/ci/ci_utils.py
@ -6,7 +6,7 @@ import sys
 import time
 from contextlib import contextmanager
 from pathlib import Path
-from typing import Any, Iterator, List, Union, Optional, Sequence, Tuple
+from typing import Any, Iterator, List, Union, Optional, Sequence

 import requests

@ -20,41 +20,6 @@ class Envs:
    GITHUB_WORKFLOW = os.getenv("GITHUB_WORKFLOW", "")


-LABEL_CATEGORIES = {
-    "pr-backward-incompatible": ["Backward Incompatible Change"],
-    "pr-bugfix": [
-        "Bug Fix",
-        "Bug Fix (user-visible misbehavior in an official stable release)",
-        "Bug Fix (user-visible misbehaviour in official stable or prestable release)",
-        "Bug Fix (user-visible misbehavior in official stable or prestable release)",
-    ],
-    "pr-critical-bugfix": ["Critical Bug Fix (crash, LOGICAL_ERROR, data loss, RBAC)"],
-    "pr-build": [
-        "Build/Testing/Packaging Improvement",
-        "Build Improvement",
-        "Build/Testing Improvement",
-        "Build",
-        "Packaging Improvement",
-    ],
-    "pr-documentation": [
-        "Documentation (changelog entry is not required)",
-        "Documentation",
-    ],
-    "pr-feature": ["New Feature"],
-    "pr-improvement": ["Improvement"],
-    "pr-not-for-changelog": [
-        "Not for changelog (changelog entry is not required)",
-        "Not for changelog",
-    ],
-    "pr-performance": ["Performance Improvement"],
-    "pr-ci": ["CI Fix or Improvement (changelog entry is not required)"],
-}
-
-CATEGORY_TO_LABEL = {
-    c: lb for lb, categories in LABEL_CATEGORIES.items() for c in categories
-}
-
-
 class WithIter(type):
    def __iter__(cls):
        return (v for k, v in cls.__dict__.items() if not k.startswith("_"))
@ -70,21 +35,6 @@ def cd(path: Union[Path, str]) -> Iterator[None]:
        os.chdir(oldpwd)


-def is_hex(s):
-    try:
-        int(s, 16)
-        return True
-    except ValueError:
-        return False
-
-
-def normalize_string(string: str) -> str:
-    res = string.lower()
-    for r in ((" ", "_"), ("(", "_"), (")", "_"), (",", "_"), ("/", "_"), ("-", "_")):
-        res = res.replace(*r)
-    return res
-
-
 class GH:
    class ActionsNames:
        RunConfig = "RunConfig"
@ -149,8 +99,8 @@ class GH:
    ) -> str:
        assert len(token) == 40
        assert len(commit_sha) == 40
-        assert is_hex(commit_sha)
-        assert not is_hex(token)
+        assert Utils.is_hex(commit_sha)
+        assert not Utils.is_hex(token)
        url = f"https://api.github.com/repos/{Envs.GITHUB_REPOSITORY}/commits/{commit_sha}/statuses?per_page={200}"
        headers = {
            "Authorization": f"token {token}",
@ -298,79 +248,23 @@ class Utils:
        Shell.check("sudo dmesg --clear", verbose=True)

    @staticmethod
-    def check_pr_description(pr_body: str, repo_name: str) -> Tuple[str, str]:
-        """The function checks the body to being properly formatted according to
-        .github/PULL_REQUEST_TEMPLATE.md, if the first returned string is not empty,
-        then there is an error."""
-        lines = list(map(lambda x: x.strip(), pr_body.split("\n") if pr_body else []))
-        lines = [re.sub(r"\s+", " ", line) for line in lines]
+    def is_hex(s):
+        try:
+            int(s, 16)
+            return True
+        except ValueError:
+            return False

-        # Check if body contains "Reverts ClickHouse/ClickHouse#36337"
-        if [
-            True for line in lines if re.match(rf"\AReverts {repo_name}#[\d]+\Z", line)
-        ]:
-            return "", LABEL_CATEGORIES["pr-not-for-changelog"][0]
-
-        category = ""
-        entry = ""
-        description_error = ""
-
-        i = 0
-        while i < len(lines):
-            if re.match(r"(?i)^[#>*_ ]*change\s*log\s*category", lines[i]):
-                i += 1
-                if i >= len(lines):
-                    break
-                # Can have one empty line between header and the category
-                # itself. Filter it out.
-                if not lines[i]:
-                    i += 1
-                    if i >= len(lines):
-                        break
-                category = re.sub(r"^[-*\s]*", "", lines[i])
-                i += 1
-
-                # Should not have more than one category. Require empty line
-                # after the first found category.
-                if i >= len(lines):
-                    break
-                if lines[i]:
-                    second_category = re.sub(r"^[-*\s]*", "", lines[i])
-                    description_error = (
-                        "More than one changelog category specified: "
-                        f"'{category}', '{second_category}'"
-                    )
-                    return description_error, category
-
-            elif re.match(
-                r"(?i)^[#>*_ ]*(short\s*description|change\s*log\s*entry)", lines[i]
+    @staticmethod
+    def normalize_string(string: str) -> str:
+        res = string.lower()
+        for r in (
+            (" ", "_"),
+            ("(", "_"),
+            (")", "_"),
+            (",", "_"),
+            ("/", "_"),
+            ("-", "_"),
        ):
-                i += 1
-                # Can have one empty line between header and the entry itself.
-                # Filter it out.
-                if i < len(lines) and not lines[i]:
-                    i += 1
-                # All following lines until empty one are the changelog entry.
-                entry_lines = []
-                while i < len(lines) and lines[i]:
-                    entry_lines.append(lines[i])
-                    i += 1
-                entry = " ".join(entry_lines)
-                # Don't accept changelog entries like '...'.
-                entry = re.sub(r"[#>*_.\- ]", "", entry)
-                # Don't accept changelog entries like 'Close #12345'.
-                entry = re.sub(r"^[\w\-\s]{0,10}#?\d{5,6}\.?$", "", entry)
-            else:
-                i += 1
-
-        if not category:
-            description_error = "Changelog category is empty"
-        # Filter out the PR categories that are not for changelog.
-        elif "(changelog entry is not required)" in category:
-            pass  # to not check the rest of the conditions
-        elif category not in CATEGORY_TO_LABEL:
-            description_error, category = f"Category '{category}' is not valid", ""
-        elif not entry:
-            description_error = f"Changelog entry required for category '{category}'"
-
-        return description_error, category
+            res = res.replace(*r)
+        return res
--- a/tests/ci/report.py
+++ b/tests/ci/report.py
@ -22,7 +22,6 @@ from typing import (

 from build_download_helper import get_gh_api
 from ci_config import CI
-from ci_utils import normalize_string
 from env_helper import REPORT_PATH, GITHUB_WORKSPACE

 logger = logging.getLogger(__name__)
@ -622,7 +621,7 @@ class BuildResult:

    def write_json(self, directory: Union[Path, str] = REPORT_PATH) -> Path:
        path = Path(directory) / self.get_report_name(
-            self.build_name, self.pr_number or normalize_string(self.head_ref)
+            self.build_name, self.pr_number or CI.Utils.normalize_string(self.head_ref)
        )
        path.write_text(
            json.dumps(
--- a/tests/ci/run_check.py
+++ b/tests/ci/run_check.py
@ -1,5 +1,6 @@
 #!/usr/bin/env python3
 import logging
+import re
 import sys
 from typing import Tuple

@ -16,7 +17,6 @@ from commit_status_helper import (
 from env_helper import GITHUB_REPOSITORY, GITHUB_SERVER_URL
 from get_robot_token import get_best_robot_token
 from ci_config import CI
-from ci_utils import Utils
 from pr_info import PRInfo
 from report import FAILURE, PENDING, SUCCESS, StatusType

@ -25,12 +25,144 @@ TRUSTED_ORG_IDS = {
    54801242,  # clickhouse
 }

+TRUSTED_CONTRIBUTORS = {
+    e.lower()
+    for e in [
+        "amosbird",
+        "azat",  # SEMRush
+        "bharatnc",  # Many contributions.
+        "cwurm",  # ClickHouse, Inc
+        "den-crane",  # Documentation contributor
+        "ildus",  # adjust, ex-pgpro
+        "nvartolomei",  # Seasoned contributor, CloudFlare
+        "taiyang-li",
+        "ucasFL",  # Amos Bird's friend
+        "thomoco",  # ClickHouse, Inc
+        "tonickkozlov",  # Cloudflare
+        "tylerhannan",  # ClickHouse, Inc
+        "tsolodov",  # ClickHouse, Inc
+        "justindeguzman",  # ClickHouse, Inc
+        "XuJia0210",  # ClickHouse, Inc
+    ]
+}
+
 OK_SKIP_LABELS = {CI.Labels.RELEASE, CI.Labels.PR_BACKPORT, CI.Labels.PR_CHERRYPICK}
 PR_CHECK = "PR Check"


+LABEL_CATEGORIES = {
+    "pr-backward-incompatible": ["Backward Incompatible Change"],
+    "pr-bugfix": [
+        "Bug Fix",
+        "Bug Fix (user-visible misbehavior in an official stable release)",
+        "Bug Fix (user-visible misbehaviour in official stable or prestable release)",
+        "Bug Fix (user-visible misbehavior in official stable or prestable release)",
+    ],
+    "pr-critical-bugfix": ["Critical Bug Fix (crash, LOGICAL_ERROR, data loss, RBAC)"],
+    "pr-build": [
+        "Build/Testing/Packaging Improvement",
+        "Build Improvement",
+        "Build/Testing Improvement",
+        "Build",
+        "Packaging Improvement",
+    ],
+    "pr-documentation": [
+        "Documentation (changelog entry is not required)",
+        "Documentation",
+    ],
+    "pr-feature": ["New Feature"],
+    "pr-improvement": ["Improvement"],
+    "pr-not-for-changelog": [
+        "Not for changelog (changelog entry is not required)",
+        "Not for changelog",
+    ],
+    "pr-performance": ["Performance Improvement"],
+    "pr-ci": ["CI Fix or Improvement (changelog entry is not required)"],
+}
+
+CATEGORY_TO_LABEL = {
+    c: lb for lb, categories in LABEL_CATEGORIES.items() for c in categories
+}
+
+
+def check_pr_description(pr_body: str, repo_name: str) -> Tuple[str, str]:
+    """Check that the PR body is properly formatted according to
+    .github/PULL_REQUEST_TEMPLATE.md. If the first returned string is not empty,
+    then there is an error."""
+    lines = list(map(lambda x: x.strip(), pr_body.split("\n") if pr_body else []))
+    lines = [re.sub(r"\s+", " ", line) for line in lines]
+
+    # Check if body contains "Reverts ClickHouse/ClickHouse#36337"
+    if [True for line in lines if re.match(rf"\AReverts {repo_name}#[\d]+\Z", line)]:
+        return "", LABEL_CATEGORIES["pr-not-for-changelog"][0]
+
+    category = ""
+    entry = ""
+    description_error = ""
+
+    i = 0
+    while i < len(lines):
+        if re.match(r"(?i)^[#>*_ ]*change\s*log\s*category", lines[i]):
+            i += 1
+            if i >= len(lines):
+                break
+            # Can have one empty line between header and the category
+            # itself. Filter it out.
+            if not lines[i]:
+                i += 1
+                if i >= len(lines):
+                    break
+            category = re.sub(r"^[-*\s]*", "", lines[i])
+            i += 1
+
+            # Should not have more than one category. Require empty line
+            # after the first found category.
+            if i >= len(lines):
+                break
+            if lines[i]:
+                second_category = re.sub(r"^[-*\s]*", "", lines[i])
+                description_error = (
+                    "More than one changelog category specified: "
+                    f"'{category}', '{second_category}'"
+                )
+                return description_error, category
+
+        elif re.match(
+            r"(?i)^[#>*_ ]*(short\s*description|change\s*log\s*entry)", lines[i]
+        ):
+            i += 1
+            # Can have one empty line between header and the entry itself.
+            # Filter it out.
+            if i < len(lines) and not lines[i]:
+                i += 1
+            # All following lines until an empty one are the changelog entry.
+            entry_lines = []
+            while i < len(lines) and lines[i]:
+                entry_lines.append(lines[i])
+                i += 1
+            entry = " ".join(entry_lines)
+            # Don't accept changelog entries like '...'.
+            entry = re.sub(r"[#>*_.\- ]", "", entry)
+            # Don't accept changelog entries like 'Close #12345'.
+            entry = re.sub(r"^[\w\-\s]{0,10}#?\d{5,6}\.?$", "", entry)
+        else:
+            i += 1
+
+    if not category:
+        description_error = "Changelog category is empty"
+    # Filter out the PR categories that are not for changelog.
+    elif "(changelog entry is not required)" in category:
+        pass  # to not check the rest of the conditions
+    elif category not in CATEGORY_TO_LABEL:
+        description_error, category = f"Category '{category}' is not valid", ""
+    elif not entry:
+        description_error = f"Changelog entry required for category '{category}'"
+
+    return description_error, category
+
+
 def pr_is_by_trusted_user(pr_user_login, pr_user_orgs):
-    if pr_user_login.lower() in CI.TRUSTED_CONTRIBUTORS:
+    if pr_user_login.lower() in TRUSTED_CONTRIBUTORS:
        logging.info("User '%s' is trusted", pr_user_login)
        return True

@ -92,22 +224,20 @@ def main():
    commit = get_commit(gh, pr_info.sha)
    status = SUCCESS  # type: StatusType

-    description_error, category = Utils.check_pr_description(
-        pr_info.body, GITHUB_REPOSITORY
-    )
+    description_error, category = check_pr_description(pr_info.body, GITHUB_REPOSITORY)
    pr_labels_to_add = []
    pr_labels_to_remove = []
    if (
-        category in CI.CATEGORY_TO_LABEL
-        and CI.CATEGORY_TO_LABEL[category] not in pr_info.labels
+        category in CATEGORY_TO_LABEL
+        and CATEGORY_TO_LABEL[category] not in pr_info.labels
    ):
-        pr_labels_to_add.append(CI.CATEGORY_TO_LABEL[category])
+        pr_labels_to_add.append(CATEGORY_TO_LABEL[category])

    for label in pr_info.labels:
        if (
-            label in CI.CATEGORY_TO_LABEL.values()
-            and category in CI.CATEGORY_TO_LABEL
-            and label != CI.CATEGORY_TO_LABEL[category]
+            label in CATEGORY_TO_LABEL.values()
+            and category in CATEGORY_TO_LABEL
+            and label != CATEGORY_TO_LABEL[category]
        ):
            pr_labels_to_remove.append(label)

--- a/tests/ci/test_ci_config.py
+++ b/tests/ci/test_ci_config.py
@ -9,7 +9,7 @@ from ci_settings import CiSettings
 from pr_info import PRInfo, EventType
 from s3_helper import S3Helper
 from ci_cache import CiCache
-from ci_utils import normalize_string
+from ci_utils import Utils


 _TEST_EVENT_JSON = {"dummy": "dummy"}
@ -55,7 +55,7 @@ class TestCIConfig(unittest.TestCase):
            if CI.JOB_CONFIGS[job].job_name_keyword:
                self.assertTrue(
                    CI.JOB_CONFIGS[job].job_name_keyword.lower()
-                    in normalize_string(job),
+                    in Utils.normalize_string(job),
                    f"Job [{job}] apparently uses wrong common config with job keyword [{CI.JOB_CONFIGS[job].job_name_keyword}]",
                )

@ -291,7 +291,9 @@ class TestCIConfig(unittest.TestCase):
            assert tag_config
            set_jobs = tag_config.run_jobs
            for job in set_jobs:
-                if any(k in normalize_string(job) for k in settings.exclude_keywords):
+                if any(
+                    k in Utils.normalize_string(job) for k in settings.exclude_keywords
+                ):
                    continue
                expected_jobs_to_do.append(job)
        for job, config in CI.JOB_CONFIGS.items():
@ -303,12 +305,12 @@ class TestCIConfig(unittest.TestCase):
                # expected to run all builds jobs
                expected_jobs_to_do.append(job)
            if not any(
-                keyword in normalize_string(job)
+                keyword in Utils.normalize_string(job)
                for keyword in settings.include_keywords
            ):
                continue
            if any(
-                keyword in normalize_string(job)
+                keyword in Utils.normalize_string(job)
                for keyword in settings.exclude_keywords
            ):
                continue