CI: Minor refactoring in ci_utils

This commit is contained in:
Max K 2024-08-03 10:40:12 +02:00
parent 7af5340e6e
commit 3dedd8d76b
10 changed files with 186 additions and 186 deletions

View File

@ -8,7 +8,6 @@ from pathlib import Path
from typing import List, Sequence, Tuple
from ci_config import CI
from ci_utils import normalize_string
from env_helper import TEMP_PATH
from functional_test_check import NO_CHANGES_MSG
from report import (
@ -142,7 +141,9 @@ def main():
for file in set(jr.additional_files):
file_ = Path(file)
file_name = file_.name
file_name = file_name.replace(".", "__" + normalize_string(job_id) + ".", 1)
file_name = file_name.replace(
".", "__" + CI.Utils.normalize_string(job_id) + ".", 1
)
file_ = file_.rename(file_.parent / file_name)
additional_files.append(file_)

View File

@ -16,7 +16,7 @@ import upload_result_helper
from build_check import get_release_or_pr
from ci_config import CI
from ci_metadata import CiMetadata
from ci_utils import GH, normalize_string, Utils
from ci_utils import GH, Utils
from clickhouse_helper import (
CiLogsCredentials,
ClickHouseHelper,
@ -296,7 +296,7 @@ def _pre_action(s3, job_name, batch, indata, pr_info):
# do not set report prefix for scheduled or dispatched wf (in case it started from feature branch while
# testing), otherwise reports won't be found
if not (pr_info.is_scheduled or pr_info.is_dispatched):
report_prefix = normalize_string(pr_info.head_ref)
report_prefix = Utils.normalize_string(pr_info.head_ref)
print(
f"Use report prefix [{report_prefix}], pr_num [{pr_info.number}], head_ref [{pr_info.head_ref}]"
)
@ -718,7 +718,7 @@ def _upload_build_artifacts(
(
get_release_or_pr(pr_info, get_version_from_repo())[1],
pr_info.sha,
normalize_string(build_name),
Utils.normalize_string(build_name),
"performance.tar.zst",
)
)
@ -1248,7 +1248,7 @@ def main() -> int:
(
get_release_or_pr(pr_info, get_version_from_repo())[0],
pr_info.sha,
normalize_string(
Utils.normalize_string(
job_report.check_name or _get_ext_check_name(args.job_name)
),
)

View File

@ -7,7 +7,7 @@ from typing import Dict, Optional, Any, Union, Sequence, List, Set
from ci_config import CI
from ci_utils import is_hex, GH
from ci_utils import Utils, GH
from commit_status_helper import CommitStatusData
from env_helper import (
TEMP_PATH,
@ -240,7 +240,7 @@ class CiCache:
int(job_properties[-1]),
)
if not is_hex(job_digest):
if not Utils.is_hex(job_digest):
print("ERROR: wrong record job digest")
return None

View File

@ -3,7 +3,7 @@ import re
from argparse import ArgumentDefaultsHelpFormatter, ArgumentParser
from typing import Dict, Optional, List
from ci_utils import normalize_string
from ci_utils import Utils
from ci_definitions import *
@ -13,7 +13,6 @@ class CI:
each config item in the below dicts should be an instance of JobConfig class or inherited from it
"""
MAX_TOTAL_FAILURES_BEFORE_BLOCKING_CI = 5
MAX_TOTAL_FAILURES_PER_JOB_BEFORE_BLOCKING_CI = 2
# reimport types to CI class so that they visible as CI.* and mypy is happy
@ -37,9 +36,7 @@ class CI:
from ci_utils import GH as GH
from ci_utils import Shell as Shell
from ci_definitions import Labels as Labels
from ci_definitions import TRUSTED_CONTRIBUTORS as TRUSTED_CONTRIBUTORS
from ci_definitions import WorkFlowNames as WorkFlowNames
from ci_utils import CATEGORY_TO_LABEL as CATEGORY_TO_LABEL
# Jobs that run for doc related updates
_DOCS_CHECK_JOBS = [JobNames.DOCS_CHECK, JobNames.STYLE_CHECK]
@ -558,7 +555,7 @@ class CI:
@classmethod
def get_tag_config(cls, label_name: str) -> Optional[LabelConfig]:
for label, config in cls.TAG_CONFIGS.items():
if normalize_string(label_name) == normalize_string(label):
if Utils.normalize_string(label_name) == Utils.normalize_string(label):
return config
return None

View File

@ -32,28 +32,6 @@ class Labels:
AUTO_BACKPORT = {"pr-critical-bugfix"}
TRUSTED_CONTRIBUTORS = {
e.lower()
for e in [
"amosbird",
"azat", # SEMRush
"bharatnc", # Many contributions.
"cwurm", # ClickHouse, Inc
"den-crane", # Documentation contributor
"ildus", # adjust, ex-pgpro
"nvartolomei", # Seasoned contributor, CloudFlare
"taiyang-li",
"ucasFL", # Amos Bird's friend
"thomoco", # ClickHouse, Inc
"tonickkozlov", # Cloudflare
"tylerhannan", # ClickHouse, Inc
"tsolodov", # ClickHouse, Inc
"justindeguzman", # ClickHouse, Inc
"XuJia0210", # ClickHouse, Inc
]
}
class WorkflowStages(metaclass=WithIter):
"""
Stages of GitHUb actions workflow

View File

@ -2,7 +2,6 @@ import re
from dataclasses import dataclass, asdict
from typing import Optional, List, Dict, Any, Iterable
from ci_utils import normalize_string
from ci_config import CI
from git_helper import Runner as GitRunner, GIT_PREFIX
from pr_info import PRInfo
@ -89,14 +88,14 @@ class CiSettings:
if not res.include_keywords:
res.include_keywords = []
res.include_keywords.append(
normalize_string(match.removeprefix("ci_include_"))
CI.Utils.normalize_string(match.removeprefix("ci_include_"))
)
elif match.startswith("ci_exclude_"):
if not res.exclude_keywords:
res.exclude_keywords = []
keywords = match.removeprefix("ci_exclude_").split("|")
res.exclude_keywords += [
normalize_string(keyword) for keyword in keywords
CI.Utils.normalize_string(keyword) for keyword in keywords
]
elif match == CI.Tags.NO_CI_CACHE:
res.no_ci_cache = True
@ -163,7 +162,7 @@ class CiSettings:
# do not exclude builds
if self.exclude_keywords and not CI.is_build_job(job):
for keyword in self.exclude_keywords:
if keyword in normalize_string(job):
if keyword in CI.Utils.normalize_string(job):
print(f"Job [{job}] matches Exclude keyword [{keyword}] - deny")
return False
@ -174,7 +173,7 @@ class CiSettings:
# never exclude Style Check by include keywords
return True
for keyword in self.include_keywords:
if keyword in normalize_string(job):
if keyword in CI.Utils.normalize_string(job):
print(f"Job [{job}] matches Include keyword [{keyword}] - pass")
return True
to_deny = True

View File

@ -6,7 +6,7 @@ import sys
import time
from contextlib import contextmanager
from pathlib import Path
from typing import Any, Iterator, List, Union, Optional, Sequence, Tuple
from typing import Any, Iterator, List, Union, Optional, Sequence
import requests
@ -20,41 +20,6 @@ class Envs:
GITHUB_WORKFLOW = os.getenv("GITHUB_WORKFLOW", "")
LABEL_CATEGORIES = {
"pr-backward-incompatible": ["Backward Incompatible Change"],
"pr-bugfix": [
"Bug Fix",
"Bug Fix (user-visible misbehavior in an official stable release)",
"Bug Fix (user-visible misbehaviour in official stable or prestable release)",
"Bug Fix (user-visible misbehavior in official stable or prestable release)",
],
"pr-critical-bugfix": ["Critical Bug Fix (crash, LOGICAL_ERROR, data loss, RBAC)"],
"pr-build": [
"Build/Testing/Packaging Improvement",
"Build Improvement",
"Build/Testing Improvement",
"Build",
"Packaging Improvement",
],
"pr-documentation": [
"Documentation (changelog entry is not required)",
"Documentation",
],
"pr-feature": ["New Feature"],
"pr-improvement": ["Improvement"],
"pr-not-for-changelog": [
"Not for changelog (changelog entry is not required)",
"Not for changelog",
],
"pr-performance": ["Performance Improvement"],
"pr-ci": ["CI Fix or Improvement (changelog entry is not required)"],
}
CATEGORY_TO_LABEL = {
c: lb for lb, categories in LABEL_CATEGORIES.items() for c in categories
}
class WithIter(type):
def __iter__(cls):
return (v for k, v in cls.__dict__.items() if not k.startswith("_"))
@ -70,21 +35,6 @@ def cd(path: Union[Path, str]) -> Iterator[None]:
os.chdir(oldpwd)
def is_hex(s):
try:
int(s, 16)
return True
except ValueError:
return False
def normalize_string(string: str) -> str:
res = string.lower()
for r in ((" ", "_"), ("(", "_"), (")", "_"), (",", "_"), ("/", "_"), ("-", "_")):
res = res.replace(*r)
return res
class GH:
class ActionsNames:
RunConfig = "RunConfig"
@ -149,8 +99,8 @@ class GH:
) -> str:
assert len(token) == 40
assert len(commit_sha) == 40
assert is_hex(commit_sha)
assert not is_hex(token)
assert Utils.is_hex(commit_sha)
assert not Utils.is_hex(token)
url = f"https://api.github.com/repos/{Envs.GITHUB_REPOSITORY}/commits/{commit_sha}/statuses?per_page={200}"
headers = {
"Authorization": f"token {token}",
@ -298,79 +248,23 @@ class Utils:
Shell.check("sudo dmesg --clear", verbose=True)
@staticmethod
def check_pr_description(pr_body: str, repo_name: str) -> Tuple[str, str]:
"""The function checks the body to being properly formatted according to
.github/PULL_REQUEST_TEMPLATE.md, if the first returned string is not empty,
then there is an error."""
lines = list(map(lambda x: x.strip(), pr_body.split("\n") if pr_body else []))
lines = [re.sub(r"\s+", " ", line) for line in lines]
def is_hex(s):
try:
int(s, 16)
return True
except ValueError:
return False
# Check if body contains "Reverts ClickHouse/ClickHouse#36337"
if [
True for line in lines if re.match(rf"\AReverts {repo_name}#[\d]+\Z", line)
]:
return "", LABEL_CATEGORIES["pr-not-for-changelog"][0]
category = ""
entry = ""
description_error = ""
i = 0
while i < len(lines):
if re.match(r"(?i)^[#>*_ ]*change\s*log\s*category", lines[i]):
i += 1
if i >= len(lines):
break
# Can have one empty line between header and the category
# itself. Filter it out.
if not lines[i]:
i += 1
if i >= len(lines):
break
category = re.sub(r"^[-*\s]*", "", lines[i])
i += 1
# Should not have more than one category. Require empty line
# after the first found category.
if i >= len(lines):
break
if lines[i]:
second_category = re.sub(r"^[-*\s]*", "", lines[i])
description_error = (
"More than one changelog category specified: "
f"'{category}', '{second_category}'"
)
return description_error, category
elif re.match(
r"(?i)^[#>*_ ]*(short\s*description|change\s*log\s*entry)", lines[i]
):
i += 1
# Can have one empty line between header and the entry itself.
# Filter it out.
if i < len(lines) and not lines[i]:
i += 1
# All following lines until empty one are the changelog entry.
entry_lines = []
while i < len(lines) and lines[i]:
entry_lines.append(lines[i])
i += 1
entry = " ".join(entry_lines)
# Don't accept changelog entries like '...'.
entry = re.sub(r"[#>*_.\- ]", "", entry)
# Don't accept changelog entries like 'Close #12345'.
entry = re.sub(r"^[\w\-\s]{0,10}#?\d{5,6}\.?$", "", entry)
else:
i += 1
if not category:
description_error = "Changelog category is empty"
# Filter out the PR categories that are not for changelog.
elif "(changelog entry is not required)" in category:
pass # to not check the rest of the conditions
elif category not in CATEGORY_TO_LABEL:
description_error, category = f"Category '{category}' is not valid", ""
elif not entry:
description_error = f"Changelog entry required for category '{category}'"
return description_error, category
@staticmethod
def normalize_string(string: str) -> str:
res = string.lower()
for r in (
(" ", "_"),
("(", "_"),
(")", "_"),
(",", "_"),
("/", "_"),
("-", "_"),
):
res = res.replace(*r)
return res

View File

@ -22,7 +22,6 @@ from typing import (
from build_download_helper import get_gh_api
from ci_config import CI
from ci_utils import normalize_string
from env_helper import REPORT_PATH, GITHUB_WORKSPACE
logger = logging.getLogger(__name__)
@ -622,7 +621,7 @@ class BuildResult:
def write_json(self, directory: Union[Path, str] = REPORT_PATH) -> Path:
path = Path(directory) / self.get_report_name(
self.build_name, self.pr_number or normalize_string(self.head_ref)
self.build_name, self.pr_number or CI.Utils.normalize_string(self.head_ref)
)
path.write_text(
json.dumps(

View File

@ -1,5 +1,6 @@
#!/usr/bin/env python3
import logging
import re
import sys
from typing import Tuple
@ -16,7 +17,6 @@ from commit_status_helper import (
from env_helper import GITHUB_REPOSITORY, GITHUB_SERVER_URL
from get_robot_token import get_best_robot_token
from ci_config import CI
from ci_utils import Utils
from pr_info import PRInfo
from report import FAILURE, PENDING, SUCCESS, StatusType
@ -25,12 +25,144 @@ TRUSTED_ORG_IDS = {
54801242, # clickhouse
}
TRUSTED_CONTRIBUTORS = {
e.lower()
for e in [
"amosbird",
"azat", # SEMRush
"bharatnc", # Many contributions.
"cwurm", # ClickHouse, Inc
"den-crane", # Documentation contributor
"ildus", # adjust, ex-pgpro
"nvartolomei", # Seasoned contributor, CloudFlare
"taiyang-li",
"ucasFL", # Amos Bird's friend
"thomoco", # ClickHouse, Inc
"tonickkozlov", # Cloudflare
"tylerhannan", # ClickHouse, Inc
"tsolodov", # ClickHouse, Inc
"justindeguzman", # ClickHouse, Inc
"XuJia0210", # ClickHouse, Inc
]
}
OK_SKIP_LABELS = {CI.Labels.RELEASE, CI.Labels.PR_BACKPORT, CI.Labels.PR_CHERRYPICK}
PR_CHECK = "PR Check"
LABEL_CATEGORIES = {
"pr-backward-incompatible": ["Backward Incompatible Change"],
"pr-bugfix": [
"Bug Fix",
"Bug Fix (user-visible misbehavior in an official stable release)",
"Bug Fix (user-visible misbehaviour in official stable or prestable release)",
"Bug Fix (user-visible misbehavior in official stable or prestable release)",
],
"pr-critical-bugfix": ["Critical Bug Fix (crash, LOGICAL_ERROR, data loss, RBAC)"],
"pr-build": [
"Build/Testing/Packaging Improvement",
"Build Improvement",
"Build/Testing Improvement",
"Build",
"Packaging Improvement",
],
"pr-documentation": [
"Documentation (changelog entry is not required)",
"Documentation",
],
"pr-feature": ["New Feature"],
"pr-improvement": ["Improvement"],
"pr-not-for-changelog": [
"Not for changelog (changelog entry is not required)",
"Not for changelog",
],
"pr-performance": ["Performance Improvement"],
"pr-ci": ["CI Fix or Improvement (changelog entry is not required)"],
}
CATEGORY_TO_LABEL = {
c: lb for lb, categories in LABEL_CATEGORIES.items() for c in categories
}
def check_pr_description(pr_body: str, repo_name: str) -> Tuple[str, str]:
"""The function checks the body to being properly formatted according to
.github/PULL_REQUEST_TEMPLATE.md, if the first returned string is not empty,
then there is an error."""
lines = list(map(lambda x: x.strip(), pr_body.split("\n") if pr_body else []))
lines = [re.sub(r"\s+", " ", line) for line in lines]
# Check if body contains "Reverts ClickHouse/ClickHouse#36337"
if [True for line in lines if re.match(rf"\AReverts {repo_name}#[\d]+\Z", line)]:
return "", LABEL_CATEGORIES["pr-not-for-changelog"][0]
category = ""
entry = ""
description_error = ""
i = 0
while i < len(lines):
if re.match(r"(?i)^[#>*_ ]*change\s*log\s*category", lines[i]):
i += 1
if i >= len(lines):
break
# Can have one empty line between header and the category
# itself. Filter it out.
if not lines[i]:
i += 1
if i >= len(lines):
break
category = re.sub(r"^[-*\s]*", "", lines[i])
i += 1
# Should not have more than one category. Require empty line
# after the first found category.
if i >= len(lines):
break
if lines[i]:
second_category = re.sub(r"^[-*\s]*", "", lines[i])
description_error = (
"More than one changelog category specified: "
f"'{category}', '{second_category}'"
)
return description_error, category
elif re.match(
r"(?i)^[#>*_ ]*(short\s*description|change\s*log\s*entry)", lines[i]
):
i += 1
# Can have one empty line between header and the entry itself.
# Filter it out.
if i < len(lines) and not lines[i]:
i += 1
# All following lines until empty one are the changelog entry.
entry_lines = []
while i < len(lines) and lines[i]:
entry_lines.append(lines[i])
i += 1
entry = " ".join(entry_lines)
# Don't accept changelog entries like '...'.
entry = re.sub(r"[#>*_.\- ]", "", entry)
# Don't accept changelog entries like 'Close #12345'.
entry = re.sub(r"^[\w\-\s]{0,10}#?\d{5,6}\.?$", "", entry)
else:
i += 1
if not category:
description_error = "Changelog category is empty"
# Filter out the PR categories that are not for changelog.
elif "(changelog entry is not required)" in category:
pass # to not check the rest of the conditions
elif category not in CATEGORY_TO_LABEL:
description_error, category = f"Category '{category}' is not valid", ""
elif not entry:
description_error = f"Changelog entry required for category '{category}'"
return description_error, category
def pr_is_by_trusted_user(pr_user_login, pr_user_orgs):
if pr_user_login.lower() in CI.TRUSTED_CONTRIBUTORS:
if pr_user_login.lower() in TRUSTED_CONTRIBUTORS:
logging.info("User '%s' is trusted", pr_user_login)
return True
@ -92,22 +224,20 @@ def main():
commit = get_commit(gh, pr_info.sha)
status = SUCCESS # type: StatusType
description_error, category = Utils.check_pr_description(
pr_info.body, GITHUB_REPOSITORY
)
description_error, category = check_pr_description(pr_info.body, GITHUB_REPOSITORY)
pr_labels_to_add = []
pr_labels_to_remove = []
if (
category in CI.CATEGORY_TO_LABEL
and CI.CATEGORY_TO_LABEL[category] not in pr_info.labels
category in CATEGORY_TO_LABEL
and CATEGORY_TO_LABEL[category] not in pr_info.labels
):
pr_labels_to_add.append(CI.CATEGORY_TO_LABEL[category])
pr_labels_to_add.append(CATEGORY_TO_LABEL[category])
for label in pr_info.labels:
if (
label in CI.CATEGORY_TO_LABEL.values()
and category in CI.CATEGORY_TO_LABEL
and label != CI.CATEGORY_TO_LABEL[category]
label in CATEGORY_TO_LABEL.values()
and category in CATEGORY_TO_LABEL
and label != CATEGORY_TO_LABEL[category]
):
pr_labels_to_remove.append(label)

View File

@ -9,7 +9,7 @@ from ci_settings import CiSettings
from pr_info import PRInfo, EventType
from s3_helper import S3Helper
from ci_cache import CiCache
from ci_utils import normalize_string
from ci_utils import Utils
_TEST_EVENT_JSON = {"dummy": "dummy"}
@ -55,7 +55,7 @@ class TestCIConfig(unittest.TestCase):
if CI.JOB_CONFIGS[job].job_name_keyword:
self.assertTrue(
CI.JOB_CONFIGS[job].job_name_keyword.lower()
in normalize_string(job),
in Utils.normalize_string(job),
f"Job [{job}] apparently uses wrong common config with job keyword [{CI.JOB_CONFIGS[job].job_name_keyword}]",
)
@ -291,7 +291,9 @@ class TestCIConfig(unittest.TestCase):
assert tag_config
set_jobs = tag_config.run_jobs
for job in set_jobs:
if any(k in normalize_string(job) for k in settings.exclude_keywords):
if any(
k in Utils.normalize_string(job) for k in settings.exclude_keywords
):
continue
expected_jobs_to_do.append(job)
for job, config in CI.JOB_CONFIGS.items():
@ -303,12 +305,12 @@ class TestCIConfig(unittest.TestCase):
# expected to run all builds jobs
expected_jobs_to_do.append(job)
if not any(
keyword in normalize_string(job)
keyword in Utils.normalize_string(job)
for keyword in settings.include_keywords
):
continue
if any(
keyword in normalize_string(job)
keyword in Utils.normalize_string(job)
for keyword in settings.exclude_keywords
):
continue