ClickHouse/tests/ci/ci.py

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

753 lines
27 KiB
Python
Raw Normal View History

import argparse
2023-12-18 12:44:11 +00:00
import concurrent.futures
import json
import os
import re
import subprocess
import sys
2023-12-18 12:44:11 +00:00
from pathlib import Path
from typing import Any, Dict, Iterable, List, Optional
import docker_images_helper
2023-12-18 12:44:11 +00:00
from ci_config import CI_CONFIG
from commit_status_helper import (
CommitStatusData,
format_description,
get_commit,
set_status_comment,
)
2023-12-18 12:44:11 +00:00
from digest_helper import DockerDigester, JobDigester
from env_helper import CI, REPORT_PATH, ROOT_DIR, S3_BUILDS_BUCKET, TEMP_PATH
from get_robot_token import get_best_robot_token
2023-12-18 12:44:11 +00:00
from git_helper import GIT_PREFIX, Git
from git_helper import Runner as GitRunner
from github import Github
from pr_info import PRInfo
from report import BuildResult
2023-12-18 12:44:11 +00:00
from s3_helper import S3Helper
from version_helper import get_version_from_repo
def get_check_name(check_name: str, batch: int, num_batches: int) -> str:
res = check_name
if num_batches > 1:
res = f"{check_name} [{batch+1}/{num_batches}]"
return res
def normalize_check_name(check_name: str) -> str:
res = check_name.lower()
for r in ((" ", "_"), ("(", "_"), (")", "_"), (",", "_"), ("/", "_")):
res = res.replace(*r)
return res
def is_build_job(job: str) -> bool:
if "package_" in job or "binary_" in job or job == "fuzzers":
return True
return False
def is_test_job(job: str) -> bool:
return not is_build_job(job) and not "Style" in job and not "Docs check" in job
def is_docs_job(job: str) -> bool:
return "Docs check" in job
def parse_args(parser: argparse.ArgumentParser) -> argparse.Namespace:
# FIXME: consider switching to sub_parser for configure, pre, run, post actions
parser.add_argument(
"--configure",
action="store_true",
help="Action that configures ci run. Calculates digests, checks job to be executed, generates json output",
)
parser.add_argument(
"--update-gh-statuses",
action="store_true",
help="Action that recreate success GH statuses for jobs that finished successfully in past and will be skipped this time",
)
parser.add_argument(
"--pre",
action="store_true",
help="Action that executes prerequesetes for the job provided in --job-name",
)
parser.add_argument(
"--run",
action="store_true",
help="Action that executes run action for specified --job-name. run_command must be configured for a given job name.",
)
parser.add_argument(
"--post",
action="store_true",
help="Action that executes post actions for the job provided in --job-name",
)
parser.add_argument(
"--mark-success",
action="store_true",
help="Action that marks job provided in --job-name (with batch provided in --batch) as successful",
)
parser.add_argument(
"--job-name",
default="",
type=str,
help="Job name as in config",
)
parser.add_argument(
"--batch",
default=-1,
type=int,
help="Current batch number (required for --mark-success), -1 or omit for single-batch job",
)
parser.add_argument(
"--infile",
default="",
type=str,
help="Input json file or json string with ci run config",
)
parser.add_argument(
"--outfile",
default="",
type=str,
required=False,
help="output file to write json result to, if not set - stdout",
)
parser.add_argument(
"--pretty",
action="store_true",
default=False,
help="makes json output pretty formatted",
)
parser.add_argument(
"--skip-docker",
action="store_true",
default=False,
help="skip fetching docker data from dockerhub, used in --configure action (for debugging)",
)
parser.add_argument(
"--docker-digest-or-latest",
action="store_true",
default=False,
help="temporary hack to fallback to latest if image with digest as a tag is not on docker hub",
)
parser.add_argument(
"--skip-jobs",
action="store_true",
default=False,
help="skip fetching data about job runs, used in --configure action (for debugging)",
)
parser.add_argument(
"--rebuild-all-docker",
action="store_true",
default=False,
help="will create run config for rebuilding all dockers, used in --configure action (for nightly docker job)",
)
parser.add_argument(
"--rebuild-all-binaries",
action="store_true",
default=False,
help="will create run config without skipping build jobs in any case, used in --configure action (for release branches)",
)
return parser.parse_args()
def get_file_flag_name(
job_name: str, digest: str, batch: int = 0, num_batches: int = 1
) -> str:
if num_batches < 2:
return f"job_{job_name}_{digest}.ci"
else:
return f"job_{job_name}_{digest}_{batch}_{num_batches}.ci"
def get_s3_path(build_digest: str) -> str:
return f"CI_data/BUILD-{build_digest}/"
def get_s3_path_docs(digest: str) -> str:
return f"CI_data/DOCS-{digest}/"
def check_missing_images_on_dockerhub(
image_name_tag: Dict[str, str], arch: Optional[str] = None
) -> Dict[str, str]:
"""
Checks missing images on dockerhub.
Works concurrently for all given images.
Docker must be logged in.
"""
def run_docker_command(
image: str, image_digest: str, arch: Optional[str] = None
) -> Dict:
"""
aux command for fetching single docker manifest
"""
command = [
"docker",
"manifest",
"inspect",
f"{image}:{image_digest}" if not arch else f"{image}:{image_digest}-{arch}",
]
process = subprocess.run(
command,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
text=True,
check=False,
)
return {
"image": image,
"image_digest": image_digest,
"arch": arch,
"stdout": process.stdout,
"stderr": process.stderr,
"return_code": process.returncode,
}
result: Dict[str, str] = {}
with concurrent.futures.ThreadPoolExecutor() as executor:
futures = [
executor.submit(run_docker_command, image, tag, arch)
for image, tag in image_name_tag.items()
]
responses = [
future.result() for future in concurrent.futures.as_completed(futures)
]
for resp in responses:
name, stdout, stderr, digest, arch = (
resp["image"],
resp["stdout"],
resp["stderr"],
resp["image_digest"],
resp["arch"],
)
if stderr:
if stderr.startswith("no such manifest"):
result[name] = digest
else:
print(f"Error: Unknown error: {stderr}, {name}, {arch}")
elif stdout:
if "mediaType" in stdout:
pass
else:
print(f"Error: Unknown response: {stdout}")
assert False, "FIXME"
else:
print(f"Error: No response for {name}, {digest}, {arch}")
assert False, "FIXME"
return result
def _check_and_update_for_early_style_check(run_config: dict) -> None:
"""
This is temporary hack to start style check before docker build if possible
FIXME: need better solution to do style check as soon as possible and as fast as possible w/o dependency on docker job
"""
jobs_to_do = run_config.get("jobs_data", {}).get("jobs_to_do", [])
docker_to_build = run_config.get("docker_data", {}).get("missing_multi", [])
if (
"Style check" in jobs_to_do
and docker_to_build
and "clickhouse/style-test" not in docker_to_build
):
index = jobs_to_do.index("Style check")
jobs_to_do[index] = "Style check early"
def _update_config_for_docs_only(run_config: dict) -> None:
DOCS_CHECK_JOBS = ["Docs check", "Style check"]
print(f"NOTE: Will keep only docs related jobs: [{DOCS_CHECK_JOBS}]")
jobs_to_do = run_config.get("jobs_data", {}).get("jobs_to_do", [])
run_config["jobs_data"]["jobs_to_do"] = [
job for job in jobs_to_do if job in DOCS_CHECK_JOBS
]
def _configure_docker_jobs(
rebuild_all_dockers: bool, docker_digest_or_latest: bool = False
) -> Dict:
# generate docker jobs data
docker_digester = DockerDigester()
imagename_digest_dict = (
docker_digester.get_all_digests()
) # 'image name - digest' mapping
images_info = docker_images_helper.get_images_info()
# a. check missing images
print("Start checking missing images in dockerhub")
# FIXME: we need login as docker manifest inspect goes directly to one of the *.docker.com hosts instead of "registry-mirrors" : ["http://dockerhub-proxy.dockerhub-proxy-zone:5000"]
# find if it's possible to use the setting of /etc/docker/daemon.json
docker_images_helper.docker_login()
if not rebuild_all_dockers:
missing_multi_dict = check_missing_images_on_dockerhub(imagename_digest_dict)
missing_multi = list(missing_multi_dict)
missing_amd64 = []
missing_aarch64 = []
if not docker_digest_or_latest:
# look for missing arm and amd images only among missing multiarch manifests @missing_multi_dict
# to avoid extra dockerhub api calls
missing_amd64 = list(
check_missing_images_on_dockerhub(missing_multi_dict, "amd64")
)
# FIXME: WA until full arm support: skip not supported arm images
missing_aarch64 = list(
check_missing_images_on_dockerhub(
{
im: digest
for im, digest in missing_multi_dict.items()
if not images_info[im]["only_amd64"]
},
"aarch64",
)
)
else:
# add all images to missing
missing_multi = list(imagename_digest_dict)
missing_amd64 = missing_multi
# FIXME: WA until full arm support: skip not supported arm images
missing_aarch64 = [
name
for name in imagename_digest_dict
if not images_info[name]["only_amd64"]
]
# FIXME: temporary hack, remove after transition to docker digest as tag
if docker_digest_or_latest:
if missing_multi:
print(
f"WARNING: Missing images {list(missing_multi)} - fallback to latest tag"
)
for image in missing_multi:
imagename_digest_dict[image] = "latest"
print("...checking missing images in dockerhub - done")
return {
"images": imagename_digest_dict,
"missing_aarch64": missing_aarch64,
"missing_amd64": missing_amd64,
"missing_multi": missing_multi,
}
def _configure_jobs(
build_digest: str,
docs_digest: str,
job_digester: JobDigester,
s3: S3Helper,
rebuild_all_binaries: bool,
pr_labels: Iterable[str],
commit_tokens: List[str],
) -> Dict:
# a. digest each item from the config
job_digester = JobDigester()
jobs_params: Dict[str, Dict] = {}
jobs_to_do: List[str] = []
jobs_to_skip: List[str] = []
digests: Dict[str, str] = {}
print("Calculating job digests - start")
for job in CI_CONFIG.job_generator():
digest = job_digester.get_job_digest(CI_CONFIG.get_digest_config(job))
digests[job] = digest
print(f" job [{job.rjust(50)}] has digest [{digest}]")
print("Calculating job digests - done")
# b. check if we have something done
path = get_s3_path(build_digest)
done_files = s3.list_prefix(path)
done_files = [file.split("/")[-1] for file in done_files]
print(f"S3 CI files for the build [{build_digest}]: {done_files}")
docs_path = get_s3_path_docs(docs_digest)
done_files_docs = s3.list_prefix(docs_path)
done_files_docs = [file.split("/")[-1] for file in done_files_docs]
print(f"S3 CI files for the docs [{docs_digest}]: {done_files_docs}")
done_files += done_files_docs
for job in digests:
digest = digests[job]
job_config = CI_CONFIG.get_job_config(job)
num_batches: int = job_config.num_batches
batches_to_do: List[int] = []
if job_config.run_by_label:
# this job controlled by label, add to todo if it's labe is set in pr
if job_config.run_by_label in pr_labels:
for batch in range(num_batches): # type: ignore
batches_to_do.append(batch)
else:
# this job controlled by digest, add to todo if it's not successfully done before
for batch in range(num_batches): # type: ignore
success_flag_name = get_file_flag_name(job, digest, batch, num_batches)
if success_flag_name not in done_files or (
rebuild_all_binaries and is_build_job(job)
):
batches_to_do.append(batch)
if batches_to_do:
jobs_to_do.append(job)
jobs_params[job] = {
"batches": batches_to_do,
"num_batches": num_batches,
}
else:
jobs_to_skip += (job,)
if commit_tokens:
requested_jobs = [
token[len("#job_") :]
for token in commit_tokens
if token.startswith("#job_")
]
assert any(
len(x) > 1 for x in requested_jobs
), f"Invalid job names requested [{requested_jobs}]"
if requested_jobs:
jobs_to_do_requested = []
for job in requested_jobs:
job_with_parents = CI_CONFIG.get_job_with_parents(job)
# always add requested job itself, even if it could be skipped
jobs_to_do_requested.append(job_with_parents[0])
for parent in job_with_parents[1:]:
if parent in jobs_to_do and parent not in jobs_to_do_requested:
jobs_to_do_requested.append(parent)
print(
f"NOTE: Only specific job(s) were requested: [{jobs_to_do_requested}]"
)
jobs_to_do = jobs_to_do_requested
return {
"digests": digests,
"jobs_to_do": jobs_to_do,
"jobs_to_skip": jobs_to_skip,
"jobs_params": jobs_params,
}
def _update_gh_statuses(indata: Dict, s3: S3Helper) -> None:
# This action is required to re-create all GH statuses for skipped jobs, so that ci report can be generated afterwards
temp_path = Path(TEMP_PATH)
if not temp_path.exists():
temp_path.mkdir(parents=True, exist_ok=True)
# clean up before start
for file in temp_path.glob("*.ci"):
file.unlink()
# download all metadata files
path = get_s3_path(indata["build"])
files = s3.download_files( # type: ignore
bucket=S3_BUILDS_BUCKET,
s3_path=path,
file_suffix=".ci",
local_directory=temp_path,
)
print(f"CI metadata files [{files}]")
path = get_s3_path_docs(indata["docs"])
files_docs = s3.download_files( # type: ignore
bucket=S3_BUILDS_BUCKET,
s3_path=path,
file_suffix=".ci",
local_directory=temp_path,
)
print(f"CI docs metadata files [{files_docs}]")
files += files_docs
# parse CI metadata
job_digests = indata["jobs_data"]["digests"]
# create GH status
pr_info = PRInfo()
commit = get_commit(Github(get_best_robot_token(), per_page=100), pr_info.sha)
def run_create_status(job, digest, batch, num_batches):
success_flag_name = get_file_flag_name(job, digest, batch, num_batches)
if success_flag_name in files:
print(f"Going to re-create GH status for job [{job}] sha [{pr_info.sha}]")
job_status = CommitStatusData.load_from_file(
f"{TEMP_PATH}/{success_flag_name}"
) # type: CommitStatusData
assert job_status.status == "success", "BUG!"
commit.create_status(
state=job_status.status,
target_url=job_status.report_url,
description=format_description(
f"Reused from [{job_status.pr_num}-{job_status.sha[0:8]}]: "
f"{job_status.description}"
),
context=get_check_name(job, batch=batch, num_batches=num_batches),
)
print(f"GH status re-created from file [{success_flag_name}]")
with concurrent.futures.ThreadPoolExecutor() as executor:
futures = []
for job in job_digests:
if is_build_job(job):
# no GH status for build jobs
continue
digest = job_digests[job]
num_batches = CI_CONFIG.get_job_config(job).num_batches
for batch in range(num_batches):
future = executor.submit(
run_create_status, job, digest, batch, num_batches
)
futures.append(future)
done, _ = concurrent.futures.wait(futures)
for future in done:
try:
_ = future.result()
except Exception as e:
raise e
print("Going to update overall CI report")
set_status_comment(commit, pr_info)
print("... CI report update - done")
# clean up
ci_files = list(temp_path.glob("*.ci"))
for file in ci_files:
file.unlink()
def _fetch_commit_tokens(message: str) -> List[str]:
pattern = r"#[\w-]+"
matches = re.findall(pattern, message)
return matches
def main() -> int:
exit_code = 0
parser = argparse.ArgumentParser(
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
)
args = parse_args(parser)
if args.mark_success or args.pre or args.post or args.run:
assert args.infile, "Run config must be provided via --infile"
assert args.job_name, "Job name must be provided via --job-name"
indata: Optional[Dict[str, Any]] = None
if args.infile:
indata = (
json.loads(args.infile)
if not os.path.isfile(args.infile)
else json.load(open(args.infile))
)
assert indata and isinstance(indata, dict), "Invalid --infile json"
result: Dict[str, Any] = {}
s3 = S3Helper()
if args.configure:
GR = GitRunner()
pr_info = PRInfo(need_changed_files=True)
docker_data = {}
git_ref = GR.run(f"{GIT_PREFIX} rev-parse HEAD")
# if '#no-merge-commit' is set in commit message - set git ref to PR branch head to avoid merge-commit
tokens = []
if pr_info.number != 0:
message = GR.run(f"{GIT_PREFIX} log {pr_info.sha} --format=%B -n 1")
tokens = _fetch_commit_tokens(message)
print(f"Found commit message tokens: [{tokens}]")
if "#no-merge-commit" in tokens and CI:
GR.run(f"{GIT_PREFIX} checkout {pr_info.sha}")
git_ref = GR.run(f"{GIT_PREFIX} rev-parse HEAD")
print(
"#no-merge-commit is set in commit message - Setting git ref to PR branch HEAD to not use merge commit"
)
# let's get CH version
version = get_version_from_repo(git=Git(True)).string
print(f"Got CH version for this commit: [{version}]")
docker_data = (
_configure_docker_jobs(
args.rebuild_all_docker, args.docker_digest_or_latest
)
if not args.skip_docker
else {}
)
job_digester = JobDigester()
build_digest = job_digester.get_job_digest(
CI_CONFIG.get_digest_config("package_release")
)
docs_digest = job_digester.get_job_digest(
CI_CONFIG.get_digest_config("Docs check")
)
jobs_data = (
_configure_jobs(
build_digest,
docs_digest,
job_digester,
s3,
args.rebuild_all_binaries,
pr_info.labels,
tokens,
)
if not args.skip_jobs
else {}
)
# conclude results
result["git_ref"] = git_ref
result["version"] = version
result["build"] = build_digest
result["docs"] = docs_digest
result["jobs_data"] = jobs_data
result["docker_data"] = docker_data
if pr_info.number != 0 and not args.docker_digest_or_latest:
_check_and_update_for_early_style_check(result)
if pr_info.number != 0 and pr_info.has_changes_in_documentation_only():
_update_config_for_docs_only(result)
elif args.update_gh_statuses:
assert indata, "Run config must be provided via --infile"
_update_gh_statuses(indata=indata, s3=s3)
elif args.pre:
# remove job status file if any
CommitStatusData.cleanup()
if is_test_job(args.job_name):
assert indata, "Run config must be provided via --infile"
report_path = Path(REPORT_PATH)
report_path.mkdir(exist_ok=True, parents=True)
path = get_s3_path(indata["build"])
files = s3.download_files( # type: ignore
bucket=S3_BUILDS_BUCKET,
s3_path=path,
file_suffix=".json",
local_directory=report_path,
)
print(
f"Pre action done. Report files [{files}] have been downloaded from [{path}] to [{report_path}]"
)
else:
print(f"Pre action done. Nothing to do for [{args.job_name}]")
elif args.run:
assert CI_CONFIG.get_job_config(
args.job_name
).run_command, f"Run command must be configured in CI_CONFIG for [{args.job_name}] or in GH workflow"
if CI_CONFIG.get_job_config(args.job_name).timeout:
os.environ["KILL_TIMEOUT"] = str(
CI_CONFIG.get_job_config(args.job_name).timeout
)
os.environ["CHECK_NAME"] = args.job_name
run_command = (
"./tests/ci/" + CI_CONFIG.get_job_config(args.job_name).run_command
)
if ".py" in run_command:
run_command = "python3 " + run_command
print(f"Going to start run command [{run_command}]")
process = subprocess.run(
run_command,
stdout=sys.stdout,
stderr=sys.stderr,
text=True,
check=False,
shell=True,
)
if process.returncode == 0:
print(f"Run action done for: [{args.job_name}]")
else:
print(
f"Run action failed for: [{args.job_name}] with exit code [{process.returncode}]"
)
exit_code = process.returncode
elif args.post:
if is_build_job(args.job_name):
report_path = Path(TEMP_PATH) # build-check.py stores report in TEMP_PATH
assert report_path.is_dir(), f"File [{report_path}] is not a dir"
files = list(report_path.glob(f"*{args.job_name}.json")) # type: ignore[arg-type]
assert len(files) == 1, f"Which is the report file: {files}?"
local_report = f"{files[0]}"
report_name = BuildResult.get_report_name(args.job_name)
assert indata
s3_path = Path(get_s3_path(indata["build"])) / report_name
report_url = s3.upload_file(
bucket=S3_BUILDS_BUCKET, file_path=local_report, s3_path=s3_path
)
print(
f"Post action done. Report file [{local_report}] has been uploaded to [{report_url}]"
)
else:
print(f"Post action done. Nothing to do for [{args.job_name}]")
elif args.mark_success:
assert indata, "Run config must be provided via --infile"
job = args.job_name
num_batches = CI_CONFIG.get_job_config(job).num_batches
assert (
num_batches <= 1 or 0 <= args.batch < num_batches
), f"--batch must be provided and in range [0, {num_batches}) for {job}"
# FIXME: find generic design for propagating and handling job status (e.g. stop using statuses in GH api)
# now job ca be build job w/o status data, any other job that exit with 0 with or w/o status data
if is_build_job(job):
# there is no status for build jobs
# create dummy success to mark it as done
job_status = CommitStatusData(
status="success", description="dummy status", report_url="dummy_url"
)
else:
if not CommitStatusData.is_present():
# apparently exit after rerun-helper check
# do nothing, exit without failure
print("ERROR: no status file for job [{job}]")
job_status = CommitStatusData(
status="dummy failure",
description="dummy status",
report_url="dummy_url",
)
else:
# normal case
job_status = CommitStatusData.load_status()
# Storing job data (report_url) to restore OK GH status on job results reuse
if job_status.is_ok():
success_flag_name = get_file_flag_name(
job, indata["jobs_data"]["digests"][job], args.batch, num_batches
)
if not is_docs_job(job):
path = get_s3_path(indata["build"]) + success_flag_name
else:
path = get_s3_path_docs(indata["docs"]) + success_flag_name
job_status.dump_to_file(success_flag_name)
_ = s3.upload_file(
bucket=S3_BUILDS_BUCKET, file_path=success_flag_name, s3_path=path
)
os.remove(success_flag_name)
print(
f"Job [{job}] with digest [{indata['jobs_data']['digests'][job]}] {f'and batch {args.batch}/{num_batches}' if num_batches > 1 else ''} marked as successful. path: [{path}]"
)
else:
print(f"Job [{job}] is not ok, status [{job_status.status}]")
# print results
if args.outfile:
with open(args.outfile, "w") as f:
if isinstance(result, str):
print(result, file=f)
elif isinstance(result, dict):
print(json.dumps(result, indent=2 if args.pretty else None), file=f)
else:
raise AssertionError(f"Unexpected type for 'res': {type(result)}")
else:
if isinstance(result, str):
print(result)
elif isinstance(result, dict):
print(json.dumps(result, indent=2 if args.pretty else None))
else:
raise AssertionError(f"Unexpected type for 'res': {type(result)}")
return exit_code
if __name__ == "__main__":
os.chdir(ROOT_DIR)
sys.exit(main())