# ClickHouse/tests/ci/docker_manifests_merge.py
# Snapshot metadata: 2023-10-19 11:35:35 +02:00 — 323 lines, 10 KiB, Python.
#!/usr/bin/env python3
import argparse
import json
import logging
import os
import subprocess
from pathlib import Path
from typing import List, Dict, Tuple
from github import Github
from clickhouse_helper import (
ClickHouseHelper,
prepare_tests_results_for_clickhouse,
CHException,
)
from commit_status_helper import format_description, get_commit, post_commit_status
from docker_images_helper import IMAGES_FILE_PATH, get_image_names
from env_helper import RUNNER_TEMP, REPO_COPY
from get_robot_token import get_best_robot_token, get_parameter_from_ssm
from git_helper import Runner
from pr_info import PRInfo
from report import TestResults, TestResult
from s3_helper import S3Helper
from stopwatch import Stopwatch
from upload_result_helper import upload_results
# Check name shown in the GitHub commit status and stored in the CI database.
NAME = "Push multi-arch images to Dockerhub"
# Per-architecture artifact file name; formatted with the tag suffix,
# e.g. changed_images_amd64.json.
CHANGED_IMAGES = "changed_images_{}.json"

# Mapping: image name -> list of tags (last element is the latest one).
Images = Dict[str, List[str]]
def parse_args() -> argparse.Namespace:
    """Parse command-line arguments.

    At least two --suffix values are required: a multi-arch manifest is only
    meaningful when it merges images built for different architectures.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description="The program gets images from changed_images_*.json, merges images "
        "with different architectures into one manifest and pushes back to docker hub",
    )

    parser.add_argument(
        "--suffix",
        dest="suffixes",
        type=str,
        required=True,
        action="append",
        # Fixed wording: the check below requires >= 2 suffixes, not > 2.
        help="suffixes for existing images' tags. At least two should be given",
    )
    parser.add_argument(
        "--path",
        type=Path,
        default=RUNNER_TEMP,
        help="path to changed_images_*.json files",
    )
    # --reports / --no-reports form a hidden boolean pair: the default (True)
    # lives on the hidden --reports option, --no-reports flips it off.
    parser.add_argument("--reports", default=True, help=argparse.SUPPRESS)
    parser.add_argument(
        "--no-reports",
        action="store_false",
        dest="reports",
        default=argparse.SUPPRESS,
        help="don't push reports to S3 and github",
    )
    # Same hidden-pair pattern for --push / --no-push-images.
    parser.add_argument("--push", default=True, help=argparse.SUPPRESS)
    parser.add_argument(
        "--no-push-images",
        action="store_false",
        dest="push",
        default=argparse.SUPPRESS,
        help="don't push images to docker hub",
    )
    args = parser.parse_args()
    if len(args.suffixes) < 2:
        # Message matches the condition: exactly two suffixes are acceptable.
        parser.error("at least two --suffix should be given")
    return args
def load_images(path: Path, suffix: str) -> "Images":
    """Read the changed-images mapping for one architecture *suffix* from disk."""
    images_file = path / CHANGED_IMAGES.format(suffix)
    with open(images_file, "rb") as handle:
        return json.load(handle)  # type: ignore
def strip_suffix(suffix: str, images: "Images") -> "Images":
    """Return a copy of *images* with the trailing "-{suffix}" cut from every tag.

    Raises ValueError if any tag does not end with the expected suffix.
    """
    tail = f"-{suffix}"
    stripped = {}
    for name, tags in images.items():
        for tag in tags:
            if not tag.endswith(tail):
                raise ValueError(
                    f"version {name}:{tag} does not contain suffix {suffix}"
                )
        # len(tail) == len(suffix) + 1, so the hyphen is removed too.
        stripped[name] = [tag[: -len(tail)] for tag in tags]
    return stripped
def check_sources(to_merge: "Dict[str, Images]") -> "Images":
    """Verify every architecture carries the same image set and return it.

    *to_merge* maps tag suffix (architecture) to its Images mapping. After
    stripping each suffix, all mappings must be identical; the common mapping
    is returned. Raises ValueError on any mismatch.
    """
    reference = {}  # type: Images
    reference_suffix = ""
    for suffix, images in to_merge.items():
        stripped = strip_suffix(suffix, images)
        if not reference:
            # First architecture seen becomes the reference to compare against.
            reference_suffix = suffix
            reference = stripped
            continue
        if reference != stripped:
            raise ValueError(
                f"images in {images} are not equal to {to_merge[reference_suffix]}"
            )
    return reference
def get_changed_images(images: "Images") -> Dict[str, str]:
    """Collapse each image's tag list to its most recent tag.

    The produced artifact keeps the original json shape {"image": "tag"};
    the last element of every tag list is the latest one, {PR_NUMBER}-{SHA1}.
    """
    return {name: tags[-1] for name, tags in images.items()}
def merge_images(to_merge: "Dict[str, Images]") -> Dict[str, List[List[str]]]:
    """Group per-architecture tags for every image version.

    Turns image-name:version-suffix1 and image-name:version-suffix2 into one
    record per version: [version, version-suffix1, version-suffix2, ...].
    """
    suffixes = to_merge.keys()
    # Validates that all architectures agree and yields the suffix-less tags.
    common = check_sources(to_merge)

    merged = {}  # type: Dict[str, List[List[str]]]
    for name, versions in common.items():
        records = []
        for idx, version in enumerate(versions):
            record = [version]  # type: List[str]
            record.extend(to_merge[suffix][name][idx] for suffix in suffixes)
            records.append(record)
        merged[name] = records
    return merged
def _run_shell(cmd: str) -> Tuple[int, str]:
    """Run *cmd* in a shell; return (returncode, combined stdout+stderr).

    stdout is drained BEFORE wait() so a chatty command cannot deadlock on a
    full pipe buffer (the original code read the pipe only after waiting).
    """
    logging.info("running: %s", cmd)
    with subprocess.Popen(
        cmd,
        shell=True,
        stderr=subprocess.STDOUT,
        stdout=subprocess.PIPE,
        universal_newlines=True,
    ) as popen:
        output = popen.stdout.read() if popen.stdout is not None else ""
        retcode = popen.wait()
    return retcode, output


def create_manifest(image: str, tags: List[str], push: bool) -> Tuple[str, str]:
    """Create a multi-arch manifest for *image* from per-arch *tags*.

    The first tag names the resulting manifest; all tags are amended into it.
    When *push* is true the manifest is also pushed to the registry.
    Returns (manifest, "OK") on success or (manifest, "FAIL") on any error.
    """
    manifest = f"{image}:{tags[0]}"
    cmd = "docker manifest create --amend " + " ".join(f"{image}:{t}" for t in tags)
    retcode, output = _run_shell(cmd)
    if retcode != 0:
        logging.error("failed to create manifest for %s:\n %s\n", manifest, output)
        return manifest, "FAIL"
    if not push:
        return manifest, "OK"

    retcode, output = _run_shell(f"docker manifest push {manifest}")
    if retcode != 0:
        logging.error("failed to push %s:\n %s\n", manifest, output)
        return manifest, "FAIL"

    return manifest, "OK"
def enrich_images(changed_images: Dict[str, str]) -> None:
    """Fill in tags for images that were NOT changed in this PR.

    ``changed_images`` maps image name -> tag for images rebuilt here; for
    every other image listed in the images file the most recent tag recorded
    in the public ClickHouse CI database is looked up and added in place.
    Mutates ``changed_images``. Images may stay unresolved if no tag is found
    within MAX_COMMIT_BATCHES_TO_CHECK batches of merge commits.
    """
    all_image_names = get_image_names(Path(REPO_COPY), IMAGES_FILE_PATH)

    # Only images absent from changed_images still need a tag.
    images_to_find_tags_for = [
        image for image in all_image_names if image not in changed_images
    ]
    images_to_find_tags_for.sort()
    logging.info(
        "Trying to find versions for images:\n %s", "\n ".join(images_to_find_tags_for)
    )

    COMMIT_SHA_BATCH_SIZE = 100
    MAX_COMMIT_BATCHES_TO_CHECK = 10
    # Gets the sha of the last COMMIT_SHA_BATCH_SIZE commits after skipping some commits (see below)
    LAST_N_ANCESTOR_SHA_COMMAND = f"git log --format=format:'%H' --max-count={COMMIT_SHA_BATCH_SIZE} --skip={{}} --merges"
    git_runner = Runner()

    # `test_name` in the checks table is "image:tag"; for every image pick the
    # tag from the most recent check run among the supplied commit shas.
    GET_COMMIT_SHAS_QUERY = """
    WITH {commit_shas:Array(String)} AS commit_shas,
    {images:Array(String)} AS images
    SELECT
    splitByChar(':', test_name)[1] AS image_name,
    argMax(splitByChar(':', test_name)[2], check_start_time) AS tag
    FROM checks
    WHERE
    check_name == 'Push multi-arch images to Dockerhub'
    AND position(test_name, checks.commit_sha)
    AND checks.commit_sha IN commit_shas
    AND image_name IN images
    GROUP BY image_name
    """

    batch_count = 0
    # We use always publicly available DB here intentionally
    ch_helper = ClickHouseHelper(
        "https://play.clickhouse.com", {"X-ClickHouse-User": "play"}
    )

    # Walk back through merge commits in batches until every image has a tag
    # or the batch limit is reached.
    while (
        batch_count <= MAX_COMMIT_BATCHES_TO_CHECK and len(images_to_find_tags_for) != 0
    ):
        commit_shas = git_runner(
            LAST_N_ANCESTOR_SHA_COMMAND.format(batch_count * COMMIT_SHA_BATCH_SIZE)
        ).split("\n")
        result = ch_helper.select_json_each_row(
            "default",
            GET_COMMIT_SHAS_QUERY,
            {"commit_shas": commit_shas, "images": images_to_find_tags_for},
        )
        result.sort(key=lambda x: x["image_name"])
        logging.info(
            "Found images for commits %s..%s:\n %s",
            commit_shas[0],
            commit_shas[-1],
            "\n ".join(f"{im['image_name']}:{im['tag']}" for im in result),
        )
        # Record each found tag and stop searching for that image.
        for row in result:
            image_name = row["image_name"]
            changed_images[image_name] = row["tag"]
            images_to_find_tags_for.remove(image_name)

        batch_count += 1
def main():
    """Entry point: merge per-architecture images into multi-arch manifests.

    Steps: docker login (unless --no-push-images), load changed_images_*.json
    for every suffix, create/push one manifest per image version, enrich the
    result with tags for unchanged images, write changed_images.json, upload
    the report and post the commit status (unless --no-reports).
    """
    logging.basicConfig(level=logging.INFO)
    stopwatch = Stopwatch()
    args = parse_args()
    if args.push:
        # Password is piped via stdin so it never appears in the process list.
        subprocess.check_output(  # pylint: disable=unexpected-keyword-arg
            "docker login --username 'robotclickhouse' --password-stdin",
            input=get_parameter_from_ssm("dockerhub_robot_password"),
            encoding="utf-8",
            shell=True,
        )

    to_merge = {}
    for suf in args.suffixes:
        to_merge[suf] = load_images(args.path, suf)

    changed_images = get_changed_images(check_sources(to_merge))

    # Required for the `docker manifest` subcommands used below.
    os.environ["DOCKER_CLI_EXPERIMENTAL"] = "enabled"
    merged = merge_images(to_merge)

    status = "success"
    test_results = []  # type: TestResults
    for image, versions in merged.items():
        for tags in versions:
            manifest, test_result = create_manifest(image, tags, args.push)
            test_results.append(TestResult(manifest, test_result))
            # A single failed manifest marks the whole check as failed.
            if test_result != "OK":
                status = "failure"

    enriched_images = changed_images.copy()
    try:
        # changed_images now contains all the images that are changed in this PR. Let's find the latest tag for the images that are not changed.
        enrich_images(enriched_images)
    except CHException as ex:
        # Best-effort: missing tags for unchanged images is not fatal.
        logging.warning("Couldn't get proper tags for not changed images: %s", ex)

    with open(args.path / "changed_images.json", "w", encoding="utf-8") as ci:
        json.dump(enriched_images, ci)

    pr_info = PRInfo()
    s3_helper = S3Helper()

    url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results, [], NAME)

    print(f"::notice ::Report url: {url}")

    if not args.reports:
        return

    if changed_images:
        description = "Updated " + ", ".join(changed_images.keys())
    else:
        description = "Nothing to update"

    # Trims/normalizes the description to GitHub's status length limit.
    description = format_description(description)

    gh = Github(get_best_robot_token(), per_page=100)
    commit = get_commit(gh, pr_info.sha)
    post_commit_status(commit, status, url, description, NAME, pr_info)

    prepared_events = prepare_tests_results_for_clickhouse(
        pr_info,
        test_results,
        status,
        stopwatch.duration_seconds,
        stopwatch.start_time_str,
        url,
        NAME,
    )
    ch_helper = ClickHouseHelper()
    ch_helper.insert_events_into(db="default", table="checks", events=prepared_events)


if __name__ == "__main__":
    main()