ClickHouse/tests/ci/docker_images_check.py

344 lines
11 KiB
Python
Raw Normal View History

2021-09-15 16:32:17 +00:00
#!/usr/bin/env python3
import argparse
2021-09-15 17:01:16 +00:00
import json
import logging
2021-09-15 16:58:36 +00:00
import os
import shutil
import subprocess
import time
from typing import List, Tuple
from github import Github
2021-11-26 14:00:09 +00:00
from env_helper import GITHUB_WORKSPACE, RUNNER_TEMP
from s3_helper import S3Helper
2021-11-26 14:00:09 +00:00
from pr_info import PRInfo
2021-10-20 11:48:27 +00:00
from get_robot_token import get_best_robot_token, get_parameter_from_ssm
2021-11-12 11:39:00 +00:00
from upload_result_helper import upload_results
from commit_status_helper import post_commit_status
2021-11-19 14:47:04 +00:00
from clickhouse_helper import ClickHouseHelper, prepare_tests_results_for_clickhouse
from stopwatch import Stopwatch
2021-09-15 16:32:17 +00:00
# Name of this check. It is used as the report/commit-status name and, lowercased
# with spaces replaced by underscores, as the last component of the S3 path
# prefix. main() appends the --suffix value to it when one is given.
NAME = "Push to Dockerhub (actions)"
# Scratch directory (recreated by main()) that holds the per-image build logs
# and the resulting changed_images*.json file.
TEMP_PATH = os.path.join(RUNNER_TEMP, "docker_images_check")
2021-12-21 16:32:04 +00:00
def get_changed_docker_images(
    pr_info: "PRInfo", repo_path: str, image_file_path: str
) -> List[Tuple[str, str]]:
    """Return the docker images that must be rebuilt for the given PR.

    The images file is a JSON object mapping a dockerfile directory (relative
    to the repository root) to a description with at least the keys ``name``
    (the image name) and ``dependent`` (a list of dockerfile directories of
    images built on top of it).

    Args:
        pr_info: object providing ``changed_files``, ``number`` and ``sha``.
        repo_path: path to the repository checkout.
        image_file_path: path of the images JSON file relative to ``repo_path``.

    Returns:
        A list of ``(dockerfile_dir, image_name)`` tuples without duplicates,
        ordered so that every image goes after the changed images it depends
        on. Empty if the images file is missing or empty.

    Raises:
        RuntimeError: if the dependency expansion grows beyond
            ``5 * len(images)`` entries (e.g. a dependency cycle).
    """
    images_dict = {}
    path_to_images_file = os.path.join(repo_path, image_file_path)
    if os.path.exists(path_to_images_file):
        with open(path_to_images_file, "r", encoding="utf-8") as dict_file:
            images_dict = json.load(dict_file)
    else:
        logging.info(
            "Image file %s doesnt exists in repo %s", image_file_path, repo_path
        )

    if not images_dict:
        return []

    files_changed = pr_info.changed_files

    logging.info(
        "Changed files for PR %s @ %s: %s",
        pr_info.number,
        pr_info.sha,
        str(files_changed),
    )

    changed_images = []

    for dockerfile_dir, image_description in images_dict.items():
        # Compare whole path components: a bare startswith() would treat a
        # change in "docker/test/stateless_pytest/..." as a change of the
        # sibling image "docker/test/stateless".
        dir_prefix = dockerfile_dir.rstrip("/") + "/"
        for f in files_changed:
            if f == dockerfile_dir or f.startswith(dir_prefix):
                logging.info(
                    "Found changed file '%s' which affects "
                    "docker image '%s' with path '%s'",
                    f,
                    image_description["name"],
                    dockerfile_dir,
                )
                changed_images.append(dockerfile_dir)
                break

    # The order is important: dependents should go later than bases, so that
    # they are built with updated base versions.
    index = 0
    while index < len(changed_images):
        image = changed_images[index]
        for dependent in images_dict[image]["dependent"]:
            logging.info(
                "Marking docker image '%s' as changed because it "
                "depends on changed docker image '%s'",
                dependent,
                image,
            )
            changed_images.append(dependent)
        index += 1
        if index > 5 * len(images_dict):
            # Sanity check to prevent infinite loop.
            raise RuntimeError(
                f"Too many changed docker images, this is a bug. {changed_images}"
            )

    # If a dependent image was already in the list because its own files
    # changed, but then it was added as a dependent of a changed base, we
    # must remove the earlier entry so that it doesn't go earlier than its
    # base. Keeping only the LAST occurrence of every image achieves that:
    # de-duplicate the reversed list (dict keys keep insertion order), then
    # restore the original direction.
    last_occurrences_reversed = list(dict.fromkeys(reversed(changed_images)))
    result = [
        (x, images_dict[x]["name"]) for x in reversed(last_occurrences_reversed)
    ]
    logging.info(
        "Changed docker images for PR %s @ %s: '%s'",
        pr_info.number,
        pr_info.sha,
        result,
    )
    return result
2021-09-15 16:32:17 +00:00
2021-12-21 16:32:04 +00:00
def build_and_push_one_image(
    path_to_dockerfile_folder: str, image_name: str, version_string: str, push: bool
) -> Tuple[bool, str]:
    """Build one docker image tag with ``docker buildx`` and optionally push it.

    The combined stdout/stderr of the docker command is written to a log file
    inside TEMP_PATH.

    Args:
        path_to_dockerfile_folder: build context directory passed to docker.
        image_name: image name without tag.
        version_string: tag to build; it is also passed as the ``FROM_TAG``
            build argument and used as the registry cache reference.
        push: add ``--push`` to the docker command when true.

    Returns:
        ``(success, build_log)`` where ``success`` is true when docker exited
        with code 0 and ``build_log`` is the path of the log file.
    """
    path = path_to_dockerfile_folder
    logging.info(
        "Building docker image %s with version %s from path %s",
        image_name,
        version_string,
        path,
    )
    # Image names contain '/', which must not create sub-directories here.
    log_name_part = str(image_name).replace("/", "_")
    build_log = os.path.join(
        TEMP_PATH, f"build_and_push_log_{log_name_part}_{version_string}"
    )

    # The command is an argument list executed without a shell, so names,
    # tags and paths are passed verbatim and need no quoting.
    cmd = [
        "docker",
        "buildx",
        "build",
        "--builder",
        "default",
        "--build-arg",
        f"FROM_TAG={version_string}",
        "--build-arg",
        "BUILDKIT_INLINE_CACHE=1",
        "--tag",
        f"{image_name}:{version_string}",
        "--cache-from",
        f"type=registry,ref={image_name}:{version_string}",
    ]
    if push:
        cmd.append("--push")
    cmd += ["--progress", "plain", path]

    logging.info("Docker command to run: %s", " ".join(cmd))
    with open(build_log, "w", encoding="utf-8") as bl:
        retcode = subprocess.run(cmd, stderr=bl, stdout=bl, check=False).returncode

    if retcode != 0:
        return False, build_log

    logging.info("Processing of %s successfully finished", image_name)
    return True, build_log
2021-09-15 16:32:17 +00:00
2021-12-21 16:32:04 +00:00
def process_single_image(
    versions: List[str], path_to_dockerfile_folder: str, image_name: str, push: bool
) -> List[Tuple[str, str, str]]:
    """Build (and optionally push) one image for every requested version.

    Each version is attempted up to five times; the pause before a retry grows
    by five seconds with every failed attempt (0, 5, 10, 15 seconds).

    Args:
        versions: tags to build, in build order.
        path_to_dockerfile_folder: build context directory of the image.
        image_name: image name without tag.
        push: whether the built image is pushed.

    Returns:
        One ``("<image>:<version>", build_log_path, status)`` tuple per
        version, where status is ``"OK"`` or ``"FAIL"``; for a failed version
        the log path is the one of the last attempt.
    """
    logging.info("Image will be pushed with versions %s", ", ".join(versions))
    max_attempts = 5
    result = []
    for ver in versions:
        for i in range(max_attempts):
            success, build_log = build_and_push_one_image(
                path_to_dockerfile_folder, image_name, ver, push
            )
            if success:
                result.append((image_name + ":" + ver, build_log, "OK"))
                break
            if i + 1 < max_attempts:
                # Sleep only when another attempt follows; waiting after the
                # final failure would just delay reporting it.
                logging.info(
                    "Got error will retry %s time and sleep for %s seconds",
                    i + 1,
                    i * 5,
                )
                time.sleep(i * 5)
        else:
            # The loop ended without a break: every attempt failed.
            result.append((image_name + ":" + ver, build_log, "FAIL"))

    logging.info("Processing finished")
    return result
def process_test_results(
    s3_client: S3Helper, test_results: List[Tuple[str, str, str]], s3_path_prefix: str
) -> Tuple[str, List[Tuple[str, str]]]:
    """Upload the build logs and turn the build results into report rows.

    Args:
        s3_client: helper whose ``upload_test_report_to_s3(path, s3_key)`` is
            called for every existing build log; its return value is used as
            the link target.
        test_results: ``(image, build_log_path, status)`` tuples.
        s3_path_prefix: prefix joined with ``/`` and the log file name to form
            the upload key.

    Returns:
        ``(overall_status, rows)``: ``overall_status`` is ``"failure"`` if any
        status differs from ``"OK"`` and ``"success"`` otherwise; every row is
        ``(test_name, status)`` where the test name is the image, followed by
        an HTML link to the uploaded log when the log file exists.
    """
    any_failed = False
    report_rows = []
    for image, log_path, outcome in test_results:
        if outcome != "OK":
            any_failed = True

        label = image
        if log_path is not None and os.path.exists(log_path):
            s3_key = "/".join((s3_path_prefix, os.path.basename(log_path)))
            log_url = s3_client.upload_test_report_to_s3(log_path, s3_key)
            link = f'<a href="{log_url}">build_log</a>'
            label = "".join((image, " (", link, ")"))

        report_rows.append((label, outcome))

    return ("failure" if any_failed else "success"), report_rows
2021-12-21 16:32:04 +00:00
def parse_args() -> argparse.Namespace:
    """Parse the command line of the script.

    Returns:
        The namespace with ``suffix``, ``repo``, ``image_path`` (a list, or
        ``None`` when the option was not given), ``no_reports`` and
        ``no_push_images``.
    """
    # Every entry is (option string, keyword arguments for add_argument), in
    # the order the options are shown in --help.
    option_specs = (
        (
            "--suffix",
            {
                "type": str,
                "help": "suffix for all built images tags and resulting json file; "
                "the parameter "
                "significantly changes the script behavior, e.g. "
                "changed_images.json is called "
                "changed_images_{suffix}.json and contains list of all tags",
            },
        ),
        (
            "--repo",
            {
                "type": str,
                "default": "clickhouse",
                "help": "docker hub repository prefix",
            },
        ),
        (
            "--image-path",
            {
                "type": str,
                "action": "append",
                "help": "list of image paths to build instead of using pr_info + "
                "diff URL, "
                "e.g. 'docker/packager/binary'",
            },
        ),
        (
            "--no-reports",
            {
                "action": "store_true",
                "help": "don't push reports to S3 and github",
            },
        ),
        (
            "--no-push-images",
            {
                "action": "store_true",
                "help": "don't push images to docker hub",
            },
        ),
    )

    parser = argparse.ArgumentParser(
        description="Program to build changed or given docker images with all "
        "dependant images. Example for local running: "
        "python docker_images_check.py --no-push-images --no-reports "
        "--image-path docker/packager/binary",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    for option, settings in option_specs:
        parser.add_argument(option, **settings)

    return parser.parse_args()
2021-11-19 14:47:04 +00:00
def main():
    """Build the changed docker images, push them and publish the results.

    Steps, in order: parse the arguments, log in to docker hub (unless
    --no-push-images), recreate TEMP_PATH, determine the changed images,
    build every image for every version tag, write the changed_images*.json
    file, upload logs and the report, and finally (unless --no-reports) post
    the commit status and insert the results into ClickHouse.
    """
    global NAME

    logging.basicConfig(level=logging.INFO)
    stopwatch = Stopwatch()

    args = parse_args()
    if args.suffix:
        NAME += f" {args.suffix}"
        changed_json = os.path.join(TEMP_PATH, f"changed_images_{args.suffix}.json")
    else:
        changed_json = os.path.join(TEMP_PATH, "changed_images.json")

    push = not args.no_push_images
    if push:
        # The password is fed through stdin so that it does not appear in the
        # command line.
        subprocess.check_output(  # pylint: disable=unexpected-keyword-arg
            ["docker", "login", "--username", "robotclickhouse", "--password-stdin"],
            input=get_parameter_from_ssm("dockerhub_robot_password"),
            encoding="utf-8",
        )

    repo_path = GITHUB_WORKSPACE

    if os.path.exists(TEMP_PATH):
        shutil.rmtree(TEMP_PATH)
    os.makedirs(TEMP_PATH)

    if args.image_path:
        # Explicitly requested image paths replace the changed-files list.
        pr_info = PRInfo()
        pr_info.changed_files = set(args.image_path)
    else:
        pr_info = PRInfo(need_changed_files=True)

    changed_images = get_changed_docker_images(pr_info, repo_path, "docker/images.json")
    logging.info(
        "Has changed images %s", ", ".join(str(image[0]) for image in changed_images)
    )
    pr_commit_version = f"{pr_info.number}-{pr_info.sha}"
    # The order is important, PR number is used as cache during the build
    versions = [str(pr_info.number), pr_commit_version]
    result_version = pr_commit_version
    if pr_info.number == 0:
        # First get the latest for cache
        versions.insert(0, "latest")

    if args.suffix:
        # We should build architecture specific images separately and merge a
        # manifest lately in a different script
        versions = [f"{v}-{args.suffix}" for v in versions]
        # changed_images_{suffix}.json should contain all changed images
        result_version = versions

    result_images = {}
    images_processing_result = []
    for rel_path, image_name in changed_images:
        full_path = os.path.join(repo_path, rel_path)
        images_processing_result += process_single_image(
            versions, full_path, image_name, push
        )
        result_images[image_name] = result_version

    if changed_images:
        description = "Updated " + ",".join(im[1] for im in changed_images)
    else:
        description = "Nothing to update"

    # Keep the description below 140 characters.
    if len(description) >= 140:
        description = description[:136] + "..."

    with open(changed_json, "w", encoding="utf-8") as images_file:
        json.dump(result_images, images_file)

    s3_helper = S3Helper("https://s3.amazonaws.com")

    report_dir = NAME.lower().replace(" ", "_")
    s3_path_prefix = f"{pr_info.number}/{pr_info.sha}/{report_dir}"
    status, test_results = process_test_results(
        s3_helper, images_processing_result, s3_path_prefix
    )

    url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results, [], NAME)

    print(f"::notice ::Report url: {url}")
    print(f'::set-output name=url_output::"{url}"')

    # NOTE(review): by this point process_test_results() has already uploaded
    # the build logs through s3_helper and upload_results() has been called,
    # so --no-reports only skips the commit status and the ClickHouse insert
    # below, while its help text says S3 is skipped too. Confirm which
    # behavior is intended.
    if args.no_reports:
        return

    gh = Github(get_best_robot_token())
    post_commit_status(gh, pr_info.sha, NAME, description, status, url)

    prepared_events = prepare_tests_results_for_clickhouse(
        pr_info,
        test_results,
        status,
        stopwatch.duration_seconds,
        stopwatch.start_time_str,
        url,
        NAME,
    )
    ch_helper = ClickHouseHelper()
    ch_helper.insert_events_into(db="gh-data", table="checks", events=prepared_events)
# Run the check only when the file is executed as a script, not on import.
if __name__ == "__main__":
    main()