ClickHouse/tests/ci/docker_images_check.py

275 lines
9.0 KiB
Python
Raw Normal View History

2021-09-15 16:32:17 +00:00
#!/usr/bin/env python3
import subprocess
import logging
2021-09-15 17:01:16 +00:00
import json
2021-09-15 16:58:36 +00:00
import os
import time
import shutil
from typing import List, Tuple
from github import Github # type: ignore
2021-11-26 14:00:09 +00:00
from env_helper import GITHUB_WORKSPACE, RUNNER_TEMP
from s3_helper import S3Helper
2021-11-26 14:00:09 +00:00
from pr_info import PRInfo
2021-10-20 11:48:27 +00:00
from get_robot_token import get_best_robot_token, get_parameter_from_ssm
2021-11-12 11:39:00 +00:00
from upload_result_helper import upload_results
2021-11-12 12:36:25 +00:00
from commit_status_helper import get_commit
2021-11-19 14:47:04 +00:00
from clickhouse_helper import ClickHouseHelper, prepare_tests_results_for_clickhouse
from stopwatch import Stopwatch
2021-09-15 16:32:17 +00:00
# Check name used for the GitHub commit status and the uploaded report.
NAME = "Push to Dockerhub (actions)"
2021-12-21 16:32:04 +00:00
def get_changed_docker_images(
    pr_info: "PRInfo", repo_path: str, image_file_path: str
) -> Tuple[List[Tuple[str, str]], str]:
    """Map the PR's changed files onto the docker images that need a rebuild.

    Reads the image description JSON at ``repo_path/image_file_path`` — a dict
    of ``dockerfile_dir -> {"name": ..., "dependent": [...]}`` — and returns:

    * a list of ``(dockerfile_dir, image_name)`` pairs ordered so that base
      images precede images that depend on them, and
    * the dockerhub repo name: ``"clickhouse"`` if any image name starts with
      ``"clickhouse/"``, otherwise the legacy ``"yandex"``.

    ``pr_info`` must provide ``changed_files``, ``number`` and ``sha``.
    """
    images_dict = {}
    path_to_images_file = os.path.join(repo_path, image_file_path)
    if os.path.exists(path_to_images_file):
        with open(path_to_images_file, "r", encoding="utf-8") as dict_file:
            images_dict = json.load(dict_file)
    else:
        logging.info(
            "Image file %s doesn't exist in repo %s", image_file_path, repo_path
        )

    dockerhub_repo_name = "yandex"
    if not images_dict:
        # Nothing described -> nothing to build.
        return [], dockerhub_repo_name

    files_changed = pr_info.changed_files

    logging.info(
        "Changed files for PR %s @ %s: %s",
        pr_info.number,
        pr_info.sha,
        str(files_changed),
    )

    # First pass: images whose own directory contains a changed file.
    changed_images = []
    for dockerfile_dir, image_description in images_dict.items():
        if image_description["name"].startswith("clickhouse/"):
            dockerhub_repo_name = "clickhouse"

        for f in files_changed:
            if f.startswith(dockerfile_dir):
                logging.info(
                    "Found changed file '%s' which affects "
                    "docker image '%s' with path '%s'",
                    f,
                    image_description["name"],
                    dockerfile_dir,
                )
                changed_images.append(dockerfile_dir)
                break

    # Second pass: transitively pull in dependents. The order is important:
    # dependents should go later than bases, so that they are built with
    # updated base versions.
    index = 0
    while index < len(changed_images):
        image = changed_images[index]
        for dependent in images_dict[image]["dependent"]:
            logging.info(
                "Marking docker image '%s' as changed because it "
                "depends on changed docker image '%s'",
                dependent,
                image,
            )
            changed_images.append(dependent)
        index += 1
        if index > 100:
            # Sanity check to prevent infinite loop (e.g. a dependency cycle
            # in images.json).
            raise RuntimeError(
                f"Too many changed docker images, this is a bug. {changed_images}"
            )

    # If a dependent image was already in the list because its own files
    # changed, but then it was added as a dependent of a changed base, we
    # must remove the earlier entry so that it doesn't go earlier than its
    # base. This way, the dependent will be rebuilt later than the base, and
    # will correctly use the updated version of the base. Deduplicate from
    # the tail, keeping the LAST occurrence of each image.
    seen = set()
    no_dups_reversed = []
    for x in reversed(changed_images):
        if x not in seen:
            seen.add(x)
            no_dups_reversed.append(x)

    result = [(x, images_dict[x]["name"]) for x in reversed(no_dups_reversed)]
    logging.info(
        "Changed docker images for PR %s @ %s: '%s'",
        pr_info.number,
        pr_info.sha,
        result,
    )
    return result, dockerhub_repo_name
2021-12-21 16:32:04 +00:00
def build_and_push_one_image(
    path_to_dockerfile_folder: str, image_name: str, version_string: str
) -> Tuple[bool, str]:
    """Build one docker image with ``docker buildx`` and push it.

    The build output (stdout+stderr) is redirected to a local log file named
    ``build_and_push_log_<image>_<version>``.

    Returns ``(success, build_log_path)``.
    """
    path = path_to_dockerfile_folder
    logging.info(
        "Building docker image %s with version %s from path %s",
        image_name,
        version_string,
        path,
    )
    build_log = "build_and_push_log_{}_{}".format(
        str(image_name).replace("/", "_"), version_string
    )
    with open(build_log, "w", encoding="utf-8") as pl:
        cmd = (
            "docker buildx build --builder default "
            f"--build-arg FROM_TAG={version_string} "
            # plain string: no placeholder here (was a pointless f-string)
            "--build-arg BUILDKIT_INLINE_CACHE=1 "
            f"--tag {image_name}:{version_string} "
            f"--cache-from type=registry,ref={image_name}:{version_string} "
            f"--progress plain --push {path}"
        )
        logging.info("Docker command to run: %s", cmd)
        # subprocess.run waits for completion and releases the process handle
        # (the previous bare Popen().wait() leaked it).
        retcode = subprocess.run(
            cmd, shell=True, stderr=pl, stdout=pl, check=False
        ).returncode
        if retcode != 0:
            return False, build_log

    logging.info("Processing of %s successfully finished", image_name)
    return True, build_log
2021-09-15 16:32:17 +00:00
2021-12-21 16:32:04 +00:00
def process_single_image(
    versions: List[str], path_to_dockerfile_folder: str, image_name: str
) -> List[Tuple[str, str, str]]:
    """Build and push ``image_name`` once per version, retrying up to 5 times.

    Returns a list of ``(tag, build_log_path, "OK" | "FAIL")`` triples,
    one per version.
    """
    logging.info("Image will be pushed with versions %s", ", ".join(versions))
    results = []
    for version in versions:
        tag = image_name + ":" + version
        for attempt in range(5):
            success, build_log = build_and_push_one_image(
                path_to_dockerfile_folder, image_name, version
            )
            if success:
                results.append((tag, build_log, "OK"))
                break
            logging.info(
                "Got error will retry %s time and sleep for %s seconds",
                attempt,
                attempt * 5,
            )
            time.sleep(attempt * 5)
        else:
            # All five attempts failed; record the log of the last one.
            results.append((tag, build_log, "FAIL"))
    logging.info("Processing finished")
    return results
def process_test_results(
    s3_client: S3Helper, test_results: List[Tuple[str, str, str]], s3_path_prefix: str
) -> Tuple[str, List[Tuple[str, str]]]:
    """Upload build logs to S3 and flatten results for the report.

    ``test_results`` is a list of ``(image_tag, build_log_path, status)``
    triples.  Returns ``(overall_status, [(test_name, status), ...])`` where
    ``overall_status`` is ``"failure"`` if any entry is not ``"OK"``, and the
    test name carries an HTML link to the uploaded build log when one exists.
    """
    overall_status = "success"
    processed = []
    for image, build_log, status in test_results:
        if status != "OK":
            overall_status = "failure"
        link_html = ""
        # Only upload a log that was actually produced locally.
        if build_log is not None and os.path.exists(build_log):
            build_url = s3_client.upload_test_report_to_s3(
                build_log, s3_path_prefix + "/" + os.path.basename(build_log)
            )
            link_html += '<a href="{}">build_log</a>'.format(build_url)
        test_name = image + " (" + link_html + ")" if link_html else image
        processed.append((test_name, status))
    return overall_status, processed
2021-12-21 16:32:04 +00:00
2021-09-15 16:32:17 +00:00
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)

    # Started first so that the total check duration covers everything below.
    stopwatch = Stopwatch()

    repo_path = GITHUB_WORKSPACE
    temp_path = os.path.join(RUNNER_TEMP, "docker_images_check")
    # Credentials come from AWS SSM Parameter Store.
    dockerhub_password = get_parameter_from_ssm("dockerhub_robot_password")

    # Start from a clean temp directory on every run.
    if os.path.exists(temp_path):
        shutil.rmtree(temp_path)
    if not os.path.exists(temp_path):
        os.makedirs(temp_path)

    # need_changed_files=True: the image-change detection below relies on
    # pr_info.changed_files being populated.
    pr_info = PRInfo(need_changed_files=True)
    changed_images, dockerhub_repo_name = get_changed_docker_images(
        pr_info, repo_path, "docker/images.json"
    )
    logging.info(
        "Has changed images %s", ", ".join([str(image[0]) for image in changed_images])
    )
    pr_commit_version = str(pr_info.number) + "-" + pr_info.sha
    # Each image gets a per-PR tag and a per-commit tag; master builds
    # (pr number 0) additionally get "latest".
    versions = [str(pr_info.number), pr_commit_version]
    if pr_info.number == 0:
        versions.append("latest")

    # Log in once so the per-image "--push" calls can reach Dockerhub.
    subprocess.check_output(
        "docker login --username 'robotclickhouse' --password '{}'".format(
            dockerhub_password
        ),
        shell=True,
    )

    # result_images maps image name -> the commit-specific version that was
    # pushed; it is written to changed_images.json for downstream jobs.
    result_images = {}
    images_processing_result = []
    for rel_path, image_name in changed_images:
        full_path = os.path.join(repo_path, rel_path)
        images_processing_result += process_single_image(
            versions, full_path, image_name
        )
        result_images[image_name] = pr_commit_version

    if changed_images:
        description = "Updated " + ",".join([im[1] for im in changed_images])
    else:
        description = "Nothing to update"

    # GitHub commit status descriptions are limited to 140 characters.
    if len(description) >= 140:
        description = description[:136] + "..."

    s3_helper = S3Helper("https://s3.amazonaws.com")

    s3_path_prefix = (
        str(pr_info.number) + "/" + pr_info.sha + "/" + NAME.lower().replace(" ", "_")
    )
    # Uploads build logs to S3 and computes the overall success/failure state.
    status, test_results = process_test_results(
        s3_helper, images_processing_result, s3_path_prefix
    )

    ch_helper = ClickHouseHelper()
    url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results, [], NAME)

    # Hand-off file consumed by later workflow steps.
    with open(os.path.join(temp_path, "changed_images.json"), "w") as images_file:
        json.dump(result_images, images_file)

    print("::notice ::Report url: {}".format(url))
    print('::set-output name=url_output::"{}"'.format(url))

    # Publish the commit status on GitHub.
    gh = Github(get_best_robot_token())
    commit = get_commit(gh, pr_info.sha)
    commit.create_status(
        context=NAME, description=description, state=status, target_url=url
    )

    # Record the check results in the CI metrics ClickHouse database.
    prepared_events = prepare_tests_results_for_clickhouse(
        pr_info,
        test_results,
        status,
        stopwatch.duration_seconds,
        stopwatch.start_time_str,
        url,
        NAME,
    )
    ch_helper.insert_events_into(db="gh-data", table="checks", events=prepared_events)