diff --git a/tests/ci/docker_images_check.py b/tests/ci/docker_images_check.py index d698d18a58b..2fbca7ab548 100644 --- a/tests/ci/docker_images_check.py +++ b/tests/ci/docker_images_check.py @@ -6,7 +6,7 @@ import os import shutil import subprocess import time -from typing import List, Tuple +from typing import List, Optional, Set, Tuple, Union from github import Github @@ -24,9 +24,37 @@ NAME = "Push to Dockerhub (actions)" TEMP_PATH = os.path.join(RUNNER_TEMP, "docker_images_check") +class DockerImage: + def __init__( + self, + path: str, + repo: str, + parent: Optional["DockerImage"] = None, + gh_repo_path: str = GITHUB_WORKSPACE, + ): + self.path = path + self.full_path = os.path.join(gh_repo_path, path) + self.repo = repo + self.parent = parent + self.built = False + + def __eq__(self, other): + """Is used to check if DockerImage is in a set or not""" + return self.path == other.path + + def __hash__(self): + return hash(self.path) + + def __str__(self): + return self.repo + + def __repr__(self): + return f"DockerImage(path={self.path},path={self.path},parent={self.parent})" + + def get_changed_docker_images( pr_info: PRInfo, repo_path: str, image_file_path: str -) -> List[Tuple[str, str]]: +) -> Set[DockerImage]: images_dict = {} path_to_images_file = os.path.join(repo_path, image_file_path) if os.path.exists(path_to_images_file): @@ -38,7 +66,7 @@ def get_changed_docker_images( ) if not images_dict: - return [] + return set() files_changed = pr_info.changed_files @@ -54,14 +82,15 @@ def get_changed_docker_images( for dockerfile_dir, image_description in images_dict.items(): for f in files_changed: if f.startswith(dockerfile_dir): + name = image_description["name"] logging.info( "Found changed file '%s' which affects " "docker image '%s' with path '%s'", f, - image_description["name"], + name, dockerfile_dir, ) - changed_images.append(dockerfile_dir) + changed_images.append(DockerImage(dockerfile_dir, name)) break # The order is important: dependents should go later than bases, so that @@ -69,14 +98,14 @@ def get_changed_docker_images( index = 0 while index < len(changed_images): image = changed_images[index] - for dependent in images_dict[image]["dependent"]: + for dependent in images_dict[image.path]["dependent"]: logging.info( "Marking docker image '%s' as changed because it " "depends on changed docker image '%s'", dependent, image, ) - changed_images.append(dependent) + changed_images.append(DockerImage(dependent, image.repo, image)) index += 1 if index > 5 * len(images_dict): # Sanity check to prevent infinite loop. @@ -84,19 +113,9 @@ def get_changed_docker_images( f"Too many changed docker images, this is a bug. {changed_images}" ) - # If a dependent image was already in the list because its own files - # changed, but then it was added as a dependent of a changed base, we - # must remove the earlier entry so that it doesn't go earlier than its - # base. This way, the dependent will be rebuilt later than the base, and - # will correctly use the updated version of the base. - seen = set() - no_dups_reversed = [] - for x in reversed(changed_images): - if x not in seen: - seen.add(x) - no_dups_reversed.append(x) - - result = [(x, images_dict[x]["name"]) for x in reversed(no_dups_reversed)] + # With reversed changed_images set will use images with parents first, and + # images without parents then + result = set(reversed(changed_images)) logging.info( "Changed docker images for PR %s @ %s: '%s'", pr_info.number, @@ -106,66 +125,112 @@ def get_changed_docker_images( return result +def gen_versions( + pr_info: PRInfo, suffix: Optional[str] +) -> Tuple[List[str], Union[str, List[str]]]: + pr_commit_version = str(pr_info.number) + "-" + pr_info.sha + # The order is important, PR number is used as cache during the build + versions = [str(pr_info.number), pr_commit_version] + result_version = pr_commit_version + if pr_info.number == 0: + # First get the latest for cache + versions.insert(0, "latest") + + if suffix: + # We should build architecture specific images separately and merge a + # manifest lately in a different script + versions = [f"{v}-{suffix}" for v in versions] + # changed_images_{suffix}.json should contain all changed images + result_version = versions + + return versions, result_version + + def build_and_push_one_image( - path_to_dockerfile_folder: str, image_name: str, version_string: str, push: bool + image: DockerImage, + version_string: str, + push: bool, + child: bool, ) -> Tuple[bool, str]: - path = path_to_dockerfile_folder logging.info( "Building docker image %s with version %s from path %s", - image_name, + image.repo, version_string, - path, + image.full_path, ) build_log = os.path.join( TEMP_PATH, "build_and_push_log_{}_{}".format( - str(image_name).replace("/", "_"), version_string + str(image.repo).replace("/", "_"), version_string ), ) push_arg = "" if push: push_arg = "--push " + from_tag_arg = "" + if child: + from_tag_arg = f"--build-arg FROM_TAG={version_string} " + with open(build_log, "w") as bl: cmd = ( "docker buildx build --builder default " - f"--build-arg FROM_TAG={version_string} " + f"{from_tag_arg}" f"--build-arg BUILDKIT_INLINE_CACHE=1 " - f"--tag {image_name}:{version_string} " - f"--cache-from type=registry,ref={image_name}:{version_string} " + f"--tag {image.repo}:{version_string} " + f"--cache-from type=registry,ref={image.repo}:{version_string} " f"{push_arg}" - f"--progress plain {path}" + f"--progress plain {image.full_path}" ) logging.info("Docker command to run: %s", cmd) - retcode = subprocess.Popen(cmd, shell=True, stderr=bl, stdout=bl).wait() + with subprocess.Popen(cmd, shell=True, stderr=bl, stdout=bl) as proc: + retcode = proc.wait() + if retcode != 0: return False, build_log - logging.info("Processing of %s successfully finished", image_name) + logging.info("Processing of %s successfully finished", image.repo) return True, build_log def process_single_image( - versions: List[str], path_to_dockerfile_folder: str, image_name: str, push: bool + image: DockerImage, + versions: List[str], + push: bool, + child: bool, ) -> List[Tuple[str, str, str]]: logging.info("Image will be pushed with versions %s", ", ".join(versions)) result = [] for ver in versions: for i in range(5): - success, build_log = build_and_push_one_image( - path_to_dockerfile_folder, image_name, ver, push - ) + success, build_log = build_and_push_one_image(image, ver, push, child) if success: - result.append((image_name + ":" + ver, build_log, "OK")) + result.append((image.repo + ":" + ver, build_log, "OK")) break logging.info( "Got error will retry %s time and sleep for %s seconds", i, i * 5 ) time.sleep(i * 5) else: - result.append((image_name + ":" + ver, build_log, "FAIL")) + result.append((image.repo + ":" + ver, build_log, "FAIL")) logging.info("Processing finished") + image.built = True + return result + + +def process_image_with_parents( + image: DockerImage, versions: List[str], push: bool, child: bool = False +) -> List[Tuple[str, str, str]]: + result = [] # type: List[Tuple[str,str,str]] + if image.built: + return result + + if image.parent is not None: + result += process_image_with_parents(image.parent, versions, push, False) + child = True + + result += process_single_image(image, versions, push, child) return result @@ -255,8 +320,6 @@ def main(): shell=True, ) - repo_path = GITHUB_WORKSPACE - if os.path.exists(TEMP_PATH): shutil.rmtree(TEMP_PATH) os.makedirs(TEMP_PATH) @@ -267,36 +330,23 @@ def main(): else: pr_info = PRInfo(need_changed_files=True) - changed_images = get_changed_docker_images(pr_info, repo_path, "docker/images.json") - logging.info( - "Has changed images %s", ", ".join([str(image[0]) for image in changed_images]) + changed_images = get_changed_docker_images( + pr_info, GITHUB_WORKSPACE, "docker/images.json" ) - pr_commit_version = str(pr_info.number) + "-" + pr_info.sha - # The order is important, PR number is used as cache during the build - versions = [str(pr_info.number), pr_commit_version] - result_version = pr_commit_version - if pr_info.number == 0: - # First get the latest for cache - versions.insert(0, "latest") + logging.info("Has changed images %s", ", ".join([im.path for im in changed_images])) - if args.suffix: - # We should build architecture specific images separately and merge a - # manifest lately in a different script - versions = [f"{v}-{args.suffix}" for v in versions] - # changed_images_{suffix}.json should contain all changed images - result_version = versions + image_versions, result_version = gen_versions(pr_info, args.suffix) result_images = {} images_processing_result = [] - for rel_path, image_name in changed_images: - full_path = os.path.join(repo_path, rel_path) - images_processing_result += process_single_image( - versions, full_path, image_name, push + for image in changed_images: + images_processing_result += process_image_with_parents( + image, image_versions, push ) - result_images[image_name] = result_version + result_images[image.repo] = result_version if changed_images: - description = "Updated " + ",".join([im[1] for im in changed_images]) + description = "Updated " + ",".join([im.repo for im in changed_images]) else: description = "Nothing to update" diff --git a/tests/ci/docker_test.py b/tests/ci/docker_test.py index da635631c54..3fed5112005 100644 --- a/tests/ci/docker_test.py +++ b/tests/ci/docker_test.py @@ -2,10 +2,13 @@ import os import unittest +from unittest.mock import patch from pr_info import PRInfo import docker_images_check as di +# di.logging.basicConfig(level=di.logging.INFO) + class TestDockerImageCheck(unittest.TestCase): docker_images_path = os.path.join( @@ -13,28 +16,84 @@ class TestDockerImageCheck(unittest.TestCase): ) def test_get_changed_docker_images(self): - pr_info = PRInfo() + pr_info = PRInfo(PRInfo.default_event.copy()) pr_info.changed_files = { "docker/test/stateless", "docker/test/base", "docker/docs/builder", } images = di.get_changed_docker_images(pr_info, "/", self.docker_images_path) - expected = [ - ("docker/test/base", "clickhouse/test-base"), - ("docker/docs/builder", "clickhouse/docs-builder"), - ("docker/test/stateless", "clickhouse/stateless-test"), - ("docker/test/integration/base", "clickhouse/integration-test"), - ("docker/test/fuzzer", "clickhouse/fuzzer"), - ("docker/test/keeper-jepsen", "clickhouse/keeper-jepsen-test"), - ("docker/docs/check", "clickhouse/docs-check"), - ("docker/docs/release", "clickhouse/docs-release"), - ("docker/test/stateful", "clickhouse/stateful-test"), - ("docker/test/unit", "clickhouse/unit-test"), - ("docker/test/stress", "clickhouse/stress-test"), - ] + expected = { + di.DockerImage("docker/test/base", "clickhouse/test-base"), + di.DockerImage("docker/docs/builder", "clickhouse/docs-builder"), + di.DockerImage("docker/test/stateless", "clickhouse/stateless-test"), + di.DockerImage( + "docker/test/integration/base", "clickhouse/integration-test" + ), + di.DockerImage("docker/test/fuzzer", "clickhouse/fuzzer"), + di.DockerImage( + "docker/test/keeper-jepsen", "clickhouse/keeper-jepsen-test" + ), + di.DockerImage("docker/docs/check", "clickhouse/docs-check"), + di.DockerImage("docker/docs/release", "clickhouse/docs-release"), + di.DockerImage("docker/test/stateful", "clickhouse/stateful-test"), + di.DockerImage("docker/test/unit", "clickhouse/unit-test"), + di.DockerImage("docker/test/stress", "clickhouse/stress-test"), + } self.assertEqual(images, expected) + def test_gen_version(self): + pr_info = PRInfo(PRInfo.default_event.copy()) + versions, result_version = di.gen_versions(pr_info, None) + self.assertEqual(versions, ["latest", "0", "0-HEAD"]) + self.assertEqual(result_version, "0-HEAD") + versions, result_version = di.gen_versions(pr_info, "suffix") + self.assertEqual(versions, ["latest-suffix", "0-suffix", "0-HEAD-suffix"]) + self.assertEqual(result_version, versions) + pr_info.number = 1 + versions, result_version = di.gen_versions(pr_info, None) + self.assertEqual(versions, ["1", "1-HEAD"]) + self.assertEqual(result_version, "1-HEAD") + + @patch("builtins.open") + @patch("subprocess.Popen") + def test_build_and_push_one_image(self, mock_popen, mock_open): + mock_popen.return_value.__enter__.return_value.wait.return_value = 0 + image = di.DockerImage("path", "name", gh_repo_path="") + + result, _ = di.build_and_push_one_image(image, "version", True, True) + mock_open.assert_called_once() + mock_popen.assert_called_once() + self.assertIn( + "docker buildx build --builder default --build-arg FROM_TAG=version " + "--build-arg BUILDKIT_INLINE_CACHE=1 --tag name:version --cache-from " + "type=registry,ref=name:version --push --progress plain path", + mock_popen.call_args.args, + ) + self.assertTrue(result) + + mock_open.reset() + mock_popen.reset() + mock_popen.return_value.__enter__.return_value.wait.return_value = 0 + result, _ = di.build_and_push_one_image(image, "version2", False, True) + self.assertIn( + "docker buildx build --builder default --build-arg FROM_TAG=version2 " + "--build-arg BUILDKIT_INLINE_CACHE=1 --tag name:version2 --cache-from " + "type=registry,ref=name:version2 --progress plain path", + mock_popen.call_args.args, + ) + self.assertTrue(result) + + mock_popen.return_value.__enter__.return_value.wait.return_value = 1 + result, _ = di.build_and_push_one_image(image, "version2", False, False) + self.assertIn( + "docker buildx build --builder default " + "--build-arg BUILDKIT_INLINE_CACHE=1 --tag name:version2 --cache-from " + "type=registry,ref=name:version2 --progress plain path", + mock_popen.call_args.args, + ) + self.assertFalse(result) + if __name__ == "__main__": unittest.main()