Do not use the absolute latest docker image tag (#54369)

* Add params for select queries in clickhouse_helper.py

* Find the latest image tags from master

* Get full git history

* Do not checkout submodules

* Automatic style fix

* Fix style errors

* Make logging pretty

* Revert accidentally committed changes

* Eliminate pformat

* Update all workflows

* Remove flag with default value

* Improve exception handling

* Utilizing the loop condition in while loop

* Make the logic robust over temporary errors

* Prettify logs

* Automatic style fix

* Add accidentally removed line

* Revert "Make the logic robust over temporary errors"

This reverts commit 91e6b5d7ba.

* Do not fail in case the tag cannot be queried from ClickHouse

* Support partial finding of tags

---------

Co-authored-by: robot-clickhouse <robot-clickhouse@users.noreply.github.com>
This commit is contained in:
János Benjamin Antal 2023-09-08 08:42:56 +02:00 committed by GitHub
parent 8b35f0ebe1
commit 5862c4ec93
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 137 additions and 28 deletions

View File

@ -76,6 +76,7 @@ jobs:
uses: ClickHouse/checkout@v1 uses: ClickHouse/checkout@v1
with: with:
clear-repository: true clear-repository: true
fetch-depth: 0 # to find ancestor merge commits necessary for finding proper docker tags
- name: Download changed aarch64 images - name: Download changed aarch64 images
uses: actions/download-artifact@v3 uses: actions/download-artifact@v3
with: with:

View File

@ -73,6 +73,7 @@ jobs:
uses: ClickHouse/checkout@v1 uses: ClickHouse/checkout@v1
with: with:
clear-repository: true clear-repository: true
fetch-depth: 0 # to find ancestor merge commits necessary for finding proper docker tags
- name: Download changed aarch64 images - name: Download changed aarch64 images
uses: actions/download-artifact@v3 uses: actions/download-artifact@v3
with: with:

View File

@ -60,6 +60,7 @@ jobs:
uses: ClickHouse/checkout@v1 uses: ClickHouse/checkout@v1
with: with:
clear-repository: true clear-repository: true
fetch-depth: 0 # to find ancestor merge commits necessary for finding proper docker tags
- name: Download changed aarch64 images - name: Download changed aarch64 images
uses: actions/download-artifact@v3 uses: actions/download-artifact@v3
with: with:

View File

@ -53,6 +53,7 @@ jobs:
uses: ClickHouse/checkout@v1 uses: ClickHouse/checkout@v1
with: with:
clear-repository: true clear-repository: true
fetch-depth: 0 # to find ancestor merge commits necessary for finding proper docker tags
- name: Download changed aarch64 images - name: Download changed aarch64 images
uses: actions/download-artifact@v3 uses: actions/download-artifact@v3
with: with:

View File

@ -94,6 +94,7 @@ jobs:
uses: ClickHouse/checkout@v1 uses: ClickHouse/checkout@v1
with: with:
clear-repository: true clear-repository: true
fetch-depth: 0 # to find ancestor merge commits necessary for finding proper docker tags
- name: Download changed aarch64 images - name: Download changed aarch64 images
uses: actions/download-artifact@v3 uses: actions/download-artifact@v3
with: with:

View File

@ -52,6 +52,7 @@ jobs:
uses: ClickHouse/checkout@v1 uses: ClickHouse/checkout@v1
with: with:
clear-repository: true clear-repository: true
fetch-depth: 0 # to find ancestor merge commits necessary for finding proper docker tags
- name: Download changed aarch64 images - name: Download changed aarch64 images
uses: actions/download-artifact@v3 uses: actions/download-artifact@v3
with: with:

View File

@ -13,6 +13,10 @@ from pr_info import PRInfo
from report import TestResults from report import TestResults
class CHException(Exception):
    """Raised when a ClickHouse select query fails after all retries.

    Lets callers distinguish a fetch failure (best-effort, may be ignored)
    from an insert failure (InsertException).
    """

    pass
class InsertException(Exception): class InsertException(Exception):
pass pass
@ -131,12 +135,16 @@ class ClickHouseHelper:
if not safe: if not safe:
raise raise
def _select_and_get_json_each_row(self, db, query): def _select_and_get_json_each_row(self, db, query, query_params):
params = { params = {
"database": db, "database": db,
"query": query, "query": query,
"default_format": "JSONEachRow", "default_format": "JSONEachRow",
} }
if query_params is not None:
for name, value in query_params.items():
params[f"param_{name}"] = str(value)
for i in range(5): for i in range(5):
response = None response = None
try: try:
@ -144,15 +152,15 @@ class ClickHouseHelper:
response.raise_for_status() response.raise_for_status()
return response.text return response.text
except Exception as ex: except Exception as ex:
logging.warning("Cannot insert with exception %s", str(ex)) logging.warning("Select query failed with exception %s", str(ex))
if response: if response:
logging.warning("Reponse text %s", response.text) logging.warning("Response text %s", response.text)
time.sleep(0.1 * i) time.sleep(0.1 * i)
raise Exception("Cannot fetch data from clickhouse") raise CHException("Cannot fetch data from clickhouse")
def select_json_each_row(self, db, query): def select_json_each_row(self, db, query, query_params=None):
text = self._select_and_get_json_each_row(db, query) text = self._select_and_get_json_each_row(db, query, query_params)
result = [] result = []
for line in text.split("\n"): for line in text.split("\n"):
if line: if line:

View File

@ -9,7 +9,7 @@ import subprocess
import time import time
import sys import sys
from pathlib import Path from pathlib import Path
from typing import Any, Dict, List, Optional, Set, Tuple, Union from typing import Any, List, Optional, Set, Tuple, Union
from github import Github from github import Github
@ -23,13 +23,12 @@ from s3_helper import S3Helper
from stopwatch import Stopwatch from stopwatch import Stopwatch
from tee_popen import TeePopen from tee_popen import TeePopen
from upload_result_helper import upload_results from upload_result_helper import upload_results
from docker_images_helper import ImagesDict, IMAGES_FILE_PATH, get_images_dict
NAME = "Push to Dockerhub" NAME = "Push to Dockerhub"
TEMP_PATH = os.path.join(RUNNER_TEMP, "docker_images_check") TEMP_PATH = os.path.join(RUNNER_TEMP, "docker_images_check")
ImagesDict = Dict[str, dict]
class DockerImage: class DockerImage:
def __init__( def __init__(
@ -78,21 +77,6 @@ class DockerImage:
return f"DockerImage(path={self.path},repo={self.repo},parent={self.parent})" return f"DockerImage(path={self.path},repo={self.repo},parent={self.parent})"
def get_images_dict(repo_path: str, image_file_path: str) -> ImagesDict:
"""Return images suppose to build on the current architecture host"""
images_dict = {}
path_to_images_file = os.path.join(repo_path, image_file_path)
if os.path.exists(path_to_images_file):
with open(path_to_images_file, "rb") as dict_file:
images_dict = json.load(dict_file)
else:
logging.info(
"Image file %s doesn't exist in repo %s", image_file_path, repo_path
)
return images_dict
def get_changed_docker_images( def get_changed_docker_images(
pr_info: PRInfo, images_dict: ImagesDict pr_info: PRInfo, images_dict: ImagesDict
) -> Set[DockerImage]: ) -> Set[DockerImage]:
@ -410,7 +394,7 @@ def main():
shutil.rmtree(TEMP_PATH) shutil.rmtree(TEMP_PATH)
os.makedirs(TEMP_PATH) os.makedirs(TEMP_PATH)
images_dict = get_images_dict(GITHUB_WORKSPACE, "docker/images.json") images_dict = get_images_dict(GITHUB_WORKSPACE, IMAGES_FILE_PATH)
pr_info = PRInfo() pr_info = PRInfo()
if args.all: if args.all:

View File

@ -0,0 +1,30 @@
#!/usr/bin/env python3
import json
import logging
import os
from typing import Dict, List
# Canonical location of the images description file inside the repository.
IMAGES_FILE_PATH = "docker/images.json"

# Maps image path (e.g. "docker/test/base") to its description dict.
ImagesDict = Dict[str, dict]


def get_images_dict(repo_path: str, images_file_path: str) -> ImagesDict:
    """Load the description of images supposed to be built on this host.

    Reads ``<repo_path>/<images_file_path>`` as JSON; returns an empty
    dict (after logging) when the file does not exist.
    """
    images_file = os.path.join(repo_path, images_file_path)
    if not os.path.exists(images_file):
        logging.info(
            "Image file %s doesn't exist in repo %s", images_file_path, repo_path
        )
        return {}
    with open(images_file, "rb") as fd:
        return json.load(fd)


def get_image_names(repo_path: str, images_file_path: str) -> List[str]:
    """Return the image names listed in the images description file."""
    images = get_images_dict(repo_path, images_file_path)
    return [info["name"] for info in images.values()]

View File

@ -9,10 +9,16 @@ import subprocess
from typing import List, Dict, Tuple from typing import List, Dict, Tuple
from github import Github from github import Github
from clickhouse_helper import ClickHouseHelper, prepare_tests_results_for_clickhouse from clickhouse_helper import (
ClickHouseHelper,
prepare_tests_results_for_clickhouse,
CHException,
)
from commit_status_helper import format_description, get_commit, post_commit_status from commit_status_helper import format_description, get_commit, post_commit_status
from env_helper import RUNNER_TEMP from docker_images_helper import IMAGES_FILE_PATH, get_image_names
from env_helper import RUNNER_TEMP, GITHUB_WORKSPACE
from get_robot_token import get_best_robot_token, get_parameter_from_ssm from get_robot_token import get_best_robot_token, get_parameter_from_ssm
from git_helper import Runner
from pr_info import PRInfo from pr_info import PRInfo
from report import TestResults, TestResult from report import TestResults, TestResult
from s3_helper import S3Helper from s3_helper import S3Helper
@ -167,6 +173,74 @@ def create_manifest(image: str, tags: List[str], push: bool) -> Tuple[str, str]:
return manifest, "OK" return manifest, "OK"
def enrich_images(changed_images: Dict[str, str]) -> None:
    """Fill ``changed_images`` with tags for images NOT rebuilt by this PR.

    For every image name missing from ``changed_images``, find the most
    recent master merge commit whose 'Push multi-arch images to Dockerhub'
    check recorded that image in ClickHouse, and use that commit's tag.
    Mutates ``changed_images`` in place and returns None.

    Raises CHException if the tags cannot be queried from ClickHouse
    (propagated from select_json_each_row).
    """
    all_image_names = get_image_names(GITHUB_WORKSPACE, IMAGES_FILE_PATH)

    # Only images absent from changed_images still need a tag looked up.
    images_to_find_tags_for = [
        image for image in all_image_names if image not in changed_images
    ]
    # Sorted purely so the log output below is deterministic and readable.
    images_to_find_tags_for.sort()

    logging.info(
        "Trying to find versions for images:\n %s", "\n ".join(images_to_find_tags_for)
    )

    COMMIT_SHA_BATCH_SIZE = 100
    MAX_COMMIT_BATCHES_TO_CHECK = 10
    # Gets the sha of the last COMMIT_SHA_BATCH_SIZE commits after skipping some commits (see below)
    # NOTE(review): only merge commits are listed (--merges); the {} placeholder
    # is filled with the number of commits to skip for each batch.
    LAST_N_ANCESTOR_SHA_COMMAND = f"git log --format=format:'%H' --max-count={COMMIT_SHA_BATCH_SIZE} --skip={{}} --merges"
    git_runner = Runner()

    # Server-side parameterized query ({name:Type} placeholders are bound via
    # the query_params argument of select_json_each_row, not str.format).
    GET_COMMIT_SHAS_QUERY = """
        WITH {commit_shas:Array(String)} AS commit_shas,
             {images:Array(String)} AS images
        SELECT
            substring(test_name, 1, position(test_name, ':') -1) AS image_name,
            argMax(commit_sha, check_start_time) AS commit_sha
        FROM checks
        WHERE
            check_name == 'Push multi-arch images to Dockerhub'
            AND position(test_name, checks.commit_sha)
            AND checks.commit_sha IN commit_shas
            AND image_name IN images
        GROUP BY image_name
        """

    batch_count = 0
    ch_helper = ClickHouseHelper()

    # Walk backwards through master's merge history, one batch of SHAs at a
    # time, until every image has a tag or the batch limit is exhausted.
    # Partial results are fine: whatever is found is written into
    # changed_images; the rest is left for the caller to handle.
    while (
        batch_count <= MAX_COMMIT_BATCHES_TO_CHECK and len(images_to_find_tags_for) != 0
    ):
        commit_shas = git_runner(
            LAST_N_ANCESTOR_SHA_COMMAND.format(batch_count * COMMIT_SHA_BATCH_SIZE)
        ).split("\n")
        result = ch_helper.select_json_each_row(
            "default",
            GET_COMMIT_SHAS_QUERY,
            {"commit_shas": commit_shas, "images": images_to_find_tags_for},
        )
        # Sorted for deterministic log output only.
        result.sort(key=lambda x: x["image_name"])
        logging.info(
            "Found images for commits %s..%s:\n %s",
            commit_shas[0],
            commit_shas[-1],
            "\n ".join(f"{im['image_name']}:{im['commit_sha']}" for im in result),
        )
        for row in result:
            image_name = row["image_name"]
            commit_sha = row["commit_sha"]
            # As we only get the SHAs of merge commits from master, the PR number will be always 0
            tag = f"0-{commit_sha}"
            changed_images[image_name] = tag
            # Found images no longer need to be searched in older batches.
            images_to_find_tags_for.remove(image_name)

        batch_count += 1
def main(): def main():
logging.basicConfig(level=logging.INFO) logging.basicConfig(level=logging.INFO)
stopwatch = Stopwatch() stopwatch = Stopwatch()
@ -198,6 +272,12 @@ def main():
if test_result != "OK": if test_result != "OK":
status = "failure" status = "failure"
try:
# changed_images now contains all the images that are changed in this PR. Let's find the latest tag for the images that are not changed.
enrich_images(changed_images)
except CHException as ex:
logging.warning("Couldn't get proper tags for not changed images: %s", ex)
with open( with open(
os.path.join(args.path, "changed_images.json"), "w", encoding="utf-8" os.path.join(args.path, "changed_images.json"), "w", encoding="utf-8"
) as ci: ) as ci:

View File

@ -9,6 +9,7 @@ from env_helper import GITHUB_RUN_URL
from pr_info import PRInfo from pr_info import PRInfo
from report import TestResult from report import TestResult
import docker_images_check as di import docker_images_check as di
from docker_images_helper import get_images_dict
from version_helper import get_version_from_string from version_helper import get_version_from_string
import docker_server as ds import docker_server as ds
@ -31,7 +32,7 @@ class TestDockerImageCheck(unittest.TestCase):
images = sorted( images = sorted(
list( list(
di.get_changed_docker_images( di.get_changed_docker_images(
pr_info, di.get_images_dict("/", self.docker_images_path) pr_info, get_images_dict("/", self.docker_images_path)
) )
) )
) )