ClickHouse/tests/ci/env_helper.py

#!/usr/bin/env python

import logging
import os
from os import path as p
from typing import Tuple

from build_download_helper import APIException, get_gh_api

# --- Filesystem anchors -----------------------------------------------------
# Directory holding this module, and the repository root two levels above it.
module_dir = p.abspath(p.dirname(__file__))
git_root = p.abspath(p.join(module_dir, "..", ".."))
ROOT_DIR = git_root

# --- General CI settings ----------------------------------------------------
# Each value below is read from the environment variable of the same name and
# falls back to the default given as the second argument.
CI = bool(os.environ.get("CI"))  # True only for a set, non-empty variable
TEMP_PATH = os.environ.get("TEMP_PATH", p.abspath(p.join(module_dir, "./tmp")))
REPORT_PATH = f"{TEMP_PATH}/reports"
CACHES_PATH = os.environ.get("CACHES_PATH", TEMP_PATH)
IMAGES_PATH = os.environ.get("IMAGES_PATH", TEMP_PATH)
RUNNER_TEMP = os.environ.get("RUNNER_TEMP", p.abspath(p.join(module_dir, "./tmp")))
# FIXME: latest should not be used in CI, set temporary for transition to "docker with digest as a tag"
DOCKER_TAG = os.environ.get("DOCKER_TAG", "latest")
CLOUDFLARE_TOKEN = os.environ.get("CLOUDFLARE_TOKEN")  # None when not provided

# --- GitHub Actions context -------------------------------------------------
GITHUB_EVENT_PATH = os.environ.get("GITHUB_EVENT_PATH", "")
# A non-empty GITHUB_JOB_OVERRIDDEN takes precedence over GITHUB_JOB
GITHUB_JOB = os.environ.get("GITHUB_JOB_OVERRIDDEN") or os.environ.get(
    "GITHUB_JOB", "local"
)
GITHUB_REPOSITORY = os.environ.get("GITHUB_REPOSITORY", "ClickHouse/ClickHouse")
GITHUB_RUN_ID = os.environ.get("GITHUB_RUN_ID", "0")
GITHUB_SERVER_URL = os.environ.get("GITHUB_SERVER_URL", "https://github.com")
GITHUB_UPSTREAM_REPOSITORY = os.environ.get(
    "GITHUB_UPSTREAM_REPOSITORY", "ClickHouse/ClickHouse"
)
GITHUB_WORKSPACE = os.environ.get("GITHUB_WORKSPACE", git_root)
GITHUB_RUN_URL = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/actions/runs/{GITHUB_RUN_ID}"
REPO_COPY = os.environ.get("REPO_COPY", GITHUB_WORKSPACE)

# --- S3 locations -----------------------------------------------------------
S3_BUILDS_BUCKET = os.environ.get("S3_BUILDS_BUCKET", "clickhouse-builds")
S3_BUILDS_BUCKET_PUBLIC = "clickhouse-builds"  # fixed, not configurable via env
S3_TEST_REPORTS_BUCKET = os.environ.get(
    "S3_TEST_REPORTS_BUCKET", "clickhouse-test-reports"
)
S3_URL = os.environ.get("S3_URL", "https://s3.amazonaws.com")
S3_DOWNLOAD = os.environ.get("S3_DOWNLOAD", S3_URL)
# Only the host and bucket are filled in here; the remaining placeholders are
# left for a later str.format() call.
S3_ARTIFACT_DOWNLOAD_TEMPLATE = (
    f"{S3_DOWNLOAD}/{S3_BUILDS_BUCKET}/"
    "{pr_or_release}/{commit}/{build_name}/{artifact}"
)

# Lazily filled caches for the current job: populated on demand by
# GITHUB_JOB_ID() and never reset afterwards.
_GITHUB_JOB_ID = ""
_GITHUB_JOB_URL = ""
_GITHUB_JOB_API_URL = ""


def GITHUB_JOB_ID(safe: bool = True) -> str:
    """Return the ID of the current GitHub Actions job, resolving it on first use.

    A successful lookup through get_job_id_url() also fills the module-level
    caches for the job's HTML URL and API URL. Once a non-empty ID is cached,
    later calls return it without another lookup.

    Args:
        safe: when True, an APIException raised by the lookup is logged and
            the (still empty) cached value is returned; when False the
            exception is re-raised after logging.

    Returns:
        The cached job ID, or an empty string if the lookup failed in safe mode.

    Raises:
        APIException: if the lookup fails and `safe` is False.
    """
    global _GITHUB_JOB_ID
    global _GITHUB_JOB_URL
    global _GITHUB_JOB_API_URL
    if _GITHUB_JOB_ID:
        return _GITHUB_JOB_ID
    try:
        _GITHUB_JOB_ID, _GITHUB_JOB_URL, _GITHUB_JOB_API_URL = get_job_id_url(
            GITHUB_JOB
        )
    except APIException as e:
        logging.warning("Unable to retrieve the job info from GH API: %s", e)
        if not safe:
            # Bare raise re-raises the active exception with its traceback
            # untouched, same as GITHUB_JOB_URL does.
            raise
    return _GITHUB_JOB_ID


def GITHUB_JOB_URL(safe: bool = True) -> str:
    """Return the HTML URL of the current GitHub Actions job.

    Args:
        safe: when True, a failed job lookup falls back to GITHUB_RUN_URL
            instead of failing; when False the APIException propagates.

    Returns:
        The cached job URL after a successful lookup (it may be empty when
        the lookup completed without finding the job), or GITHUB_RUN_URL when
        the lookup raised and `safe` is True.

    Raises:
        APIException: if the lookup fails and `safe` is False.
    """
    try:
        # The lookup must run in non-safe mode: with the default safe=True,
        # GITHUB_JOB_ID swallows APIException itself, which made the handler
        # below unreachable (no fallback URL and no re-raise ever happened).
        GITHUB_JOB_ID(safe=False)
    except APIException:
        if safe:
            logging.warning("Using run URL as a fallback to not fail the job")
            return GITHUB_RUN_URL
        raise

    return _GITHUB_JOB_URL


def GITHUB_JOB_API_URL(safe: bool = True) -> str:
    """Return the API URL of the current GitHub Actions job.

    Args:
        safe: forwarded to GITHUB_JOB_ID(); when False, an APIException from
            the job lookup propagates to the caller.

    Returns:
        The cached API URL; an empty string if it has not been resolved.
    """
    # Called only for its side effect of populating the module-level caches;
    # the returned job ID itself is not needed here.
    GITHUB_JOB_ID(safe=safe)
    return _GITHUB_JOB_API_URL


def get_job_id_url(job_name: str) -> Tuple[str, str, str]:
    """Look up the ID, HTML URL and API URL of a job in the current workflow run.

    The jobs of run GITHUB_RUN_ID are listed page by page and searched for a
    job whose name equals `job_name`. If none is found, a second pass accepts
    a nested-workflow job whose last " / "-separated name part equals
    `job_name`, but only when exactly one such job exists.

    Args:
        job_name: the job name to search for.

    Returns:
        A `(job_id, job_url, job_api_url)` tuple. When GITHUB_RUN_ID is "0" or
        no unambiguous match exists, the result is `("0", "", "")`.
        NOTE(review): for a matched job the `id` field of the API response is
        passed through unchanged, so it may not be a `str` despite the
        annotation - confirm against the callers.
    """
    if GITHUB_RUN_ID == "0":
        # No real workflow run to query, so the API is not contacted at all
        return "0", "", ""

    seen_jobs = []
    page_number = 1
    while True:
        payload = get_gh_api(
            f"https://api.github.com/repos/{GITHUB_REPOSITORY}/"
            f"actions/runs/{GITHUB_RUN_ID}/jobs?per_page=100&page={page_number}"
        ).json()
        page_number += 1
        page_jobs = payload["jobs"]
        seen_jobs.extend(page_jobs)
        for candidate in page_jobs:
            if candidate["name"] == job_name:
                # Exact name match: no need to fetch the remaining pages
                return candidate["id"], candidate["html_url"], candidate["url"]
        if (
            len(seen_jobs) >= payload["total_count"]  # just in case of inconsistency
            or not page_jobs  # we ran past the last page
        ):
            break

    # This is a terrible workaround for the case of another broken part of
    # GitHub actions. For nested workflows it doesn't provide a proper job_name
    # value, but only the final one. So, for `OriginalJob / NestedJob / FinalJob`
    # full name, job_name contains only FinalJob
    nested_matches = []
    for candidate in seen_jobs:
        name_parts = candidate["name"].split(" / ")
        # Names without a separator were already ruled out by the exact match
        if len(name_parts) > 1 and name_parts[-1] == job_name:
            nested_matches.append(candidate)

    if len(nested_matches) == 1:
        # The best case scenario: the trailing name part is unambiguous
        (only_match,) = nested_matches
        return only_match["id"], only_match["html_url"], only_match["url"]

    if nested_matches:
        logging.error(
            "We could not get the ID and URL for the current job name %s, there "
            "are more than one jobs match it for the nested workflows. Please, "
            "refer to https://github.com/actions/runner/issues/2577",
            job_name,
        )

    return "0", "", ""
Decouple env_helper.get_job_id_url 2023-09-05 12:37:37 +00:00			`#!/usr/bin/env python`

The best effor to get the current job ID and URL This partially addresses https://github.com/actions/runner/issues/2577 2023-05-12 10:17:37 +00:00			`import logging`
refactor CI tests 2021-11-26 14:00:09 +00:00			`import os`
Rework env_helper to return correct paths 2022-03-23 10:53:32 +00:00			`from os import path as p`
Decouple env_helper.get_job_id_url 2023-09-05 12:37:37 +00:00			`from typing import Tuple`
Rework env_helper to return correct paths 2022-03-23 10:53:32 +00:00
Do not fail job on failed get_job_id_url 2024-04-15 12:32:02 +00:00			`from build_download_helper import APIException, get_gh_api`
Add JOB_ID and JOB_URL getters 2022-08-09 16:34:12 +00:00
Rework env_helper to return correct paths 2022-03-23 10:53:32 +00:00			`module_dir = p.abspath(p.dirname(__file__))`
			`git_root = p.abspath(p.join(module_dir, "..", ".."))`
Reapply "improve CI with digest for docker, build and test jobs" (#57904) * Revert "Revert "improve CI with digest for docker, build and test jobs"" * fix: docker manifest merge for missing images only 2023-12-18 08:07:22 +00:00			`ROOT_DIR = git_root`
refactor CI tests 2021-11-26 14:00:09 +00:00			`CI = bool(os.getenv("CI"))`
Add failed builds to the build report 2022-05-25 13:15:11 +00:00			`TEMP_PATH = os.getenv("TEMP_PATH", p.abspath(p.join(module_dir, "./tmp")))`
Reapply "improve CI with digest for docker, build and test jobs" (#57904) * Revert "Revert "improve CI with digest for docker, build and test jobs"" * fix: docker manifest merge for missing images only 2023-12-18 08:07:22 +00:00			`REPORT_PATH = f"{TEMP_PATH}/reports"`
			`# FIXME: latest should not be used in CI, set temporary for transition to "docker with digest as a tag"`
			`DOCKER_TAG = os.getenv("DOCKER_TAG", "latest")`
refactor CI tests 2021-11-26 14:00:09 +00:00			`CACHES_PATH = os.getenv("CACHES_PATH", TEMP_PATH)`
			`CLOUDFLARE_TOKEN = os.getenv("CLOUDFLARE_TOKEN")`
Fix linter issues 2022-05-16 18:39:10 +00:00			`GITHUB_EVENT_PATH = os.getenv("GITHUB_EVENT_PATH", "")`
Work around issues with GH callable actions 2023-11-07 14:56:00 +00:00			`GITHUB_JOB = os.getenv("GITHUB_JOB_OVERRIDDEN", "") or os.getenv("GITHUB_JOB", "local")`
refactor CI tests 2021-11-26 14:00:09 +00:00			`GITHUB_REPOSITORY = os.getenv("GITHUB_REPOSITORY", "ClickHouse/ClickHouse")`
Make GITHUB_RUN_URL variable and use it 2022-03-24 14:37:53 +00:00			`GITHUB_RUN_ID = os.getenv("GITHUB_RUN_ID", "0")`
refactor CI tests 2021-11-26 14:00:09 +00:00			`GITHUB_SERVER_URL = os.getenv("GITHUB_SERVER_URL", "https://github.com")`
Add processing `A Sync` to ci.py 2024-05-03 15:45:39 +00:00			`GITHUB_UPSTREAM_REPOSITORY = os.getenv(`
			`"GITHUB_UPSTREAM_REPOSITORY", "ClickHouse/ClickHouse"`
			`)`
Rework env_helper to return correct paths 2022-03-23 10:53:32 +00:00			`GITHUB_WORKSPACE = os.getenv("GITHUB_WORKSPACE", git_root)`
Make GITHUB_RUN_URL variable and use it 2022-03-24 14:37:53 +00:00			`GITHUB_RUN_URL = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/actions/runs/{GITHUB_RUN_ID}"`
Add default IMAGES_PATH value 2022-04-07 16:22:29 +00:00			`IMAGES_PATH = os.getenv("IMAGES_PATH", TEMP_PATH)`
Rewrite style checks to reusable workflow 2023-11-10 20:16:54 +00:00			`REPO_COPY = os.getenv("REPO_COPY", GITHUB_WORKSPACE)`
Rework env_helper to return correct paths 2022-03-23 10:53:32 +00:00			`RUNNER_TEMP = os.getenv("RUNNER_TEMP", p.abspath(p.join(module_dir, "./tmp")))`
refactor CI tests 2021-11-26 14:00:09 +00:00			`S3_BUILDS_BUCKET = os.getenv("S3_BUILDS_BUCKET", "clickhouse-builds")`
CI: Cancel sync wf on new push 2024-05-23 14:36:24 +00:00			`S3_BUILDS_BUCKET_PUBLIC = "clickhouse-builds"`
refactor CI tests 2021-11-26 14:00:09 +00:00			`S3_TEST_REPORTS_BUCKET = os.getenv("S3_TEST_REPORTS_BUCKET", "clickhouse-test-reports")`
Move download template to env_helper 2022-09-07 15:10:58 +00:00			`S3_URL = os.getenv("S3_URL", "https://s3.amazonaws.com")`
			`S3_DOWNLOAD = os.getenv("S3_DOWNLOAD", S3_URL)`
			`S3_ARTIFACT_DOWNLOAD_TEMPLATE = (`
			`f"{S3_DOWNLOAD}/{S3_BUILDS_BUCKET}/"`
			`"{pr_or_release}/{commit}/{build_name}/{artifact}"`
			`)`
Add JOB_ID and JOB_URL getters 2022-08-09 16:34:12 +00:00
			`# These parameters are set only on demand, and only once`
			`_GITHUB_JOB_ID = ""`
			`_GITHUB_JOB_URL = ""`
Work around issues with GH callable actions 2023-11-07 14:56:00 +00:00			`_GITHUB_JOB_API_URL = ""`
Add JOB_ID and JOB_URL getters 2022-08-09 16:34:12 +00:00

Do not fail job on failed get_job_id_url 2024-04-15 12:32:02 +00:00			`def GITHUB_JOB_ID(safe: bool = True) -> str:`
Add JOB_ID and JOB_URL getters 2022-08-09 16:34:12 +00:00			`global _GITHUB_JOB_ID`
			`global _GITHUB_JOB_URL`
Work around issues with GH callable actions 2023-11-07 14:56:00 +00:00			`global _GITHUB_JOB_API_URL`
Add JOB_ID and JOB_URL getters 2022-08-09 16:34:12 +00:00			`if _GITHUB_JOB_ID:`
			`return _GITHUB_JOB_ID`
Do not fail job on failed get_job_id_url 2024-04-15 12:32:02 +00:00			`try:`
			`_GITHUB_JOB_ID, _GITHUB_JOB_URL, _GITHUB_JOB_API_URL = get_job_id_url(`
			`GITHUB_JOB`
			`)`
			`except APIException as e:`
			`logging.warning("Unable to retrieve the job info from GH API: %s", e)`
			`if not safe:`
			`raise e`
Decouple env_helper.get_job_id_url 2023-09-05 12:37:37 +00:00			`return _GITHUB_JOB_ID`


Do not fail job on failed get_job_id_url 2024-04-15 12:32:02 +00:00			`def GITHUB_JOB_URL(safe: bool = True) -> str:`
			`try:`
			`GITHUB_JOB_ID()`
			`except APIException:`
			`if safe:`
			`logging.warning("Using run URL as a fallback to not fail the job")`
			`return GITHUB_RUN_URL`
Add forgotten "raise" on non-safe GITHUB_JOB_URL 2024-04-19 08:58:47 +00:00			`raise`

Decouple env_helper.get_job_id_url 2023-09-05 12:37:37 +00:00			`return _GITHUB_JOB_URL`


Do not fail job on failed get_job_id_url 2024-04-15 12:32:02 +00:00			`def GITHUB_JOB_API_URL(safe: bool = True) -> str:`
			`GITHUB_JOB_ID(safe)`
Work around issues with GH callable actions 2023-11-07 14:56:00 +00:00			`return _GITHUB_JOB_API_URL`


			`def get_job_id_url(job_name: str) -> Tuple[str, str, str]:`
Decouple env_helper.get_job_id_url 2023-09-05 12:37:37 +00:00			`job_id = ""`
			`job_url = ""`
Work around issues with GH callable actions 2023-11-07 14:56:00 +00:00			`job_api_url = ""`
Decouple env_helper.get_job_id_url 2023-09-05 12:37:37 +00:00			`if GITHUB_RUN_ID == "0":`
			`job_id = "0"`
			`if job_id:`
Work around issues with GH callable actions 2023-11-07 14:56:00 +00:00			`return job_id, job_url, job_api_url`
Add JOB_ID and JOB_URL getters 2022-08-09 16:34:12 +00:00			`jobs = []`
Fix pagination issue in GITHUB_JOB_ID() 2022-11-25 16:28:13 +00:00			`page = 1`
Decouple env_helper.get_job_id_url 2023-09-05 12:37:37 +00:00			`while not job_id:`
Add a fallback to authenticated requests to GH API 2023-04-28 16:26:50 +00:00			`response = get_gh_api(`
Add JOB_ID and JOB_URL getters 2022-08-09 16:34:12 +00:00			`f"https://api.github.com/repos/{GITHUB_REPOSITORY}/"`
Fix pagination issue in GITHUB_JOB_ID() 2022-11-25 16:28:13 +00:00			`f"actions/runs/{GITHUB_RUN_ID}/jobs?per_page=100&page={page}"`
Add JOB_ID and JOB_URL getters 2022-08-09 16:34:12 +00:00			`)`
Fix pagination issue in GITHUB_JOB_ID() 2022-11-25 16:28:13 +00:00			`page += 1`
Add JOB_ID and JOB_URL getters 2022-08-09 16:34:12 +00:00			`data = response.json()`
			`jobs.extend(data["jobs"])`
			`for job in data["jobs"]:`
Decouple env_helper.get_job_id_url 2023-09-05 12:37:37 +00:00			`if job["name"] != job_name:`
Add JOB_ID and JOB_URL getters 2022-08-09 16:34:12 +00:00			`continue`
Decouple env_helper.get_job_id_url 2023-09-05 12:37:37 +00:00			`job_id = job["id"]`
			`job_url = job["html_url"]`
Work around issues with GH callable actions 2023-11-07 14:56:00 +00:00			`job_api_url = job["url"]`
			`return job_id, job_url, job_api_url`
Fix pagination issue in GITHUB_JOB_ID() 2022-11-25 16:28:13 +00:00			`if (`
			`len(jobs) >= data["total_count"] # just in case of inconsistency`
			`or len(data["jobs"]) == 0 # if we excided pages`
			`):`
Decouple env_helper.get_job_id_url 2023-09-05 12:37:37 +00:00			`job_id = "0"`
Add JOB_ID and JOB_URL getters 2022-08-09 16:34:12 +00:00
Decouple env_helper.get_job_id_url 2023-09-05 12:37:37 +00:00			`if not job_url:`
The best effor to get the current job ID and URL This partially addresses https://github.com/actions/runner/issues/2577 2023-05-12 10:17:37 +00:00			`# This is a terrible workaround for the case of another broken part of`
Decouple env_helper.get_job_id_url 2023-09-05 12:37:37 +00:00			`# GitHub actions. For nested workflows it doesn't provide a proper job_name`
The best effor to get the current job ID and URL This partially addresses https://github.com/actions/runner/issues/2577 2023-05-12 10:17:37 +00:00			# value, but only the final one. So, for `OriginalJob / NestedJob / FinalJob`
Decouple env_helper.get_job_id_url 2023-09-05 12:37:37 +00:00			`# full name, job_name contains only FinalJob`
The best effor to get the current job ID and URL This partially addresses https://github.com/actions/runner/issues/2577 2023-05-12 10:17:37 +00:00			`matched_jobs = []`
			`for job in jobs:`
			`nested_parts = job["name"].split(" / ")`
			`if len(nested_parts) <= 1:`
			`continue`
Decouple env_helper.get_job_id_url 2023-09-05 12:37:37 +00:00			`if nested_parts[-1] == job_name:`
The best effor to get the current job ID and URL This partially addresses https://github.com/actions/runner/issues/2577 2023-05-12 10:17:37 +00:00			`matched_jobs.append(job)`
			`if len(matched_jobs) == 1:`
			`# The best case scenario`
Decouple env_helper.get_job_id_url 2023-09-05 12:37:37 +00:00			`job_id = matched_jobs[0]["id"]`
			`job_url = matched_jobs[0]["html_url"]`
Work around issues with GH callable actions 2023-11-07 14:56:00 +00:00			`job_api_url = matched_jobs[0]["url"]`
			`return job_id, job_url, job_api_url`
The best effor to get the current job ID and URL This partially addresses https://github.com/actions/runner/issues/2577 2023-05-12 10:17:37 +00:00			`if matched_jobs:`
			`logging.error(`
			`"We could not get the ID and URL for the current job name %s, there "`
			`"are more than one jobs match it for the nested workflows. Please, "`
			`"refer to https://github.com/actions/runner/issues/2577",`
Decouple env_helper.get_job_id_url 2023-09-05 12:37:37 +00:00			`job_name,`
The best effor to get the current job ID and URL This partially addresses https://github.com/actions/runner/issues/2577 2023-05-12 10:17:37 +00:00			`)`

Work around issues with GH callable actions 2023-11-07 14:56:00 +00:00			`return job_id, job_url, job_api_url`