ClickHouse/tests/ci/pr_info.py

370 lines
14 KiB
Python
Raw Normal View History

2021-09-15 12:59:39 +00:00
#!/usr/bin/env python3
import json
import logging
2021-10-29 13:57:47 +00:00
import os
from typing import Dict, List, Set, Union
2021-10-29 13:57:47 +00:00
from unidiff import PatchSet # type: ignore
2021-09-15 16:32:17 +00:00
from build_download_helper import get_gh_api
2022-01-26 11:09:35 +00:00
from env_helper import (
GITHUB_REPOSITORY,
GITHUB_SERVER_URL,
GITHUB_RUN_URL,
2022-01-26 11:09:35 +00:00
GITHUB_EVENT_PATH,
)
2022-04-21 14:33:46 +00:00
# PR label that disables every `PRInfo.can_skip_*` shortcut
FORCE_TESTS_LABEL = "force tests"
# PR label name; not referenced in this part of the module
# NOTE(review): presumably consumed by the mergeable-check scripts - confirm
SKIP_MERGEABLE_CHECK_LABEL = "skip mergeable check"

# Type alias; not referenced in this part of the module
# NOTE(review): looks like the shape of a workflow `needs` context - confirm
NeedsDataType = Dict[str, Dict[str, Union[str, Dict[str, str]]]]

# Extensions of the files under `docs/` that count as documentation changes,
# see `PRInfo.has_changes_in_documentation`
DIFF_IN_DOCUMENTATION_EXT = [
    ".html",
    ".md",
    ".yml",
    ".txt",
    ".css",
    ".js",
    ".xml",
    ".ico",
    ".conf",
    ".svg",
    ".png",
    ".jpg",
    ".py",
    ".sh",
    ".json",
]
# Passed as the `sleep` argument to every `get_gh_api` call in this module
RETRY_SLEEP = 0
2021-09-15 12:59:39 +00:00
2021-10-29 09:58:25 +00:00
2021-11-22 09:56:13 +00:00
def get_pr_for_commit(sha, ref):
    """Find the pull request of this repository that contains commit `sha`.

    Queries `/repos/{GITHUB_REPOSITORY}/commits/{sha}/pulls` and looks only at
    pull requests whose base repository is GITHUB_REPOSITORY.

    Args:
        sha: commit-ish placed into the API URL (callers in this module pass
            either a commit SHA or a branch name).
        ref: git ref of the event, either `refs/heads/XX` (pushes) or plain
            `XX` (pull requests). A falsy value disables the lookup.

    Returns:
        The PR dict whose head branch is exactly the branch named by `ref`;
        otherwise the first PR of this repository; `None` when `ref` is
        falsy, when no PR of this repository contains the commit, or when the
        request or the parsing of its response fails.
    """
    if not ref:
        return None
    try_get_pr_url = (
        f"https://api.github.com/repos/{GITHUB_REPOSITORY}/commits/{sha}/pulls"
    )
    # refs for pushes look like refs/heads/XX, refs for PRs look like XX.
    # Reduce both to the bare branch name, so that the comparison below is
    # exact: a substring test would match branch `fix` against
    # `refs/heads/fix-something`.
    heads_prefix = "refs/heads/"
    branch = ref[len(heads_prefix) :] if ref.startswith(heads_prefix) else ref
    try:
        response = get_gh_api(try_get_pr_url, sleep=RETRY_SLEEP)
        data = response.json()
        our_prs = []  # type: List[Dict]
        if len(data) > 1:
            print("Got more than one pr for commit", sha)
        for pr in data:
            # We need to check if the PR is created in our repo, because
            # https://github.com/kaynewu/ClickHouse/pull/2
            # broke our PR search once in a while
            if pr["base"]["repo"]["full_name"] != GITHUB_REPOSITORY:
                continue
            if pr["head"]["ref"] == branch:
                return pr
            our_prs.append(pr)
        if not our_prs:
            # Nothing to fall back to; say so instead of failing on our_prs[0]
            print("Cannot find any PR in", GITHUB_REPOSITORY, "for commit", sha)
            return None
        print("Cannot find PR with required ref", ref, "returning first one")
        return our_prs[0]
    except Exception as ex:
        # Best effort: the callers treat None as "no PR for this commit"
        print("Cannot fetch PR info from commit", ex)
    return None
2021-09-15 12:59:39 +00:00
class PRInfo:
    """Description of the pull request or push that triggered the CI run.

    The constructor reads a GitHub event payload and fills these attributes:
    `event`, `sha`, `number` (0 when the event is not bound to an open PR),
    `labels`, `body`, `base_ref`, `base_name`, `head_ref`, `head_name`,
    `task_url`, `repo_full_name`, `commit_html_url`, `pr_html_url`,
    `user_login`, `user_orgs`, `diff_urls`, `changed_files`, `release_pr`
    and `merged_pr`.
    """

    # Minimal push-like event used when no event is passed and
    # GITHUB_EVENT_PATH is empty
    default_event = {
        "commits": 1,
        "head_commit": {"message": "commit_message"},
        "before": "HEAD~",
        "after": "HEAD",
        "ref": None,
    }

    # Path prefixes checked by the can_skip_* methods. They are tuples because
    # str.startswith accepts a tuple of alternatives.
    _BUILD_INDEPENDENT_PREFIXES = (
        "tests/queries",
        "tests/integration",
        "tests/performance",
    )
    _INTEGRATION_INDEPENDENT_PREFIXES = ("tests/queries", "tests/performance")
    _FUNCTIONAL_INDEPENDENT_PREFIXES = ("tests/integration", "tests/performance")

    def __init__(
        self,
        github_event=None,
        need_orgs=False,
        need_changed_files=False,
        pr_event_from_api=False,
    ):
        """Build the info from a GitHub event.

        Args:
            github_event: parsed event payload. When falsy, the payload is
                loaded from the file GITHUB_EVENT_PATH, or `default_event`
                is used if that path is empty.
            need_orgs: for pull request events, request the author's
                organizations and store their ids in `user_orgs`.
            need_changed_files: call `fetch_changed_files()` at the end.
            pr_event_from_api: for pull request events, replace the
                `pull_request` part of the event with the one requested from
                the API; on failure the received event is used.
        """
        if not github_event:
            if GITHUB_EVENT_PATH:
                with open(GITHUB_EVENT_PATH, "r", encoding="utf-8") as event_file:
                    github_event = json.load(event_file)
            else:
                github_event = PRInfo.default_event.copy()
        self.event = github_event
        self.changed_files = set()  # type: Set[str]
        self.body = ""
        self.diff_urls = []  # type: List[str]
        # release_pr and merged_pr are used for docker images additional cache
        self.release_pr = 0
        self.merged_pr = 0
        # Filled only for pull request events; the defaults keep get_dict()
        # usable for every other kind of event
        self.user_login = ""
        self.user_orgs = set()  # type: Set[str]

        ref = github_event.get("ref", "refs/heads/master")
        if ref and ref.startswith("refs/heads/"):
            ref = ref[len("refs/heads/") :]

        # workflow completed event, used for PRs only
        if github_event.get("action") == "completed":
            self.sha = github_event["workflow_run"]["head_sha"]  # type: str
            prs_for_sha = get_gh_api(
                f"https://api.github.com/repos/{GITHUB_REPOSITORY}/commits/{self.sha}"
                "/pulls",
                sleep=RETRY_SLEEP,
            ).json()
            if prs_for_sha:
                # Continue as if it were a regular pull request event
                github_event["pull_request"] = prs_for_sha[0]

        if "pull_request" in github_event:  # pull request and other similar events
            self._init_from_pull_request(github_event, need_orgs, pr_event_from_api)
        elif "commits" in github_event:
            self._init_from_push(github_event, ref)
        else:
            self._init_from_unknown_event(github_event, ref)

        if need_changed_files:
            self.fetch_changed_files()

    def _set_repo_attributes(self):
        """Set the attributes derived from the repository and `self.sha`.

        Returns the `{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}` URL prefix.
        """
        repo_prefix = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}"
        self.task_url = GITHUB_RUN_URL
        self.repo_full_name = GITHUB_REPOSITORY
        self.commit_html_url = f"{repo_prefix}/commits/{self.sha}"
        return repo_prefix

    def _init_from_pull_request(self, github_event, need_orgs, pr_event_from_api):
        """Fill the attributes from an event that has a `pull_request` key."""
        self.number = github_event["pull_request"]["number"]  # type: int
        if pr_event_from_api:
            try:
                response = get_gh_api(
                    f"https://api.github.com/repos/{GITHUB_REPOSITORY}"
                    f"/pulls/{self.number}",
                    sleep=RETRY_SLEEP,
                )
                github_event["pull_request"] = response.json()
            except Exception as e:
                logging.warning(
                    "Unable to get pull request event %s from API, "
                    "fallback to received event. Exception: %s",
                    self.number,
                    e,
                )
        pull_request = github_event["pull_request"]

        if "after" in github_event:
            self.sha = github_event["after"]
        else:
            self.sha = pull_request["head"]["sha"]

        repo_prefix = self._set_repo_attributes()
        self.pr_html_url = f"{repo_prefix}/pull/{self.number}"
        # master or backport/xx.x/xxxxx - where the PR will be merged
        self.base_ref = pull_request["base"]["ref"]  # type: str
        # ClickHouse/ClickHouse
        self.base_name = pull_request["base"]["repo"]["full_name"]  # type: str
        # any_branch-name - the name of the working branch
        self.head_ref = pull_request["head"]["ref"]  # type: str
        # UserName/ClickHouse or ClickHouse/ClickHouse
        self.head_name = pull_request["head"]["repo"]["full_name"]  # type: str
        # The payload may carry a null body; keep the attribute a string
        self.body = pull_request["body"] or ""
        self.labels = {
            label["name"] for label in pull_request["labels"]
        }  # type: Set[str]
        self.user_login = pull_request["user"]["login"]  # type: str
        self.user_orgs = set()  # type: Set[str]
        if need_orgs:
            user_orgs_response = get_gh_api(
                pull_request["user"]["organizations_url"],
                sleep=RETRY_SLEEP,
            )
            if user_orgs_response.ok:
                response_json = user_orgs_response.json()
                self.user_orgs = {org["id"] for org in response_json}

        self.diff_urls.append(pull_request["diff_url"])

    def _init_from_push(self, github_event, ref):
        """Fill the attributes from a push-like event that has a `commits` key.

        `ref` is the event ref without the `refs/heads/` prefix.
        """
        # `head_commit` always comes with `commits`
        commit_message = github_event["head_commit"]["message"]  # type: str
        if commit_message.startswith("Merge pull request #"):
            # The fourth word of the message is `#<number>`
            merged_pr = commit_message.split(maxsplit=4)[3]
            try:
                self.merged_pr = int(merged_pr[1:])
            except ValueError:
                logging.error("Failed to convert %s to integer", merged_pr)
        self.sha = github_event["after"]
        pull_request = get_pr_for_commit(self.sha, github_event.get("ref"))
        repo_prefix = self._set_repo_attributes()

        if pull_request is None or pull_request["state"] == "closed":
            # it's merged PR to master
            self.number = 0
            self.labels = set()
            self.pr_html_url = f"{repo_prefix}/commits/{ref}"
            self.base_ref = ref
            self.base_name = self.repo_full_name
            self.head_ref = ref
            self.head_name = self.repo_full_name
            # The API compare endpoint; fetch_changed_files() reads its
            # response as JSON for events with number == 0
            self.diff_urls.append(
                f"https://api.github.com/repos/{GITHUB_REPOSITORY}/"
                f"compare/{github_event['before']}...{self.sha}"
            )
            return

        self.number = pull_request["number"]
        self.labels = {label["name"] for label in pull_request["labels"]}
        self.base_ref = pull_request["base"]["ref"]
        self.base_name = pull_request["base"]["repo"]["full_name"]
        self.head_ref = pull_request["head"]["ref"]
        self.head_name = pull_request["head"]["repo"]["full_name"]
        self.pr_html_url = pull_request["html_url"]
        if "pr-backport" in self.labels:
            # head1...head2 gives changes in head2 since merge base
            # That's why we need {self.head_ref}...master to get
            # files changed in upstream AND master...{self.head_ref}
            # to get files changed in current HEAD
            self.diff_urls.append(
                f"https://github.com/{GITHUB_REPOSITORY}/"
                f"compare/master...{self.head_ref}.diff"
            )
            self.diff_urls.append(
                f"https://github.com/{GITHUB_REPOSITORY}/"
                f"compare/{self.head_ref}...master.diff"
            )
            # Get release PR number. The lookup may return None, then
            # release_pr keeps its default 0
            release_pr = get_pr_for_commit(self.base_ref, self.base_ref)
            if release_pr is not None:
                self.release_pr = release_pr["number"]
            else:
                logging.warning(
                    "Cannot find the release PR for the branch %s", self.base_ref
                )
        else:
            self.diff_urls.append(pull_request["diff_url"])
            if "release" in self.labels:
                # For release PRs we must get not only files changed in the PR
                # itself, but as well files changed since we branched out
                self.diff_urls.append(
                    f"https://github.com/{GITHUB_REPOSITORY}/"
                    f"compare/{self.head_ref}...master.diff"
                )

    def _init_from_unknown_event(self, github_event, ref):
        """Fill the attributes for an event that is neither a PR nor a push.

        No diff URL is registered, so `fetch_changed_files()` raises for
        such events.
        """
        print("event.json does not match pull_request or push:")
        print(json.dumps(github_event, sort_keys=True, indent=4))
        self.sha = os.getenv(
            "GITHUB_SHA", "0000000000000000000000000000000000000000"
        )
        self.number = 0
        self.labels = set()
        repo_prefix = self._set_repo_attributes()
        self.pr_html_url = f"{repo_prefix}/commits/{ref}"
        self.base_ref = ref
        self.base_name = self.repo_full_name
        self.head_ref = ref
        self.head_name = self.repo_full_name

    def fetch_changed_files(self):
        """Download every URL of `diff_urls` and fill `changed_files`.

        Raises:
            TypeError: when the event has no diff URLs.
            Exception: whatever `raise_for_status()` of the response raises
                for a failed request.
        """
        if not getattr(self, "diff_urls", False):
            raise TypeError("The event does not have diff URLs")

        for diff_url in self.diff_urls:
            response = get_gh_api(
                diff_url,
                sleep=RETRY_SLEEP,
            )
            response.raise_for_status()
            if "commits" in self.event and self.number == 0:
                # Push without an open PR: the URL is the API compare
                # endpoint, the file list is in the `files` key of the JSON
                diff = response.json()
                if "files" in diff:
                    self.changed_files.update(f["filename"] for f in diff["files"])
            else:
                # Everything else is a textual diff
                diff_object = PatchSet(response.text)
                self.changed_files.update(f.path for f in diff_object)
        print(f"Fetched info about {len(self.changed_files)} changed files")

    def get_dict(self):
        """Return the main attributes as a dict.

        `labels` and `user_orgs` are returned as the sets stored on the
        instance.
        """
        return {
            "sha": self.sha,
            "number": self.number,
            "labels": self.labels,
            "user_login": self.user_login,
            "user_orgs": self.user_orgs,
        }

    def has_changes_in_documentation(self) -> bool:
        """Check whether any changed file belongs to the documentation.

        A file counts when it is under `docs/` and has an extension from
        DIFF_IN_DOCUMENTATION_EXT, or when its path contains `docker/docs`.
        """
        # If the list wasn't built yet the best we can do is to
        # assume that there were changes.
        if not self.changed_files:
            return True

        for f in self.changed_files:
            _, ext = os.path.splitext(f)
            path_in_docs = f.startswith("docs/")
            if (
                ext in DIFF_IN_DOCUMENTATION_EXT and path_in_docs
            ) or "docker/docs" in f:
                return True
        return False

    def has_changes_in_submodules(self):
        """Check whether any changed path contains `contrib/`.

        An unknown (empty) list of changed files is treated as changed.
        """
        if not self.changed_files:
            return True
        return any("contrib/" in f for f in self.changed_files)

    def _changes_limited_to(self, prefixes):
        """Check that changed files are known and all start with `prefixes`.

        `prefixes` is a tuple of path prefixes. Returns False when the list of
        changed files is empty, because then nothing is known about the
        changes.
        """
        if not self.changed_files:
            return False
        return all(f.startswith(prefixes) for f in self.changed_files)

    def can_skip_builds_and_use_version_from_master(self):
        """Check that only functional, integration or performance tests changed.

        Always False with the FORCE_TESTS_LABEL label or without the list of
        changed files.
        """
        # The former condition was `not a or not b or not c`, which holds for
        # every path, so the method could never return True
        if FORCE_TESTS_LABEL in self.labels:
            return False
        return self._changes_limited_to(self._BUILD_INDEPENDENT_PREFIXES)

    def can_skip_integration_tests(self):
        """Check that only functional or performance tests changed.

        Always False with the FORCE_TESTS_LABEL label or without the list of
        changed files.
        """
        if FORCE_TESTS_LABEL in self.labels:
            return False
        return self._changes_limited_to(self._INTEGRATION_INDEPENDENT_PREFIXES)

    def can_skip_functional_tests(self):
        """Check that only integration or performance tests changed.

        Always False with the FORCE_TESTS_LABEL label or without the list of
        changed files.
        """
        if FORCE_TESTS_LABEL in self.labels:
            return False
        return self._changes_limited_to(self._FUNCTIONAL_INDEPENDENT_PREFIXES)
2021-10-21 15:32:15 +00:00
class FakePRInfo:
    """Stand-in object that has only the `number` and `sha` attributes.

    Both values are fixed placeholders; no event is read and no request is
    made.
    """

    def __init__(self):
        self.number = 11111
        self.sha = "xxxxxxxxxxxxxxxxxx"