2021-09-15 12:59:39 +00:00
|
|
|
#!/usr/bin/env python3
|
2021-11-29 14:17:17 +00:00
|
|
|
import json
|
2022-04-19 07:45:40 +00:00
|
|
|
import logging
|
2021-10-29 13:57:47 +00:00
|
|
|
import os
|
2022-07-01 13:06:56 +00:00
|
|
|
from typing import Set
|
2021-10-29 13:57:47 +00:00
|
|
|
|
2021-12-22 10:38:27 +00:00
|
|
|
from unidiff import PatchSet # type: ignore
|
2021-09-15 16:32:17 +00:00
|
|
|
|
2022-01-26 13:54:11 +00:00
|
|
|
from build_download_helper import get_with_retries
|
2022-01-26 11:09:35 +00:00
|
|
|
from env_helper import (
|
|
|
|
GITHUB_REPOSITORY,
|
|
|
|
GITHUB_SERVER_URL,
|
2022-03-24 14:37:53 +00:00
|
|
|
GITHUB_RUN_URL,
|
2022-01-26 11:09:35 +00:00
|
|
|
GITHUB_EVENT_PATH,
|
|
|
|
)
|
|
|
|
|
2022-04-21 14:33:46 +00:00
|
|
|
# PR label that makes every ``PRInfo.can_skip_*`` check answer "no".
FORCE_TESTS_LABEL = "force tests"

# File extensions that ``PRInfo.has_changes_in_documentation`` treats as a
# documentation change when the path also mentions "docs" or "website".
DIFF_IN_DOCUMENTATION_EXT = [
    # markup, text and configuration
    ".html", ".md", ".yml", ".txt",
    # web assets and site configuration
    ".css", ".js", ".xml", ".ico", ".conf",
    # images
    ".svg", ".png", ".jpg",
    # scripts and data
    ".py", ".sh", ".json",
]  # fmt: skip

# Value passed as ``sleep=`` to every ``get_with_retries`` call in this module.
RETRY_SLEEP = 0
|
2021-09-15 12:59:39 +00:00
|
|
|
|
2021-10-29 09:58:25 +00:00
|
|
|
|
2021-11-22 09:56:13 +00:00
|
|
|
def get_pr_for_commit(sha, ref):
    """Return the pull request that GitHub associates with commit ``sha``.

    The list of pull requests is requested from the GitHub
    ``commits/{sha}/pulls`` endpoint.  The entry whose head branch equals
    ``ref`` is preferred; when none matches, the first entry is returned.

    Args:
        sha: Commit identifier interpolated into the API URL.
        ref: Git ref of the event, either ``refs/heads/<branch>`` or a bare
            ``<branch>`` name.  A falsy value short-circuits to ``None``
            without any network request.

    Returns:
        The pull request object (a dict decoded from the API response), or
        ``None`` when ``ref`` is falsy, no pull request is listed, or the
        lookup fails for any reason.
    """
    if not ref:
        return None
    try_get_pr_url = (
        f"https://api.github.com/repos/{GITHUB_REPOSITORY}/commits/{sha}/pulls"
    )
    # refs for pushes look like refs/heads/XX, refs for PRs look like XX;
    # normalize to the bare branch name so the comparison below is exact
    # (a substring test would let head "a" match "refs/heads/master").
    heads_prefix = "refs/heads/"
    branch = ref[len(heads_prefix) :] if ref.startswith(heads_prefix) else ref
    try:
        response = get_with_retries(try_get_pr_url, sleep=RETRY_SLEEP)
        data = response.json()
        if not data:
            # Nothing to choose from; say so instead of tripping over data[0].
            print("No PR found for commit", sha)
            return None
        if len(data) > 1:
            print("Got more than one pr for commit", sha)
        for pr in data:
            if pr["head"]["ref"] == branch:
                return pr
        print("Cannot find PR with required ref", ref, "returning first one")
        return data[0]
    except Exception as ex:
        # Best-effort lookup: callers treat None as "no PR for this commit".
        print("Cannot fetch PR info from commit", ex)
        return None
|
|
|
|
|
2021-11-29 14:17:17 +00:00
|
|
|
|
2021-09-15 12:59:39 +00:00
|
|
|
class PRInfo:
    """Describe the pull request or push that triggered the current run.

    The constructor inspects a GitHub event payload and fills in the commit
    sha, PR number, labels, base/head refs and repository names, assorted
    HTML URLs and the list of diff URLs used by ``fetch_changed_files``.
    """

    # Event used when no payload is passed and GITHUB_EVENT_PATH is empty.
    # Because it contains "commits", it is handled by the push code path.
    default_event = {
        "commits": 1,
        "before": "HEAD~",
        "after": "HEAD",
        "ref": None,
    }

    def __init__(
        self,
        github_event=None,
        need_orgs=False,
        need_changed_files=False,
        pr_event_from_api=False,
    ):
        """Build the description from a GitHub event.

        Args:
            github_event: Event payload as a dict.  When falsy, the payload
                is read from ``GITHUB_EVENT_PATH`` if that is set, otherwise
                a copy of ``default_event`` is used.  The dict may be
                modified in place (its "pull_request" entry can be replaced).
            need_orgs: For pull-request events, additionally request the
                author's ``organizations_url`` and store the organization ids
                in ``user_orgs``.
            need_changed_files: Call ``fetch_changed_files`` at the end.
            pr_event_from_api: For pull-request events, re-read the pull
                request object from the GitHub API; on any failure a warning
                is logged and the received event is used.
        """
        if not github_event:
            if GITHUB_EVENT_PATH:
                with open(GITHUB_EVENT_PATH, "r", encoding="utf-8") as event_file:
                    github_event = json.load(event_file)
            else:
                github_event = PRInfo.default_event.copy()
        self.event = github_event
        self.changed_files = set()  # type: Set[str]
        self.body = ""
        self.diff_urls = []
        self.release_pr = ""
        # Only pull-request events carry author data; these defaults keep
        # get_dict() usable for every other kind of event.
        self.user_login = ""
        self.user_orgs = set()

        # The fallback must use the same "refs/heads/" prefix that is
        # stripped below, otherwise the raw ref leaks into the URLs.
        heads_prefix = "refs/heads/"
        ref = github_event.get("ref", "refs/heads/master")
        if ref and ref.startswith(heads_prefix):
            ref = ref[len(heads_prefix) :]

        repo_prefix = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}"
        self.task_url = GITHUB_RUN_URL
        self.repo_full_name = GITHUB_REPOSITORY

        # workflow completed event, used for PRs only
        if github_event.get("action") == "completed":
            self.sha = github_event["workflow_run"]["head_sha"]
            prs_for_sha = get_with_retries(
                f"https://api.github.com/repos/{GITHUB_REPOSITORY}/commits/{self.sha}"
                "/pulls",
                sleep=RETRY_SLEEP,
            ).json()
            if len(prs_for_sha) != 0:
                github_event["pull_request"] = prs_for_sha[0]

        if "pull_request" in github_event:  # pull request and other similar events
            self.number = github_event["pull_request"]["number"]
            if pr_event_from_api:
                try:
                    response = get_with_retries(
                        f"https://api.github.com/repos/{GITHUB_REPOSITORY}"
                        f"/pulls/{self.number}",
                        sleep=RETRY_SLEEP,
                    )
                    github_event["pull_request"] = response.json()
                except Exception as e:
                    logging.warning(
                        "Unable to get pull request event %s from API, "
                        "fallback to received event. Exception: %s",
                        self.number,
                        e,
                    )

            # Alias taken only now: the entry may just have been refreshed.
            pull_request = github_event["pull_request"]

            if "after" in github_event:
                self.sha = github_event["after"]
            else:
                self.sha = pull_request["head"]["sha"]

            self.commit_html_url = f"{repo_prefix}/commits/{self.sha}"
            self.pr_html_url = f"{repo_prefix}/pull/{self.number}"

            self.base_ref = pull_request["base"]["ref"]
            self.base_name = pull_request["base"]["repo"]["full_name"]
            self.head_ref = pull_request["head"]["ref"]
            self.head_name = pull_request["head"]["repo"]["full_name"]
            self.body = pull_request["body"]
            self.labels = {label["name"] for label in pull_request["labels"]}

            self.user_login = pull_request["user"]["login"]
            if need_orgs:
                user_orgs_response = get_with_retries(
                    pull_request["user"]["organizations_url"],
                    sleep=RETRY_SLEEP,
                )
                if user_orgs_response.ok:
                    response_json = user_orgs_response.json()
                    self.user_orgs = {org["id"] for org in response_json}

            self.diff_urls.append(pull_request["diff_url"])
        elif "commits" in github_event:
            self.sha = github_event["after"]
            pull_request = get_pr_for_commit(self.sha, github_event["ref"])
            self.commit_html_url = f"{repo_prefix}/commits/{self.sha}"
            if pull_request is None or pull_request["state"] == "closed":
                # it's merged PR to master
                self._init_without_pr(ref, repo_prefix)
                self.diff_urls.append(
                    f"https://api.github.com/repos/{GITHUB_REPOSITORY}/"
                    f"compare/{github_event['before']}...{self.sha}"
                )
            else:
                self.number = pull_request["number"]
                self.labels = {label["name"] for label in pull_request["labels"]}

                self.base_ref = pull_request["base"]["ref"]
                self.base_name = pull_request["base"]["repo"]["full_name"]
                self.head_ref = pull_request["head"]["ref"]
                self.head_name = pull_request["head"]["repo"]["full_name"]
                self.pr_html_url = pull_request["html_url"]
                if "pr-backport" in self.labels:
                    # head1...head2 gives changes in head2 since merge base
                    # That's why we need {self.head_ref}...master to get
                    # files changed in upstream AND master...{self.head_ref}
                    # to get files, changed in current HEAD
                    self.diff_urls.append(
                        f"https://github.com/{GITHUB_REPOSITORY}/"
                        f"compare/master...{self.head_ref}.diff"
                    )
                    self.diff_urls.append(
                        f"https://github.com/{GITHUB_REPOSITORY}/"
                        f"compare/{self.head_ref}...master.diff"
                    )
                    # Get release PR number.  The lookup returns None on any
                    # failure, so keep the default instead of subscripting it.
                    release_pr = get_pr_for_commit(self.base_ref, self.base_ref)
                    if release_pr is None:
                        logging.warning(
                            "Unable to find the release PR for base ref %s",
                            self.base_ref,
                        )
                    else:
                        self.release_pr = release_pr["number"]
                else:
                    self.diff_urls.append(pull_request["diff_url"])
        else:
            print("event.json does not match pull_request or push:")
            print(json.dumps(github_event, sort_keys=True, indent=4))
            self.sha = os.getenv("GITHUB_SHA")
            self.commit_html_url = f"{repo_prefix}/commits/{self.sha}"
            self._init_without_pr(ref, repo_prefix)

        if need_changed_files:
            self.fetch_changed_files()

    def _init_without_pr(self, ref, repo_prefix):
        """Fill the PR-related attributes for an event with no open PR."""
        self.number = 0
        self.labels = set()  # type: Set[str]
        self.pr_html_url = f"{repo_prefix}/commits/{ref}"
        self.base_ref = ref
        self.base_name = self.repo_full_name
        self.head_ref = ref
        self.head_name = self.repo_full_name

    def fetch_changed_files(self):
        """Download every URL in ``diff_urls`` and collect changed file paths.

        For a push without an associated PR (``number == 0`` and "commits"
        in the event) the response is decoded as JSON and the "filename" of
        each entry under "files" is used; otherwise the response text is
        parsed as a unified diff.  Results accumulate in ``changed_files``.

        Raises:
            TypeError: If ``diff_urls`` is missing or empty.
        """
        if not getattr(self, "diff_urls", False):
            raise TypeError("The event does not have diff URLs")

        for diff_url in self.diff_urls:
            response = get_with_retries(
                diff_url,
                sleep=RETRY_SLEEP,
            )
            response.raise_for_status()
            if "commits" in self.event and self.number == 0:
                diff = response.json()

                if "files" in diff:
                    # update() rather than assignment, matching the diff
                    # branch, so earlier results are never discarded.
                    self.changed_files.update(f["filename"] for f in diff["files"])
            else:
                diff_object = PatchSet(response.text)
                self.changed_files.update({f.path for f in diff_object})
        print(f"Fetched info about {len(self.changed_files)} changed files")

    def get_dict(self):
        """Return sha, number, labels, user_login and user_orgs as a dict."""
        return {
            "sha": self.sha,
            "number": self.number,
            "labels": self.labels,
            "user_login": self.user_login,
            "user_orgs": self.user_orgs,
        }

    def has_changes_in_documentation(self):
        """Tell whether any changed file looks like documentation.

        A file counts when its path contains "docker/docs", or when its
        extension is in ``DIFF_IN_DOCUMENTATION_EXT`` and its path contains
        "docs" or "website".  Returns ``True`` when ``changed_files`` is
        empty or ``None``.
        """
        # If the list wasn't built yet the best we can do is to
        # assume that there were changes.
        if not self.changed_files:
            return True

        for f in self.changed_files:
            _, ext = os.path.splitext(f)
            path_in_docs = "docs" in f
            path_in_website = "website" in f
            if (
                ext in DIFF_IN_DOCUMENTATION_EXT and (path_in_docs or path_in_website)
            ) or "docker/docs" in f:
                return True
        return False

    def has_changes_in_submodules(self):
        """Tell whether any changed path contains "contrib/".

        Returns ``True`` when ``changed_files`` is empty or ``None``.
        """
        if not self.changed_files:
            return True

        return any("contrib/" in f for f in self.changed_files)

    def _only_changes_under(self, prefixes):
        """Return True if files are known and each starts with a prefix.

        ``prefixes`` must be a tuple of strings (``str.startswith`` accepts a
        tuple and matches any of its members).  An empty or ``None``
        ``changed_files`` yields ``False``.
        """
        if not self.changed_files:
            return False
        return all(f.startswith(prefixes) for f in self.changed_files)

    def can_skip_builds_and_use_version_from_master(self):
        """Return True when only test directories were touched.

        Always ``False`` when the PR carries ``FORCE_TESTS_LABEL`` or the
        changed files are unknown.
        """
        if FORCE_TESTS_LABEL in self.labels:
            return False

        # The previous "not a or not b or not c" test was true for every
        # path, so this method could never return True.
        return self._only_changes_under(
            ("tests/queries", "tests/integration", "tests/performance")
        )

    def can_skip_integration_tests(self):
        """Return True when only tests/queries or tests/performance changed.

        Always ``False`` when the PR carries ``FORCE_TESTS_LABEL`` or the
        changed files are unknown.
        """
        if FORCE_TESTS_LABEL in self.labels:
            return False

        return self._only_changes_under(("tests/queries", "tests/performance"))

    def can_skip_functional_tests(self):
        """Return True when only tests/integration or tests/performance changed.

        Always ``False`` when the PR carries ``FORCE_TESTS_LABEL`` or the
        changed files are unknown.
        """
        if FORCE_TESTS_LABEL in self.labels:
            return False

        return self._only_changes_under(("tests/integration", "tests/performance"))
|
|
|
|
|
2021-10-21 15:32:15 +00:00
|
|
|
|
|
|
|
class FakePRInfo:
    """Stand-in object exposing only ``number`` and ``sha`` placeholders."""

    def __init__(self):
        # Fixed dummy values; no event is read and no request is made.
        self.number, self.sha = 11111, "xxxxxxxxxxxxxxxxxx"
|