Merge pull request #64904 from ClickHouse/improve-pr-info-diff

Persistent PRInfo diff url
This commit is contained in:
Mikhail f. Shiryaev 2024-06-10 18:24:16 +00:00 committed by GitHub
commit 221ac1414d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 39 additions and 28 deletions

View File

@@ -27,6 +27,8 @@ except ImportError:
DOWNLOAD_RETRIES_COUNT = 5
logger = logging.getLogger(__name__)
class DownloadException(Exception):
pass
@@ -42,7 +44,7 @@ def get_with_retries(
sleep: int = 3,
**kwargs: Any,
) -> requests.Response:
logging.info(
logger.info(
"Getting URL with %i tries and sleep %i in between: %s", retries, sleep, url
)
exc = Exception("A placeholder to satisfy typing and avoid nesting")
@@ -54,7 +56,7 @@ def get_with_retries(
return response
except Exception as e:
if i + 1 < retries:
logging.info("Exception '%s' while getting, retry %i", e, i + 1)
logger.info("Exception '%s' while getting, retry %i", e, i + 1)
time.sleep(sleep)
exc = e
@@ -103,7 +105,7 @@ def get_gh_api(
)
try_auth = e.response.status_code == 404
if (ratelimit_exceeded or try_auth) and not token_is_set:
logging.warning(
logger.warning(
"Received rate limit exception, setting the auth header and retry"
)
set_auth_header()
@@ -114,10 +116,10 @@ def get_gh_api(
exc = e
if try_cnt < retries:
logging.info("Exception '%s' while getting, retry %i", exc, try_cnt)
logger.info("Exception '%s' while getting, retry %i", exc, try_cnt)
time.sleep(sleep)
raise APIException("Unable to request data from GH API") from exc
raise APIException(f"Unable to request data from GH API: {url}") from exc
def get_build_name_for_check(check_name: str) -> str:
@@ -128,25 +130,25 @@ def read_build_urls(build_name: str, reports_path: Union[Path, str]) -> List[str
for root, _, files in os.walk(reports_path):
for file in files:
if file.endswith(f"_{build_name}.json"):
logging.info("Found build report json %s for %s", file, build_name)
logger.info("Found build report json %s for %s", file, build_name)
with open(
os.path.join(root, file), "r", encoding="utf-8"
) as file_handler:
build_report = json.load(file_handler)
return build_report["build_urls"] # type: ignore
logging.info("A build report is not found for %s", build_name)
logger.info("A build report is not found for %s", build_name)
return []
def download_build_with_progress(url: str, path: Path) -> None:
logging.info("Downloading from %s to temp path %s", url, path)
logger.info("Downloading from %s to temp path %s", url, path)
for i in range(DOWNLOAD_RETRIES_COUNT):
try:
response = get_with_retries(url, retries=1, stream=True)
total_length = int(response.headers.get("content-length", 0))
if path.is_file() and total_length and path.stat().st_size == total_length:
logging.info(
logger.info(
"The file %s already exists and have a proper size %s",
path,
total_length,
@@ -155,14 +157,14 @@ def download_build_with_progress(url: str, path: Path) -> None:
with open(path, "wb") as f:
if total_length == 0:
logging.info(
logger.info(
"No content-length, will download file without progress"
)
f.write(response.content)
else:
dl = 0
logging.info("Content length is %ld bytes", total_length)
logger.info("Content length is %ld bytes", total_length)
for data in response.iter_content(chunk_size=4096):
dl += len(data)
f.write(data)
@@ -177,8 +179,8 @@ def download_build_with_progress(url: str, path: Path) -> None:
except Exception as e:
if sys.stdout.isatty():
sys.stdout.write("\n")
if os.path.exists(path):
os.remove(path)
if path.exists():
path.unlink()
if i + 1 < DOWNLOAD_RETRIES_COUNT:
time.sleep(3)
@@ -189,7 +191,7 @@ def download_build_with_progress(url: str, path: Path) -> None:
if sys.stdout.isatty():
sys.stdout.write("\n")
logging.info("Downloading finished")
logger.info("Downloading finished")
def download_builds(
@@ -198,7 +200,7 @@ def download_builds(
for url in build_urls:
if filter_fn(url):
fname = os.path.basename(url.replace("%2B", "+").replace("%20", " "))
logging.info("Will download %s to %s", fname, result_path)
logger.info("Will download %s to %s", fname, result_path)
download_build_with_progress(url, result_path / fname)
@@ -210,7 +212,7 @@ def download_builds_filter(
) -> None:
build_name = get_build_name_for_check(check_name)
urls = read_build_urls(build_name, reports_path)
logging.info("The build report for %s contains the next URLs: %s", build_name, urls)
logger.info("The build report for %s contains the next URLs: %s", build_name, urls)
if not urls:
raise DownloadException("No build URLs found")
@@ -247,7 +249,7 @@ def get_clickhouse_binary_url(
) -> Optional[str]:
build_name = get_build_name_for_check(check_name)
urls = read_build_urls(build_name, reports_path)
logging.info("The build report for %s contains the next URLs: %s", build_name, urls)
logger.info("The build report for %s contains the next URLs: %s", build_name, urls)
for url in urls:
check_url = url
if "?" in check_url:

View File

@@ -59,7 +59,7 @@ def get_pr_for_commit(sha, ref):
data = response.json()
our_prs = [] # type: List[Dict]
if len(data) > 1:
print("Got more than one pr for commit", sha)
logging.warning("Got more than one pr for commit %s", sha)
for pr in data:
# We need to check if the PR is created in our repo, because
# https://github.com/kaynewu/ClickHouse/pull/2
@@ -71,13 +71,20 @@ def get_pr_for_commit(sha, ref):
if pr["head"]["ref"] in ref:
return pr
our_prs.append(pr)
print(
f"Cannot find PR with required ref {ref}, sha {sha} - returning first one"
logging.warning(
"Cannot find PR with required ref %s, sha %s - returning first one",
ref,
sha,
)
first_pr = our_prs[0]
return first_pr
except Exception as ex:
print(f"Cannot fetch PR info from commit {ref}, {sha}", ex)
logging.error(
"Cannot fetch PR info from commit ref %s, sha %s, exception: %s",
ref,
sha,
ex,
)
return None
@@ -259,12 +266,12 @@ class PRInfo:
self.diff_urls.append(
self.compare_url(
pull_request["base"]["repo"]["default_branch"],
pull_request["head"]["label"],
pull_request["head"]["sha"],
)
)
self.diff_urls.append(
self.compare_url(
pull_request["head"]["label"],
pull_request["head"]["sha"],
pull_request["base"]["repo"]["default_branch"],
)
)
@@ -279,7 +286,7 @@ class PRInfo:
# itself, but as well files changed since we branched out
self.diff_urls.append(
self.compare_url(
pull_request["head"]["label"],
pull_request["head"]["sha"],
pull_request["base"]["repo"]["default_branch"],
)
)
@@ -289,8 +296,10 @@ class PRInfo:
else:
# assume this is a dispatch
self.event_type = EventType.DISPATCH
print("event.json does not match pull_request or push:")
print(json.dumps(github_event, sort_keys=True, indent=4))
logging.warning(
"event.json does not match pull_request or push:\n%s",
json.dumps(github_event, sort_keys=True, indent=4),
)
self.sha = os.getenv(
"GITHUB_SHA", "0000000000000000000000000000000000000000"
)
@@ -330,7 +339,7 @@ class PRInfo:
return self.event_type == EventType.DISPATCH
def compare_pr_url(self, pr_object: dict) -> str:
return self.compare_url(pr_object["base"]["label"], pr_object["head"]["label"])
return self.compare_url(pr_object["base"]["sha"], pr_object["head"]["sha"])
@staticmethod
def compare_url(first: str, second: str) -> str:
@@ -357,7 +366,7 @@ class PRInfo:
diff_object = PatchSet(response.text)
self.changed_files.update({f.path for f in diff_object})
self.changed_files_requested = True
print(f"Fetched info about {len(self.changed_files)} changed files")
logging.info("Fetched info about %s changed files", len(self.changed_files))
def get_dict(self):
return {