Merge pull request #64904 from ClickHouse/improve-pr-info-diff

Persistent PRInfo diff URL
2024-09-19 16:20:50 +00:00 · 2024-06-10 18:24:16 +00:00 · 2024-06-10 18:24:16 +00:00 · 221ac1414d
commit 221ac1414d
parent 621793e856 8c4f5c65aa
2 changed files with 39 additions and 28 deletions
--- a/tests/ci/build_download_helper.py
+++ b/tests/ci/build_download_helper.py
@ -27,6 +27,8 @@ except ImportError:

 DOWNLOAD_RETRIES_COUNT = 5

+logger = logging.getLogger(__name__)
+

 class DownloadException(Exception):
    pass
@ -42,7 +44,7 @@ def get_with_retries(
    sleep: int = 3,
    **kwargs: Any,
 ) -> requests.Response:
-    logging.info(
+    logger.info(
        "Getting URL with %i tries and sleep %i in between: %s", retries, sleep, url
    )
    exc = Exception("A placeholder to satisfy typing and avoid nesting")
@ -54,7 +56,7 @@ def get_with_retries(
            return response
        except Exception as e:
            if i + 1 < retries:
-                logging.info("Exception '%s' while getting, retry %i", e, i + 1)
+                logger.info("Exception '%s' while getting, retry %i", e, i + 1)
                time.sleep(sleep)

            exc = e
@ -103,7 +105,7 @@ def get_gh_api(
            )
            try_auth = e.response.status_code == 404
            if (ratelimit_exceeded or try_auth) and not token_is_set:
-                logging.warning(
+                logger.warning(
                    "Received rate limit exception, setting the auth header and retry"
                )
                set_auth_header()
@ -114,10 +116,10 @@ def get_gh_api(
            exc = e

        if try_cnt < retries:
-            logging.info("Exception '%s' while getting, retry %i", exc, try_cnt)
+            logger.info("Exception '%s' while getting, retry %i", exc, try_cnt)
            time.sleep(sleep)

-    raise APIException("Unable to request data from GH API") from exc
+    raise APIException(f"Unable to request data from GH API: {url}") from exc


 def get_build_name_for_check(check_name: str) -> str:
@ -128,25 +130,25 @@ def read_build_urls(build_name: str, reports_path: Union[Path, str]) -> List[str
    for root, _, files in os.walk(reports_path):
        for file in files:
            if file.endswith(f"_{build_name}.json"):
-                logging.info("Found build report json %s for %s", file, build_name)
+                logger.info("Found build report json %s for %s", file, build_name)
                with open(
                    os.path.join(root, file), "r", encoding="utf-8"
                ) as file_handler:
                    build_report = json.load(file_handler)
                    return build_report["build_urls"]  # type: ignore

-    logging.info("A build report is not found for %s", build_name)
+    logger.info("A build report is not found for %s", build_name)
    return []


 def download_build_with_progress(url: str, path: Path) -> None:
-    logging.info("Downloading from %s to temp path %s", url, path)
+    logger.info("Downloading from %s to temp path %s", url, path)
    for i in range(DOWNLOAD_RETRIES_COUNT):
        try:
            response = get_with_retries(url, retries=1, stream=True)
            total_length = int(response.headers.get("content-length", 0))
            if path.is_file() and total_length and path.stat().st_size == total_length:
-                logging.info(
+                logger.info(
                    "The file %s already exists and have a proper size %s",
                    path,
                    total_length,
@ -155,14 +157,14 @@ def download_build_with_progress(url: str, path: Path) -> None:

            with open(path, "wb") as f:
                if total_length == 0:
-                    logging.info(
+                    logger.info(
                        "No content-length, will download file without progress"
                    )
                    f.write(response.content)
                else:
                    dl = 0

-                    logging.info("Content length is %ld bytes", total_length)
+                    logger.info("Content length is %ld bytes", total_length)
                    for data in response.iter_content(chunk_size=4096):
                        dl += len(data)
                        f.write(data)
@ -177,8 +179,8 @@ def download_build_with_progress(url: str, path: Path) -> None:
        except Exception as e:
            if sys.stdout.isatty():
                sys.stdout.write("\n")
-            if os.path.exists(path):
-                os.remove(path)
+            if path.exists():
+                path.unlink()

            if i + 1 < DOWNLOAD_RETRIES_COUNT:
                time.sleep(3)
@ -189,7 +191,7 @@ def download_build_with_progress(url: str, path: Path) -> None:

    if sys.stdout.isatty():
        sys.stdout.write("\n")
-    logging.info("Downloading finished")
+    logger.info("Downloading finished")


 def download_builds(
@ -198,7 +200,7 @@ def download_builds(
    for url in build_urls:
        if filter_fn(url):
            fname = os.path.basename(url.replace("%2B", "+").replace("%20", " "))
-            logging.info("Will download %s to %s", fname, result_path)
+            logger.info("Will download %s to %s", fname, result_path)
            download_build_with_progress(url, result_path / fname)


@ -210,7 +212,7 @@ def download_builds_filter(
 ) -> None:
    build_name = get_build_name_for_check(check_name)
    urls = read_build_urls(build_name, reports_path)
-    logging.info("The build report for %s contains the next URLs: %s", build_name, urls)
+    logger.info("The build report for %s contains the next URLs: %s", build_name, urls)

    if not urls:
        raise DownloadException("No build URLs found")
@ -247,7 +249,7 @@ def get_clickhouse_binary_url(
 ) -> Optional[str]:
    build_name = get_build_name_for_check(check_name)
    urls = read_build_urls(build_name, reports_path)
-    logging.info("The build report for %s contains the next URLs: %s", build_name, urls)
+    logger.info("The build report for %s contains the next URLs: %s", build_name, urls)
    for url in urls:
        check_url = url
        if "?" in check_url:
--- a/tests/ci/pr_info.py
+++ b/tests/ci/pr_info.py
@ -59,7 +59,7 @@ def get_pr_for_commit(sha, ref):
        data = response.json()
        our_prs = []  # type: List[Dict]
        if len(data) > 1:
-            print("Got more than one pr for commit", sha)
+            logging.warning("Got more than one pr for commit %s", sha)
        for pr in data:
            # We need to check if the PR is created in our repo, because
            # https://github.com/kaynewu/ClickHouse/pull/2
@ -71,13 +71,20 @@ def get_pr_for_commit(sha, ref):
            if pr["head"]["ref"] in ref:
                return pr
            our_prs.append(pr)
-        print(
-            f"Cannot find PR with required ref {ref}, sha {sha} - returning first one"
+        logging.warning(
+            "Cannot find PR with required ref %s, sha %s - returning first one",
+            ref,
+            sha,
        )
        first_pr = our_prs[0]
        return first_pr
    except Exception as ex:
-        print(f"Cannot fetch PR info from commit {ref}, {sha}", ex)
+        logging.error(
+            "Cannot fetch PR info from commit ref %s, sha %s, exception: %s",
+            ref,
+            sha,
+            ex,
+        )
    return None


@ -259,12 +266,12 @@ class PRInfo:
                    self.diff_urls.append(
                        self.compare_url(
                            pull_request["base"]["repo"]["default_branch"],
-                            pull_request["head"]["label"],
+                            pull_request["head"]["sha"],
                        )
                    )
                    self.diff_urls.append(
                        self.compare_url(
-                            pull_request["head"]["label"],
+                            pull_request["head"]["sha"],
                            pull_request["base"]["repo"]["default_branch"],
                        )
                    )
@ -279,7 +286,7 @@ class PRInfo:
                    # itself, but as well files changed since we branched out
                    self.diff_urls.append(
                        self.compare_url(
-                            pull_request["head"]["label"],
+                            pull_request["head"]["sha"],
                            pull_request["base"]["repo"]["default_branch"],
                        )
                    )
@ -289,8 +296,10 @@ class PRInfo:
            else:
                # assume this is a dispatch
                self.event_type = EventType.DISPATCH
-            print("event.json does not match pull_request or push:")
-            print(json.dumps(github_event, sort_keys=True, indent=4))
+            logging.warning(
+                "event.json does not match pull_request or push:\n%s",
+                json.dumps(github_event, sort_keys=True, indent=4),
+            )
            self.sha = os.getenv(
                "GITHUB_SHA", "0000000000000000000000000000000000000000"
            )
@ -330,7 +339,7 @@ class PRInfo:
        return self.event_type == EventType.DISPATCH

    def compare_pr_url(self, pr_object: dict) -> str:
-        return self.compare_url(pr_object["base"]["label"], pr_object["head"]["label"])
+        return self.compare_url(pr_object["base"]["sha"], pr_object["head"]["sha"])

    @staticmethod
    def compare_url(first: str, second: str) -> str:
@ -357,7 +366,7 @@ class PRInfo:
            diff_object = PatchSet(response.text)
            self.changed_files.update({f.path for f in diff_object})
        self.changed_files_requested = True
-        print(f"Fetched info about {len(self.changed_files)} changed files")
+        logging.info("Fetched info about %s changed files", len(self.changed_files))

    def get_dict(self):
        return {