Move changelog script to tests/ci
commit dcd3e9d151 (parent 31a978d75d)
tests/ci/changelog.py (new executable file, 427 lines added)

@@ -0,0 +1,427 @@
#!/usr/bin/env python3
# In our CI this script runs in style-test containers

import argparse
import logging
import os
import os.path as p
import re
from datetime import date, timedelta
from subprocess import DEVNULL
from typing import Dict, List, Optional, TextIO

from github.GithubException import RateLimitExceededException, UnknownObjectException
from github.NamedUser import NamedUser
from thefuzz.fuzz import ratio  # type: ignore

from git_helper import git_runner as runner
from git_helper import is_shallow
from github_helper import GitHub, PullRequest, PullRequests, Repository

# This array gives the preferred category order, and is also used to
# normalize category names.
# Categories are used in .github/PULL_REQUEST_TEMPLATE.md, keep comments there
# updated accordingly
categories_preferred_order = (
    "Backward Incompatible Change",
    "New Feature",
    "Performance Improvement",
    "Improvement",
    "Critical Bug Fix",
    "Bug Fix",
    "Build/Testing/Packaging Improvement",
    "Other",
)

FROM_REF = ""
TO_REF = ""
SHA_IN_CHANGELOG = []  # type: List[str]
gh = GitHub(create_cache_dir=False)
CACHE_PATH = p.join(p.dirname(p.realpath(__file__)), "gh_cache")


class Description:
    def __init__(
        self, number: int, user: NamedUser, html_url: str, entry: str, category: str
    ):
        self.number = number
        self.html_url = html_url
        self.user = gh.get_user_cached(user._rawData["login"])  # type: ignore
        self.entry = entry
        self.category = category

    @property
    def formatted_entry(self) -> str:
        # Substitute issue links.
        # 1) issue number w/o markdown link
        entry = re.sub(
            r"([^[])#([0-9]{4,})",
            r"\1[#\2](https://github.com/ClickHouse/ClickHouse/issues/\2)",
            self.entry,
        )
        # 2) issue URL w/o markdown link
        # including #issuecomment-1 or #event-12
        entry = re.sub(
            r"([^(])(https://github.com/ClickHouse/ClickHouse/issues/([0-9]{4,})[-#a-z0-9]*)",
            r"\1[#\3](\2)",
            entry,
        )
        # It's possible that we face a secondary rate limit.
        # In this case we should sleep until the request succeeds.
        while True:
            try:
                user_name = self.user.name if self.user.name else self.user.login
                break
            except UnknownObjectException:
                user_name = self.user.login
                break
            except RateLimitExceededException:
                gh.sleep_on_rate_limit()
        return (
            f"* {entry} [#{self.number}]({self.html_url}) "
            f"([{user_name}]({self.user.html_url}))."
        )

    # Sort PR descriptions by number
    def __eq__(self, other) -> bool:
        if not isinstance(self, type(other)):
            return NotImplemented
        return self.number == other.number

    def __lt__(self, other: "Description") -> bool:
        return self.number < other.number


def get_descriptions(prs: PullRequests) -> Dict[str, List[Description]]:
    descriptions = {}  # type: Dict[str, List[Description]]
    repos = {}  # type: Dict[str, Repository]
    for pr in prs:
        # See https://github.com/PyGithub/PyGithub/issues/2202,
        # accessing obj._rawData doesn't spend additional API requests.
        # We'll save some requests
        # pylint: disable=protected-access
        repo_name = pr._rawData["base"]["repo"]["full_name"]
        # pylint: enable=protected-access
        if repo_name not in repos:
            repos[repo_name] = pr.base.repo
        in_changelog = False
        merge_commit = pr.merge_commit_sha
        if merge_commit is None:
            logging.warning("PR %s does not have merge-commit, skipping", pr.number)
            continue

        in_changelog = merge_commit in SHA_IN_CHANGELOG
        if in_changelog:
            desc = generate_description(pr, repos[repo_name])
            if desc:
                if desc.category not in descriptions:
                    descriptions[desc.category] = []
                descriptions[desc.category].append(desc)

    for descs in descriptions.values():
        descs.sort()

    return descriptions


def parse_args() -> argparse.Namespace:
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description="Generate a changelog in Markdown format between given tags. "
        "It fetches all tags and unshallows the git repository automatically",
    )
    parser.add_argument(
        "-v",
        "--verbose",
        action="count",
        default=0,
        help="set the script verbosity, can be used multiple times",
    )
    parser.add_argument(
        "--debug-helpers",
        action="store_true",
        help="add debug logging for git_helper and github_helper",
    )
    parser.add_argument(
        "--output",
        type=argparse.FileType("w"),
        default="-",
        help="output file for changelog",
    )
    parser.add_argument(
        "--repo",
        default="ClickHouse/ClickHouse",
        help="a repository to query for pull-requests from GitHub",
    )
    parser.add_argument(
        "--jobs",
        type=int,
        default=10,
        help="number of jobs to get pull-requests info from GitHub API",
    )
    parser.add_argument(
        "--gh-user-or-token",
        help="user name or GH token to authenticate",
    )
    parser.add_argument(
        "--gh-password",
        help="a password that should be used when a user name is given",
    )
    parser.add_argument(
        "--with-testing-tags",
        action="store_true",
        help="by default '*-testing' tags are ignored, this argument enables them too",
    )
    parser.add_argument(
        "--from",
        dest="from_ref",
        help="git ref for a starting point of changelog, by default is calculated "
        "automatically to match a previous tag in history",
    )
    parser.add_argument(
        "to_ref",
        metavar="TO_REF",
        help="git ref for the changelog end",
    )
    args = parser.parse_args()
    return args


# This function mirrors the PR description checks in ClickhousePullRequestTrigger.
# Returns None if the PR should not be mentioned in the changelog.
def generate_description(item: PullRequest, repo: Repository) -> Optional[Description]:
    backport_number = item.number
    if item.head.ref.startswith("backport/"):
        branch_parts = item.head.ref.split("/")
        if len(branch_parts) == 3:
            try:
                item = gh.get_pull_cached(repo, int(branch_parts[-1]))
            except Exception as e:
                logging.warning("unable to get backported PR, exception: %s", e)
        else:
            logging.warning(
                "The branch %s doesn't match backport template, using PR %s as is",
                item.head.ref,
                item.number,
            )
    description = item.body
    # Don't skip empty lines because they delimit parts of the description
    lines = [x.strip() for x in (description.split("\n") if description else [])]
    lines = [re.sub(r"\s+", " ", ln) for ln in lines]

    category = ""
    entry = ""

    if lines:
        i = 0
        while i < len(lines):
            if re.match(r"(?i)^[#>*_ ]*change\s*log\s*category", lines[i]):
                i += 1
                if i >= len(lines):
                    break
                # Can have one empty line between header and the category itself.
                # Filter it out.
                if not lines[i]:
                    i += 1
                    if i >= len(lines):
                        break
                category = re.sub(r"^[-*\s]*", "", lines[i])
                i += 1
            elif re.match(
                r"(?i)^[#>*_ ]*(short\s*description|change\s*log\s*entry)", lines[i]
            ):
                i += 1
                # Can have one empty line between header and the entry itself.
                # Filter it out.
                if i < len(lines) and not lines[i]:
                    i += 1
                # All following lines until empty one are the changelog entry.
                entry_lines = []
                while i < len(lines) and lines[i]:
                    entry_lines.append(lines[i])
                    i += 1
                entry = " ".join(entry_lines)
            else:
                i += 1

    # Remove excessive bullets from the entry.
    if re.match(r"^[\-\*] ", entry):
        entry = entry[2:]

    # Better style.
    if re.match(r"^[a-z]", entry):
        entry = entry.capitalize()

    if not category:
        # Shouldn't happen, because description check in CI should catch such PRs.
        # Fall through, so that it shows up in output and the user can fix it.
        category = "NO CL CATEGORY"

    # Filter out the PR categories that are not for changelog.
    if re.match(
        r"(?i)((non|in|not|un)[-\s]*significant)|(not[ ]*for[ ]*changelog)",
        category,
    ):
        category = "NOT FOR CHANGELOG / INSIGNIFICANT"
        return Description(item.number, item.user, item.html_url, item.title, category)

    # Normalize bug fixes
    if re.match(
        r"(?i)bug\Wfix",
        category,
    ):
        category = "Bug Fix (user-visible misbehavior in an official stable release)"

    # Filter out documentation changes from the changelog
    if re.match(
        r"(?i)doc",
        category,
    ):
        return None

    if backport_number != item.number:
        entry = f"Backported in #{backport_number}: {entry}"

    if not entry:
        # Shouldn't happen, because description check in CI should catch such PRs.
        category = "NO CL ENTRY"
        entry = "NO CL ENTRY: '" + item.title + "'"

    entry = entry.strip()
    if entry[-1] != ".":
        entry += "."

    for c in categories_preferred_order:
        if ratio(category.lower(), c.lower()) >= 90:
            category = c
            break

    return Description(item.number, item.user, item.html_url, entry, category)


def write_changelog(
    fd: TextIO, descriptions: Dict[str, List[Description]], year: int
) -> None:
    to_commit = runner(f"git rev-parse {TO_REF}^{{}}")[:11]
    from_commit = runner(f"git rev-parse {FROM_REF}^{{}}")[:11]
    fd.write(
        f"---\nsidebar_position: 1\nsidebar_label: {year}\n---\n\n"
        f"# {year} Changelog\n\n"
        f"### ClickHouse release {TO_REF} ({to_commit}) FIXME "
        f"as compared to {FROM_REF} ({from_commit})\n\n"
    )

    seen_categories = []  # type: List[str]
    for category in categories_preferred_order:
        if category in descriptions:
            seen_categories.append(category)
            fd.write(f"#### {category}\n")
            for desc in descriptions[category]:
                fd.write(f"{desc.formatted_entry}\n")

            fd.write("\n")

    for category in sorted(descriptions):
        if category not in seen_categories:
            fd.write(f"#### {category}\n\n")
            for desc in descriptions[category]:
                fd.write(f"{desc.formatted_entry}\n")

            fd.write("\n")


def check_refs(from_ref: Optional[str], to_ref: str, with_testing_tags: bool):
    global FROM_REF, TO_REF
    TO_REF = to_ref

    # Check TO_REF
    runner.run(f"git rev-parse {TO_REF}")

    # Check from_ref
    if from_ref is None:
        # Get all tags pointing to TO_REF
        tags = runner.run(f"git tag --points-at '{TO_REF}^{{}}'").split("\n")
        logging.info("All tags pointing to %s:\n%s", TO_REF, tags)
        if not with_testing_tags:
            tags.append("*-testing")
        exclude = " ".join([f"--exclude='{tag}'" for tag in tags])
        cmd = f"git describe --abbrev=0 --tags {exclude} '{TO_REF}'"
        FROM_REF = runner.run(cmd)
    else:
        runner.run(f"git rev-parse {from_ref}")
        FROM_REF = from_ref


def set_sha_in_changelog():
    global SHA_IN_CHANGELOG
    SHA_IN_CHANGELOG = runner.run(
        f"git log --format=format:%H {FROM_REF}..{TO_REF}"
    ).split("\n")


def get_year(prs: PullRequests) -> int:
    if not prs:
        return date.today().year
    return max(pr.created_at.year for pr in prs)


def main():
    log_levels = [logging.WARN, logging.INFO, logging.DEBUG]
    args = parse_args()
    logging.basicConfig(
        format="%(asctime)s %(levelname)-8s [%(filename)s:%(lineno)d]:\n%(message)s",
        level=log_levels[min(args.verbose, 2)],
    )
    if args.debug_helpers:
        logging.getLogger("github_helper").setLevel(logging.DEBUG)
        logging.getLogger("git_helper").setLevel(logging.DEBUG)
    # Create a cache directory
    if not p.isdir(CACHE_PATH):
        os.mkdir(CACHE_PATH, 0o700)

    # Get the full repo
    if is_shallow():
        logging.info("Unshallow repository")
        runner.run("git fetch --unshallow", stderr=DEVNULL)
    logging.info("Fetching all tags")
    runner.run("git fetch --tags", stderr=DEVNULL)

    check_refs(args.from_ref, args.to_ref, args.with_testing_tags)
    set_sha_in_changelog()

    logging.info("Using %s..%s as changelog interval", FROM_REF, TO_REF)

    # Use the merge-base commit as a starting point in case the given ref is in another branch
    base_commit = runner.run(f"git merge-base '{FROM_REF}^{{}}' '{TO_REF}^{{}}'")
    # Get starting and ending dates for gathering PRs
    # Add one day before and after to mitigate possible TZ issues
    # `tag^{}` format gives commit ref when we have annotated tags
    # format %cs gives a committer date, works better for cherry-picked commits
    from_date = runner.run(f"git log -1 --format=format:%cs '{base_commit}'")
    to_date = runner.run(f"git log -1 --format=format:%cs '{TO_REF}^{{}}'")
    merged = (
        date.fromisoformat(from_date) - timedelta(1),
        date.fromisoformat(to_date) + timedelta(1),
    )

    # Get all PRs for the given time frame
    global gh
    gh = GitHub(
        args.gh_user_or_token,
        args.gh_password,
        create_cache_dir=False,
        per_page=100,
        pool_size=args.jobs,
    )
    gh.cache_path = CACHE_PATH
    query = f"type:pr repo:{args.repo} is:merged"
    prs = gh.get_pulls_from_search(query=query, merged=merged, sort="created")

    descriptions = get_descriptions(prs)
    changelog_year = get_year(prs)

    write_changelog(args.output, descriptions, changelog_year)


if __name__ == "__main__":
    main()
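As an aside: the two re.sub calls in Description.formatted_entry are easiest to follow on a concrete string. The sketch below replays them outside the class on an invented sample entry (the issue numbers and wording are made up); the patterns themselves are copied from the script above.

import re

# Invented sample entry; only the link rewriting below comes from the script.
sample = (
    "Fix keeper snapshot handling, closes #12345, see also "
    "https://github.com/ClickHouse/ClickHouse/issues/23456#issuecomment-1"
)

# 1) A bare issue number becomes a Markdown link.
sample = re.sub(
    r"([^[])#([0-9]{4,})",
    r"\1[#\2](https://github.com/ClickHouse/ClickHouse/issues/\2)",
    sample,
)
# 2) A bare issue URL (possibly with an #issuecomment-... anchor) becomes a Markdown
#    link too; URLs already inside a Markdown link (preceded by "(") are left alone.
sample = re.sub(
    r"([^(])(https://github.com/ClickHouse/ClickHouse/issues/([0-9]{4,})[-#a-z0-9]*)",
    r"\1[#\3](\2)",
    sample,
)
print(sample)
# Output (wrapped here for readability):
# Fix keeper snapshot handling, closes [#12345](https://github.com/ClickHouse/ClickHouse/issues/12345),
# see also [#23456](https://github.com/ClickHouse/ClickHouse/issues/23456#issuecomment-1)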
@@ -1,427 +1,15 @@
#!/usr/bin/env python3
# The main script is moved to tests/ci/changelog.py
# It depends on the CI scripts too heavily to keep it here
# Here's only a wrapper around it for the people who are used to it

import subprocess
import sys
from pathlib import Path

SCRIPT_PATH = (Path(__file__).parents[2] / "tests/ci/changelog.py").absolute()

if __name__ == "__main__":
    subprocess.check_call(["python3", SCRIPT_PATH, *sys.argv[1:]])
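One detail of generate_description() that is easy to miss is how free-form category names from PR descriptions get normalized with thefuzz. Below is a minimal standalone sketch of that idea; the helper name normalize_category and the misspelled sample categories are mine, not part of the script.

from thefuzz.fuzz import ratio  # type: ignore

categories_preferred_order = (
    "Backward Incompatible Change",
    "New Feature",
    "Performance Improvement",
    "Improvement",
    "Critical Bug Fix",
    "Bug Fix",
    "Build/Testing/Packaging Improvement",
    "Other",
)


def normalize_category(category: str) -> str:
    # Map a free-form category to the closest preferred name when the fuzzy
    # similarity score is high enough, mirroring the loop in generate_description().
    for c in categories_preferred_order:
        if ratio(category.lower(), c.lower()) >= 90:
            return c
    return category


print(normalize_category("Performance Improvements"))  # -> Performance Improvement
print(normalize_category("New Features"))  # -> New Feature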
@@ -1 +0,0 @@
../../tests/ci/git_helper.py

@@ -1 +0,0 @@
../../tests/ci/github_helper.py
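Finally, for anyone who used to run the old copy of the script (now reduced to the wrapper above), a hedged invocation sketch of the relocated version follows. It uses only options defined in parse_args() earlier; the output file name and the release tag are placeholders, and a full repository checkout with the tests/ci helper modules and a GitHub token are assumed.

import subprocess
import sys

# Hypothetical invocation; substitute a real release tag for the placeholder below.
subprocess.check_call(
    [
        sys.executable,
        "tests/ci/changelog.py",
        "--output=CHANGELOG-draft.md",
        "--jobs=10",
        "v22.7.1.2484-stable",
    ]
)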