mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-21 15:12:02 +00:00
Move changelog script to tests/ci
This commit is contained in:
parent
31a978d75d
commit
dcd3e9d151
427
tests/ci/changelog.py
Executable file
427
tests/ci/changelog.py
Executable file
@ -0,0 +1,427 @@
|
||||
#!/usr/bin/env python3
|
||||
# In our CI this script runs in style-test containers
|
||||
|
||||
import argparse
|
||||
import logging
|
||||
import os
|
||||
import os.path as p
|
||||
import re
|
||||
from datetime import date, timedelta
|
||||
from subprocess import DEVNULL
|
||||
from typing import Dict, List, Optional, TextIO
|
||||
|
||||
from github.GithubException import RateLimitExceededException, UnknownObjectException
|
||||
from github.NamedUser import NamedUser
|
||||
from thefuzz.fuzz import ratio # type: ignore
|
||||
|
||||
from git_helper import git_runner as runner
|
||||
from git_helper import is_shallow
|
||||
from github_helper import GitHub, PullRequest, PullRequests, Repository
|
||||
|
||||
# Changelog categories in the order they are rendered; the same names are used
# to normalize the categories found in PR descriptions.
# The categories are listed in .github/PULL_REQUEST_TEMPLATE.md as well, keep
# the comments there updated accordingly.
categories_preferred_order = (
    "Backward Incompatible Change",
    "New Feature",
    "Performance Improvement",
    "Improvement",
    "Critical Bug Fix",
    "Bug Fix",
    "Build/Testing/Packaging Improvement",
    "Other",
)

# Boundaries of the changelog interval, assigned in check_refs()
FROM_REF = ""
TO_REF = ""
# Hashes of the commits in FROM_REF..TO_REF, assigned in set_sha_in_changelog()
SHA_IN_CHANGELOG: List[str] = []
# Default client; main() rebinds it to one built from the CLI arguments
gh = GitHub(create_cache_dir=False)
# The GitHub cache directory is located next to this script
CACHE_PATH = p.join(p.dirname(p.realpath(__file__)), "gh_cache")
|
||||
|
||||
|
||||
class Description:
    """One changelog line generated from a pull request.

    Instances are compared, hashed and ordered by the PR number only.
    """

    def __init__(
        self, number: int, user: "NamedUser", html_url: str, entry: str, category: str
    ):
        self.number = number
        self.html_url = html_url
        # The login is read from the raw data and resolved through the cache
        self.user = gh.get_user_cached(user._rawData["login"])  # type: ignore
        self.entry = entry
        self.category = category

    @property
    def formatted_entry(self) -> str:
        """Return the entry as a Markdown bullet with PR and author links."""
        # Substitute issue links.
        # 1) issue number w/o markdown link; the `^` alternative also covers a
        # number at the very beginning of the entry
        entry = re.sub(
            r"(^|[^[])#([0-9]{4,})",
            r"\1[#\2](https://github.com/ClickHouse/ClickHouse/issues/\2)",
            self.entry,
        )
        # 2) issue URL w/o markdown link
        # including #issuecomment-1 or #event-12
        entry = re.sub(
            r"(^|[^(])(https://github.com/ClickHouse/ClickHouse/issues/([0-9]{4,})[-#a-z0-9]*)",
            r"\1[#\3](\2)",
            entry,
        )
        # It's possible that we face a secondary rate limit.
        # In this case we should sleep until we get it
        while True:
            try:
                user_name = self.user.name or self.user.login
                break
            except UnknownObjectException:
                user_name = self.user.login
                break
            except RateLimitExceededException:
                gh.sleep_on_rate_limit()
        return (
            f"* {entry} [#{self.number}]({self.html_url}) "
            f"([{user_name}]({self.user.html_url}))."
        )

    # Sort PR descriptions by numbers
    def __eq__(self, other: object) -> bool:
        # Let Python fall back to its default comparison for foreign types
        if not isinstance(other, Description):
            return NotImplemented
        return self.number == other.number

    def __hash__(self) -> int:
        # Defined together with __eq__, otherwise instances are unhashable
        return hash(self.number)

    def __lt__(self, other: "Description") -> bool:
        if not isinstance(other, Description):
            return NotImplemented
        return self.number < other.number
|
||||
|
||||
|
||||
def get_descriptions(prs: PullRequests) -> Dict[str, List[Description]]:
    """Group the changelog descriptions of the given PRs by category.

    Only PRs whose merge commit is listed in SHA_IN_CHANGELOG are used; PRs
    without a merge commit are skipped with a warning, and PRs for which
    generate_description() returns None are dropped. Every per-category list
    is sorted, i.e. ordered by PR number.
    """
    descriptions = {}  # type: Dict[str, List[Description]]
    repos = {}  # type: Dict[str, Repository]
    # SHA_IN_CHANGELOG is a list; build a set once instead of scanning the
    # whole list for every PR
    sha_in_changelog = set(SHA_IN_CHANGELOG)
    for pr in prs:
        # See https://github.com/PyGithub/PyGithub/issues/2202,
        # obj._rawData doesn't spend additional API requests
        # We'll save some requests
        # pylint: disable=protected-access
        repo_name = pr._rawData["base"]["repo"]["full_name"]
        # pylint: enable=protected-access
        if repo_name not in repos:
            repos[repo_name] = pr.base.repo
        merge_commit = pr.merge_commit_sha
        if merge_commit is None:
            logging.warning("PR %s does not have merge-commit, skipping", pr.number)
            continue

        if merge_commit not in sha_in_changelog:
            continue

        desc = generate_description(pr, repos[repo_name])
        if desc is not None:
            descriptions.setdefault(desc.category, []).append(desc)

    for descs in descriptions.values():
        descs.sort()

    return descriptions
|
||||
|
||||
|
||||
def parse_args() -> argparse.Namespace:
    """Declare the command line interface and parse the process arguments."""
    cli = argparse.ArgumentParser(
        description="Generate a changelog in Markdown format between given tags. "
        "It fetches all tags and unshallow the git repository automatically",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    add = cli.add_argument

    # Logging options
    add(
        "-v",
        "--verbose",
        default=0,
        action="count",
        help="set the script verbosity, could be used multiple",
    )
    add(
        "--debug-helpers",
        action="store_true",
        help="add debug logging for git_helper and github_helper",
    )

    # Output and GitHub access options
    add(
        "--output",
        default="-",
        type=argparse.FileType("w"),
        help="output file for changelog",
    )
    add(
        "--repo",
        default="ClickHouse/ClickHouse",
        help="a repository to query for pull-requests from GitHub",
    )
    add(
        "--jobs",
        default=10,
        type=int,
        help="number of jobs to get pull-requests info from GitHub API",
    )
    add("--gh-user-or-token", help="user name or GH token to authenticate")
    add("--gh-password", help="a password that should be used when user is given")

    # Changelog interval options
    add(
        "--with-testing-tags",
        action="store_true",
        help="by default '*-testing' tags are ignored, this argument enables them too",
    )
    add(
        "--from",
        dest="from_ref",
        help="git ref for a starting point of changelog, by default is calculated "
        "automatically to match a previous tag in history",
    )
    add("to_ref", metavar="TO_REF", help="git ref for the changelog end")

    return cli.parse_args()
|
||||
|
||||
|
||||
# This function mirrors the PR description checks in ClickhousePullRequestTrigger.
# Returns None if the PR should not be mentioned in changelog.
def generate_description(item: PullRequest, repo: Repository) -> Optional[Description]:
    """Build a changelog Description from the body of a pull request.

    If the head branch looks like `backport/<...>/<number>`, the body of PR
    <number> is parsed instead and the entry is prefixed with a reference to
    the backport PR.

    Args:
        item: the pull request to describe.
        repo: the repository used to fetch the original PR of a backport.

    Returns:
        The Description, or None when the category starts with "doc"
        (case-insensitively).
    """
    backport_number = item.number
    if item.head.ref.startswith("backport/"):
        branch_parts = item.head.ref.split("/")
        if len(branch_parts) == 3:
            try:
                item = gh.get_pull_cached(repo, int(branch_parts[-1]))
            except Exception as e:
                # Best effort: fall back to the backport PR itself
                logging.warning("unable to get backpoted PR, exception: %s", e)
        else:
            logging.warning(
                "The branch %s doesn't match backport template, using PR %s as is",
                item.head.ref,
                item.number,
            )
    description = item.body
    # Don't skip empty lines because they delimit parts of description
    lines = [x.strip() for x in (description.split("\n") if description else [])]
    lines = [re.sub(r"\s+", " ", ln) for ln in lines]

    category = ""
    entry = ""

    i = 0
    while i < len(lines):
        if re.match(r"(?i)^[#>*_ ]*change\s*log\s*category", lines[i]):
            i += 1
            if i >= len(lines):
                break
            # Can have one empty line between header and the category itself.
            # Filter it out.
            if not lines[i]:
                i += 1
                if i >= len(lines):
                    break
            category = re.sub(r"^[-*\s]*", "", lines[i])
            i += 1
        elif re.match(
            r"(?i)^[#>*_ ]*(short\s*description|change\s*log\s*entry)", lines[i]
        ):
            i += 1
            # Can have one empty line between header and the entry itself.
            # Filter it out.
            if i < len(lines) and not lines[i]:
                i += 1
            # All following lines until empty one are the changelog entry.
            entry_lines = []
            while i < len(lines) and lines[i]:
                entry_lines.append(lines[i])
                i += 1
            entry = " ".join(entry_lines)
        else:
            i += 1

    # Remove excessive bullets from the entry.
    if re.match(r"^[\-\*] ", entry):
        entry = entry[2:]

    # Better style: upper-case the first letter only. str.capitalize() would
    # also lower-case the rest and break names like `MergeTree` or `ALTER`.
    if re.match(r"^[a-z]", entry):
        entry = entry[0].upper() + entry[1:]

    if not category:
        # Shouldn't happen, because description check in CI should catch such PRs.
        # Fall through, so that it shows up in output and the user can fix it.
        category = "NO CL CATEGORY"

    # Filter out the PR categories that are not for changelog.
    if re.match(
        r"(?i)((non|in|not|un)[-\s]*significant)|(not[ ]*for[ ]*changelog)",
        category,
    ):
        category = "NOT FOR CHANGELOG / INSIGNIFICANT"
        # Such PRs are listed by their title, not by the parsed entry
        return Description(item.number, item.user, item.html_url, item.title, category)

    # Normalize bug fixes
    if re.match(r"(?i)bug\Wfix", category):
        category = "Bug Fix (user-visible misbehavior in an official stable release)"

    # Filter out documentations changelog
    if re.match(r"(?i)doc", category):
        return None

    if backport_number != item.number:
        entry = f"Backported in #{backport_number}: {entry}"

    # NOTE(review): for a backport the prefix above makes `entry` non-empty, so
    # the check below never fires for backported PRs - confirm it is intended
    if not entry:
        # Shouldn't happen, because description check in CI should catch such PRs.
        category = "NO CL ENTRY"
        entry = "NO CL ENTRY: '" + item.title + "'"

    entry = entry.strip()
    if not entry.endswith("."):
        entry += "."

    # Map a nearly matching category name to its canonical spelling
    for c in categories_preferred_order:
        if ratio(category.lower(), c.lower()) >= 90:
            category = c
            break

    return Description(item.number, item.user, item.html_url, entry, category)
|
||||
|
||||
|
||||
def write_changelog(
    fd: TextIO, descriptions: Dict[str, List[Description]], year: int
) -> None:
    """Write the Markdown changelog for FROM_REF..TO_REF to fd.

    The front matter and the release header come first, then the categories
    from categories_preferred_order that have entries, then every remaining
    category in alphabetical order.
    """
    to_commit = runner(f"git rev-parse {TO_REF}^{{}}")[:11]
    from_commit = runner(f"git rev-parse {FROM_REF}^{{}}")[:11]
    header = (
        f"---\nsidebar_position: 1\nsidebar_label: {year}\n---\n\n"
        f"# {year} Changelog\n\n"
        f"### ClickHouse release {TO_REF} ({to_commit}) FIXME "
        f"as compared to {FROM_REF} ({from_commit})\n\n"
    )
    fd.write(header)

    # Well-known categories keep their preferred order
    preferred = [name for name in categories_preferred_order if name in descriptions]
    for name in preferred:
        fd.write(f"#### {name}\n")
        for item in descriptions[name]:
            fd.write(f"{item.formatted_entry}\n")
        fd.write("\n")

    # Everything else follows alphabetically
    for name in sorted(descriptions):
        if name in preferred:
            continue
        fd.write(f"#### {name}\n\n")
        for item in descriptions[name]:
            fd.write(f"{item.formatted_entry}\n")
        fd.write("\n")
|
||||
|
||||
|
||||
def check_refs(from_ref: Optional[str], to_ref: str, with_testing_tags: bool):
    """Check the given refs and store them in the FROM_REF and TO_REF globals.

    Args:
        from_ref: the starting ref; when None, it is calculated with
            `git describe` as the closest tag that does not point at to_ref.
        to_ref: the ending ref of the changelog.
        with_testing_tags: when False, '*-testing' tags are excluded from the
            automatic search of the starting tag.

    Both refs are passed through `git rev-parse`; presumably runner.run raises
    when git fails - confirm in git_helper.
    """
    global FROM_REF, TO_REF
    TO_REF = to_ref

    # Check TO_REF
    runner.run(f"git rev-parse {TO_REF}")

    # Check from_ref
    if from_ref is not None:
        # Validate the ref given by the caller; the FROM_REF global is not
        # assigned yet at this point
        runner.run(f"git rev-parse {from_ref}")
        FROM_REF = from_ref
        return

    # Get all tags pointing to TO_REF
    tags = runner.run(f"git tag --points-at '{TO_REF}^{{}}'").split("\n")
    logging.info("All tags pointing to %s:\n%s", TO_REF, tags)
    if not with_testing_tags:
        tags.append("*-testing")
    # An empty git output is split into [""], don't produce --exclude='' for it
    exclude = " ".join(f"--exclude='{tag}'" for tag in tags if tag)
    cmd = f"git describe --abbrev=0 --tags {exclude} '{TO_REF}'"
    FROM_REF = runner.run(cmd)
|
||||
|
||||
|
||||
def set_sha_in_changelog():
    """Fill SHA_IN_CHANGELOG with the commit hashes of FROM_REF..TO_REF."""
    global SHA_IN_CHANGELOG
    # `%H` prints one full commit hash per line
    git_log = runner.run(f"git log --format=format:%H {FROM_REF}..{TO_REF}")
    SHA_IN_CHANGELOG = git_log.split("\n")
|
||||
|
||||
|
||||
def get_year(prs: PullRequests) -> int:
    """Return the latest creation year among prs, or the current year if empty."""
    if prs:
        years = [pr.created_at.year for pr in prs]
    else:
        years = [date.today().year]
    return max(years)
|
||||
|
||||
|
||||
def main():
    """Generate the changelog for the interval given on the command line.

    The steps are: set up logging, create the cache directory, fetch the full
    history and all tags, resolve the refs, search the PRs merged within the
    interval dates and write the resulting changelog to the output.
    """
    global gh
    args = parse_args()
    levels = (logging.WARN, logging.INFO, logging.DEBUG)
    logging.basicConfig(
        format="%(asctime)s %(levelname)-8s [%(filename)s:%(lineno)d]:\n%(message)s",
        level=levels[min(args.verbose, 2)],
    )
    if args.debug_helpers:
        for helper in ("github_helper", "git_helper"):
            logging.getLogger(helper).setLevel(logging.DEBUG)
    # Create a cache directory
    if not p.isdir(CACHE_PATH):
        os.mkdir(CACHE_PATH, 0o700)

    # Get the full repo
    if is_shallow():
        logging.info("Unshallow repository")
        runner.run("git fetch --unshallow", stderr=DEVNULL)
    logging.info("Fetching all tags")
    runner.run("git fetch --tags", stderr=DEVNULL)

    check_refs(args.from_ref, args.to_ref, args.with_testing_tags)
    set_sha_in_changelog()

    logging.info("Using %s..%s as changelog interval", FROM_REF, TO_REF)

    # use merge-base commit as a starting point, if used ref in another branch
    base_commit = runner.run(f"git merge-base '{FROM_REF}^{{}}' '{TO_REF}^{{}}'")
    # Get starting and ending dates for gathering PRs
    # Add one day after and before to mitigate TZ possible issues
    # `tag^{}` format gives commit ref when we have annotated tags
    # format %cs gives a committer date, works better for cherry-picked commits
    from_date = runner.run(f"git log -1 --format=format:%cs '{base_commit}'")
    to_date = runner.run(f"git log -1 --format=format:%cs '{TO_REF}^{{}}'")
    one_day = timedelta(1)
    first_day = date.fromisoformat(from_date) - one_day
    last_day = date.fromisoformat(to_date) + one_day
    merged = (first_day, last_day)

    # Get all PRs for the given time frame
    gh = GitHub(
        args.gh_user_or_token,
        args.gh_password,
        create_cache_dir=False,
        per_page=100,
        pool_size=args.jobs,
    )
    gh.cache_path = CACHE_PATH
    query = f"type:pr repo:{args.repo} is:merged"
    prs = gh.get_pulls_from_search(query=query, merged=merged, sort="created")

    descriptions = get_descriptions(prs)
    changelog_year = get_year(prs)

    write_changelog(args.output, descriptions, changelog_year)


if __name__ == "__main__":
    main()
|
@ -1,427 +1,15 @@
|
||||
#!/usr/bin/env python3
|
||||
# In our CI this script runs in style-test containers
|
||||
|
||||
import argparse
|
||||
import logging
|
||||
import os
|
||||
import os.path as p
|
||||
import re
|
||||
from datetime import date, timedelta
|
||||
from subprocess import DEVNULL, CalledProcessError
|
||||
from typing import Dict, List, Optional, TextIO
|
||||
# The main script is moved to tests/ci/changelog.py
|
||||
# It depends on the CI scripts too heavily to be kept here
|
||||
# This is only a wrapper around it for people who are used to the old location
|
||||
|
||||
from github.GithubException import RateLimitExceededException, UnknownObjectException
|
||||
from github.NamedUser import NamedUser
|
||||
from thefuzz.fuzz import ratio # type: ignore
|
||||
|
||||
from git_helper import git_runner as runner
|
||||
from git_helper import is_shallow
|
||||
from github_helper import GitHub, PullRequest, PullRequests, Repository
|
||||
|
||||
# Changelog categories in the order they are rendered; the same names are used
# to normalize the categories found in PR descriptions.
# The categories are listed in .github/PULL_REQUEST_TEMPLATE.md as well, keep
# the comments there updated accordingly.
categories_preferred_order = (
    "Backward Incompatible Change",
    "New Feature",
    "Performance Improvement",
    "Improvement",
    "Critical Bug Fix",
    "Bug Fix",
    "Build/Testing/Packaging Improvement",
    "Other",
)

# Boundaries of the changelog interval, assigned in check_refs()
FROM_REF = ""
TO_REF = ""
# Hashes of the commits in FROM_REF..TO_REF, assigned in set_sha_in_changelog()
SHA_IN_CHANGELOG: List[str] = []
# Default client; main() rebinds it to one built from the CLI arguments
gh = GitHub(create_cache_dir=False)
# The GitHub cache directory is located next to this script
CACHE_PATH = p.join(p.dirname(p.realpath(__file__)), "gh_cache")
|
||||
|
||||
|
||||
class Description:
    """One changelog line generated from a pull request.

    Instances are compared, hashed and ordered by the PR number only.
    """

    def __init__(
        self, number: int, user: "NamedUser", html_url: str, entry: str, category: str
    ):
        self.number = number
        self.html_url = html_url
        # The login is read from the raw data and resolved through the cache
        self.user = gh.get_user_cached(user._rawData["login"])  # type: ignore
        self.entry = entry
        self.category = category

    @property
    def formatted_entry(self) -> str:
        """Return the entry as a Markdown bullet with PR and author links."""
        # Substitute issue links.
        # 1) issue number w/o markdown link; the `^` alternative also covers a
        # number at the very beginning of the entry
        entry = re.sub(
            r"(^|[^[])#([0-9]{4,})",
            r"\1[#\2](https://github.com/ClickHouse/ClickHouse/issues/\2)",
            self.entry,
        )
        # 2) issue URL w/o markdown link
        # including #issuecomment-1 or #event-12
        entry = re.sub(
            r"(^|[^(])(https://github.com/ClickHouse/ClickHouse/issues/([0-9]{4,})[-#a-z0-9]*)",
            r"\1[#\3](\2)",
            entry,
        )
        # It's possible that we face a secondary rate limit.
        # In this case we should sleep until we get it
        while True:
            try:
                user_name = self.user.name or self.user.login
                break
            except UnknownObjectException:
                user_name = self.user.login
                break
            except RateLimitExceededException:
                gh.sleep_on_rate_limit()
        return (
            f"* {entry} [#{self.number}]({self.html_url}) "
            f"([{user_name}]({self.user.html_url}))."
        )

    # Sort PR descriptions by numbers
    def __eq__(self, other: object) -> bool:
        # Let Python fall back to its default comparison for foreign types
        if not isinstance(other, Description):
            return NotImplemented
        return self.number == other.number

    def __hash__(self) -> int:
        # Defined together with __eq__, otherwise instances are unhashable
        return hash(self.number)

    def __lt__(self, other: "Description") -> bool:
        if not isinstance(other, Description):
            return NotImplemented
        return self.number < other.number
|
||||
|
||||
|
||||
def get_descriptions(prs: PullRequests) -> Dict[str, List[Description]]:
    """Group the changelog descriptions of the given PRs by category.

    Only PRs whose merge commit is listed in SHA_IN_CHANGELOG are used; PRs
    without a merge commit are skipped with a warning, and PRs for which
    generate_description() returns None are dropped. Every per-category list
    is sorted, i.e. ordered by PR number.
    """
    descriptions = {}  # type: Dict[str, List[Description]]
    repos = {}  # type: Dict[str, Repository]
    # SHA_IN_CHANGELOG is a list; build a set once instead of scanning the
    # whole list for every PR
    sha_in_changelog = set(SHA_IN_CHANGELOG)
    for pr in prs:
        # See https://github.com/PyGithub/PyGithub/issues/2202,
        # obj._rawData doesn't spend additional API requests
        # We'll save some requests
        # pylint: disable=protected-access
        repo_name = pr._rawData["base"]["repo"]["full_name"]
        # pylint: enable=protected-access
        if repo_name not in repos:
            repos[repo_name] = pr.base.repo
        merge_commit = pr.merge_commit_sha
        if merge_commit is None:
            logging.warning("PR %s does not have merge-commit, skipping", pr.number)
            continue

        if merge_commit not in sha_in_changelog:
            continue

        desc = generate_description(pr, repos[repo_name])
        if desc is not None:
            descriptions.setdefault(desc.category, []).append(desc)

    for descs in descriptions.values():
        descs.sort()

    return descriptions
|
||||
|
||||
|
||||
def parse_args() -> argparse.Namespace:
    """Declare the command line interface and parse the process arguments."""
    cli = argparse.ArgumentParser(
        description="Generate a changelog in Markdown format between given tags. "
        "It fetches all tags and unshallow the git repository automatically",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    add = cli.add_argument

    # Logging options
    add(
        "-v",
        "--verbose",
        default=0,
        action="count",
        help="set the script verbosity, could be used multiple",
    )
    add(
        "--debug-helpers",
        action="store_true",
        help="add debug logging for git_helper and github_helper",
    )

    # Output and GitHub access options
    add(
        "--output",
        default="-",
        type=argparse.FileType("w"),
        help="output file for changelog",
    )
    add(
        "--repo",
        default="ClickHouse/ClickHouse",
        help="a repository to query for pull-requests from GitHub",
    )
    add(
        "--jobs",
        default=10,
        type=int,
        help="number of jobs to get pull-requests info from GitHub API",
    )
    add("--gh-user-or-token", help="user name or GH token to authenticate")
    add("--gh-password", help="a password that should be used when user is given")

    # Changelog interval options
    add(
        "--with-testing-tags",
        action="store_true",
        help="by default '*-testing' tags are ignored, this argument enables them too",
    )
    add(
        "--from",
        dest="from_ref",
        help="git ref for a starting point of changelog, by default is calculated "
        "automatically to match a previous tag in history",
    )
    add("to_ref", metavar="TO_REF", help="git ref for the changelog end")

    return cli.parse_args()
|
||||
|
||||
|
||||
# This function mirrors the PR description checks in ClickhousePullRequestTrigger.
# Returns None if the PR should not be mentioned in changelog.
def generate_description(item: PullRequest, repo: Repository) -> Optional[Description]:
    """Build a changelog Description from the body of a pull request.

    If the head branch looks like `backport/<...>/<number>`, the body of PR
    <number> is parsed instead and the entry is prefixed with a reference to
    the backport PR.

    Args:
        item: the pull request to describe.
        repo: the repository used to fetch the original PR of a backport.

    Returns:
        The Description, or None when the category starts with "doc"
        (case-insensitively).
    """
    backport_number = item.number
    if item.head.ref.startswith("backport/"):
        branch_parts = item.head.ref.split("/")
        if len(branch_parts) == 3:
            try:
                item = gh.get_pull_cached(repo, int(branch_parts[-1]))
            except Exception as e:
                # Best effort: fall back to the backport PR itself
                logging.warning("unable to get backpoted PR, exception: %s", e)
        else:
            logging.warning(
                "The branch %s doesn't match backport template, using PR %s as is",
                item.head.ref,
                item.number,
            )
    description = item.body
    # Don't skip empty lines because they delimit parts of description
    lines = [x.strip() for x in (description.split("\n") if description else [])]
    lines = [re.sub(r"\s+", " ", ln) for ln in lines]

    category = ""
    entry = ""

    i = 0
    while i < len(lines):
        if re.match(r"(?i)^[#>*_ ]*change\s*log\s*category", lines[i]):
            i += 1
            if i >= len(lines):
                break
            # Can have one empty line between header and the category itself.
            # Filter it out.
            if not lines[i]:
                i += 1
                if i >= len(lines):
                    break
            category = re.sub(r"^[-*\s]*", "", lines[i])
            i += 1
        elif re.match(
            r"(?i)^[#>*_ ]*(short\s*description|change\s*log\s*entry)", lines[i]
        ):
            i += 1
            # Can have one empty line between header and the entry itself.
            # Filter it out.
            if i < len(lines) and not lines[i]:
                i += 1
            # All following lines until empty one are the changelog entry.
            entry_lines = []
            while i < len(lines) and lines[i]:
                entry_lines.append(lines[i])
                i += 1
            entry = " ".join(entry_lines)
        else:
            i += 1

    # Remove excessive bullets from the entry.
    if re.match(r"^[\-\*] ", entry):
        entry = entry[2:]

    # Better style: upper-case the first letter only. str.capitalize() would
    # also lower-case the rest and break names like `MergeTree` or `ALTER`.
    if re.match(r"^[a-z]", entry):
        entry = entry[0].upper() + entry[1:]

    if not category:
        # Shouldn't happen, because description check in CI should catch such PRs.
        # Fall through, so that it shows up in output and the user can fix it.
        category = "NO CL CATEGORY"

    # Filter out the PR categories that are not for changelog.
    if re.match(
        r"(?i)((non|in|not|un)[-\s]*significant)|(not[ ]*for[ ]*changelog)",
        category,
    ):
        category = "NOT FOR CHANGELOG / INSIGNIFICANT"
        # Such PRs are listed by their title, not by the parsed entry
        return Description(item.number, item.user, item.html_url, item.title, category)

    # Normalize bug fixes
    if re.match(r"(?i)bug\Wfix", category):
        category = "Bug Fix (user-visible misbehavior in an official stable release)"

    # Filter out documentations changelog
    if re.match(r"(?i)doc", category):
        return None

    if backport_number != item.number:
        entry = f"Backported in #{backport_number}: {entry}"

    # NOTE(review): for a backport the prefix above makes `entry` non-empty, so
    # the check below never fires for backported PRs - confirm it is intended
    if not entry:
        # Shouldn't happen, because description check in CI should catch such PRs.
        category = "NO CL ENTRY"
        entry = "NO CL ENTRY: '" + item.title + "'"

    entry = entry.strip()
    if not entry.endswith("."):
        entry += "."

    # Map a nearly matching category name to its canonical spelling
    for c in categories_preferred_order:
        if ratio(category.lower(), c.lower()) >= 90:
            category = c
            break

    return Description(item.number, item.user, item.html_url, entry, category)
|
||||
|
||||
|
||||
def write_changelog(
    fd: TextIO, descriptions: Dict[str, List[Description]], year: int
) -> None:
    """Write the Markdown changelog for FROM_REF..TO_REF to fd.

    The front matter and the release header come first, then the categories
    from categories_preferred_order that have entries, then every remaining
    category in alphabetical order.
    """
    to_commit = runner(f"git rev-parse {TO_REF}^{{}}")[:11]
    from_commit = runner(f"git rev-parse {FROM_REF}^{{}}")[:11]
    header = (
        f"---\nsidebar_position: 1\nsidebar_label: {year}\n---\n\n"
        f"# {year} Changelog\n\n"
        f"### ClickHouse release {TO_REF} ({to_commit}) FIXME "
        f"as compared to {FROM_REF} ({from_commit})\n\n"
    )
    fd.write(header)

    # Well-known categories keep their preferred order
    preferred = [name for name in categories_preferred_order if name in descriptions]
    for name in preferred:
        fd.write(f"#### {name}\n")
        for item in descriptions[name]:
            fd.write(f"{item.formatted_entry}\n")
        fd.write("\n")

    # Everything else follows alphabetically
    for name in sorted(descriptions):
        if name in preferred:
            continue
        fd.write(f"#### {name}\n\n")
        for item in descriptions[name]:
            fd.write(f"{item.formatted_entry}\n")
        fd.write("\n")
|
||||
|
||||
|
||||
def check_refs(from_ref: Optional[str], to_ref: str, with_testing_tags: bool):
    """Check the given refs and store them in the FROM_REF and TO_REF globals.

    Args:
        from_ref: the starting ref; when None, it is calculated with
            `git describe` as the closest tag that does not point at to_ref.
        to_ref: the ending ref of the changelog.
        with_testing_tags: when False, '*-testing' tags are excluded from the
            automatic search of the starting tag.

    Both refs are passed through `git rev-parse`; presumably runner.run raises
    when git fails - confirm in git_helper.
    """
    global FROM_REF, TO_REF
    TO_REF = to_ref

    # Check TO_REF
    runner.run(f"git rev-parse {TO_REF}")

    # Check from_ref
    if from_ref is not None:
        # Validate the ref given by the caller; the FROM_REF global is not
        # assigned yet at this point
        runner.run(f"git rev-parse {from_ref}")
        FROM_REF = from_ref
        return

    # Get all tags pointing to TO_REF
    tags = runner.run(f"git tag --points-at '{TO_REF}^{{}}'").split("\n")
    logging.info("All tags pointing to %s:\n%s", TO_REF, tags)
    if not with_testing_tags:
        tags.append("*-testing")
    # An empty git output is split into [""], don't produce --exclude='' for it
    exclude = " ".join(f"--exclude='{tag}'" for tag in tags if tag)
    cmd = f"git describe --abbrev=0 --tags {exclude} '{TO_REF}'"
    FROM_REF = runner.run(cmd)
|
||||
|
||||
|
||||
def set_sha_in_changelog():
    """Fill SHA_IN_CHANGELOG with the commit hashes of FROM_REF..TO_REF."""
    global SHA_IN_CHANGELOG
    # `%H` prints one full commit hash per line
    git_log = runner.run(f"git log --format=format:%H {FROM_REF}..{TO_REF}")
    SHA_IN_CHANGELOG = git_log.split("\n")
|
||||
|
||||
|
||||
def get_year(prs: PullRequests) -> int:
    """Return the latest creation year among prs, or the current year if empty."""
    if prs:
        years = [pr.created_at.year for pr in prs]
    else:
        years = [date.today().year]
    return max(years)
|
||||
|
||||
|
||||
def main():
    """Generate the changelog for the interval given on the command line.

    The steps are: set up logging, create the cache directory, fetch the full
    history and all tags, resolve the refs, search the PRs merged within the
    interval dates and write the resulting changelog to the output.
    """
    global gh
    args = parse_args()
    levels = (logging.WARN, logging.INFO, logging.DEBUG)
    logging.basicConfig(
        format="%(asctime)s %(levelname)-8s [%(filename)s:%(lineno)d]:\n%(message)s",
        level=levels[min(args.verbose, 2)],
    )
    if args.debug_helpers:
        for helper in ("github_helper", "git_helper"):
            logging.getLogger(helper).setLevel(logging.DEBUG)
    # Create a cache directory
    if not p.isdir(CACHE_PATH):
        os.mkdir(CACHE_PATH, 0o700)

    # Get the full repo
    if is_shallow():
        logging.info("Unshallow repository")
        runner.run("git fetch --unshallow", stderr=DEVNULL)
    logging.info("Fetching all tags")
    runner.run("git fetch --tags", stderr=DEVNULL)

    check_refs(args.from_ref, args.to_ref, args.with_testing_tags)
    set_sha_in_changelog()

    logging.info("Using %s..%s as changelog interval", FROM_REF, TO_REF)

    # use merge-base commit as a starting point, if used ref in another branch
    base_commit = runner.run(f"git merge-base '{FROM_REF}^{{}}' '{TO_REF}^{{}}'")
    # Get starting and ending dates for gathering PRs
    # Add one day after and before to mitigate TZ possible issues
    # `tag^{}` format gives commit ref when we have annotated tags
    # format %cs gives a committer date, works better for cherry-picked commits
    from_date = runner.run(f"git log -1 --format=format:%cs '{base_commit}'")
    to_date = runner.run(f"git log -1 --format=format:%cs '{TO_REF}^{{}}'")
    one_day = timedelta(1)
    first_day = date.fromisoformat(from_date) - one_day
    last_day = date.fromisoformat(to_date) + one_day
    merged = (first_day, last_day)

    # Get all PRs for the given time frame
    gh = GitHub(
        args.gh_user_or_token,
        args.gh_password,
        create_cache_dir=False,
        per_page=100,
        pool_size=args.jobs,
    )
    gh.cache_path = CACHE_PATH
    query = f"type:pr repo:{args.repo} is:merged"
    prs = gh.get_pulls_from_search(query=query, merged=merged, sort="created")

    descriptions = get_descriptions(prs)
    changelog_year = get_year(prs)

    write_changelog(args.output, descriptions, changelog_year)
|
||||
import subprocess
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
SCRIPT_PATH = (Path(__file__).parents[2] / "tests/ci/changelog.py").absolute()
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
subprocess.check_call(["python3", SCRIPT_PATH, *sys.argv[1:]])
|
||||
|
@ -1 +0,0 @@
|
||||
../../tests/ci/git_helper.py
|
@ -1 +0,0 @@
|
||||
../../tests/ci/github_helper.py
|
Loading…
Reference in New Issue
Block a user