Remove the old changelog script

This commit is contained in:
Alexey Milovidov 2023-03-27 12:11:32 +02:00
parent 6ebad01070
commit 78eb71a49b
4 changed files with 0 additions and 284 deletions

View File

@ -1,2 +0,0 @@
*.txt
*.json

View File

@ -1,21 +0,0 @@
## How To Generate Changelog
Generate a GitHub token:
* https://github.com/settings/tokens - leave all checkboxes unchecked; no scopes need to be enabled.
Dependencies:
```
sudo apt-get install git curl jq python3 python3-fuzzywuzzy
```
Update information about tags:
```
git fetch --tags
```
Usage example:
```
export GITHUB_USER=... GITHUB_TOKEN=ghp_...
./changelog.sh v21.5.6.6-stable v21.6.2.7-prestable
```

View File

@ -1,96 +0,0 @@
#!/bin/bash
# Collects the numbers of the pull requests merged between two git revisions.
#   $1 - revision the range starts from
#   $2 - revision the range ends at
# Writes the raw git log to changelog-log.txt and the PR numbers found in it
# to changelog-prs.txt (both in the current directory).
set -e

# Absolute path of the directory that contains this script.
script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"

from="$1"
to="$2"

# First-parent history of the requested range, kept on disk for the PR search.
history_cmd=(git log "$from..$to" --first-parent)
"${history_cmd[@]}" > "changelog-log.txt"

# Check for diamond merges. Any matching commit is printed by grep itself.
# DO NOT ADD automated handling of diamond merges to this script.
# It is an unsustainable way to work with git, and it MUST be visible.
if "${history_cmd[@]}" --oneline --grep "Merge branch '" | grep ''; then
  echo "Warning: suspected diamond merges above."
  echo "Some commits will be missed, review these manually."
fi

# Extract PR numbers from commit messages. The first expression handles a
# normal merge, the second a squashed commit, and the remaining two cover
# backport / cherry-pick message variants.
pr_number_filter=(sed -n "s/^.*merg[eding]*.*#\([[:digit:]]\+\).*$/\1/Ip;
s/^.*(#\([[:digit:]]\+\))$/\1/p;
s/^.*back[- ]*port[ed of]*.*#\([[:digit:]]\+\).*$/\1/Ip;
s/^.*cherry[- ]*pick[ed of]*.*#\([[:digit:]]\+\).*$/\1/Ip")

# The awk stage drops small task numbers that come from a different task
# tracker and are referenced by documentation commits such as
# '* DOCSUP-824: query log (#115)'.
"${pr_number_filter[@]}" "changelog-log.txt" \
  | sort -rn \
  | uniq \
  | awk '$0 > 1000 { print $0 }' \
  > "changelog-prs.txt"

# Report how many PRs were found; nothing more to do when there are none.
pr_count="$(wc -l < "changelog-prs.txt")"
echo "$pr_count PRs added between $from and $to."
if [[ "$pr_count" -eq 0 ]]; then
  exit 0
fi
#######################################
# Download a GitHub API URL into a local file, using the file as a cache.
# Globals:   GITHUB_USER, GITHUB_TOKEN (read) - credentials passed to curl
# Arguments: $1 - URL to fetch
#            $2 - destination file; if it already exists nothing is fetched
# Outputs:   the curl command line (token masked) on stdout,
#            a failure message on stderr
# Returns:   0 if the file is present afterwards, 1 if the download failed
#            (the destination file is removed in that case)
#######################################
github_download() {
  local src_url="$1"
  local dest="$2"

  # Already downloaded on a previous run: reuse it.
  [ -f "$dest" ] && return 0

  # Log the request with the token masked.
  echo "curl -u \"$GITHUB_USER:***\" -sSf \"$src_url\" > \"$dest\""

  if ! curl -u "$GITHUB_USER:$GITHUB_TOKEN" -sSf "$src_url" > "$dest"; then
    >&2 echo "Failed to download '$src_url' to '$dest'. Contents: '$(cat "$dest")'."
    # Do not leave a bad file behind, it would be taken for a cached result.
    rm "$dest"
    return 1
  fi

  # Short pause between consecutive requests.
  sleep 0.1
}
# Start from an empty filtered list. The file is truncated rather than deleted
# so that it exists even when every PR below is skipped; the formatter is
# given this file name and cannot open a missing file.
: > changelog-prs-filtered.txt

# Read PR numbers on a dedicated descriptor (3) so that commands inside the
# loop cannot consume the list through stdin.
while IFS= read -r pr <&3
do
  # Download PR info from github.
  file="pr$pr.json"
  github_download "https://api.github.com/repos/ClickHouse/ClickHouse/pulls/$pr" "$file" || continue

  # Sanity check: the downloaded document must describe the requested PR.
  if ! [ "$pr" == "$(jq -r .number "$file")" ]
  then
    >&2 echo "Got wrong data for PR #$pr (please check and remove '$file')."
    continue
  fi

  # Filter out PRs by bots: logins ending in "[bot]" or containing
  # "robot-clickhouse".
  user_login=$(jq -r .user.login "$file")
  if [[ "$user_login" == *"[bot]" || "$user_login" == *robot-clickhouse* ]]
  then
    continue
  fi

  # Download author info from github.
  user_id=$(jq -r .user.id "$file")
  user_file="user$user_id.json"
  github_download "$(jq -r .user.url "$file")" "$user_file" || continue

  # Sanity check: the downloaded document must describe the PR author.
  if ! [ "$user_id" == "$(jq -r .id "$user_file")" ]
  then
    >&2 echo "Got wrong data for user #$user_id (please check and remove '$user_file')."
    continue
  fi

  echo "$pr" >> changelog-prs-filtered.txt
done 3< "changelog-prs.txt"

# Write the changelog header (followed by an empty line), append the formatted
# entries produced by format-changelog.py, and show the result.
echo "### ClickHouse release $to FIXME as compared to $from
" > changelog.md
"$script_dir/format-changelog.py" changelog-prs-filtered.txt >> changelog.md
cat changelog.md

View File

@ -1,165 +0,0 @@
#!/usr/bin/python3
import argparse
import collections
import fuzzywuzzy.fuzz
import itertools
import json
import os
import re
import sys
# Command-line interface: one optional positional argument naming the file
# that lists PR numbers, one per line. Standard input is used when it is
# omitted.
parser = argparse.ArgumentParser(description="Format changelog for given PRs.")
pr_list_options = dict(
    metavar="FILE",
    type=argparse.FileType("r", encoding="utf-8"),
    nargs="?",
    default=sys.stdin,
    help="File with PR numbers, one per line.",
)
parser.add_argument("file", **pr_list_options)
args = parser.parse_args()
# This function mirrors the PR description checks in ClickhousePullRequestTrigger.
# Returns False if the PR should not be mentioned in the changelog.
def parse_one_pull_request(item):
    """Extract the changelog category and entry from a PR description.

    `item` is the PR object; its "body" is scanned for a "changelog category"
    header and a "changelog entry" header (case-insensitive). On success the
    results are stored in item["category"] and item["entry"] and True is
    returned. False is returned, with `item` left unmodified, when the
    category marks the PR as not intended for the changelog.
    """
    body = item["body"]
    raw_lines = body.split("\n") if body else []
    # Empty lines are kept on purpose: they delimit the parts of the
    # description. Every line is trimmed and inner whitespace runs collapsed.
    lines = [re.sub(r"\s+", " ", raw.strip()) for raw in raw_lines]
    total = len(lines)

    category = ""
    entry = ""

    pos = 0
    while pos < total:
        current = lines[pos]
        pos += 1

        if re.match(r"(?i).*change\s*log\s*category", current):
            # At most one empty line may separate the header from the value.
            if pos < total and not lines[pos]:
                pos += 1
            if pos >= total:
                break
            # Drop list markers ("-", "*") and whitespace in front of the name.
            category = re.sub(r"^[-*\s]*", "", lines[pos])
            pos += 1
        elif re.match(r"(?i).*change\s*log\s*entry", current):
            # At most one empty line may separate the header from the text.
            if pos < total and not lines[pos]:
                pos += 1
            # The entry is every following line up to the next empty one.
            first = pos
            while pos < total and lines[pos]:
                pos += 1
            entry = " ".join(lines[first:pos])

    if not category:
        # Shouldn't happen, because description check in CI should catch such
        # PRs. Keep going so that it shows up in the output and can be fixed.
        category = "NO CL CATEGORY"

    # PRs whose category is documentation / non-significant / not for
    # changelog are left out.
    skipped_category = re.match(
        r"(?i)doc|((non|in|not|un)[-\s]*significant)|(not[ ]*for[ ]*changelog)",
        category,
    )
    if skipped_category:
        return False

    if not entry:
        # Shouldn't happen, because description check in CI should catch such
        # PRs. Fall back to the PR title under a dedicated category.
        category = "NO CL ENTRY"
        entry = "NO CL ENTRY: '" + item["title"] + "'"

    # Every entry ends with a full stop.
    entry = entry.strip()
    if entry[-1] != ".":
        entry += "."

    item["entry"] = entry
    item["category"] = category
    return True
# This array gives the preferred category order, and is also used to
# normalize category names.
categories_preferred_order = [
    "Backward Incompatible Change",
    "New Feature",
    "Performance Improvement",
    "Improvement",
    "Bug Fix",
    "Build/Testing/Packaging Improvement",
    "Other",
]

# Category name -> list of PR objects that belong to it.
category_to_pr = collections.defaultdict(list)
# GitHub user id -> user object loaded from "user<id>.json".
users = {}

# Load every listed PR from "pr<number>.json" in the current directory.
for line in args.file:
    pr_number = line.strip()
    if not pr_number:
        # Tolerate blank lines instead of trying to open "pr.json".
        continue

    # Context managers make sure the file handles are closed. JSON
    # interchange is UTF-8, so do not depend on the locale's default encoding.
    with open(f"pr{pr_number}.json", encoding="utf-8") as pr_file:
        pr = json.load(pr_file)
    assert pr["number"]

    if not parse_one_pull_request(pr):
        continue

    assert pr["category"]

    # Normalize category name: a near match (similarity ratio of at least 90)
    # with a known category is replaced by the canonical spelling.
    for c in categories_preferred_order:
        if fuzzywuzzy.fuzz.ratio(pr["category"].lower(), c.lower()) >= 90:
            pr["category"] = c
            break

    category_to_pr[pr["category"]].append(pr)

    # Load the author's user object once per distinct user.
    user_id = pr["user"]["id"]
    if user_id not in users:
        with open(f"user{user_id}.json", encoding="utf-8") as user_file:
            users[user_id] = json.load(user_file)
def print_category(category):
    """Print one changelog section: a "#### <category>" header followed by
    one bullet per PR stored in category_to_pr[category].

    Each bullet has the form
    "* <entry> [#<number>](<PR url>) ([<author>](<author url>))."
    where the author's display name falls back to the login when the name is
    empty. Issue references inside the entry are turned into markdown links;
    the rewritten text is stored back into pr["entry"].
    """
    print("#### " + category)
    print()
    for pr in category_to_pr[category]:
        user = users[pr["user"]["id"]]
        user_name = user["name"] if user["name"] else user["login"]

        # Substitute issue links.
        # 1) issue number w/o markdown link. The negative lookbehind (rather
        #    than a consumed "any character but [") also matches a reference
        #    at the very start of the entry or right after a previous match.
        pr["entry"] = re.sub(
            r"(?<!\[)#([0-9]{4,})",
            r"[#\1](https://github.com/ClickHouse/ClickHouse/issues/\1)",
            pr["entry"],
        )

        # 2) issue URL w/o markdown link. URLs directly preceded by "(" are
        #    already the target of a markdown link and are left alone.
        pr["entry"] = re.sub(
            r"(?<!\()https://github\.com/ClickHouse/ClickHouse/issues/([0-9]{4,})",
            r"[#\1](https://github.com/ClickHouse/ClickHouse/issues/\1)",
            pr["entry"],
        )

        print(
            f'* {pr["entry"]} [#{pr["number"]}]({pr["html_url"]}) ([{user_name}]({user["html_url"]})).'
        )

    # Empty line after the section.
    print()
# Emit the well-known categories first, in their preferred order. Each one is
# removed from the mapping once printed so that it is not printed again below.
for known_category in categories_preferred_order:
    if known_category not in category_to_pr:
        continue
    print_category(known_category)
    del category_to_pr[known_category]

# Whatever is left did not match any well-known category; print it after the
# known ones, in the order the mapping holds it.
for remaining_category in category_to_pr:
    print_category(remaining_category)