ClickHouse/utils/simple-backport/backport.sh

113 lines
4.6 KiB
Bash
Raw Normal View History

2020-04-10 22:29:15 +00:00
#!/bin/bash
# Finds pull requests that were merged into master after a release branch
# diverged from it and that are not present in that branch, and prints the
# ones selected for backporting.
#
# Usage: backport.sh <branch>
#   <branch> is given without the "origin/" prefix.
#
# Environment: GITHUB_TOKEN is sent with GitHub API requests.
# Files written to the current directory: master-log.txt, <branch>-log.txt,
# master-prs.txt, <branch>-prs.txt, <branch>-diff-prs.txt, pr<N>.json and
# <branch>-report.tsv.
set -e

# Fail early with a readable message instead of letting git choke on "origin/".
branch="${1:?usage: $0 <branch>}"
merge_base=$(git merge-base origin/master "origin/$branch")

# Make lists of PRs that were merged into each branch. Use first parent here, or else
# we'll get weird things like seeing older master that was merged into a PR branch
# that was then merged into master.
master_git_cmd=(git log "$merge_base..origin/master" --first-parent)
# The history in back branches shouldn't be too crazy, and sometimes we have a PR
# that merges several backport commits there (3f2cba6824fddf31c30bde8c6f4f860572f4f580),
# so don't use --first-parent
branch_git_cmd=(git log "$merge_base..origin/$branch")

"${master_git_cmd[@]}" > master-log.txt
"${branch_git_cmd[@]}" > "$branch-log.txt"

# Check for diamond merges.
diamonds_in_master=$("${master_git_cmd[@]}" --oneline --grep "Merge branch '")
diamonds_in_branch=$("${branch_git_cmd[@]}" --oneline --grep "Merge branch '")

if [ "$diamonds_in_master" != "" ] || [ "$diamonds_in_branch" != "" ]
then
    echo "$diamonds_in_master"
    echo "$diamonds_in_branch"
    # DO NOT ADD automated handling of diamond merges to this script.
    # It is an unsustainable way to work with git, and it MUST be visible.
    echo Warning: suspected diamond merges above.
    echo Some commits will be missed, review these manually.
fi

# NOTE keep in sync with ./backport.sh.
# Search for PR numbers in commit messages. First variant is normal merge, and second
# variant is squashed. Next are some backport message variants.
find_prs=(sed -n "s/^.*merg[eding]*.*#\([[:digit:]]\+\).*$/\1/Ip;
    s/^.*#\([[:digit:]]\+\))$/\1/p;
    s/^.*back[- ]*port[ed of]*.*#\([[:digit:]]\+\).*$/\1/Ip;
    s/^.*cherry[- ]*pick[ed of]*.*#\([[:digit:]]\+\).*$/\1/Ip")

# Print the PR numbers referenced in git log file $1, one per line, without
# duplicates, largest number first.
function list_prs()
{
    # awk is to filter out small task numbers from different task tracker, which are
    # referenced by documentation commits like '* DOCSUP-824: query log (#115)'.
    "${find_prs[@]}" "$1" | sort -rn | uniq | awk '$0 > 1000 { print $0 }'
}

# Print the lines of file $2 that do not occur as whole lines in file $1.
function prs_not_in()
{
    # grep exits with 1 when it selects no lines, which here just means that the
    # lists do not differ. That must not abort the script under 'set -e', while
    # real grep errors (status 2 and above) still have to.
    grep -f "$1" -F -x -v "$2" || [ $? -eq 1 ]
}

list_prs master-log.txt > master-prs.txt
list_prs "$branch-log.txt" > "$branch-prs.txt"

# Find all master PRs that are not in branch by calculating differences of two PR lists.
prs_not_in "$branch-prs.txt" master-prs.txt > "$branch-diff-prs.txt"
echo "$(wc -l < "$branch-diff-prs".txt) PRs differ between $branch and master."

# Download a URL into a local file, using the file itself as a cache.
# Arguments: $1 - URL to fetch, $2 - destination file.
# Reads GITHUB_TOKEN and sends it in the Authorization header.
# Returns 0 when the file already existed or was downloaded, 1 on failure.
# On failure the destination file is not created.
function github_download()
{
    local url=${1}
    local file=${2}
    local partial="$file.part.$$"

    # Already fetched earlier, reuse it.
    if [ -f "$file" ]
    then
        return 0
    fi

    # Download next to the destination and rename only on success, so that an
    # interrupted run can't leave a truncated file that looks like a cached one.
    if ! curl -H "Authorization: token $GITHUB_TOKEN" \
            -sSf "$url" \
            > "$partial"
    then
        >&2 echo "Failed to download '$url' to '$file'. Contents: '$(cat "$partial")'."
        rm -f -- "$partial"
        return 1
    fi

    if ! mv -- "$partial" "$file"
    then
        rm -f -- "$partial"
        return 1
    fi

    # Short pause after each real request.
    sleep 0.1
}
# Decide what to do with a PR.
# Arguments: $1 - branch name, $2 - PR description text,
#            $3 - PR label names, one per line.
# Prints one of: skip, backport, conflict, no-backport.
function backport_action()
{
    local branch=$1
    local body=$2
    local labels=$3
    local action="skip"

    # First, check the changelog category. We port all bugfixes.
    if grep -qi "^- bug[ -]*fix" <<< "$body"
    then
        action="backport"
    fi

    # Next, check the tag. They might override the decision. Checks are ordered by priority.
    # Labels are compared as fixed strings, so a '.' in the branch name can't
    # act as a regex wildcard.
    if grep -Fxq -e "pr-must-backport" -e "v$branch-must-backport" <<< "$labels"; then action="backport"; fi
    if grep -Fxq -e "v$branch-conflicts" <<< "$labels"; then action="conflict"; fi
    if grep -Fxq -e "pr-no-backport" -e "v$branch-no-backport" <<< "$labels"; then action="no-backport"; fi
    # FIXME Ignore "backported" labels for now. If we can't find the backport commit,
    # this means that the changelog script also won't be able to. An alternative
    # way to mark PR as backported is to add an empty commit with text like
    # "backported #12345", so that it can be found between tags and put in proper
    # place in changelog.
    #if echo "$labels" | grep -x "v$branch\|v$branch-backported" > /dev/null; then action="done"; fi

    echo "$action"
}

# Start the report from scratch.
rm -f -- "$branch-report.tsv"

# The PR list is read through descriptor 3, so that commands in the loop body
# can't accidentally consume it from stdin.
while IFS= read -r pr <&3
do
    [ -n "$pr" ] || continue

    # Download PR info from github.
    file="pr$pr.json"
    github_download "https://api.github.com/repos/ClickHouse/ClickHouse/pulls/$pr" "$file" || continue

    if ! [ "$pr" == "$(jq -r .number "$file")" ]
    then
        >&2 echo "Got wrong data for PR #$pr (please check and remove '$file')."
        continue
    fi

    # A failure to extract the description is not fatal, the labels still count.
    body="$(jq -r .body "$file")" || body=""
    # The filter is quoted because it contains glob characters.
    labels="$(jq -r '.labels[].name' "$file")"
    action="$(backport_action "$branch" "$body" "$labels")"

    # Find merge commit SHA for convenience
    merge_sha="$(jq -r .merge_commit_sha "$file")"

    url="https://github.com/ClickHouse/ClickHouse/pull/$pr"

    # Every examined PR goes to the report, only the ones to backport are printed.
    printf "%s\t%s\t%s\t%s\t%s\n" "$action" "$pr" "$url" "$file" "$merge_sha" >> "$branch-report.tsv"
    if [ "$action" == "backport" ]
    then
        printf "%s\t%s\t%s\n" "$action" "$url" "$merge_sha"
    fi
done 3< "$branch-diff-prs.txt"

echo "Done."