ClickHouse/tests/ci/cherry_pick.py
Mikhail f. Shiryaev a4c2ac7e97
Commit only staged files to backport branches
Formerly all changed files were committed, and it caused changed
submodules to be committed as well
2023-02-02 18:18:37 +01:00

517 lines
19 KiB
Python

#!/usr/bin/env python3
"""
A plan:
- TODO: consider receiving GH objects cache from S3, but it's really a few
of requests to API currently
- Get all open release PRs (20.10, 21.8, 22.5, etc.)
- Get all pull-requests between the date of the merge-base for the oldest PR with
labels pr-must-backport and version-specific v21.8-must-backport, but without
pr-backported
- Iterate over gotten PRs:
- for pr-must-backport:
- check if all backport-PRs are created. If yes,
set pr-backported label and finish
- If not, create either cherrypick PRs or merge cherrypick (in the same
stage, if mergable) and create backport-PRs
- If successfull, set pr-backported label on the PR
- for version-specific labels:
- the same, check, cherry-pick, backport, pr-backported
Cherry-pick stage:
- From time to time the cherry-pick fails, if it was done manually. In the
case we check if it's even needed, and mark the release as done somehow.
"""
import argparse
import logging
import os
from contextlib import contextmanager
from datetime import date, timedelta
from subprocess import CalledProcessError
from typing import List, Optional
from env_helper import TEMP_PATH
from get_robot_token import get_best_robot_token
from git_helper import git_runner, is_shallow
from github_helper import (
GitHub,
PullRequest,
PullRequests,
Repository,
)
from ssh import SSHKey
class Labels:
MUST_BACKPORT = "pr-must-backport"
BACKPORT = "pr-backport"
BACKPORTS_CREATED = "pr-backports-created"
CHERRYPICK = "pr-cherrypick"
DO_NOT_TEST = "do not test"
class ReleaseBranch:
CHERRYPICK_DESCRIPTION = """This pull-request is a first step of an automated \
backporting.
It contains changes like after calling a local command `git cherry-pick`.
If you intend to continue backporting this changes, then resolve all conflicts if any.
Otherwise, if you do not want to backport them, then just close this pull-request.
The check results does not matter at this step - you can safely ignore them.
Also this pull-request will be merged automatically as it reaches the mergeable state, \
but you always can merge it manually.
"""
BACKPORT_DESCRIPTION = """This pull-request is a last step of an automated \
backporting.
Treat it as a standard pull-request: look at the checks and resolve conflicts.
Merge it only if you intend to backport changes to the target branch, otherwise just \
close it.
"""
REMOTE = ""
def __init__(self, name: str, pr: PullRequest):
self.name = name
self.pr = pr
self.cherrypick_branch = f"cherrypick/{name}/{pr.merge_commit_sha}"
self.backport_branch = f"backport/{name}/{pr.number}"
self.cherrypick_pr = None # type: Optional[PullRequest]
self.backport_pr = None # type: Optional[PullRequest]
self._backported = None # type: Optional[bool]
self.git_prefix = ( # All commits to cherrypick are done as robot-clickhouse
"git -c user.email=robot-clickhouse@users.noreply.github.com "
"-c user.name=robot-clickhouse -c commit.gpgsign=false"
)
self.pre_check()
def pre_check(self):
branch_updated = git_runner(
f"git branch -a --contains={self.pr.merge_commit_sha} "
f"{self.REMOTE}/{self.name}"
)
if branch_updated:
self._backported = True
def pop_prs(self, prs: PullRequests) -> None:
"""the method processes all prs and pops the ReleaseBranch related prs"""
to_pop = [] # type: List[int]
for i, pr in enumerate(prs):
if self.name not in pr.head.ref:
continue
if pr.head.ref.startswith(f"cherrypick/{self.name}"):
self.cherrypick_pr = pr
to_pop.append(i)
elif pr.head.ref.startswith(f"backport/{self.name}"):
self.backport_pr = pr
to_pop.append(i)
else:
logging.error(
"head ref of PR #%s isn't starting with known suffix",
pr.number,
)
for i in reversed(to_pop):
# Going from the tail to keep the order and pop greater index first
prs.pop(i)
def process(self, dry_run: bool) -> None:
if self.backported:
return
if not self.cherrypick_pr:
if dry_run:
logging.info(
"DRY RUN: Would create cherrypick PR for #%s", self.pr.number
)
return
self.create_cherrypick()
if self.backported:
return
if self.cherrypick_pr is not None:
# Try to merge cherrypick instantly
if self.cherrypick_pr.mergeable and self.cherrypick_pr.state != "closed":
self.cherrypick_pr.merge()
# The PR needs update, since PR.merge doesn't update the object
self.cherrypick_pr.update()
if self.cherrypick_pr.merged:
if dry_run:
logging.info(
"DRY RUN: Would create backport PR for #%s", self.pr.number
)
return
self.create_backport()
return
elif self.cherrypick_pr.state == "closed":
logging.info(
"The cherrypick PR #%s for PR #%s is discarded",
self.cherrypick_pr.number,
self.pr.number,
)
self._backported = True
return
logging.info(
"Cherrypick PR #%s for PR #%s have conflicts and unable to be merged",
self.cherrypick_pr.number,
self.pr.number,
)
def create_cherrypick(self):
# First, create backport branch:
# Checkout release branch with discarding every change
git_runner(f"{self.git_prefix} checkout -f {self.name}")
# Create or reset backport branch
git_runner(f"{self.git_prefix} checkout -B {self.backport_branch}")
# Merge all changes from PR's the first parent commit w/o applying anything
# It will allow to create a merge commit like it would be a cherry-pick
first_parent = git_runner(f"git rev-parse {self.pr.merge_commit_sha}^1")
git_runner(f"{self.git_prefix} merge -s ours --no-edit {first_parent}")
# Second step, create cherrypick branch
git_runner(
f"{self.git_prefix} branch -f "
f"{self.cherrypick_branch} {self.pr.merge_commit_sha}"
)
# Check if there actually any changes between branches. If no, then no
# other actions are required. It's possible when changes are backported
# manually to the release branch already
try:
output = git_runner(
f"{self.git_prefix} merge --no-commit --no-ff {self.cherrypick_branch}"
)
# 'up-to-date', 'up to date', who knows what else (╯°v°)╯ ^┻━┻
if output.startswith("Already up") and output.endswith("date."):
# The changes are already in the release branch, we are done here
logging.info(
"Release branch %s already contain changes from %s",
self.name,
self.pr.number,
)
self._backported = True
return
except CalledProcessError:
# There are most probably conflicts, they'll be resolved in PR
git_runner(f"{self.git_prefix} reset --merge")
else:
# There are changes to apply, so continue
git_runner(f"{self.git_prefix} reset --merge")
# Push, create the cherrypick PR, lable and assign it
for branch in [self.cherrypick_branch, self.backport_branch]:
git_runner(f"{self.git_prefix} push -f {self.REMOTE} {branch}:{branch}")
self.cherrypick_pr = self.pr.base.repo.create_pull(
title=f"Cherry pick #{self.pr.number} to {self.name}: {self.pr.title}",
body=f"Original pull-request #{self.pr.number}\n\n"
f"{self.CHERRYPICK_DESCRIPTION}",
base=self.backport_branch,
head=self.cherrypick_branch,
)
self.cherrypick_pr.add_to_labels(Labels.CHERRYPICK)
self.cherrypick_pr.add_to_labels(Labels.DO_NOT_TEST)
self._assign_new_pr(self.cherrypick_pr)
# update cherrypick PR to get the state for PR.mergable
self.cherrypick_pr.update()
def create_backport(self):
assert self.cherrypick_pr is not None
# Checkout the backport branch from the remote and make all changes to
# apply like they are only one cherry-pick commit on top of release
git_runner(f"{self.git_prefix} checkout -f {self.backport_branch}")
git_runner(
f"{self.git_prefix} pull --ff-only {self.REMOTE} {self.backport_branch}"
)
merge_base = git_runner(
f"{self.git_prefix} merge-base "
f"{self.REMOTE}/{self.name} {self.backport_branch}"
)
git_runner(f"{self.git_prefix} reset --soft {merge_base}")
title = f"Backport #{self.pr.number} to {self.name}: {self.pr.title}"
git_runner(f"{self.git_prefix} commit --allow-empty -F -", input=title)
# Push with force, create the backport PR, lable and assign it
git_runner(
f"{self.git_prefix} push -f {self.REMOTE} "
f"{self.backport_branch}:{self.backport_branch}"
)
self.backport_pr = self.pr.base.repo.create_pull(
title=title,
body=f"Original pull-request #{self.pr.number}\n"
f"Cherry-pick pull-request #{self.cherrypick_pr.number}\n\n"
f"{self.BACKPORT_DESCRIPTION}",
base=self.name,
head=self.backport_branch,
)
self.backport_pr.add_to_labels(Labels.BACKPORT)
self._assign_new_pr(self.backport_pr)
def _assign_new_pr(self, new_pr: PullRequest) -> None:
"""Assign `new_pr` to author, merger and assignees of an original PR"""
# It looks there some race when multiple .add_to_assignees are executed,
# so we'll add all at once
assignees = [self.pr.user, self.pr.merged_by]
if self.pr.assignees:
assignees.extend(self.pr.assignees)
logging.info(
"Assing #%s to author and assignees of the original PR: %s",
new_pr.number,
", ".join(user.login for user in assignees),
)
new_pr.add_to_assignees(*assignees)
@property
def backported(self) -> bool:
if self._backported is not None:
return self._backported
return self.backport_pr is not None
def __repr__(self):
return self.name
class Backport:
def __init__(self, gh: GitHub, repo: str, dry_run: bool):
self.gh = gh
self._repo_name = repo
self.dry_run = dry_run
self._query = f"type:pr repo:{repo}"
self._remote = ""
self._repo = None # type: Optional[Repository]
self.release_prs = [] # type: PullRequests
self.release_branches = [] # type: List[str]
self.labels_to_backport = [] # type: List[str]
self.prs_for_backport = [] # type: PullRequests
self.error = None # type: Optional[Exception]
@property
def remote(self) -> str:
if not self._remote:
# lines of "origin git@github.com:ClickHouse/ClickHouse.git (fetch)"
remotes = git_runner("git remote -v").split("\n")
# We need the first word from the first matching result
self._remote = tuple(
remote.split(maxsplit=1)[0]
for remote in remotes
if f"github.com/{self._repo_name}" in remote # https
or f"github.com:{self._repo_name}" in remote # ssh
)[0]
git_runner(f"git fetch {self._remote}")
ReleaseBranch.REMOTE = self._remote
return self._remote
def receive_release_prs(self):
logging.info("Getting release PRs")
self.release_prs = self.gh.get_pulls_from_search(
query=f"{self._query} is:open",
sort="created",
order="asc",
label="release",
)
self.release_branches = [pr.head.ref for pr in self.release_prs]
self.labels_to_backport = [
f"v{branch}-must-backport" for branch in self.release_branches
]
logging.info("Active releases: %s", ", ".join(self.release_branches))
def receive_prs_for_backport(self):
# The commits in the oldest open release branch
oldest_branch_commits = git_runner(
"git log --no-merges --format=%H --reverse "
f"{self.remote}/{self.default_branch}..{self.remote}/{self.release_branches[0]}"
)
# The first commit is the one we are looking for
since_commit = oldest_branch_commits.split("\n", 1)[0]
since_date = date.fromisoformat(
git_runner.run(f"git log -1 --format=format:%cs {since_commit}")
)
# To not have a possible TZ issues
tomorrow = date.today() + timedelta(days=1)
logging.info("Receive PRs suppose to be backported")
self.prs_for_backport = self.gh.get_pulls_from_search(
query=f"{self._query} -label:{Labels.BACKPORTS_CREATED}",
label=",".join(self.labels_to_backport + [Labels.MUST_BACKPORT]),
merged=[since_date, tomorrow],
)
logging.info(
"PRs to be backported:\n %s",
"\n ".join([pr.html_url for pr in self.prs_for_backport]),
)
def process_backports(self):
for pr in self.prs_for_backport:
try:
self.process_pr(pr)
except Exception as e:
logging.error(
"During processing the PR #%s error occured: %s", pr.number, e
)
self.error = e
def process_pr(self, pr: PullRequest) -> None:
pr_labels = [label.name for label in pr.labels]
if Labels.MUST_BACKPORT in pr_labels:
branches = [
ReleaseBranch(br, pr) for br in self.release_branches
] # type: List[ReleaseBranch]
else:
branches = [
ReleaseBranch(br, pr)
for br in [
label.split("-", 1)[0][1:] # v21.8-must-backport
for label in pr_labels
if label in self.labels_to_backport
]
]
if not branches:
# This is definitely some error. There must be at least one branch
# It also make the whole program exit code non-zero
self.error = Exception(
f"There are no branches to backport PR #{pr.number}, logical error"
)
raise self.error
logging.info(
" PR #%s is suppose to be backported to %s",
pr.number,
", ".join(map(str, branches)),
)
# All PRs for cherrypick and backport branches as heads
query_suffix = " ".join(
[
f"head:{branch.backport_branch} head:{branch.cherrypick_branch}"
for branch in branches
]
)
bp_cp_prs = self.gh.get_pulls_from_search(
query=f"{self._query} {query_suffix}",
)
for br in branches:
br.pop_prs(bp_cp_prs)
if bp_cp_prs:
# This is definitely some error. All prs must be consumed by
# branches with ReleaseBranch.pop_prs. It also make the whole
# program exit code non-zero
self.error = Exception(
"The following PRs are not filtered by release branches:\n"
"\n".join(map(str, bp_cp_prs))
)
raise self.error
if all(br.backported for br in branches):
# Let's check if the PR is already backported
self.mark_pr_backported(pr)
return
for br in branches:
br.process(self.dry_run)
if all(br.backported for br in branches):
# And check it after the running
self.mark_pr_backported(pr)
def mark_pr_backported(self, pr: PullRequest) -> None:
if self.dry_run:
logging.info("DRY RUN: would mark PR #%s as done", pr.number)
return
pr.add_to_labels(Labels.BACKPORTS_CREATED)
logging.info(
"PR #%s is successfully labeled with `%s`",
pr.number,
Labels.BACKPORTS_CREATED,
)
@property
def repo(self) -> Repository:
if self._repo is None:
try:
self._repo = self.release_prs[0].base.repo
except IndexError as exc:
raise Exception(
"`repo` is available only after the `receive_release_prs`"
) from exc
return self._repo
@property
def default_branch(self) -> str:
return self.repo.default_branch
def parse_args():
parser = argparse.ArgumentParser("Create cherry-pick and backport PRs")
parser.add_argument("--token", help="github token, if not set, used from smm")
parser.add_argument(
"--repo", default="ClickHouse/ClickHouse", help="repo owner/name"
)
parser.add_argument("--dry-run", action="store_true", help="do not create anything")
parser.add_argument(
"--debug-helpers",
action="store_true",
help="add debug logging for git_helper and github_helper",
)
return parser.parse_args()
@contextmanager
def clear_repo():
orig_ref = git_runner("git branch --show-current") or git_runner(
"git rev-parse HEAD"
)
try:
yield
except (Exception, KeyboardInterrupt):
git_runner(f"git checkout -f {orig_ref}")
raise
else:
git_runner(f"git checkout -f {orig_ref}")
@contextmanager
def stash():
need_stash = bool(git_runner("git diff HEAD"))
if need_stash:
git_runner("git stash push --no-keep-index -m 'running cherry_pick.py'")
try:
with clear_repo():
yield
except (Exception, KeyboardInterrupt):
if need_stash:
git_runner("git stash pop")
raise
else:
if need_stash:
git_runner("git stash pop")
def main():
if not os.path.exists(TEMP_PATH):
os.makedirs(TEMP_PATH)
args = parse_args()
if args.debug_helpers:
logging.getLogger("github_helper").setLevel(logging.DEBUG)
logging.getLogger("git_helper").setLevel(logging.DEBUG)
token = args.token or get_best_robot_token()
gh = GitHub(token, create_cache_dir=False, per_page=100)
bp = Backport(gh, args.repo, args.dry_run)
# https://github.com/python/mypy/issues/3004
bp.gh.cache_path = f"{TEMP_PATH}/gh_cache" # type: ignore
bp.receive_release_prs()
bp.receive_prs_for_backport()
bp.process_backports()
if bp.error is not None:
logging.error("Finished successfully, but errors occured!")
raise bp.error
if __name__ == "__main__":
logging.basicConfig(level=logging.INFO)
assert not is_shallow()
with stash():
if os.getenv("ROBOT_CLICKHOUSE_SSH_KEY", ""):
with SSHKey("ROBOT_CLICKHOUSE_SSH_KEY"):
main()
else:
main()