diff --git a/utils/github/__main__.py b/utils/github/__main__.py deleted file mode 100644 index 1dbc9895bd6..00000000000 --- a/utils/github/__main__.py +++ /dev/null @@ -1,231 +0,0 @@ -# -*- coding: utf-8 -*- - -''' - Rules for commit messages, branch names and everything: - - - All important(!) commits to master branch must originate from pull-requests. - - All pull-requests must be squash-merged or explicitly merged without rebase. - - All pull-requests to master must have at least one label prefixed with `pr-`. - - Labels that require pull-request to be backported must be red colored (#ff0000). - - Release branch name must be of form `YY.NUMBER`. - - All release branches must be forked directly from the master branch and never be merged back, - or merged with any other branches based on the master branch (including master branch itself). - - Output of this script: - - - Commits without references from pull-requests. - - Pull-requests to master without proper labels. - - Pull-requests that need to be backported, with statuses per release branch. - -''' - -from . import local, query -from . import parser as parse_description - -import argparse -import re -import sys - -try: - from termcolor import colored # `pip install termcolor` -except ImportError: - sys.exit("Package 'termcolor' not found. 
Try run: `pip3 install [--user] termcolor`") - - -CHECK_MARK = colored('šŸ—ø', 'green') -CROSS_MARK = colored('šŸ—™', 'red') -BACKPORT_LABEL_MARK = colored('šŸ·', 'yellow') -CONFLICT_LABEL_MARK = colored('ā˜', 'yellow') -NO_BACKPORT_LABEL_MARK = colored('ā”', 'yellow') -CLOCK_MARK = colored('ā†»', 'cyan') - - -parser = argparse.ArgumentParser(description='Helper for the ClickHouse Release machinery') -parser.add_argument('--repo', '-r', type=str, default='', metavar='PATH', - help='path to the root of the ClickHouse repository') -parser.add_argument('--remote', type=str, default='origin', - help='remote name of the "ClickHouse/ClickHouse" upstream') -parser.add_argument('--token', type=str, required=True, - help='token for Github access') -parser.add_argument('--login', type=str, - help='filter authorship by login') -parser.add_argument('--auto-label', action='store_true', dest='autolabel', default=True, - help='try to automatically parse PR description and put labels') - -# Either select last N release branches, or specify them manually. 
-group = parser.add_mutually_exclusive_group(required=True) -group.add_argument('-n', type=int, default=3, dest='number', - help='number of last release branches to consider') -group.add_argument('--branch', type=str, action='append', metavar='BRANCH', - help='specific release branch name to consider') - -args = parser.parse_args() - -github = query.Query(args.token, 30) -repo = local.Local(args.repo, args.remote, github.get_default_branch()) - -if not args.branch: - release_branches = repo.get_release_branches()[-args.number:] # [(branch name, base)] -else: - release_branches = [] - all_release_branches = repo.get_release_branches() - for branch in all_release_branches: - if branch[0] in args.branch: - release_branches.append(branch) - -if not release_branches: - sys.exit('No release branches found!') -else: - print('Found release branches:') - for branch in release_branches: - print(f'{CHECK_MARK} {branch[0]} forked from {branch[1]}') - -first_commit = release_branches[0][1] -pull_requests = github.get_pull_requests(first_commit, args.login) -good_commits = set(pull_request['mergeCommit']['oid'] for pull_request in pull_requests) - -bad_commits = [] # collect and print them in the end -from_commit = repo.get_head_commit() -for i in reversed(range(len(release_branches))): - for commit in repo.iterate(from_commit, release_branches[i][1]): - if str(commit) not in good_commits and commit.author.name != 'robot-clickhouse': - bad_commits.append(commit) - - from_commit = release_branches[i][1] - -members = set(github.get_members("ClickHouse", "ClickHouse")) -def print_responsible(pull_request): - if "author" not in pull_request or pull_request["author"] is None: - return "No author" - if pull_request["author"]["login"] in members: - return colored(pull_request["author"]["login"], 'green') - elif pull_request["mergedBy"]["login"] in members: - return f'{pull_request["author"]["login"]} ā†’ {colored(pull_request["mergedBy"]["login"], "green")}' - else: - return 
f'{pull_request["author"]["login"]} ā†’ {pull_request["mergedBy"]["login"]}' - -LABEL_NO_BACKPORT = 'pr-no-backport' -bad_pull_requests = [] # collect and print if not empty -need_backporting = [] -for pull_request in pull_requests: - - def find_label(): - labels = github.get_labels(pull_request) - backport_allowed = LABEL_NO_BACKPORT not in map(lambda label: label['name'], labels) - for label in labels: - if label['name'].startswith('pr-'): - if label['color'] == 'ff0000' and backport_allowed: - need_backporting.append(pull_request) - return True - return False - - label_found = find_label() - - if not label_found and args.autolabel: - print(f"Trying to auto-label pull-request: {pull_request['number']}") - description = parse_description.Description(pull_request) - if description.label_name: - github.set_label(pull_request, description.label_name) - label_found = find_label() - - if not label_found: - bad_pull_requests.append(pull_request) - -if bad_pull_requests: - print('\nPull-requests without description label:') - for bad in reversed(sorted(bad_pull_requests, key = lambda x : x['number'])): - print(f'{CROSS_MARK} {bad["number"]}: {bad["url"]} ({print_responsible(bad)})') - -# FIXME: compatibility logic, until the direct modification of master is not prohibited. -if bad_commits and not args.login: - print('\nCommits not referenced by any pull-request:') - - for bad in bad_commits: - print(f'{CROSS_MARK} {bad} {bad.author}') - -# TODO: check backports. 
-if need_backporting: - re_vlabel = re.compile(r'^v\d+\.\d+$') - re_vlabel_backported = re.compile(r'^v\d+\.\d+-backported$') - re_vlabel_conflicts = re.compile(r'^v\d+\.\d+-conflicts$') - re_vlabel_no_backport = re.compile(r'^v\d+\.\d+-no-backport$') - - print('\nPull-requests need to be backported:') - for pull_request in reversed(sorted(need_backporting, key=lambda x: x['number'])): - targets = [] # use common list for consistent order in output - good = set() - backport_labeled = set() - conflict_labeled = set() - no_backport_labeled = set() - wait = set() - - for branch in release_branches: - if repo.comparator(branch[1]) < repo.comparator(pull_request['mergeCommit']['oid']): - targets.append(branch[0]) - - # FIXME: compatibility logic - check for a manually set label, that indicates status 'backported'. - # FIXME: O(nĀ²) - no need to iterate all labels for every `branch` - for label in github.get_labels(pull_request): - if re_vlabel.match(label['name']) or re_vlabel_backported.match(label['name']): - if f'v{branch[0]}' == label['name'] or f'v{branch[0]}-backported' == label['name']: - backport_labeled.add(branch[0]) - if re_vlabel_conflicts.match(label['name']): - if f'v{branch[0]}-conflicts' == label['name']: - conflict_labeled.add(branch[0]) - if re_vlabel_no_backport.match(label['name']): - if f'v{branch[0]}-no-backport' == label['name']: - no_backport_labeled.add(branch[0]) - - for event in github.get_timeline(pull_request): - if(event['isCrossRepository'] or - event['target']['number'] != pull_request['number'] or - event['source']['baseRefName'] not in targets): - continue - - found_label = False - for label in github.get_labels(event['source']): - if label['name'] == 'pr-backport': - found_label = True - break - if not found_label: - continue - - if event['source']['merged']: - good.add(event['source']['baseRefName']) - else: - wait.add(event['source']['baseRefName']) - - # print pull-request's status - if len(good) + len(backport_labeled) + 
len(conflict_labeled) + len(no_backport_labeled) == len(targets): - print(f'{CHECK_MARK}', end=' ') - else: - print(f'{CROSS_MARK}', end=' ') - print(f'{pull_request["number"]}', end=':') - for target in targets: - if target in good: - print(f'\t{CHECK_MARK} {target}', end='') - elif target in backport_labeled: - print(f'\t{BACKPORT_LABEL_MARK} {target}', end='') - elif target in conflict_labeled: - print(f'\t{CONFLICT_LABEL_MARK} {target}', end='') - elif target in no_backport_labeled: - print(f'\t{NO_BACKPORT_LABEL_MARK} {target}', end='') - elif target in wait: - print(f'\t{CLOCK_MARK} {target}', end='') - else: - print(f'\t{CROSS_MARK} {target}', end='') - print(f'\t{pull_request["url"]} ({print_responsible(pull_request)})') - -# print legend -print('\nLegend:') -print(f'{CHECK_MARK} - good') -print(f'{CROSS_MARK} - bad') -print(f'{BACKPORT_LABEL_MARK} - backport is detected via label') -print(f'{CONFLICT_LABEL_MARK} - backport conflict is detected via label') -print(f'{NO_BACKPORT_LABEL_MARK} - backport to this release is not needed') -print(f'{CLOCK_MARK} - backport is waiting to merge') - -# print API costs -print('\nGitHub API total costs per query:') -for name, value in github.api_costs.items(): - print(f'{name} : {value}') diff --git a/utils/github/backport.py b/utils/github/backport.py new file mode 100644 index 00000000000..b860b3e93cb --- /dev/null +++ b/utils/github/backport.py @@ -0,0 +1,104 @@ +# -*- coding: utf-8 -*- + +from query import Query as RemoteRepo +from local import BareRepository as LocalRepo +import cherrypick + +import argparse +import logging +import re +import sys + + +class Backport: + def __init__(self, token, owner, name, team): + ''' + `refs` is a list of (ref_path, base_commit) sorted by ancestry starting from the least recent ref. 
def run(token, repo_bare, til, number, run_cherrypick):
    '''
    Find merged pull-requests that need backporting and pass each one to `run_cherrypick`.

    Args:
        token: GitHub access token; forwarded to `Backport` and to `run_cherrypick`.
        repo_bare: path of the local repository handed to `LocalRepo`.
        til: commit at which the pull-request scan stops. When falsy, the base commit
            of the oldest considered release branch is used.
        number: how many of the last release branches to consider. When falsy
            (None or 0) every release branch is considered.
        run_cherrypick: callable `(token, pr_number, branch_name)`; whatever it
            returns is written to the log next to the branch name.

    Returns:
        None. Results are reported through `logging`.
    '''
    bp = Backport(token, 'ClickHouse', 'ClickHouse', 'core')
    repo = LocalRepo(repo_bare, bp.default_branch_name)

    branches = repo.get_release_branches()  # [(branch_name, base_commit)]
    if number:
        # `-n` is optional on the command line, so `number` may be None;
        # slicing with `-None` would raise TypeError.
        branches = branches[-number:]

    if not branches:
        logging.info('No release branches found!')
        return

    for branch in branches:
        logging.info('Found release branch: %s', branch[0])

    if not til:
        til = branches[0][1]
    prs = bp.getPullRequests(til)

    backport_map = {}  # PR number -> set of release branch names

    RE_MUST_BACKPORT = re.compile(r'^v(\d+\.\d+)-must-backport$')
    RE_NO_BACKPORT = re.compile(r'^v(\d+\.\d+)-no-backport$')

    for pr in prs:
        number_of_pr = pr['number']
        merge_key = repo.comparator(pr['mergeCommit']['oid'])

        # Drop release branches whose base commit does not precede this merge commit.
        while branches and repo.comparator(branches[-1][1]) >= merge_key:
            branches.pop()

        if not branches:
            # Branches are never re-added, so no later pull-request can get a target
            # either. Stop here instead of failing on `branches[-1]`.
            break

        branch_set = set(branch[0] for branch in branches)
        labels = pr['labels']['nodes']

        # First pass. Find all must-backports.
        for label in labels:
            if label['name'].startswith('pr-') and label['color'] == 'ff0000':
                # Merge rather than assign, so explicit `vX.Y-must-backport` targets
                # collected from earlier labels are not lost.
                backport_map.setdefault(number_of_pr, set()).update(branch_set)
                continue
            m = RE_MUST_BACKPORT.match(label['name'])
            if m:
                backport_map.setdefault(number_of_pr, set()).add(m.group(1))

        # Second pass. Find all no-backports.
        for label in labels:
            if label['name'] == 'pr-no-backport' and number_of_pr in backport_map:
                del backport_map[number_of_pr]
                break
            m = RE_NO_BACKPORT.match(label['name'])
            if m and number_of_pr in backport_map:
                backport_map[number_of_pr].discard(m.group(1))

    # Distinct names here: the loop must not rebind `pr` / `branches` used above.
    for pr_number, targets in backport_map.items():
        logging.info('PR #%s needs to be backported to:', pr_number)
        for target in targets:
            logging.info('\t%s %s', target, run_cherrypick(token, pr_number, target))

    # print API costs
    logging.info('\nGitHub API total costs per query:')
    for name, value in bp._gh.api_costs.items():
        logging.info('%s : %s', name, value)
class CherryPick:
    '''
    Drives the staged backport of one pull-request to one release branch.

    Stage one opens a "cherry-pick" pull-request from `cherrypick_branch` into
    `backport_branch`. Stage two opens the "backport" pull-request from
    `backport_branch` into `target_branch`.
    '''

    class Status(Enum):
        DISCARDED = 'discarded'
        NOT_INITIATED = 'not started'
        FIRST_MERGEABLE = 'waiting for 1st stage'
        FIRST_CONFLICTS = 'conflicts on 1st stage'
        SECOND_MERGEABLE = 'waiting for 2nd stage'
        SECOND_CONFLICTS = 'conflicts on 2nd stage'
        MERGED = 'backported'

    def __init__(self, token, owner, name, team, pr_number, target_branch):
        '''
        Fetch pull-request `pr_number` and derive the names of the helper branches.

        Args:
            token, owner, name, team: forwarded to `RemoteRepo`.
            pr_number: number of the original pull-request.
            target_branch: release branch the changes are backported to.
        '''
        self._gh = RemoteRepo(token, owner=owner, name=name, team=team)
        self._pr = self._gh.get_pull_request(pr_number)

        # TODO: check if pull-request is merged.

        self.merge_commit_oid = self._pr['mergeCommit']['oid']

        self.target_branch = target_branch
        self.backport_branch = 'backport/{branch}/{pr}'.format(branch=target_branch, pr=pr_number)
        self.cherrypick_branch = 'cherrypick/{branch}/{oid}'.format(branch=target_branch, oid=self.merge_commit_oid)

    @staticmethod
    def _git_command(repo_path):
        '''Return the argv prefix for git commands run in `repo_path` under the robot identity.'''
        return ['git', '-C', repo_path,
                '-c', 'user.email=robot-clickhouse@yandex-team.ru',
                '-c', 'user.name=robot-clickhouse']

    def _git(self, repo_path, *args):
        '''
        Run one git command without a shell and return its exit code.

        A non-zero exit code is logged but not raised, so a sequence of commands
        keeps going after a failure. Raises OSError when the `git` executable
        cannot be started.
        '''
        import subprocess  # local import: the module-level import block is not touched

        command = self._git_command(repo_path) + list(args)
        code = subprocess.call(command)
        if code != 0:
            logging.warning('Command exited with code %s: %s', code, ' '.join(command))
        return code

    def getCherryPickPullRequest(self):
        '''Look up the stage-one pull-request (`cherrypick_branch` -> `backport_branch`).'''
        return self._gh.find_pull_request(base=self.backport_branch, head=self.cherrypick_branch)

    def createCherryPickPullRequest(self, repo_path):
        '''
        Prepare and push both helper branches from `repo_path`, then open the stage-one pull-request.

        Returns:
            the pull-request object returned by `create_pull_request`.
        '''
        DESCRIPTION = (
            'This pull-request is a first step of an automated backporting.\n'
            'It contains changes like after calling a local command `git cherry-pick`.\n'
            'If you intend to continue backporting this changes, then resolve all conflicts if any.\n'
            'Otherwise, if you do not want to backport them, then just close this pull-request.\n'
            '\n'
            'The check results does not matter at this step - you can safely ignore them.\n'
            'Also this pull-request will be merged automatically as it reaches the mergeable state, but you always can merge it manually.\n'
        )

        base_commit_oid = self._pr['mergeCommit']['parents']['nodes'][0]['oid']

        # Create separate branch for backporting, and make it look like real cherry-pick.
        self._git(repo_path, 'checkout', '-f', self.target_branch)
        self._git(repo_path, 'checkout', '-B', self.backport_branch)
        self._git(repo_path, 'merge', '-s', 'ours', '--no-edit', base_commit_oid)

        # Create secondary branch to allow pull request with cherry-picked commit.
        self._git(repo_path, 'branch', '-f', self.cherrypick_branch, self.merge_commit_oid)

        self._git(repo_path, 'push', '-f', 'origin', '{branch}:{branch}'.format(branch=self.backport_branch))
        self._git(repo_path, 'push', '-f', 'origin', '{branch}:{branch}'.format(branch=self.cherrypick_branch))

        # Create pull-request like a local cherry-pick
        pr = self._gh.create_pull_request(source=self.cherrypick_branch, target=self.backport_branch,
                                          title='Cherry pick #{number} to {target}: {title}'.format(
                                              number=self._pr['number'], target=self.target_branch,
                                              title=self._pr['title'].replace('"', '\\"')),
                                          description='Original pull-request #{}\n\n{}'.format(self._pr['number'], DESCRIPTION))

        # FIXME: use `team` to leave a single eligible assignee.
        self._gh.add_assignee(pr, self._pr['author'])
        self._gh.add_assignee(pr, self._pr['mergedBy'])

        self._gh.set_label(pr, "do not test")
        self._gh.set_label(pr, "pr-cherrypick")

        return pr

    def mergeCherryPickPullRequest(self, cherrypick_pr):
        '''Merge the stage-one pull-request identified by `cherrypick_pr['id']`.'''
        return self._gh.merge_pull_request(cherrypick_pr['id'])

    def getBackportPullRequest(self):
        '''Look up the stage-two pull-request (`backport_branch` -> `target_branch`).'''
        return self._gh.find_pull_request(base=self.target_branch, head=self.backport_branch)

    def createBackportPullRequest(self, cherrypick_pr, repo_path):
        '''
        Squash `backport_branch` onto its merge-base with `target_branch`, push it
        and open the stage-two pull-request.

        Args:
            cherrypick_pr: the stage-one pull-request; its number is quoted in the description.
            repo_path: path of the repository the git commands run in.

        Returns:
            the pull-request object returned by `create_pull_request`.
        '''
        import subprocess  # local import: the module-level import block is not touched

        DESCRIPTION = (
            'This pull-request is a last step of an automated backporting.\n'
            'Treat it as a standard pull-request: look at the checks and resolve conflicts.\n'
            'Merge it only if you intend to backport changes to the target branch, otherwise just close it.\n'
        )

        self._git(repo_path, 'checkout', '-f', self.backport_branch)
        self._git(repo_path, 'pull', '--ff-only', 'origin', self.backport_branch)

        # Resolve the merge-base explicitly instead of relying on shell back-ticks.
        try:
            merge_base = subprocess.check_output(
                self._git_command(repo_path) + ['merge-base', self.target_branch, self.backport_branch])
        except subprocess.CalledProcessError as error:
            logging.warning('Cannot find merge-base of %s and %s (exit code %s)',
                            self.target_branch, self.backport_branch, error.returncode)
            merge_base = b''
        merge_base = merge_base.decode('utf-8').strip()
        if merge_base:
            self._git(repo_path, 'reset', '--soft', merge_base)

        self._git(repo_path, 'commit', '-a', '-m', 'Squash backport branch')
        self._git(repo_path, 'push', '-f', 'origin', '{branch}:{branch}'.format(branch=self.backport_branch))

        pr = self._gh.create_pull_request(source=self.backport_branch, target=self.target_branch,
                                          title='Backport #{number} to {target}: {title}'.format(
                                              number=self._pr['number'], target=self.target_branch,
                                              title=self._pr['title'].replace('"', '\\"')),
                                          description='Original pull-request #{}\nCherry-pick pull-request #{}\n\n{}'.format(
                                              self._pr['number'], cherrypick_pr['number'], DESCRIPTION))

        # FIXME: use `team` to leave a single eligible assignee.
        self._gh.add_assignee(pr, self._pr['author'])
        self._gh.add_assignee(pr, self._pr['mergedBy'])

        self._gh.set_label(pr, "pr-backport")

        return pr
CherryPick.Status.FIRST_MERGEABLE + else: + logging.debug('Found PR with backport of %s to %s: %s', pr, branch, pr2['url']) + + if pr2['merged']: + return CherryPick.Status.MERGED + elif pr2['closed']: + return CherryPick.Status.DISCARDED + elif pr2['mergeable'] == 'CONFLICTING': + return CherryPick.Status.SECOND_CONFLICTS + else: + return CherryPick.Status.SECOND_MERGEABLE + + +if __name__ == "__main__": + logging.basicConfig(format='%(message)s', stream=sys.stdout, level=logging.DEBUG) + + parser = argparse.ArgumentParser() + parser.add_argument('--token', '-t', type=str, required=True, help='token for Github access') + parser.add_argument('--pr', type=str, required=True, help='PR# to cherry-pick') + parser.add_argument('--branch', '-b', type=str, required=True, help='target branch name for cherry-pick') + parser.add_argument('--repo', '-r', type=str, required=True, help='path to full repository', metavar='PATH') + args = parser.parse_args() + + run(args.token, args.pr, args.branch, args.repo) diff --git a/utils/github/local.py b/utils/github/local.py index 96a1ae765bf..60d9f8ab1e5 100644 --- a/utils/github/local.py +++ b/utils/github/local.py @@ -1,26 +1,22 @@ # -*- coding: utf-8 -*- try: - import git # `pip3 install gitpython` + import git # `pip install gitpython` except ImportError: - sys.exit("Package 'gitpython' not found. Try run: `pip3 install [--user] gitpython`") + import sys + sys.exit("Package 'gitpython' not found. 
class Repository(RepositoryBase):
    '''
    Repository whose release branches are read from the refs of a named remote.
    '''

    def __init__(self, repo_path, remote_name, default_branch_name):
        super(Repository, self).__init__(repo_path)
        self._remote = self._repo.remotes[remote_name]
        self._default = self._remote.refs[default_branch_name]

    def get_release_branches(self):
        '''
        Collect the remote refs whose path looks like `refs/remotes/<remote>/<N>.<M>`.

        Returns a list of `(branch name, base commit)` tuples, where the base commit
        is the single merge-base of the branch with the default branch. Branches with
        no merge-base, or with more than one, are logged and left out.
        The list is sorted by base commit using `self.comparator`.
        '''
        pattern = re.compile(r'^refs/remotes/.+/\d+\.\d+$')
        candidates = [ref for ref in self._remote.refs if pattern.match(ref.path)]

        found = []
        for ref in candidates:
            bases = self._repo.merge_base(self._default, self._repo.commit(ref))
            if not bases:
                logging.info('Branch %s is not based on branch %s. Ignoring.', ref.path, self._default)
                continue
            if len(bases) > 1:
                logging.info('Branch %s has more than one base commit. Ignoring.', ref.path)
                continue
            found.append((os.path.basename(ref.name), bases[0]))

        found.sort(key=lambda item: self.comparator(item[1]))
        return found
def get_members(self):
    '''Get all members of the configured team, following pagination.

    Returns:
        members: a dict mapping each member's login to their id.
            It is empty when the API returns no team.
    '''

    _QUERY = '''
        organization(login: "{organization}") {{
            team(slug: "{team}") {{
                members(first: {max_page_size} {next}) {{
                    pageInfo {{
                        hasNextPage
                        endCursor
                    }}
                    nodes {{
                        id
                        login
                    }}
                }}
            }}
        }}
    '''

    members = {}
    not_end = True
    query = _QUERY.format(organization=self._owner, team=self._team,
                          max_page_size=self._max_page_size,
                          next='')

    while not_end:
        result = self._run(query)['organization']['team']
        if result is None:
            # No team in the response: nothing more to collect.
            break
        result = result['members']
        not_end = result['pageInfo']['hasNextPage']
        # Prepare the query for the next page; it is only sent when `not_end` is true.
        query = _QUERY.format(organization=self._owner, team=self._team,
                              max_page_size=self._max_page_size,
                              next='after: "{}"'.format(result["pageInfo"]["endCursor"]))

        # `dict` has no `+=`; merge the page into the result with update().
        members.update((node['login'], node['id']) for node in result['nodes'])

    return members
def find_pull_request(self, base, head):
    '''
    Look up a pull-request by its base and head branch names.

    Returns the first matching pull-request node, or an empty dict when the
    reported `totalCount` is zero.
    '''
    _QUERY = '''
        repository(owner: "{owner}" name: "{name}") {{
            pullRequests(first: {min_page_size} baseRefName: "{base}" headRefName: "{head}") {{
                nodes {{
                    {pull_request_data}
                }}
                totalCount
            }}
        }}
    '''

    request = _QUERY.format(
        owner=self._owner,
        name=self._name,
        base=base,
        head=head,
        pull_request_data=self._PULL_REQUEST,
        min_page_size=self._min_page_size,
    )
    found = self._run(request)['repository']['pullRequests']

    # Guard clause: nothing matched.
    if not found['totalCount'] > 0:
        return {}
    return found['nodes'][0]
on PullRequest {{ + {pull_request_data} - Args: - pull_request: JSON object returned by `get_pull_requests()` - - Returns: - events: a list of JSON nodes for CrossReferenceEvent - ''' - events = [event for event in pull_request['timeline']['nodes'] if event and event['source']] - not_end = pull_request['timeline']['pageInfo']['hasNextPage'] - query = Query._TIMELINE.format(number = pull_request['number'], - max_page_size = self._max_page_size, - next=f'after: "{pull_request["timeline"]["pageInfo"]["endCursor"]}"') - - while not_end: - result = self._run(query)['repository']['pullRequest']['timeline'] - not_end = result['pageInfo']['hasNextPage'] - query = Query._TIMELINE.format(number=pull_request['number'], - max_page_size=self._max_page_size, - next=f'after: "{result["pageInfo"]["endCursor"]}"') - - events += [event for event in result['nodes'] if event and event['source']] - - return events - - _PULL_REQUESTS = ''' - repository(owner: "ClickHouse" name: "ClickHouse") {{ - defaultBranchRef {{ - name - target {{ - ... on Commit {{ - history(first: {max_page_size} {next}) {{ - pageInfo {{ - hasNextPage - endCursor - }} - nodes {{ - oid - associatedPullRequests(first: {min_page_size}) {{ - totalCount - nodes {{ - ... on PullRequest {{ - id - number - author {{ - login - }} - bodyText - mergedBy {{ - login - }} - url - baseRefName - baseRepository {{ - nameWithOwner - }} - mergeCommit {{ - oid - }} - labels(first: {min_page_size}) {{ - pageInfo {{ - hasNextPage - endCursor - }} - nodes {{ - name - color - }} - }} - timeline(first: {min_page_size}) {{ - pageInfo {{ - hasNextPage - endCursor - }} - nodes {{ - ... on CrossReferencedEvent {{ - isCrossRepository - source {{ - ... on PullRequest {{ - number - baseRefName - merged - labels(first: 0) {{ - nodes {{ - name - }} - }} - }} - }} - target {{ - ... 
on PullRequest {{ - number - }} - }} + labels(first: {min_page_size}) {{ + totalCount + pageInfo {{ + hasNextPage + endCursor + }} + nodes {{ + name + color }} }} }} @@ -244,116 +202,270 @@ class Query: }} }} }} - }} - ''' - def get_pull_requests(self, before_commit, login): - '''Get all merged pull-requests from the HEAD of default branch to the last commit (excluding) - - Args: - before_commit (string-convertable): commit sha of the last commit (excluding) - login (string): filter pull-requests by user login - - Returns: - pull_requests: a list of JSON nodes with pull-requests' details ''' + pull_requests = [] not_end = True - query = Query._PULL_REQUESTS.format(max_page_size=self._max_page_size, - min_page_size=self._min_page_size, - next='') + query = _QUERY.format(owner=self._owner, name=self._name, + max_page_size=self._max_page_size, + min_page_size=self._min_page_size, + pull_request_data=self._PULL_REQUEST, + next='') while not_end: - result = self._run(query)['repository']['defaultBranchRef'] - default_branch_name = result['name'] - result = result['target']['history'] + result = self._run(query)['repository']['defaultBranchRef']['target']['history'] not_end = result['pageInfo']['hasNextPage'] - query = Query._PULL_REQUESTS.format(max_page_size=self._max_page_size, - min_page_size=self._min_page_size, - next=f'after: "{result["pageInfo"]["endCursor"]}"') + query = _QUERY.format(owner=self._owner, name=self._name, + max_page_size=self._max_page_size, + min_page_size=self._min_page_size, + pull_request_data=self._PULL_REQUEST, + next='after: "{}"'.format(result["pageInfo"]["endCursor"])) for commit in result['nodes']: + # FIXME: maybe include `before_commit`? if str(commit['oid']) == str(before_commit): not_end = False break # TODO: fetch all pull-requests that were merged in a single commit. 
- assert commit['associatedPullRequests']['totalCount'] <= self._min_page_size, \ - f'there are {commit["associatedPullRequests"]["totalCount"]} pull-requests merged in commit {commit["oid"]}' + assert commit['associatedPullRequests']['totalCount'] <= self._min_page_size for pull_request in commit['associatedPullRequests']['nodes']: - if(pull_request['baseRepository']['nameWithOwner'] == 'ClickHouse/ClickHouse' and - pull_request['baseRefName'] == default_branch_name and - pull_request['mergeCommit']['oid'] == commit['oid'] and - (not login or pull_request['author']['login'] == login)): + if(pull_request['baseRepository']['nameWithOwner'] == '{}/{}'.format(self._owner, self._name) and + pull_request['baseRefName'] == self.default_branch and + pull_request['mergeCommit']['oid'] == commit['oid']): pull_requests.append(pull_request) return pull_requests - _DEFAULT = ''' - repository(owner: "ClickHouse", name: "ClickHouse") { - defaultBranchRef { - name - } - } - ''' - def get_default_branch(self): - '''Get short name of the default branch - - Returns: - name (string): branch name - ''' - return self._run(Query._DEFAULT)['repository']['defaultBranchRef']['name'] - - _GET_LABEL = ''' - repository(owner: "ClickHouse" name: "ClickHouse") {{ - labels(first: {max_page_size} {next} query: "{name}") {{ - pageInfo {{ - hasNextPage - endCursor - }} - nodes {{ - id - name - color + def create_pull_request(self, source, target, title, description="", draft=False, can_modify=True): + _QUERY = ''' + createPullRequest(input: {{ + baseRefName: "{target}", + headRefName: "{source}", + repositoryId: "{id}", + title: "{title}", + body: "{body}", + draft: {draft}, + maintainerCanModify: {modify} + }}) {{ + pullRequest {{ + {pull_request_data} }} }} - }} - ''' - _SET_LABEL = ''' - addLabelsToLabelable(input: {{ labelableId: "{pr_id}", labelIds: "{label_id}" }}) {{ - clientMutationId - }} - ''' + ''' + + query = _QUERY.format(target=target, source=source, id=self._id, title=title, 
body=description, + draft="true" if draft else "false", modify="true" if can_modify else "false", + pull_request_data = self._PULL_REQUEST) + return self._run(query, is_mutation=True)['createPullRequest']['pullRequest'] + + def merge_pull_request(self, id): + _QUERY = ''' + mergePullRequest(input: {{ + pullRequestId: "{id}" + }}) {{ + pullRequest {{ + {pull_request_data} + }} + }} + ''' + + query = _QUERY.format(id=id, pull_request_data = self._PULL_REQUEST) + return self._run(query, is_mutation=True)['mergePullRequest']['pullRequest'] + + # FIXME: figure out how to add more assignees at once + def add_assignee(self, pr, assignee): + _QUERY = ''' + addAssigneesToAssignable(input: {{ + assignableId: "{id1}", + assigneeIds: "{id2}" + }}) {{ + clientMutationId + }} + ''' + + query = _QUERY.format(id1=pr['id'], id2=assignee['id']) + self._run(query, is_mutation=True) + def set_label(self, pull_request, label_name): - '''Set label by name to the pull request + ''' + Set label by name to the pull request Args: pull_request: JSON object returned by `get_pull_requests()` label_name (string): label name ''' + + _GET_LABEL = ''' + repository(owner: "{owner}" name: "{name}") {{ + labels(first: {max_page_size} {next} query: "{label_name}") {{ + pageInfo {{ + hasNextPage + endCursor + }} + nodes {{ + id + name + color + }} + }} + }} + ''' + + _SET_LABEL = ''' + addLabelsToLabelable(input: {{ + labelableId: "{pr_id}", + labelIds: "{label_id}" + }}) {{ + clientMutationId + }} + ''' + labels = [] not_end = True - query = Query._GET_LABEL.format(name=label_name, - max_page_size=self._max_page_size, - next='') + query = _GET_LABEL.format(owner=self._owner, name=self._name, label_name=label_name, + max_page_size=self._max_page_size, + next='') while not_end: result = self._run(query)['repository']['labels'] not_end = result['pageInfo']['hasNextPage'] - query = Query._GET_LABEL.format(name=label_name, - max_page_size=self._max_page_size, - next=f'after: 
"{result["pageInfo"]["endCursor"]}"') + query = _GET_LABEL.format(owner=self._owner, name=self._name, label_name=label_name, + max_page_size=self._max_page_size, + next='after: "{}"'.format(result["pageInfo"]["endCursor"])) labels += [label for label in result['nodes']] if not labels: return - query = Query._SET_LABEL.format(pr_id = pull_request['id'], label_id = labels[0]['id']) + query = _SET_LABEL.format(pr_id = pull_request['id'], label_id = labels[0]['id']) self._run(query, is_mutation=True) - pull_request['labels']['nodes'].append(labels[0]) + ### OLD METHODS + # _LABELS = ''' + # repository(owner: "ClickHouse" name: "ClickHouse") {{ + # pullRequest(number: {number}) {{ + # labels(first: {max_page_size} {next}) {{ + # pageInfo {{ + # hasNextPage + # endCursor + # }} + # nodes {{ + # name + # color + # }} + # }} + # }} + # }} + # ''' + # def get_labels(self, pull_request): + # '''Fetchs all labels for given pull-request + + # Args: + # pull_request: JSON object returned by `get_pull_requests()` + + # Returns: + # labels: a list of JSON nodes with the name and color fields + # ''' + # labels = [label for label in pull_request['labels']['nodes']] + # not_end = pull_request['labels']['pageInfo']['hasNextPage'] + # query = Query._LABELS.format(number = pull_request['number'], + # max_page_size = self._max_page_size, + # next=f'after: "{pull_request["labels"]["pageInfo"]["endCursor"]}"') + + # while not_end: + # result = self._run(query)['repository']['pullRequest']['labels'] + # not_end = result['pageInfo']['hasNextPage'] + # query = Query._LABELS.format(number=pull_request['number'], + # max_page_size=self._max_page_size, + # next=f'after: "{result["pageInfo"]["endCursor"]}"') + + # labels += [label for label in result['nodes']] + + # return labels + + # _TIMELINE = ''' + # repository(owner: "ClickHouse" name: "ClickHouse") {{ + # pullRequest(number: {number}) {{ + # timeline(first: {max_page_size} {next}) {{ + # pageInfo {{ + # hasNextPage + # endCursor + # }} + 
# nodes {{ + # ... on CrossReferencedEvent {{ + # isCrossRepository + # source {{ + # ... on PullRequest {{ + # number + # baseRefName + # merged + # labels(first: {max_page_size}) {{ + # pageInfo {{ + # hasNextPage + # endCursor + # }} + # nodes {{ + # name + # color + # }} + # }} + # }} + # }} + # target {{ + # ... on PullRequest {{ + # number + # }} + # }} + # }} + # }} + # }} + # }} + # }} + # ''' + # def get_timeline(self, pull_request): + # '''Fetchs all cross-reference events from pull-request's timeline + + # Args: + # pull_request: JSON object returned by `get_pull_requests()` + + # Returns: + # events: a list of JSON nodes for CrossReferenceEvent + # ''' + # events = [event for event in pull_request['timeline']['nodes'] if event and event['source']] + # not_end = pull_request['timeline']['pageInfo']['hasNextPage'] + # query = Query._TIMELINE.format(number = pull_request['number'], + # max_page_size = self._max_page_size, + # next=f'after: "{pull_request["timeline"]["pageInfo"]["endCursor"]}"') + + # while not_end: + # result = self._run(query)['repository']['pullRequest']['timeline'] + # not_end = result['pageInfo']['hasNextPage'] + # query = Query._TIMELINE.format(number=pull_request['number'], + # max_page_size=self._max_page_size, + # next=f'after: "{result["pageInfo"]["endCursor"]}"') + + # events += [event for event in result['nodes'] if event and event['source']] + + # return events + + # _DEFAULT = ''' + # repository(owner: "ClickHouse", name: "ClickHouse") { + # defaultBranchRef { + # name + # } + # } + # ''' + # def get_default_branch(self): + # '''Get short name of the default branch + + # Returns: + # name (string): branch name + # ''' + # return self._run(Query._DEFAULT)['repository']['defaultBranchRef']['name'] def _run(self, query, is_mutation=False): from requests.adapters import HTTPAdapter @@ -378,15 +490,15 @@ class Query: session.mount('https://', adapter) return session - headers = {'Authorization': f'bearer {self._token}'} + headers = 
{'Authorization': 'bearer {}'.format(self._token)} if is_mutation: - query = f''' + query = ''' mutation {{ {query} }} - ''' + '''.format(query=query) else: - query = f''' + query = ''' query {{ {query} rateLimit {{ @@ -394,14 +506,14 @@ class Query: remaining }} }} - ''' + '''.format(query=query) while True: request = requests_retry_session().post('https://api.github.com/graphql', json={'query': query}, headers=headers) if request.status_code == 200: result = request.json() if 'errors' in result: - raise Exception(f'Errors occured: {result["errors"]}') + raise Exception('Errors occured: {}\nOriginal query: {}'.format(result["errors"], query)) if not is_mutation: import inspect @@ -413,10 +525,4 @@ class Query: return result['data'] else: import json - resp = request.json() - if resp and len(resp) > 0 and resp[0] and 'type' in resp[0] and resp[0]['type'] == 'RATE_LIMITED': - print("API rate limit exceeded. Waiting for 1 second.") - time.sleep(1) - continue - - raise Exception(f'Query failed with code {request.status_code}:\n{json.dumps(resp, indent=4)}') + raise Exception('Query failed with code {code}:\n{json}'.format(code=request.status_code, json=json.dumps(request.json(), indent=4))) diff --git a/utils/github/ya.make b/utils/github/ya.make new file mode 100644 index 00000000000..8ff9d7bad42 --- /dev/null +++ b/utils/github/ya.make @@ -0,0 +1,11 @@ +PY_LIBRARY() + +PY_SRCS( + __init__.py + backport.py + cherrypick.py + local.py + query.py +) + +END() diff --git a/utils/list_backports.sh b/utils/list_backports.sh deleted file mode 100755 index 6eba3410c95..00000000000 --- a/utils/list_backports.sh +++ /dev/null @@ -1,22 +0,0 @@ -#!/bin/sh - -# sudo apt install python3-pip -# sudo pip3 install gitpython termcolor - -# Go to GitHub. -# In top-right corner, click to your profile icon. -# Choose "Settings". -# Choose "Developer settings". -# Choose "Personal access tokens". -# Choose "Generate new token". - -# Don't check any checkboxes. 
- -# Run as: -# ./list_backports.sh --token your-token - - -set -e -SCRIPTPATH=$(readlink -f "$0") -SCRIPTDIR=$(dirname "$SCRIPTPATH") -PYTHONPATH="$SCRIPTDIR" python3 -m github "$@"