Automated backporting machinery (#12029)

* Add automated cherry-pick machinery

* Added script for backporting machinery

* Implement dry-run mode

* Comment old methods

* Fix python2

* Add ya.make for sandbox tasks

* Remove old stuff
This commit is contained in:
Ivan 2020-06-29 15:13:19 +03:00 committed by GitHub
parent 97b8888ed2
commit fce8328e65
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 735 additions and 549 deletions

View File

@ -1,231 +0,0 @@
# -*- coding: utf-8 -*-
'''
Rules for commit messages, branch names and everything:
- All important(!) commits to master branch must originate from pull-requests.
- All pull-requests must be squash-merged or explicitly merged without rebase.
- All pull-requests to master must have at least one label prefixed with `pr-`.
- Labels that require pull-request to be backported must be red colored (#ff0000).
- Release branch name must be of form `YY.NUMBER`.
- All release branches must be forked directly from the master branch and never be merged back,
or merged with any other branches based on the master branch (including master branch itself).
Output of this script:
- Commits without references from pull-requests.
- Pull-requests to master without proper labels.
- Pull-requests that need to be backported, with statuses per release branch.
'''
from . import local, query
from . import parser as parse_description
import argparse
import re
import sys
# `termcolor` is the only non-stdlib dependency of this script; fail fast
# with an installation hint when it is missing.
try:
    from termcolor import colored  # `pip install termcolor`
except ImportError:
    sys.exit("Package 'termcolor' not found. Try run: `pip3 install [--user] termcolor`")

# Colored status glyphs used throughout the report output (see the legend
# printed at the end of the script).
CHECK_MARK = colored('🗸', 'green')
CROSS_MARK = colored('🗙', 'red')
BACKPORT_LABEL_MARK = colored('🏷', 'yellow')
# NOTE(review): the following three marks are empty strings — the original
# glyphs appear to have been lost (encoding/copy issue); confirm and restore.
CONFLICT_LABEL_MARK = colored('', 'yellow')
NO_BACKPORT_LABEL_MARK = colored('', 'yellow')
CLOCK_MARK = colored('', 'cyan')
# --- Command-line interface -------------------------------------------------
parser = argparse.ArgumentParser(description='Helper for the ClickHouse Release machinery')
parser.add_argument('--repo', '-r', type=str, default='', metavar='PATH',
                    help='path to the root of the ClickHouse repository')
parser.add_argument('--remote', type=str, default='origin',
                    help='remote name of the "ClickHouse/ClickHouse" upstream')
parser.add_argument('--token', type=str, required=True,
                    help='token for Github access')
parser.add_argument('--login', type=str,
                    help='filter authorship by login')
# NOTE(review): with default=True this store_true flag is a no-op — passing
# `--auto-label` cannot change anything; default=False was probably intended.
parser.add_argument('--auto-label', action='store_true', dest='autolabel', default=True,
                    help='try to automatically parse PR description and put labels')

# Either select last N release branches, or specify them manually.
group = parser.add_mutually_exclusive_group(required=True)
group.add_argument('-n', type=int, default=3, dest='number',
                   help='number of last release branches to consider')
group.add_argument('--branch', type=str, action='append', metavar='BRANCH',
                   help='specific release branch name to consider')
args = parser.parse_args()

github = query.Query(args.token, 30)  # page size 30 keeps GraphQL query costs low
repo = local.Local(args.repo, args.remote, github.get_default_branch())

# Resolve the release branches to report on: either the last N, or exactly
# those named via --branch. Each entry is a (branch name, base commit) tuple.
if not args.branch:
    release_branches = repo.get_release_branches()[-args.number:]  # [(branch name, base)]
else:
    release_branches = []
    all_release_branches = repo.get_release_branches()
    for branch in all_release_branches:
        if branch[0] in args.branch:
            release_branches.append(branch)

if not release_branches:
    sys.exit('No release branches found!')
else:
    print('Found release branches:')
    for branch in release_branches:
        print(f'{CHECK_MARK} {branch[0]} forked from {branch[1]}')

# Pull-requests merged after the base of the oldest considered release branch.
first_commit = release_branches[0][1]
pull_requests = github.get_pull_requests(first_commit, args.login)
good_commits = set(pull_request['mergeCommit']['oid'] for pull_request in pull_requests)

# Walk master from HEAD down through every release-branch base, collecting
# commits that no pull-request references (robot commits are exempt).
bad_commits = []  # collect and print them in the end
from_commit = repo.get_head_commit()
for i in reversed(range(len(release_branches))):
    for commit in repo.iterate(from_commit, release_branches[i][1]):
        if str(commit) not in good_commits and commit.author.name != 'robot-clickhouse':
            bad_commits.append(commit)
    from_commit = release_branches[i][1]

# Logins of the organization team members, used to colorize responsible people.
members = set(github.get_members("ClickHouse", "ClickHouse"))
def print_responsible(pull_request):
    '''Format the people responsible for a pull-request.

    Returns the author's login (colored green when the author is an
    organization member); otherwise the merger's login is appended,
    green when the merger is a member.

    Fix: the original unconditionally indexed pull_request["mergedBy"]["login"],
    which raises TypeError when GitHub reports no merger (e.g. a deleted
    account), while the author was already guarded against None.
    '''
    if "author" not in pull_request or pull_request["author"] is None:
        return "No author"
    author = pull_request["author"]["login"]
    if author in members:
        return colored(author, 'green')
    merged_by = pull_request.get("mergedBy")
    if merged_by is None:
        # No merger recorded — fall back to the author alone.
        return author
    merger = merged_by["login"]
    # NOTE(review): the two logins are concatenated with no separator; a
    # separator glyph may have been lost in extraction — confirm against VCS.
    if merger in members:
        return f'{author}{colored(merger, "green")}'
    return f'{author}{merger}'
LABEL_NO_BACKPORT = 'pr-no-backport'

bad_pull_requests = []  # collect and print if not empty
need_backporting = []
for pull_request in pull_requests:

    def find_label():
        # A PR is properly labeled when it carries at least one `pr-` label.
        # A red (#ff0000) `pr-` label additionally marks the PR for
        # backporting, unless the no-backport label is also present.
        # Side effect: appends to the enclosing `need_backporting` list.
        labels = github.get_labels(pull_request)
        backport_allowed = LABEL_NO_BACKPORT not in map(lambda label: label['name'], labels)
        for label in labels:
            if label['name'].startswith('pr-'):
                if label['color'] == 'ff0000' and backport_allowed:
                    need_backporting.append(pull_request)
                return True
        return False

    label_found = find_label()

    # No `pr-` label yet: optionally derive one from the PR description
    # (checkbox parsing), set it, and re-check.
    if not label_found and args.autolabel:
        print(f"Trying to auto-label pull-request: {pull_request['number']}")
        description = parse_description.Description(pull_request)
        if description.label_name:
            github.set_label(pull_request, description.label_name)
            label_found = find_label()

    if not label_found:
        bad_pull_requests.append(pull_request)

# Report PRs that still have no description label, most recent first.
if bad_pull_requests:
    print('\nPull-requests without description label:')
    for bad in reversed(sorted(bad_pull_requests, key = lambda x : x['number'])):
        print(f'{CROSS_MARK} {bad["number"]}: {bad["url"]} ({print_responsible(bad)})')

# FIXME: compatibility logic, until the direct modification of master is not prohibited.
if bad_commits and not args.login:
    print('\nCommits not referenced by any pull-request:')
    for bad in bad_commits:
        print(f'{CROSS_MARK} {bad} {bad.author}')
# TODO: check backports.
# For every PR that needs backporting, compute its per-release-branch status
# from (a) manually set v-labels and (b) cross-referenced backport PRs found
# in the timeline, then print one status line per PR.
if need_backporting:
    re_vlabel = re.compile(r'^v\d+\.\d+$')
    re_vlabel_backported = re.compile(r'^v\d+\.\d+-backported$')
    re_vlabel_conflicts = re.compile(r'^v\d+\.\d+-conflicts$')
    re_vlabel_no_backport = re.compile(r'^v\d+\.\d+-no-backport$')

    print('\nPull-requests need to be backported:')
    for pull_request in reversed(sorted(need_backporting, key=lambda x: x['number'])):
        targets = []  # use common list for consistent order in output
        good = set()                 # backport PR merged
        backport_labeled = set()     # marked backported via label
        conflict_labeled = set()     # marked conflicting via label
        no_backport_labeled = set()  # marked not-needed via label
        wait = set()                 # backport PR exists but not merged

        for branch in release_branches:
            # Only branches forked before this PR was merged are targets.
            if repo.comparator(branch[1]) < repo.comparator(pull_request['mergeCommit']['oid']):
                targets.append(branch[0])

                # FIXME: compatibility logic - check for a manually set label, that indicates status 'backported'.
                # FIXME: O(n²) - no need to iterate all labels for every `branch`
                for label in github.get_labels(pull_request):
                    if re_vlabel.match(label['name']) or re_vlabel_backported.match(label['name']):
                        if f'v{branch[0]}' == label['name'] or f'v{branch[0]}-backported' == label['name']:
                            backport_labeled.add(branch[0])
                    if re_vlabel_conflicts.match(label['name']):
                        if f'v{branch[0]}-conflicts' == label['name']:
                            conflict_labeled.add(branch[0])
                    if re_vlabel_no_backport.match(label['name']):
                        if f'v{branch[0]}-no-backport' == label['name']:
                            no_backport_labeled.add(branch[0])

        # Scan cross-referenced PRs in the timeline for same-repo PRs labeled
        # `pr-backport` that target one of the release branches.
        for event in github.get_timeline(pull_request):
            if(event['isCrossRepository'] or
               event['target']['number'] != pull_request['number'] or
               event['source']['baseRefName'] not in targets):
                continue
            found_label = False
            for label in github.get_labels(event['source']):
                if label['name'] == 'pr-backport':
                    found_label = True
                    break
            if not found_label:
                continue
            if event['source']['merged']:
                good.add(event['source']['baseRefName'])
            else:
                wait.add(event['source']['baseRefName'])

        # print pull-request's status
        # Overall check mark: every target branch is accounted for by one of
        # the terminal states (merged / labeled backported / conflicts / not needed).
        if len(good) + len(backport_labeled) + len(conflict_labeled) + len(no_backport_labeled) == len(targets):
            print(f'{CHECK_MARK}', end=' ')
        else:
            print(f'{CROSS_MARK}', end=' ')
        print(f'{pull_request["number"]}', end=':')
        for target in targets:
            if target in good:
                print(f'\t{CHECK_MARK} {target}', end='')
            elif target in backport_labeled:
                print(f'\t{BACKPORT_LABEL_MARK} {target}', end='')
            elif target in conflict_labeled:
                print(f'\t{CONFLICT_LABEL_MARK} {target}', end='')
            elif target in no_backport_labeled:
                print(f'\t{NO_BACKPORT_LABEL_MARK} {target}', end='')
            elif target in wait:
                print(f'\t{CLOCK_MARK} {target}', end='')
            else:
                print(f'\t{CROSS_MARK} {target}', end='')
        print(f'\t{pull_request["url"]} ({print_responsible(pull_request)})')

# print legend
print('\nLegend:')
print(f'{CHECK_MARK} - good')
print(f'{CROSS_MARK} - bad')
print(f'{BACKPORT_LABEL_MARK} - backport is detected via label')
print(f'{CONFLICT_LABEL_MARK} - backport conflict is detected via label')
print(f'{NO_BACKPORT_LABEL_MARK} - backport to this release is not needed')
print(f'{CLOCK_MARK} - backport is waiting to merge')

# print API costs
print('\nGitHub API total costs per query:')
for name, value in github.api_costs.items():
    print(f'{name} : {value}')

104
utils/github/backport.py Normal file
View File

@ -0,0 +1,104 @@
# -*- coding: utf-8 -*-
from query import Query as RemoteRepo
from local import BareRepository as LocalRepo
import cherrypick
import argparse
import logging
import re
import sys
class Backport:
    '''Thin facade over the remote GitHub repository used by the backporting
    driver: exposes the default branch name and the merged pull-requests
    reachable from its HEAD.
    '''

    def __init__(self, token, owner, name, team):
        '''Connect to GitHub.

        Args:
            token: Github access token.
            owner: repository owner (organization or user login).
            name: repository name.
            team: organization team slug (eligible assignees for backport PRs).

        Fix: the previous docstring described a `refs` list that is not a
        parameter of this class — a copy-paste leftover.
        '''
        # Small page size (30) keeps individual GraphQL query costs low.
        self._gh = RemoteRepo(token, owner=owner, name=name, team=team, max_page_size=30)
        self.default_branch_name = self._gh.default_branch

    def getPullRequests(self, from_commit):
        '''Return merged pull-requests from the default branch HEAD down to
        `from_commit` (excluding it).
        '''
        return self._gh.get_pull_requests(from_commit)
def run(token, repo_bare, til, number, run_cherrypick):
    '''Top-level backporting driver.

    Args:
        token: Github access token.
        repo_bare: path to a local bare clone of the repository.
        til: optional commit oid; PRs are scanned from HEAD down to it
            (defaults to the base of the oldest considered release branch).
        number: how many of the most recent release branches to consider.
        run_cherrypick: callable (token, pr_number, branch_name) -> status
            that performs (or simulates) the actual cherry-pick.
    '''
    bp = Backport(token, 'ClickHouse', 'ClickHouse', 'core')
    repo = LocalRepo(repo_bare, bp.default_branch_name)
    branches = repo.get_release_branches()[-number:]  # [(branch_name, base_commit)]

    if not branches:
        logging.info('No release branches found!')
        return

    for branch in branches:
        logging.info('Found release branch: %s', branch[0])

    if not til:
        til = branches[0][1]
    prs = bp.getPullRequests(til)

    # Maps PR number -> set of release-branch names it must be backported to.
    backport_map = {}

    RE_MUST_BACKPORT = re.compile(r'^v(\d+\.\d+)-must-backport$')
    RE_NO_BACKPORT = re.compile(r'^v(\d+\.\d+)-no-backport$')

    # pull-requests are sorted by ancestry from the least recent.
    for pr in prs:
        # Drop release branches whose base already includes this PR's
        # merge-commit — they cannot need a backport of it (nor of later PRs).
        while repo.comparator(branches[-1][1]) >= repo.comparator(pr['mergeCommit']['oid']):
            branches.pop()
        assert len(branches)
        branch_set = set([branch[0] for branch in branches])

        # First pass. Find all must-backports
        for label in pr['labels']['nodes']:
            if label['name'].startswith('pr-') and label['color'] == 'ff0000':
                # Red `pr-` label: backport to every remaining release branch.
                backport_map[pr['number']] = branch_set.copy()
                continue
            m = RE_MUST_BACKPORT.match(label['name'])
            if m:
                if pr['number'] not in backport_map:
                    backport_map[pr['number']] = set()
                backport_map[pr['number']].add(m.group(1))

        # Second pass. Find all no-backports
        for label in pr['labels']['nodes']:
            if label['name'] == 'pr-no-backport' and pr['number'] in backport_map:
                # Global opt-out: drop the PR entirely.
                del backport_map[pr['number']]
                break
            m = RE_NO_BACKPORT.match(label['name'])
            if m and pr['number'] in backport_map and m.group(1) in backport_map[pr['number']]:
                # Per-branch opt-out.
                backport_map[pr['number']].remove(m.group(1))

    # NOTE(review): this loop variable shadows the `branches` list above;
    # harmless here (the list is no longer needed) but worth renaming.
    for pr, branches in backport_map.items():
        logging.info('PR #%s needs to be backported to:', pr)
        for branch in branches:
            logging.info('\t%s %s', branch, run_cherrypick(token, pr, branch))

    # print API costs
    logging.info('\nGitHub API total costs per query:')
    for name, value in bp._gh.api_costs.items():
        logging.info('%s : %s', name, value)
if __name__ == "__main__":
    # Command-line entry point of the backporting driver.
    parser = argparse.ArgumentParser()
    parser.add_argument('--token', type=str, required=True, help='token for Github access')
    parser.add_argument('--repo-bare', type=str, required=True, help='path to bare repository', metavar='PATH')
    parser.add_argument('--repo-full', type=str, required=True, help='path to full repository', metavar='PATH')
    parser.add_argument('--til', type=str, help='check PRs from HEAD til this commit', metavar='COMMIT')
    parser.add_argument('-n', type=int, dest='number', help='number of last release branches to consider')
    parser.add_argument('--dry-run', action='store_true', help='do not create or merge any PRs', default=False)
    parser.add_argument('--verbose', '-v', action='store_true', help='more verbose output', default=False)
    args = parser.parse_args()

    # Plain messages on stdout; --verbose lowers the threshold to DEBUG.
    log_level = logging.DEBUG if args.verbose else logging.INFO
    logging.basicConfig(format='%(message)s', stream=sys.stdout, level=log_level)

    def cherrypick_run(token, pr, branch):
        # Bind the full-repo path and the dry-run flag from the CLI into the
        # cherry-pick callback expected by `run`.
        return cherrypick.run(token, pr, branch, args.repo_full, args.dry_run)

    run(args.token, args.repo_bare, args.til, args.number, cherrypick_run)

185
utils/github/cherrypick.py Normal file
View File

@ -0,0 +1,185 @@
# -*- coding: utf-8 -*-
'''
Backports changes from PR to release branch.
Requires multiple separate runs as part of the implementation.
First run should do the following:
1. Merge release branch with a first parent of merge-commit of PR (using 'ours' strategy). (branch: backport/{branch}/{pr})
2. Create temporary branch over merge-commit to use it for PR creation. (branch: cherrypick/{merge_commit})
3. Create PR from temporary branch to backport branch (emulating cherry-pick).
Second run checks PR from previous run to be merged or at least being mergeable. If it's not merged then try to merge it.
Third run creates PR from backport branch (with merged previous PR) to release branch.
'''
from query import Query as RemoteRepo
import argparse
from enum import Enum
import logging
import os
import sys
class CherryPick:
    '''Drives the multi-stage backport of one pull-request into one release
    branch (see the module docstring for the overall three-run flow).
    '''

    class Status(Enum):
        # Human-readable progress states of the backporting process.
        DISCARDED = 'discarded'
        NOT_INITIATED = 'not started'
        FIRST_MERGEABLE = 'waiting for 1st stage'
        FIRST_CONFLICTS = 'conflicts on 1st stage'
        SECOND_MERGEABLE = 'waiting for 2nd stage'
        SECOND_CONFLICTS = 'conflicts on 2nd stage'
        MERGED = 'backported'

    def __init__(self, token, owner, name, team, pr_number, target_branch):
        '''Look up the pull-request and derive the working branch names.'''
        self._gh = RemoteRepo(token, owner=owner, name=name, team=team)
        self._pr = self._gh.get_pull_request(pr_number)

        # TODO: check if pull-request is merged.
        self.merge_commit_oid = self._pr['mergeCommit']['oid']

        self.target_branch = target_branch
        # Long-lived branch accumulating the backport for `target_branch`.
        self.backport_branch = 'backport/{branch}/{pr}'.format(branch=target_branch, pr=pr_number)
        # Branch pointing at the PR's merge-commit; used to emulate a
        # cherry-pick by opening a PR from it onto the backport branch.
        self.cherrypick_branch = 'cherrypick/{branch}/{oid}'.format(branch=target_branch, oid=self.merge_commit_oid)

    def getCherryPickPullRequest(self):
        '''Return the stage-1 (cherry-pick) PR if it already exists, else an
        empty dict (per `find_pull_request`).'''
        return self._gh.find_pull_request(base=self.backport_branch, head=self.cherrypick_branch)

    def createCherryPickPullRequest(self, repo_path):
        '''Create the stage-1 branches and the PR that emulates a local
        `git cherry-pick` of the original merge-commit.
        '''
        DESCRIPTION = (
            'This pull-request is a first step of an automated backporting.\n'
            'It contains changes like after calling a local command `git cherry-pick`.\n'
            'If you intend to continue backporting this changes, then resolve all conflicts if any.\n'
            'Otherwise, if you do not want to backport them, then just close this pull-request.\n'
            '\n'
            'The check results does not matter at this step - you can safely ignore them.\n'
            'Also this pull-request will be merged automatically as it reaches the mergeable state, but you always can merge it manually.\n'
        )

        # FIXME: replace with something better than os.system()
        git_prefix = 'git -C {} -c "user.email=robot-clickhouse@yandex-team.ru" -c "user.name=robot-clickhouse" '.format(repo_path)
        base_commit_oid = self._pr['mergeCommit']['parents']['nodes'][0]['oid']

        # Create separate branch for backporting, and make it look like real cherry-pick.
        os.system(git_prefix + 'checkout -f ' + self.target_branch)
        os.system(git_prefix + 'checkout -B ' + self.backport_branch)
        # The 'ours' strategy records the PR's base as an ancestor without
        # taking any of its changes.
        os.system(git_prefix + 'merge -s ours --no-edit ' + base_commit_oid)

        # Create secondary branch to allow pull request with cherry-picked commit.
        os.system(git_prefix + 'branch -f {} {}'.format(self.cherrypick_branch, self.merge_commit_oid))

        os.system(git_prefix + 'push -f origin {branch}:{branch}'.format(branch=self.backport_branch))
        os.system(git_prefix + 'push -f origin {branch}:{branch}'.format(branch=self.cherrypick_branch))

        # Create pull-request like a local cherry-pick
        pr = self._gh.create_pull_request(source=self.cherrypick_branch, target=self.backport_branch,
                                          title='Cherry pick #{number} to {target}: {title}'.format(
                                              number=self._pr['number'], target=self.target_branch, title=self._pr['title'].replace('"', '\\"')),
                                          description='Original pull-request #{}\n\n{}'.format(self._pr['number'], DESCRIPTION))

        # FIXME: use `team` to leave a single eligible assignee.
        self._gh.add_assignee(pr, self._pr['author'])
        self._gh.add_assignee(pr, self._pr['mergedBy'])

        self._gh.set_label(pr, "do not test")
        self._gh.set_label(pr, "pr-cherrypick")

        return pr

    def mergeCherryPickPullRequest(self, cherrypick_pr):
        '''Merge the stage-1 PR; returns the updated PR object.'''
        return self._gh.merge_pull_request(cherrypick_pr['id'])

    def getBackportPullRequest(self):
        '''Return the final (backport) PR if it already exists, else an empty
        dict (per `find_pull_request`).'''
        return self._gh.find_pull_request(base=self.target_branch, head=self.backport_branch)

    def createBackportPullRequest(self, cherrypick_pr, repo_path):
        '''Squash the backport branch and open the final PR against the
        release branch.
        '''
        DESCRIPTION = (
            'This pull-request is a last step of an automated backporting.\n'
            'Treat it as a standard pull-request: look at the checks and resolve conflicts.\n'
            'Merge it only if you intend to backport changes to the target branch, otherwise just close it.\n'
        )

        git_prefix = 'git -C {} -c "user.email=robot-clickhouse@yandex-team.ru" -c "user.name=robot-clickhouse" '.format(repo_path)

        os.system(git_prefix + 'checkout -f ' + self.backport_branch)
        os.system(git_prefix + 'pull --ff-only origin ' + self.backport_branch)
        # Collapse everything since the merge-base into one squash commit.
        os.system(git_prefix + 'reset --soft `{git} merge-base {target} {backport}`'.format(git=git_prefix, target=self.target_branch, backport=self.backport_branch))
        os.system(git_prefix + 'commit -a -m "Squash backport branch"')
        os.system(git_prefix + 'push -f origin {branch}:{branch}'.format(branch=self.backport_branch))

        pr = self._gh.create_pull_request(source=self.backport_branch, target=self.target_branch,
                                          title='Backport #{number} to {target}: {title}'.format(
                                              number=self._pr['number'], target=self.target_branch, title=self._pr['title'].replace('"', '\\"')),
                                          description='Original pull-request #{}\nCherry-pick pull-request #{}\n\n{}'.format(self._pr['number'], cherrypick_pr['number'], DESCRIPTION))

        # FIXME: use `team` to leave a single eligible assignee.
        self._gh.add_assignee(pr, self._pr['author'])
        self._gh.add_assignee(pr, self._pr['mergedBy'])

        self._gh.set_label(pr, "pr-backport")

        return pr
def run(token, pr, branch, repo, dry_run=False):
    '''Advance the multi-stage backport of `pr` into `branch` by one step.

    Returns a CherryPick.Status describing how far the backport progressed.
    With dry_run=True no pull-requests are created or merged.
    '''
    cp = CherryPick(token, 'ClickHouse', 'ClickHouse', 'core', pr, branch)

    # Stage 1: the emulated cherry-pick pull-request.
    cherry_pr = cp.getCherryPickPullRequest()
    if cherry_pr:
        logging.debug('Found PR with cherry-pick of %s to %s: %s', pr, branch, cherry_pr['url'])
    elif dry_run:
        return CherryPick.Status.NOT_INITIATED
    else:
        cherry_pr = cp.createCherryPickPullRequest(repo)
        logging.debug('Created PR with cherry-pick of %s to %s: %s', pr, branch, cherry_pr['url'])

    # Auto-merge the cherry-pick PR as soon as it is open and mergeable.
    ready_to_merge = (not cherry_pr['merged']
                      and cherry_pr['mergeable'] == 'MERGEABLE'
                      and not cherry_pr['closed'])
    if ready_to_merge and not dry_run:
        cherry_pr = cp.mergeCherryPickPullRequest(cherry_pr)
        logging.debug('Merged PR with cherry-pick of %s to %s: %s', pr, branch, cherry_pr['url'])

    # Stage 1 not finished yet: report its exact state.
    if not cherry_pr['merged']:
        logging.debug('Waiting for PR with cherry-pick of %s to %s: %s', pr, branch, cherry_pr['url'])
        if cherry_pr['closed']:
            return CherryPick.Status.DISCARDED
        if cherry_pr['mergeable'] == 'CONFLICTING':
            return CherryPick.Status.FIRST_CONFLICTS
        return CherryPick.Status.FIRST_MERGEABLE

    # Stage 2: the backport pull-request into the release branch.
    backport_pr = cp.getBackportPullRequest()
    if backport_pr:
        logging.debug('Found PR with backport of %s to %s: %s', pr, branch, backport_pr['url'])
    elif dry_run:
        return CherryPick.Status.FIRST_MERGEABLE
    else:
        backport_pr = cp.createBackportPullRequest(cherry_pr, repo)
        logging.debug('Created PR with backport of %s to %s: %s', pr, branch, backport_pr['url'])

    if backport_pr['merged']:
        return CherryPick.Status.MERGED
    if backport_pr['closed']:
        return CherryPick.Status.DISCARDED
    if backport_pr['mergeable'] == 'CONFLICTING':
        return CherryPick.Status.SECOND_CONFLICTS
    return CherryPick.Status.SECOND_MERGEABLE
if __name__ == "__main__":
    # Standalone mode: perform a single cherry-pick step with full debug output.
    logging.basicConfig(format='%(message)s', stream=sys.stdout, level=logging.DEBUG)

    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('--token', '-t', type=str, required=True, help='token for Github access')
    arg_parser.add_argument('--pr', type=str, required=True, help='PR# to cherry-pick')
    arg_parser.add_argument('--branch', '-b', type=str, required=True, help='target branch name for cherry-pick')
    arg_parser.add_argument('--repo', '-r', type=str, required=True, help='path to full repository', metavar='PATH')
    opts = arg_parser.parse_args()

    run(opts.token, opts.pr, opts.branch, opts.repo)

View File

@ -1,26 +1,22 @@
# -*- coding: utf-8 -*-
try:
import git # `pip3 install gitpython`
import git # `pip install gitpython`
except ImportError:
sys.exit("Package 'gitpython' not found. Try run: `pip3 install [--user] gitpython`")
import sys
sys.exit("Package 'gitpython' not found. Try run: `pip install [--user] gitpython`")
import functools
import logging
import os
import re
class Local:
'''Implements some useful methods atop of the local repository
'''
RE_RELEASE_BRANCH_REF = re.compile(r'^refs/remotes/.+/\d+\.\d+$')
def __init__(self, repo_path, remote_name, default_branch_name):
class RepositoryBase(object):
def __init__(self, repo_path):
self._repo = git.Repo(repo_path, search_parent_directories=(not repo_path))
self._remote = self._repo.remotes[remote_name]
self._default = self._remote.refs[default_branch_name]
# public key comparator
# commit comparator
def cmp(x, y):
if x == y:
return 0
@ -38,21 +34,58 @@ class Local:
for commit in self._repo.iter_commits(rev_range, first_parent=True):
yield commit
''' Returns sorted list of tuples:
class Repository(RepositoryBase):
def __init__(self, repo_path, remote_name, default_branch_name):
super(Repository, self).__init__(repo_path)
self._remote = self._repo.remotes[remote_name]
self._default = self._remote.refs[default_branch_name]
def get_release_branches(self):
'''
Returns sorted list of tuples:
* remote branch (git.refs.remote.RemoteReference),
* base commit (git.Commit),
* head (git.Commit)).
List is sorted by commits in ascending order.
'''
def get_release_branches(self):
'''
release_branches = []
for branch in [r for r in self._remote.refs if Local.RE_RELEASE_BRANCH_REF.match(r.path)]:
RE_RELEASE_BRANCH_REF = re.compile(r'^refs/remotes/.+/\d+\.\d+$')
for branch in [r for r in self._remote.refs if RE_RELEASE_BRANCH_REF.match(r.path)]:
base = self._repo.merge_base(self._default, self._repo.commit(branch))
if not base:
print(f'Branch {branch.path} is not based on branch {self._default}. Ignoring.')
logging.info('Branch %s is not based on branch %s. Ignoring.', branch.path, self._default)
elif len(base) > 1:
print(f'Branch {branch.path} has more than one base commit. Ignoring.')
logging.info('Branch %s has more than one base commit. Ignoring.', branch.path)
else:
release_branches.append((os.path.basename(branch.name), base[0]))
return sorted(release_branches, key=lambda x : self.comparator(x[1]))
class BareRepository(RepositoryBase):
def __init__(self, repo_path, default_branch_name):
super(BareRepository, self).__init__(repo_path)
self._default = self._repo.branches[default_branch_name]
def get_release_branches(self):
'''
Returns sorted list of tuples:
* branch (git.refs.head?),
* base commit (git.Commit),
* head (git.Commit)).
List is sorted by commits in ascending order.
'''
release_branches = []
RE_RELEASE_BRANCH_REF = re.compile(r'^refs/heads/\d+\.\d+$')
for branch in [r for r in self._repo.branches if RE_RELEASE_BRANCH_REF.match(r.path)]:
base = self._repo.merge_base(self._default, self._repo.commit(branch))
if not base:
logging.info('Branch %s is not based on branch %s. Ignoring.', branch.path, self._default)
elif len(base) > 1:
logging.info('Branch %s has more than one base commit. Ignoring.', branch.path)
else:
release_branches.append((os.path.basename(branch.name), base[0]))

View File

@ -5,42 +5,112 @@ import time
class Query:
'''Implements queries to the Github API using GraphQL
'''
Implements queries to the Github API using GraphQL
'''
def __init__(self, token, max_page_size=100, min_page_size=5):
self._token = token
self._max_page_size = max_page_size
self._min_page_size = min_page_size
self.api_costs = {}
_PULL_REQUEST = '''
author {{
... on User {{
id
login
}}
}}
_MEMBERS = '''
organization(login: "{organization}") {{
team(slug: "{team}") {{
members(first: {max_page_size} {next}) {{
pageInfo {{
hasNextPage
endCursor
}}
nodes {{
login
}}
baseRepository {{
nameWithOwner
}}
mergeCommit {{
oid
parents(first: {min_page_size}) {{
totalCount
nodes {{
oid
}}
}}
}}
mergedBy {{
... on User {{
id
login
}}
}}
baseRefName
closed
id
mergeable
merged
number
title
url
'''
def get_members(self, organization, team):
def __init__(self, token, owner, name, team, max_page_size=100, min_page_size=5):
self._PULL_REQUEST = Query._PULL_REQUEST.format(min_page_size=min_page_size)
self._token = token
self._owner = owner
self._name = name
self._team = team
self._max_page_size = max_page_size
self._min_page_size = min_page_size
self.api_costs = {}
repo = self.get_repository()
self._id = repo['id']
self.ssh_url = repo['sshUrl']
self.default_branch = repo['defaultBranchRef']['name']
self.members = set(self.get_members())
def get_repository(self):
_QUERY = '''
repository(owner: "{owner}" name: "{name}") {{
defaultBranchRef {{
name
}}
id
sshUrl
}}
'''
query = _QUERY.format(owner=self._owner, name=self._name)
return self._run(query)['repository']
def get_members(self):
'''Get all team members for organization
Returns:
logins: a list of members' logins
members: a map of members' logins to ids
'''
logins = []
_QUERY = '''
organization(login: "{organization}") {{
team(slug: "{team}") {{
members(first: {max_page_size} {next}) {{
pageInfo {{
hasNextPage
endCursor
}}
nodes {{
id
login
}}
}}
}}
}}
'''
members = {}
not_end = True
query = Query._MEMBERS.format(organization=organization,
team=team,
max_page_size=self._max_page_size,
next='')
query = _QUERY.format(organization=self._owner, team=self._team,
max_page_size=self._max_page_size,
next='')
while not_end:
result = self._run(query)['organization']['team']
@ -48,191 +118,79 @@ class Query:
break
result = result['members']
not_end = result['pageInfo']['hasNextPage']
query = Query._MEMBERS.format(organization=organization,
team=team,
max_page_size=self._max_page_size,
next=f'after: "{result["pageInfo"]["endCursor"]}"')
query = _QUERY.format(organization=self._owner, team=self._team,
max_page_size=self._max_page_size,
next='after: "{}"'.format(result["pageInfo"]["endCursor"]))
logins += [node['login'] for node in result['nodes']]
members += dict([(node['login'], node['id']) for node in result['nodes']])
return logins
return members
_LABELS = '''
repository(owner: "ClickHouse" name: "ClickHouse") {{
pullRequest(number: {number}) {{
labels(first: {max_page_size} {next}) {{
pageInfo {{
hasNextPage
endCursor
}}
nodes {{
name
color
}}
def get_pull_request(self, number):
_QUERY = '''
repository(owner: "{owner}" name: "{name}") {{
pullRequest(number: {number}) {{
{pull_request_data}
}}
}}
}}
'''
def get_labels(self, pull_request):
'''Fetchs all labels for given pull-request
Args:
pull_request: JSON object returned by `get_pull_requests()`
Returns:
labels: a list of JSON nodes with the name and color fields
'''
labels = [label for label in pull_request['labels']['nodes']]
not_end = pull_request['labels']['pageInfo']['hasNextPage']
query = Query._LABELS.format(number = pull_request['number'],
max_page_size = self._max_page_size,
next=f'after: "{pull_request["labels"]["pageInfo"]["endCursor"]}"')
while not_end:
result = self._run(query)['repository']['pullRequest']['labels']
not_end = result['pageInfo']['hasNextPage']
query = Query._LABELS.format(number=pull_request['number'],
max_page_size=self._max_page_size,
next=f'after: "{result["pageInfo"]["endCursor"]}"')
query = _QUERY.format(owner=self._owner, name=self._name, number=number,
pull_request_data = self._PULL_REQUEST, min_page_size=self._min_page_size)
return self._run(query)['repository']['pullRequest']
labels += [label for label in result['nodes']]
return labels
_TIMELINE = '''
repository(owner: "ClickHouse" name: "ClickHouse") {{
pullRequest(number: {number}) {{
timeline(first: {max_page_size} {next}) {{
pageInfo {{
hasNextPage
endCursor
}}
def find_pull_request(self, base, head):
_QUERY = '''
repository(owner: "{owner}" name: "{name}") {{
pullRequests(first: {min_page_size} baseRefName: "{base}" headRefName: "{head}") {{
nodes {{
... on CrossReferencedEvent {{
isCrossRepository
source {{
... on PullRequest {{
number
baseRefName
merged
labels(first: {max_page_size}) {{
pageInfo {{
hasNextPage
endCursor
}}
{pull_request_data}
}}
totalCount
}}
}}
'''
query = _QUERY.format(owner=self._owner, name=self._name, base=base, head=head,
pull_request_data = self._PULL_REQUEST, min_page_size=self._min_page_size)
result = self._run(query)['repository']['pullRequests']
if result['totalCount'] > 0:
return result['nodes'][0]
else:
return {}
def get_pull_requests(self, before_commit):
'''
Get all merged pull-requests from the HEAD of default branch to the last commit (excluding)
'''
_QUERY = '''
repository(owner: "{owner}" name: "{name}") {{
defaultBranchRef {{
target {{
... on Commit {{
history(first: {max_page_size} {next}) {{
pageInfo {{
hasNextPage
endCursor
}}
nodes {{
oid
associatedPullRequests(first: {min_page_size}) {{
totalCount
nodes {{
name
color
}}
}}
}}
}}
target {{
... on PullRequest {{
number
}}
}}
}}
}}
}}
}}
}}
'''
def get_timeline(self, pull_request):
'''Fetchs all cross-reference events from pull-request's timeline
... on PullRequest {{
{pull_request_data}
Args:
pull_request: JSON object returned by `get_pull_requests()`
Returns:
events: a list of JSON nodes for CrossReferenceEvent
'''
events = [event for event in pull_request['timeline']['nodes'] if event and event['source']]
not_end = pull_request['timeline']['pageInfo']['hasNextPage']
query = Query._TIMELINE.format(number = pull_request['number'],
max_page_size = self._max_page_size,
next=f'after: "{pull_request["timeline"]["pageInfo"]["endCursor"]}"')
while not_end:
result = self._run(query)['repository']['pullRequest']['timeline']
not_end = result['pageInfo']['hasNextPage']
query = Query._TIMELINE.format(number=pull_request['number'],
max_page_size=self._max_page_size,
next=f'after: "{result["pageInfo"]["endCursor"]}"')
events += [event for event in result['nodes'] if event and event['source']]
return events
_PULL_REQUESTS = '''
repository(owner: "ClickHouse" name: "ClickHouse") {{
defaultBranchRef {{
name
target {{
... on Commit {{
history(first: {max_page_size} {next}) {{
pageInfo {{
hasNextPage
endCursor
}}
nodes {{
oid
associatedPullRequests(first: {min_page_size}) {{
totalCount
nodes {{
... on PullRequest {{
id
number
author {{
login
}}
bodyText
mergedBy {{
login
}}
url
baseRefName
baseRepository {{
nameWithOwner
}}
mergeCommit {{
oid
}}
labels(first: {min_page_size}) {{
pageInfo {{
hasNextPage
endCursor
}}
nodes {{
name
color
}}
}}
timeline(first: {min_page_size}) {{
pageInfo {{
hasNextPage
endCursor
}}
nodes {{
... on CrossReferencedEvent {{
isCrossRepository
source {{
... on PullRequest {{
number
baseRefName
merged
labels(first: 0) {{
nodes {{
name
}}
}}
}}
}}
target {{
... on PullRequest {{
number
}}
}}
labels(first: {min_page_size}) {{
totalCount
pageInfo {{
hasNextPage
endCursor
}}
nodes {{
name
color
}}
}}
}}
@ -244,116 +202,270 @@ class Query:
}}
}}
}}
}}
'''
    # NOTE(review): the body below looks like two revisions of this method
    # merged together by a diff rendering — consecutive duplicate `query = ...`
    # assignments, two `result = ...` fetches, and two back-to-back `if(`
    # conditions (the first of which has no body, so this cannot parse).
    # Reconcile against version control before relying on it.
    def get_pull_requests(self, before_commit, login):
        '''Get all merged pull-requests from the HEAD of default branch to the last commit (excluding)

        Args:
            before_commit (string-convertable): commit sha of the last commit (excluding)
            login (string): filter pull-requests by user login
        Returns:
            pull_requests: a list of JSON nodes with pull-requests' details
        '''
        pull_requests = []
        not_end = True
        query = Query._PULL_REQUESTS.format(max_page_size=self._max_page_size,
                                            min_page_size=self._min_page_size,
                                            next='')
        query = _QUERY.format(owner=self._owner, name=self._name,
                              max_page_size=self._max_page_size,
                              min_page_size=self._min_page_size,
                              pull_request_data=self._PULL_REQUEST,
                              next='')
        while not_end:
            result = self._run(query)['repository']['defaultBranchRef']
            default_branch_name = result['name']
            result = result['target']['history']
            result = self._run(query)['repository']['defaultBranchRef']['target']['history']
            not_end = result['pageInfo']['hasNextPage']
            query = Query._PULL_REQUESTS.format(max_page_size=self._max_page_size,
                                                min_page_size=self._min_page_size,
                                                next=f'after: "{result["pageInfo"]["endCursor"]}"')
            query = _QUERY.format(owner=self._owner, name=self._name,
                                  max_page_size=self._max_page_size,
                                  min_page_size=self._min_page_size,
                                  pull_request_data=self._PULL_REQUEST,
                                  next='after: "{}"'.format(result["pageInfo"]["endCursor"]))
            for commit in result['nodes']:
                # FIXME: maybe include `before_commit`?
                if str(commit['oid']) == str(before_commit):
                    not_end = False
                    break
                # TODO: fetch all pull-requests that were merged in a single commit.
                assert commit['associatedPullRequests']['totalCount'] <= self._min_page_size, \
                    f'there are {commit["associatedPullRequests"]["totalCount"]} pull-requests merged in commit {commit["oid"]}'
                assert commit['associatedPullRequests']['totalCount'] <= self._min_page_size
                for pull_request in commit['associatedPullRequests']['nodes']:
                    if(pull_request['baseRepository']['nameWithOwner'] == 'ClickHouse/ClickHouse' and
                       pull_request['baseRefName'] == default_branch_name and
                       pull_request['mergeCommit']['oid'] == commit['oid'] and
                       (not login or pull_request['author']['login'] == login)):
                    if(pull_request['baseRepository']['nameWithOwner'] == '{}/{}'.format(self._owner, self._name) and
                       pull_request['baseRefName'] == self.default_branch and
                       pull_request['mergeCommit']['oid'] == commit['oid']):
                        pull_requests.append(pull_request)
        return pull_requests
    # GraphQL query template that fetches only the default branch name.
    _DEFAULT = '''
    repository(owner: "ClickHouse", name: "ClickHouse") {
        defaultBranchRef {
            name
        }
    }
    '''
    def get_default_branch(self):
        '''Get short name of the default branch

        Returns:
            name (string): branch name
        '''
        return self._run(Query._DEFAULT)['repository']['defaultBranchRef']['name']
_GET_LABEL = '''
repository(owner: "ClickHouse" name: "ClickHouse") {{
labels(first: {max_page_size} {next} query: "{name}") {{
pageInfo {{
hasNextPage
endCursor
}}
nodes {{
id
name
color
    # NOTE(review): this body appears corrupted by a diff rendering — remnants
    # of a removed `_SET_LABEL` class attribute and a stray closing `'''` are
    # spliced between the mutation template and its use, leaving quoting
    # unbalanced (and one extra `}}` in the template). Reconcile against
    # version control before use.
    def create_pull_request(self, source, target, title, description="", draft=False, can_modify=True):
        _QUERY = '''
            createPullRequest(input: {{
                baseRefName: "{target}",
                headRefName: "{source}",
                repositoryId: "{id}",
                title: "{title}",
                body: "{body}",
                draft: {draft},
                maintainerCanModify: {modify}
            }}) {{
                pullRequest {{
                    {pull_request_data}
                }}
            }}
        }}
    '''
    _SET_LABEL = '''
    addLabelsToLabelable(input: {{ labelableId: "{pr_id}", labelIds: "{label_id}" }}) {{
        clientMutationId
    }}
    '''
        '''
        query = _QUERY.format(target=target, source=source, id=self._id, title=title, body=description,
                              draft="true" if draft else "false", modify="true" if can_modify else "false",
                              pull_request_data = self._PULL_REQUEST)
        return self._run(query, is_mutation=True)['createPullRequest']['pullRequest']
def merge_pull_request(self, id):
_QUERY = '''
mergePullRequest(input: {{
pullRequestId: "{id}"
}}) {{
pullRequest {{
{pull_request_data}
}}
}}
'''
query = _QUERY.format(id=id, pull_request_data = self._PULL_REQUEST)
return self._run(query, is_mutation=True)['mergePullRequest']['pullRequest']
# FIXME: figure out how to add more assignees at once
def add_assignee(self, pr, assignee):
_QUERY = '''
addAssigneesToAssignable(input: {{
assignableId: "{id1}",
assigneeIds: "{id2}"
}}) {{
clientMutationId
}}
'''
query = _QUERY.format(id1=pr['id'], id2=assignee['id'])
self._run(query, is_mutation=True)
    # NOTE(review): the body below appears to be two revisions of this method
    # merged together by a diff rendering — a duplicated docstring (the second
    # copy sits outside any string and cannot parse), and duplicated
    # `query = ...` assignments and label-page fetches. Reconcile against
    # version control before relying on it.
    def set_label(self, pull_request, label_name):
        '''Set label by name to the pull request
        '''
        Set label by name to the pull request

        Args:
            pull_request: JSON object returned by `get_pull_requests()`
            label_name (string): label name
        '''
        _GET_LABEL = '''
            repository(owner: "{owner}" name: "{name}") {{
                labels(first: {max_page_size} {next} query: "{label_name}") {{
                    pageInfo {{
                        hasNextPage
                        endCursor
                    }}
                    nodes {{
                        id
                        name
                        color
                    }}
                }}
            }}
        '''
        _SET_LABEL = '''
            addLabelsToLabelable(input: {{
                labelableId: "{pr_id}",
                labelIds: "{label_id}"
            }}) {{
                clientMutationId
            }}
        '''
        labels = []
        not_end = True
        query = Query._GET_LABEL.format(name=label_name,
                                        max_page_size=self._max_page_size,
                                        next='')
        query = _GET_LABEL.format(owner=self._owner, name=self._name, label_name=label_name,
                                  max_page_size=self._max_page_size,
                                  next='')
        while not_end:
            result = self._run(query)['repository']['labels']
            not_end = result['pageInfo']['hasNextPage']
            query = Query._GET_LABEL.format(name=label_name,
                                            max_page_size=self._max_page_size,
                                            next=f'after: "{result["pageInfo"]["endCursor"]}"')
            query = _GET_LABEL.format(owner=self._owner, name=self._name, label_name=label_name,
                                      max_page_size=self._max_page_size,
                                      next='after: "{}"'.format(result["pageInfo"]["endCursor"]))
            labels += [label for label in result['nodes']]
        if not labels:
            return
        query = Query._SET_LABEL.format(pr_id = pull_request['id'], label_id = labels[0]['id'])
        query = _SET_LABEL.format(pr_id = pull_request['id'], label_id = labels[0]['id'])
        self._run(query, is_mutation=True)
        pull_request['labels']['nodes'].append(labels[0])
### OLD METHODS
# _LABELS = '''
# repository(owner: "ClickHouse" name: "ClickHouse") {{
# pullRequest(number: {number}) {{
# labels(first: {max_page_size} {next}) {{
# pageInfo {{
# hasNextPage
# endCursor
# }}
# nodes {{
# name
# color
# }}
# }}
# }}
# }}
# '''
# def get_labels(self, pull_request):
    # '''Fetches all labels for a given pull-request
# Args:
# pull_request: JSON object returned by `get_pull_requests()`
# Returns:
# labels: a list of JSON nodes with the name and color fields
# '''
# labels = [label for label in pull_request['labels']['nodes']]
# not_end = pull_request['labels']['pageInfo']['hasNextPage']
# query = Query._LABELS.format(number = pull_request['number'],
# max_page_size = self._max_page_size,
# next=f'after: "{pull_request["labels"]["pageInfo"]["endCursor"]}"')
# while not_end:
# result = self._run(query)['repository']['pullRequest']['labels']
# not_end = result['pageInfo']['hasNextPage']
# query = Query._LABELS.format(number=pull_request['number'],
# max_page_size=self._max_page_size,
# next=f'after: "{result["pageInfo"]["endCursor"]}"')
# labels += [label for label in result['nodes']]
# return labels
# _TIMELINE = '''
# repository(owner: "ClickHouse" name: "ClickHouse") {{
# pullRequest(number: {number}) {{
# timeline(first: {max_page_size} {next}) {{
# pageInfo {{
# hasNextPage
# endCursor
# }}
# nodes {{
# ... on CrossReferencedEvent {{
# isCrossRepository
# source {{
# ... on PullRequest {{
# number
# baseRefName
# merged
# labels(first: {max_page_size}) {{
# pageInfo {{
# hasNextPage
# endCursor
# }}
# nodes {{
# name
# color
# }}
# }}
# }}
# }}
# target {{
# ... on PullRequest {{
# number
# }}
# }}
# }}
# }}
# }}
# }}
# }}
# '''
# def get_timeline(self, pull_request):
    # '''Fetches all cross-reference events from a pull-request's timeline
# Args:
# pull_request: JSON object returned by `get_pull_requests()`
# Returns:
# events: a list of JSON nodes for CrossReferenceEvent
# '''
# events = [event for event in pull_request['timeline']['nodes'] if event and event['source']]
# not_end = pull_request['timeline']['pageInfo']['hasNextPage']
# query = Query._TIMELINE.format(number = pull_request['number'],
# max_page_size = self._max_page_size,
# next=f'after: "{pull_request["timeline"]["pageInfo"]["endCursor"]}"')
# while not_end:
# result = self._run(query)['repository']['pullRequest']['timeline']
# not_end = result['pageInfo']['hasNextPage']
# query = Query._TIMELINE.format(number=pull_request['number'],
# max_page_size=self._max_page_size,
# next=f'after: "{result["pageInfo"]["endCursor"]}"')
# events += [event for event in result['nodes'] if event and event['source']]
# return events
# _DEFAULT = '''
# repository(owner: "ClickHouse", name: "ClickHouse") {
# defaultBranchRef {
# name
# }
# }
# '''
# def get_default_branch(self):
# '''Get short name of the default branch
# Returns:
# name (string): branch name
# '''
# return self._run(Query._DEFAULT)['repository']['defaultBranchRef']['name']
def _run(self, query, is_mutation=False):
from requests.adapters import HTTPAdapter
@ -378,15 +490,15 @@ class Query:
session.mount('https://', adapter)
return session
headers = {'Authorization': f'bearer {self._token}'}
headers = {'Authorization': 'bearer {}'.format(self._token)}
if is_mutation:
query = f'''
query = '''
mutation {{
{query}
}}
'''
'''.format(query=query)
else:
query = f'''
query = '''
query {{
{query}
rateLimit {{
@ -394,14 +506,14 @@ class Query:
remaining
}}
}}
'''
'''.format(query=query)
while True:
request = requests_retry_session().post('https://api.github.com/graphql', json={'query': query}, headers=headers)
if request.status_code == 200:
result = request.json()
if 'errors' in result:
raise Exception(f'Errors occured: {result["errors"]}')
raise Exception('Errors occured: {}\nOriginal query: {}'.format(result["errors"], query))
if not is_mutation:
import inspect
@ -413,10 +525,4 @@ class Query:
return result['data']
else:
import json
resp = request.json()
if resp and len(resp) > 0 and resp[0] and 'type' in resp[0] and resp[0]['type'] == 'RATE_LIMITED':
print("API rate limit exceeded. Waiting for 1 second.")
time.sleep(1)
continue
raise Exception(f'Query failed with code {request.status_code}:\n{json.dumps(resp, indent=4)}')
raise Exception('Query failed with code {code}:\n{json}'.format(code=request.status_code, json=json.dumps(request.json(), indent=4)))

11
utils/github/ya.make Normal file
View File

@ -0,0 +1,11 @@
PY_LIBRARY()
PY_SRCS(
__init__.py
backport.py
cherrypick.py
local.py
query.py
)
END()

View File

@ -1,22 +0,0 @@
#!/bin/sh
# NOTE(review): per the surrounding diff context this file is being deleted
# by the commit; the listing is kept here only for reference.
# sudo apt install python3-pip
# sudo pip3 install gitpython termcolor
# To obtain a token: go to GitHub.
# In top-right corner, click to your profile icon.
# Choose "Settings".
# Choose "Developer settings".
# Choose "Personal access tokens".
# Choose "Generate new token".
# Don't check any checkboxes.
# Run as:
# ./list_backports.sh --token your-token
# Abort on the first failing command.
set -e
# Resolve the directory containing this script, following symlinks,
# so the `github` package next to it is importable.
SCRIPTPATH=$(readlink -f "$0")
SCRIPTDIR=$(dirname "$SCRIPTPATH")
PYTHONPATH="$SCRIPTDIR" python3 -m github "$@"