Merge pull request #4949 from abyss7/release-helpers

Add python util to help with releases
alesapin 2019-04-11 19:28:58 +03:00 committed by GitHub
commit 3accb16cc6
5 changed files with 407 additions and 0 deletions

utils/github/__init__.py Normal file (1 line added)

@@ -0,0 +1 @@
# -*- coding: utf-8 -*-

utils/github/__main__.py Normal file (137 lines added)

@@ -0,0 +1,137 @@
# -*- coding: utf-8 -*-

'''
Rules for commit messages, branch names and everything:

- All(!) commits to the master branch must originate from pull-requests.
- All pull-requests must be squash-merged or explicitly merged without rebase.
- All pull-requests to master must have at least one label prefixed with `pr-`.
- Labels that require a pull-request to be backported must be red colored (#ff0000).
- Stable branch names must be of the form `YY.NUMBER`.
- All stable branches must be forked directly from the master branch and never be merged back,
  or merged with any other branches based on the master branch (including the master branch itself).

Output of this script:

- Commits without references from pull-requests.
- Pull-requests to master without proper labels.
- Pull-requests that need to be backported.
'''

from . import local, query

import argparse
import re
import sys

CHECK_MARK = '🗸'
CROSS_MARK = '🗙'
LABEL_MARK = '🏷'

parser = argparse.ArgumentParser(description='Helper for the ClickHouse Release machinery')
parser.add_argument('--repo', '-r', type=str, default='', metavar='PATH',
                    help='path to the root of the ClickHouse repository')
parser.add_argument('--remote', type=str, default='origin',
                    help='remote name of the "yandex/ClickHouse" upstream')
parser.add_argument('-n', type=int, default=3, dest='number',
                    help='number of last stable branches to consider')
parser.add_argument('--token', type=str, required=True,
                    help='token for Github access')
parser.add_argument('--login', type=str,
                    help='filter authorship by login')

args = parser.parse_args()

github = query.Query(args.token)
repo = local.Local(args.repo, args.remote, github.get_default_branch())

stables = repo.get_stables()[-args.number:] # [(branch, base)]
if not stables:
    sys.exit('No stable branches found!')
else:
    print('Found stable branches:')
    for stable in stables:
        print(f'{CHECK_MARK} {stable[0]} forked from {stable[1]}')

first_commit = stables[0][1]
pull_requests = github.get_pull_requests(first_commit, args.login)
good_commits = set(pull_request['mergeCommit']['oid'] for pull_request in pull_requests)

bad_commits = [] # collect and print them in the end
from_commit = repo.get_head_commit()
for i in reversed(range(len(stables))):
    for commit in repo.iterate(from_commit, stables[i][1]):
        if str(commit) not in good_commits and commit.author.name != 'robot-clickhouse':
            bad_commits.append(commit)
    from_commit = stables[i][1]

bad_pull_requests = [] # collect and print if not empty
need_backporting = []
for pull_request in pull_requests:
    label_found = False

    for label in github.get_labels(pull_request):
        if label['name'].startswith('pr-'):
            label_found = True
            if label['color'] == 'ff0000':
                need_backporting.append(pull_request)
            break

    if not label_found:
        bad_pull_requests.append(pull_request)

if bad_pull_requests:
    print('\nPull-requests without description label:')
    for bad in reversed(sorted(bad_pull_requests, key=lambda x: x['number'])):
        print(f'{CROSS_MARK} {bad["number"]}: {bad["url"]}')

# FIXME: compatibility logic, until direct modification of master is prohibited.
if bad_commits:
    print('\nCommits not referenced by any pull-request:')
    for bad in bad_commits:
        print(f'{CROSS_MARK} {bad} {bad.author}')

# TODO: check backports.
if need_backporting:
    re_vlabel = re.compile(r'^v\d+\.\d+$')
    re_stable_num = re.compile(r'\d+\.\d+$')

    print('\nPull-requests that need to be backported:')
    for pull_request in reversed(sorted(need_backporting, key=lambda x: x['number'])):
        targets = [] # use common list for consistent order in output
        good = set()

        for stable in stables:
            if repo.comparator(stable[1]) < repo.comparator(pull_request['mergeCommit']['oid']):
                targets.append(stable)

                # FIXME: compatibility logic - check for a manually set label that indicates the 'backported' status.
                # FIXME: O(n²) - no need to iterate over all labels for every `stable`
                for label in github.get_labels(pull_request):
                    if re_vlabel.match(label['name']):
                        stable_num = re_stable_num.search(stable[0].name)
                        if f'v{stable_num[0]}' == label['name']:
                            good.add(stable)

        # print the pull-request's status
        if len(good) == len(targets):
            print(f'{CHECK_MARK}', end=' ')
        else:
            print(f'{CROSS_MARK}', end=' ')
        print(f'{pull_request["number"]}', end=':')

        for target in targets:
            if target in good:
                print(f'\t{LABEL_MARK} {target[0]}', end='')
            else:
                print(f'\t{CROSS_MARK} {target[0]}', end='')

        print(f'\t({pull_request["mergeCommit"]["author"]["name"]}) {pull_request["url"]}')

# print legend
print('\nLegend:')
print(f'{CHECK_MARK} - good')
print(f'{CROSS_MARK} - bad')
print(f'{LABEL_MARK} - backport is detected via label')
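
For reference, the label-based check above boils down to a simple rule: a backport to stable branch `YY.N` counts as done when the pull-request carries a `vYY.N` label. A minimal standalone sketch of that rule (not part of this commit; `is_backported` is a hypothetical helper introduced only for illustration):

# Sketch only: mirrors the `re_vlabel` / `re_stable_num` matching in __main__.py above.
import re

re_vlabel = re.compile(r'^v\d+\.\d+$')
re_stable_num = re.compile(r'\d+\.\d+$')

def is_backported(stable_branch_name, label_names):
    # hypothetical helper: True if any `vYY.N` label matches the stable branch name
    for name in label_names:
        if re_vlabel.match(name):
            num = re_stable_num.search(stable_branch_name)
            if num and f'v{num[0]}' == name:
                return True
    return False

print(is_backported('origin/19.4', ['pr-bugfix', 'v19.4']))  # True
print(is_backported('origin/19.3', ['pr-feature']))          # False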

utils/github/local.py Normal file (57 lines added)

@@ -0,0 +1,57 @@
# -*- coding: utf-8 -*-

# `pip install …`
import git # gitpython

import functools
import os
import re


class Local:
    '''Implements some useful methods atop of the local repository
    '''
    RE_STABLE_REF = re.compile(r'^refs/remotes/.+/\d+\.\d+$')

    def __init__(self, repo_path, remote_name, default_branch_name):
        self._repo = git.Repo(repo_path, search_parent_directories=(not repo_path))
        self._remote = self._repo.remotes[remote_name]
        self._default = self._remote.refs[default_branch_name]

        # public sort key: orders commits by ancestry (ancestors come first)
        def cmp(x, y):
            if x == y:
                return 0
            if self._repo.is_ancestor(x, y):
                return -1
            else:
                return 1
        self.comparator = functools.cmp_to_key(cmp)

    def get_head_commit(self):
        return self._repo.commit(self._default)

    def iterate(self, begin, end):
        rev_range = '{}...{}'.format(begin, end)
        for commit in self._repo.iter_commits(rev_range, first_parent=True):
            yield commit

    def get_stables(self):
        '''Returns a sorted list of tuples:
        * remote branch (git.refs.remote.RemoteReference),
        * base commit (git.Commit).

        The list is sorted by the base commits in ascending (ancestry) order.
        '''
        stables = []
        for stable in [r for r in self._remote.refs if Local.RE_STABLE_REF.match(r.path)]:
            base = self._repo.merge_base(self._default, self._repo.commit(stable))
            if not base:
                print(f'Branch {stable.path} is not based on branch {self._default}. Ignoring.')
            elif len(base) > 1:
                print(f'Branch {stable.path} has more than one base commit. Ignoring.')
            else:
                stables.append((stable, base[0]))
        return sorted(stables, key=lambda x: self.comparator(x[1]))
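
A minimal sketch (not from the commit; the repository path is an assumption) of why `functools.cmp_to_key` over `is_ancestor` works as a sort key: git history is a partial order, and the comparator turns it into a key object so that ancestor commits sort before their descendants.

# Sketch only: sort a handful of commits of the current repository in ancestry order.
import functools
import git  # gitpython

repo = git.Repo('.', search_parent_directories=True)

def cmp(x, y):
    if x == y:
        return 0
    return -1 if repo.is_ancestor(x, y) else 1

comparator = functools.cmp_to_key(cmp)

commits = list(repo.iter_commits('HEAD', max_count=5))
for commit in sorted(commits, key=comparator):
    print(commit.hexsha[:10], commit.summary)  # oldest ancestor first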

utils/github/query.py Normal file (208 lines added)

@@ -0,0 +1,208 @@
# -*- coding: utf-8 -*-

import requests


class Query:
    '''Implements queries to the Github API using GraphQL
    '''

    def __init__(self, token, max_page_size=100):
        self._token = token
        self._max_page_size = max_page_size

    _LABELS = '''
    {{
        repository(owner: "yandex" name: "ClickHouse") {{
            pullRequest(number: {number}) {{
                labels(first: {max_page_size} {next}) {{
                    pageInfo {{
                        hasNextPage
                        endCursor
                    }}
                    nodes {{
                        name
                        color
                    }}
                }}
            }}
        }}
    }}
    '''
    def get_labels(self, pull_request):
        '''Fetches all labels for the given pull-request

        Args:
            pull_request: JSON object returned by `get_pull_requests()`

        Returns:
            labels: a list of JSON nodes with the name and color fields
        '''
        labels = [label for label in pull_request['labels']['nodes']]
        not_end = bool(pull_request['labels']['pageInfo']['hasNextPage'])
        query = Query._LABELS.format(number=pull_request['number'], max_page_size=self._max_page_size, next=f'after: "{pull_request["labels"]["pageInfo"]["endCursor"]}"')

        while not_end:
            result = self._run(query)['data']['repository']['pullRequest']['labels']
            not_end = result['pageInfo']['hasNextPage']
            labels += [label for label in result['nodes']]
            query = Query._LABELS.format(number=pull_request['number'], max_page_size=self._max_page_size, next=f'after: "{result["pageInfo"]["endCursor"]}"')

        return labels

    _MAX_PULL_REQUESTS = 5
    _PULL_REQUESTS = '''
    {{
        repository(owner: "yandex" name: "ClickHouse") {{
            defaultBranchRef {{
                name
                target {{
                    ... on Commit {{
                        history(first: {max_page_size} {next}) {{
                            pageInfo {{
                                hasNextPage
                                endCursor
                            }}
                            nodes {{
                                oid
                                associatedPullRequests(first: {max_pull_requests}) {{
                                    totalCount
                                    nodes {{
                                        ... on PullRequest {{
                                            number
                                            url
                                            baseRefName
                                            baseRepository {{
                                                nameWithOwner
                                            }}
                                            mergeCommit {{
                                                oid
                                                author {{
                                                    user {{
                                                        id
                                                    }}
                                                    name
                                                }}
                                            }}
                                            labels(first: {max_page_size}) {{
                                                pageInfo {{
                                                    hasNextPage
                                                    endCursor
                                                }}
                                                nodes {{
                                                    name
                                                    color
                                                }}
                                            }}
                                        }}
                                    }}
                                }}
                            }}
                        }}
                    }}
                }}
            }}
        }}
    }}
    '''
    def get_pull_requests(self, before_commit, author):
        '''Get all merged pull-requests from the HEAD of the default branch down to the given commit (excluding it)

        Args:
            before_commit (string-convertible): commit sha of the last commit (excluding)
            author (string): filter pull-requests by author name

        Returns:
            pull_requests: a list of JSON nodes with pull-requests' details
        '''
        pull_requests = []
        query = Query._PULL_REQUESTS.format(max_page_size=self._max_page_size, max_pull_requests=Query._MAX_PULL_REQUESTS, next='')
        not_end = True
        user_id = self.get_user(author) if author else None

        while not_end:
            result = self._run(query)['data']['repository']['defaultBranchRef']
            default_branch_name = result['name']
            result = result['target']['history']
            not_end = result['pageInfo']['hasNextPage']

            for commit in result['nodes']:
                if str(commit['oid']) == str(before_commit):
                    not_end = False
                    break

                # TODO: fetch all pull-requests that were merged in a single commit.
                assert commit['associatedPullRequests']['totalCount'] <= Query._MAX_PULL_REQUESTS, \
                    f'there are {commit["associatedPullRequests"]["totalCount"]} pull-requests merged in commit {commit["oid"]}'

                for pull_request in commit['associatedPullRequests']['nodes']:
                    if (pull_request['baseRepository']['nameWithOwner'] == 'yandex/ClickHouse' and
                        pull_request['baseRefName'] == default_branch_name and
                        pull_request['mergeCommit']['oid'] == commit['oid'] and
                        (not user_id or pull_request['mergeCommit']['author']['user']['id'] == user_id)):
                        pull_requests.append(pull_request)

            query = Query._PULL_REQUESTS.format(max_page_size=self._max_page_size, max_pull_requests=Query._MAX_PULL_REQUESTS, next=f'after: "{result["pageInfo"]["endCursor"]}"')

        return pull_requests

    _DEFAULT = '''
    {
        repository(owner: "yandex", name: "ClickHouse") {
            defaultBranchRef {
                name
            }
        }
    }
    '''
    def get_default_branch(self):
        '''Get the short name of the default branch

        Returns:
            name (string): branch name
        '''
        return self._run(Query._DEFAULT)['data']['repository']['defaultBranchRef']['name']

    _USER = '''
    {{
        user(login: "{login}") {{
            id
        }}
    }}
    '''
    def get_user(self, login):
        '''Returns the user id by login
        '''
        return self._run(Query._USER.format(login=login))['data']['user']['id']

    def _run(self, query):
        from requests.adapters import HTTPAdapter
        from urllib3.util.retry import Retry

        def requests_retry_session(
            retries=3,
            backoff_factor=0.3,
            status_forcelist=(500, 502, 504),
            session=None,
        ):
            session = session or requests.Session()
            retry = Retry(
                total=retries,
                read=retries,
                connect=retries,
                backoff_factor=backoff_factor,
                status_forcelist=status_forcelist,
            )
            adapter = HTTPAdapter(max_retries=retry)
            session.mount('http://', adapter)
            session.mount('https://', adapter)
            return session

        headers = {'Authorization': f'bearer {self._token}'}
        request = requests_retry_session().post('https://api.github.com/graphql', json={'query': query}, headers=headers)
        if request.status_code == 200:
            return request.json()
        else:
            raise Exception(f'Query failed with code {request.status_code}: {query}')
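
As a reference point for the retrying wrapper above, the request that `Query._run()` ultimately performs is a plain authenticated POST to the GraphQL endpoint. A minimal sketch (not part of this commit; reading the token from a GITHUB_TOKEN environment variable is an assumption made only for the example):

# Sketch only: the same default-branch query as Query._DEFAULT, sent without the retry session.
import os
import requests

query = '{ repository(owner: "yandex", name: "ClickHouse") { defaultBranchRef { name } } }'
headers = {'Authorization': f'bearer {os.environ["GITHUB_TOKEN"]}'}  # assumed env variable
response = requests.post('https://api.github.com/graphql', json={'query': query}, headers=headers)
response.raise_for_status()
print(response.json()['data']['repository']['defaultBranchRef']['name'])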

utils/list_backports.sh Executable file (4 lines added)

@@ -0,0 +1,4 @@
#!/bin/sh
set -e
python3 -m github "$@"