2021-11-02 10:59:25 +00:00
|
|
|
#!/usr/bin/env python3
|
|
|
|
|
|
|
|
import json
|
|
|
|
import time
|
|
|
|
import fnmatch
|
|
|
|
from collections import namedtuple
|
2021-11-04 15:47:35 +00:00
|
|
|
import jwt
|
2021-11-02 10:59:25 +00:00
|
|
|
|
|
|
|
import requests
|
|
|
|
import boto3
|
|
|
|
|
2021-11-03 15:25:15 +00:00
|
|
|
API_URL = 'https://api.github.com/repos/ClickHouse/ClickHouse'

# A PR touching at least this many files is always sent to manual approval.
SUSPICIOUS_CHANGED_FILES_NUMBER = 200

# fnmatch-style patterns; changes to these paths can alter CI itself or the
# release machinery, so runs touching them are never auto-approved.
SUSPICIOUS_PATTERNS = [
    "tests/ci/*",
    "docs/tools/*",
    ".github/*",
    "utils/release/*",
    "docker/*",
    "release",
]

# Number of attempts for each GitHub API call before giving up.
MAX_RETRY = 5

# The facts about a workflow_run webhook event that the approval logic needs.
WorkflowDescription = namedtuple('WorkflowDescription',
                                 ['name', 'action', 'run_id', 'event', 'workflow_id',
                                  'fork_owner_login', 'fork_branch'])

# Workflow ids that are approved regardless of who triggered the run.
TRUSTED_WORKFLOW_IDS = {
    14586616,  # Cancel workflows, always trusted
}

# GitHub organization ids whose members are trusted automatically.
TRUSTED_ORG_IDS = {
    7409213,   # yandex
    28471076,  # altinity
    54801242,  # clickhouse
}

# Individual trusted contributors who are not in any trusted organization.
# Can be changed in runtime: we will append users that we learned to be in
# a trusted org, to save GitHub API calls.
TRUSTED_CONTRIBUTORS = {
    "achimbab",
    # NOTE(review): the original entry was "adevyatova " with a trailing
    # space; GitHub logins cannot contain spaces, so that entry could never
    # match and the user was effectively not trusted.
    "adevyatova",  # DOCSUP
    "Algunenano",  # Raúl Marín, Tinybird
    "AnaUvarova",  # DOCSUP
    "anauvarova",  # technical writer, Yandex
    "annvsh",  # technical writer, Yandex
    "atereh",  # DOCSUP
    "azat",
    "bharatnc",  # Newbie, but already with many contributions.
    "bobrik",  # Seasoned contributor, CloundFlare
    "BohuTANG",
    "damozhaeva",  # DOCSUP
    "den-crane",
    "gyuton",  # DOCSUP
    "hagen1778",  # Roman Khavronenko, seasoned contributor
    "hczhcz",
    "hexiaoting",  # Seasoned contributor
    "ildus",  # adjust, ex-pgpro
    "javisantana",  # a Spanish ClickHouse enthusiast, ex-Carto
    "ka1bi4",  # DOCSUP
    "kirillikoff",  # DOCSUP
    "kreuzerkrieg",
    "lehasm",  # DOCSUP
    "michon470",  # DOCSUP
    "MyroTk",  # Tester in Altinity
    "myrrc",  # Michael Kot, Altinity
    "nikvas0",
    "nvartolomei",
    "olgarev",  # DOCSUP
    "otrazhenia",  # Yandex docs contractor
    "pdv-ru",  # DOCSUP
    "podshumok",  # cmake expert from QRator Labs
    "s-mx",  # Maxim Sabyanin, former employee, present contributor
    "sevirov",  # technical writer, Yandex
    "spongedu",  # Seasoned contributor
    "ucasfl",  # Amos Bird's friend
    "vdimir",  # Employee
    "vzakaznikov",
    "YiuRULE",
    "zlobober"  # Developer of YT
}
|
|
|
|
|
2021-11-04 15:47:35 +00:00
|
|
|
|
|
|
|
def get_installation_id(jwt_token):
    """Return the id of the first installation of this GitHub App.

    `jwt_token` is an app-level JWT; raises requests.HTTPError on a bad
    response.
    """
    auth_headers = {
        "Accept": "application/vnd.github.v3+json",
        "Authorization": f"Bearer {jwt_token}",
    }
    resp = requests.get("https://api.github.com/app/installations", headers=auth_headers)
    resp.raise_for_status()
    # The app is installed exactly once; take the first (only) installation.
    return resp.json()[0]['id']
|
|
|
|
|
|
|
|
def get_access_token(jwt_token, installation_id):
    """Exchange an app JWT for an installation access token.

    Raises requests.HTTPError on a bad response.
    """
    auth_headers = {
        "Accept": "application/vnd.github.v3+json",
        "Authorization": f"Bearer {jwt_token}",
    }
    tokens_url = f"https://api.github.com/app/installations/{installation_id}/access_tokens"
    resp = requests.post(tokens_url, headers=auth_headers)
    resp.raise_for_status()
    return resp.json()['token']
|
|
|
|
|
|
|
|
def get_key_and_app_from_aws():
    """Fetch the GitHub App private key and app id from AWS Secrets Manager.

    Returns a (private_key, app_id) tuple; app_id is converted to int.
    """
    session = boto3.session.Session()
    sm_client = session.client(service_name='secretsmanager')
    secret = sm_client.get_secret_value(SecretId="clickhouse_github_secret_key")
    parsed = json.loads(secret['SecretString'])
    return parsed['clickhouse-app-key'], int(parsed['clickhouse-app-id'])
|
|
|
|
|
|
|
|
|
2021-11-25 16:25:29 +00:00
|
|
|
def is_trusted_contributor(pr_user_login, pr_user_orgs):
    """Return True if the PR author is individually trusted or belongs to
    one of the trusted organizations.

    `pr_user_orgs` is an iterable of GitHub org ids the user is a member of.
    """
    if pr_user_login in TRUSTED_CONTRIBUTORS:
        print(f"User '{pr_user_login}' is trusted")
        return True

    print(f"User '{pr_user_login}' is not trusted")

    # Not individually trusted — fall back to the org membership check.
    for org_id in pr_user_orgs:
        if org_id in TRUSTED_ORG_IDS:
            print(f"Org '{org_id}' is trusted; will mark user {pr_user_login} as trusted")
            return True
        print(f"Org '{org_id}' is not trusted")

    return False
|
|
|
|
|
|
|
|
def _exec_get_with_retry(url):
    """GET `url` and return the parsed JSON body.

    Retries up to MAX_RETRY times with a linearly growing back-off;
    raises a generic Exception when every attempt fails.
    """
    for attempt in range(MAX_RETRY):
        try:
            resp = requests.get(url)
            resp.raise_for_status()
            return resp.json()
        except Exception as ex:
            print("Got exception executing request", ex)
            time.sleep(attempt + 1)

    raise Exception("Cannot execute GET request with retries")
|
|
|
|
|
|
|
|
def _exec_post_with_retry(url, token, data=None):
    """POST `url` with token auth and optional JSON payload `data`.

    Retries up to MAX_RETRY times with a linearly growing back-off and
    returns the parsed JSON response. A 403 saying the workflow run is not
    waiting for approval is treated as success (the run was already
    approved). Raises a generic Exception when every attempt fails.
    """
    headers = {
        "Authorization": f"token {token}"
    }
    for i in range(MAX_RETRY):
        try:
            if data:
                response = requests.post(url, headers=headers, json=data)
            else:
                response = requests.post(url, headers=headers)
            if response.status_code == 403:
                # BUG FIX: the original assigned the parsed body to `data`,
                # clobbering the payload parameter — a subsequent retry would
                # then POST the error body instead of the caller's payload.
                # Use a local name so retries keep the original payload.
                body = response.json()
                if 'message' in body and body['message'] == 'This workflow run is not waiting for approval':
                    print("Workflow doesn't need approval")
                    return body
            response.raise_for_status()
            return response.json()
        except Exception as ex:
            print("Got exception executing request", ex)
            time.sleep(i + 1)

    raise Exception("Cannot execute POST request with retry")
|
|
|
|
|
|
|
|
def _get_pull_requests_from(owner, branch):
    """Return the open pull requests whose head is `owner:branch`."""
    return _exec_get_with_retry(f"{API_URL}/pulls?head={owner}:{branch}")
|
|
|
|
|
|
|
|
def get_workflow_description_from_event(event):
    """Build a WorkflowDescription from a `workflow_run` webhook payload."""
    run = event['workflow_run']
    return WorkflowDescription(
        name=run['name'],
        action=event['action'],
        run_id=run['id'],
        event=run['event'],
        fork_owner_login=run['head_repository']['owner']['login'],
        fork_branch=run['head_branch'],
        workflow_id=run['workflow_id'],
    )
|
|
|
|
|
2021-11-25 16:25:29 +00:00
|
|
|
def get_pr_author_and_orgs(pull_request):
    """Return the PR author's login and the ids of their public GitHub orgs."""
    user = pull_request['user']
    org_records = _exec_get_with_retry(user['organizations_url'])
    return user['login'], [record['id'] for record in org_records]
|
2021-11-02 10:59:25 +00:00
|
|
|
|
|
|
|
def get_changed_files_for_pull_request(pull_request):
    """Collect the set of file paths changed by a pull request.

    Pages through the GitHub files API (100 per page, at most 30 pages) and
    stops early once SUSPICIOUS_CHANGED_FILES_NUMBER files were seen — past
    that point the run needs manual approval anyway.
    """
    pr_number = pull_request['number']

    changed_files = set()
    for page in range(1, 31):
        print("Requesting changed files page", page)
        page_url = f"{API_URL}/pulls/{pr_number}/files?page={page}&per_page=100"
        page_data = _exec_get_with_retry(page_url)
        print(f"Got {len(page_data)} changed files")
        if not page_data:
            print("No more changed files")
            break

        changed_files.update(change['filename'] for change in page_data)

        if len(changed_files) >= SUSPICIOUS_CHANGED_FILES_NUMBER:
            print(f"More than {len(changed_files)} changed files. Will stop fetching new files.")
            break

    return changed_files
|
|
|
|
|
|
|
|
def check_suspicious_changed_files(changed_files):
    """Return True when a run must NOT be auto-approved.

    That is the case when too many files changed, or any changed path
    matches one of SUSPICIOUS_PATTERNS.
    """
    if len(changed_files) >= SUSPICIOUS_CHANGED_FILES_NUMBER:
        print(f"Too many files changed {len(changed_files)}, need manual approve")
        return True

    for path in changed_files:
        for pattern in SUSPICIOUS_PATTERNS:
            if not fnmatch.fnmatch(path, pattern):
                continue
            print(f"File {path} match suspicious pattern {pattern}, will not approve automatically")
            return True

    print("No changed files match suspicious patterns, run will be approved")
    return False
|
|
|
|
|
|
|
|
def approve_run(run_id, token):
    """Approve a pending workflow run through the GitHub API."""
    _exec_post_with_retry(f"{API_URL}/actions/runs/{run_id}/approve", token)
|
|
|
|
|
|
|
|
def label_manual_approve(pull_request, token):
    """Attach the 'manual approve' label to the pull request's issue."""
    number = pull_request['number']
    url = f"{API_URL}/issues/{number}/labels"
    # BUG FIX: GitHub's "add labels to an issue" endpoint expects `labels`
    # to be an array of label names; a bare string is rejected.
    data = {"labels": ["manual approve"]}

    _exec_post_with_retry(url, token, data)
|
|
|
|
|
|
|
|
def get_token_from_aws():
    """Return a GitHub installation access token.

    Reads the app's private key and id from AWS Secrets Manager, signs a
    short-lived (10 min) RS256 JWT, and exchanges it for an installation
    token. The `iat` is backdated 60s to tolerate clock skew.
    """
    private_key, app_id = get_key_and_app_from_aws()
    jwt_claims = {
        "iat": int(time.time()) - 60,
        "exp": int(time.time()) + 10 * 60,
        "iss": app_id,
    }

    app_jwt = jwt.encode(jwt_claims, private_key, algorithm="RS256")
    installation_id = get_installation_id(app_jwt)
    return get_access_token(app_jwt, installation_id)
|
2021-11-02 10:59:25 +00:00
|
|
|
|
|
|
|
def main(event):
    """Decide whether a forked-repo workflow run may start.

    Runs of trusted workflows, or runs triggered by trusted contributors,
    are approved immediately. Otherwise the PR's changed files are
    inspected: suspicious PRs get a 'manual approve' label, clean ones are
    approved.
    """
    token = get_token_from_aws()
    payload = json.loads(event['body'])
    desc = get_workflow_description_from_event(payload)

    print("Got workflow description", desc)
    # Only newly-requested runs need a decision.
    if desc.action != "requested":
        print("Exiting, event action is", desc.action)
        return

    if desc.workflow_id in TRUSTED_WORKFLOW_IDS:
        print("Workflow in trusted list, approving run")
        approve_run(desc.run_id, token)
        return

    prs = _get_pull_requests_from(desc.fork_owner_login, desc.fork_branch)

    print("Got pull requests for workflow", len(prs))
    if len(prs) > 1:
        raise Exception("Received more than one PR for workflow run")

    if len(prs) < 1:
        raise Exception("Cannot find any pull requests for workflow run")

    pull_request = prs[0]
    print("Pull request for workflow number", pull_request['number'])

    author, author_orgs = get_pr_author_and_orgs(pull_request)
    if is_trusted_contributor(author, author_orgs):
        print("Contributor is trusted, approving run")
        approve_run(desc.run_id, token)
        return

    changed_files = get_changed_files_for_pull_request(pull_request)
    print(f"Totally have {len(changed_files)} changed files in PR:", changed_files)
    if check_suspicious_changed_files(changed_files):
        print(f"Pull Request {pull_request['number']} has suspicious changes, label it for manuall approve")
        label_manual_approve(pull_request, token)
    else:
        print(f"Pull Request {pull_request['number']} has no suspicious changes")
        approve_run(desc.run_id, token)
|
|
|
|
|
|
|
|
def handler(event, _):
    """AWS Lambda entry point; the second (context) argument is unused."""
    main(event)
|