ClickHouse/tests/ci/approve_lambda/app.py
2021-11-02 13:59:25 +03:00

244 lines
8.2 KiB
Python

#!/usr/bin/env python3
import logging
import json
import time
import fnmatch
from collections import namedtuple
import requests
import boto3
API_URL = 'https://api.github.com/repos/ClickHouse/ClickHouse/'
SUSPICIOUS_CHANGED_FILES_NUMBER = 200
SUSPICIOUS_PATTERNS = [
"tests/ci/*",
"docs/tools/*",
".github/*",
"utils/release/*",
"docker/*",
"release",
]
MAX_RETRY = 5
WorkflowDescription = namedtuple('WorkflowDescription',
['action', 'run_id', 'event', 'sender_login', 'fork_owner_login', 'fork_branch', 'sender_orgs'])
TRUSTED_ORG_IDS = {
7409213, # yandex
28471076, # altinity
54801242, # clickhouse
}
# Individual trusted contirbutors who are not in any trusted organization.
# Can be changed in runtime: we will append users that we learned to be in
# a trusted org, to save GitHub API calls.
TRUSTED_CONTRIBUTORS = {
"achimbab",
"adevyatova ", # DOCSUP
"Algunenano", # Raúl Marín, Tinybird
"AnaUvarova", # DOCSUP
"anauvarova", # technical writer, Yandex
"annvsh", # technical writer, Yandex
"atereh", # DOCSUP
"azat",
"bharatnc", # Newbie, but already with many contributions.
"bobrik", # Seasoned contributor, CloundFlare
"BohuTANG",
"damozhaeva", # DOCSUP
"den-crane",
"gyuton", # DOCSUP
"gyuton", # technical writer, Yandex
"hagen1778", # Roman Khavronenko, seasoned contributor
"hczhcz",
"hexiaoting", # Seasoned contributor
"ildus", # adjust, ex-pgpro
"javisantana", # a Spanish ClickHouse enthusiast, ex-Carto
"ka1bi4", # DOCSUP
"kirillikoff", # DOCSUP
"kitaisreal", # Seasoned contributor
"kreuzerkrieg",
"lehasm", # DOCSUP
"michon470", # DOCSUP
"MyroTk", # Tester in Altinity
"myrrc", # Michael Kot, Altinity
"nikvas0",
"nvartolomei",
"olgarev", # DOCSUP
"otrazhenia", # Yandex docs contractor
"pdv-ru", # DOCSUP
"podshumok", # cmake expert from QRator Labs
"s-mx", # Maxim Sabyanin, former employee, present contributor
"sevirov", # technical writer, Yandex
"spongedu", # Seasoned contributor
"ucasFL", # Amos Bird's friend
"vdimir", # Employee
"vzakaznikov",
"YiuRULE",
"zlobober" # Developer of YT
}
def is_trusted_sender(pr_user_login, pr_user_orgs):
if pr_user_login in TRUSTED_CONTRIBUTORS:
logging.info("User '%s' is trusted", pr_user_login)
return True
logging.info("User '%s' is not trusted", pr_user_login)
for org_id in pr_user_orgs:
if org_id in TRUSTED_ORG_IDS:
logging.info("Org '%s' is trusted; will mark user %s as trusted", org_id, pr_user_login)
return True
logging.info("Org '%s' is not trusted", org_id)
return False
def _exec_get_with_retry(url):
for i in range(MAX_RETRY):
try:
response = requests.get(url)
response.raise_for_status()
return response.json()
except Exception as ex:
logging.info("Got exception executing request %s", ex)
time.sleep(i + 1)
raise Exception("Cannot execute GET request with retries")
def _exec_post_with_retry(url, token, data=None):
headers = {
"Authorization": f"token {token}"
}
for i in range(MAX_RETRY):
try:
if data:
response = requests.post(url, headers=headers, json=data)
else:
response = requests.post(url, headers=headers)
response.raise_for_status()
return response.json()
except Exception as ex:
logging.info("Got exception executing request %s", ex)
time.sleep(i + 1)
raise Exception("Cannot execute POST request with retry")
def _get_pull_requests_from(owner, branch):
url = f"{API_URL}/pulls?head={owner}:{branch}"
return _exec_get_with_retry(url)
def get_workflow_description_from_event(event):
action = event['action']
sender_login = event['sender']['login']
run_id = event['workflow_run']['id']
event = event['workflow_run']['event']
fork_owner = event['workflow_run']['head_repository']['owner']['login']
fork_branch = event['workflow_run']['head_branch']
orgs_data = _exec_get_with_retry(event['sender']['organizations_url'])
sender_orgs = [org['id'] for org in orgs_data]
return WorkflowDescription(
action=action,
sender_login=sender_login,
run_id=run_id,
event=event,
fork_owner_login=fork_owner,
fork_branch=fork_branch,
sender_orgs=sender_orgs,
)
def get_changed_files_for_pull_request(pull_request):
number = pull_request['number']
changed_files = set([])
for i in range(1, 31):
logging.info("Requesting changed files page %s", i)
url = f"{API_URL}/pulls/{number}/files?page={i}&per_page=100"
data = _exec_get_with_retry(url)
logging.info("Got %s changed files", len(data))
for change in data:
logging.info("Adding changed file %s", change['filename'])
changed_files.add(change['filename'])
if len(changed_files) >= SUSPICIOUS_CHANGED_FILES_NUMBER:
logging.info("More than %s changed files. Will stop fetching new files.", len(changed_files))
break
return changed_files
def check_suspicious_changed_files(changed_files):
if len(changed_files) >= SUSPICIOUS_CHANGED_FILES_NUMBER:
logging.info("Too many files changed %s, need manual approve", len(changed_files))
return True
for path in changed_files:
for pattern in SUSPICIOUS_PATTERNS:
if fnmatch.fnmatch(path, pattern):
logging.info("File %s match suspicious pattern %s, will not approve automatically", path, pattern)
return True
logging.info("File %s doesn't match suspicious pattern %s", path, pattern)
logging.info("No changed files match suspicious patterns, run will be approved")
return False
def approve_run(run_id, token):
url = f"{API_URL}/actions/runs/{run_id}/approve"
_exec_post_with_retry(url, token)
def label_manual_approve(pull_request, token):
number = pull_request['number']
url = f"{API_URL}/issues/{number}/labels"
data = {"labels" : "manual approve"}
_exec_post_with_retry(url, token, data)
def get_token_from_aws():
secret_name = "clickhouse_robot_token"
session = boto3.session.Session()
client = session.client(
service_name='secretsmanager',
)
get_secret_value_response = client.get_secret_value(
SecretId=secret_name
)
data = json.loads(get_secret_value_response['SecretString'])
return data['clickhouse_robot_token']
def main(event):
token = get_token_from_aws()
workflow_description = get_workflow_description_from_event(event)
logging.info("Got workflow description %s", workflow_description)
if is_trusted_sender(workflow_description.sender_login, workflow_description.sender_orgs):
approve_run(workflow_description.run_id, token)
return
pull_requests = _get_pull_requests_from(workflow_description.fork_owner_login, workflow_description.fork_branch)
logging.info("Got pull requests for workflow %s", len(pull_requests))
if len(pull_requests) > 1:
raise Exception("Received more than one PR for workflow run")
if len(pull_requests) < 1:
raise Exception("Cannot find any pull requests for workflow run")
pull_request = pull_requests[0]
logging.info("Pull request for workflow number %s", pull_request['number'])
changed_files = get_changed_files_for_pull_request(pull_request)
logging.info("Totally have %s changed files in PR", len(changed_files))
if check_suspicious_changed_files(changed_files):
logging.info("Pull Request %s has suspicious changes, label it for manuall approve", pull_request['number'])
label_manual_approve(pull_request, token)
else:
logging.info("Pull Request %s has no suspicious changes", pull_request['number'])
approve_run(workflow_description.run_id, token)
def handler(event, _):
logging.basicConfig(level=logging.INFO)
main(event)