mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-09-29 13:10:48 +00:00
244 lines
8.2 KiB
Python
244 lines
8.2 KiB
Python
#!/usr/bin/env python3
|
|
|
|
import logging
|
|
import json
|
|
import time
|
|
import fnmatch
|
|
from collections import namedtuple
|
|
|
|
import requests
|
|
import boto3
|
|
|
|
API_URL = 'https://api.github.com/repos/ClickHouse/ClickHouse/'
|
|
|
|
SUSPICIOUS_CHANGED_FILES_NUMBER = 200
|
|
|
|
SUSPICIOUS_PATTERNS = [
|
|
"tests/ci/*",
|
|
"docs/tools/*",
|
|
".github/*",
|
|
"utils/release/*",
|
|
"docker/*",
|
|
"release",
|
|
]
|
|
|
|
MAX_RETRY = 5
|
|
|
|
WorkflowDescription = namedtuple('WorkflowDescription',
|
|
['action', 'run_id', 'event', 'sender_login', 'fork_owner_login', 'fork_branch', 'sender_orgs'])
|
|
|
|
TRUSTED_ORG_IDS = {
|
|
7409213, # yandex
|
|
28471076, # altinity
|
|
54801242, # clickhouse
|
|
}
|
|
|
|
# Individual trusted contirbutors who are not in any trusted organization.
|
|
# Can be changed in runtime: we will append users that we learned to be in
|
|
# a trusted org, to save GitHub API calls.
|
|
TRUSTED_CONTRIBUTORS = {
|
|
"achimbab",
|
|
"adevyatova ", # DOCSUP
|
|
"Algunenano", # Raúl Marín, Tinybird
|
|
"AnaUvarova", # DOCSUP
|
|
"anauvarova", # technical writer, Yandex
|
|
"annvsh", # technical writer, Yandex
|
|
"atereh", # DOCSUP
|
|
"azat",
|
|
"bharatnc", # Newbie, but already with many contributions.
|
|
"bobrik", # Seasoned contributor, CloundFlare
|
|
"BohuTANG",
|
|
"damozhaeva", # DOCSUP
|
|
"den-crane",
|
|
"gyuton", # DOCSUP
|
|
"gyuton", # technical writer, Yandex
|
|
"hagen1778", # Roman Khavronenko, seasoned contributor
|
|
"hczhcz",
|
|
"hexiaoting", # Seasoned contributor
|
|
"ildus", # adjust, ex-pgpro
|
|
"javisantana", # a Spanish ClickHouse enthusiast, ex-Carto
|
|
"ka1bi4", # DOCSUP
|
|
"kirillikoff", # DOCSUP
|
|
"kitaisreal", # Seasoned contributor
|
|
"kreuzerkrieg",
|
|
"lehasm", # DOCSUP
|
|
"michon470", # DOCSUP
|
|
"MyroTk", # Tester in Altinity
|
|
"myrrc", # Michael Kot, Altinity
|
|
"nikvas0",
|
|
"nvartolomei",
|
|
"olgarev", # DOCSUP
|
|
"otrazhenia", # Yandex docs contractor
|
|
"pdv-ru", # DOCSUP
|
|
"podshumok", # cmake expert from QRator Labs
|
|
"s-mx", # Maxim Sabyanin, former employee, present contributor
|
|
"sevirov", # technical writer, Yandex
|
|
"spongedu", # Seasoned contributor
|
|
"ucasFL", # Amos Bird's friend
|
|
"vdimir", # Employee
|
|
"vzakaznikov",
|
|
"YiuRULE",
|
|
"zlobober" # Developer of YT
|
|
}
|
|
|
|
def is_trusted_sender(pr_user_login, pr_user_orgs):
|
|
if pr_user_login in TRUSTED_CONTRIBUTORS:
|
|
logging.info("User '%s' is trusted", pr_user_login)
|
|
return True
|
|
|
|
logging.info("User '%s' is not trusted", pr_user_login)
|
|
|
|
for org_id in pr_user_orgs:
|
|
if org_id in TRUSTED_ORG_IDS:
|
|
logging.info("Org '%s' is trusted; will mark user %s as trusted", org_id, pr_user_login)
|
|
return True
|
|
logging.info("Org '%s' is not trusted", org_id)
|
|
|
|
return False
|
|
|
|
def _exec_get_with_retry(url):
|
|
for i in range(MAX_RETRY):
|
|
try:
|
|
response = requests.get(url)
|
|
response.raise_for_status()
|
|
return response.json()
|
|
except Exception as ex:
|
|
logging.info("Got exception executing request %s", ex)
|
|
time.sleep(i + 1)
|
|
|
|
raise Exception("Cannot execute GET request with retries")
|
|
|
|
def _exec_post_with_retry(url, token, data=None):
|
|
headers = {
|
|
"Authorization": f"token {token}"
|
|
}
|
|
for i in range(MAX_RETRY):
|
|
try:
|
|
if data:
|
|
response = requests.post(url, headers=headers, json=data)
|
|
else:
|
|
response = requests.post(url, headers=headers)
|
|
response.raise_for_status()
|
|
return response.json()
|
|
except Exception as ex:
|
|
logging.info("Got exception executing request %s", ex)
|
|
time.sleep(i + 1)
|
|
|
|
raise Exception("Cannot execute POST request with retry")
|
|
|
|
|
|
def _get_pull_requests_from(owner, branch):
|
|
url = f"{API_URL}/pulls?head={owner}:{branch}"
|
|
return _exec_get_with_retry(url)
|
|
|
|
|
|
def get_workflow_description_from_event(event):
|
|
action = event['action']
|
|
sender_login = event['sender']['login']
|
|
run_id = event['workflow_run']['id']
|
|
event = event['workflow_run']['event']
|
|
fork_owner = event['workflow_run']['head_repository']['owner']['login']
|
|
fork_branch = event['workflow_run']['head_branch']
|
|
orgs_data = _exec_get_with_retry(event['sender']['organizations_url'])
|
|
sender_orgs = [org['id'] for org in orgs_data]
|
|
return WorkflowDescription(
|
|
action=action,
|
|
sender_login=sender_login,
|
|
run_id=run_id,
|
|
event=event,
|
|
fork_owner_login=fork_owner,
|
|
fork_branch=fork_branch,
|
|
sender_orgs=sender_orgs,
|
|
)
|
|
|
|
|
|
def get_changed_files_for_pull_request(pull_request):
|
|
number = pull_request['number']
|
|
|
|
changed_files = set([])
|
|
for i in range(1, 31):
|
|
logging.info("Requesting changed files page %s", i)
|
|
url = f"{API_URL}/pulls/{number}/files?page={i}&per_page=100"
|
|
data = _exec_get_with_retry(url)
|
|
logging.info("Got %s changed files", len(data))
|
|
for change in data:
|
|
logging.info("Adding changed file %s", change['filename'])
|
|
changed_files.add(change['filename'])
|
|
|
|
if len(changed_files) >= SUSPICIOUS_CHANGED_FILES_NUMBER:
|
|
logging.info("More than %s changed files. Will stop fetching new files.", len(changed_files))
|
|
break
|
|
|
|
return changed_files
|
|
|
|
def check_suspicious_changed_files(changed_files):
|
|
if len(changed_files) >= SUSPICIOUS_CHANGED_FILES_NUMBER:
|
|
logging.info("Too many files changed %s, need manual approve", len(changed_files))
|
|
return True
|
|
|
|
for path in changed_files:
|
|
for pattern in SUSPICIOUS_PATTERNS:
|
|
if fnmatch.fnmatch(path, pattern):
|
|
logging.info("File %s match suspicious pattern %s, will not approve automatically", path, pattern)
|
|
return True
|
|
logging.info("File %s doesn't match suspicious pattern %s", path, pattern)
|
|
|
|
logging.info("No changed files match suspicious patterns, run will be approved")
|
|
return False
|
|
|
|
def approve_run(run_id, token):
|
|
url = f"{API_URL}/actions/runs/{run_id}/approve"
|
|
_exec_post_with_retry(url, token)
|
|
|
|
def label_manual_approve(pull_request, token):
|
|
number = pull_request['number']
|
|
url = f"{API_URL}/issues/{number}/labels"
|
|
data = {"labels" : "manual approve"}
|
|
|
|
_exec_post_with_retry(url, token, data)
|
|
|
|
def get_token_from_aws():
|
|
secret_name = "clickhouse_robot_token"
|
|
session = boto3.session.Session()
|
|
client = session.client(
|
|
service_name='secretsmanager',
|
|
)
|
|
get_secret_value_response = client.get_secret_value(
|
|
SecretId=secret_name
|
|
)
|
|
data = json.loads(get_secret_value_response['SecretString'])
|
|
return data['clickhouse_robot_token']
|
|
|
|
def main(event):
|
|
token = get_token_from_aws()
|
|
workflow_description = get_workflow_description_from_event(event)
|
|
logging.info("Got workflow description %s", workflow_description)
|
|
|
|
if is_trusted_sender(workflow_description.sender_login, workflow_description.sender_orgs):
|
|
approve_run(workflow_description.run_id, token)
|
|
return
|
|
|
|
pull_requests = _get_pull_requests_from(workflow_description.fork_owner_login, workflow_description.fork_branch)
|
|
logging.info("Got pull requests for workflow %s", len(pull_requests))
|
|
if len(pull_requests) > 1:
|
|
raise Exception("Received more than one PR for workflow run")
|
|
|
|
if len(pull_requests) < 1:
|
|
raise Exception("Cannot find any pull requests for workflow run")
|
|
|
|
pull_request = pull_requests[0]
|
|
logging.info("Pull request for workflow number %s", pull_request['number'])
|
|
|
|
changed_files = get_changed_files_for_pull_request(pull_request)
|
|
logging.info("Totally have %s changed files in PR", len(changed_files))
|
|
if check_suspicious_changed_files(changed_files):
|
|
logging.info("Pull Request %s has suspicious changes, label it for manuall approve", pull_request['number'])
|
|
label_manual_approve(pull_request, token)
|
|
else:
|
|
logging.info("Pull Request %s has no suspicious changes", pull_request['number'])
|
|
approve_run(workflow_description.run_id, token)
|
|
|
|
def handler(event, _):
|
|
logging.basicConfig(level=logging.INFO)
|
|
main(event)
|