ClickHouse/utils/github-hook/hook.py

321 lines
11 KiB
Python
Raw Normal View History

2020-06-11 15:55:44 +00:00
# -*- coding: utf-8 -*-
import json
import requests
import time
import os
DB = 'gh-data'
RETRIES = 5
2020-07-31 13:41:56 +00:00
API_URL = 'https://api.github.com/repos/ClickHouse/ClickHouse/'
def _reverse_dict_with_list(source):
result = {}
2020-10-02 16:54:07 +00:00
for key, value in list(source.items()):
2020-07-31 13:41:56 +00:00
for elem in value:
result[elem] = key
return result
MARKER_TO_LABEL = {
'- New Feature': ['pr-feature'],
'- Bug Fix': ['pr-bugfix'],
'- Improvement': ['pr-improvement'],
'- Performance Improvement': ['pr-performance'],
'- Backward Incompatible Change': ['pr-backward-incompatible'],
'- Build/Testing/Packaging Improvement': ['pr-build'],
'- Documentation': ['pr-documentation', 'pr-doc-fix'],
'- Other': ['pr-other'],
'- Not for changelog': ['pr-not-for-changelog']
}
LABEL_TO_MARKER = _reverse_dict_with_list(MARKER_TO_LABEL)
DOC_ALERT_LABELS = {
'pr-feature'
}
def set_labels_for_pr(pull_request_number, labels, headers):
data = {
"labels": list(labels)
}
for i in range(RETRIES):
try:
response = requests.put(API_URL + 'issues/' + str(pull_request_number) + '/labels', json=data, headers=headers)
response.raise_for_status()
break
except Exception as ex:
2020-10-02 16:54:07 +00:00
print(("Exception", ex))
2020-07-31 13:41:56 +00:00
time.sleep(0.2)
def get_required_labels_from_desc(description, current_labels):
result = set([])
# find first matching category
2020-10-02 16:54:07 +00:00
for marker, labels in list(MARKER_TO_LABEL.items()):
2020-07-31 13:41:56 +00:00
if marker in description:
if not any(label in current_labels for label in labels):
result.add(labels[0])
break
# if no category than leave as is
if not result:
return current_labels
# save all old labels except category label
for label in current_labels:
if label not in result and label not in LABEL_TO_MARKER:
result.add(label)
# if some of labels require doc alert
if any(label in result for label in DOC_ALERT_LABELS):
result.add('doc-alert')
return result
def label_pull_request_event(response):
pull_request = response['pull_request']
current_labels = set([label['name'] for label in pull_request['labels']])
pr_description = pull_request['body'] if pull_request['body'] else ''
required_labels = get_required_labels_from_desc(pr_description, current_labels)
if not required_labels.issubset(current_labels):
token = os.getenv('GITHUB_TOKEN')
auth = {'Authorization': 'token ' + token}
set_labels_for_pr(pull_request['number'], required_labels, auth)
2020-06-11 15:55:44 +00:00
def process_issue_event(response):
issue = response['issue']
return dict(
action=response['action'],
sender=response['sender']['login'],
updated_at=issue['updated_at'],
url=issue['url'],
number=issue['number'],
author=issue['user']['login'],
labels=[label['name'] for label in issue['labels']],
state=issue['state'],
assignees=[assignee['login'] for assignee in issue['assignees']],
created_at=issue['created_at'],
2020-06-24 14:48:43 +00:00
body=issue['body'] if issue['body'] else '',
2020-06-11 15:55:44 +00:00
title=issue['title'],
comments=issue['comments'],
raw_json=json.dumps(response),)
def process_issue_comment_event(response):
issue = response['issue']
comment = response['comment']
return dict(
action='comment_' + response['action'],
sender=response['sender']['login'],
updated_at=issue['updated_at'],
url=issue['url'],
number=issue['number'],
author=issue['user']['login'],
labels=[label['name'] for label in issue['labels']],
state=issue['state'],
assignees=[assignee['login'] for assignee in issue['assignees']],
created_at=issue['created_at'],
2020-06-24 14:48:43 +00:00
body=issue['body'] if issue['body'] else '',
2020-06-11 15:55:44 +00:00
title=issue['title'],
comments=issue['comments'],
comment_body=comment['body'],
comment_author=comment['user']['login'],
comment_url=comment['url'],
comment_created_at=comment['created_at'],
comment_updated_at=comment['updated_at'],
raw_json=json.dumps(response),)
def process_pull_request_event(response):
pull_request = response['pull_request']
result = dict(
updated_at=pull_request['updated_at'],
number=pull_request['number'],
action=response['action'],
sender=response['sender']['login'],
url=pull_request['url'],
author=pull_request['user']['login'],
labels=[label['name'] for label in pull_request['labels']],
state=pull_request['state'],
2020-06-24 14:48:43 +00:00
body=pull_request['body'] if pull_request['body'] else '',
2020-06-11 15:55:44 +00:00
title=pull_request['title'],
created_at=pull_request['created_at'],
assignees=[assignee['login'] for assignee in pull_request['assignees']],
requested_reviewers=[reviewer['login'] for reviewer in pull_request['requested_reviewers']],
head_repo=pull_request['head']['repo']['full_name'],
head_ref=pull_request['head']['ref'],
head_clone_url=pull_request['head']['repo']['clone_url'],
head_ssh_url=pull_request['head']['repo']['ssh_url'],
base_repo=pull_request['base']['repo']['full_name'],
base_ref=pull_request['base']['ref'],
base_clone_url=pull_request['base']['repo']['clone_url'],
base_ssh_url=pull_request['base']['repo']['ssh_url'],
raw_json=json.dumps(response),
)
if 'mergeable' in pull_request and pull_request['mergeable'] is not None:
result['mergeable'] = 1 if pull_request['mergeable'] else 0
if 'merged_by' in pull_request and pull_request['merged_by'] is not None:
result['merged_by'] = pull_request['merged_by']['login']
if 'merged_at' in pull_request and pull_request['merged_at'] is not None:
result['merged_at'] = pull_request['merged_at']
if 'closed_at' in pull_request and pull_request['closed_at'] is not None:
result['closed_at'] = pull_request['closed_at']
if 'merge_commit_sha' in pull_request and pull_request['merge_commit_sha'] is not None:
result['merge_commit_sha'] = pull_request['merge_commit_sha']
if 'draft' in pull_request:
result['draft'] = 1 if pull_request['draft'] else 0
for field in ['comments', 'review_comments', 'commits', 'additions', 'deletions', 'changed_files']:
if field in pull_request:
result[field] = pull_request[field]
return result
def process_pull_request_review(response):
result = process_pull_request_event(response)
review = response['review']
result['action'] = 'review_' + result['action']
result['review_body'] = review['body'] if review['body'] is not None else ''
result['review_id'] = review['id']
result['review_author'] = review['user']['login']
result['review_commit_sha'] = review['commit_id']
result['review_submitted_at'] = review['submitted_at']
result['review_state'] = review['state']
return result
def process_pull_request_review_comment(response):
result = process_pull_request_event(response)
comment = response['comment']
result['action'] = 'review_comment_' + result['action']
result['review_id'] = comment['pull_request_review_id']
result['review_comment_path'] = comment['path']
result['review_commit_sha'] = comment['commit_id']
result['review_comment_body'] = comment['body']
result['review_comment_author'] = comment['user']['login']
result['review_comment_created_at'] = comment['created_at']
result['review_comment_updated_at'] = comment['updated_at']
return result
2020-06-23 11:40:09 +00:00
def process_push(response):
common_part = dict(
before_sha=response['before'],
after_sha=response['after'],
full_ref=response['ref'],
ref=response['ref'].split('/')[-1],
repo=response['repository']['full_name'],
pusher=response['pusher']['name'],
sender=response['sender']['login'],
pushed_at=response['repository']['pushed_at'],
raw_json=json.dumps(response),
)
commits = response['commits']
result = []
for commit in commits:
commit_dict = common_part.copy()
commit_dict['sha'] = commit['id']
commit_dict['tree_sha'] = commit['tree_id']
commit_dict['author'] = commit['author']['name']
commit_dict['committer'] = commit['committer']['name']
commit_dict['message'] = commit['message']
commit_dict['commited_at'] = commit['timestamp']
result.append(commit_dict)
return result
2020-06-11 15:55:44 +00:00
def event_processor_dispatcher(headers, body, inserter):
if 'X-Github-Event' in headers:
if headers['X-Github-Event'] == 'issues':
result = process_issue_event(body)
inserter.insert_event_into(DB, 'issues', result)
elif headers['X-Github-Event'] == 'issue_comment':
result = process_issue_comment_event(body)
inserter.insert_event_into(DB, 'issues', result)
elif headers['X-Github-Event'] == 'pull_request':
result = process_pull_request_event(body)
inserter.insert_event_into(DB, 'pull_requests', result)
2020-07-31 14:07:54 +00:00
label_pull_request_event(body)
2020-06-11 15:55:44 +00:00
elif headers['X-Github-Event'] == 'pull_request_review':
result = process_pull_request_review(body)
inserter.insert_event_into(DB, 'pull_requests', result)
elif headers['X-Github-Event'] == 'pull_request_review_comment':
result = process_pull_request_review_comment(body)
inserter.insert_event_into(DB, 'pull_requests', result)
2020-06-23 11:40:09 +00:00
elif headers['X-Github-Event'] == 'push':
result = process_push(body)
inserter.insert_events_into(DB, 'commits', result)
2020-06-11 15:55:44 +00:00
class ClickHouseInserter(object):
def __init__(self, url, user, password):
self.url = url
self.auth = {
'X-ClickHouse-User': user,
'X-ClickHouse-Key': password
}
2020-06-23 11:40:09 +00:00
def _insert_json_str_info(self, db, table, json_str):
2020-06-11 15:55:44 +00:00
params = {
'database': db,
'query': 'INSERT INTO {table} FORMAT JSONEachRow'.format(table=table),
'date_time_input_format': 'best_effort'
}
for i in range(RETRIES):
2020-06-23 11:40:09 +00:00
response = None
2020-06-11 15:55:44 +00:00
try:
2020-06-23 11:40:09 +00:00
response = requests.post(self.url, params=params, data=json_str, headers=self.auth, verify=False)
2020-06-11 15:55:44 +00:00
response.raise_for_status()
break
except Exception as ex:
2020-10-02 16:54:07 +00:00
print(("Cannot insert with exception %s", str(ex)))
2020-06-23 11:40:09 +00:00
if response:
2020-10-02 16:54:07 +00:00
print(("Response text %s", response.text))
2020-06-11 15:55:44 +00:00
time.sleep(0.1)
2020-06-23 11:40:09 +00:00
else:
raise Exception("Cannot insert data into clickhouse")
def insert_event_into(self, db, table, event):
event_str = json.dumps(event)
self._insert_json_str_info(db, table, event_str)
def insert_events_into(self, db, table, events):
jsons = []
for event in events:
jsons.append(json.dumps(event))
self._insert_json_str_info(db, table, ','.join(jsons))
2020-06-11 15:55:44 +00:00
def test(event, context):
inserter = ClickHouseInserter(
os.getenv('CLICKHOUSE_URL'),
os.getenv('CLICKHOUSE_USER'),
os.getenv('CLICKHOUSE_PASSWORD'))
body = json.loads(event['body'], strict=False)
headers = event['headers']
event_processor_dispatcher(headers, body, inserter)
return {
'statusCode': 200,
'headers': {
'Content-Type': 'text/plain'
},
'isBase64Encoded': False,
}