#!/usr/bin/env python
"""
This Lambda receives workflow_job webhook events, see
https://docs.github.com/en/developers/webhooks-and-events/webhooks/webhook-events-and-payloads#workflow_job

Then it either posts the event as is to play.clickhouse.com, or anonymizes the
sensitive fields for private repositories.
"""
import json
import logging
from base64 import b64decode
from dataclasses import dataclass
from typing import Any, List, Optional

from lambda_shared import ClickHouseHelper, InsertException, get_parameter_from_ssm

logging.getLogger().setLevel(logging.INFO)


@dataclass
class WorkflowJob:
    id: int
    run_id: int
    workflow_name: str
    head_branch: str
    run_url: str
    run_attempt: int
    node_id: str
    head_sha: str
    url: str
    html_url: str
    status: str
    conclusion: str
    started_at: str
    completed_at: str
    name: str
    steps: int  # just the number of steps, we don't keep the steps themselves
    check_run_url: str
    labels: List[str]
    runner_id: int
    runner_name: str
    runner_group_id: int
    runner_group_name: str
    repository: str

    def anonymize_url(self, url: str) -> str:
        return url.replace(self.repository, "ANONYMIZED_REPO")

    def anonymize(self):
        anm = "ANONYMIZED"
        self.workflow_name = anm
        self.head_branch = anm
        self.run_url = self.anonymize_url(self.run_url)
        self.node_id = anm
        self.url = self.anonymize_url(self.url)
        self.html_url = self.anonymize_url(self.html_url)
        self.name = anm
        self.check_run_url = self.anonymize_url(self.check_run_url)
        self.repository = anm

    def as_dict(self) -> dict:
        return self.__dict__


CH_CLIENT = None  # type: Optional[ClickHouseHelper]


def send_event_workflow_job(workflow_job: WorkflowJob) -> None:
    # SHOW CREATE TABLE default.workflow_jobs
    # CREATE TABLE default.workflow_jobs UUID 'c0351924-8ccd-47a6-9db0-e28a9eee2fdf'
    # (
    #     `id` UInt64,
    #     `run_id` UInt64,
    #     `workflow_name` LowCardinality(String),
    #     `head_branch` LowCardinality(String),
    #     `run_url` String,
    #     `run_attempt` UInt16,
    #     `node_id` String,
    #     `head_sha` String,
    #     `url` String,
    #     `html_url` String,
    #     `status` Enum8('waiting' = 1, 'queued' = 2, 'in_progress' = 3, 'completed' = 4),
    #     `conclusion` LowCardinality(String),
    #     `started_at` DateTime,
    #     `completed_at` DateTime,
    #     `name` LowCardinality(String),
    #     `steps` UInt16,
    #     `check_run_url` String,
    #     `labels` Array(LowCardinality(String)),
    #     `runner_id` UInt64,
    #     `runner_name` String,
    #     `runner_group_id` UInt64,
    #     `runner_group_name` LowCardinality(String),
    #     `repository` LowCardinality(String),
    #     `updated_at` DateTime DEFAULT now()
    # )
    # ENGINE = ReplicatedMergeTree('/clickhouse/tables/c0351924-8ccd-47a6-9db0-e28a9eee2fdf/{shard}', '{replica}')
    # PARTITION BY toStartOfMonth(started_at)
    # ORDER BY (id, updated_at)
    # SETTINGS index_granularity = 8192
    global CH_CLIENT
    CH_CLIENT = CH_CLIENT or ClickHouseHelper(
        get_parameter_from_ssm("clickhouse-test-stat-url"),
        get_parameter_from_ssm("clickhouse-test-stat-login"),
        get_parameter_from_ssm("clickhouse-test-stat-password"),
    )
    try:
        CH_CLIENT.insert_event_into(
            "default", "workflow_jobs", workflow_job.as_dict(), False
        )
    except InsertException as ex:
        logging.exception(
            "Got an exception on insert, trying to update the client "
            "credentials and repeat",
            exc_info=ex,
        )
        CH_CLIENT = ClickHouseHelper(
            get_parameter_from_ssm("clickhouse-test-stat-url"),
            get_parameter_from_ssm("clickhouse-test-stat-login"),
            get_parameter_from_ssm("clickhouse-test-stat-password"),
        )
        CH_CLIENT.insert_event_into(
            "default", "workflow_jobs", workflow_job.as_dict(), False
        )


def killed_job(wf_job: dict) -> bool:
    """A hack to identify a killed runner when the "Complete job" step is omitted"""
    if (
        wf_job.get("status", "") != "completed"
        or wf_job.get("conclusion", "") != "failure"
    ):
        # The job is either still running or finished without failure
        return False
    return not any(
        step["name"] == "Complete job" and step["conclusion"] is not None
        for step in wf_job["steps"]
    )
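
# Illustrative check (hypothetical payload fragment): a failed job whose
# "Complete job" step never got a conclusion counts as killed
#
#   killed_job({
#       "status": "completed",
#       "conclusion": "failure",
#       "steps": [{"name": "Complete job", "conclusion": None}],
#   })  # -> True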


def handler(event: dict, context: Any) -> dict:
    if event["isBase64Encoded"]:
        event_data = json.loads(b64decode(event["body"]))
    else:
        event_data = json.loads(event["body"])

    logging.info("Got the next raw event from the github hook: %s", event_data)
    repo = event_data["repository"]
    try:
        wf_job = event_data["workflow_job"]
    except KeyError:
        logging.error("The event does not contain valid workflow_jobs data")
        logging.error("The event data: %s", event)
        logging.error("The context data: %s", context)
        # Without workflow_job data there is nothing to process; bail out here
        # instead of failing later on an unbound `wf_job`
        return {
            "statusCode": 400,
            "headers": {"Content-Type": "application/json"},
            "body": '{"status": "missing workflow_job"}',
        }
    if killed_job(wf_job):
        # for a killed job we record 0 steps
        steps = 0
    else:
        # We record only finished steps
        steps = sum(1 for st in wf_job["steps"] if st["conclusion"] is not None)

    workflow_job = WorkflowJob(
        wf_job["id"],
        wf_job["run_id"],
        wf_job["workflow_name"] or "",  # nullable
        wf_job["head_branch"],
        wf_job["run_url"],
        wf_job["run_attempt"],
        wf_job["node_id"],
        wf_job["head_sha"],
        wf_job["url"],
        wf_job["html_url"],
        wf_job["status"],
        wf_job["conclusion"] or "",  # nullable
        wf_job["started_at"],
        wf_job["completed_at"] or "1970-01-01T00:00:00",  # nullable date
        wf_job["name"],
        steps,
        wf_job["check_run_url"],
        wf_job["labels"],
        wf_job["runner_id"] or 0,  # nullable
        wf_job["runner_name"] or "",  # nullable
        wf_job["runner_group_id"] or 0,  # nullable
        wf_job["runner_group_name"] or "",  # nullable
        repo["full_name"],
    )
|
2023-04-25 17:11:20 +00:00
|
|
|
logging.info(
|
|
|
|
"Got the next event (private_repo=%s): %s", repo["private"], workflow_job
|
|
|
|
)
|
2022-12-22 17:03:40 +00:00
|
|
|
if repo["private"]:
|
|
|
|
workflow_job.anonimyze()
|
|
|
|
|
|
|
|
send_event_workflow_job(workflow_job)
|
|
|
|
|
|
|
|
return {
|
|
|
|
"statusCode": 200,
|
|
|
|
"headers": {"Content-Type": "application/json"},
|
|
|
|
"body": '{"status": "OK"}',
|
|
|
|
}
|
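

# A minimal local sketch (not part of the Lambda entrypoint), assuming you only
# want to see the anonymization in action without AWS or ClickHouse access;
# every value below is made up for illustration.
if __name__ == "__main__":
    job = WorkflowJob(
        id=1,
        run_id=2,
        workflow_name="CI",
        head_branch="feature-branch",
        run_url="https://api.github.com/repos/org/repo/actions/runs/2",
        run_attempt=1,
        node_id="NODE_ID",
        head_sha="deadbeef",
        url="https://api.github.com/repos/org/repo/actions/jobs/1",
        html_url="https://github.com/org/repo/actions/runs/2/job/1",
        status="completed",
        conclusion="success",
        started_at="2024-01-01T00:00:00",
        completed_at="2024-01-01T00:01:00",
        name="build",
        steps=3,
        check_run_url="https://api.github.com/repos/org/repo/check-runs/1",
        labels=["self-hosted"],
        runner_id=10,
        runner_name="runner-1",
        runner_group_id=1,
        runner_group_name="default",
        repository="org/repo",
    )
    # "org/repo" inside every URL becomes "ANONYMIZED_REPO", and the
    # identifying string fields become "ANONYMIZED"
    job.anonymize()
    print(json.dumps(job.as_dict(), indent=2))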