mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-25 09:02:00 +00:00
CI: Report job start and finish to CI DB
This commit is contained in:
parent
8340cec8b7
commit
d4e7188750
238
tests/ci/ci.py
238
tests/ci/ci.py
@ -48,7 +48,19 @@ from git_helper import GIT_PREFIX, Git
|
||||
from git_helper import Runner as GitRunner
|
||||
from github_helper import GitHub
|
||||
from pr_info import PRInfo
|
||||
from report import ERROR, FAILURE, PENDING, SUCCESS, BuildResult, JobReport, TestResult
|
||||
from report import (
|
||||
ERROR,
|
||||
FAILURE,
|
||||
PENDING,
|
||||
SUCCESS,
|
||||
BuildResult,
|
||||
JobReport,
|
||||
TestResult,
|
||||
OK,
|
||||
JOB_STARTED_TEST_NAME,
|
||||
JOB_FINISHED_TEST_NAME,
|
||||
FAIL,
|
||||
)
|
||||
from s3_helper import S3Helper
|
||||
from stopwatch import Stopwatch
|
||||
from tee_popen import TeePopen
|
||||
@ -263,7 +275,8 @@ def check_missing_images_on_dockerhub(
|
||||
return result
|
||||
|
||||
|
||||
def _pre_action(s3, indata, pr_info):
|
||||
def _pre_action(s3, job_name, batch, indata, pr_info):
|
||||
no_cache = CiSettings.create_from_run_config(indata).no_ci_cache
|
||||
print("Clear dmesg")
|
||||
Utils.clear_dmesg()
|
||||
CommitStatusData.cleanup()
|
||||
@ -282,6 +295,90 @@ def _pre_action(s3, indata, pr_info):
|
||||
|
||||
ci_cache.dump_run_config(indata)
|
||||
|
||||
to_be_skipped = False
|
||||
skip_status = SUCCESS
|
||||
# check if job was run already
|
||||
if CI.is_build_job(job_name):
|
||||
# this is a build job - check if a build report is present
|
||||
build_result = (
|
||||
BuildResult.load_any(job_name, pr_info.number, pr_info.head_ref)
|
||||
if not no_cache
|
||||
else None
|
||||
)
|
||||
if build_result:
|
||||
if build_result.status == SUCCESS:
|
||||
to_be_skipped = True
|
||||
else:
|
||||
print(
|
||||
"Build report found but status is unsuccessful - will try to rerun"
|
||||
)
|
||||
print("::group::Build Report")
|
||||
print(build_result.as_json())
|
||||
print("::endgroup::")
|
||||
else:
|
||||
# this is a test job - check if GH commit status or cache record is present
|
||||
commit = get_commit(GitHub(get_best_robot_token(), per_page=100), pr_info.sha)
|
||||
# rerun helper check
|
||||
# FIXME: Find a way to identify if job restarted manually (by developer) or by automatic workflow restart (died spot-instance)
|
||||
# disable rerun check for the former
|
||||
if job_name not in (
|
||||
CI.JobNames.BUILD_CHECK,
|
||||
): # we might want to rerun build report job
|
||||
rerun_helper = RerunHelper(commit, _get_ext_check_name(job_name))
|
||||
if rerun_helper.is_already_finished_by_status():
|
||||
print("WARNING: Rerunning job with GH status ")
|
||||
status = rerun_helper.get_finished_status()
|
||||
assert status
|
||||
print("::group::Commit Status")
|
||||
print(status)
|
||||
print("::endgroup::")
|
||||
to_be_skipped = True
|
||||
skip_status = status.state
|
||||
|
||||
# ci cache check
|
||||
if not to_be_skipped and not no_cache:
|
||||
ci_cache = CiCache(s3, indata["jobs_data"]["digests"]).update()
|
||||
job_config = CI.get_job_config(job_name)
|
||||
if ci_cache.is_successful(
|
||||
job_name,
|
||||
batch,
|
||||
job_config.num_batches,
|
||||
job_config.required_on_release_branch,
|
||||
):
|
||||
print("CICache record has be found - job will be skipped")
|
||||
job_status = ci_cache.get_successful(
|
||||
job_name, batch, job_config.num_batches
|
||||
)
|
||||
assert job_status, "BUG"
|
||||
_create_gh_status(
|
||||
commit,
|
||||
job_name,
|
||||
batch,
|
||||
job_config.num_batches,
|
||||
job_status,
|
||||
)
|
||||
to_be_skipped = True
|
||||
# skip_status = SUCCESS already there
|
||||
GHActions.print_in_group("Commit Status Data", job_status)
|
||||
|
||||
# create pre report
|
||||
jr = JobReport.create_pre_report(status=skip_status, job_skipped=to_be_skipped)
|
||||
jr.dump()
|
||||
|
||||
if not to_be_skipped:
|
||||
print("push start record to ci db")
|
||||
prepared_events = prepare_tests_results_for_clickhouse(
|
||||
pr_info,
|
||||
[TestResult(JOB_STARTED_TEST_NAME, OK)],
|
||||
SUCCESS,
|
||||
0.0,
|
||||
JobReport.get_start_time_from_current(),
|
||||
"",
|
||||
_get_ext_check_name(job_name),
|
||||
)
|
||||
ClickHouseHelper().insert_events_into(
|
||||
db="default", table="checks", events=prepared_events
|
||||
)
|
||||
print(f"Pre action done. Report files [{reports_files}] have been downloaded")
|
||||
|
||||
|
||||
@ -1045,108 +1142,23 @@ def main() -> int:
|
||||
### PRE action: start
|
||||
elif args.pre:
|
||||
assert indata, "Run config must be provided via --infile"
|
||||
_pre_action(s3, indata, pr_info)
|
||||
JobReport.create_pre_report().dump()
|
||||
_pre_action(s3, args.job_name, args.batch, indata, pr_info)
|
||||
|
||||
### RUN action: start
|
||||
elif args.run:
|
||||
assert indata
|
||||
ci_settings = CiSettings.create_from_run_config(indata)
|
||||
job_report = JobReport.load()
|
||||
check_name = args.job_name
|
||||
check_name_with_group = _get_ext_check_name(check_name)
|
||||
print(
|
||||
f"Check if rerun for name: [{check_name}], extended name [{check_name_with_group}]"
|
||||
)
|
||||
previous_status = None
|
||||
if CI.is_build_job(check_name):
|
||||
# this is a build job - check if a build report is present
|
||||
build_result = (
|
||||
BuildResult.load_any(check_name, pr_info.number, pr_info.head_ref)
|
||||
if not ci_settings.no_ci_cache
|
||||
else None
|
||||
)
|
||||
if build_result:
|
||||
if build_result.status == SUCCESS:
|
||||
previous_status = build_result.status
|
||||
JobReport(
|
||||
status=SUCCESS,
|
||||
description="",
|
||||
test_results=[],
|
||||
start_time="",
|
||||
duration=0.0,
|
||||
additional_files=[],
|
||||
job_skipped=True,
|
||||
).dump()
|
||||
else:
|
||||
# FIXME: Consider reusing failures for build jobs.
|
||||
# Just remove this if/else - that makes build job starting and failing immediately
|
||||
|
||||
if job_report.job_skipped and not args.force:
|
||||
print(
|
||||
"Build report found but status is unsuccessful - will try to rerun"
|
||||
f"Commit status or Build Report is already present - job will be skipped with status: [{job_report.status}]"
|
||||
)
|
||||
print("::group::Build Report")
|
||||
print(build_result.as_json())
|
||||
print("::endgroup::")
|
||||
else:
|
||||
# this is a test job - check if GH commit status or cache record is present
|
||||
commit = get_commit(
|
||||
GitHub(get_best_robot_token(), per_page=100), pr_info.sha
|
||||
)
|
||||
|
||||
# rerun helper check
|
||||
# FIXME: Find a way to identify if job restarted manually (by developer) or by automatic workflow restart (died spot-instance)
|
||||
# disable rerun check for the former
|
||||
if check_name not in (
|
||||
CI.JobNames.BUILD_CHECK,
|
||||
): # we might want to rerun build report job
|
||||
rerun_helper = RerunHelper(commit, check_name_with_group)
|
||||
if rerun_helper.is_already_finished_by_status():
|
||||
print("WARNING: Rerunning job with GH status ")
|
||||
status = rerun_helper.get_finished_status()
|
||||
assert status
|
||||
print("::group::Commit Status")
|
||||
print(status)
|
||||
print("::endgroup::")
|
||||
previous_status = status.state
|
||||
print("Create dummy job report with job_skipped flag")
|
||||
JobReport(
|
||||
status=status.state,
|
||||
description="",
|
||||
test_results=[],
|
||||
start_time="",
|
||||
duration=0.0,
|
||||
additional_files=[],
|
||||
job_skipped=True,
|
||||
).dump()
|
||||
|
||||
# ci cache check
|
||||
if not previous_status and not ci_settings.no_ci_cache:
|
||||
ci_cache = CiCache(s3, indata["jobs_data"]["digests"]).update()
|
||||
job_config = CI.get_job_config(check_name)
|
||||
if ci_cache.is_successful(
|
||||
check_name,
|
||||
args.batch,
|
||||
job_config.num_batches,
|
||||
job_config.required_on_release_branch,
|
||||
):
|
||||
job_status = ci_cache.get_successful(
|
||||
check_name, args.batch, job_config.num_batches
|
||||
)
|
||||
assert job_status, "BUG"
|
||||
_create_gh_status(
|
||||
commit,
|
||||
check_name,
|
||||
args.batch,
|
||||
job_config.num_batches,
|
||||
job_status,
|
||||
)
|
||||
previous_status = job_status.status
|
||||
GHActions.print_in_group("Commit Status Data", job_status)
|
||||
|
||||
if previous_status and not args.force:
|
||||
print(
|
||||
f"Commit status or Build Report is already present - job will be skipped with status: [{previous_status}]"
|
||||
)
|
||||
if previous_status == SUCCESS:
|
||||
if job_report.status == SUCCESS:
|
||||
exit_code = 0
|
||||
else:
|
||||
exit_code = 1
|
||||
@ -1166,7 +1178,8 @@ def main() -> int:
|
||||
assert (
|
||||
job_report
|
||||
), "BUG. There must be job report either real report, or pre-report if job was killed"
|
||||
if not job_report.job_skipped and not job_report.pre_report:
|
||||
error_description = ""
|
||||
if not job_report.pre_report:
|
||||
# it's a real job report
|
||||
ch_helper = ClickHouseHelper()
|
||||
check_url = ""
|
||||
@ -1244,7 +1257,6 @@ def main() -> int:
|
||||
pr_info,
|
||||
dump_to_file=True,
|
||||
)
|
||||
|
||||
print(f"Job report url: [{check_url}]")
|
||||
prepared_events = prepare_tests_results_for_clickhouse(
|
||||
pr_info,
|
||||
@ -1269,9 +1281,7 @@ def main() -> int:
|
||||
)
|
||||
elif job_report.job_skipped:
|
||||
print(f"Skipped after rerun check {[args.job_name]} - do nothing")
|
||||
elif job_report.job_skipped:
|
||||
print(f"Job was skipped {[args.job_name]} - do nothing")
|
||||
elif job_report.pre_report:
|
||||
else:
|
||||
print(f"ERROR: Job was killed - generate evidence")
|
||||
job_report.update_duration()
|
||||
ret_code = os.getenv("JOB_EXIT_CODE", "")
|
||||
@ -1282,10 +1292,13 @@ def main() -> int:
|
||||
pass
|
||||
if Utils.is_killed_with_oom():
|
||||
print("WARNING: OOM while job execution")
|
||||
error = f"Out Of Memory, exit_code {job_report.exit_code}, after {int(job_report.duration)}s"
|
||||
error_description = f"Out Of Memory, exit_code {job_report.exit_code}"
|
||||
else:
|
||||
error = f"Unknown, exit_code {job_report.exit_code}, after {int(job_report.duration)}s"
|
||||
CIBuddy().post_error(error, job_name=_get_ext_check_name(args.job_name))
|
||||
error_description = f"Unknown, exit_code {job_report.exit_code}"
|
||||
CIBuddy().post_error(
|
||||
error_description + f" after {int(job_report.duration)}s",
|
||||
job_name=_get_ext_check_name(args.job_name),
|
||||
)
|
||||
if CI.is_test_job(args.job_name):
|
||||
gh = GitHub(get_best_robot_token(), per_page=100)
|
||||
commit = get_commit(gh, pr_info.sha)
|
||||
@ -1293,11 +1306,32 @@ def main() -> int:
|
||||
commit,
|
||||
ERROR,
|
||||
"",
|
||||
"Error: " + error,
|
||||
"Error: " + error_description,
|
||||
_get_ext_check_name(args.job_name),
|
||||
pr_info,
|
||||
dump_to_file=True,
|
||||
)
|
||||
|
||||
if not job_report.job_skipped:
|
||||
print("push finish record to ci db")
|
||||
prepared_events = prepare_tests_results_for_clickhouse(
|
||||
pr_info,
|
||||
[
|
||||
TestResult(
|
||||
JOB_FINISHED_TEST_NAME,
|
||||
FAIL if error_description else OK,
|
||||
raw_logs=error_description or None,
|
||||
)
|
||||
],
|
||||
SUCCESS if not error_description else ERROR,
|
||||
0.0,
|
||||
JobReport.get_start_time_from_current(),
|
||||
"",
|
||||
_get_ext_check_name(args.job_name),
|
||||
)
|
||||
ClickHouseHelper().insert_events_into(
|
||||
db="default", table="checks", events=prepared_events
|
||||
)
|
||||
### POST action: end
|
||||
|
||||
### MARK SUCCESS action: start
|
||||
|
@ -247,6 +247,9 @@ BASE_HEADERS = ["Test name", "Test status"]
|
||||
# should not be in TEMP directory or any directory that may be cleaned during the job execution
|
||||
JOB_REPORT_FILE = Path(GITHUB_WORKSPACE) / "job_report.json"
|
||||
|
||||
JOB_STARTED_TEST_NAME = "STARTED"
|
||||
JOB_FINISHED_TEST_NAME = "COMPLETED"
|
||||
|
||||
|
||||
@dataclass
|
||||
class TestResult:
|
||||
@ -304,14 +307,19 @@ class JobReport:
|
||||
exit_code: int = -1
|
||||
|
||||
@staticmethod
|
||||
def create_pre_report() -> "JobReport":
|
||||
def get_start_time_from_current():
|
||||
return datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S")
|
||||
|
||||
@classmethod
|
||||
def create_pre_report(cls, status: str, job_skipped: bool) -> "JobReport":
|
||||
return JobReport(
|
||||
status=ERROR,
|
||||
status=status,
|
||||
description="",
|
||||
test_results=[],
|
||||
start_time=datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S"),
|
||||
start_time=cls.get_start_time_from_current(),
|
||||
duration=0.0,
|
||||
additional_files=[],
|
||||
job_skipped=job_skipped,
|
||||
pre_report=True,
|
||||
)
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user