ClickHouse/tests/performance/scripts/report.py
Max K 84e5870b71
Reapply "improve CI with digest for docker, build and test jobs" (#57904)
* Revert "Revert "improve CI with digest for docker, build and test jobs""

* fix: docker manifest merge for missing images only
2023-12-18 09:07:22 +01:00

756 lines
22 KiB
Python
Executable File
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/python3
import argparse
import ast
import collections
import csv
import itertools
import json
import os
import os.path
import pprint
import sys
import traceback
parser = argparse.ArgumentParser(description="Create performance test report")
parser.add_argument(
"--report",
default="main",
choices=["main", "all-queries"],
help="Which report to build",
)
parser.add_argument("--no-tests-run", action="store_true", default=False)
args = parser.parse_args()
tables = []
errors_explained = []
report_errors = []
error_tests = 0
slow_average_tests = 0
faster_queries = 0
slower_queries = 0
unstable_queries = 0
very_unstable_queries = 0
unstable_backward_incompatible_queries = 0
# max seconds to run one query by itself, not counting preparation
allowed_single_run_time = 2
color_bad = "#ffb0c0"
color_good = "#b0d050"
header_template = """
<!DOCTYPE html>
<html lang="en">
<style>
body {{
font-family: "DejaVu Sans", "Noto Sans", Arial, sans-serif;
background: #EEE;
}}
a {{ color: #06F; text-decoration: none; }}
a:hover, a:active {{ color: #F40; text-decoration: underline; }}
.main {{ margin: auto; max-width: 95%; }}
p.links a {{
padding: 5px; margin: 3px; background: #FFF; line-height: 2;
white-space: nowrap;
box-shadow: 0 0 0 1px rgba(0, 0, 0, 0.05), 0 8px 25px -5px rgba(0, 0, 0, 0.1);
}}
.cancela,.cancela:link,.cancela:visited,.cancela:hover,
.cancela:focus,.cancela:active {{
color: inherit;
text-decoration: none;
}}
table {{
border: none;
border-spacing: 0px;
line-height: 1.5;
box-shadow: 0 0 0 1px rgba(0, 0, 0, 0.05), 0 8px 25px -5px rgba(0, 0, 0, 0.1);
text-align: left;
}}
th, td {{
border: none;
padding: 5px;
vertical-align: top;
background-color: #FFF;
font-family: sans-serif;
}}
th {{
border-bottom: 2px solid black;
}}
tr:nth-child(odd) td {{filter: brightness(90%);}}
.unexpected-query-duration tr :nth-child(2),
.unexpected-query-duration tr :nth-child(3),
.unexpected-query-duration tr :nth-child(5),
.all-query-times tr :nth-child(1),
.all-query-times tr :nth-child(2),
.all-query-times tr :nth-child(3),
.all-query-times tr :nth-child(4),
.all-query-times tr :nth-child(5),
.all-query-times tr :nth-child(7),
.changes-in-performance tr :nth-child(1),
.changes-in-performance tr :nth-child(2),
.changes-in-performance tr :nth-child(3),
.changes-in-performance tr :nth-child(4),
.changes-in-performance tr :nth-child(5),
.changes-in-performance tr :nth-child(7),
.unstable-queries tr :nth-child(1),
.unstable-queries tr :nth-child(2),
.unstable-queries tr :nth-child(3),
.unstable-queries tr :nth-child(4),
.unstable-queries tr :nth-child(6),
.test-performance-changes tr :nth-child(2),
.test-performance-changes tr :nth-child(3),
.test-performance-changes tr :nth-child(4),
.test-performance-changes tr :nth-child(5),
.test-performance-changes tr :nth-child(6),
.test-times tr :nth-child(2),
.test-times tr :nth-child(3),
.test-times tr :nth-child(4),
.test-times tr :nth-child(5),
.test-times tr :nth-child(6),
.test-times tr :nth-child(7),
.concurrent-benchmarks tr :nth-child(2),
.concurrent-benchmarks tr :nth-child(3),
.concurrent-benchmarks tr :nth-child(4),
.concurrent-benchmarks tr :nth-child(5),
.metric-changes tr :nth-child(2),
.metric-changes tr :nth-child(3),
.metric-changes tr :nth-child(4),
.metric-changes tr :nth-child(5)
{{ text-align: right; }}
</style>
<title>Clickhouse performance comparison</title>
</head>
<body>
<div class="main">
<h1>ClickHouse performance comparison</h1>
"""
table_anchor = 0
row_anchor = 0
def currentTableAnchor():
global table_anchor
return f"{table_anchor}"
def newTableAnchor():
global table_anchor
table_anchor += 1
return currentTableAnchor()
def currentRowAnchor():
global row_anchor
global table_anchor
return f"{table_anchor}.{row_anchor}"
def nextRowAnchor():
global row_anchor
global table_anchor
return f"{table_anchor}.{row_anchor + 1}"
def advanceRowAnchor():
global row_anchor
global table_anchor
row_anchor += 1
return currentRowAnchor()
def tr(x, anchor=None):
# return '<tr onclick="location.href=\'#{a}\'" id={a}>{x}</tr>'.format(a=a, x=str(x))
anchor = anchor if anchor else advanceRowAnchor()
return f"<tr id={anchor}>{x}</tr>"
def td(value, cell_attributes=""):
return "<td {cell_attributes}>{value}</td>".format(
cell_attributes=cell_attributes, value=value
)
def th(value, cell_attributes=""):
return "<th {cell_attributes}>{value}</th>".format(
cell_attributes=cell_attributes, value=value
)
def tableRow(cell_values, cell_attributes=[], anchor=None):
return tr(
"".join(
[
td(v, a)
for v, a in itertools.zip_longest(
cell_values, cell_attributes, fillvalue=""
)
if a is not None and v is not None
]
),
anchor,
)
def tableHeader(cell_values, cell_attributes=[]):
return tr(
"".join(
[
th(v, a)
for v, a in itertools.zip_longest(
cell_values, cell_attributes, fillvalue=""
)
if a is not None and v is not None
]
)
)
def tableStart(title):
cls = "-".join(title.lower().split(" ")[:3])
global table_anchor
table_anchor = cls
anchor = currentTableAnchor()
help_anchor = "-".join(title.lower().split(" "))
return f"""
<h2 id="{anchor}">
<a class="cancela" href="#{anchor}">{title}</a>
<a class="cancela" href="https://github.com/ClickHouse/ClickHouse/tree/master/docker/test/performance-comparison#{help_anchor}"><sup style="color: #888">?</sup></a>
</h2>
<table class="{cls}">
"""
def tableEnd():
return "</table>"
def tsvRows(n):
try:
with open(n, encoding="utf-8") as fd:
result = []
for row in csv.reader(fd, delimiter="\t", quoting=csv.QUOTE_NONE):
new_row = []
for e in row:
# The first one .encode('utf-8').decode('unicode-escape') decodes the escape characters from the strings.
# The second one (encode('latin1').decode('utf-8')) fixes the changes with unicode vs utf-8 chars, so
# 'Чем зÐ<C2B7>нимаеÑ<C2B5>ЬÑ<C2AC>Ñ<EFBFBD>' is transformed back into 'Чем зАнимаешЬся'.
new_row.append(
e.encode("utf-8")
.decode("unicode-escape")
.encode("latin1")
.decode("utf-8")
)
result.append(new_row)
return result
except:
report_errors.append(traceback.format_exception_only(*sys.exc_info()[:2])[-1])
pass
return []
def htmlRows(n):
rawRows = tsvRows(n)
result = ""
for row in rawRows:
result += tableRow(row)
return result
def addSimpleTable(caption, columns, rows, pos=None):
global tables
text = ""
if not rows:
return
text += tableStart(caption)
text += tableHeader(columns)
for row in rows:
text += tableRow(row)
text += tableEnd()
tables.insert(pos if pos else len(tables), text)
def add_tested_commits():
global report_errors
try:
addSimpleTable(
"Tested Commits",
["Old", "New"],
[
[
"<pre>{}</pre>".format(x)
for x in [
open("left-commit.txt").read(),
open("right-commit.txt").read(),
]
]
],
)
except:
# Don't fail if no commit info -- maybe it's a manual run.
report_errors.append(traceback.format_exception_only(*sys.exc_info()[:2])[-1])
pass
def add_report_errors():
global tables
global report_errors
# Add the errors reported by various steps of comparison script
try:
report_errors += [l.strip() for l in open("report/errors.log")]
except:
report_errors.append(traceback.format_exception_only(*sys.exc_info()[:2])[-1])
pass
if not report_errors:
return
text = tableStart("Errors while Building the Report")
text += tableHeader(["Error"])
for x in report_errors:
text += tableRow([x])
text += tableEnd()
# Insert after Tested Commits
tables.insert(1, text)
errors_explained.append(
[
f'<a href="#{currentTableAnchor()}">There were some errors while building the report</a>'
]
)
def add_errors_explained():
if not errors_explained:
return
text = '<a name="fail1"/>'
text += tableStart("Error Summary")
text += tableHeader(["Description"])
for row in errors_explained:
text += tableRow(row)
text += tableEnd()
global tables
tables.insert(1, text)
if args.report == "main":
print((header_template.format()))
add_tested_commits()
def print_status(status, message):
print(
(
"""
<!--status: {status}-->
<!--message: {message}-->
""".format(
status=status, message=message
)
)
)
if args.no_tests_run:
for t in tables:
print(t)
print(
"<h2>No tests to run. Only changed tests were run, but all changed tests are from another batch.</h2>"
)
print(
f"""
</div>
{os.getenv("CHPC_ADD_REPORT_LINKS") or ''}
</body>
</html>
"""
)
# Why failure? Because otherwise we will not notice if we have a bug that leads to 0 tests being run
print_status("failure", "No tests changed, nothing to run")
exit(0)
run_error_rows = tsvRows("run-errors.tsv")
error_tests += len(run_error_rows)
addSimpleTable("Run Errors", ["Test", "Error"], run_error_rows)
if run_error_rows:
errors_explained.append(
[
f'<a href="#{currentTableAnchor()}">There were some errors while running the tests</a>'
]
)
def add_backward_incompatible():
rows = tsvRows("report/partial-queries-report.tsv")
if not rows:
return
global unstable_backward_incompatible_queries, slow_average_tests, tables
text = tableStart("Backward-incompatible queries")
columns = ["Median time, s", "Relative time variance", "Test", "#", "Query"]
text += tableHeader(columns)
attrs = ["" for c in columns]
for row in rows:
anchor = f"{currentTableAnchor()}.{row[2]}.{row[3]}"
if float(row[1]) > 0.10:
attrs[1] = f'style="background: {color_bad}"'
unstable_backward_incompatible_queries += 1
errors_explained.append(
[
f"<a href=\"#{anchor}\">The query no. {row[3]} of test '{row[2]}' has excessive variance of run time. Keep it below 10%</a>"
]
)
else:
attrs[1] = ""
if float(row[0]) > allowed_single_run_time:
attrs[0] = f'style="background: {color_bad}"'
errors_explained.append(
[
f'<a href="#{anchor}">The query no. {row[3]} of test \'{row[2]}\' is taking too long to run. Keep the run time below {allowed_single_run_time} seconds"</a>'
]
)
slow_average_tests += 1
else:
attrs[0] = ""
text += tableRow(row, attrs, anchor)
text += tableEnd()
tables.append(text)
add_backward_incompatible()
def add_changes():
rows = tsvRows("report/changed-perf.tsv")
if not rows:
return
global faster_queries, slower_queries, tables
text = tableStart("Changes in Performance")
columns = [
"Old,&nbsp;s", # 0
"New,&nbsp;s", # 1
"Ratio of speedup&nbsp;(-) or slowdown&nbsp;(+)", # 2
"Relative difference (new&nbsp;&minus;&nbsp;old) / old", # 3
"p&nbsp;<&nbsp;0.01 threshold", # 4
"", # Failed # 5
"Test", # 6
"#", # 7
"Query", # 8
]
attrs = ["" for c in columns]
attrs[5] = None
text += tableHeader(columns, attrs)
for row in rows:
anchor = f"{currentTableAnchor()}.{row[6]}.{row[7]}"
if int(row[5]):
if float(row[3]) < 0.0:
faster_queries += 1
attrs[2] = attrs[3] = f'style="background: {color_good}"'
else:
slower_queries += 1
attrs[2] = attrs[3] = f'style="background: {color_bad}"'
errors_explained.append(
[
f"<a href=\"#{anchor}\">The query no. {row[7]} of test '{row[6]}' has slowed down</a>"
]
)
else:
attrs[2] = attrs[3] = ""
text += tableRow(row, attrs, anchor)
text += tableEnd()
tables.append(text)
add_changes()
def add_unstable_queries():
global unstable_queries, very_unstable_queries, tables
unstable_rows = tsvRows("report/unstable-queries.tsv")
if not unstable_rows:
return
unstable_queries += len(unstable_rows)
columns = [
"Old,&nbsp;s", # 0
"New,&nbsp;s", # 1
"Relative difference (new&nbsp;-&nbsp;old)/old", # 2
"p&nbsp;&lt;&nbsp;0.01 threshold", # 3
"", # Failed #4
"Test", # 5
"#", # 6
"Query", # 7
]
attrs = ["" for c in columns]
attrs[4] = None
text = tableStart("Unstable Queries")
text += tableHeader(columns, attrs)
for r in unstable_rows:
anchor = f"{currentTableAnchor()}.{r[5]}.{r[6]}"
if int(r[4]):
very_unstable_queries += 1
attrs[3] = f'style="background: {color_bad}"'
else:
attrs[3] = ""
# Just don't add the slightly unstable queries we don't consider
# errors. It's not clear what the user should do with them.
continue
text += tableRow(r, attrs, anchor)
text += tableEnd()
# Don't add an empty table.
if very_unstable_queries:
tables.append(text)
add_unstable_queries()
skipped_tests_rows = tsvRows("analyze/skipped-tests.tsv")
addSimpleTable("Skipped Tests", ["Test", "Reason"], skipped_tests_rows)
addSimpleTable(
"Test Performance Changes",
[
"Test",
"Ratio of speedup&nbsp;(-) or slowdown&nbsp;(+)",
"Queries",
"Total not OK",
"Changed perf",
"Unstable",
],
tsvRows("report/test-perf-changes.tsv"),
)
def add_test_times():
global slow_average_tests, tables
rows = tsvRows("report/test-times.tsv")
if not rows:
return
columns = [
"Test", # 0
"Wall clock time, entire test,&nbsp;s", # 1
"Total client time for measured query runs,&nbsp;s", # 2
"Queries", # 3
"Longest query, total for measured runs,&nbsp;s", # 4
"Wall clock time per query,&nbsp;s", # 5
"Shortest query, total for measured runs,&nbsp;s", # 6
"", # Runs #7
]
attrs = ["" for c in columns]
attrs[7] = None
text = tableStart("Test Times")
text += tableHeader(columns, attrs)
allowed_average_run_time = 3.75 # 60 seconds per test at (7 + 1) * 2 runs
for r in rows:
anchor = f"{currentTableAnchor()}.{r[0]}"
total_runs = (int(r[7]) + 1) * 2 # one prewarm run, two servers
if r[0] != "Total" and float(r[5]) > allowed_average_run_time * total_runs:
# FIXME should be 15s max -- investigate parallel_insert
slow_average_tests += 1
attrs[5] = f'style="background: {color_bad}"'
errors_explained.append(
[
f"<a href=\"#{anchor}\">The test '{r[0]}' is too slow to run as a whole. Investigate whether the create and fill queries can be sped up"
]
)
else:
attrs[5] = ""
if r[0] != "Total" and float(r[4]) > allowed_single_run_time * total_runs:
slow_average_tests += 1
attrs[4] = f'style="background: {color_bad}"'
errors_explained.append(
[
f"<a href=\"./all-queries.html#all-query-times.{r[0]}.0\">Some query of the test '{r[0]}' is too slow to run. See the all queries report"
]
)
else:
attrs[4] = ""
text += tableRow(r, attrs, anchor)
text += tableEnd()
tables.append(text)
add_test_times()
addSimpleTable(
"Metric Changes",
[
"Metric",
"Old median value",
"New median value",
"Relative difference",
"Times difference",
],
tsvRows("metrics/changes.tsv"),
)
add_report_errors()
add_errors_explained()
for t in tables:
print(t)
print(
f"""
</div>
<p class="links">
<a href="all-queries.html">All queries</a>
<a href="compare.log">Log</a>
<a href="output.7z">Test output</a>
{os.getenv("CHPC_ADD_REPORT_LINKS") or ''}
</p>
</body>
</html>
"""
)
status = "success"
message = "See the report"
message_array = []
if slow_average_tests:
status = "failure"
message_array.append(str(slow_average_tests) + " too long")
if faster_queries:
message_array.append(str(faster_queries) + " faster")
if slower_queries:
# This threshold should be synchronized with the value in https://github.com/ClickHouse/ClickHouse/blob/master/tests/ci/performance_comparison_check.py#L225
# False positives rate should be < 1%: https://shorturl.at/CDEK8
if slower_queries > 5:
status = "failure"
message_array.append(str(slower_queries) + " slower")
if unstable_backward_incompatible_queries:
very_unstable_queries += unstable_backward_incompatible_queries
status = "failure"
# Don't show mildly unstable queries, only the very unstable ones we
# treat as errors.
if very_unstable_queries:
message_array.append(str(very_unstable_queries) + " unstable")
# FIXME: uncomment the following lines when tests are stable and
# reliable
# if very_unstable_queries > 5:
# error_tests += very_unstable_queries
# status = "failure"
#
# error_tests += slow_average_tests
# FIXME: until here
if error_tests:
status = "failure"
message_array.insert(0, str(error_tests) + " errors")
if message_array:
message = ", ".join(message_array)
if report_errors:
status = "failure"
message = "Errors while building the report."
print_status(status, message)
elif args.report == "all-queries":
print((header_template.format()))
add_tested_commits()
def add_all_queries():
rows = tsvRows("report/all-queries.tsv")
if not rows:
return
columns = [
"", # Changed #0
"", # Unstable #1
"Old,&nbsp;s", # 2
"New,&nbsp;s", # 3
"Ratio of speedup&nbsp;(-) or slowdown&nbsp;(+)", # 4
"Relative difference (new&nbsp;&minus;&nbsp;old) / old", # 5
"p&nbsp;&lt;&nbsp;0.01 threshold", # 6
"Test", # 7
"#", # 8
"Query", # 9
]
attrs = ["" for c in columns]
attrs[0] = None
attrs[1] = None
text = tableStart("All Query Times")
text += tableHeader(columns, attrs)
for r in rows:
anchor = f"{currentTableAnchor()}.{r[7]}.{r[8]}"
if int(r[1]):
attrs[6] = f'style="background: {color_bad}"'
else:
attrs[6] = ""
if int(r[0]):
if float(r[5]) > 0.0:
attrs[4] = attrs[5] = f'style="background: {color_bad}"'
else:
attrs[4] = attrs[5] = f'style="background: {color_good}"'
else:
attrs[4] = attrs[5] = ""
if (float(r[2]) + float(r[3])) / 2 > allowed_single_run_time:
attrs[2] = f'style="background: {color_bad}"'
attrs[3] = f'style="background: {color_bad}"'
else:
attrs[2] = ""
attrs[3] = ""
text += tableRow(r, attrs, anchor)
text += tableEnd()
tables.append(text)
add_all_queries()
add_report_errors()
for t in tables:
print(t)
print(
f"""
</div>
<p class="links">
<a href="report.html">Main report</a>
<a href="compare.log">Log</a>
<a href="output.7z">Test output</a>
{os.getenv("CHPC_ADD_REPORT_LINKS") or ''}
</p>
</body>
</html>
"""
)