#!/usr/bin/python3
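"""Build the HTML report for a ClickHouse performance comparison run.

Reads the TSV files produced by the earlier comparison steps and prints a
self-contained HTML page to stdout.
"""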
import argparse
import ast
import collections
import csv
import itertools
import json
import os
import os.path
import pprint
import sys
import traceback
parser = argparse.ArgumentParser(description="Create performance test report")
parser.add_argument(
"--report",
default="main",
choices=["main", "all-queries"],
help="Which report to build",
)
args = parser.parse_args()
tables = []
errors_explained = []
report_errors = []
error_tests = 0
slow_average_tests = 0
faster_queries = 0
slower_queries = 0
unstable_queries = 0
very_unstable_queries = 0
unstable_partial_queries = 0
# max seconds to run one query by itself, not counting preparation
allowed_single_run_time = 2
color_bad = "#ffb0c0"
color_good = "#b0d050"
# Page header for the self-contained HTML report (the report styling is
# also inlined here in the full template).
header_template = """
<!DOCTYPE html>
<html lang="en">
<title>ClickHouse performance comparison</title>
<body>
<h1>ClickHouse performance comparison</h1>
"""
table_anchor = 0
row_anchor = 0
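# Anchor bookkeeping for deep links: tableStart() replaces table_anchor with a
# slug of the table title (e.g. "changes-in-performance"), and each row gets an
# id of the form "<table_anchor>.<row_anchor>" unless an explicit anchor is given.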
def currentTableAnchor():
global table_anchor
return f"{table_anchor}"
def newTableAnchor():
global table_anchor
table_anchor += 1
return currentTableAnchor()
def currentRowAnchor():
global row_anchor
global table_anchor
return f"{table_anchor}.{row_anchor}"
def nextRowAnchor():
global row_anchor
global table_anchor
return f"{table_anchor}.{row_anchor + 1}"
def advanceRowAnchor():
global row_anchor
global table_anchor
row_anchor += 1
return currentRowAnchor()
def tr(x, anchor=None):
    # return '<tr onclick="location.href=\'#{a}\'" id={a}>{x}</tr>'.format(a=a, x=str(x))
    anchor = anchor if anchor else advanceRowAnchor()
    return f"<tr id={anchor}>{x}</tr>"


def td(value, cell_attributes=""):
    return "<td {cell_attributes}>{value}</td>".format(
        cell_attributes=cell_attributes, value=value
    )


def th(value, cell_attributes=""):
    return "<th {cell_attributes}>{value}</th>".format(
        cell_attributes=cell_attributes, value=value
    )
def tableRow(cell_values, cell_attributes=[], anchor=None):
return tr(
"".join(
[
td(v, a)
for v, a in itertools.zip_longest(
cell_values, cell_attributes, fillvalue=""
)
if a is not None and v is not None
]
),
anchor,
)
def tableHeader(cell_values, cell_attributes=[]):
return tr(
"".join(
[
th(v, a)
for v, a in itertools.zip_longest(
cell_values, cell_attributes, fillvalue=""
)
if a is not None and v is not None
]
)
)
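# Illustrative example of the helpers above (the exact row id depends on the
# current anchor counters):
#   tableRow(["1.23", "q1"], ['style="background: #ffb0c0"'])
#   -> '<tr id=0.1><td style="background: #ffb0c0">1.23</td><td >q1</td></tr>'
# A None cell value or attribute drops that cell entirely, which is how the
# hidden flag columns are kept out of the rendered tables.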
def tableStart(title):
cls = "-".join(title.lower().split(" ")[:3])
global table_anchor
table_anchor = cls
anchor = currentTableAnchor()
help_anchor = "-".join(title.lower().split(" "))
    return f"""
<h2 id="{anchor}"><a class="cancela" href="#{anchor}">{title}</a> <a class="cancela" href="https://github.com/ClickHouse/ClickHouse/tree/master/docker/test/performance-comparison#{help_anchor}"><sup style="color: #888">?</sup></a></h2>
<table class="{cls}">
"""
def tableEnd():
    return "</table>"
def tsvRows(n):
try:
with open(n, encoding="utf-8") as fd:
result = []
for row in csv.reader(fd, delimiter="\t", quoting=csv.QUOTE_NONE):
new_row = []
for e in row:
# The first one .encode('utf-8').decode('unicode-escape') decodes the escape characters from the strings.
# The second one (encode('latin1').decode('utf-8')) fixes the changes with unicode vs utf-8 chars, so
# 'Чем зÐ�нимаеÑ�ЬÑ�Ñ�' is transformed back into 'Чем зАнимаешЬся'.
new_row.append(
e.encode("utf-8")
.decode("unicode-escape")
.encode("latin1")
.decode("utf-8")
)
result.append(new_row)
return result
    except Exception:
        report_errors.append(traceback.format_exception_only(*sys.exc_info()[:2])[-1])
return []
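# The two round-trips in tsvRows() can be sanity-checked in isolation
# (illustrative values):
#   "\\t".encode("utf-8").decode("unicode-escape")  # -> "\t" (escape decoded)
#   "Ð§ÐµÐ¼".encode("latin1").decode("utf-8")        # -> "Чем" (mojibake fixed)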
def htmlRows(n):
rawRows = tsvRows(n)
result = ""
for row in rawRows:
result += tableRow(row)
return result
def addSimpleTable(caption, columns, rows, pos=None):
global tables
text = ""
if not rows:
return
text += tableStart(caption)
text += tableHeader(columns)
for row in rows:
text += tableRow(row)
text += tableEnd()
    # Check `pos is not None` so that pos=0 really inserts at the top.
    tables.insert(pos if pos is not None else len(tables), text)
def add_tested_commits():
global report_errors
try:
addSimpleTable(
"Tested Commits",
["Old", "New"],
[
[
"
{}
".format(x)
for x in [
open("left-commit.txt").read(),
open("right-commit.txt").read(),
]
]
],
)
    except Exception:
        # Don't fail if no commit info -- maybe it's a manual run.
        report_errors.append(traceback.format_exception_only(*sys.exc_info()[:2])[-1])
def add_report_errors():
global tables
global report_errors
# Add the errors reported by various steps of comparison script
try:
report_errors += [l.strip() for l in open("report/errors.log")]
    except Exception:
        report_errors.append(traceback.format_exception_only(*sys.exc_info()[:2])[-1])
if not report_errors:
return
text = tableStart("Errors while Building the Report")
text += tableHeader(["Error"])
for x in report_errors:
text += tableRow([x])
text += tableEnd()
# Insert after Tested Commits
tables.insert(1, text)
errors_explained.append(
[
            f'<a href="#{currentTableAnchor()}">There were some errors while building the report</a>'
]
)
def add_errors_explained():
if not errors_explained:
return
    text = '<a name="fail1"/>'
text += tableStart("Error Summary")
text += tableHeader(["Description"])
for row in errors_explained:
text += tableRow(row)
text += tableEnd()
global tables
tables.insert(1, text)
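# The main report is assembled as a list of HTML table fragments in `tables`;
# everything is printed at the end, after all data files have been read.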
if args.report == "main":
    print(header_template)
add_tested_commits()
run_error_rows = tsvRows("run-errors.tsv")
error_tests += len(run_error_rows)
addSimpleTable("Run Errors", ["Test", "Error"], run_error_rows)
if run_error_rows:
errors_explained.append(
[
                f'<a href="#{currentTableAnchor()}">There were some errors while running the tests</a>'
]
)
slow_on_client_rows = tsvRows("report/slow-on-client.tsv")
error_tests += len(slow_on_client_rows)
addSimpleTable(
"Slow on Client",
["Client time, s", "Server time, s", "Ratio", "Test", "Query"],
slow_on_client_rows,
)
if slow_on_client_rows:
errors_explained.append(
[
                f'<a href="#{currentTableAnchor()}">Some queries are taking noticeable time client-side (missing `FORMAT Null`?)</a>'
]
)
def add_partial():
rows = tsvRows("report/partial-queries-report.tsv")
if not rows:
return
global unstable_partial_queries, slow_average_tests, tables
text = tableStart("Partial Queries")
columns = ["Median time, s", "Relative time variance", "Test", "#", "Query"]
text += tableHeader(columns)
attrs = ["" for c in columns]
for row in rows:
anchor = f"{currentTableAnchor()}.{row[2]}.{row[3]}"
if float(row[1]) > 0.10:
attrs[1] = f'style="background: {color_bad}"'
unstable_partial_queries += 1
errors_explained.append(
[
f"
The query no. {row[3]} of test '{row[2]}' has excessive variance of run time. Keep it below 10%"
]
)
else:
attrs[1] = ""
if float(row[0]) > allowed_single_run_time:
attrs[0] = f'style="background: {color_bad}"'
errors_explained.append(
[
                        f'<a href="#{anchor}">The query no. {row[3]} of test \'{row[2]}\' is taking too long to run. Keep the run time below {allowed_single_run_time} seconds</a>'
]
)
slow_average_tests += 1
else:
attrs[0] = ""
text += tableRow(row, attrs, anchor)
text += tableEnd()
tables.append(text)
add_partial()
def add_changes():
rows = tsvRows("report/changed-perf.tsv")
if not rows:
return
global faster_queries, slower_queries, tables
text = tableStart("Changes in Performance")
columns = [
"Old, s", # 0
"New, s", # 1
"Ratio of speedup (-) or slowdown (+)", # 2
"Relative difference (new − old) / old", # 3
"p < 0.01 threshold", # 4
"", # Failed # 5
"Test", # 6
"#", # 7
"Query", # 8
]
attrs = ["" for c in columns]
attrs[5] = None
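        # A None attribute makes tableRow() drop this cell, so the raw "Failed"
        # flag is read below but never rendered.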
text += tableHeader(columns, attrs)
for row in rows:
anchor = f"{currentTableAnchor()}.{row[6]}.{row[7]}"
if int(row[5]):
if float(row[3]) < 0.0:
faster_queries += 1
attrs[2] = attrs[3] = f'style="background: {color_good}"'
else:
slower_queries += 1
attrs[2] = attrs[3] = f'style="background: {color_bad}"'
errors_explained.append(
[
f"
The query no. {row[7]} of test '{row[6]}' has slowed down"
]
)
else:
attrs[2] = attrs[3] = ""
text += tableRow(row, attrs, anchor)
text += tableEnd()
tables.append(text)
add_changes()
def add_unstable_queries():
global unstable_queries, very_unstable_queries, tables
unstable_rows = tsvRows("report/unstable-queries.tsv")
if not unstable_rows:
return
unstable_queries += len(unstable_rows)
columns = [
"Old, s", # 0
"New, s", # 1
"Relative difference (new - old)/old", # 2
"p < 0.01 threshold", # 3
"", # Failed #4
"Test", # 5
"#", # 6
"Query", # 7
]
attrs = ["" for c in columns]
attrs[4] = None
text = tableStart("Unstable Queries")
text += tableHeader(columns, attrs)
for r in unstable_rows:
anchor = f"{currentTableAnchor()}.{r[5]}.{r[6]}"
if int(r[4]):
very_unstable_queries += 1
attrs[3] = f'style="background: {color_bad}"'
else:
attrs[3] = ""
# Just don't add the slightly unstable queries we don't consider
# errors. It's not clear what the user should do with them.
continue
text += tableRow(r, attrs, anchor)
text += tableEnd()
# Don't add an empty table.
if very_unstable_queries:
tables.append(text)
add_unstable_queries()
skipped_tests_rows = tsvRows("analyze/skipped-tests.tsv")
addSimpleTable("Skipped Tests", ["Test", "Reason"], skipped_tests_rows)
addSimpleTable(
"Test Performance Changes",
[
"Test",
"Ratio of speedup (-) or slowdown (+)",
"Queries",
"Total not OK",
"Changed perf",
"Unstable",
],
tsvRows("report/test-perf-changes.tsv"),
)
def add_test_times():
global slow_average_tests, tables
rows = tsvRows("report/test-times.tsv")
if not rows:
return
columns = [
"Test", # 0
"Wall clock time, entire test, s", # 1
"Total client time for measured query runs, s", # 2
"Queries", # 3
"Longest query, total for measured runs, s", # 4
"Wall clock time per query, s", # 5
"Shortest query, total for measured runs, s", # 6
"", # Runs #7
]
attrs = ["" for c in columns]
attrs[7] = None
text = tableStart("Test Times")
text += tableHeader(columns, attrs)
allowed_average_run_time = 3.75 # 60 seconds per test at (7 + 1) * 2 runs
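        # i.e. 3.75 s * (7 + 1) * 2 = 60 s with the usual 7 measured runs per server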
for r in rows:
anchor = f"{currentTableAnchor()}.{r[0]}"
total_runs = (int(r[7]) + 1) * 2 # one prewarm run, two servers
if r[0] != "Total" and float(r[5]) > allowed_average_run_time * total_runs:
# FIXME should be 15s max -- investigate parallel_insert
slow_average_tests += 1
attrs[5] = f'style="background: {color_bad}"'
errors_explained.append(
[
f"
The test '{r[0]}' is too slow to run as a whole. Investigate whether the create and fill queries can be sped up"
]
)
else:
attrs[5] = ""
if r[0] != "Total" and float(r[4]) > allowed_single_run_time * total_runs:
slow_average_tests += 1
attrs[4] = f'style="background: {color_bad}"'
errors_explained.append(
[
f"Some query of the test '{r[0]}' is too slow to run. See the all queries report"
]
)
else:
attrs[4] = ""
text += tableRow(r, attrs, anchor)
text += tableEnd()
tables.append(text)
add_test_times()
addSimpleTable(
"Metric Changes",
[
"Metric",
"Old median value",
"New median value",
"Relative difference",
"Times difference",
],
tsvRows("metrics/changes.tsv"),
)
add_report_errors()
add_errors_explained()
for t in tables:
print(t)
print(
f"""