ClickHouse/docker/test/performance-comparison/report.py

392 lines
12 KiB
Python
Raw Normal View History

2020-01-23 17:48:26 +00:00
#!/usr/bin/python3
2020-04-02 18:44:58 +00:00
import argparse
2020-03-24 17:33:18 +00:00
import ast
2020-01-23 17:48:26 +00:00
import collections
import csv
2020-02-27 17:57:08 +00:00
import itertools
2020-01-23 17:48:26 +00:00
import os
import sys
2020-02-26 16:18:58 +00:00
import traceback
2020-01-23 17:48:26 +00:00
2020-04-02 18:44:58 +00:00
# Command-line interface. The single option selects which of the two report
# variants handled further down ('main' or 'all-queries') gets built.
parser = argparse.ArgumentParser(
    description='Create performance test report')
parser.add_argument(
    '--report',
    choices=['main', 'all-queries'],
    default='main',
    help='Which report to build')
args = parser.parse_args()
2020-02-26 16:18:58 +00:00
# Messages about problems hit while building the report itself; tsvRows()
# appends to this list when an input file cannot be read.
report_errors = list()

# Module-level tallies that the report code updates while printing tables and
# later uses to derive the overall status and message.
error_tests = slow_average_tests = 0
faster_queries = slower_queries = 0
unstable_queries = very_unstable_queries = 0
2020-02-26 16:18:58 +00:00
2020-04-02 18:44:58 +00:00
# HTML prologue shared by both reports. It is rendered with str.format()
# (called without arguments), so every literal CSS brace is doubled.
#
# Two markup defects are fixed relative to the previous text:
#   * the `th, td` rule is now closed before the `td` rule starts (it used to
#     swallow the `td` rule and the trailing box-shadow declaration);
#   * the <head> element is now opened -- previously only </head> was present.
header_template = """
<!DOCTYPE html>
<html>
<head>
<style>
@font-face {{
font-family:'Yandex Sans Display Web';
src:url(https://yastatic.net/adv-www/_/H63jN0veW07XQUIA2317lr9UIm8.eot);
src:url(https://yastatic.net/adv-www/_/H63jN0veW07XQUIA2317lr9UIm8.eot?#iefix) format('embedded-opentype'),
url(https://yastatic.net/adv-www/_/sUYVCPUAQE7ExrvMS7FoISoO83s.woff2) format('woff2'),
url(https://yastatic.net/adv-www/_/v2Sve_obH3rKm6rKrtSQpf-eB7U.woff) format('woff'),
url(https://yastatic.net/adv-www/_/PzD8hWLMunow5i3RfJ6WQJAL7aI.ttf) format('truetype'),
url(https://yastatic.net/adv-www/_/lF_KG5g4tpQNlYIgA0e77fBSZ5s.svg#YandexSansDisplayWeb-Regular) format('svg');
font-weight:400;
font-style:normal;
font-stretch:normal
}}
body {{ font-family: "Yandex Sans Display Web", Arial, sans-serif; background: #EEE; }}
h1 {{ margin-left: 10px; }}
th, td {{ border: 0; padding: 5px 10px 5px 10px; text-align: left; vertical-align: top; line-height: 1.5; background-color: #FFF; box-shadow: 0 0 0 1px rgba(0, 0, 0, 0.05), 0 8px 25px -5px rgba(0, 0, 0, 0.1); }}
td {{ white-space: pre; font-family: Monospace, Courier New; }}
a {{ color: #06F; text-decoration: none; }}
a:hover, a:active {{ color: #F40; text-decoration: underline; }}
table {{ border: 0; }}
.main {{ margin-left: 10%; }}
p.links a {{ padding: 5px; margin: 3px; background: #FFF; line-height: 2; white-space: nowrap; box-shadow: 0 0 0 1px rgba(0, 0, 0, 0.05), 0 8px 25px -5px rgba(0, 0, 0, 0.1); }}
.cancela,.cancela:link,.cancela:visited,.cancela:hover,.cancela:focus,.cancela:active{{
color: inherit;
text-decoration: none;
}}
tr:nth-child(odd) td {{filter: brightness(95%);}}
</style>
<title>Clickhouse performance comparison</title>
</head>
<body>
<div class="main">
<h1>ClickHouse performance comparison</h1>
"""
2020-02-26 16:18:58 +00:00
2020-02-10 16:34:07 +00:00
# Running counters behind the generated HTML anchors. Note that row_anchor is
# never reset, so row numbers keep growing across tables.
table_anchor = 0
row_anchor = 0


def nextTableAnchor():
    """Advance the table counter and return its new value as a string."""
    global table_anchor
    table_anchor = table_anchor + 1
    return '{}'.format(table_anchor)


def nextRowAnchor():
    """Advance the row counter and return '<current table>.<new row>'."""
    global row_anchor
    row_anchor = row_anchor + 1
    # table_anchor is only read here, so no `global` declaration is needed.
    return '{}.{}'.format(table_anchor, row_anchor)
2020-01-23 17:48:26 +00:00
def tr(x):
    """Wrap *x* in a <tr> element whose id is a freshly generated row anchor.

    *x* is converted with str() and inserted as-is (no HTML escaping).
    """
    anchor = nextRowAnchor()
    # A variant that also made the row clickable
    # (onclick="location.href='#<anchor>'") is intentionally not emitted.
    return '<tr id=' + anchor + '>' + str(x) + '</tr>'
2020-01-23 17:48:26 +00:00
2020-02-27 17:57:08 +00:00
def td(value, cell_attributes = ''):
    """Return a <td> element holding *value*.

    *cell_attributes* is pasted verbatim after the tag name, e.g.
    'style="background: #ffb0a0"'. Nothing is HTML-escaped.
    """
    cell = '<td {}>{}</td>'
    return cell.format(cell_attributes, value)
2020-01-23 17:48:26 +00:00
def th(x):
    """Return a <th> element holding str(x); nothing is HTML-escaped."""
    return ''.join(('<th>', str(x), '</th>'))
2020-02-27 17:57:08 +00:00
def tableRow(cell_values, cell_attributes = ()):
    """Return one <tr> row with a <td> per cell.

    cell_values     -- iterable of cell contents.
    cell_attributes -- optional iterable of attribute strings, matched to the
                       cells by position.

    The two iterables are paired with zip_longest and padded with '', so a
    missing attribute means "no attributes", and surplus attributes produce
    extra empty cells.

    The default is an immutable empty tuple rather than a shared list, so it
    can never be modified by accident between calls.
    """
    pairs = itertools.zip_longest(cell_values, cell_attributes, fillvalue = '')
    return tr(''.join(td(value, attributes) for value, attributes in pairs))
2020-01-23 17:48:26 +00:00
2020-02-26 16:18:58 +00:00
def tableHeader(r):
    """Return a <tr> row made of one <th> cell per item of *r*."""
    header_cells = map(th, r)
    return tr(''.join(header_cells))
2020-02-26 16:18:58 +00:00
def tableStart(title):
    """Return the opening markup of a table: a linked <h2> and '<table>'.

    A new table anchor is allocated and used both as the heading id and as
    the target of the heading's self-link.
    """
    anchor = nextTableAnchor()
    heading = '<h2 id="{0}"><a class="cancela" href="#{0}">{1}</a></h2>'.format(
        anchor, title)
    return '\n' + heading + '\n<table>'
def tableEnd():
    """Return the closing tag that matches the markup from tableStart()."""
    closing_tag = "</table>"
    return closing_tag
2020-02-27 17:57:08 +00:00
def tsvRows(n):
    """Read the tab-separated file *n* and return its rows as lists of strings.

    The file is decoded as UTF-8 and parsed by csv.reader with '"' as the
    quote character.

    This is best effort: if the file cannot be opened or parsed, a one-line
    description of the exception is appended to the module-level
    report_errors list and an empty list is returned.
    """
    try:
        # newline='' is what the csv module asks for, so that line breaks
        # embedded in quoted fields reach the parser unchanged.
        with open(n, encoding='utf-8', newline='') as fd:
            return list(csv.reader(fd, delimiter="\t", quotechar='"'))
    except Exception:
        # Only ordinary errors are recorded; KeyboardInterrupt and SystemExit
        # still propagate, unlike with a bare `except:`.
        report_errors.append(
            traceback.format_exception_only(
                *sys.exc_info()[:2])[-1])
    return []
2020-02-27 17:57:08 +00:00
def htmlRows(n):
    """Return the rows of the TSV file *n* rendered as concatenated <tr> rows.

    Yields an empty string when tsvRows() returns no rows.
    """
    return ''.join(tableRow(cells) for cells in tsvRows(n))
2020-02-27 17:57:08 +00:00
def printSimpleTable(caption, columns, rows):
    """Print a complete HTML table to stdout, or nothing when *rows* is empty.

    caption -- table title, passed to tableStart().
    columns -- header cell texts.
    rows    -- sequence of rows, each an iterable of cell values.
    """
    if rows:
        print(tableStart(caption))
        print(tableHeader(columns))
        for cells in rows:
            print(tableRow(cells))
        print(tableEnd())
2020-04-02 18:44:58 +00:00
def _read_text(path):
    """Return the full contents of the text file *path*, closing it afterwards."""
    with open(path) as fd:
        return fd.read()


def _print_tested_commits():
    """Print the 'Tested commits' table that opens both kinds of report."""
    # NOTE(review): the file contents go into <pre> without HTML escaping --
    # confirm whether they are expected to carry markup.
    printSimpleTable('Tested commits', ['Old', 'New'],
        [['<pre>{}</pre>'.format(_read_text(name))
            for name in ('left-commit.txt', 'right-commit.txt')]])


# The report is written to stdout. Input files are read from the current
# directory; TSV inputs that cannot be read are recorded in report_errors by
# tsvRows() instead of aborting the report.
if args.report == 'main':
    print(header_template.format())

    _print_tested_commits()

    def print_changes():
        """Print the 'Changes in performance' table and count faster/slower queries."""
        global faster_queries, slower_queries

        rows = tsvRows('changed-perf.tsv')
        if not rows:
            return

        print(tableStart('Changes in performance'))

        columns = [
            'Old, s',                                           # 0
            'New, s',                                           # 1
            'Relative difference (new&nbsp;-&nbsp;old)/old',    # 2
            'Randomization distribution quantiles '
            '[5%,&nbsp;50%,&nbsp;95%,&nbsp;99%]',               # 3
            'Test',                                             # 4
            'Query',                                            # 5
            ]

        print(tableHeader(columns))

        attrs = [''] * len(columns)
        for row in rows:
            # Colour the relative difference: blue for faster, red otherwise.
            if float(row[2]) < 0.:
                faster_queries += 1
                attrs[2] = 'style="background: #adbdff"'
            else:
                slower_queries += 1
                attrs[2] = 'style="background: #ffb0a0"'

            print(tableRow(row, attrs))

        print(tableEnd())

    print_changes()

    # Every 'slow on client' row is counted as an error.
    slow_on_client_rows = tsvRows('slow-on-client.tsv')
    error_tests += len(slow_on_client_rows)
    printSimpleTable('Slow on client',
        ['Client time, s', 'Server time, s', 'Ratio', 'Query'],
        slow_on_client_rows)

    def print_unstable_queries():
        """Print the 'Unstable queries' table and count (very) unstable queries."""
        global unstable_queries
        global very_unstable_queries

        unstable_rows = tsvRows('unstable-queries.tsv')
        if not unstable_rows:
            return

        unstable_queries += len(unstable_rows)

        columns = [
            'Old, s',                                           # 0
            'New, s',                                           # 1
            'Relative difference (new&nbsp;-&nbsp;old)/old',    # 2
            'Randomization distribution quantiles [5%,&nbsp;50%,&nbsp;95%,&nbsp;99%]',  # 3
            'Test',                                             # 4
            'Query'                                             # 5
            ]

        print(tableStart('Unstable queries'))
        print(tableHeader(columns))

        attrs = [''] * len(columns)
        for r in unstable_rows:
            # Column 3 holds a Python-literal sequence of quantiles.
            rd = ast.literal_eval(r[3])
            # Note the zero-based array index, this is rd[3] in SQL.
            if rd[2] > 0.2:
                very_unstable_queries += 1
                attrs[3] = 'style="background: #ffb0a0"'
            else:
                attrs[3] = ''

            print(tableRow(r, attrs))

        print(tableEnd())

    print_unstable_queries()

    # Every run error is counted as an error as well.
    run_error_rows = tsvRows('run-errors.tsv')
    error_tests += len(run_error_rows)
    printSimpleTable('Run errors', ['Test', 'Error'], run_error_rows)

    skipped_tests_rows = tsvRows('skipped-tests.tsv')
    printSimpleTable('Skipped tests', ['Test', 'Reason'], skipped_tests_rows)

    printSimpleTable('Tests with most unstable queries',
        ['Test', 'Unstable', 'Changed perf', 'Total not OK'],
        tsvRows('bad-tests.tsv'))

    def print_test_times():
        """Print the 'Test times' table and count the cells that are too slow."""
        global slow_average_tests

        rows = tsvRows('test-times.tsv')
        if not rows:
            return

        columns = [
            'Test',                                          # 0
            'Wall clock time, s',                            # 1
            'Total client time, s',                          # 2
            'Total queries',                                 # 3
            'Ignored short queries',                         # 4
            'Longest query<br>(sum for all runs), s',        # 5
            'Avg wall clock time<br>(sum for all runs), s',  # 6
            'Shortest query<br>(sum for all runs), s',       # 7
            ]

        print(tableStart('Test times'))
        print(tableHeader(columns))

        runs = 11  # FIXME pass this as an argument

        attrs = [''] * len(columns)
        for r in rows:
            # Both checks bump the same counter, so a single test may be
            # counted twice.
            if float(r[6]) > 3 * runs:
                # FIXME should be 15s max -- investigate parallel_insert
                slow_average_tests += 1
                attrs[6] = 'style="background: #ffb0a0"'
            else:
                attrs[6] = ''

            if float(r[5]) > 4 * runs:
                slow_average_tests += 1
                attrs[5] = 'style="background: #ffb0a0"'
            else:
                attrs[5] = ''

            print(tableRow(r, attrs))

        print(tableEnd())

    print_test_times()

    # Add the errors reported by various steps of comparison script
    with open('report-errors.rep') as fd:
        report_errors += [line.strip() for line in fd]

    if report_errors:
        print(tableStart('Errors while building the report'))
        print(tableHeader(['Error']))
        for x in report_errors:
            print(tableRow([x]))
        print(tableEnd())

    print("""
<p class="links">
<a href="output.7z">Test output</a>
<a href="all-queries.html">All queries</a>
<a href="compare.log">Log</a>
</p>
</body>
</html>
""")

    # Derive the overall status and a short message from the counters; both
    # are appended to the output as HTML comments.
    status = 'success'
    message = 'See the report'
    message_array = []

    if slow_average_tests:
        status = 'failure'
        message_array.append(str(slow_average_tests) + ' too long')

    if faster_queries:
        message_array.append(str(faster_queries) + ' faster')

    if slower_queries:
        status = 'failure'
        message_array.append(str(slower_queries) + ' slower')

    if unstable_queries:
        message_array.append(str(unstable_queries) + ' unstable')

    if very_unstable_queries:
        status = 'failure'

    # Tests that are too slow are also added to the error count.
    error_tests += slow_average_tests
    if error_tests:
        status = 'failure'
        message_array.append(str(error_tests) + ' errors')

    if message_array:
        message = ', '.join(message_array)

    # Problems with building the report override any other message.
    if report_errors:
        status = 'failure'
        message = 'Errors while building the report.'

    print("""
<!--status: {status}-->
<!--message: {message}-->
""".format(status=status, message=message))

elif args.report == 'all-queries':
    print(header_template.format())

    _print_tested_commits()

    def print_all_queries():
        """Print the 'All query times' table, colouring changes beyond 5%."""
        rows = tsvRows('all-queries.tsv')
        if not rows:
            return

        columns = [
            'Old, s',                                           # 0
            'New, s',                                           # 1
            'Relative difference (new&nbsp;-&nbsp;old)/old',    # 2
            'Times speedup/slowdown',                           # 3
            'Randomization distribution quantiles '
            '[5%,&nbsp;50%,&nbsp;95%,&nbsp;99%]',               # 4
            'Test',                                             # 5
            'Query',                                            # 6
            ]

        print(tableStart('All query times'))
        print(tableHeader(columns))

        attrs = [''] * len(columns)
        for r in rows:
            # The colour is chosen from column 2 but applied to column 3.
            if float(r[2]) > 0.05:
                attrs[3] = 'style="background: #ffb0a0"'
            elif float(r[2]) < -0.05:
                attrs[3] = 'style="background: #adbdff"'
            else:
                attrs[3] = ''

            print(tableRow(r, attrs))

        print(tableEnd())

    print_all_queries()

    print("""
<p class="links">
<a href="output.7z">Test output</a>
<a href="report.html">Main report</a>
<a href="compare.log">Log</a>
</p>
</body>
</html>
""")