Performance comparison fixes

Alexander Kuzmenkov 2020-04-02 21:44:58 +03:00
parent 4075f26583
commit cd88b5380c
6 changed files with 270 additions and 222 deletions

compare.sh

@@ -97,10 +97,6 @@ function run_tests
         touch "$x"
     done
 
-    # FIXME remove some broken long tests
-    rm "$test_prefix"/{IPv4,IPv6,modulo,parse_engine_file,number_formatting_formats,select_format}.xml ||:
-
-    test_files=$(ls "$test_prefix"/*.xml)
 
     # FIXME a quick crutch to bring the run time down for the unstable tests --
     # if some performance tests xmls were changed in a PR, run only these ones.
@@ -126,6 +122,17 @@ function run_tests
         test_files=$(ls "$test_prefix"/$CHPC_TEST_GLOB.xml)
     fi
 
+    if [ "$test_files" == "" ]
+    then
+        # FIXME remove some broken long tests
+        for test_name in {IPv4,IPv6,modulo,parse_engine_file,number_formatting_formats,select_format,arithmetic,cryptographic_hashes,logical_functions_{medium,small}}
+        do
+            printf "$test_name\tMarked as broken (see compare.sh)\n" >> skipped-tests.tsv
+            rm "$test_prefix/$test_name.xml" ||:
+        done
+        test_files=$(ls "$test_prefix"/*.xml)
+    fi
+
     # Run the tests.
     test_name="<none>"
     for test in $test_files
@@ -275,9 +282,11 @@ create table test_times_tsv engine File(TSV, 'test-times.tsv') as
     from test_time join wall_clock using test
     order by avg_real_per_query desc;
 
-create table all_queries_tsv engine File(TSV, 'all-queries.tsv') as
-    select left, right, diff, rd, test, query
-    from queries order by rd[3] desc;
+create table all_tests_tsv engine File(TSV, 'all-queries.tsv') as
+    select left, right, diff,
+        floor(left > right ? left / right : right / left, 3),
+        rd, test, query
+    from queries order by test, query;
 " 2> >(head -2 >> report-errors.rep) ||:
 
 for version in {right,left}
@@ -429,6 +438,7 @@ case "$stage" in
 "report")
     time report ||:
 
+    time "$script_dir/report.py" --report=all-queries > all-queries.html 2> >(head -2 >> report-errors.rep) ||:
     time "$script_dir/report.py" > report.html
     ;&
 esac
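
A note on the new column written to all-queries.tsv: floor(left > right ? left / right : right / left, 3) is the symmetric "times slower/faster" factor between the two server versions (report.py labels it 'Times speedup/slowdown'). A minimal Python sketch of the same arithmetic, using hypothetical timings rather than values from this commit:

    import math

    def times_ratio(left, right):
        # Same as the SQL floor(left > right ? left / right : right / left, 3):
        # ratio of the slower time to the faster one, truncated to 3 decimals.
        ratio = left / right if left > right else right / left
        return math.floor(ratio * 1000) / 1000

    # Hypothetical timings: old (left) 0.5 s, new (right) 0.25 s -> 2.0x change.
    print(times_ratio(0.5, 0.25))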

entrypoint.sh

@@ -90,17 +90,23 @@ export PYTHONIOENCODING=utf-8
 # Use a default number of runs if not told otherwise
 export CHPC_RUNS=${CHPC_RUNS:-7}
 
+# By default, use the main comparison script from the tested package, so that we
+# can change it in PRs.
+script_path="right/scripts"
+if [ -v CHPC_LOCAL_SCRIPT ]
+then
+    script_path=".."
+fi
+
 # Even if we have some errors, try our best to save the logs.
 set +e
 
-# Use main comparison script from the tested package, so that we can change it
-# in PRs.
 # Older version use 'kill 0', so put the script into a separate process group
 # FIXME remove set +m in April 2020
 set +m
 { \
     time ../download.sh "$REF_PR" "$REF_SHA" "$PR_TO_TEST" "$SHA_TO_TEST" && \
-    time stage=configure right/scripts/compare.sh ; \
+    time stage=configure "$script_path"/compare.sh ; \
 } 2>&1 | ts "$(printf '%%Y-%%m-%%d %%H:%%M:%%S\t')" | tee compare.log
 set -m
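
For readers unfamiliar with bash's [ -v VAR ]: it tests whether VAR is set at all, even to an empty string. An equivalent sketch of the new path selection in Python (illustration only, not part of the commit):

    import os

    # 'right/scripts' is the copy shipped in the tested package; setting
    # CHPC_LOCAL_SCRIPT (to anything, even '') opts into the local checkout.
    script_path = '..' if 'CHPC_LOCAL_SCRIPT' in os.environ else 'right/scripts'
    print(script_path)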

report.py

@@ -1,5 +1,6 @@
 #!/usr/bin/python3
+import argparse
 import ast
 import collections
 import csv
@@ -8,6 +9,11 @@ import os
 import sys
 import traceback
 
+parser = argparse.ArgumentParser(description='Create performance test report')
+parser.add_argument('--report', default='main', choices=['main', 'all-queries'],
+    help='Which report to build')
+args = parser.parse_args()
+
 report_errors = []
 error_tests = 0
 slow_average_tests = 0
@@ -16,7 +22,7 @@ slower_queries = 0
 unstable_queries = 0
 very_unstable_queries = 0
 
-print("""
+header_template = """
 <!DOCTYPE html>
 <html>
 <style>
@@ -56,7 +62,7 @@ tr:nth-child(odd) td {{filter: brightness(95%);}}
 <div class="main">
 
 <h1>ClickHouse performance comparison</h1>
-""".format())
+"""
 
 table_anchor = 0
 row_anchor = 0
@@ -133,195 +139,252 @@ def printSimpleTable(caption, columns, rows):
         print(tableRow(row))
     print(tableEnd())
 
-printSimpleTable('Tested commits', ['Old', 'New'],
-    [['<pre>{}</pre>'.format(x) for x in
-        [open('left-commit.txt').read(),
-            open('right-commit.txt').read()]]])
-
-def print_changes():
-    rows = tsvRows('changed-perf.tsv')
-    if not rows:
-        return
-
-    global faster_queries, slower_queries
-
-    print(tableStart('Changes in performance'))
-    columns = [
-        'Old, s',                                          # 0
-        'New, s',                                          # 1
-        'Relative difference (new&nbsp;-&nbsp;old)/old',   # 2
-        'Randomization distribution quantiles \
-            [5%,&nbsp;50%,&nbsp;95%,&nbsp;99%]',           # 3
-        'Test',                                            # 4
-        'Query',                                           # 5
-    ]
-
-    print(tableHeader(columns))
-
-    attrs = ['' for c in columns]
-    for row in rows:
-        if float(row[2]) < 0.:
-            faster_queries += 1
-            attrs[2] = 'style="background: #adbdff"'
-        else:
-            slower_queries += 1
-            attrs[2] = 'style="background: #ffb0a0"'
-
-        print(tableRow(row, attrs))
-
-    print(tableEnd())
-
-print_changes()
-
-slow_on_client_rows = tsvRows('slow-on-client.tsv')
-error_tests += len(slow_on_client_rows)
-printSimpleTable('Slow on client',
-    ['Client time, s', 'Server time, s', 'Ratio', 'Query'],
-    slow_on_client_rows)
-
-def print_unstable_queries():
-    global unstable_queries
-    global very_unstable_queries
-
-    unstable_rows = tsvRows('unstable-queries.tsv')
-    if not unstable_rows:
-        return
-
-    unstable_queries += len(unstable_rows)
-
-    columns = [
-        'Old, s', #0
-        'New, s', #1
-        'Relative difference (new&nbsp;-&nbsp;old)/old', #2
-        'Randomization distribution quantiles [5%,&nbsp;50%,&nbsp;95%,&nbsp;99%]', #3
-        'Test', #4
-        'Query' #5
-    ]
-
-    print(tableStart('Unstable queries'))
-    print(tableHeader(columns))
-
-    attrs = ['' for c in columns]
-    for r in unstable_rows:
-        rd = ast.literal_eval(r[3])
-        # Note the zero-based array index, this is rd[3] in SQL.
-        if rd[2] > 0.2:
-            very_unstable_queries += 1
-            attrs[3] = 'style="background: #ffb0a0"'
-        else:
-            attrs[3] = ''
-
-        print(tableRow(r, attrs))
-
-    print(tableEnd())
-
-print_unstable_queries()
-
-run_error_rows = tsvRows('run-errors.tsv')
-error_tests += len(run_error_rows)
-printSimpleTable('Run errors', ['Test', 'Error'], run_error_rows)
-
-skipped_tests_rows = tsvRows('skipped-tests.tsv')
-printSimpleTable('Skipped tests', ['Test', 'Reason'], skipped_tests_rows)
-
-printSimpleTable('Tests with most unstable queries',
-    ['Test', 'Unstable', 'Changed perf', 'Total not OK'],
-    tsvRows('bad-tests.tsv'))
-
-def print_test_times():
-    global slow_average_tests
-    rows = tsvRows('test-times.tsv')
-    if not rows:
-        return
-
-    columns = [
-        'Test', #0
-        'Wall clock time, s', #1
-        'Total client time, s', #2
-        'Total queries', #3
-        'Ignored short queries', #4
-        'Longest query<br>(sum for all runs), s', #5
-        'Avg wall clock time<br>(sum for all runs), s', #6
-        'Shortest query<br>(sum for all runs), s', #7
-    ]
-
-    print(tableStart('Test times'))
-    print(tableHeader(columns))
-
-    attrs = ['' for c in columns]
-    for r in rows:
-        if float(r[6]) > 22:
-            # FIXME should be 15s max -- investigate parallel_insert
-            slow_average_tests += 1
-            attrs[6] = 'style="background: #ffb0a0"'
-        else:
-            attrs[6] = ''
-
-        if float(r[5]) > 30:
-            slow_average_tests += 1
-            attrs[5] = 'style="background: #ffb0a0"'
-        else:
-            attrs[5] = ''
-
-        print(tableRow(r, attrs))
-
-    print(tableEnd())
-
-print_test_times()
-
-# Add the errors reported by various steps of comparison script
-report_errors += [l.strip() for l in open('report-errors.rep')]
-if len(report_errors):
-    print(tableStart('Errors while building the report'))
-    print(tableHeader(['Error']))
-    for x in report_errors:
-        print(tableRow([x]))
-    print(tableEnd())
-
-print("""
-<p class="links">
-<a href="output.7z">Test output</a>
-<a href="compare.log">Log</a>
-</p>
-</body>
-</html>
-""")
-
-status = 'success'
-message = 'See the report'
-message_array = []
-
-if slow_average_tests:
-    status = 'failure'
-    message_array.append(str(slow_average_tests) + ' too long')
-
-if faster_queries:
-    message_array.append(str(faster_queries) + ' faster')
-
-if slower_queries:
-    status = 'failure'
-    message_array.append(str(slower_queries) + ' slower')
-
-if unstable_queries:
-    message_array.append(str(unstable_queries) + ' unstable')
-
-if very_unstable_queries:
-    status = 'failure'
-
-error_tests += slow_average_tests
-if error_tests:
-    status = 'failure'
-    message_array.append(str(error_tests) + ' errors')
-
-if message_array:
-    message = ', '.join(message_array)
-
-if report_errors:
-    status = 'failure'
-    message = 'Errors while building the report.'
-
-print("""
-<!--status: {status}-->
-<!--message: {message}-->
-""".format(status=status, message=message))
+if args.report == 'main':
+    print(header_template.format())
+
+    printSimpleTable('Tested commits', ['Old', 'New'],
+        [['<pre>{}</pre>'.format(x) for x in
+            [open('left-commit.txt').read(),
+                open('right-commit.txt').read()]]])
+
+    def print_changes():
+        rows = tsvRows('changed-perf.tsv')
+        if not rows:
+            return
+
+        global faster_queries, slower_queries
+
+        print(tableStart('Changes in performance'))
+        columns = [
+            'Old, s',                                          # 0
+            'New, s',                                          # 1
+            'Relative difference (new&nbsp;-&nbsp;old)/old',   # 2
+            'Randomization distribution quantiles \
+                [5%,&nbsp;50%,&nbsp;95%,&nbsp;99%]',           # 3
+            'Test',                                            # 4
+            'Query',                                           # 5
+        ]
+
+        print(tableHeader(columns))
+
+        attrs = ['' for c in columns]
+        for row in rows:
+            if float(row[2]) < 0.:
+                faster_queries += 1
+                attrs[2] = 'style="background: #adbdff"'
+            else:
+                slower_queries += 1
+                attrs[2] = 'style="background: #ffb0a0"'
+
+            print(tableRow(row, attrs))
+
+        print(tableEnd())
+
+    print_changes()
+
+    slow_on_client_rows = tsvRows('slow-on-client.tsv')
+    error_tests += len(slow_on_client_rows)
+    printSimpleTable('Slow on client',
+        ['Client time, s', 'Server time, s', 'Ratio', 'Query'],
+        slow_on_client_rows)
+
+    def print_unstable_queries():
+        global unstable_queries
+        global very_unstable_queries
+
+        unstable_rows = tsvRows('unstable-queries.tsv')
+        if not unstable_rows:
+            return
+
+        unstable_queries += len(unstable_rows)
+
+        columns = [
+            'Old, s', #0
+            'New, s', #1
+            'Relative difference (new&nbsp;-&nbsp;old)/old', #2
+            'Randomization distribution quantiles [5%,&nbsp;50%,&nbsp;95%,&nbsp;99%]', #3
+            'Test', #4
+            'Query' #5
+        ]
+
+        print(tableStart('Unstable queries'))
+        print(tableHeader(columns))
+
+        attrs = ['' for c in columns]
+        for r in unstable_rows:
+            rd = ast.literal_eval(r[3])
+            # Note the zero-based array index, this is rd[3] in SQL.
+            if rd[2] > 0.2:
+                very_unstable_queries += 1
+                attrs[3] = 'style="background: #ffb0a0"'
+            else:
+                attrs[3] = ''
+
+            print(tableRow(r, attrs))
+
+        print(tableEnd())
+
+    print_unstable_queries()
+
+    run_error_rows = tsvRows('run-errors.tsv')
+    error_tests += len(run_error_rows)
+    printSimpleTable('Run errors', ['Test', 'Error'], run_error_rows)
+
+    skipped_tests_rows = tsvRows('skipped-tests.tsv')
+    printSimpleTable('Skipped tests', ['Test', 'Reason'], skipped_tests_rows)
+
+    printSimpleTable('Tests with most unstable queries',
+        ['Test', 'Unstable', 'Changed perf', 'Total not OK'],
+        tsvRows('bad-tests.tsv'))
+
+    def print_test_times():
+        global slow_average_tests
+        rows = tsvRows('test-times.tsv')
+        if not rows:
+            return
+
+        columns = [
+            'Test', #0
+            'Wall clock time, s', #1
+            'Total client time, s', #2
+            'Total queries', #3
+            'Ignored short queries', #4
+            'Longest query<br>(sum for all runs), s', #5
+            'Avg wall clock time<br>(sum for all runs), s', #6
+            'Shortest query<br>(sum for all runs), s', #7
+        ]
+
+        print(tableStart('Test times'))
+        print(tableHeader(columns))
+
+        attrs = ['' for c in columns]
+        for r in rows:
+            if float(r[6]) > 22:
+                # FIXME should be 15s max -- investigate parallel_insert
+                slow_average_tests += 1
+                attrs[6] = 'style="background: #ffb0a0"'
+            else:
+                attrs[6] = ''
+
+            if float(r[5]) > 30:
+                slow_average_tests += 1
+                attrs[5] = 'style="background: #ffb0a0"'
+            else:
+                attrs[5] = ''
+
+            print(tableRow(r, attrs))
+
+        print(tableEnd())
+
+    print_test_times()
+
+    # Add the errors reported by various steps of comparison script
+    report_errors += [l.strip() for l in open('report-errors.rep')]
+    if len(report_errors):
+        print(tableStart('Errors while building the report'))
+        print(tableHeader(['Error']))
+        for x in report_errors:
+            print(tableRow([x]))
+        print(tableEnd())
+
+    print("""
+    <p class="links">
+    <a href="output.7z">Test output</a>
+    <a href="all-queries.html">All queries</a>
+    <a href="compare.log">Log</a>
+    </p>
+    </body>
+    </html>
+    """)
+
+    status = 'success'
+    message = 'See the report'
+    message_array = []
+
+    if slow_average_tests:
+        status = 'failure'
+        message_array.append(str(slow_average_tests) + ' too long')
+
+    if faster_queries:
+        message_array.append(str(faster_queries) + ' faster')
+
+    if slower_queries:
+        status = 'failure'
+        message_array.append(str(slower_queries) + ' slower')
+
+    if unstable_queries:
+        message_array.append(str(unstable_queries) + ' unstable')
+
+    if very_unstable_queries:
+        status = 'failure'
+
+    error_tests += slow_average_tests
+    if error_tests:
+        status = 'failure'
+        message_array.append(str(error_tests) + ' errors')
+
+    if message_array:
+        message = ', '.join(message_array)
+
+    if report_errors:
+        status = 'failure'
+        message = 'Errors while building the report.'
+
+    print("""
+    <!--status: {status}-->
+    <!--message: {message}-->
+    """.format(status=status, message=message))
+
+elif args.report == 'all-queries':
+
+    print(header_template.format())
+
+    printSimpleTable('Tested commits', ['Old', 'New'],
+        [['<pre>{}</pre>'.format(x) for x in
+            [open('left-commit.txt').read(),
+                open('right-commit.txt').read()]]])
+
+    def print_all_queries():
+        rows = tsvRows('all-queries.tsv')
+        if not rows:
+            return
+
+        columns = [
+            'Old, s', #0
+            'New, s', #1
+            'Relative difference (new&nbsp;-&nbsp;old)/old', #2
+            'Times speedup/slowdown', #3
+            'Randomization distribution quantiles \
+                [5%,&nbsp;50%,&nbsp;95%,&nbsp;99%]', #4
+            'Test', #5
+            'Query', #6
+        ]
+
+        print(tableStart('All query times'))
+        print(tableHeader(columns))
+
+        attrs = ['' for c in columns]
+        for r in rows:
+            if float(r[2]) > 0.05:
+                attrs[3] = 'style="background: #ffb0a0"'
+            elif float(r[2]) < -0.05:
+                attrs[3] = 'style="background: #adbdff"'
+            else:
+                attrs[3] = ''
+
+            print(tableRow(r, attrs))
+
+        print(tableEnd())
+
+    print_all_queries()
+
+    print("""
+    <p class="links">
+    <a href="output.7z">Test output</a>
+    <a href="report.html">Main report</a>
+    <a href="compare.log">Log</a>
+    </p>
+    </body>
+    </html>
+    """)

File diff suppressed because one or more lines are too long

array_fill.xml

@@ -1,16 +1,8 @@
 <test>
-    <stop_conditions>
-        <all_of>
-            <total_time_ms>10000</total_time_ms>
-        </all_of>
-    </stop_conditions>
-
-    <query>SELECT arraySlice(arrayFill(x -> ((x % 2) >= 0), range(100000000)), 1, 10)</query>
-    <query>SELECT arraySlice(arrayFill(x -> (((x.1) % 2) >= 0), arrayMap(x -> (x, toString(x)), range(100000000))), 1, 10)</query>
-    <query>SELECT arraySlice(arrayFill(x -> ((x % 2) >= 2), range(100000000)), 1, 10)</query>
-    <query>SELECT arraySlice(arrayFill(x -> (((x.1) % 2) >= 2), arrayMap(x -> (x, toString(x)), range(100000000))), 1, 10)</query>
-    <query>SELECT arraySlice(arrayFill(x -> ((x % 2) = 0), range(100000000)), 1, 10)</query>
-    <query>SELECT arraySlice(arrayFill(x -> (((x.1) % 2) = 0), arrayMap(x -> (x, toString(x)), range(100000000))), 1, 10)</query>
+    <query>SELECT arraySlice(arrayFill(x -> ((x % 2) >= 0), range(100000000)), 1, 10) FORMAT Null</query>
+    <query>SELECT arraySlice(arrayFill(x -> (((x.1) % 2) >= 0), arrayMap(x -> (x, toString(x)), range(100000000))), 1, 10) FORMAT Null</query>
+    <query>SELECT arraySlice(arrayFill(x -> ((x % 2) >= 2), range(100000000)), 1, 10) FORMAT Null</query>
+    <query>SELECT arraySlice(arrayFill(x -> (((x.1) % 2) >= 2), arrayMap(x -> (x, toString(x)), range(100000000))), 1, 10) FORMAT Null</query>
+    <query>SELECT arraySlice(arrayFill(x -> ((x % 2) = 0), range(100000000)), 1, 10) FORMAT Null</query>
+    <query>SELECT arraySlice(arrayFill(x -> (((x.1) % 2) = 0), arrayMap(x -> (x, toString(x)), range(100000000))), 1, 10) FORMAT Null</query>
 </test>

string formatting performance test XML (test.hits)

@@ -1,17 +1,4 @@
 <test>
-    <stop_conditions>
-        <all_of>
-            <iterations>5</iterations>
-            <min_time_not_changing_for_ms>10000</min_time_not_changing_for_ms>
-        </all_of>
-        <any_of>
-            <iterations>50</iterations>
-            <total_time_ms>60000</total_time_ms>
-        </any_of>
-    </stop_conditions>
-
     <preconditions>
         <table_exists>test.hits</table_exists>
     </preconditions>
@@ -24,13 +11,13 @@
     <query>SELECT count() FROM test.hits WHERE NOT ignore(concat(MobilePhoneModel, 'Hello'))</query>
     <query>SELECT count() FROM test.hits WHERE NOT ignore(concat(PageCharset, 'a'))</query>
 
-    <query>SELECT count() FROM test.hits WHERE NOT ignore(format('{}{}', URL, URL))</query>
-    <query>SELECT count() FROM test.hits WHERE NOT ignore(format('{}{}', URL, SearchPhrase))</query>
-    <query>SELECT count() FROM test.hits WHERE NOT ignore(format('{}{}', MobilePhoneModel, SearchPhrase))</query>
-    <query>SELECT count() FROM test.hits WHERE NOT ignore(format('{}Hello', URL))</query>
-    <query>SELECT count() FROM test.hits WHERE NOT ignore(format('World{}', SearchPhrase))</query>
-    <query>SELECT count() FROM test.hits WHERE NOT ignore(format('{}Hello', MobilePhoneModel))</query>
-    <query>SELECT count() FROM test.hits WHERE NOT ignore(format('{}a', PageCharset))</query>
+    <query>SELECT count() FROM test.hits WHERE NOT ignore(format('{{}}{{}}', URL, URL))</query>
+    <query>SELECT count() FROM test.hits WHERE NOT ignore(format('{{}}{{}}', URL, SearchPhrase))</query>
+    <query>SELECT count() FROM test.hits WHERE NOT ignore(format('{{}}{{}}', MobilePhoneModel, SearchPhrase))</query>
+    <query>SELECT count() FROM test.hits WHERE NOT ignore(format('{{}}Hello', URL))</query>
+    <query>SELECT count() FROM test.hits WHERE NOT ignore(format('World{{}}', SearchPhrase))</query>
+    <query>SELECT count() FROM test.hits WHERE NOT ignore(format('{{}}Hello', MobilePhoneModel))</query>
+    <query>SELECT count() FROM test.hits WHERE NOT ignore(format('{{}}a', PageCharset))</query>
 
     <query>SELECT count() FROM test.hits WHERE NOT ignore(concat(URL, URL, URL))</query>
     <query>SELECT count() FROM test.hits WHERE NOT ignore(concat(URL, SearchPhrase, MobilePhoneModel))</query>
@@ -39,10 +26,10 @@
     <query>SELECT count() FROM test.hits WHERE NOT ignore(concat(MobilePhoneModel, 'Hello', PageCharset))</query>
     <query>SELECT count() FROM test.hits WHERE NOT ignore(concat('a', PageCharset, 'b'))</query>
 
-    <query>SELECT count() FROM test.hits WHERE NOT ignore(format('{}{}{}', URL, URL, URL))</query>
-    <query>SELECT count() FROM test.hits WHERE NOT ignore(format('{}{}{}', URL, SearchPhrase, MobilePhoneModel))</query>
-    <query>SELECT count() FROM test.hits WHERE NOT ignore(format('{}Hello{}', URL, URL))</query>
-    <query>SELECT count() FROM test.hits WHERE NOT ignore(format('Hello{}World', SearchPhrase))</query>
-    <query>SELECT count() FROM test.hits WHERE NOT ignore(format('{}Hello{}', MobilePhoneModel, PageCharset))</query>
-    <query>SELECT count() FROM test.hits WHERE NOT ignore(format('a{}b', PageCharset))</query>
+    <query>SELECT count() FROM test.hits WHERE NOT ignore(format('{{}}{{}}{{}}', URL, URL, URL))</query>
+    <query>SELECT count() FROM test.hits WHERE NOT ignore(format('{{}}{{}}{{}}', URL, SearchPhrase, MobilePhoneModel))</query>
+    <query>SELECT count() FROM test.hits WHERE NOT ignore(format('{{}}Hello{{}}', URL, URL))</query>
+    <query>SELECT count() FROM test.hits WHERE NOT ignore(format('Hello{{}}World', SearchPhrase))</query>
+    <query>SELECT count() FROM test.hits WHERE NOT ignore(format('{{}}Hello{{}}', MobilePhoneModel, PageCharset))</query>
+    <query>SELECT count() FROM test.hits WHERE NOT ignore(format('a{{}}b', PageCharset))</query>
 </test>