performance comparison

This commit is contained in:
Alexander Kuzmenkov 2020-02-11 23:00:53 +03:00
parent 413cb601dd
commit 0de8e7cf1a
3 changed files with 29 additions and 21 deletions

View File

@ -129,7 +129,7 @@ function run_tests
rm right/performance/{IPv4,IPv6,modulo,parse_engine_file,number_formatting_formats,select_format}.xml ||: rm right/performance/{IPv4,IPv6,modulo,parse_engine_file,number_formatting_formats,select_format}.xml ||:
# Run the tests # Run the tests
for test in right/performance/${CHPC_TEST_GLOB:-*.xml} for test in right/performance/${CHPC_TEST_GLOB:-*}.xml
do do
test_name=$(basename $test ".xml") test_name=$(basename $test ".xml")
echo test $test_name echo test $test_name
@ -150,22 +150,24 @@ rm test-times.tsv test-dump.tsv unstable.tsv changed-perf.tsv unstable-tests.tsv
right/clickhouse local --query " right/clickhouse local --query "
create table queries engine Memory as select create table queries engine Memory as select
replaceAll(_file, '-report.tsv', '') test, replaceAll(_file, '-report.tsv', '') test,
if(abs(diff) < 0.05 and rd[3] > 0.05, 1, 0) unstable, left + right < 0.01 as short,
if(abs(diff) > 0.05 and abs(diff) > rd[3], 1, 0) changed,
*
from file('*-report.tsv', TSV, 'left float, right float, diff float, rd Array(float), query text')
-- FIXME Comparison mode doesn't make sense for queries that complete -- FIXME Comparison mode doesn't make sense for queries that complete
-- immediately, so for now we pretend they don't exist. We don't want to -- immediately, so for now we pretend they don't exist. We don't want to
-- remove them altogether because we want to be able to detect regressions, -- remove them altogether because we want to be able to detect regressions,
-- but the right way to do this is not yet clear. -- but the right way to do this is not yet clear.
where left + right > 0.01; not short and abs(diff) < 0.05 and rd[3] > 0.05 as unstable,
not short and abs(diff) > 0.05 and abs(diff) > rd[3] as changed,
*
from file('*-report.tsv', TSV, 'left float, right float, diff float, rd Array(float), query text');
create table changed_perf_tsv engine File(TSV, 'changed-perf.tsv') as create table changed_perf_tsv engine File(TSV, 'changed-perf.tsv') as
select left, right, diff, rd, test, query from queries where changed select left, right, diff, rd, test, query from queries where changed
order by rd[3] desc; order by rd[3] desc;
create table unstable_queries_tsv engine File(TSV, 'unstable-queries.tsv') as create table unstable_queries_tsv engine File(TSV, 'unstable-queries.tsv') as
select left, right, diff, rd, test, query from queries where unstable select left, right, diff, rd, test, query from queries where unstable
order by rd[3] desc; order by rd[3] desc;
create table unstable_tests_tsv engine File(TSV, 'bad-tests.tsv') as create table unstable_tests_tsv engine File(TSV, 'bad-tests.tsv') as
select test, sum(unstable) u, sum(changed) c, u + c s from queries select test, sum(unstable) u, sum(changed) c, u + c s from queries
group by test having s > 0 order by s desc; group by test having s > 0 order by s desc;
@ -182,20 +184,23 @@ create table slow_on_client_tsv engine File(TSV, 'slow-on-client.tsv') as
create table test_time engine Memory as create table test_time engine Memory as
select test, sum(client) total_client_time, select test, sum(client) total_client_time,
max(client) query_max, min(client) query_min, count(*) queries maxIf(client, not short) query_max,
from query_time minIf(client, not short) query_min,
-- for consistency, filter out everything we filtered out of queries table count(*) queries,
semi join queries using query sum(short) short_queries
from query_time, queries
where query_time.query = queries.query
group by test; group by test;
create table test_times_tsv engine File(TSV, 'test-times.tsv') as create table test_times_tsv engine File(TSV, 'test-times.tsv') as
select wall_clock.test, real, select wall_clock.test, real,
floor(total_client_time, 3), floor(total_client_time, 3),
queries, queries,
short_queries,
floor(query_max, 3), floor(query_max, 3),
floor(real / queries, 3) avg_real_per_query, floor(real / queries, 3) avg_real_per_query,
floor(query_min, 3) floor(query_min, 3)
from test_time right join wall_clock using test from test_time join wall_clock using test
order by query_max / query_min desc; order by query_max / query_min desc;
create table all_queries_tsv engine File(TSV, 'all-queries.tsv') as create table all_queries_tsv engine File(TSV, 'all-queries.tsv') as

View File

@ -94,10 +94,11 @@ test_query_templates = [q.text for q in root.findall('query')]
test_queries = substitute_parameters(test_query_templates, parameter_combinations) test_queries = substitute_parameters(test_query_templates, parameter_combinations)
for q in test_queries: for q in test_queries:
# Warmup: run once on both servers. Helps to bring the data into memory, # Prewarm: run once on both servers. Helps to bring the data into memory,
# precompile the queries, etc. # precompile the queries, etc.
for c in connections: for conn_index, c in enumerate(connections):
c.execute(q) res = c.execute(q)
print('prewarm\t' + tsv_escape(q) + '\t' + str(conn_index) + '\t' + str(c.last_query.elapsed))
# Now, perform measured runs. # Now, perform measured runs.
# Track the time spent by the client to process this query, so that we can notice # Track the time spent by the client to process this query, so that we can notice

View File

@ -81,7 +81,8 @@ def nextRowAnchor():
def tr(x): def tr(x):
a = nextRowAnchor() a = nextRowAnchor()
return '<tr onclick="location.href=\'#{a}\'" id={a}>{x}</tr>'.format(a=a, x=str(x)) #return '<tr onclick="location.href=\'#{a}\'" id={a}>{x}</tr>'.format(a=a, x=str(x))
return '<tr id={a}>{x}</tr>'.format(a=a, x=str(x))
def td(x): def td(x):
return '<td>' + str(x) + '</td>' return '<td>' + str(x) + '</td>'
@ -140,16 +141,17 @@ params['test_part'] = (
table_template.format( table_template.format(
anchor = nextTableAnchor(), anchor = nextTableAnchor(),
caption = 'Tests with most unstable queries', caption = 'Tests with most unstable queries',
header = table_header(['Test', 'Unstable', 'Changed perf', 'Total']), header = table_header(['Test', 'Unstable', 'Changed perf', 'Total not OK']),
rows = tsv_rows('bad-tests.tsv')) + rows = tsv_rows('bad-tests.tsv')) +
table_template.format( table_template.format(
anchor = nextTableAnchor(), anchor = nextTableAnchor(),
caption = 'Tests times', caption = 'Tests times',
header = table_header(['Test', 'Wall clock time, s', 'Total client time, s', header = table_header(['Test', 'Wall clock time, s', 'Total client time, s',
'Number of queries', 'Number of queries',
'Max client time<br>(sum for all runs), s', 'Number of short queries',
'Longest query<br>(sum for all runs), s',
'Avg wall clock time<br>(sum for all runs), s', 'Avg wall clock time<br>(sum for all runs), s',
'Min client time<br>(sum for all runs), s']), 'Shortest query<br>(sum for all runs), s']),
rows = tsv_rows('test-times.tsv')) rows = tsv_rows('test-times.tsv'))
) )
print(doc_template.format_map(params)) print(doc_template.format_map(params))