mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-24 16:42:05 +00:00
performance comparison
This commit is contained in:
parent
0b4e601d81
commit
5fd5b15c36
@ -5,6 +5,8 @@ trap "exit" INT TERM
|
||||
trap "kill $(jobs -pr) ||:" EXIT
|
||||
|
||||
stage=${stage:-}
|
||||
script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
|
||||
|
||||
|
||||
function configure
|
||||
{
|
||||
@ -209,10 +211,8 @@ done
|
||||
rm ./*.{rep,svg} test-times.tsv test-dump.tsv unstable.tsv unstable-query-ids.tsv unstable-query-metrics.tsv changed-perf.tsv unstable-tests.tsv unstable-queries.tsv bad-tests.tsv slow-on-client.tsv all-queries.tsv ||:
|
||||
|
||||
right/clickhouse local --query "
|
||||
create table queries engine File(TSVWithNamesAndTypes, 'queries.tsv')
|
||||
create table queries engine File(TSVWithNamesAndTypes, 'queries.rep')
|
||||
as select
|
||||
replaceAll(_file, '-report.tsv', '') test,
|
||||
|
||||
-- FIXME Comparison mode doesn't make sense for queries that complete
|
||||
-- immediately, so for now we pretend they don't exist. We don't want to
|
||||
-- remove them altogether because we want to be able to detect regressions,
|
||||
@ -225,7 +225,9 @@ create table queries engine File(TSVWithNamesAndTypes, 'queries.tsv')
|
||||
-- likely to observe a difference > 5% in less than 5% of cases.
|
||||
-- Not sure it is correct, but empirically it filters out a lot of noise.
|
||||
not short and abs(diff) > 0.15 and abs(diff) > rd[3] and rd[1] > 0.05 as changed,
|
||||
*
|
||||
left, right, diff, rd,
|
||||
replaceAll(_file, '-report.tsv', '') test,
|
||||
query
|
||||
from file('*-report.tsv', TSV, 'left float, right float, diff float, rd Array(float), query text');
|
||||
|
||||
create table changed_perf_tsv engine File(TSV, 'changed-perf.tsv') as
|
||||
@ -274,36 +276,52 @@ create table test_times_tsv engine File(TSV, 'test-times.tsv') as
|
||||
create table all_queries_tsv engine File(TSV, 'all-queries.tsv') as
|
||||
select left, right, diff, rd, test, query
|
||||
from queries order by rd[3] desc;
|
||||
" 2> >(head -2 >> report-errors.rep) ||:
|
||||
|
||||
create view right_query_log as select *
|
||||
from file('right-query-log.tsv', TSVWithNamesAndTypes, '$(cat right-query-log.tsv.columns)');
|
||||
for version in {right,left}
|
||||
do
|
||||
right/clickhouse local --query "
|
||||
create view queries as
|
||||
select * from file('queries.rep', TSVWithNamesAndTypes,
|
||||
'short int, unstable int, changed int, left float, right float,
|
||||
diff float, rd Array(float), test text, query text');
|
||||
|
||||
create view right_trace_log as select *
|
||||
from file('right-trace-log.tsv', TSVWithNamesAndTypes, '$(cat right-trace-log.tsv.columns)');
|
||||
create view query_log as select *
|
||||
from file('$version-query-log.tsv', TSVWithNamesAndTypes,
|
||||
'$(cat "$version-query-log.tsv.columns")');
|
||||
|
||||
create view right_addresses_src as select *
|
||||
from file('right-addresses.tsv', TSVWithNamesAndTypes, '$(cat right-addresses.tsv.columns)');
|
||||
create view trace_log as select *
|
||||
from file('$version-trace-log.tsv', TSVWithNamesAndTypes,
|
||||
'$(cat "$version-trace-log.tsv.columns")');
|
||||
|
||||
create table right_addresses_join engine Join(any, left, address) as
|
||||
select addr address, name from right_addresses_src;
|
||||
create view addresses_src as select *
|
||||
from file('$version-addresses.tsv', TSVWithNamesAndTypes,
|
||||
'$(cat "$version-addresses.tsv.columns")');
|
||||
|
||||
create table unstable_query_runs engine File(TSVWithNamesAndTypes, 'unstable-query-runs.rep') as
|
||||
select query_id, query from right_query_log
|
||||
create table addresses_join engine Join(any, left, address) as
|
||||
select addr address, name from addresses_src;
|
||||
|
||||
create table unstable_query_runs engine File(TSVWithNamesAndTypes,
|
||||
'unstable-query-runs.$version.rep') as
|
||||
select query_id, query from query_log
|
||||
join queries using query
|
||||
where query_id not like 'prewarm %' and (unstable or changed)
|
||||
;
|
||||
|
||||
create table unstable_query_log engine File(Vertical, 'unstable-query-log.rep') as
|
||||
select * from right_query_log
|
||||
create table unstable_query_log engine File(Vertical,
|
||||
'unstable-query-log.$version.rep') as
|
||||
select * from query_log
|
||||
where query_id in (select query_id from unstable_query_runs);
|
||||
|
||||
create table unstable_run_metrics engine File(TSVWithNamesAndTypes, 'unstable-run-metrics.rep') as
|
||||
create table unstable_run_metrics engine File(TSVWithNamesAndTypes,
|
||||
'unstable-run-metrics.$version.rep') as
|
||||
select ProfileEvents.Values value, ProfileEvents.Names metric, query_id, query
|
||||
from right_query_log array join ProfileEvents
|
||||
from query_log array join ProfileEvents
|
||||
where query_id in (select query_id from unstable_query_runs)
|
||||
;
|
||||
|
||||
create table unstable_run_metrics_2 engine File(TSVWithNamesAndTypes, 'unstable-run-metrics-2.rep') as
|
||||
create table unstable_run_metrics_2 engine File(TSVWithNamesAndTypes,
|
||||
'unstable-run-metrics-2.$version.rep') as
|
||||
select v, n, query_id, query
|
||||
from
|
||||
(select
|
||||
@ -311,20 +329,25 @@ create table unstable_run_metrics_2 engine File(TSVWithNamesAndTypes, 'unstable-
|
||||
[memory_usage, read_bytes, written_bytes, query_duration_ms] v,
|
||||
query,
|
||||
query_id
|
||||
from right_query_log
|
||||
from query_log
|
||||
where query_id in (select query_id from unstable_query_runs))
|
||||
array join n, v;
|
||||
|
||||
create table unstable_run_traces engine File(TSVWithNamesAndTypes, 'unstable-run-traces.rep') as
|
||||
select count() value, joinGet(right_addresses_join, 'name', arrayJoin(trace)) metric,
|
||||
unstable_query_runs.query_id, any(unstable_query_runs.query) query
|
||||
create table unstable_run_traces engine File(TSVWithNamesAndTypes,
|
||||
'unstable-run-traces.$version.rep') as
|
||||
select
|
||||
count() value,
|
||||
joinGet(addresses_join, 'name', arrayJoin(trace)) metric,
|
||||
unstable_query_runs.query_id,
|
||||
any(unstable_query_runs.query) query
|
||||
from unstable_query_runs
|
||||
join right_trace_log on right_trace_log.query_id = unstable_query_runs.query_id
|
||||
join trace_log on trace_log.query_id = unstable_query_runs.query_id
|
||||
group by unstable_query_runs.query_id, metric
|
||||
order by count() desc
|
||||
;
|
||||
|
||||
create table metric_devation engine File(TSVWithNamesAndTypes, 'metric-deviation.rep') as
|
||||
create table metric_devation engine File(TSVWithNamesAndTypes,
|
||||
'metric-deviation.$version.rep') as
|
||||
select floor((q[3] - q[1])/q[2], 3) d,
|
||||
quantilesExact(0, 0.5, 1)(value) q, metric, query
|
||||
from (select * from unstable_run_metrics
|
||||
@ -336,31 +359,36 @@ create table metric_devation engine File(TSVWithNamesAndTypes, 'metric-deviation
|
||||
order by any(rd[3]) desc, query desc, d desc
|
||||
;
|
||||
|
||||
create table stacks engine File(TSV, 'stacks.rep') as
|
||||
create table stacks engine File(TSV, 'stacks.$version.rep') as
|
||||
select
|
||||
query,
|
||||
arrayStringConcat(
|
||||
arrayMap(x -> joinGet(right_addresses_join, 'name', x),
|
||||
arrayMap(x -> joinGet(addresses_join, 'name', x),
|
||||
arrayReverse(trace)
|
||||
),
|
||||
';'
|
||||
) readable_trace,
|
||||
count()
|
||||
from right_trace_log
|
||||
from trace_log
|
||||
join unstable_query_runs using query_id
|
||||
group by query, trace
|
||||
;
|
||||
" 2>> report-errors.txt ||:
|
||||
" 2> >(head -2 >> report-errors.rep) ||: # do not run in parallel because they use the same data dir for StorageJoins which leads to weird errors.
|
||||
done
|
||||
wait
|
||||
|
||||
IFS=$'\n'
|
||||
for query in $(cut -d' ' -f1 stacks.rep | sort | uniq)
|
||||
for version in {right,left}
|
||||
do
|
||||
for query in $(cut -d' ' -f1 "stacks.$version.rep" | sort | uniq)
|
||||
do
|
||||
query_file=$(echo "$query" | cut -c-120 | sed 's/[/]/_/g')
|
||||
grep -F "$query " stacks.rep \
|
||||
grep -F "$query " "stacks.$version.rep" \
|
||||
| cut -d' ' -f 2- \
|
||||
| sed 's/\t/ /g' \
|
||||
| tee "$query_file.stacks.rep" \
|
||||
| ~/fg/flamegraph.pl > "$query_file.svg" &
|
||||
| tee "$query_file.stacks.$version.rep" \
|
||||
| ~/fg/flamegraph.pl > "$query_file.$version.svg" &
|
||||
done
|
||||
done
|
||||
wait
|
||||
unset IFS
|
||||
|
@ -14,6 +14,6 @@
|
||||
<collect_interval_milliseconds>1000</collect_interval_milliseconds>
|
||||
</metric_log>
|
||||
|
||||
<use_uncompressed_cache>1</use_uncompressed_cache>
|
||||
<use_uncompressed_cache>0</use_uncompressed_cache>
|
||||
<uncompressed_cache_size>1000000000</uncompressed_cache_size>
|
||||
</yandex>
|
||||
|
@ -1,5 +1,6 @@
|
||||
#!/usr/bin/python3
|
||||
|
||||
import ast
|
||||
import collections
|
||||
import csv
|
||||
import itertools
|
||||
@ -8,14 +9,12 @@ import sys
|
||||
import traceback
|
||||
|
||||
report_errors = []
|
||||
status = 'success'
|
||||
message = 'See the report'
|
||||
message_array = []
|
||||
error_tests = 0
|
||||
slow_average_tests = 0
|
||||
faster_queries = 0
|
||||
slower_queries = 0
|
||||
unstable_queries = 0
|
||||
very_unstable_queries = 0
|
||||
|
||||
print("""
|
||||
<!DOCTYPE html>
|
||||
@ -180,15 +179,43 @@ printSimpleTable('Slow on client',
|
||||
['Client time, s', 'Server time, s', 'Ratio', 'Query'],
|
||||
slow_on_client_rows)
|
||||
|
||||
unstable_rows = tsvRows('unstable-queries.tsv')
|
||||
unstable_queries += len(unstable_rows)
|
||||
printSimpleTable('Unstable queries',
|
||||
[
|
||||
'Old, s', 'New, s', 'Relative difference (new - old)/old',
|
||||
'Randomization distribution quantiles [5%, 50%, 95%, 99%]',
|
||||
'Test', 'Query'
|
||||
],
|
||||
unstable_rows)
|
||||
def print_unstable_queries():
    """Print the 'Unstable queries' table and update the global counters.

    Rows are read from 'unstable-queries.tsv'. Nothing is printed and no
    counter is touched when there are no rows. Otherwise every row is
    added to ``unstable_queries``; a row whose third randomization
    distribution quantile exceeds 0.2 is also added to
    ``very_unstable_queries`` and has its quantiles cell highlighted.
    """
    global unstable_queries, very_unstable_queries

    rows = tsvRows('unstable-queries.tsv')
    if not rows:
        return

    unstable_queries += len(rows)

    # Index 3 (the quantiles column) is the cell that may be highlighted.
    header = [
        'Old, s',
        'New, s',
        'Relative difference (new - old)/old',
        'Randomization distribution quantiles [5%, 50%, 95%, 99%]',
        'Test',
        'Query',
    ]

    print(tableStart('Unstable queries'))
    print(tableHeader(header))

    # One attribute string per column; only the quantiles cell ever changes.
    cell_attrs = [''] * len(header)
    for row in rows:
        quantiles = ast.literal_eval(row[3])
        # Python lists are zero-based, so quantiles[2] is rd[3] in SQL.
        highlight = quantiles[2] > 0.2
        if highlight:
            very_unstable_queries += 1
        cell_attrs[3] = 'style="background: #ffb0a0"' if highlight else ''

        print(tableRow(row, cell_attrs))

    print(tableEnd())
|
||||
|
||||
print_unstable_queries()
|
||||
|
||||
run_error_rows = tsvRows('run-errors.tsv')
|
||||
error_tests += len(run_error_rows)
|
||||
@ -243,7 +270,7 @@ def print_test_times():
|
||||
print_test_times()
|
||||
|
||||
# Add the errors reported by various steps of the comparison script
|
||||
report_errors += tsvRows('report-errors.txt')
|
||||
report_errors += [l.strip() for l in open('report-errors.rep')]
|
||||
if len(report_errors):
|
||||
print(tableStart('Errors while building the report'))
|
||||
print(tableHeader(['Error']))
|
||||
@ -261,6 +288,10 @@ print("""
|
||||
</html>
|
||||
""")
|
||||
|
||||
status = 'success'
|
||||
message = 'See the report'
|
||||
message_array = []
|
||||
|
||||
if slow_average_tests:
|
||||
status = 'failure'
|
||||
message_array.append(str(slow_average_tests) + ' too long')
|
||||
@ -269,11 +300,15 @@ if faster_queries:
|
||||
message_array.append(str(faster_queries) + ' faster')
|
||||
|
||||
if slower_queries:
|
||||
status = 'failure'
|
||||
message_array.append(str(slower_queries) + ' slower')
|
||||
|
||||
if unstable_queries:
|
||||
message_array.append(str(unstable_queries) + ' unstable')
|
||||
|
||||
if very_unstable_queries:
|
||||
status = 'failure'
|
||||
|
||||
error_tests += slow_average_tests
|
||||
if error_tests:
|
||||
status = 'failure'
|
||||
|
Loading…
Reference in New Issue
Block a user