mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-24 16:42:05 +00:00
performance comparison
This commit is contained in:
parent
0b4e601d81
commit
5fd5b15c36
@ -5,6 +5,8 @@ trap "exit" INT TERM
|
|||||||
trap "kill $(jobs -pr) ||:" EXIT
|
trap "kill $(jobs -pr) ||:" EXIT
|
||||||
|
|
||||||
stage=${stage:-}
|
stage=${stage:-}
|
||||||
|
script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
|
||||||
|
|
||||||
|
|
||||||
function configure
|
function configure
|
||||||
{
|
{
|
||||||
@ -209,10 +211,8 @@ done
|
|||||||
rm ./*.{rep,svg} test-times.tsv test-dump.tsv unstable.tsv unstable-query-ids.tsv unstable-query-metrics.tsv changed-perf.tsv unstable-tests.tsv unstable-queries.tsv bad-tests.tsv slow-on-client.tsv all-queries.tsv ||:
|
rm ./*.{rep,svg} test-times.tsv test-dump.tsv unstable.tsv unstable-query-ids.tsv unstable-query-metrics.tsv changed-perf.tsv unstable-tests.tsv unstable-queries.tsv bad-tests.tsv slow-on-client.tsv all-queries.tsv ||:
|
||||||
|
|
||||||
right/clickhouse local --query "
|
right/clickhouse local --query "
|
||||||
create table queries engine File(TSVWithNamesAndTypes, 'queries.tsv')
|
create table queries engine File(TSVWithNamesAndTypes, 'queries.rep')
|
||||||
as select
|
as select
|
||||||
replaceAll(_file, '-report.tsv', '') test,
|
|
||||||
|
|
||||||
-- FIXME Comparison mode doesn't make sense for queries that complete
|
-- FIXME Comparison mode doesn't make sense for queries that complete
|
||||||
-- immediately, so for now we pretend they don't exist. We don't want to
|
-- immediately, so for now we pretend they don't exist. We don't want to
|
||||||
-- remove them altogether because we want to be able to detect regressions,
|
-- remove them altogether because we want to be able to detect regressions,
|
||||||
@ -225,7 +225,9 @@ create table queries engine File(TSVWithNamesAndTypes, 'queries.tsv')
|
|||||||
-- likely to observe a difference > 5% in less than 5% cases.
|
-- likely to observe a difference > 5% in less than 5% cases.
|
||||||
-- Not sure it is correct, but empirically it filters out a lot of noise.
|
-- Not sure it is correct, but empirically it filters out a lot of noise.
|
||||||
not short and abs(diff) > 0.15 and abs(diff) > rd[3] and rd[1] > 0.05 as changed,
|
not short and abs(diff) > 0.15 and abs(diff) > rd[3] and rd[1] > 0.05 as changed,
|
||||||
*
|
left, right, diff, rd,
|
||||||
|
replaceAll(_file, '-report.tsv', '') test,
|
||||||
|
query
|
||||||
from file('*-report.tsv', TSV, 'left float, right float, diff float, rd Array(float), query text');
|
from file('*-report.tsv', TSV, 'left float, right float, diff float, rd Array(float), query text');
|
||||||
|
|
||||||
create table changed_perf_tsv engine File(TSV, 'changed-perf.tsv') as
|
create table changed_perf_tsv engine File(TSV, 'changed-perf.tsv') as
|
||||||
@ -274,36 +276,52 @@ create table test_times_tsv engine File(TSV, 'test-times.tsv') as
|
|||||||
create table all_queries_tsv engine File(TSV, 'all-queries.tsv') as
|
create table all_queries_tsv engine File(TSV, 'all-queries.tsv') as
|
||||||
select left, right, diff, rd, test, query
|
select left, right, diff, rd, test, query
|
||||||
from queries order by rd[3] desc;
|
from queries order by rd[3] desc;
|
||||||
|
" 2> >(head -2 >> report-errors.rep) ||:
|
||||||
|
|
||||||
create view right_query_log as select *
|
for version in {right,left}
|
||||||
from file('right-query-log.tsv', TSVWithNamesAndTypes, '$(cat right-query-log.tsv.columns)');
|
do
|
||||||
|
right/clickhouse local --query "
|
||||||
|
create view queries as
|
||||||
|
select * from file('queries.rep', TSVWithNamesAndTypes,
|
||||||
|
'short int, unstable int, changed int, left float, right float,
|
||||||
|
diff float, rd Array(float), test text, query text');
|
||||||
|
|
||||||
create view right_trace_log as select *
|
create view query_log as select *
|
||||||
from file('right-trace-log.tsv', TSVWithNamesAndTypes, '$(cat right-trace-log.tsv.columns)');
|
from file('$version-query-log.tsv', TSVWithNamesAndTypes,
|
||||||
|
'$(cat "$version-query-log.tsv.columns")');
|
||||||
|
|
||||||
create view right_addresses_src as select *
|
create view trace_log as select *
|
||||||
from file('right-addresses.tsv', TSVWithNamesAndTypes, '$(cat right-addresses.tsv.columns)');
|
from file('$version-trace-log.tsv', TSVWithNamesAndTypes,
|
||||||
|
'$(cat "$version-trace-log.tsv.columns")');
|
||||||
|
|
||||||
create table right_addresses_join engine Join(any, left, address) as
|
create view addresses_src as select *
|
||||||
select addr address, name from right_addresses_src;
|
from file('$version-addresses.tsv', TSVWithNamesAndTypes,
|
||||||
|
'$(cat "$version-addresses.tsv.columns")');
|
||||||
|
|
||||||
create table unstable_query_runs engine File(TSVWithNamesAndTypes, 'unstable-query-runs.rep') as
|
create table addresses_join engine Join(any, left, address) as
|
||||||
select query_id, query from right_query_log
|
select addr address, name from addresses_src;
|
||||||
|
|
||||||
|
create table unstable_query_runs engine File(TSVWithNamesAndTypes,
|
||||||
|
'unstable-query-runs.$version.rep') as
|
||||||
|
select query_id, query from query_log
|
||||||
join queries using query
|
join queries using query
|
||||||
where query_id not like 'prewarm %' and (unstable or changed)
|
where query_id not like 'prewarm %' and (unstable or changed)
|
||||||
;
|
;
|
||||||
|
|
||||||
create table unstable_query_log engine File(Vertical, 'unstable-query-log.rep') as
|
create table unstable_query_log engine File(Vertical,
|
||||||
select * from right_query_log
|
'unstable-query-log.$version.rep') as
|
||||||
|
select * from query_log
|
||||||
where query_id in (select query_id from unstable_query_runs);
|
where query_id in (select query_id from unstable_query_runs);
|
||||||
|
|
||||||
create table unstable_run_metrics engine File(TSVWithNamesAndTypes, 'unstable-run-metrics.rep') as
|
create table unstable_run_metrics engine File(TSVWithNamesAndTypes,
|
||||||
|
'unstable-run-metrics.$version.rep') as
|
||||||
select ProfileEvents.Values value, ProfileEvents.Names metric, query_id, query
|
select ProfileEvents.Values value, ProfileEvents.Names metric, query_id, query
|
||||||
from right_query_log array join ProfileEvents
|
from query_log array join ProfileEvents
|
||||||
where query_id in (select query_id from unstable_query_runs)
|
where query_id in (select query_id from unstable_query_runs)
|
||||||
;
|
;
|
||||||
|
|
||||||
create table unstable_run_metrics_2 engine File(TSVWithNamesAndTypes, 'unstable-run-metrics-2.rep') as
|
create table unstable_run_metrics_2 engine File(TSVWithNamesAndTypes,
|
||||||
|
'unstable-run-metrics-2.$version.rep') as
|
||||||
select v, n, query_id, query
|
select v, n, query_id, query
|
||||||
from
|
from
|
||||||
(select
|
(select
|
||||||
@ -311,20 +329,25 @@ create table unstable_run_metrics_2 engine File(TSVWithNamesAndTypes, 'unstable-
|
|||||||
[memory_usage, read_bytes, written_bytes, query_duration_ms] v,
|
[memory_usage, read_bytes, written_bytes, query_duration_ms] v,
|
||||||
query,
|
query,
|
||||||
query_id
|
query_id
|
||||||
from right_query_log
|
from query_log
|
||||||
where query_id in (select query_id from unstable_query_runs))
|
where query_id in (select query_id from unstable_query_runs))
|
||||||
array join n, v;
|
array join n, v;
|
||||||
|
|
||||||
create table unstable_run_traces engine File(TSVWithNamesAndTypes, 'unstable-run-traces.rep') as
|
create table unstable_run_traces engine File(TSVWithNamesAndTypes,
|
||||||
select count() value, joinGet(right_addresses_join, 'name', arrayJoin(trace)) metric,
|
'unstable-run-traces.$version.rep') as
|
||||||
unstable_query_runs.query_id, any(unstable_query_runs.query) query
|
select
|
||||||
|
count() value,
|
||||||
|
joinGet(addresses_join, 'name', arrayJoin(trace)) metric,
|
||||||
|
unstable_query_runs.query_id,
|
||||||
|
any(unstable_query_runs.query) query
|
||||||
from unstable_query_runs
|
from unstable_query_runs
|
||||||
join right_trace_log on right_trace_log.query_id = unstable_query_runs.query_id
|
join trace_log on trace_log.query_id = unstable_query_runs.query_id
|
||||||
group by unstable_query_runs.query_id, metric
|
group by unstable_query_runs.query_id, metric
|
||||||
order by count() desc
|
order by count() desc
|
||||||
;
|
;
|
||||||
|
|
||||||
create table metric_devation engine File(TSVWithNamesAndTypes, 'metric-deviation.rep') as
|
create table metric_devation engine File(TSVWithNamesAndTypes,
|
||||||
|
'metric-deviation.$version.rep') as
|
||||||
select floor((q[3] - q[1])/q[2], 3) d,
|
select floor((q[3] - q[1])/q[2], 3) d,
|
||||||
quantilesExact(0, 0.5, 1)(value) q, metric, query
|
quantilesExact(0, 0.5, 1)(value) q, metric, query
|
||||||
from (select * from unstable_run_metrics
|
from (select * from unstable_run_metrics
|
||||||
@ -336,31 +359,36 @@ create table metric_devation engine File(TSVWithNamesAndTypes, 'metric-deviation
|
|||||||
order by any(rd[3]) desc, query desc, d desc
|
order by any(rd[3]) desc, query desc, d desc
|
||||||
;
|
;
|
||||||
|
|
||||||
create table stacks engine File(TSV, 'stacks.rep') as
|
create table stacks engine File(TSV, 'stacks.$version.rep') as
|
||||||
select
|
select
|
||||||
query,
|
query,
|
||||||
arrayStringConcat(
|
arrayStringConcat(
|
||||||
arrayMap(x -> joinGet(right_addresses_join, 'name', x),
|
arrayMap(x -> joinGet(addresses_join, 'name', x),
|
||||||
arrayReverse(trace)
|
arrayReverse(trace)
|
||||||
),
|
),
|
||||||
';'
|
';'
|
||||||
) readable_trace,
|
) readable_trace,
|
||||||
count()
|
count()
|
||||||
from right_trace_log
|
from trace_log
|
||||||
join unstable_query_runs using query_id
|
join unstable_query_runs using query_id
|
||||||
group by query, trace
|
group by query, trace
|
||||||
;
|
;
|
||||||
" 2>> report-errors.txt ||:
|
" 2> >(head -2 >> report-errors.rep) ||: # Do not run the right and left iterations in parallel: they share the same data directory for StorageJoin tables, which leads to weird errors.
|
||||||
|
done
|
||||||
|
wait
|
||||||
|
|
||||||
IFS=$'\n'
|
IFS=$'\n'
|
||||||
for query in $(cut -d' ' -f1 stacks.rep | sort | uniq)
|
for version in {right,left}
|
||||||
do
|
do
|
||||||
|
for query in $(cut -d' ' -f1 "stacks.$version.rep" | sort | uniq)
|
||||||
|
do
|
||||||
query_file=$(echo "$query" | cut -c-120 | sed 's/[/]/_/g')
|
query_file=$(echo "$query" | cut -c-120 | sed 's/[/]/_/g')
|
||||||
grep -F "$query " stacks.rep \
|
grep -F "$query " "stacks.$version.rep" \
|
||||||
| cut -d' ' -f 2- \
|
| cut -d' ' -f 2- \
|
||||||
| sed 's/\t/ /g' \
|
| sed 's/\t/ /g' \
|
||||||
| tee "$query_file.stacks.rep" \
|
| tee "$query_file.stacks.$version.rep" \
|
||||||
| ~/fg/flamegraph.pl > "$query_file.svg" &
|
| ~/fg/flamegraph.pl > "$query_file.$version.svg" &
|
||||||
|
done
|
||||||
done
|
done
|
||||||
wait
|
wait
|
||||||
unset IFS
|
unset IFS
|
||||||
|
@ -14,6 +14,6 @@
|
|||||||
<collect_interval_milliseconds>1000</collect_interval_milliseconds>
|
<collect_interval_milliseconds>1000</collect_interval_milliseconds>
|
||||||
</metric_log>
|
</metric_log>
|
||||||
|
|
||||||
<use_uncompressed_cache>1</use_uncompressed_cache>
|
<use_uncompressed_cache>0</use_uncompressed_cache>
|
||||||
<uncompressed_cache_size>1000000000</uncompressed_cache_size>
|
<uncompressed_cache_size>1000000000</uncompressed_cache_size>
|
||||||
</yandex>
|
</yandex>
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
#!/usr/bin/python3
|
#!/usr/bin/python3
|
||||||
|
|
||||||
|
import ast
|
||||||
import collections
|
import collections
|
||||||
import csv
|
import csv
|
||||||
import itertools
|
import itertools
|
||||||
@ -8,14 +9,12 @@ import sys
|
|||||||
import traceback
|
import traceback
|
||||||
|
|
||||||
report_errors = []
|
report_errors = []
|
||||||
status = 'success'
|
|
||||||
message = 'See the report'
|
|
||||||
message_array = []
|
|
||||||
error_tests = 0
|
error_tests = 0
|
||||||
slow_average_tests = 0
|
slow_average_tests = 0
|
||||||
faster_queries = 0
|
faster_queries = 0
|
||||||
slower_queries = 0
|
slower_queries = 0
|
||||||
unstable_queries = 0
|
unstable_queries = 0
|
||||||
|
very_unstable_queries = 0
|
||||||
|
|
||||||
print("""
|
print("""
|
||||||
<!DOCTYPE html>
|
<!DOCTYPE html>
|
||||||
@ -180,15 +179,43 @@ printSimpleTable('Slow on client',
|
|||||||
['Client time, s', 'Server time, s', 'Ratio', 'Query'],
|
['Client time, s', 'Server time, s', 'Ratio', 'Query'],
|
||||||
slow_on_client_rows)
|
slow_on_client_rows)
|
||||||
|
|
||||||
unstable_rows = tsvRows('unstable-queries.tsv')
|
def print_unstable_queries():
|
||||||
unstable_queries += len(unstable_rows)
|
global unstable_queries
|
||||||
printSimpleTable('Unstable queries',
|
global very_unstable_queries
|
||||||
[
|
|
||||||
'Old, s', 'New, s', 'Relative difference (new - old)/old',
|
unstable_rows = tsvRows('unstable-queries.tsv')
|
||||||
'Randomization distribution quantiles [5%, 50%, 95%, 99%]',
|
if not unstable_rows:
|
||||||
'Test', 'Query'
|
return
|
||||||
],
|
|
||||||
unstable_rows)
|
unstable_queries += len(unstable_rows)
|
||||||
|
|
||||||
|
columns = [
|
||||||
|
'Old, s', #0
|
||||||
|
'New, s', #1
|
||||||
|
'Relative difference (new - old)/old', #2
|
||||||
|
'Randomization distribution quantiles [5%, 50%, 95%, 99%]', #3
|
||||||
|
'Test', #4
|
||||||
|
'Query' #5
|
||||||
|
]
|
||||||
|
|
||||||
|
print(tableStart('Unstable queries'))
|
||||||
|
print(tableHeader(columns))
|
||||||
|
|
||||||
|
attrs = ['' for c in columns]
|
||||||
|
for r in unstable_rows:
|
||||||
|
rd = ast.literal_eval(r[3])
|
||||||
|
# Note the zero-based array index: rd[2] here corresponds to rd[3] in SQL.
|
||||||
|
if rd[2] > 0.2:
|
||||||
|
very_unstable_queries += 1
|
||||||
|
attrs[3] = 'style="background: #ffb0a0"'
|
||||||
|
else:
|
||||||
|
attrs[3] = ''
|
||||||
|
|
||||||
|
print(tableRow(r, attrs))
|
||||||
|
|
||||||
|
print(tableEnd())
|
||||||
|
|
||||||
|
print_unstable_queries()
|
||||||
|
|
||||||
run_error_rows = tsvRows('run-errors.tsv')
|
run_error_rows = tsvRows('run-errors.tsv')
|
||||||
error_tests += len(run_error_rows)
|
error_tests += len(run_error_rows)
|
||||||
@ -243,7 +270,7 @@ def print_test_times():
|
|||||||
print_test_times()
|
print_test_times()
|
||||||
|
|
||||||
# Add the errors reported by various steps of comparison script
|
# Add the errors reported by various steps of comparison script
|
||||||
report_errors += tsvRows('report-errors.txt')
|
report_errors += [l.strip() for l in open('report-errors.rep')]
|
||||||
if len(report_errors):
|
if len(report_errors):
|
||||||
print(tableStart('Errors while building the report'))
|
print(tableStart('Errors while building the report'))
|
||||||
print(tableHeader(['Error']))
|
print(tableHeader(['Error']))
|
||||||
@ -261,6 +288,10 @@ print("""
|
|||||||
</html>
|
</html>
|
||||||
""")
|
""")
|
||||||
|
|
||||||
|
status = 'success'
|
||||||
|
message = 'See the report'
|
||||||
|
message_array = []
|
||||||
|
|
||||||
if slow_average_tests:
|
if slow_average_tests:
|
||||||
status = 'failure'
|
status = 'failure'
|
||||||
message_array.append(str(slow_average_tests) + ' too long')
|
message_array.append(str(slow_average_tests) + ' too long')
|
||||||
@ -269,11 +300,15 @@ if faster_queries:
|
|||||||
message_array.append(str(faster_queries) + ' faster')
|
message_array.append(str(faster_queries) + ' faster')
|
||||||
|
|
||||||
if slower_queries:
|
if slower_queries:
|
||||||
|
status = 'failure'
|
||||||
message_array.append(str(slower_queries) + ' slower')
|
message_array.append(str(slower_queries) + ' slower')
|
||||||
|
|
||||||
if unstable_queries:
|
if unstable_queries:
|
||||||
message_array.append(str(unstable_queries) + ' unstable')
|
message_array.append(str(unstable_queries) + ' unstable')
|
||||||
|
|
||||||
|
if very_unstable_queries:
|
||||||
|
status = 'failure'
|
||||||
|
|
||||||
error_tests += slow_average_tests
|
error_tests += slow_average_tests
|
||||||
if error_tests:
|
if error_tests:
|
||||||
status = 'failure'
|
status = 'failure'
|
||||||
|
Loading…
Reference in New Issue
Block a user