[wip] performance comparison test

Alexander Kuzmenkov 2020-01-16 22:39:07 +03:00
parent f81aa1c9f0
commit 7ae38a3400
5 changed files with 34 additions and 7 deletions

View File

@@ -52,5 +52,5 @@
</substitution>
</substitutions>
<query>SELECT {key} AS k, {func}(UserID) FROM hits_100m_single GROUP BY k</query>
<query>SELECT {key} AS k, {func}(UserID) FROM hits_100m_single GROUP BY k FORMAT Null</query>
</test>
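
The {key} and {func} placeholders are filled in by perf.py's substitute_parameters with every combination of the <substitution> values, and FORMAT Null discards the result set so the measurement covers the aggregation rather than result formatting and transfer. A minimal sketch of that expansion, assuming plain string replacement and hypothetical substitution values (the real ones are not shown in this hunk):

    import itertools

    # Hypothetical substitution values; the real ones come from the
    # <substitution> elements of the test XML, which this hunk does not show.
    substitutions = {
        'key': ['SearchPhrase', 'MobilePhoneModel'],
        'func': ['uniq', 'uniqExact'],
    }
    template = 'SELECT {key} AS k, {func}(UserID) FROM hits_100m_single GROUP BY k FORMAT Null'

    names = list(substitutions)
    for values in itertools.product(*(substitutions[n] for n in names)):
        query = template
        for name, value in zip(names, values):
            query = query.replace('{' + name + '}', value)
        print(query)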

View File

@@ -1,6 +1,9 @@
# docker build -t yandex/clickhouse-performance-comparison .
FROM ubuntu:18.04
ENV TZ=Europe/Moscow
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
RUN apt-get update \
&& DEBIAN_FRONTEND=noninteractive apt-get install --yes --no-install-recommends \
p7zip-full bash git ncdu wget psmisc python3 python3-pip tzdata python3-dev g++ \

View File

@@ -118,7 +118,8 @@ function run_tests
# Just check that the script runs at all
"$script_dir/perf.py" --help > /dev/null
rm test-times.tsv ||:
# FIXME remove some broken long tests
rm left/performance/IPv* ||:
# Run the tests
for test in left/performance/*.xml
@@ -126,8 +127,11 @@ function run_tests
test_name=$(basename $test ".xml")
echo test $test_name
TIMEFORMAT=$(printf "time\t$test_name\t%%3R\t%%3U\t%%3S\n")
#time "$script_dir/perf.py" "$test" > >(tee "$test_name-raw.tsv") 2> >(tee "$test_name-err.log") || continue
time "$script_dir/perf.py" "$test" > "$test_name-raw.tsv" 2> "$test_name-err.log" || continue
right/clickhouse local --file "$test_name-raw.tsv" --structure 'query text, run int, version UInt32, time float' --query "$(cat $script_dir/eqmed.sql)" > "$test_name-report.tsv"
grep ^query "$test_name-raw.tsv" | cut -f2- > "$test_name-queries.tsv"
grep ^client-time "$test_name-raw.tsv" | cut -f2- > "$test_name-client-time.tsv"
right/clickhouse local --file "$test_name-queries.tsv" --structure 'query text, run int, version UInt32, time float' --query "$(cat $script_dir/eqmed.sql)" > "$test_name-report.tsv"
done
}
run_tests
@@ -136,4 +140,5 @@ run_tests
result_structure="left float, right float, diff float, rd Array(float), query text"
right/clickhouse local --file '*-report.tsv' -S "$result_structure" --query "select * from table where diff < 0.05 and rd[3] > 0.05 order by rd[3] desc" > flap-prone.tsv
right/clickhouse local --file '*-report.tsv' -S "$result_structure" --query "select * from table where diff > 0.05 and diff > rd[3] order by diff desc" > bad-perf.tsv
right/clickhouse local --file '*-client-time.tsv' -S "query text, client float, server float" -q "select *, floor(client/server, 3) p from table order by p desc" > client-time.tsv
grep Exception:[^:] *-err.log > run-errors.log
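
perf.py now tags each line it prints with a leading column ('query' or 'client-time'), and the grep/cut pair above splits the raw per-test TSV by that tag before eqmed.sql is applied to the query timings. The same split, sketched in Python with placeholder file names standing in for the $test_name-* files:

    # Split a raw perf.py TSV by its leading tag column, like the
    # `grep ^query | cut -f2-` and `grep ^client-time | cut -f2-` pair above.
    # File names are placeholders for the $test_name-* files.
    with open('example-raw.tsv') as raw, \
            open('example-queries.tsv', 'w') as queries, \
            open('example-client-time.tsv', 'w') as client_time:
        out = {'query': queries, 'client-time': client_time}
        for line in raw:
            tag, _, rest = line.partition('\t')
            if tag in out:
                out[tag].write(rest)  # drop the tag column, keep the rest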

View File

@@ -16,7 +16,14 @@ echo Reference SHA is $ref_sha
# Set python output encoding so that we can print queries with Russian letters.
export PYTHONIOENCODING=utf-8
../compare.sh 0 $ref_sha $PR_TO_TEST $SHA_TO_TEST > compare.log 2>&1 ||:
# Even if we have some errors, try our best to save the logs.
set +e
# compare.sh kills its process group, so put it into a separate one.
# It's probably at fault for using `kill 0` as an error handling mechanism,
# but I can't be bothered to change this now.
set -m
../compare.sh 0 $ref_sha $PR_TO_TEST $SHA_TO_TEST 2>&1 | tee compare.log
set +m
7z a /output/output.7z *.log *.tsv
cp compare.log /output
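
Because compare.sh handles errors with `kill 0`, which signals its whole process group, enabling job control with `set -m` runs it in a process group of its own and keeps that kill away from this wrapper. For illustration only, a rough Python equivalent of that isolation using subprocess (the script path is a placeholder):

    import subprocess

    # Start the child in its own session (and therefore its own process group),
    # so a `kill 0` inside it cannot reach this wrapper process.
    proc = subprocess.Popen(['bash', './compare.sh'], start_new_session=True)
    proc.wait()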

View File

@@ -6,6 +6,8 @@ import clickhouse_driver
import xml.etree.ElementTree as et
import argparse
import pprint
import time
import traceback
parser = argparse.ArgumentParser(description='Run performance test.')
# Explicitly decode files as UTF-8 because sometimes we have Russian characters in queries, and LANG=C is set.
@@ -63,7 +65,8 @@ for c in connections:
    try:
        c.execute(q)
    except:
        print("Error:", sys.exc_info()[0], file=sys.stderr)
        traceback.print_exc()
        pass
# Run create queries
create_query_templates = [q.text for q in root.findall('create_query')]
@@ -87,10 +90,19 @@ test_query_templates = [q.text for q in root.findall('query')]
test_queries = substitute_parameters(test_query_templates, parameter_combinations)
for q in test_queries:
    for run in range(0, 7):
    # Track the time spent by the client to process this query, so that we can notice
    # the queries that take long to process on the client side, e.g. by sending
    # excessive data.
    start_seconds = time.perf_counter()
    server_seconds = 0
    for run in range(0, 13):
        for conn_index, c in enumerate(connections):
            res = c.execute(q)
            print(tsv_escape(q) + '\t' + str(run) + '\t' + str(conn_index) + '\t' + str(c.last_query.elapsed))
            print('query\t' + tsv_escape(q) + '\t' + str(run) + '\t' + str(conn_index) + '\t' + str(c.last_query.elapsed))
            server_seconds += c.last_query.elapsed
    client_seconds = time.perf_counter() - start_seconds
    print('client-time\t{}\t{}\t{}'.format(tsv_escape(q), client_seconds, server_seconds))
# Run drop queries
drop_query_templates = [q.text for q in root.findall('drop_query')]
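
Each client-time row pairs the wall-clock time the driver spent on a query with the sum of server-side elapsed times over all runs; the last step of compare.sh sorts queries by the client/server ratio to surface those that burn most of their time on the client, for example by pulling back an excessive result set. A rough Python equivalent of that floor(client/server, 3) report, assuming the tab-separated layout printed above and non-zero server time:

    import glob
    import math

    # Rank queries by how much client-side time they take relative to
    # server-side time, mirroring the last clickhouse local query in compare.sh.
    rows = []
    for path in glob.glob('*-client-time.tsv'):
        with open(path) as f:
            for line in f:
                query, client, server = line.rstrip('\n').split('\t')
                ratio = math.floor(float(client) / float(server) * 1000) / 1000
                rows.append((ratio, query, client, server))

    for ratio, query, client, server in sorted(rows, reverse=True):
        print('{}\t{}\t{}\t{}'.format(query, client, server, ratio))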