mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-21 23:21:59 +00:00
performance comparison
This commit is contained in:
parent
551cd4776b
commit
3c754113f1
@ -18,6 +18,7 @@ RUN apt-get update \
|
||||
python3 \
|
||||
python3-dev \
|
||||
python3-pip \
|
||||
rsync \
|
||||
tree \
|
||||
tzdata \
|
||||
vim \
|
||||
|
@ -45,7 +45,10 @@ function configure
|
||||
sed -i 's/<tcp_port>9000/<tcp_port>9001/g' left/config/config.xml
|
||||
sed -i 's/<tcp_port>9000/<tcp_port>9002/g' right/config/config.xml
|
||||
|
||||
cat > right/config/config.d/zz-perf-test-tweaks.xml <<EOF
|
||||
mkdir right/config/users.d ||:
|
||||
mkdir left/config/users.d ||:
|
||||
|
||||
cat > right/config/config.d/zz-perf-test-tweaks-config.xml <<EOF
|
||||
<yandex>
|
||||
<logger>
|
||||
<console>true</console>
|
||||
@ -59,7 +62,20 @@ function configure
|
||||
</yandex>
|
||||
EOF
|
||||
|
||||
cp right/config/config.d/zz-perf-test-tweaks.xml left/config/config.d/zz-perf-test-tweaks.xml
|
||||
cat > right/config/users.d/zz-perf-test-tweaks-users.xml <<EOF
|
||||
<yandex>
|
||||
<profiles>
|
||||
<default>
|
||||
<query_profiler_real_time_period_ns>10000000</query_profiler_real_time_period_ns>
|
||||
<query_profiler_cpu_time_period_ns>0</query_profiler_cpu_time_period_ns>
|
||||
<allow_introspection_functions>1</allow_introspection_functions>
|
||||
</default>
|
||||
</profiles>
|
||||
</yandex>
|
||||
EOF
|
||||
|
||||
cp right/config/config.d/zz-perf-test-tweaks-config.xml left/config/config.d/zz-perf-test-tweaks-config.xml
|
||||
cp right/config/users.d/zz-perf-test-tweaks-users.xml left/config/users.d/zz-perf-test-tweaks-users.xml
|
||||
|
||||
rm left/config/config.d/metric_log.xml ||:
|
||||
rm left/config/config.d/text_log.xml ||:
|
||||
@ -81,6 +97,13 @@ EOF
|
||||
|
||||
left/clickhouse client --port 9001 --query "create database test" ||:
|
||||
left/clickhouse client --port 9001 --query "rename table datasets.hits_v1 to test.hits" ||:
|
||||
|
||||
while killall clickhouse ; do echo . ; sleep 1 ; done
|
||||
echo all killed
|
||||
|
||||
# Remove logs etc, because they will be updated, and sharing them between
|
||||
# servers with hardlink might cause unpredictable behavior.
|
||||
rm db0/data/system/* -rf ||:
|
||||
}
|
||||
|
||||
function restart
|
||||
@ -125,11 +148,36 @@ function run_tests
|
||||
|
||||
rm -v test-times.tsv ||:
|
||||
|
||||
# Why the ugly cut:
|
||||
# 1) can't make --out-format='%n' work for deleted files, it outputs things
|
||||
# like "deleted 1.xml";
|
||||
# 2) the output is not tab separated, but at least it's fixed width, so I
|
||||
# cut by characters.
|
||||
changed_files=$(rsync --dry-run --dirs --checksum --delete --itemize-changes left/performance/ right/performance/ | cut -c13-)
|
||||
|
||||
# FIXME remove some broken long tests
|
||||
rm right/performance/{IPv4,IPv6,modulo,parse_engine_file,number_formatting_formats,select_format}.xml ||:
|
||||
|
||||
test_files=$(ls right/performance/*)
|
||||
|
||||
# FIXME a quick crutch to bring the run time down for the flappy tests --
# run only those that have changed. Only on my PRs for now.
if grep Kuzmenkov right-commit.txt
then
    # PR_TO_TEST has to be expanded: comparing the literal string "PR_TO_TEST"
    # with "0" is always true, so the check never had any effect. An unset
    # variable is treated as 0 here, which keeps the full list of tests.
    if [ "${PR_TO_TEST:-0}" != "0" ]
    then
        # $changed_files is intentionally left unquoted so that every file
        # name in it becomes a separate argument of readlink.
        test_files=$(cd right/performance && readlink -e $changed_files)
    fi
fi
|
||||
|
||||
# Run only explicitly specified tests, if any
|
||||
if [ -v CHPC_TEST_GLOB ]
|
||||
then
|
||||
test_files=$(ls right/performance/${CHPC_TEST_GLOB}.xml)
|
||||
fi
|
||||
|
||||
# Run the tests
|
||||
for test in right/performance/${CHPC_TEST_GLOB:-*}.xml
|
||||
for test in $test_files
|
||||
do
|
||||
test_name=$(basename $test ".xml")
|
||||
echo test $test_name
|
||||
@ -138,8 +186,19 @@ function run_tests
|
||||
{ time "$script_dir/perf.py" "$test" > "$test_name-raw.tsv" 2> "$test_name-err.log" ; } 2>&1 >/dev/null | grep -v ^+ >> "wall-clock-times.tsv" || continue
|
||||
grep ^query "$test_name-raw.tsv" | cut -f2- > "$test_name-queries.tsv"
|
||||
grep ^client-time "$test_name-raw.tsv" | cut -f2- > "$test_name-client-time.tsv"
|
||||
right/clickhouse local --file "$test_name-queries.tsv" --structure 'query text, run int, version UInt32, time float' --query "$(cat $script_dir/eqmed.sql)" > "$test_name-report.tsv"
|
||||
# this may be slow, run it in background
|
||||
right/clickhouse local --file "$test_name-queries.tsv" --structure 'query text, run int, version UInt32, time float' --query "$(cat $script_dir/eqmed.sql)" > "$test_name-report.tsv" &
|
||||
done
|
||||
|
||||
wait
|
||||
|
||||
# Collect the profiles
|
||||
left/clickhouse client --port 9001 --query "select * from system.trace_log format TSVWithNamesAndTypes" > left-trace-log.tsv ||: &
|
||||
left/clickhouse client --port 9001 --query "select arrayJoin(trace) addr, concat(splitByChar('/', addressToLine(addr))[-1], '#', demangle(addressToSymbol(addr)) ) name from system.trace_log group by addr format TSVWithNamesAndTypes" > left-addresses.tsv ||: &
|
||||
right/clickhouse client --port 9002 --query "select * from system.trace_log format TSVWithNamesAndTypes" > right-trace-log.tsv ||: &
|
||||
right/clickhouse client --port 9002 --query "select arrayJoin(trace) addr, concat(splitByChar('/', addressToLine(addr))[-1], '#', demangle(addressToSymbol(addr)) ) name from system.trace_log group by addr format TSVWithNamesAndTypes" > right-addresses.tsv ||: &
|
||||
|
||||
wait
|
||||
}
|
||||
|
||||
# Analyze results
|
||||
@ -156,7 +215,7 @@ create table queries engine Memory as select
|
||||
-- remove them altogether because we want to be able to detect regressions,
|
||||
-- but the right way to do this is not yet clear.
|
||||
not short and abs(diff) < 0.05 and rd[3] > 0.05 as unstable,
|
||||
not short and abs(diff) > 0.05 and abs(diff) > rd[3] as changed,
|
||||
not short and abs(diff) > 0.10 and abs(diff) > rd[3] as changed,
|
||||
*
|
||||
from file('*-report.tsv', TSV, 'left float, right float, diff float, rd Array(float), query text');
|
||||
|
||||
@ -201,7 +260,7 @@ create table test_times_tsv engine File(TSV, 'test-times.tsv') as
|
||||
floor(real / queries, 3) avg_real_per_query,
|
||||
floor(query_min, 3)
|
||||
from test_time join wall_clock using test
|
||||
order by query_max / query_min desc;
|
||||
order by avg_real_per_query desc;
|
||||
|
||||
create table all_queries_tsv engine File(TSV, 'all-queries.tsv') as
|
||||
select left, right, diff, rd, test, query
|
||||
|
@ -8,7 +8,7 @@ select
|
||||
query
|
||||
from
|
||||
(
|
||||
select query, quantiles(0.05, 0.5, 0.95)(abs(time_by_label[1] - time_by_label[2])) rd_quantiles -- quantiles of randomization distribution
|
||||
select query, quantiles(0.05, 0.5, 0.95, 0.99)(abs(time_by_label[1] - time_by_label[2])) rd_quantiles -- quantiles of randomization distribution
|
||||
from
|
||||
(
|
||||
select query, virtual_run, groupArrayInsertAt(median_time, random_label) time_by_label -- make array 'random label' -> 'median time'
|
||||
|
@ -10,6 +10,15 @@ import pprint
|
||||
import time
|
||||
import traceback
|
||||
|
||||
# time.perf_counter() reading taken when the current stage began.
stage_start_seconds = time.perf_counter()


def report_stage_end(stage_name):
    """Print the duration of the stage that just finished and restart the timer.

    Writes one line to stdout: the stage name and the number of seconds
    elapsed since the previous call (or since module start-up for the first
    call), separated by a tab.

    stage_name -- label printed in the first column.
    """
    global stage_start_seconds
    elapsed_seconds = time.perf_counter() - stage_start_seconds
    print(stage_name, elapsed_seconds, sep='\t')
    # The clock is read again after printing, so the time spent writing the
    # line is not counted towards the next stage.
    stage_start_seconds = time.perf_counter()


report_stage_end('start')
|
||||
|
||||
parser = argparse.ArgumentParser(description='Run performance test.')
|
||||
# Explicitly decode files as UTF-8 because sometimes we have Russian characters in queries, and LANG=C is set.
|
||||
parser.add_argument('file', metavar='FILE', type=argparse.FileType('r', encoding='utf-8'), nargs=1, help='test description file')
|
||||
@ -35,6 +44,8 @@ if infinite_sign is not None:
|
||||
servers = [{'host': host, 'port': port} for (host, port) in zip(args.host, args.port)]
|
||||
connections = [clickhouse_driver.Client(**server) for server in servers]
|
||||
|
||||
report_stage_end('connect')
|
||||
|
||||
# Check tables that should exist
|
||||
tables = [e.text for e in root.findall('preconditions/table_exists')]
|
||||
for t in tables:
|
||||
@ -47,6 +58,8 @@ for c in connections:
|
||||
for s in settings:
|
||||
c.execute("set {} = '{}'".format(s.tag, s.text))
|
||||
|
||||
report_stage_end('preconditions')
|
||||
|
||||
# Process substitutions
|
||||
subst_elems = root.findall('substitutions/substitution')
|
||||
|
||||
@ -61,6 +74,8 @@ parameter_combinations = [dict(zip(parameter_keys, parameter_combination)) for p
|
||||
def substitute_parameters(query_templates, parameter_combinations):
|
||||
return list(set([template.format(**parameters) for template, parameters in itertools.product(query_templates, parameter_combinations)]))
|
||||
|
||||
report_stage_end('substitute')
|
||||
|
||||
# Run drop queries, ignoring errors
|
||||
drop_query_templates = [q.text for q in root.findall('drop_query')]
|
||||
drop_queries = substitute_parameters(drop_query_templates, parameter_combinations)
|
||||
@ -86,6 +101,8 @@ for c in connections:
|
||||
for q in fill_queries:
|
||||
c.execute(q)
|
||||
|
||||
report_stage_end('fill')
|
||||
|
||||
# Run test queries
|
||||
def tsv_escape(s):
|
||||
return s.replace('\\', '\\\\').replace('\t', '\\t').replace('\n', '\\n').replace('\r','')
|
||||
@ -93,6 +110,8 @@ def tsv_escape(s):
|
||||
test_query_templates = [q.text for q in root.findall('query')]
|
||||
test_queries = substitute_parameters(test_query_templates, parameter_combinations)
|
||||
|
||||
report_stage_end('substitute2')
|
||||
|
||||
for q in test_queries:
|
||||
# Prewarm: run once on both servers. Helps to bring the data into memory,
|
||||
# precompile the queries, etc.
|
||||
@ -115,9 +134,13 @@ for q in test_queries:
|
||||
client_seconds = time.perf_counter() - start_seconds
|
||||
print('client-time\t{}\t{}\t{}'.format(tsv_escape(q), client_seconds, server_seconds))
|
||||
|
||||
report_stage_end('benchmark')
|
||||
|
||||
# Run drop queries
|
||||
drop_query_templates = [q.text for q in root.findall('drop_query')]
|
||||
drop_queries = substitute_parameters(drop_query_templates, parameter_combinations)
|
||||
for c in connections:
|
||||
for q in drop_queries:
|
||||
c.execute(q)
|
||||
|
||||
report_stage_end('drop')
|
||||
|
Loading…
Reference in New Issue
Block a user