mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-28 18:42:26 +00:00
[wip] some experimental scripts for performance comparison
This commit is contained in:
parent
c8c4a3595b
commit
8a3ddb67ef
97
docker/test/performance-comparison/compare.sh
Executable file
97
docker/test/performance-comparison/compare.sh
Executable file
@ -0,0 +1,97 @@
|
|||||||
|
#!/bin/bash
# Experimental performance-comparison driver: downloads two ClickHouse builds
# ("left" and "right"), runs the performance tests against both, and produces
# per-query timing reports.
set -ex
set -o pipefail

# Propagate Ctrl-C/TERM, and kill every background child on exit.
trap "exit" INT TERM
trap "kill 0" EXIT

# Working directories; ignore errors if they already exist.
mkdir left || true
mkdir right || true
mkdir db0 || true

# Positional arguments: PR number and commit SHA for each side.
left_pr=$1
left_sha=$2

right_pr=$3
right_sha=$4
|
# Fetch both builds and the benchmark datasets, then set up one db directory
# per server hardlinked against the shared db0 copy.
function download
{
    # Build archives are named <pr>-<sha>.tgz on each side.
    la="$left_pr-$left_sha.tgz"
    ra="$right_pr-$right_sha.tgz"

    # Download and unpack everything concurrently. Each backgrounded group
    # runs in its own subshell, so the `cd db0` does not leak into the
    # parent shell.
    wget -nd -c "https://clickhouse-builds.s3.yandex.net/$left_pr/$left_sha/performance/performance.tgz" -O "$la" && tar -C left --strip-components=1 -zxvf "$la" &
    wget -nd -c "https://clickhouse-builds.s3.yandex.net/$right_pr/$right_sha/performance/performance.tgz" -O "$ra" && tar -C right --strip-components=1 -zxvf "$ra" &
    ( cd db0 && wget -nd -c "https://s3.mds.yandex.net/clickhouse-private-datasets/hits_10m_single/partitions/hits_10m_single.tar" && tar -xvf hits_10m_single.tar ) &
    ( cd db0 && wget -nd -c "https://s3.mds.yandex.net/clickhouse-private-datasets/hits_100m_single/partitions/hits_100m_single.tar" && tar -xvf hits_100m_single.tar ) &
    #( cd db0 && wget -nd -c "https://clickhouse-datasets.s3.yandex.net/hits/partitions/hits_v1.tar" && tar -xvf hits_v1.tar ) &
    wait

    # Use hardlinks instead of copying so the dataset is stored on disk
    # only once for both servers.
    rm -r left/db || true
    rm -r right/db || true
    cp -al db0/ left/db/
    cp -al db0/ right/db/
}

#download
|
# Adjust the stock server configs so both servers can run side by side.
function configure
{
    # The right server must listen on a different port than the left one.
    sed -i 's/<tcp_port>9000/<tcp_port>9001/g' right/config/config.xml

    # Shared tweaks for both servers: log to the console and disable the
    # text_log table. The heredoc delimiter is quoted because the payload
    # needs no shell expansion.
    cat > right/config/config.d/perf-test-tweaks.xml <<'EOF'
<yandex>
<logger>
<console>true</console>
</logger>
<text_log remove="remove"/>
</yandex>
EOF

    cp right/config/config.d/perf-test-tweaks.xml left/config/config.d/perf-test-tweaks.xml
}

configure
||||||
|
# Kill any running servers and start a fresh pair, waiting until both
# answer queries.
function restart
{
    # killall returns nonzero once no clickhouse processes remain, which
    # terminates the loop.
    while killall clickhouse
    do
        echo .
        sleep 1
    done
    echo all killed

    # Spawn servers in their own process groups
    set -m

    left/clickhouse server --config-file=left/config/config.xml -- --path left/db &> left/log.txt &
    left_pid=$!
    # Under `set -e`, kill -0 aborts the script if the server already died.
    kill -0 "$left_pid"
    disown "$left_pid"

    right/clickhouse server --config-file=right/config/config.xml -- --path right/db &> right/log.txt &
    right_pid=$!
    kill -0 "$right_pid"
    disown "$right_pid"

    set +m

    # Poll each server until it answers; bail out if it exits meanwhile.
    while ! left/clickhouse client --query "select 1"
    do
        kill -0 "$left_pid"
        echo .
        sleep 1
    done
    echo left ok

    while ! right/clickhouse client --port 9001 --query "select 1"
    do
        kill -0 "$right_pid"
        echo .
        sleep 1
    done
    echo right ok
}

restart
|
|
||||||
|
# Run every performance test through perf.py (which emits raw TSV timings
# for both servers) and aggregate each raw file into a per-query report
# with the eqmed.sql randomization-test query.
for test in ch/dbms/tests/performance/*.xml
do
    # Quote "$test" so paths containing spaces survive word splitting.
    test_name=$(basename "$test" ".xml")
    ./perf.py "$test" > "$test_name-raw.tsv" || continue
    right/clickhouse local --file "$test_name-raw.tsv" --structure 'query text, run int, version UInt32, time float' --query "$(cat eqmed.sql)" > "$test_name-report.tsv"
done

#while killall clickhouse ; do echo . ; sleep 1 ; done
#echo ok
||||||
|
|
41
docker/test/performance-comparison/eqmed.sql
Normal file
41
docker/test/performance-comparison/eqmed.sql
Normal file
@ -0,0 +1,41 @@
|
|||||||
|
-- Input is table(query text, run int, version UInt32, time float), matching
-- the --structure that compare.sh passes to `clickhouse local`.
-- For each query, flags a regression when the observed relative difference
-- of the two versions' median times exceeds the 95% quantile of the
-- randomization distribution of that difference.
select
    abs(diff_percent) > rd_quantiles_percent[3] fail,
    floor(original_medians_array.time_by_version[1], 4) m1,
    floor(original_medians_array.time_by_version[2], 4) m2,
    floor((m1 - m2) / m1, 3) diff_percent,
    arrayMap(x -> floor(x / m1, 3), rd.rd_quantiles) rd_quantiles_percent,
    query
from
(
    select query, quantiles(0.05, 0.5, 0.95)(abs(time_by_label[1] - time_by_label[2])) rd_quantiles -- quantiles of randomization distribution
    from
    (
        select query, virtual_run, groupArrayInsertAt(median_time, random_label) time_by_label -- make array 'random label' -> 'median time'
        from
        (
            select query, medianExact(time) median_time, virtual_run, random_label -- get median times, grouping by random label
            from
            (
                select *, toUInt32(rowNumberInBlock() % 2) random_label -- randomly relabel measurements
                from
                (
                    select query, time, number virtual_run
                    from table, numbers(1, 10000) -- duplicate input measurements into many virtual runs
                    order by query, virtual_run, rand() -- for each virtual run, randomly reorder measurements
                ) virtual_runs
            ) relabeled
            group by query, virtual_run, random_label
        ) virtual_medians
        group by query, virtual_run -- aggregate by random_label
    ) virtual_medians_array
    group by query -- aggregate by virtual_run
) rd,
(
    select groupArrayInsertAt(median_time, version) time_by_version, query
    from
    (
        select medianExact(time) median_time, query, version
        from table group by query, version
    ) original_medians
    group by query
) original_medians_array
where rd.query = original_medians_array.query
order by fail desc, rd_quantiles_percent[3] asc;
|
87
docker/test/performance-comparison/perf.py
Executable file
87
docker/test/performance-comparison/perf.py
Executable file
@ -0,0 +1,87 @@
|
|||||||
|
#!/usr/bin/python3

# Standard library.
import argparse
import itertools
import pprint
import sys  # was referenced in the drop-query error handler but never imported
import xml.etree.ElementTree as et

# Third-party.
import clickhouse_driver
||||||
|
|
||||||
|
# Command line: a single test-description XML file.
parser = argparse.ArgumentParser(description='Run performance test.')
parser.add_argument(
    'file',
    metavar='FILE',
    type=argparse.FileType('r'),
    nargs=1,
    help='test description file')
args = parser.parse_args()

# nargs=1 yields a one-element list of open file objects.
root = et.parse(args.file[0]).getroot()
|
||||||
|
|
||||||
|
# Refuse tests whose main metric is anything other than min_time; other
# metrics are not implemented here.
main_metric_element = root.find('main_metric/*')
main_metric = main_metric_element.tag
if main_metric != 'min_time':
    raise Exception('Only the min_time main metric is supported. This test uses \'{}\''.format(main_metric))
|
||||||
|
|
||||||
|
# Open one connection per server under comparison: "left" on the default
# port 9000 and "right" on 9001 (the ports set up by compare.sh).
servers = [
    {'host': 'localhost', 'port': 9000, 'client_name': 'left'},
    {'host': 'localhost', 'port': 9001, 'client_name': 'right'},
]
connections = [clickhouse_driver.Client(**server) for server in servers]
|
||||||
|
|
||||||
|
# Preconditions: every table named in <table_exists> must already be
# present on both servers. The anti join returns the names that are NOT
# found in system.tables for the current database.
tables = [e.text for e in root.findall('preconditions/table_exists')]
if tables:
    tables_list = ", ".join("'{}'".format(t) for t in tables)
    missing_query = "select t from values('t text', {}) anti join system.tables on database = currentDatabase() and name = t".format(tables_list)
    for c in connections:
        res = c.execute(missing_query)
        if res:
            raise Exception('Some tables are not found: {}'.format(res))
|
||||||
|
|
||||||
|
# Substitutions: each <substitution> element supplies a parameter name and
# the list of values it may take. Expand them into the cross product of all
# values, e.g. keys ['table', 'limit'] with value arrays
# [['hits_100m', 'hits_10m'], ['1', '10']] become
# [{'table': 'hits_100m', 'limit': '1'}, ...].
parameter_keys = []
parameter_value_arrays = []
for substitution in root.findall('substitutions/substitution'):
    parameter_keys.append(substitution.find('name').text)
    parameter_value_arrays.append([v.text for v in substitution.findall('values/value')])
parameter_combinations = [
    dict(zip(parameter_keys, values))
    for values in itertools.product(*parameter_value_arrays)]
||||||
|
def substitute_parameters(query_templates, parameter_combinations):
    """Expand each query template with every parameter combination.

    Returns the deduplicated list of formatted queries in deterministic
    first-seen order. The original ``list(set(...))`` ordering depended on
    per-process string hashing, so the query execution order varied between
    runs; ``dict.fromkeys`` preserves insertion order while deduplicating.
    """
    return list(dict.fromkeys(
        template.format(**parameters)
        for template, parameters in itertools.product(query_templates,
                                                      parameter_combinations)))
|
||||||
|
# Run cleanup drop queries first, ignoring errors: on a fresh server the
# tables may simply not exist yet.
drop_query_templates = [q.text for q in root.findall('drop_query')]
drop_queries = substitute_parameters(drop_query_templates, parameter_combinations)
for c in connections:
    for q in drop_queries:
        try:
            c.execute(q)
        except Exception:
            # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
            # still propagate. Requires `import sys` at the top of the file
            # (the original referenced sys without importing it).
            print("Error:", sys.exc_info()[0], file=sys.stderr)
|
||||||
|
|
||||||
|
# Create the tables the test needs, on both servers. Unlike the drop
# queries above, failures here are fatal.
create_query_templates = [q.text for q in root.findall('create_query')]
create_queries = substitute_parameters(create_query_templates, parameter_combinations)
for connection in connections:
    for query in create_queries:
        connection.execute(query)
|
||||||
|
|
||||||
|
# Populate the freshly created tables on both servers.
fill_query_templates = [q.text for q in root.findall('fill_query')]
fill_queries = substitute_parameters(fill_query_templates, parameter_combinations)
for connection in connections:
    for query in fill_queries:
        connection.execute(query)
|
||||||
|
|
||||||
|
# Run the test queries themselves: 7 runs of each query against each server,
# printing one TSV measurement line per execution:
#   query <TAB> run <TAB> server-index <TAB> elapsed-seconds
test_query_templates = [q.text for q in root.findall('query')]
test_queries = substitute_parameters(test_query_templates, parameter_combinations)

for q in test_queries:
    for run in range(7):
        for conn_index, c in enumerate(connections):
            c.execute(q)
            print('\t'.join([q, str(run), str(conn_index), str(c.last_query.elapsed)]))
|
||||||
|
|
||||||
|
# Final cleanup: drop the test tables again, this time treating any
# failure as an error.
drop_query_templates = [q.text for q in root.findall('drop_query')]
drop_queries = substitute_parameters(drop_query_templates, parameter_combinations)
for connection in connections:
    for query in drop_queries:
        connection.execute(query)
|
Loading…
Reference in New Issue
Block a user