#!/bin/bash
# ClickHouse hardware benchmark: settings shared by the rest of the script.

# Stop at the first unhandled command failure. This is set here rather than on
# the shebang line so it still applies when the script is run as `bash <file>`.
set -e

# File holding the benchmark queries; it is read one query per line.
QUERIES_FILE="queries.sql"
# Number of times every query is executed.
TRIES=3
# Work inside a dedicated directory so the binary, the query file and all
# result files end up in one place.
mkdir -p clickhouse-benchmark
pushd clickhouse-benchmark

# Download the binary (skipped when an executable ./clickhouse already exists).
if [[ ! -x clickhouse ]]; then
  curl https://clickhouse.com/ | sh
fi

# Fetch the query list unless a local copy is already present.
if [[ ! -f "${QUERIES_FILE}" ]]; then
  wget "https://raw.githubusercontent.com/ClickHouse/ClickHouse/master/benchmark/clickhouse/${QUERIES_FILE}"
fi
# Print the current system load before the server is started.
uptime

echo "Starting clickhouse-server"
# Run the server in the background with all of its output discarded.
./clickhouse server >/dev/null 2>&1 &
# Remember the PID of the background server so it can be stopped later.
PID=$!
#######################################
# Stops the background server and waits for background jobs to exit.
# Globals:   PID (read) - process id of the server to stop.
# Returns:   0, also when there is nothing left to stop.
#######################################
finish() {
  # Nothing to stop if no server PID was ever recorded.
  [[ -n "${PID:-}" ]] || return 0
  # The server may already have exited; a failed kill must not abort the
  # cleanup under `set -e` before the wait below.
  kill "$PID" 2>/dev/null || true
  wait
}
# Stop the server whenever the script exits, on success or on failure.
trap finish EXIT

echo "Waiting for clickhouse-server to start"
# Poll once per second, at most 30 times, until a trivial query succeeds.
for i in {1..30}; do
  sleep 1
  if ./clickhouse client --query "SELECT 'Ok.'" 2>/dev/null; then
    break
  fi
  echo -n '.'
  # The last attempt failed as well: give up.
  if [[ "$i" == 30 ]]; then
    exit 1
  fi
done
# Skip the download when table `hits` exists and already holds 100000000 rows.
if [[ "$(./clickhouse client --query "EXISTS hits")" == '1' && "$(./clickhouse client --query "SELECT count() FROM hits")" == '100000000' ]]; then
  echo "Dataset already downloaded"
else
  echo "Will download the dataset"

  # Use one insert thread per CPU; the CPU count is queried differently on macOS.
  if [[ "$(uname)" == "Darwin" ]]; then
    insert_threads=$(sysctl -n hw.ncpu)
  else
    # Fall back to 4 threads when nproc is not available.
    insert_threads=$(nproc || echo 4)
  fi

  ./clickhouse client --receive_timeout 1000 --max_insert_threads "${insert_threads}" --progress --query "
    CREATE OR REPLACE TABLE hits ENGINE = MergeTree PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID), EventTime)
    AS SELECT * FROM url('https://datasets.clickhouse.com/hits/native/hits_100m_obfuscated_{0..255}.native.zst')"

  ./clickhouse client --query "SELECT 'The dataset size is: ', count() FROM hits"
fi
# The dataset counts as prepared once `hits` consists of exactly one active
# part; otherwise force a full merge.
if [[ "$(./clickhouse client --query "SELECT count() FROM system.parts WHERE table = 'hits' AND database = 'default' AND active")" == '1' ]]; then
  echo "Dataset already prepared"
else
  echo "Will prepare the dataset"
  ./clickhouse client --receive_timeout 1000 --query "OPTIMIZE TABLE hits FINAL"
fi
echo
echo "Will perform benchmark. Results:"
echo

# Start with an empty per-try results file.
>result.csv

QUERY_NUM=1

# Run every query TRIES times. For each query one "[t1, t2, ...]," line is
# printed, and each successful try is appended to result.csv as
# "query_num,try_num,time".
# `read -r` keeps backslashes in the query text intact.
sed "s/{table}/hits/g" "${QUERIES_FILE}" | while IFS= read -r query; do
  # Flush pending writes and drop the OS caches before each query.
  sync
  if [[ "$(uname)" == "Darwin" ]]; then
    sudo purge >/dev/null
  else
    echo 3 | sudo tee /proc/sys/vm/drop_caches >/dev/null
  fi

  echo -n "["
  for ((i = 1; i <= TRIES; i++)); do
    # Test the client's exit status directly: appending `|| :` inside the
    # substitution would force it to 0 and hide failed queries.
    if RES=$(./clickhouse client --time --format=Null --query="${query}" 2>&1); then
      echo -n "${RES}"
      echo "${QUERY_NUM},${i},${RES}" >>result.csv
    else
      # A failed try is shown as null and left out of result.csv: its output
      # is an error message, not a timing.
      echo -n "null"
    fi
    if ((i < TRIES)); then
      echo -n ", "
    fi
  done
  echo "],"

  QUERY_NUM=$((QUERY_NUM + 1))
done
echo
echo "Benchmark complete. System info:"
echo

# Pre-create every report file so each one exists even when the section that
# would fill it is not run on this platform (the upload step reads all of
# them with file()).
touch {cpu_model,cpu,df,memory,memory_total,blk,mdstat,instance}.txt

# Print the system description and keep a copy of each section in its own file.
if [[ "$(uname)" == "Darwin" ]]; then
  echo '----Version, build id-----------'
  ./clickhouse local --query "SELECT format('Version: {}', version())"
  ./clickhouse local --query "SELECT format('The number of threads is: {}', value) FROM system.settings WHERE name = 'max_threads'" --output-format TSVRaw
  ./clickhouse local --query "SELECT format('Current time: {}', toString(now(), 'UTC'))"
  echo '----CPU-------------------------'
  sysctl hw.model | tee cpu_model.txt
  sysctl -a | grep -E 'hw.activecpu|hw.memsize|hw.byteorder|cachesize' | tee cpu.txt
  echo '----Disk Free and Total--------'
  df -h . | tee df.txt
  echo '----Memory Free and Total-------'
  vm_stat | tee memory.txt
  echo '----Physical Memory Amount------'
  ls -l /var/vm | tee memory_total.txt
  echo '--------------------------------'
else
  echo '----Version, build id-----------'
  ./clickhouse local --query "SELECT format('Version: {}, build id: {}', version(), buildId())"
  ./clickhouse local --query "SELECT format('The number of threads is: {}', value) FROM system.settings WHERE name = 'max_threads'" --output-format TSVRaw
  ./clickhouse local --query "SELECT format('Current time: {}', toString(now(), 'UTC'))"
  echo '----CPU-------------------------'
  grep -i -F 'model name' /proc/cpuinfo | uniq | tee cpu_model.txt
  lscpu | tee cpu.txt
  echo '----Block Devices---------------'
  lsblk | tee blk.txt
  echo '----Disk Free and Total--------'
  df -h . | tee df.txt
  echo '----Memory Free and Total-------'
  free -h | tee memory.txt
  echo '----Physical Memory Amount------'
  grep MemTotal /proc/meminfo | tee memory_total.txt
  echo '----RAID Info-------------------'
  # Kept as a pipeline on purpose: a missing /proc/mdstat then does not make
  # the command fail under `set -e`, because tee still succeeds.
  cat /proc/mdstat | tee mdstat.txt
  echo '--------------------------------'
fi
echo
echo "Instance type from IMDS (if available):"
# Short connect timeout so the lookup gives up quickly when the metadata
# address is not reachable.
curl -s --connect-timeout 1 'http://169.254.169.254/latest/meta-data/instance-type' | tee instance.txt
echo

echo "Uploading the results (if possible)"

# One identifier ties the metadata row to the per-try rows of this run.
UUID=$(./clickhouse local --query "SELECT generateUUIDv4()")

# Build the run metadata row, keep a copy in meta.tsv and try to upload it.
./clickhouse local --query "
  SELECT
    '${UUID}' AS run_id,
    version() AS version,
    now() AS test_time,
    (SELECT value FROM system.settings WHERE name = 'max_threads') AS threads,
    filesystemCapacity() AS fs_capacity,
    filesystemAvailable() AS fs_available,
    file('cpu_model.txt') AS cpu_model,
    file('cpu.txt') AS cpu,
    file('df.txt') AS df,
    file('memory.txt') AS memory,
    file('memory_total.txt') AS memory_total,
    file('blk.txt') AS blk,
    file('mdstat.txt') AS mdstat,
    file('instance.txt') AS instance
" | tee meta.tsv | ./clickhouse client --host play.clickhouse.com --secure --user benchmark --query "
  INSERT INTO benchmark_runs
  (run_id, version, test_time, threads, fs_capacity, fs_available, cpu_model, cpu, df, memory, memory_total, blk, mdstat, instance)
  FORMAT TSV" || echo "Cannot upload results."

# Convert the per-try timings, keep a copy in results.tsv and try to upload them.
./clickhouse local --query "
  SELECT
    '${UUID}' AS run_id,
    c1 AS query_num,
    c2 AS try_num,
    c3 AS time
  FROM file('result.csv')
" | tee results.tsv | ./clickhouse client --host play.clickhouse.com --secure --user benchmark --query "
  INSERT INTO benchmark_results
  (run_id, query_num, try_num, time)
  FORMAT TSV" || echo "Cannot upload results. Please send the output to feedback@clickhouse.com"
<<////
Server Setup:
CREATE TABLE benchmark_runs
(
run_id UUID,
version String,
test_time DateTime,
threads String,
fs_capacity UInt64,
fs_available UInt64,
cpu_model String,
cpu String,
df String,
memory String,
memory_total String,
blk String,
mdstat String,
instance String
) ENGINE = ReplicatedMergeTree ORDER BY run_id;
CREATE TABLE benchmark_results
(
run_id UUID,
query_num UInt8,
try_num UInt8,
time Decimal32( 3)
) ENGINE = ReplicatedMergeTree ORDER BY ( run_id, query_num, try_num) ;
CREATE USER benchmark IDENTIFIED WITH no_password SETTINGS max_rows_to_read = 1, max_result_rows = 1, max_execution_time = 1;
CREATE QUOTA benchmark
KEYED BY ip_address
FOR RANDOMIZED INTERVAL 1 MINUTE MAX query_inserts = 4, written_bytes = 100000,
FOR RANDOMIZED INTERVAL 1 HOUR MAX query_inserts = 10, written_bytes = 500000,
FOR RANDOMIZED INTERVAL 1 DAY MAX query_inserts = 50, written_bytes = 2000000
TO benchmark;
GRANT INSERT ON benchmark_runs TO benchmark;
GRANT INSERT ON benchmark_results TO benchmark;
Example query:
SELECT
cpu_model,
threads,
instance,
k
FROM
(
SELECT
run_id,
exp( avg( log( adjusted_time / best_time) ) ) AS k
FROM
(
WITH greatest( time, 0.01) AS adjusted_time
SELECT
run_id,
adjusted_time,
min( adjusted_time) OVER ( PARTITION BY query_num, try_num) AS best_time
FROM benchmark_results
WHERE try_num > 1
)
GROUP BY run_id
ORDER BY k ASC
) AS t
INNER JOIN benchmark_runs USING ( run_id)
////