mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-16 12:44:42 +00:00
97f2a2213e
* Move some code outside dbms/src folder * Fix paths
184 lines
6.0 KiB
Bash
Executable File
184 lines
6.0 KiB
Bash
Executable File
#!/usr/bin/env bash
#
# Draft benchmark comparing the vertical and horizontal MergeTree merge
# algorithms: the same source data is inserted into tables with three different
# primary keys and the merge times found in the server log are summed up.
#
# Uses from the current directory: ./add_id_to_csv, ./wait_clickhouse_server
# and ontime.struct. Privileged steps are run through sudo.

set -e

# Bash may overwrite the COLUMNS variable with the terminal width after every
# external command when checkwinsize is enabled; this script stores its own
# column list in COLUMNS, so switch that behaviour off.
shopt -u checkwinsize

# Raw CSV; its first line is skipped when the id column is added below.
SOURCE_RAW=/opt/ontime/ontime.csv
#SOURCE=/opt/ontime/ontime9M_id.csv
#SOURCE=/opt/ontime/ontime9M_id.Native
SOURCE=/opt/ontime/ontime_id.csv

# Build the id-prefixed source file once; later runs reuse it.
if [[ ! -f $SOURCE ]]; then
    echo "Inserting id field from $SOURCE_RAW to $SOURCE ..."
    tail -n +2 "$SOURCE_RAW" | ./add_id_to_csv > "$SOURCE"
fi

# Table structure ("name Type, name Type, ...") with the id column prepended.
STRUCT="id UInt32, $(cat ontime.struct)"
# Comma-separated column names: every odd space-separated word of STRUCT.
COLUMNS=$(echo "$STRUCT" | tr " " "\n" | awk 'NR % 2 == 1' | tr "\n" "," | head -c -1)
# Number of leading columns actually used by the test tables.
ACTIVE_COLUMNS=11

# Extra clickhouse-client options for the timed INSERT / OPTIMIZE queries.
ST_OPTIONS=""
#ST_OPTIONS="--max_threads=1 --background_pool_size=1" # increase std. dev. of measurements

db="test"
table_name="ontime"
table="test.ontime"
|
|
|
|
# Runs a query over the source file with clickhouse-local and writes the
# result to stdout in Native format.
# Globals:   SOURCE (read) - input CSV file
#            STRUCT (read) - column structure of that file
# Arguments: $1 - query text; any further arguments are passed to
#            clickhouse-local after it
function read_src_data {
    clickhouse-local -s --file "$SOURCE" --input-format CSV --structure "$STRUCT" -of Native --query "$@"
    #clickhouse-local -s --file "$SOURCE" --input-format Native --structure "$STRUCT" -of Native --query "$@"
}
|
|
|
|
# Writes two server config overrides into /etc/clickhouse-server/conf.d:
# one setting enable_vertical_merge_algorithm to $1 and one setting
# vertical_merge_algorithm_min_rows_to_activate to 0.
# Arguments: $1 - value placed inside <enable_vertical_merge_algorithm>
#            (the script passes 1 or 0)
# The files are written through "sudo tee"; tee's echo of the input is discarded.
function set_vertical_alg {
    local conf_dir=/etc/clickhouse-server/conf.d

    echo "<yandex><merge_tree><enable_vertical_merge_algorithm>$1</enable_vertical_merge_algorithm></merge_tree></yandex>" \
        | sudo tee "$conf_dir/enable_vertical_merge_algorithm.xml" >/dev/null
    echo "<yandex><merge_tree><vertical_merge_algorithm_min_rows_to_activate>0</vertical_merge_algorithm_min_rows_to_activate></merge_tree></yandex>" \
        | sudo tee "$conf_dir/vertical_merge_algorithm_min_rows_to_activate.xml" >/dev/null
}
|
|
|
|
# Stops clickhouse-server, rewrites the merge-algorithm config overrides via
# set_vertical_alg, starts the server again and waits for it with
# ./wait_clickhouse_server.
# Arguments: $1 - value forwarded to set_vertical_alg
function set_and_restart {
    sudo service clickhouse-server stop 1>/dev/null
    set_vertical_alg "$1"
    sudo service clickhouse-server start 1>/dev/null
    ./wait_clickhouse_server
}
|
|
|
|
# Prints the first N comma-separated fields of a list.
# Arguments: $1 - comma-separated list
#            $2 - number of leading fields to keep
# Outputs:   the truncated list on stdout (the whole list if it has fewer
#            than N fields)
function get_n_columns {
    # NOTE(review): $1 is left unquoted on purpose. Word splitting joins a
    # multi-line value into a single line before cut sees it; quoting it would
    # change the output for such input. Confirm the format of ontime.struct
    # before tightening this.
    # shellcheck disable=SC2086
    echo $1 | cut -d ',' -f "-$2"
}
|
|
|
|
# Prints statistics about the active parts of the test table in TSKV format:
# number of parts and the average (rounded to 2 digits), minimum and maximum
# of their "marks" value, taken from system.parts.
# Globals: table_name (read), db (read)
function parts_stat {
    local query="SELECT count() as parts, round(avg(marks), 2) AS marks_avg, min(marks) AS marks_min, max(marks) AS marks_max FROM system.parts WHERE active AND table='$table_name' AND database='$db' FORMAT TSKV"
    clickhouse-client --query "$query"
}
|
|
|
|
# Prints the number of active parts of the test table (from system.parts).
# Globals: table_name (read), db (read)
function parts_count {
    local query="SELECT count() FROM system.parts WHERE active AND table='$table_name' AND database='$db'"
    clickhouse-client --query "$query"
}
|
|
|
|
# Prints the number of rows in system.merges that belong to the test table.
# Globals: table_name (read), db (read)
function merges_count {
    local query="SELECT count() FROM system.merges WHERE table='$table_name' AND database='$db'"
    clickhouse-client --query "$query"
}
|
|
|
|
# Blocks until merges_count reports 0, polling once per second.
# The count has to be compared with "0": merges_count prints a number, so a
# non-empty test (-n) would stay true forever once it prints "0".
function wait_merges {
    while [[ "$(merges_count)" != "0" ]]; do
        sleep 1
    done
}
|
|
|
|
# Writes 3 to /proc/sys/vm/drop_caches. The redirection is done inside
# "sudo sh -c" so that the file is opened by the privileged shell.
function drop_cache {
    sudo sh -c 'echo 3 >/proc/sys/vm/drop_caches'
}
|
|
|
|
# Prints the space-separated fields 12 and onward of the last log line that
# contains "(Merger): Merging".
# Arguments: $1 - (optional) log file to read; defaults to the
#            clickhouse-server log
function get_last_merge_info {
    local log_file=${1:-/var/log/clickhouse-server/clickhouse-server.log}
    grep "(Merger): Merging" "$log_file" | tail -n 1 | cut -d " " -f 12-
}
|
|
|
|
# Prints the 21st space-separated field of the last log line that contains
# "(Merger): Merge sorted".
# Arguments: $1 - (optional) log file to read; defaults to the
#            clickhouse-server log
function get_last_merge_time {
    local log_file=${1:-/var/log/clickhouse-server/clickhouse-server.log}
    grep "(Merger): Merge sorted" "$log_file" | tail -n 1 | cut -d " " -f 21
}
|
|
|
|
# Extracts the 21st space-separated field of every log line containing
# "(Merger): Merge sorted" and feeds those values to clickhouse-local as a
# single Float64 column, printing their sum rounded to 3 digits.
# Arguments: $1 - (optional) log file to read; defaults to the
#            clickhouse-server log
function total_merge_time_from_log {
    local log_file=${1:-/var/log/clickhouse-server/clickhouse-server.log}
    grep "(Merger): Merge sorted" "$log_file" \
        | cut -d " " -f 21 \
        | clickhouse-local -s -S "d Float64" --query "SELECT round(sum(d), 3) FROM table"
}
|
|
|
|
# Prints the VmPeak value of the server process divided by 1024 (MiB, as
# VmPeak is taken from /proc/<pid>/status).
# Arguments: $1 - (optional) file holding the process id; defaults to the
#            clickhouse-server pid file
function get_max_clickhouse_server_memory {
    local pid_file=${1:-/var/run/clickhouse-server/clickhouse-server.pid}
    grep VmPeak "/proc/$(cat "$pid_file")/status" | awk '{ print $2/1024 }' #MiB
}
|
|
|
|
# Runs OPTIMIZE TABLE on the test table several times and reports the part
# statistics before and after together with the summed time printed by
# "clickhouse-client --time".
# Globals:   table, ST_OPTIONS (read)
#            NUM_ROUNDS, t_optimize_total (written)
# Arguments: $1 - (optional) number of rounds, 29 when missing or empty
# Returns:   non-zero if an OPTIMIZE query fails
function optimize_rounds {
    NUM_ROUNDS=${1:-29}
    drop_cache

    echo "OPTIMIZE before: $(parts_stat)"
    t_optimize_total=0

    local i t
    for ((i = 1; i <= NUM_ROUNDS; i++)); do
        # ST_OPTIONS is deliberately unquoted: it holds zero or more
        # separate command-line options.
        # Assignment is kept apart from "local" so that a failed query is
        # noticed instead of its error text being added up as a time.
        # shellcheck disable=SC2086
        t=$(clickhouse-client --time ${ST_OPTIONS} --query "OPTIMIZE TABLE $table" 2>&1) || {
            echo "OPTIMIZE TABLE $table failed: $t" >&2
            return 1
        }
        #echo "$t $(get_last_merge_time) $(get_last_merge_info)"
        t_optimize_total=$(echo "$t_optimize_total + $t" | bc -l)
    done

    echo "OPTIMIZE after : $(parts_stat)"
    echo "OPTIMIZE time : $t_optimize_total"
}
|
|
|
|
# Runs one test case on a freshly restarted server with an empty log.
# Steps: restart the server (removing its log in between), drop the test
# table and the page cache, call case_<N>, wait for merges to finish, sum the
# merge times from the log, print part statistics and drop the table again.
# Globals:   table (read); t_insert (read, set by the case function)
#            case_func, t_merges, insert_times, merges_times (written)
# Arguments: $1 - case number; the function case_$1 is invoked
function run_case {
    case_func="case_$1"

    sudo service clickhouse-server stop 1>/dev/null
    sudo rm -f /var/log/clickhouse-server/clickhouse-server.log
    sudo service clickhouse-server start 1>/dev/null
    ./wait_clickhouse_server

    clickhouse-client --query "DROP TABLE IF EXISTS $table"
    drop_cache

    # Called in the current shell (only its stdout goes to stderr) so that the
    # t_insert value it assigns is visible below.
    "$case_func" 1>&2
    wait_merges
    t_merges=$(total_merge_time_from_log)

    echo "After INSERT: $(parts_stat)"
    #echo "Merges time : $t_merges"

    #optimize_rounds
    #optimize_times="$optimize_times $t_optimize_total"

    insert_times="$insert_times $t_insert"
    merges_times="$merges_times $t_merges"

    clickhouse-client --query "DROP TABLE IF EXISTS $table"
}
|
|
|
|
# Runs cases 1, 2 and 3 in order and prints the collected INSERT and merge
# times.
# Globals:   STRUCT, COLUMNS, ACTIVE_COLUMNS (read)
#            cur_struct, cur_columns - first ACTIVE_COLUMNS entries of STRUCT
#            and COLUMNS (written, used by the case functions)
#            t_insert, insert_times, merges_times, optimize_times (reset here;
#            the first three are filled in while the cases run)
function run_cases {
    cur_struct=$(get_n_columns "$STRUCT" "$ACTIVE_COLUMNS")
    cur_columns=$(get_n_columns "$COLUMNS" "$ACTIVE_COLUMNS")

    t_insert=0
    insert_times=""
    merges_times=""
    optimize_times=""

    local case_no
    for case_no in 1 2 3; do
        run_case "$case_no"
    done

    echo "INSERT times : $insert_times"
    echo "Merges times : $merges_times"
    #echo "OPTIMIZE times: $optimize_times"
}
|
|
|
|
# Case 1: table with primary key (FlightDate).
# Creates the table and inserts the source data, storing the output of
# "clickhouse-client --time" (stdout and stderr of the INSERT) in t_insert.
# Globals: table, cur_struct, cur_columns, ST_OPTIONS (read); t_insert (written)
function case_1 {
    echo "Case #1. Trivial. All parts not intersected by PK."

    clickhouse-client --query "CREATE TABLE $table ($cur_struct) ENGINE = MergeTree(FlightDate, (FlightDate), 8192)"
    # ST_OPTIONS is deliberately unquoted: zero or more separate options.
    # shellcheck disable=SC2086
    t_insert=$(read_src_data "SELECT $cur_columns FROM table" \
        | clickhouse-client --time ${ST_OPTIONS} --query "INSERT INTO $table FORMAT Native" 2>&1)
}
|
|
|
|
# Case 2: table with primary key (intHash32(id), FlightDate).
# Creates the table and inserts the source data, storing the output of
# "clickhouse-client --time" (stdout and stderr of the INSERT) in t_insert.
# Globals: table, cur_struct, cur_columns, ST_OPTIONS (read); t_insert (written)
function case_2 {
    echo "Case #2. Strong mixture. Each new (merged) row comes from new part."

    clickhouse-client --query "CREATE TABLE $table ($cur_struct) ENGINE = MergeTree(FlightDate, (intHash32(id), FlightDate), 8192)"
    # ST_OPTIONS is deliberately unquoted: zero or more separate options.
    # shellcheck disable=SC2086
    t_insert=$(read_src_data "SELECT $cur_columns FROM table" \
        | clickhouse-client --time ${ST_OPTIONS} --query "INSERT INTO $table FORMAT Native" 2>&1)
}
|
|
|
|
# Case 3: table with primary key
# (bitAnd(id, 15), intHash32(bitShiftRight(id, 4))).
# Creates the table and inserts the source data, storing the output of
# "clickhouse-client --time" (stdout and stderr of the INSERT) in t_insert.
# Globals: table, cur_struct, cur_columns, ST_OPTIONS (read); t_insert (written)
function case_3 {
    echo "Case #3. Chunked mixture. Merged row with dozens of its neighbors come from the same part."

    clickhouse-client --query "CREATE TABLE $table ($cur_struct) ENGINE = MergeTree(FlightDate, (bitAnd(id, 15), intHash32(bitShiftRight(id, 4))), 8192)"
    # ST_OPTIONS is deliberately unquoted: zero or more separate options.
    # shellcheck disable=SC2086
    t_insert=$(read_src_data "SELECT $cur_columns FROM table" \
        | clickhouse-client --time ${ST_OPTIONS} --query "INSERT INTO $table FORMAT Native" 2>&1)
}
|
|
|
|
|
|
# Warn when not running as root. The user name must be compared as a string
# (!=): the arithmetic operator -ne evaluates both names as numbers, which
# makes them compare equal and the warning never appear. The script only warns
# and carries on, since privileged steps go through sudo.
if [[ "$(whoami)" != "root" ]]; then
    echo "Run script as root"
fi

# Pass 1: vertical merge algorithm enabled.
echo "### Vertical ###"
set_and_restart 1
run_cases
vertical_optimize_times="$optimize_times"
vertical_merges_times="$merges_times"

echo

# Pass 2: vertical merge algorithm disabled.
echo "### Horizontal ###"
set_and_restart 0
run_cases
horizontal_optimize_times="$optimize_times"
horizontal_merges_times="$merges_times"

echo

# Summary lines. The *_merges_times values are deliberately unquoted so that
# the leading space they start with is dropped by word splitting.
# shellcheck disable=SC2086
echo "#V" "Merges:" ${vertical_merges_times} #"Optimize:" ${vertical_optimize_times}
# shellcheck disable=SC2086
echo "#H" "Merges:" ${horizontal_merges_times} #"Optimize:" ${horizontal_optimize_times}
|