adding tests draft

This commit is contained in:
Mikhail Filimonov 2019-05-13 15:41:47 +02:00
parent e7faa04726
commit f683c16712
9 changed files with 427 additions and 0 deletions

View File

@ -0,0 +1,83 @@
## Test stand for multiple disks feature
Currently intended for manual tests; it can easily be scripted to become part of the integration tests.
To run you need to have docker & docker-compose.
```
(Check makefile)
make run
make ch1_shell
> clickhouse-client
make logs # Ctrl+C
make cleup
```
### basic
* allows configuring multiple disks, volumes & schemas
* clickhouse checks that all disks are write-accessible
* clickhouse can create a table with provided schema
### one volume-one disk custom schema
* clickhouse puts data to correct folder when schema is used
* clickhouse can do merges / detach / attach / freeze on that folder
### one volume-multiple disks schema (JBOD scenario)
* clickhouse uses round-robin to place new parts
* clickhouse can do merges / detach / attach / freeze on that folder
### two volumes-one disk per volume (fast expensive / slow cheap storage)
* clickhouse uses round-robin to place new parts
* clickhouse can do merges / detach / attach / freeze on that folder
* clickhouse puts parts on different volumes depending on part size
### use 'default' schema for tables created without schema provided.
# ReplicatedMergeTree
....
For all above:
clickhouse respects the free space limitation setting.
ClickHouse writes important disk-related information to logs.
## Queries
```
CREATE TABLE schema_default (id UInt64) Engine=MergeTree() ORDER BY (id);
INSERT INTO schema_default SELECT * FROM numbers(1);
CREATE TABLE schema_default_explicit (id UInt64) Engine=MergeTree() ORDER BY (id) SETTINGS storage_schema_name='default';
CREATE TABLE schema_default_disk_with_external (id UInt64) Engine=MergeTree() ORDER BY (id) SETTINGS storage_schema_name='default_disk_with_external';
CREATE TABLE schema_jbod_with_external (id UInt64) Engine=MergeTree() ORDER BY (id) SETTINGS storage_schema_name='jbod_with_external';
CREATE TABLE replicated_schema_default (id UInt64) Engine=ReplicatedMergeTree('/clickhouse/tables/{database}/{table}', '{replica}') ORDER BY (id);
CREATE TABLE replicated_schema_default_explicit (id UInt64) Engine=ReplicatedMergeTree('/clickhouse/tables/{database}/{table}', '{replica}') ORDER BY (id) SETTINGS storage_schema_name='default';
CREATE TABLE replicated_schema_default_disk_with_external (id UInt64) Engine=ReplicatedMergeTree('/clickhouse/tables/{database}/{table}', '{replica}') ORDER BY (id) SETTINGS storage_schema_name='default_disk_with_external';
CREATE TABLE replicated_schema_jbod_with_external (id UInt64) Engine=ReplicatedMergeTree('/clickhouse/tables/{database}/{table}', '{replica}') ORDER BY (id) SETTINGS storage_schema_name='jbod_with_external';
```
## Extra acceptance criteria
* hardlink problems. Those statements should work properly (or give proper feedback) in multi-disk scenarios
* ALTER TABLE ... UPDATE
* ALTER TABLE ... TABLE
* ALTER TABLE ... MODIFY COLUMN ...
* ALTER TABLE ... CLEAR COLUMN
* ALTER TABLE ... REPLACE PARTITION ...
* Maintenance - system tables show proper values:
* system.parts
* system.tables
* system.part_log (target disk?)
* New system table
* system.volumes
* system.disks
* system.schemas
* chown / create needed disk folders in docker

View File

@ -0,0 +1,7 @@
<yandex>
<!-- Per-replica macros; presumably the file mounted into the ch1 container as /etc/metrika.xml. -->
<macros>
<cluster>test_cluster</cluster>
<shard>0</shard>
<!-- Must differ between replicas: this value is substituted for {replica} in the
     ReplicatedMergeTree arguments, so two servers sharing one name would clash. -->
<replica>ch1</replica>
</macros>
</yandex>

View File

@ -0,0 +1,7 @@
<yandex>
<!-- Macros for the replica named ch2. {replica} in ReplicatedMergeTree arguments expands to the
     <replica> value below. -->
<macros>
<cluster>test_cluster</cluster>
<shard>0</shard>
<replica>ch2</replica>
</macros>
</yandex>

View File

@ -0,0 +1,16 @@
<yandex>
<!-- Cluster "test_cluster": a single shard made of two replicas, reachable as hosts ch1 and ch2
     (the hostnames of the two ClickHouse services in docker-compose), both on port 9000. -->
<remote_servers>
<test_cluster>
<shard>
<replica>
<host>ch1</host>
<port>9000</port>
</replica>
<replica>
<host>ch2</host>
<port>9000</port>
</replica>
</shard>
</test_cluster>
</remote_servers>
</yandex>

View File

@ -0,0 +1,63 @@
<yandex>
<storage_configuration>
<disks>
<default> <!-- path for the default disk is provided in the main config -->
<keep_free_space_bytes>0</keep_free_space_bytes>
</default>
<jbod1>
<path>/jbod1/</path> <!-- trailing slash is mandatory -->
<keep_free_space_bytes>1000000</keep_free_space_bytes>
</jbod1>
<jbod2>
<path>/jbod2/</path>
<keep_free_space_bytes>1000000</keep_free_space_bytes>
</jbod2>
<external>
<path>/external/</path>
<keep_free_space_bytes>1000000</keep_free_space_bytes>
</external>
</disks>
<schemas>
<!-- default: store on jbod1-->
<default>
<volume>
<disk>jbod1</disk>
<max_data_part_size_bytes>1000000</max_data_part_size_bytes>
<!-- Q: how it will behave if the only disk has max_data_part_size_bytes limitation? -->
</volume>
</default>
<!-- store local by default, store big parts on external -->
<default_disk_with_external>
<volume>
<!-- names for the volumes should be added to allow moving parts between volumes with DDL commands -->
<disk>default</disk>
<max_data_part_size_bytes>1000000</max_data_part_size_bytes>
</volume>
<volume>
<disk>external</disk>
<max_data_part_size_bytes>10000000</max_data_part_size_bytes>
</volume>
</default_disk_with_external>
<!-- store on JBOD by default (round-robin), store big parts on external -->
<jbod_with_external>
<volume>
<disk>jbod1</disk>
<disk>jbod2</disk>
<max_data_part_size_bytes>10000000</max_data_part_size_bytes>
<!-- max_data_part_size_ratio>0.2</max_data_part_size_ratio -->
</volume>
<volume>
<disk>external</disk>
<!-- max_data_part_size_bytes>10000000</max_data_part_size_bytes -->
</volume>
</jbod_with_external>
</schemas>
</storage_configuration>
</yandex>

View File

@ -0,0 +1,8 @@
<yandex>
<!-- Single ZooKeeper node: host "zookeeper" (the docker-compose service name), port 2181.
     The container entrypoint waits for this same host:port before starting the server. -->
<zookeeper>
<node>
<host>zookeeper</host>
<port>2181</port>
</node>
</zookeeper>
</yandex>

View File

@ -0,0 +1,52 @@
# Test stand: one ZooKeeper container plus two ClickHouse containers (ch1, ch2),
# all attached to the same dedicated network.
version: '3'
services:
zookeeper:
image: zookeeper
networks:
- 001_miltiple_disks
# First ClickHouse replica.
ch1:
image: yandex/clickhouse-server
hostname: ch1
depends_on:
- "zookeeper"
networks:
- 001_miltiple_disks
# In-memory filesystems mounted at the paths used by the jbod1 / jbod2 / external disks.
tmpfs:
- /jbod1:size=40M
- /jbod2:size=40M
- /external:size=200M
volumes:
# Shared conf.d directory and the per-replica file mounted as /etc/metrika.xml.
- ./config/conf.d:/etc/clickhouse-server/conf.d
- ./config/ch1.metrika.xml:/etc/metrika.xml
# TODO: hacky, but "cheap" - don't need to rebuild docker image.
# for CI & tests automation should be done another way, not relying on build path
- ../../../../build/dbms/programs/clickhouse:/usr/bin/clickhouse
- ./test_setup:/test_setup
# Custom entrypoint taken from the mounted ./test_setup directory.
entrypoint: ['/test_setup/entrypoint.sh']
# Second ClickHouse replica; same layout as ch1 except for hostname and the metrika file.
ch2:
image: yandex/clickhouse-server
hostname: ch2
depends_on:
- "zookeeper"
networks:
- 001_miltiple_disks
tmpfs:
- /jbod1:size=40M
- /jbod2:size=40M
- /external:size=200M
volumes:
- ./config/conf.d:/etc/clickhouse-server/conf.d
- ./config/ch2.metrika.xml:/etc/metrika.xml
# TODO: hacky, but "cheap" - don't need to rebuild docker image.
# for CI & tests automation should be done another way, not relying on build path
- ../../../../build/dbms/programs/clickhouse:/usr/bin/clickhouse
- ./test_setup:/test_setup
entrypoint: ['/test_setup/entrypoint.sh']
networks:
001_miltiple_disks:

View File

@ -0,0 +1,9 @@
#!/bin/bash
# Container entrypoint for the multiple-disks test stand.
#
# * If the maintenance flag file exists in conf.d, do not start the server at all:
#   print a notice and sleep forever so the container stays up.
# * Otherwise hand the disk mount points over to the clickhouse user, wait until
#   ZooKeeper accepts TCP connections on zookeeper:2181, and then start /entrypoint.sh.
if [ -f /etc/clickhouse-server/conf.d/container_maintanence_mode.flag ]; then
    echo "Starting container in maintanence mode. It will sleep unless you shutdown it"
    sleep infinity
else
    # The mount points must be writable by the clickhouse user.
    chown -R clickhouse:clickhouse /jbod1 /jbod2 /external
    # exec: replace this shell instead of leaving it as an idle parent. wait-for-it.sh itself
    # exec's the command after "--", so /entrypoint.sh ends up being the container's main
    # process and receives termination signals directly.
    # --timeout=0 waits indefinitely; --strict only starts the command if the check succeeded.
    exec /test_setup/wait-for-it.sh zookeeper:2181 --timeout=0 --strict -- /entrypoint.sh
fi

View File

@ -0,0 +1,182 @@
#!/usr/bin/env bash
# Use this script to test if a given TCP host/port are available
# The MIT License (MIT)
# Copyright (c) 2016 Giles Hall
# See https://github.com/vishnubob/wait-for-it/blob/master/LICENSE
# Basename of this script; used as the prefix of diagnostic messages.
WAITFORIT_cmdname=${0##*/}

# Write all arguments to stderr, unless quiet mode (WAITFORIT_QUIET=1) is active.
echoerr()
{
    [[ $WAITFORIT_QUIET -ne 1 ]] || return 0
    echo "$@" 1>&2
}
# Print the command-line help to stderr and terminate the script with status 1.
usage()
{
    {
        printf '%s\n' \
            "Usage:" \
            "$WAITFORIT_cmdname host:port [-s] [-t timeout] [-- command args]" \
            "-h HOST | --host=HOST Host or IP under test" \
            "-p PORT | --port=PORT TCP port under test" \
            "Alternatively, you specify the host and port as host:port" \
            "-s | --strict Only execute subcommand if the test succeeds" \
            "-q | --quiet Don't output any status messages" \
            "-t TIMEOUT | --timeout=TIMEOUT" \
            "Timeout in seconds, zero for no timeout" \
            "-- COMMAND ARGS Execute command with args after the test finishes"
    } >&2
    exit 1
}
# Poll WAITFORIT_HOST:WAITFORIT_PORT once per second until a TCP connection succeeds.
#
# Reads:  WAITFORIT_HOST, WAITFORIT_PORT, WAITFORIT_TIMEOUT (only for the log message),
#         WAITFORIT_ISBUSY (1 = probe with `nc -z`, otherwise use bash's /dev/tcp redirection).
# Writes: WAITFORIT_start_ts, WAITFORIT_end_ts, WAITFORIT_result.
# Returns 0 once the port is reachable. The loop is only left on success; this function
# does not enforce the timeout itself (wait_for_wrapper runs it under `timeout` for that).
wait_for()
{
    if [[ $WAITFORIT_TIMEOUT -gt 0 ]]; then
        echoerr "$WAITFORIT_cmdname: waiting $WAITFORIT_TIMEOUT seconds for $WAITFORIT_HOST:$WAITFORIT_PORT"
    else
        echoerr "$WAITFORIT_cmdname: waiting for $WAITFORIT_HOST:$WAITFORIT_PORT without a timeout"
    fi
    WAITFORIT_start_ts=$(date +%s)
    while :
    do
        # Host and port are quoted so that unusual values are passed through verbatim
        # instead of being word-split or glob-expanded by the shell.
        if [[ $WAITFORIT_ISBUSY -eq 1 ]]; then
            nc -z "$WAITFORIT_HOST" "$WAITFORIT_PORT"
        else
            (echo > "/dev/tcp/$WAITFORIT_HOST/$WAITFORIT_PORT") >/dev/null 2>&1
        fi
        # Exit status of whichever probe ran above.
        WAITFORIT_result=$?
        if [[ $WAITFORIT_result -eq 0 ]]; then
            WAITFORIT_end_ts=$(date +%s)
            echoerr "$WAITFORIT_cmdname: $WAITFORIT_HOST:$WAITFORIT_PORT is available after $((WAITFORIT_end_ts - WAITFORIT_start_ts)) seconds"
            break
        fi
        sleep 1
    done
    return $WAITFORIT_result
}
# Run the port check with a time limit by re-invoking this script in --child mode
# under `timeout`, in the background, so that SIGINT can still be handled while waiting.
#
# Reads:  WAITFORIT_QUIET, WAITFORIT_BUSYTIMEFLAG, WAITFORIT_TIMEOUT, WAITFORIT_HOST, WAITFORIT_PORT.
# Writes: WAITFORIT_PID, WAITFORIT_RESULT.
# Returns the exit status of the `timeout` process (non-zero is reported as a timeout).
wait_for_wrapper()
{
    # In order to support SIGINT during timeout: http://unix.stackexchange.com/a/57692
    local -a child_args=(--child "--host=$WAITFORIT_HOST" "--port=$WAITFORIT_PORT" "--timeout=$WAITFORIT_TIMEOUT")
    if [[ $WAITFORIT_QUIET -eq 1 ]]; then
        # Propagate quiet mode to the child so it stays silent as well.
        child_args=(--quiet "${child_args[@]}")
    fi
    # WAITFORIT_BUSYTIMEFLAG is intentionally unquoted: when it is empty it must disappear
    # from the command line rather than be passed as an empty argument.
    # "$0" is quoted so the script can be re-invoked from a path containing spaces.
    timeout $WAITFORIT_BUSYTIMEFLAG "$WAITFORIT_TIMEOUT" "$0" "${child_args[@]}" &
    WAITFORIT_PID=$!
    # Forward Ctrl+C to the background job (negative PID targets its process group).
    trap "kill -INT -$WAITFORIT_PID" INT
    wait $WAITFORIT_PID
    WAITFORIT_RESULT=$?
    if [[ $WAITFORIT_RESULT -ne 0 ]]; then
        echoerr "$WAITFORIT_cmdname: timeout occurred after waiting $WAITFORIT_TIMEOUT seconds for $WAITFORIT_HOST:$WAITFORIT_PORT"
    fi
    return $WAITFORIT_RESULT
}
# Parse the command line into the WAITFORIT_* variables.
# Parsing stops at "--"; everything after it is stored as the command to run (WAITFORIT_CLI).
until [[ $# -eq 0 ]]; do
    case "$1" in
        *:*)
            # host:port shorthand. This arm comes first, so any argument that
            # contains a colon is interpreted this way.
            WAITFORIT_hostport=(${1//:/ })
            WAITFORIT_HOST=${WAITFORIT_hostport[0]}
            WAITFORIT_PORT=${WAITFORIT_hostport[1]}
            shift
            ;;
        --child)        WAITFORIT_CHILD=1;  shift ;;
        -q | --quiet)   WAITFORIT_QUIET=1;  shift ;;
        -s | --strict)  WAITFORIT_STRICT=1; shift ;;
        -h)
            # A missing value ends parsing; the empty host is reported after the loop.
            WAITFORIT_HOST="$2"
            [[ -n $WAITFORIT_HOST ]] || break
            shift 2
            ;;
        --host=*)       WAITFORIT_HOST="${1#*=}"; shift ;;
        -p)
            WAITFORIT_PORT="$2"
            [[ -n $WAITFORIT_PORT ]] || break
            shift 2
            ;;
        --port=*)       WAITFORIT_PORT="${1#*=}"; shift ;;
        -t)
            WAITFORIT_TIMEOUT="$2"
            [[ -n $WAITFORIT_TIMEOUT ]] || break
            shift 2
            ;;
        --timeout=*)    WAITFORIT_TIMEOUT="${1#*=}"; shift ;;
        --)
            # Remaining words form the command executed once the wait is over.
            shift
            WAITFORIT_CLI=("$@")
            break
            ;;
        --help)
            usage
            ;;
        *)
            echoerr "Unknown argument: $1"
            usage
            ;;
    esac
done
# Both a host and a port are mandatory; otherwise show the help (usage exits).
[[ -n $WAITFORIT_HOST && -n $WAITFORIT_PORT ]] || {
    echoerr "Error: you need to provide a host and port to test."
    usage
}

# Fill in defaults for every option that was not given (or was given empty).
: "${WAITFORIT_TIMEOUT:=15}"
: "${WAITFORIT_STRICT:=0}"
: "${WAITFORIT_CHILD:=0}"
: "${WAITFORIT_QUIET:=0}"

# check to see if timeout is from busybox?
# Resolve the real path of the `timeout` binary and look for "busybox" in it.
# In that case the duration is passed to timeout via -t, and the port probe uses `nc -z`.
WAITFORIT_TIMEOUT_PATH=$(type -p timeout)
WAITFORIT_TIMEOUT_PATH=$(realpath $WAITFORIT_TIMEOUT_PATH 2>/dev/null || readlink -f $WAITFORIT_TIMEOUT_PATH)
case $WAITFORIT_TIMEOUT_PATH in
    *busybox*)
        WAITFORIT_ISBUSY=1
        WAITFORIT_BUSYTIMEFLAG="-t"
        ;;
    *)
        WAITFORIT_ISBUSY=0
        WAITFORIT_BUSYTIMEFLAG=""
        ;;
esac
# Child mode (spawned by wait_for_wrapper): just poll and report the result as exit status.
if [[ $WAITFORIT_CHILD -gt 0 ]]; then
    wait_for
    WAITFORIT_RESULT=$?
    exit $WAITFORIT_RESULT
fi

# Parent mode: wait with a time limit when a positive timeout is set, else wait indefinitely.
if [[ $WAITFORIT_TIMEOUT -gt 0 ]]; then
    wait_for_wrapper
else
    wait_for
fi
WAITFORIT_RESULT=$?

# No command after "--": the wait result is the script's exit status.
if [[ $WAITFORIT_CLI == "" ]]; then
    exit $WAITFORIT_RESULT
fi

# In strict mode a failed wait prevents the command from being started.
if [[ $WAITFORIT_RESULT -ne 0 && $WAITFORIT_STRICT -eq 1 ]]; then
    echoerr "$WAITFORIT_cmdname: strict mode, refusing to execute subprocess"
    exit $WAITFORIT_RESULT
fi

# Replace this process with the requested command.
exec "${WAITFORIT_CLI[@]}"