mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-23 08:02:02 +00:00
Split test into 2
This commit is contained in:
parent
111b553ee5
commit
77f66e5a09
@ -1,8 +0,0 @@
|
||||
1 2020-01-01 String
|
||||
2 2020-02-02 Another string
|
||||
3 2020-03-03 One more string
|
||||
4 2020-01-02 String for first partition
|
||||
1 2020-01-01 String
|
||||
2 2020-02-02 Another string
|
||||
3 2020-03-03 One more string
|
||||
4 2020-01-02 String for first partition
|
@ -4,48 +4,10 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
|
||||
. "$CURDIR"/../shell_config.sh
|
||||
|
||||
WORKING_FOLDER_01527="${CLICKHOUSE_TMP}/01527_clickhouse_local_optimize"
|
||||
|
||||
rm -rf "${WORKING_FOLDER_01527}"
|
||||
mkdir -p "${WORKING_FOLDER_01527}/metadata/local/"
|
||||
mkdir -p "${WORKING_FOLDER_01527}"
|
||||
|
||||
# OPTIMIZE was crashing due to lack of temporary volume in local
|
||||
${CLICKHOUSE_LOCAL} --query "drop database if exists d; create database d; create table d.t engine MergeTree order by a as select 1 a; optimize table d.t final" -- --path="${WORKING_FOLDER_01527}"
|
||||
|
||||
# Some extra (unrelated) scenarios of clickhouse-local usage.
|
||||
|
||||
## 1. Imagine we want to process this file:
|
||||
cat <<EOF > "${WORKING_FOLDER_01527}/data.csv"
|
||||
1,2020-01-01,"String"
|
||||
2,2020-02-02,"Another string"
|
||||
3,2020-03-03,"One more string"
|
||||
4,2020-01-02,"String for first partition"
|
||||
EOF
|
||||
|
||||
## 2. that is the metadata for the table we want to fill
|
||||
## schema should match the schema of the table from server
|
||||
## (the easiest way is just to copy it from the server)
|
||||
cat <<EOF > "${WORKING_FOLDER_01527}/metadata/local/test.sql"
|
||||
ATTACH TABLE local.test (id UInt64, d Date, s String) Engine=MergeTree ORDER BY id PARTITION BY toYYYYMM(d);
|
||||
EOF
|
||||
|
||||
## 3a. that is the metadata for the input file we want to read
|
||||
## it should match the structure of source file
|
||||
## use stdin to read from pipe
|
||||
cat <<EOF > "${WORKING_FOLDER_01527}/metadata/local/stdin.sql"
|
||||
ATTACH TABLE local.stdin (id UInt64, d Date, s String) Engine=File(CSV, stdin);
|
||||
EOF
|
||||
|
||||
## 3b. Instead of stdin you can use file path
|
||||
cat <<EOF > "${WORKING_FOLDER_01527}/metadata/local/data_csv.sql"
|
||||
ATTACH TABLE local.data_csv (id UInt64, d Date, s String) Engine=File(CSV, '${WORKING_FOLDER_01527}/data.csv');
|
||||
EOF
|
||||
|
||||
## All preparations done, the rest is simple:
|
||||
|
||||
# option a (if 3a used) with pipe / reading stdin (truncate was added for the test)
|
||||
cat "${WORKING_FOLDER_01527}/data.csv" | ${CLICKHOUSE_LOCAL} --query "INSERT INTO local.test SELECT * FROM local.stdin; SELECT * FROM local.test ORDER BY id; TRUNCATE TABLE local.test;" -- --path="${WORKING_FOLDER_01527}"
|
||||
|
||||
# option b (if 3b used) - with filepath (truncate was added for the test)
|
||||
${CLICKHOUSE_LOCAL} --query "INSERT INTO local.test SELECT * FROM local.data_csv; SELECT * FROM local.test ORDER BY id; TRUNCATE TABLE local.test;" -- --path="${WORKING_FOLDER_01527}"
|
||||
|
||||
rm -rf "${WORKING_FOLDER_01527}"
|
||||
rm -rf "${WORKING_FOLDER_01527}"
|
||||
|
@ -0,0 +1,19 @@
|
||||
Option 1. Prepare parts from from table with Engine=File defined in metadata, read from an arbitrary path
|
||||
1 2020-01-01 String
|
||||
2 2020-02-02 Another string
|
||||
3 2020-03-03 One more string
|
||||
4 2020-01-02 String for first partition
|
||||
Option 2. Prepare parts from from table with Engine=File defined in metadata, read from stdin (pipe)
|
||||
11 2020-01-01 String
|
||||
12 2020-02-02 Another string
|
||||
13 2020-03-03 One more string
|
||||
14 2020-01-02 String for first partition
|
||||
Option 3. Prepare parts from from table with Engine=File defined via command line, read from stdin (pipe)
|
||||
21 2020-01-01 String
|
||||
22 2020-02-02 Another string
|
||||
23 2020-03-03 One more string
|
||||
24 2020-01-02 String for first partition
|
||||
Possibility to run optimize on prepared parts before sending parts to server
|
||||
202001 1
|
||||
202002 1
|
||||
202003 1
|
83
tests/queries/0_stateless/01528_clickhouse_local_prepare_parts.sh
Executable file
83
tests/queries/0_stateless/01528_clickhouse_local_prepare_parts.sh
Executable file
@ -0,0 +1,83 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
|
||||
. "$CURDIR"/../shell_config.sh
|
||||
|
||||
# Scratch directory for this test. It is named after test 01528 so it does not
# share a path with 01527's "01527_clickhouse_local_optimize" folder, which that
# test also wipes with rm -rf.
WORKING_FOLDER_01528="${CLICKHOUSE_TMP}/01528_clickhouse_local_prepare_parts"
|
||||
rm -rf "${WORKING_FOLDER_01528}"
|
||||
|
||||
mkdir -p "${WORKING_FOLDER_01528}/metadata/local"
|
||||
|
||||
## Checks scenario of preparing parts offline by clickhouse-local
|
||||
|
||||
## that is the metadata for the table we want to fill
|
||||
## schema should match the schema of the table from server
|
||||
## (the easiest way is just to copy it from the server)
|
||||
cat <<EOF > "${WORKING_FOLDER_01528}/metadata/local/test.sql"
|
||||
ATTACH TABLE local.test (id UInt64, d Date, s String) Engine=MergeTree ORDER BY id PARTITION BY toYYYYMM(d);
|
||||
EOF
|
||||
|
||||
#################
|
||||
|
||||
echo "Option 1. Prepare parts from from table with Engine=File defined in metadata, read from an arbitrary path"
|
||||
|
||||
## Source file:
|
||||
cat <<EOF > "${WORKING_FOLDER_01528}/data.csv"
|
||||
1,2020-01-01,"String"
|
||||
2,2020-02-02,"Another string"
|
||||
3,2020-03-03,"One more string"
|
||||
4,2020-01-02,"String for first partition"
|
||||
EOF
|
||||
|
||||
## metadata written into file
|
||||
cat <<EOF > "${WORKING_FOLDER_01528}/metadata/local/data_csv.sql"
|
||||
ATTACH TABLE local.data_csv (id UInt64, d Date, s String) Engine=File(CSV, '${WORKING_FOLDER_01528}/data.csv');
|
||||
EOF
|
||||
|
||||
## feed the table
|
||||
${CLICKHOUSE_LOCAL} --query "INSERT INTO local.test SELECT * FROM local.data_csv;" -- --path="${WORKING_FOLDER_01528}"
|
||||
|
||||
## check the parts were created
|
||||
${CLICKHOUSE_LOCAL} --query "SELECT * FROM local.test WHERE id < 10 ORDER BY id;" -- --path="${WORKING_FOLDER_01528}"
|
||||
|
||||
#################
|
||||
|
||||
echo "Option 2. Prepare parts from from table with Engine=File defined in metadata, read from stdin (pipe)"
|
||||
|
||||
cat <<EOF > "${WORKING_FOLDER_01528}/metadata/local/stdin.sql"
|
||||
ATTACH TABLE local.stdin (id UInt64, d Date, s String) Engine=File(CSV, stdin);
|
||||
EOF
|
||||
|
||||
cat <<EOF | ${CLICKHOUSE_LOCAL} --query "INSERT INTO local.test SELECT * FROM local.stdin;" -- --path="${WORKING_FOLDER_01528}"
|
||||
11,2020-01-01,"String"
|
||||
12,2020-02-02,"Another string"
|
||||
13,2020-03-03,"One more string"
|
||||
14,2020-01-02,"String for first partition"
|
||||
EOF
|
||||
|
||||
${CLICKHOUSE_LOCAL} --query "SELECT * FROM local.test WHERE id BETWEEN 10 AND 19 ORDER BY id;" -- --path="${WORKING_FOLDER_01528}"
|
||||
|
||||
#################
|
||||
|
||||
echo "Option 3. Prepare parts from from table with Engine=File defined via command line, read from stdin (pipe)"
|
||||
|
||||
cat <<EOF | ${CLICKHOUSE_LOCAL} --query "INSERT INTO local.test SELECT * FROM table;" -S "id UInt64, d Date, s String" --input-format=CSV -- --path="${WORKING_FOLDER_01528}"
|
||||
21,2020-01-01,"String"
|
||||
22,2020-02-02,"Another string"
|
||||
23,2020-03-03,"One more string"
|
||||
24,2020-01-02,"String for first partition"
|
||||
EOF
|
||||
|
||||
${CLICKHOUSE_LOCAL} --query "SELECT * FROM local.test WHERE id BETWEEN 20 AND 29 ORDER BY id;" -- --path="${WORKING_FOLDER_01528}"
|
||||
|
||||
#################
|
||||
|
||||
echo "Possibility to run optimize on prepared parts before sending parts to server"
|
||||
|
||||
${CLICKHOUSE_LOCAL} --query "OPTIMIZE TABLE local.test FINAL;" -- --path="${WORKING_FOLDER_01528}"
|
||||
|
||||
# ensure we have one part per partition
|
||||
${CLICKHOUSE_LOCAL} --query "SELECT toYYYYMM(d) m, uniqExact(_part) FROM local.test GROUP BY m ORDER BY m" -- --path="${WORKING_FOLDER_01528}"
|
||||
|
||||
# cleanup
|
||||
rm -rf "${WORKING_FOLDER_01528}"
|
Loading…
Reference in New Issue
Block a user