mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-24 16:42:05 +00:00
Add setTemporaryStorage to clickhouse-local to make OPTIMIZE work
This commit is contained in:
parent
e89a3b5d09
commit
d7ea9b6d93
@ -152,6 +152,10 @@ void LocalServer::tryInitPath()
|
||||
path += '/';
|
||||
|
||||
context->setPath(path);
|
||||
|
||||
context->setTemporaryStorage(path + "tmp");
|
||||
context->setFlagsPath(path + "flags");
|
||||
|
||||
context->setUserFilesPath(""); // user's files are everywhere
|
||||
}
|
||||
|
||||
|
@ -0,0 +1,16 @@
|
||||
202001_1_1_0 1 2020-01-01 String
|
||||
202001_4_4_0 1 2020-01-01 String
|
||||
202002_2_2_0 2 2020-02-02 Another string
|
||||
202002_5_5_0 2 2020-02-02 Another string
|
||||
202003_3_3_0 3 2020-03-03 One more string
|
||||
202003_6_6_0 3 2020-03-03 One more string
|
||||
202001_1_1_0 4 2020-01-02 String for first partition
|
||||
202001_4_4_0 4 2020-01-02 String for first partition
|
||||
202001_1_4_1 1 2020-01-01 String
|
||||
202001_1_4_1 1 2020-01-01 String
|
||||
202002_2_5_1 2 2020-02-02 Another string
|
||||
202002_2_5_1 2 2020-02-02 Another string
|
||||
202003_3_6_1 3 2020-03-03 One more string
|
||||
202003_3_6_1 3 2020-03-03 One more string
|
||||
202001_1_4_1 4 2020-01-02 String for first partition
|
||||
202001_1_4_1 4 2020-01-02 String for first partition
|
60
tests/queries/0_stateless/01527_clickhouse_local_optimize.sh
Executable file
60
tests/queries/0_stateless/01527_clickhouse_local_optimize.sh
Executable file
@ -0,0 +1,60 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
|
||||
. "$CURDIR"/../shell_config.sh
|
||||
|
||||
WORKING_FOLDER="${CLICKHOUSE_TMP}/01527_clickhouse_local_optimize"
|
||||
|
||||
rm -rf "${WORKING_FOLDER}"
|
||||
mkdir -p "${WORKING_FOLDER}/metadata/local/"
|
||||
|
||||
## 1. Imagine we want to process this file:
|
||||
cat <<EOF > "${WORKING_FOLDER}/data.csv"
|
||||
1,2020-01-01,"String"
|
||||
2,2020-02-02,"Another string"
|
||||
3,2020-03-03,"One more string"
|
||||
4,2020-01-02,"String for first partition"
|
||||
EOF
|
||||
|
||||
## 2. that is the metadata for the table we want to fill
|
||||
## schema should match the schema of the table from server
|
||||
## (the easiest way is just to copy it from the server)
|
||||
|
||||
## I've added sleepEachRow(0.5) here just to mimic slow insert
|
||||
cat <<EOF > "${WORKING_FOLDER}/metadata/local/test.sql"
|
||||
ATTACH TABLE local.test (id UInt64, d Date, s String, x MATERIALIZED sleepEachRow(0.5)) Engine=MergeTree ORDER BY id PARTITION BY toYYYYMM(d);
|
||||
EOF
|
||||
|
||||
## 3a. that is the metadata for the input file we want to read
|
||||
## it should match the structure of source file
|
||||
|
||||
## use stdin to read from pipe
|
||||
cat <<EOF > "${WORKING_FOLDER}/metadata/local/stdin.sql"
|
||||
ATTACH TABLE local.stdin (id UInt64, d Date, s String) Engine=File(CSV, stdin);
|
||||
EOF
|
||||
|
||||
## 3b. Instead of stdin you can use file path
|
||||
cat <<EOF > "${WORKING_FOLDER}/metadata/local/data_csv.sql"
|
||||
ATTACH TABLE local.data_csv (id UInt64, d Date, s String) Engine=File(CSV, '${WORKING_FOLDER}/data.csv');
|
||||
EOF
|
||||
|
||||
## All preparations done,
|
||||
## the rest is simple:
|
||||
|
||||
# option a (if 3a used) with pipe / reading stdin
|
||||
cat "${WORKING_FOLDER}/data.csv" | ${CLICKHOUSE_LOCAL} --query "INSERT INTO local.test SELECT * FROM local.stdin" -- --path="${WORKING_FOLDER}"
|
||||
|
||||
# option b (if 3b used) - with filepath
|
||||
${CLICKHOUSE_LOCAL} --query "INSERT INTO local.test SELECT * FROM local.data_csv" -- --path="${WORKING_FOLDER}"
|
||||
|
||||
# now you can check what was inserted (i did both options so i have doubled data)
|
||||
${CLICKHOUSE_LOCAL} --query "SELECT _part,* FROM local.test ORDER BY id, _part" -- --path="${WORKING_FOLDER}"
|
||||
|
||||
# OPTIMIZE used to kill clickhouse-local with a coredump; it is expected to work now that temporary storage is set up
|
||||
# Merge the parts of every partition. Use the binary configured by shell_config.sh
# (same as all other invocations in this test) rather than whatever
# `clickhouse-local` happens to be on PATH.
${CLICKHOUSE_LOCAL} --query "OPTIMIZE TABLE local.test FINAL" -- --path="${WORKING_FOLDER}"
|
||||
|
||||
# now you can check the result of the merge (the data is still doubled, but each partition is a single merged part)
|
||||
${CLICKHOUSE_LOCAL} --query "SELECT _part,* FROM local.test ORDER BY id, _part" -- --path="${WORKING_FOLDER}"
|
||||
|
||||
## now you can upload those parts to a server (in detached subfolder) and attach them.
|
||||
rm -rf "${WORKING_FOLDER}"
|
Loading…
Reference in New Issue
Block a user