Merge branch 'master' into xcode-build-fix

This commit is contained in:
alexey-milovidov 2020-05-03 05:03:49 +03:00 committed by GitHub
commit aeb5487261
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3087 changed files with 250030 additions and 245373 deletions

View File

@ -1,41 +0,0 @@
stages:
- builder
- build
variables:
GIT_SUBMODULE_STRATEGY: recursive
builder:
stage: builder
when: manual
services:
- docker:dind
script:
- docker info
- apk add --no-cache git curl binutils ca-certificates
- docker login -u gitlab -p nopasswd $CI_REGISTRY
- docker build -t yandex/clickhouse-builder ./docker/builder
- docker tag yandex/clickhouse-builder $CI_REGISTRY/yandex/clickhouse-builder
- docker push $CI_REGISTRY/yandex/clickhouse-builder
tags:
- docker
build:
stage: build
when: manual
services:
- docker:dind
script:
- apk add --no-cache git curl binutils ca-certificates
- git submodule sync --recursive
- git submodule update --init --recursive
- docker info
- docker login -u gitlab -p nopasswd $CI_REGISTRY
- docker pull $CI_REGISTRY/yandex/clickhouse-builder
- docker run --rm --volumes-from "${HOSTNAME}-build" --workdir "${CI_PROJECT_DIR}" --env CI_PROJECT_DIR=${CI_PROJECT_DIR} $CI_REGISTRY/yandex/clickhouse-builder /build_gitlab_ci.sh
# You can upload your binary to nexus
- curl -v --keepalive-time 60 --keepalive --user "$NEXUS_USER:$NEXUS_PASSWORD" -XPUT "http://$NEXUS_HOST/repository/binaries/$CI_PROJECT_NAME" --upload-file ./src/Server/clickhouse
# Or download artifacts from gitlab
artifacts:
paths:
- ./src/Server/clickhouse
expire_in: 1 day
tags:
- docker

View File

@ -34,7 +34,7 @@ set(CMAKE_DEBUG_POSTFIX "d" CACHE STRING "Generate debug library name with a pos
# For more info see https://cmake.org/cmake/help/latest/prop_gbl/USE_FOLDERS.html # For more info see https://cmake.org/cmake/help/latest/prop_gbl/USE_FOLDERS.html
set_property(GLOBAL PROPERTY USE_FOLDERS ON) set_property(GLOBAL PROPERTY USE_FOLDERS ON)
option(ENABLE_IPO "Enable inter-procedural optimization (aka LTO)" OFF) # need cmake 3.9+ option(ENABLE_IPO "Enable full link time optimization (it's usually impractical; see also ENABLE_THINLTO)" OFF) # need cmake 3.9+
if(ENABLE_IPO) if(ENABLE_IPO)
cmake_policy(SET CMP0069 NEW) cmake_policy(SET CMP0069 NEW)
include(CheckIPOSupported) include(CheckIPOSupported)
@ -214,11 +214,6 @@ set (CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS_RELWITHDEBINFO} -O3 ${
set (CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -O0 -g3 -ggdb3 -fno-inline ${CMAKE_C_FLAGS_ADD}") set (CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -O0 -g3 -ggdb3 -fno-inline ${CMAKE_C_FLAGS_ADD}")
if (COMPILER_CLANG) if (COMPILER_CLANG)
# Exception unwinding doesn't work in clang release build without this option
# TODO investigate that
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-omit-frame-pointer")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fno-omit-frame-pointer")
if (OS_DARWIN) if (OS_DARWIN)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -stdlib=libc++") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -stdlib=libc++")
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,-U,_inside_main") set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,-U,_inside_main")
@ -226,6 +221,36 @@ if (COMPILER_CLANG)
# Display absolute paths in error messages. Otherwise KDevelop fails to navigate to correct file and opens a new file instead. # Display absolute paths in error messages. Otherwise KDevelop fails to navigate to correct file and opens a new file instead.
set(COMPILER_FLAGS "${COMPILER_FLAGS} -fdiagnostics-absolute-paths") set(COMPILER_FLAGS "${COMPILER_FLAGS} -fdiagnostics-absolute-paths")
option(ENABLE_THINLTO "Enable Thin LTO. Only applicable for clang. It's also suppressed when building with tests or sanitizers." ON)
# We cannot afford to use LTO when compiling unitests, and it's not enough
# to only supply -fno-lto at the final linking stage. So we disable it
# completely.
if (ENABLE_THINLTO AND NOT ENABLE_TESTS AND NOT SANITIZE)
# Link time optimization
set (CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS_RELWITHDEBINFO} -flto=thin")
set (CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -flto=thin")
set (CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO "${CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO} -flto=thin")
endif ()
# Always prefer llvm tools when using clang. For instance, we cannot use GNU ar when llvm LTO is enabled
find_program (LLVM_AR_PATH NAMES "llvm-ar" "llvm-ar-10" "llvm-ar-9" "llvm-ar-8")
if (LLVM_AR_PATH)
message(STATUS "Using llvm-ar: ${LLVM_AR_PATH}.")
set (CMAKE_AR ${LLVM_AR_PATH})
else ()
message(WARNING "Cannot find llvm-ar. System ar will be used instead. It does not work with ThinLTO.")
endif ()
find_program (LLVM_RANLIB_PATH NAMES "llvm-ranlib" "llvm-ranlib-10" "llvm-ranlib-9" "llvm-ranlib-8")
if (LLVM_RANLIB_PATH)
message(STATUS "Using llvm-ranlib: ${LLVM_RANLIB_PATH}.")
set (CMAKE_RANLIB ${LLVM_RANLIB_PATH})
else ()
message(WARNING "Cannot find llvm-ranlib. System ranlib will be used instead. It does not work with ThinLTO.")
endif ()
endif () endif ()
option (ENABLE_LIBRARIES "Enable all libraries (Global default switch)" ON) option (ENABLE_LIBRARIES "Enable all libraries (Global default switch)" ON)

View File

@ -15,6 +15,5 @@ ClickHouse is an open-source column-oriented database management system that all
## Upcoming Events ## Upcoming Events
* [ClickHouse Online Meetup East (in English)](https://www.eventbrite.com/e/clickhouse-online-meetup-east-registration-102989325846) on April 28, 2020.
* [ClickHouse Workshop in Novosibirsk](https://2020.codefest.ru/lecture/1628) on TBD date. * [ClickHouse Workshop in Novosibirsk](https://2020.codefest.ru/lecture/1628) on TBD date.
* [Yandex C++ Open-Source Sprints in Moscow](https://events.yandex.ru/events/otkrytyj-kod-v-yandek-28-03-2020) on TBD date. * [Yandex C++ Open-Source Sprints in Moscow](https://events.yandex.ru/events/otkrytyj-kod-v-yandek-28-03-2020) on TBD date.

View File

@ -166,12 +166,29 @@ void Loggers::buildLoggers(Poco::Util::AbstractConfiguration & config, Poco::Log
logger.root().setChannel(logger.getChannel()); logger.root().setChannel(logger.getChannel());
// Explicitly specified log levels for specific loggers. // Explicitly specified log levels for specific loggers.
Poco::Util::AbstractConfiguration::Keys levels; {
config.keys("logger.levels", levels); Poco::Util::AbstractConfiguration::Keys loggers_level;
config.keys("logger.levels", loggers_level);
if (!levels.empty()) if (!loggers_level.empty())
for (const auto & level : levels) {
logger.root().get(level).setLevel(config.getString("logger.levels." + level, "trace")); for (const auto & key : loggers_level)
{
if (key == "logger" || key.starts_with("logger["))
{
const std::string name(config.getString("logger.levels." + key + ".name"));
const std::string level(config.getString("logger.levels." + key + ".level"));
logger.root().get(name).setLevel(level);
}
else
{
// Legacy syntax
const std::string level(config.getString("logger.levels." + key, "trace"));
logger.root().get(key).setLevel(level);
}
}
}
}
} }
void Loggers::closeLogs(Poco::Logger & logger) void Loggers::closeLogs(Poco::Logger & logger)

View File

@ -7,6 +7,9 @@ set (CMAKE_SYSROOT "${CMAKE_CURRENT_LIST_DIR}/../toolchain/freebsd-x86_64")
set (CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY) # disable linkage check - it doesn't work in CMake set (CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY) # disable linkage check - it doesn't work in CMake
set (CMAKE_AR "/usr/bin/ar" CACHE FILEPATH "" FORCE)
set (CMAKE_RANLIB "/usr/bin/ranlib" CACHE FILEPATH "" FORCE)
set (LINKER_NAME "lld" CACHE STRING "" FORCE) set (LINKER_NAME "lld" CACHE STRING "" FORCE)
set (CMAKE_EXE_LINKER_FLAGS_INIT "-fuse-ld=lld") set (CMAKE_EXE_LINKER_FLAGS_INIT "-fuse-ld=lld")

View File

@ -27,6 +27,7 @@ if (CMAKE_CROSSCOMPILING)
elseif (OS_FREEBSD) elseif (OS_FREEBSD)
# FIXME: broken dependencies # FIXME: broken dependencies
set (ENABLE_PROTOBUF OFF CACHE INTERNAL "") set (ENABLE_PROTOBUF OFF CACHE INTERNAL "")
set (ENABLE_EMBEDDED_COMPILER OFF CACHE INTERNAL "")
else () else ()
message (FATAL_ERROR "Trying to cross-compile to unsupported system: ${CMAKE_SYSTEM_NAME}!") message (FATAL_ERROR "Trying to cross-compile to unsupported system: ${CMAKE_SYSTEM_NAME}!")
endif () endif ()

View File

@ -1,11 +1,11 @@
# This strings autochanged from release_lib.sh: # This strings autochanged from release_lib.sh:
set(VERSION_REVISION 54434) set(VERSION_REVISION 54435)
set(VERSION_MAJOR 20) set(VERSION_MAJOR 20)
set(VERSION_MINOR 4) set(VERSION_MINOR 5)
set(VERSION_PATCH 1) set(VERSION_PATCH 1)
set(VERSION_GITHASH 05da1bff8b5826608d05618dab984cdf8f96e679) set(VERSION_GITHASH 91df18a906dcffdbee6816e5389df6c65f86e35f)
set(VERSION_DESCRIBE v20.4.1.1-prestable) set(VERSION_DESCRIBE v20.5.1.1-prestable)
set(VERSION_STRING 20.4.1.1) set(VERSION_STRING 20.5.1.1)
# end of autochange # end of autochange
set(VERSION_EXTRA "" CACHE STRING "") set(VERSION_EXTRA "" CACHE STRING "")

View File

@ -335,10 +335,6 @@ if (USE_SIMDJSON)
add_subdirectory (simdjson-cmake) add_subdirectory (simdjson-cmake)
endif() endif()
if (USE_MIMALLOC)
add_subdirectory (mimalloc)
endif()
if (USE_FASTOPS) if (USE_FASTOPS)
add_subdirectory (fastops-cmake) add_subdirectory (fastops-cmake)
endif() endif()

2
contrib/aws vendored

@ -1 +1 @@
Subproject commit 45dd8552d3c492defca79d2720bcc809e35654da Subproject commit 04d54dfa0342d9465fb2eb3bfd4b77a3f7682e99

2
contrib/lz4 vendored

@ -1 +1 @@
Subproject commit 3d67671559be723b0912bbee2fcd2eb14783a721 Subproject commit f39b79fb02962a1cd880bbdecb6dffba4f754a11

4
debian/changelog vendored
View File

@ -1,5 +1,5 @@
clickhouse (20.4.1.1) unstable; urgency=low clickhouse (20.5.1.1) unstable; urgency=low
* Modified source code * Modified source code
-- clickhouse-release <clickhouse-release@yandex-team.ru> Thu, 12 Mar 2020 16:07:04 +0300 -- clickhouse-release <clickhouse-release@yandex-team.ru> Tue, 28 Apr 2020 20:12:13 +0300

View File

@ -23,6 +23,7 @@ RUN apt-get update -y \
python-termcolor \ python-termcolor \
sudo \ sudo \
tzdata \ tzdata \
llvm-10 \
clang-10 \ clang-10 \
clang-tidy-10 \ clang-tidy-10 \
lld-10 \ lld-10 \

View File

@ -1,7 +1,7 @@
FROM ubuntu:18.04 FROM ubuntu:18.04
ARG repository="deb http://repo.yandex.ru/clickhouse/deb/stable/ main/" ARG repository="deb http://repo.yandex.ru/clickhouse/deb/stable/ main/"
ARG version=20.4.1.* ARG version=20.5.1.*
RUN apt-get update \ RUN apt-get update \
&& apt-get install --yes --no-install-recommends \ && apt-get install --yes --no-install-recommends \

View File

@ -18,11 +18,11 @@ $ ls -l deb/test_output
``` ```
Build ClickHouse binary with `clang-9` and `address` sanitizer in `relwithdebuginfo` Build ClickHouse binary with `clang-10` and `address` sanitizer in `relwithdebuginfo`
mode: mode:
``` ```
$ mkdir $HOME/some_clickhouse $ mkdir $HOME/some_clickhouse
$ ./packager --output-dir=$HOME/some_clickhouse --package-type binary --compiler=clang-9 --sanitizer=address $ ./packager --output-dir=$HOME/some_clickhouse --package-type binary --compiler=clang-10 --sanitizer=address
$ ls -l $HOME/some_clickhouse $ ls -l $HOME/some_clickhouse
-rwxr-xr-x 1 root root 787061952 clickhouse -rwxr-xr-x 1 root root 787061952 clickhouse
lrwxrwxrwx 1 root root 10 clickhouse-benchmark -> clickhouse lrwxrwxrwx 1 root root 10 clickhouse-benchmark -> clickhouse

View File

@ -1,4 +1,3 @@
# Trigger new image build
# docker build -t yandex/clickhouse-binary-builder . # docker build -t yandex/clickhouse-binary-builder .
FROM ubuntu:19.10 FROM ubuntu:19.10
@ -27,6 +26,7 @@ RUN apt-get update -y \
curl \ curl \
gcc-9 \ gcc-9 \
g++-9 \ g++-9 \
llvm-10 \
clang-10 \ clang-10 \
lld-10 \ lld-10 \
clang-tidy-10 \ clang-tidy-10 \

View File

@ -23,6 +23,7 @@ RUN apt-get --allow-unauthenticated update -y \
apt-get --allow-unauthenticated install --yes --no-install-recommends \ apt-get --allow-unauthenticated install --yes --no-install-recommends \
gcc-9 \ gcc-9 \
g++-9 \ g++-9 \
llvm-10 \
clang-10 \ clang-10 \
lld-10 \ lld-10 \
clang-tidy-10 \ clang-tidy-10 \

View File

@ -165,7 +165,7 @@ if __name__ == "__main__":
parser.add_argument("--clickhouse-repo-path", default="../../") parser.add_argument("--clickhouse-repo-path", default="../../")
parser.add_argument("--output-dir", required=True) parser.add_argument("--output-dir", required=True)
parser.add_argument("--build-type", choices=("debug", ""), default="") parser.add_argument("--build-type", choices=("debug", ""), default="")
parser.add_argument("--compiler", choices=("clang-8", "clang-8-darwin", "clang-9-aarch64", "clang-9-freebsd", "gcc-8", "gcc-9", "clang-9"), default="gcc-8") parser.add_argument("--compiler", choices=("clang-8", "clang-8-darwin", "clang-9-aarch64", "clang-9-freebsd", "gcc-9", "clang-9", "clang-10"), default="gcc-9")
parser.add_argument("--sanitizer", choices=("address", "thread", "memory", "undefined", ""), default="") parser.add_argument("--sanitizer", choices=("address", "thread", "memory", "undefined", ""), default="")
parser.add_argument("--unbundled", action="store_true") parser.add_argument("--unbundled", action="store_true")
parser.add_argument("--split-binary", action="store_true") parser.add_argument("--split-binary", action="store_true")

View File

@ -1,7 +1,7 @@
FROM ubuntu:18.04 FROM ubuntu:18.04
ARG repository="deb http://repo.yandex.ru/clickhouse/deb/stable/ main/" ARG repository="deb http://repo.yandex.ru/clickhouse/deb/stable/ main/"
ARG version=20.4.1.* ARG version=20.5.1.*
ARG gosu_ver=1.10 ARG gosu_ver=1.10
RUN apt-get update \ RUN apt-get update \

View File

@ -1,7 +1,7 @@
FROM ubuntu:18.04 FROM ubuntu:18.04
ARG repository="deb http://repo.yandex.ru/clickhouse/deb/stable/ main/" ARG repository="deb http://repo.yandex.ru/clickhouse/deb/stable/ main/"
ARG version=20.4.1.* ARG version=20.5.1.*
RUN apt-get update && \ RUN apt-get update && \
apt-get install -y apt-transport-https dirmngr && \ apt-get install -y apt-transport-https dirmngr && \

View File

@ -27,5 +27,16 @@ services:
- SERVER_REDIRECT_CODE=307 - SERVER_REDIRECT_CODE=307
- SERVER_ACCESS_LOG=/nginx/access.log - SERVER_ACCESS_LOG=/nginx/access.log
# HTTP proxies for Minio.
proxy1:
image: vimagick/tinyproxy
ports:
- "4081:8888"
proxy2:
image: vimagick/tinyproxy
ports:
- "4082:8888"
volumes: volumes:
data1-1: data1-1:

View File

@ -93,53 +93,52 @@ function run_tests
# Just check that the script runs at all # Just check that the script runs at all
"$script_dir/perf.py" --help > /dev/null "$script_dir/perf.py" --help > /dev/null
# When testing commits from master, use the older test files. This allows the # Find the directory with test files.
# tests to pass even when we add new functions and tests for them, that are if [ -v CHPC_TEST_PATH ]
# not supported in the old revision. then
# When testing a PR, use the test files from the PR so that we can test their # Use the explicitly set path to directory with test files.
# changes. test_prefix="$CHPC_TEST_PATH"
test_prefix=$([ "$PR_TO_TEST" == "0" ] && echo left || echo right)/performance elif [ "$PR_TO_TEST" = "0" ]
then
# When testing commits from master, use the older test files. This
# allows the tests to pass even when we add new functions and tests for
# them, that are not supported in the old revision.
test_prefix=left/performance
elif [ "$PR_TO_TEST" != "" ] && [ "$PR_TO_TEST" != "0" ]
then
# For PRs, use newer test files so we can test these changes.
test_prefix=right/performance
for x in {test-times,skipped-tests,wall-clock-times}.tsv # If some tests were changed in the PR, we may want to run only these
# ones. The list of changed tests in changed-test.txt is prepared in
# entrypoint.sh from git diffs, because it has the cloned repo. Used
# to use rsync for that but it was really ugly and not always correct
# (e.g. when the reference SHA is really old and has some other
# differences to the tested SHA, besides the one introduced by the PR).
changed_test_files=$(sed "s/tests\/performance/${test_prefix//\//\\/}/" changed-tests.txt)
fi
# Determine which tests to run.
if [ -v CHPC_TEST_GREP ]
then
# Run only explicitly specified tests, if any.
test_files=$(ls "$test_prefix" | grep "$CHPC_TEST_GREP" | xargs -I{} -n1 readlink -f "$test_prefix/{}")
elif [ "$changed_test_files" != "" ]
then
# Use test files that changed in the PR.
test_files="$changed_test_files"
else
# The default -- run all tests found in the test dir.
test_files=$(ls "$test_prefix"/*.xml)
fi
# Delete old report files.
for x in {test-times,skipped-tests,wall-clock-times,report-thresholds,client-times}.tsv
do do
rm -v "$x" ||: rm -v "$x" ||:
touch "$x" touch "$x"
done done
# FIXME a quick crutch to bring the run time down for the unstable tests --
# if some performance tests xmls were changed in a PR, run only these ones.
if [ "$PR_TO_TEST" != "0" ]
then
# changed-test.txt prepared in entrypoint.sh from git diffs, because it
# has the cloned repo. Used to use rsync for that but it was really ugly
# and not always correct (e.g. when the reference SHA is really old and
# has some other differences to the tested SHA, besides the one introduced
# by the PR).
test_files_override=$(sed "s/tests\/performance/${test_prefix//\//\\/}/" changed-tests.txt)
if [ "$test_files_override" != "" ]
then
test_files=$test_files_override
fi
fi
# Run only explicitly specified tests, if any
if [ -v CHPC_TEST_GREP ]
then
test_files=$(ls "$test_prefix" | grep "$CHPC_TEST_GREP" | xargs -I{} -n1 readlink -f "$test_prefix/{}")
fi
if [ "$test_files" == "" ]
then
# FIXME remove some broken long tests
for test_name in {IPv4,IPv6,modulo,parse_engine_file,number_formatting_formats,select_format,arithmetic,cryptographic_hashes,logical_functions_{medium,small}}
do
printf "%s\tMarked as broken (see compare.sh)\n" "$test_name">> skipped-tests.tsv
rm "$test_prefix/$test_name.xml" ||:
done
test_files=$(ls "$test_prefix"/*.xml)
fi
# Run the tests. # Run the tests.
test_name="<none>" test_name="<none>"
for test in $test_files for test in $test_files
@ -159,14 +158,6 @@ function run_tests
# The test completed with zero status, so we treat stderr as warnings # The test completed with zero status, so we treat stderr as warnings
mv "$test_name-err.log" "$test_name-warn.log" mv "$test_name-err.log" "$test_name-warn.log"
grep ^query "$test_name-raw.tsv" | cut -f2- > "$test_name-queries.tsv"
grep ^client-time "$test_name-raw.tsv" | cut -f2- > "$test_name-client-time.tsv"
skipped=$(grep ^skipped "$test_name-raw.tsv" | cut -f2-)
if [ "$skipped" != "" ]
then
printf "%s\t%s\n" "$test_name" "$skipped">> skipped-tests.tsv
fi
done done
unset TIMEFORMAT unset TIMEFORMAT
@ -182,11 +173,11 @@ function get_profiles_watchdog
for pid in $(pgrep -f clickhouse) for pid in $(pgrep -f clickhouse)
do do
gdb -p $pid --batch --ex "info proc all" --ex "thread apply all bt" --ex quit &> "$pid.gdb.log" & gdb -p "$pid" --batch --ex "info proc all" --ex "thread apply all bt" --ex quit &> "$pid.gdb.log" &
done done
wait wait
for i in {1..10} for _ in {1..10}
do do
if ! pkill -f clickhouse if ! pkill -f clickhouse
then then
@ -229,7 +220,19 @@ function get_profiles
# Build and analyze randomization distribution for all queries. # Build and analyze randomization distribution for all queries.
function analyze_queries function analyze_queries
{ {
rm -v analyze-commands.txt analyze-errors.log all-queries.tsv unstable-queries.tsv *-report.tsv ||: rm -v analyze-commands.txt analyze-errors.log all-queries.tsv unstable-queries.tsv ./*-report.tsv raw-queries.tsv client-times.tsv report-thresholds.tsv ||:
# Split the raw test output into files suitable for analysis.
IFS=$'\n'
for test_file in $(find . -maxdepth 1 -name "*-raw.tsv" -print)
do
test_name=$(basename "$test_file" "-raw.tsv")
sed -n "s/^query\t//p" < "$test_file" > "$test_name-queries.tsv"
sed -n "s/^client-time/$test_name/p" < "$test_file" >> "client-times.tsv"
sed -n "s/^report-threshold/$test_name/p" < "$test_file" >> "report-thresholds.tsv"
sed -n "s/^skipped/$test_name/p" < "$test_file" >> "skipped-tests.tsv"
done
unset IFS
# This is a lateral join in bash... please forgive me. # This is a lateral join in bash... please forgive me.
# We don't have arrayPermute(), so I have to make random permutations with # We don't have arrayPermute(), so I have to make random permutations with
@ -245,14 +248,14 @@ do
for query in $(cut -d' ' -f1 "$test_file" | sort | uniq) for query in $(cut -d' ' -f1 "$test_file" | sort | uniq)
do do
query_prefix="$test_name.q$query_index" query_prefix="$test_name.q$query_index"
query_index=$(($query_index + 1)) query_index=$((query_index + 1))
grep -F "$query " "$test_file" > "$query_prefix.tmp" grep -F "$query " "$test_file" > "$query_prefix.tmp"
printf "%s\0\n" \ printf "%s\0\n" \
"clickhouse-local \ "clickhouse-local \
--file "$query_prefix.tmp" \ --file \"$query_prefix.tmp\" \
--structure 'query text, run int, version UInt32, time float' \ --structure 'query text, run int, version UInt32, time float' \
--query \"$(cat "$script_dir/eqmed.sql")\" \ --query \"$(cat "$script_dir/eqmed.sql")\" \
>> "$test_name-report.tsv"" \ >> \"$test_name-report.tsv\"" \
2>> analyze-errors.log \ 2>> analyze-errors.log \
>> analyze-commands.txt >> analyze-commands.txt
done done
@ -267,6 +270,108 @@ parallel --verbose --null < analyze-commands.txt
function report function report
{ {
rm -r report ||:
mkdir report ||:
rm ./*.{rep,svg} test-times.tsv test-dump.tsv unstable.tsv unstable-query-ids.tsv unstable-query-metrics.tsv changed-perf.tsv unstable-tests.tsv unstable-queries.tsv bad-tests.tsv slow-on-client.tsv all-queries.tsv ||:
cat analyze-errors.log >> report/errors.log ||:
cat profile-errors.log >> report/errors.log ||:
clickhouse-local --query "
create table queries engine File(TSVWithNamesAndTypes, 'report/queries.tsv')
as select
-- FIXME Comparison mode doesn't make sense for queries that complete
-- immediately, so for now we pretend they don't exist. We don't want to
-- remove them altogether because we want to be able to detect regressions,
-- but the right way to do this is not yet clear.
(left + right) / 2 < 0.02 as short,
not short and abs(diff) > report_threshold and abs(diff) > stat_threshold as changed_fail,
not short and abs(diff) > report_threshold - 0.05 and abs(diff) > stat_threshold as changed_show,
not short and not changed_fail and stat_threshold > report_threshold + 0.10 as unstable_fail,
not short and not changed_show and stat_threshold > report_threshold - 0.05 as unstable_show,
left, right, diff, stat_threshold,
if(report_threshold > 0, report_threshold, 0.10) as report_threshold,
reports.test,
query
from
(
select *,
replaceAll(_file, '-report.tsv', '') test
from file('*-report.tsv', TSV, 'left float, right float, diff float, stat_threshold float, query text')
) reports
left join file('report-thresholds.tsv', TSV, 'test text, report_threshold float') thresholds
using test
;
-- keep the table in old format so that we can analyze new and old data together
create table queries_old_format engine File(TSVWithNamesAndTypes, 'queries.rep')
as select short, changed_fail, unstable_fail, left, right, diff, stat_threshold, test, query
from queries
;
create table changed_perf_tsv engine File(TSV, 'report/changed-perf.tsv') as
select left, right, diff, stat_threshold, changed_fail, test, query from queries where changed_show
order by abs(diff) desc;
create table unstable_queries_tsv engine File(TSV, 'report/unstable-queries.tsv') as
select left, right, diff, stat_threshold, unstable_fail, test, query from queries where unstable_show
order by stat_threshold desc;
create table queries_for_flamegraph engine File(TSVWithNamesAndTypes, 'report/queries-for-flamegraph.tsv') as
select query, test from queries where unstable_show or changed_show
;
create table unstable_tests_tsv engine File(TSV, 'report/bad-tests.tsv') as
select test, sum(unstable_fail) u, sum(changed_fail) c, u + c s from queries
group by test having s > 0 order by s desc;
create table query_time engine Memory as select *
from file('client-times.tsv', TSV, 'test text, query text, client float, server float');
create table wall_clock engine Memory as select *
from file('wall-clock-times.tsv', TSV, 'test text, real float, user float, system float');
create table slow_on_client_tsv engine File(TSV, 'report/slow-on-client.tsv') as
select client, server, floor(client/server, 3) p, query
from query_time where p > 1.02 order by p desc;
create table test_time engine Memory as
select test, sum(client) total_client_time,
maxIf(client, not short) query_max,
minIf(client, not short) query_min,
count(*) queries,
sum(short) short_queries
from query_time full join queries
on query_time.query = queries.query
group by test;
create table test_times_tsv engine File(TSV, 'report/test-times.tsv') as
select wall_clock.test, real,
floor(total_client_time, 3),
queries,
short_queries,
floor(query_max, 3),
floor(real / queries, 3) avg_real_per_query,
floor(query_min, 3)
from test_time
-- wall clock times are also measured for skipped tests, so don't
-- do full join
left join wall_clock using test
order by avg_real_per_query desc;
create table all_tests_tsv engine File(TSV, 'report/all-queries.tsv') as
select changed_fail, unstable_fail,
left, right, diff,
floor(left > right ? left / right : right / left, 3),
stat_threshold, test, query
from queries order by test, query;
" 2> >(tee -a report/errors.log 1>&2)
for x in {right,left}-{addresses,{query,query-thread,trace,metric}-log}.tsv for x in {right,left}-{addresses,{query,query-thread,trace,metric}-log}.tsv
do do
# FIXME This loop builds column definitons from TSVWithNamesAndTypes in an # FIXME This loop builds column definitons from TSVWithNamesAndTypes in an
@ -277,88 +382,12 @@ do
| tr '\n' ', ' | sed 's/,$//' > "$x.columns" | tr '\n' ', ' | sed 's/,$//' > "$x.columns"
done done
rm ./*.{rep,svg} test-times.tsv test-dump.tsv unstable.tsv unstable-query-ids.tsv unstable-query-metrics.tsv changed-perf.tsv unstable-tests.tsv unstable-queries.tsv bad-tests.tsv slow-on-client.tsv all-queries.tsv ||:
cat analyze-errors.log >> report-errors.rep ||:
cat profile-errors.log >> report-errors.rep ||:
clickhouse-local --query "
create table queries engine File(TSVWithNamesAndTypes, 'queries.rep')
as select
-- FIXME Comparison mode doesn't make sense for queries that complete
-- immediately, so for now we pretend they don't exist. We don't want to
-- remove them altogether because we want to be able to detect regressions,
-- but the right way to do this is not yet clear.
(left + right) / 2 < 0.02 as short,
not short and abs(diff) > 0.05 and abs(diff) > threshold as changed,
not short and not changed and threshold > 0.05 as unstable,
left, right, diff, threshold,
replaceAll(_file, '-report.tsv', '') test,
-- Truncate long queries.
if(length(query) < 300, query, substr(query, 1, 298) || '...') query
from file('*-report.tsv', TSV, 'left float, right float, diff float, threshold float, query text');
create table changed_perf_tsv engine File(TSV, 'changed-perf.tsv') as
select left, right, diff, threshold, test, query from queries where changed
order by abs(diff) desc;
create table unstable_queries_tsv engine File(TSV, 'unstable-queries.tsv') as
select left, right, diff, threshold, test, query from queries where unstable
order by threshold desc;
create table unstable_tests_tsv engine File(TSV, 'bad-tests.tsv') as
select test, sum(unstable) u, sum(changed) c, u + c s from queries
group by test having s > 0 order by s desc;
create table query_time engine Memory as select *, replaceAll(_file, '-client-time.tsv', '') test
from file('*-client-time.tsv', TSV, 'query text, client float, server float');
create table wall_clock engine Memory as select *
from file('wall-clock-times.tsv', TSV, 'test text, real float, user float, system float');
create table slow_on_client_tsv engine File(TSV, 'slow-on-client.tsv') as
select client, server, floor(client/server, 3) p, query
from query_time where p > 1.02 order by p desc;
create table test_time engine Memory as
select test, sum(client) total_client_time,
maxIf(client, not short) query_max,
minIf(client, not short) query_min,
count(*) queries,
sum(short) short_queries
from query_time, queries
where query_time.query = queries.query
group by test;
create table test_times_tsv engine File(TSV, 'test-times.tsv') as
select wall_clock.test, real,
floor(total_client_time, 3),
queries,
short_queries,
floor(query_max, 3),
floor(real / queries, 3) avg_real_per_query,
floor(query_min, 3)
from test_time join wall_clock using test
order by avg_real_per_query desc;
create table all_tests_tsv engine File(TSV, 'all-queries.tsv') as
select left, right, diff,
floor(left > right ? left / right : right / left, 3),
threshold, test, query
from queries order by test, query;
" 2> >(head -2 >> report-errors.rep) ||:
for version in {right,left} for version in {right,left}
do do
clickhouse-local --query " clickhouse-local --query "
create view queries as create view queries_for_flamegraph as
select * from file('queries.rep', TSVWithNamesAndTypes, select * from file('report/queries-for-flamegraph.tsv', TSVWithNamesAndTypes,
'short int, changed int, unstable int, left float, right float, 'query text, test text');
diff float, threshold float, test text, query text');
create view query_log as select * create view query_log as select *
from file('$version-query-log.tsv', TSVWithNamesAndTypes, from file('$version-query-log.tsv', TSVWithNamesAndTypes,
@ -377,9 +406,9 @@ create table addresses_join_$version engine Join(any, left, address) as
create table unstable_query_runs engine File(TSVWithNamesAndTypes, create table unstable_query_runs engine File(TSVWithNamesAndTypes,
'unstable-query-runs.$version.rep') as 'unstable-query-runs.$version.rep') as
select query_id, query from query_log select query, query_id from query_log
join queries using query where query in (select query from queries_for_flamegraph)
where query_id not like 'prewarm %' and (unstable or changed) and query_id not like 'prewarm %'
; ;
create table unstable_query_log engine File(Vertical, create table unstable_query_log engine File(Vertical,
@ -427,10 +456,10 @@ create table metric_devation engine File(TSVWithNamesAndTypes,
from (select * from unstable_run_metrics from (select * from unstable_run_metrics
union all select * from unstable_run_traces union all select * from unstable_run_traces
union all select * from unstable_run_metrics_2) mm union all select * from unstable_run_metrics_2) mm
join queries using query join queries_for_flamegraph using query
group by query, metric group by query, metric
having d > 0.5 having d > 0.5
order by any(threshold) desc, query desc, d desc order by query desc, d desc
; ;
create table stacks engine File(TSV, 'stacks.$version.rep') as create table stacks engine File(TSV, 'stacks.$version.rep') as
@ -447,7 +476,7 @@ create table stacks engine File(TSV, 'stacks.$version.rep') as
join unstable_query_runs using query_id join unstable_query_runs using query_id
group by query, trace group by query, trace
; ;
" 2> >(head -2 >> report-errors.rep) ||: # do not run in parallel because they use the same data dir for StorageJoins which leads to weird errors. " 2> >(tee -a report/errors.log 1>&2) # do not run in parallel because they use the same data dir for StorageJoins which leads to weird errors.
done done
wait wait
@ -529,7 +558,7 @@ case "$stage" in
"report") "report")
time report ||: time report ||:
time "$script_dir/report.py" --report=all-queries > all-queries.html 2> >(head -2 >> report-errors.rep) ||: time "$script_dir/report.py" --report=all-queries > all-queries.html 2> >(tee -a report/errors.log 1>&2) ||:
time "$script_dir/report.py" > report.html time "$script_dir/report.py" > report.html
;& ;&
esac esac

View File

@ -30,7 +30,8 @@ function download
wget -nv -nd -c "https://clickhouse-builds.s3.yandex.net/$left_pr/$left_sha/performance/performance.tgz" -O- | tar -C left --strip-components=1 -zxv & wget -nv -nd -c "https://clickhouse-builds.s3.yandex.net/$left_pr/$left_sha/performance/performance.tgz" -O- | tar -C left --strip-components=1 -zxv &
wget -nv -nd -c "https://clickhouse-builds.s3.yandex.net/$right_pr/$right_sha/performance/performance.tgz" -O- | tar -C right --strip-components=1 -zxv & wget -nv -nd -c "https://clickhouse-builds.s3.yandex.net/$right_pr/$right_sha/performance/performance.tgz" -O- | tar -C right --strip-components=1 -zxv &
else else
wget -nv -nd -c "https://clickhouse-builds.s3.yandex.net/$left_pr/$left_sha/performance/performance.tgz" -O- | tar -C left --strip-components=1 -zxv && cp -a left right & mkdir right ||:
wget -nv -nd -c "https://clickhouse-builds.s3.yandex.net/$left_pr/$left_sha/performance/performance.tgz" -O- | tar -C left --strip-components=1 -zxv && cp -a left/* right &
fi fi
for dataset_name in $datasets for dataset_name in $datasets

View File

@ -119,5 +119,5 @@ done
dmesg -T > dmesg.log dmesg -T > dmesg.log
7z a /output/output.7z ./*.{log,tsv,html,txt,rep,svg} {right,left}/{performance,db/preprocessed_configs} 7z a /output/output.7z ./*.{log,tsv,html,txt,rep,svg} {right,left}/{performance,db/preprocessed_configs,scripts} ./report
cp compare.log /output cp compare.log /output

View File

@ -9,8 +9,12 @@ from
( (
-- quantiles of randomization distributions -- quantiles of randomization distributions
select quantileExact(0.999)(abs(time_by_label[1] - time_by_label[2]) as d) threshold select quantileExact(0.999)(abs(time_by_label[1] - time_by_label[2]) as d) threshold
-- uncomment to see what the distribution is really like ---- uncomment to see what the distribution is really like
-- , sumMap([d], [1]) full_histogram --, uniqExact(d) u
--, arraySort(x->x.1,
-- arrayZip(
-- (sumMap([d], [1]) as f).1,
-- f.2)) full_histogram
from from
( (
select virtual_run, groupArrayInsertAt(median_time, random_label) time_by_label -- make array 'random label' -> 'median time' select virtual_run, groupArrayInsertAt(median_time, random_label) time_by_label -- make array 'random label' -> 'median time'

View File

@ -48,6 +48,10 @@ infinite_sign = root.find('.//average_speed_not_changing_for_ms')
if infinite_sign is not None: if infinite_sign is not None:
raise Exception('Looks like the test is infinite (sign 1)') raise Exception('Looks like the test is infinite (sign 1)')
# Print report threshold for the test if it is set.
if 'max_ignored_relative_change' in root.attrib:
print(f'report-threshold\t{root.attrib["max_ignored_relative_change"]}')
# Open connections # Open connections
servers = [{'host': host, 'port': port} for (host, port) in zip(args.host, args.port)] servers = [{'host': host, 'port': port} for (host, port) in zip(args.host, args.port)]
connections = [clickhouse_driver.Client(**server) for server in servers] connections = [clickhouse_driver.Client(**server) for server in servers]
@ -137,17 +141,25 @@ test_queries = substitute_parameters(test_query_templates)
report_stage_end('substitute2') report_stage_end('substitute2')
for q in test_queries: for i, q in enumerate(test_queries):
# We have some crazy long queries (about 100kB), so trim them to a sane
# length.
query_display_name = q
if len(query_display_name) > 1000:
query_display_name = f'{query_display_name[:1000]}...({i})'
# Prewarm: run once on both servers. Helps to bring the data into memory, # Prewarm: run once on both servers. Helps to bring the data into memory,
# precompile the queries, etc. # precompile the queries, etc.
try: try:
for conn_index, c in enumerate(connections): for conn_index, c in enumerate(connections):
res = c.execute(q, query_id = 'prewarm {} {}'.format(0, q)) res = c.execute(q, query_id = f'prewarm {0} {query_display_name}')
print('prewarm\t' + tsv_escape(q) + '\t' + str(conn_index) + '\t' + str(c.last_query.elapsed)) print(f'prewarm\t{tsv_escape(query_display_name)}\t{conn_index}\t{c.last_query.elapsed}')
except: except:
# If prewarm fails for some query -- skip it, and try to test the others. # If prewarm fails for some query -- skip it, and try to test the others.
# This might happen if the new test introduces some function that the # This might happen if the new test introduces some function that the
# old server doesn't support. Still, report it as an error. # old server doesn't support. Still, report it as an error.
# FIXME the driver reconnects on error and we lose settings, so this might
# lead to further errors or unexpected behavior.
print(traceback.format_exc(), file=sys.stderr) print(traceback.format_exc(), file=sys.stderr)
continue continue
@ -160,11 +172,11 @@ for q in test_queries:
for run in range(0, args.runs): for run in range(0, args.runs):
for conn_index, c in enumerate(connections): for conn_index, c in enumerate(connections):
res = c.execute(q) res = c.execute(q)
print('query\t' + tsv_escape(q) + '\t' + str(run) + '\t' + str(conn_index) + '\t' + str(c.last_query.elapsed)) print(f'query\t{tsv_escape(query_display_name)}\t{run}\t{conn_index}\t{c.last_query.elapsed}')
server_seconds += c.last_query.elapsed server_seconds += c.last_query.elapsed
client_seconds = time.perf_counter() - start_seconds client_seconds = time.perf_counter() - start_seconds
print('client-time\t{}\t{}\t{}'.format(tsv_escape(q), client_seconds, server_seconds)) print(f'client-time\t{tsv_escape(query_display_name)}\t{client_seconds}\t{server_seconds}')
report_stage_end('benchmark') report_stage_end('benchmark')

View File

@ -53,6 +53,29 @@ More stages are available, e.g. restart servers or run the tests. See the code.
docker/test/performance-comparison/perf.py --host=localhost --port=9000 --runs=1 tests/performance/logical_functions_small.xml docker/test/performance-comparison/perf.py --host=localhost --port=9000 --runs=1 tests/performance/logical_functions_small.xml
``` ```
#### Run all tests on some custom configuration
Start two servers manually on ports `9001` (old) and `9002` (new). Change to a
new directory to be used as workspace for tests, and try something like this:
```
$ PATH=$PATH:~/ch4/build-gcc9-rel/programs \
CHPC_TEST_PATH=~/ch3/ch/tests/performance \
CHPC_TEST_GREP=visit_param \
stage=run_tests \
~/ch3/ch/docker/test/performance-comparison/compare.sh
```
* `PATH` must contain `clickhouse-local` and `clickhouse-client`.
* `CHPC_TEST_PATH` -- path to performance test cases, e.g. `tests/performance`.
* `CHPC_TEST_GREP` -- a filter for which tests to run, as a grep pattern.
* `stage` -- from which execution stage to start. To run the tests, use
`run_tests` stage.
The tests will run, and the `report.html` will be generated in the current
directory.
More complex setup is possible, but inconvenient and requires some scripting.
See `manual-run.sh` for inspiration.
#### Statistical considerations #### Statistical considerations
Generating randomization distribution for medians is tricky. Suppose we have N Generating randomization distribution for medians is tricky. Suppose we have N
runs for each version, and then use the combined 2N run results to make a runs for each version, and then use the combined 2N run results to make a

View File

@ -22,6 +22,9 @@ slower_queries = 0
unstable_queries = 0 unstable_queries = 0
very_unstable_queries = 0 very_unstable_queries = 0
# max seconds to run one query by itself, not counting preparation
allowed_single_run_time = 2
header_template = """ header_template = """
<!DOCTYPE html> <!DOCTYPE html>
<html> <html>
@ -95,7 +98,8 @@ def tableRow(cell_values, cell_attributes = []):
return tr(''.join([td(v, a) return tr(''.join([td(v, a)
for v, a in itertools.zip_longest( for v, a in itertools.zip_longest(
cell_values, cell_attributes, cell_values, cell_attributes,
fillvalue = '')])) fillvalue = '')
if a is not None]))
def tableHeader(r): def tableHeader(r):
return tr(''.join([th(f) for f in r])) return tr(''.join([th(f) for f in r]))
@ -139,16 +143,45 @@ def printSimpleTable(caption, columns, rows):
print(tableRow(row)) print(tableRow(row))
print(tableEnd()) print(tableEnd())
def print_tested_commits():
global report_errors
try:
printSimpleTable('Tested commits', ['Old', 'New'],
[['<pre>{}</pre>'.format(x) for x in
[open('left-commit.txt').read(),
open('right-commit.txt').read()]]])
except:
# Don't fail if no commit info -- maybe it's a manual run.
report_errors.append(
traceback.format_exception_only(
*sys.exc_info()[:2])[-1])
pass
def print_report_errors():
global report_errors
# Add the errors reported by various steps of comparison script
try:
report_errors += [l.strip() for l in open('report/errors.log')]
except:
report_errors.append(
traceback.format_exception_only(
*sys.exc_info()[:2])[-1])
pass
if len(report_errors):
print(tableStart('Errors while building the report'))
print(tableHeader(['Error']))
for x in report_errors:
print(tableRow([x]))
print(tableEnd())
if args.report == 'main': if args.report == 'main':
print(header_template.format()) print(header_template.format())
printSimpleTable('Tested commits', ['Old', 'New'], print_tested_commits()
[['<pre>{}</pre>'.format(x) for x in
[open('left-commit.txt').read(),
open('right-commit.txt').read()]]])
def print_changes(): def print_changes():
rows = tsvRows('changed-perf.tsv') rows = tsvRows('report/changed-perf.tsv')
if not rows: if not rows:
return return
@ -160,22 +193,25 @@ if args.report == 'main':
'New, s', # 1 'New, s', # 1
'Relative difference (new&nbsp;-&nbsp;old)/old', # 2 'Relative difference (new&nbsp;-&nbsp;old)/old', # 2
'p&nbsp;<&nbsp;0.001 threshold', # 3 'p&nbsp;<&nbsp;0.001 threshold', # 3
'Test', # 4 # Failed # 4
'Query', # 5 'Test', # 5
'Query', # 6
] ]
print(tableHeader(columns)) print(tableHeader(columns))
attrs = ['' for c in columns] attrs = ['' for c in columns]
attrs[4] = None
for row in rows: for row in rows:
attrs[2] = '' if int(row[4]):
if abs(float(row[2])) > 0.10:
if float(row[2]) < 0.: if float(row[2]) < 0.:
faster_queries += 1 faster_queries += 1
attrs[2] = 'style="background: #adbdff"' attrs[2] = 'style="background: #adbdff"'
else: else:
slower_queries += 1 slower_queries += 1
attrs[2] = 'style="background: #ffb0a0"' attrs[2] = 'style="background: #ffb0a0"'
else:
attrs[2] = ''
print(tableRow(row, attrs)) print(tableRow(row, attrs))
@ -183,7 +219,7 @@ if args.report == 'main':
print_changes() print_changes()
slow_on_client_rows = tsvRows('slow-on-client.tsv') slow_on_client_rows = tsvRows('report/slow-on-client.tsv')
error_tests += len(slow_on_client_rows) error_tests += len(slow_on_client_rows)
printSimpleTable('Slow on client', printSimpleTable('Slow on client',
['Client time, s', 'Server time, s', 'Ratio', 'Query'], ['Client time, s', 'Server time, s', 'Ratio', 'Query'],
@ -193,7 +229,7 @@ if args.report == 'main':
global unstable_queries global unstable_queries
global very_unstable_queries global very_unstable_queries
unstable_rows = tsvRows('unstable-queries.tsv') unstable_rows = tsvRows('report/unstable-queries.tsv')
if not unstable_rows: if not unstable_rows:
return return
@ -204,16 +240,18 @@ if args.report == 'main':
'New, s', #1 'New, s', #1
'Relative difference (new&nbsp;-&nbsp;old)/old', #2 'Relative difference (new&nbsp;-&nbsp;old)/old', #2
'p&nbsp;<&nbsp;0.001 threshold', #3 'p&nbsp;<&nbsp;0.001 threshold', #3
'Test', #4 # Failed #4
'Query' #5 'Test', #5
'Query' #6
] ]
print(tableStart('Unstable queries')) print(tableStart('Unstable queries'))
print(tableHeader(columns)) print(tableHeader(columns))
attrs = ['' for c in columns] attrs = ['' for c in columns]
attrs[4] = None
for r in unstable_rows: for r in unstable_rows:
if float(r[3]) > 0.2: if int(r[4]):
very_unstable_queries += 1 very_unstable_queries += 1
attrs[3] = 'style="background: #ffb0a0"' attrs[3] = 'style="background: #ffb0a0"'
else: else:
@ -234,11 +272,11 @@ if args.report == 'main':
printSimpleTable('Tests with most unstable queries', printSimpleTable('Tests with most unstable queries',
['Test', 'Unstable', 'Changed perf', 'Total not OK'], ['Test', 'Unstable', 'Changed perf', 'Total not OK'],
tsvRows('bad-tests.tsv')) tsvRows('report/bad-tests.tsv'))
def print_test_times(): def print_test_times():
global slow_average_tests global slow_average_tests
rows = tsvRows('test-times.tsv') rows = tsvRows('report/test-times.tsv')
if not rows: if not rows:
return return
@ -256,17 +294,18 @@ if args.report == 'main':
print(tableStart('Test times')) print(tableStart('Test times'))
print(tableHeader(columns)) print(tableHeader(columns))
runs = 13 # FIXME pass this as an argument nominal_runs = 13 # FIXME pass this as an argument
total_runs = (nominal_runs + 1) * 2 # one prewarm run, two servers
attrs = ['' for c in columns] attrs = ['' for c in columns]
for r in rows: for r in rows:
if float(r[6]) > 3 * runs: if float(r[6]) > 1.5 * total_runs:
# FIXME should be 15s max -- investigate parallel_insert # FIXME should be 15s max -- investigate parallel_insert
slow_average_tests += 1 slow_average_tests += 1
attrs[6] = 'style="background: #ffb0a0"' attrs[6] = 'style="background: #ffb0a0"'
else: else:
attrs[6] = '' attrs[6] = ''
if float(r[5]) > 4 * runs: if float(r[5]) > allowed_single_run_time * total_runs:
slow_average_tests += 1 slow_average_tests += 1
attrs[5] = 'style="background: #ffb0a0"' attrs[5] = 'style="background: #ffb0a0"'
else: else:
@ -278,15 +317,7 @@ if args.report == 'main':
print_test_times() print_test_times()
# Add the errors reported by various steps of comparison script print_report_errors()
report_errors += [l.strip() for l in open('report-errors.rep')]
if len(report_errors):
print(tableStart('Errors while building the report'))
print(tableHeader(['Error']))
for x in report_errors:
print(tableRow([x]))
print(tableEnd())
print(""" print("""
<p class="links"> <p class="links">
@ -340,44 +371,50 @@ elif args.report == 'all-queries':
print(header_template.format()) print(header_template.format())
printSimpleTable('Tested commits', ['Old', 'New'], print_tested_commits()
[['<pre>{}</pre>'.format(x) for x in
[open('left-commit.txt').read(),
open('right-commit.txt').read()]]])
def print_all_queries(): def print_all_queries():
rows = tsvRows('all-queries.tsv') rows = tsvRows('report/all-queries.tsv')
if not rows: if not rows:
return return
columns = [ columns = [
'Old, s', #0 # Changed #0
'New, s', #1 # Unstable #1
'Relative difference (new&nbsp;-&nbsp;old)/old', #2 'Old, s', #2
'Times speedup/slowdown', #3 'New, s', #3
'p&nbsp;<&nbsp;0.001 threshold', #4 'Relative difference (new&nbsp;-&nbsp;old)/old', #4
'Test', #5 'Times speedup/slowdown', #5
'Query', #6 'p&nbsp;<&nbsp;0.001 threshold', #6
'Test', #7
'Query', #8
] ]
print(tableStart('All query times')) print(tableStart('All query times'))
print(tableHeader(columns)) print(tableHeader(columns))
attrs = ['' for c in columns] attrs = ['' for c in columns]
attrs[0] = None
attrs[1] = None
for r in rows: for r in rows:
threshold = float(r[3]) if int(r[1]):
if threshold > 0.2: attrs[6] = 'style="background: #ffb0a0"'
attrs[4] = 'style="background: #ffb0a0"' else:
attrs[6] = ''
if int(r[0]):
if float(r[4]) > 0.:
attrs[4] = 'style="background: #ffb0a0"'
else:
attrs[4] = 'style="background: #adbdff"'
else: else:
attrs[4] = '' attrs[4] = ''
diff = float(r[2]) if (float(r[2]) + float(r[3])) / 2 > allowed_single_run_time:
if abs(diff) > threshold and threshold >= 0.05: attrs[2] = 'style="background: #ffb0a0"'
if diff > 0.: attrs[3] = 'style="background: #ffb0a0"'
attrs[3] = 'style="background: #ffb0a0"'
else:
attrs[3] = 'style="background: #adbdff"'
else: else:
attrs[2] = ''
attrs[3] = '' attrs[3] = ''
print(tableRow(r, attrs)) print(tableRow(r, attrs))
@ -386,6 +423,8 @@ elif args.report == 'all-queries':
print_all_queries() print_all_queries()
print_report_errors()
print(""" print("""
<p class="links"> <p class="links">
<a href="output.7z">Test output</a> <a href="output.7z">Test output</a>

View File

@ -20,10 +20,10 @@ RUN apt-get --allow-unauthenticated update -y \
# apt-get --allow-unauthenticated install --yes --no-install-recommends \ # apt-get --allow-unauthenticated install --yes --no-install-recommends \
# pvs-studio # pvs-studio
ENV PKG_VERSION="pvs-studio-7.07.37949.43-amd64.deb" ENV PKG_VERSION="pvs-studio-7.07.38234.46-amd64.deb"
RUN wget -q http://files.viva64.com/$PKG_VERSION RUN wget "http://files.viva64.com/$PKG_VERSION"
RUN sudo dpkg -i $PKG_VERSION RUN sudo dpkg -i "$PKG_VERSION"
CMD cd /repo_folder && pvs-studio-analyzer credentials $LICENCE_NAME $LICENCE_KEY -o ./licence.lic \ CMD cd /repo_folder && pvs-studio-analyzer credentials $LICENCE_NAME $LICENCE_KEY -o ./licence.lic \
&& cmake . && ninja re2_st && \ && cmake . && ninja re2_st && \

View File

@ -0,0 +1,11 @@
sudo apt-get install apt-transport-https ca-certificates dirmngr
sudo apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv E0C56BD4
echo "deb https://repo.clickhouse.tech/deb/stable/ main/" | sudo tee \
/etc/apt/sources.list.d/clickhouse.list
sudo apt-get update
sudo apt-get install -y clickhouse-server clickhouse-client
sudo service clickhouse-server start
clickhouse-client

View File

@ -0,0 +1,8 @@
sudo yum install yum-utils
sudo rpm --import https://repo.clickhouse.tech/CLICKHOUSE-KEY.GPG
sudo yum-config-manager --add-repo https://repo.clickhouse.tech/rpm/stable/x86_64
sudo yum install clickhouse-server clickhouse-client
sudo /etc/init.d/clickhouse-server start
clickhouse-client

View File

@ -0,0 +1,19 @@
export LATEST_VERSION=$(curl -s https://repo.clickhouse.tech/tgz/stable/ | \
grep -Eo '[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+' | sort -V -r | head -n 1)
curl -O https://repo.clickhouse.tech/tgz/stable/clickhouse-common-static-$LATEST_VERSION.tgz
curl -O https://repo.clickhouse.tech/tgz/stable/clickhouse-common-static-dbg-$LATEST_VERSION.tgz
curl -O https://repo.clickhouse.tech/tgz/stable/clickhouse-server-$LATEST_VERSION.tgz
curl -O https://repo.clickhouse.tech/tgz/stable/clickhouse-client-$LATEST_VERSION.tgz
tar -xzvf clickhouse-common-static-$LATEST_VERSION.tgz
sudo clickhouse-common-static-$LATEST_VERSION/install/doinst.sh
tar -xzvf clickhouse-common-static-dbg-$LATEST_VERSION.tgz
sudo clickhouse-common-static-dbg-$LATEST_VERSION/install/doinst.sh
tar -xzvf clickhouse-server-$LATEST_VERSION.tgz
sudo clickhouse-server-$LATEST_VERSION/install/doinst.sh
sudo /etc/init.d/clickhouse-server start
tar -xzvf clickhouse-client-$LATEST_VERSION.tgz
sudo clickhouse-client-$LATEST_VERSION/install/doinst.sh

View File

@ -1,6 +1,6 @@
--- ---
toc_title: Cloud
toc_priority: 1 toc_priority: 1
toc_title: Cloud
--- ---
# ClickHouse Cloud Service Providers {#clickhouse-cloud-service-providers} # ClickHouse Cloud Service Providers {#clickhouse-cloud-service-providers}
@ -12,7 +12,7 @@ toc_priority: 1
[Yandex Managed Service for ClickHouse](https://cloud.yandex.com/services/managed-clickhouse?utm_source=referrals&utm_medium=clickhouseofficialsite&utm_campaign=link3) provides the following key features: [Yandex Managed Service for ClickHouse](https://cloud.yandex.com/services/managed-clickhouse?utm_source=referrals&utm_medium=clickhouseofficialsite&utm_campaign=link3) provides the following key features:
- Fully managed ZooKeeper service for [ClickHouse replication](../engines/table_engines/mergetree_family/replication.md) - Fully managed ZooKeeper service for [ClickHouse replication](../engines/table-engines/mergetree-family/replication.md)
- Multiple storage type choices - Multiple storage type choices
- Replicas in different availability zones - Replicas in different availability zones
- Encryption and isolation - Encryption and isolation

View File

@ -1,6 +1,6 @@
--- ---
toc_title: Support
toc_priority: 3 toc_priority: 3
toc_title: Support
--- ---
# ClickHouse Commercial Support Service Providers {#clickhouse-commercial-support-service-providers} # ClickHouse Commercial Support Service Providers {#clickhouse-commercial-support-service-providers}

View File

@ -120,7 +120,7 @@ There are ordinary functions and aggregate functions. For aggregate functions, s
Ordinary functions dont change the number of rows they work as if they are processing each row independently. In fact, functions are not called for individual rows, but for `Block`s of data to implement vectorized query execution. Ordinary functions dont change the number of rows they work as if they are processing each row independently. In fact, functions are not called for individual rows, but for `Block`s of data to implement vectorized query execution.
There are some miscellaneous functions, like [blockSize](../sql_reference/functions/other_functions.md#function-blocksize), [rowNumberInBlock](../sql_reference/functions/other_functions.md#function-rownumberinblock), and [runningAccumulate](../sql_reference/functions/other_functions.md#function-runningaccumulate), that exploit block processing and violate the independence of rows. There are some miscellaneous functions, like [blockSize](../sql-reference/functions/other-functions.md#function-blocksize), [rowNumberInBlock](../sql-reference/functions/other-functions.md#function-rownumberinblock), and [runningAccumulate](../sql-reference/functions/other-functions.md#function-runningaccumulate), that exploit block processing and violate the independence of rows.
ClickHouse has strong typing, so theres no implicit type conversion. If a function doesnt support a specific combination of types, it throws an exception. But functions can work (be overloaded) for many different combinations of types. For example, the `plus` function (to implement the `+` operator) works for any combination of numeric types: `UInt8` + `Float32`, `UInt16` + `Int8`, and so on. Also, some variadic functions can accept any number of arguments, such as the `concat` function. ClickHouse has strong typing, so theres no implicit type conversion. If a function doesnt support a specific combination of types, it throws an exception. But functions can work (be overloaded) for many different combinations of types. For example, the `plus` function (to implement the `+` operator) works for any combination of numeric types: `UInt8` + `Float32`, `UInt16` + `Int8`, and so on. Also, some variadic functions can accept any number of arguments, such as the `concat` function.

View File

@ -0,0 +1,62 @@
---
toc_priority: 66
toc_title: How to Build ClickHouse on Linux for Mac OS X
---
# How to Build ClickHouse on Linux for Mac OS X {#how-to-build-clickhouse-on-linux-for-mac-os-x}
This is for the case when you have Linux machine and want to use it to build `clickhouse` binary that will run on OS X. This is intended for continuous integration checks that run on Linux servers. If you want to build ClickHouse directly on Mac OS X, then proceed with [another instruction](build-osx.md).
The cross-build for Mac OS X is based on the [Build instructions](build.md), follow them first.
# Install Clang-8 {#install-clang-8}
Follow the instructions from https://apt.llvm.org/ for your Ubuntu or Debian setup.
For example the commands for Bionic are like:
``` bash
sudo echo "deb [trusted=yes] http://apt.llvm.org/bionic/ llvm-toolchain-bionic-8 main" >> /etc/apt/sources.list
sudo apt-get install clang-8
```
# Install Cross-Compilation Toolset {#install-cross-compilation-toolset}
Lets remember the path where we install `cctools` as ${CCTOOLS}
``` bash
mkdir ${CCTOOLS}
git clone https://github.com/tpoechtrager/apple-libtapi.git
cd apple-libtapi
INSTALLPREFIX=${CCTOOLS} ./build.sh
./install.sh
cd ..
git clone https://github.com/tpoechtrager/cctools-port.git
cd cctools-port/cctools
./configure --prefix=${CCTOOLS} --with-libtapi=${CCTOOLS} --target=x86_64-apple-darwin
make install
```
Also, we need to download macOS X SDK into the working tree.
``` bash
cd ClickHouse
wget 'https://github.com/phracker/MacOSX-SDKs/releases/download/10.14-beta4/MacOSX10.14.sdk.tar.xz'
mkdir -p build-darwin/cmake/toolchain/darwin-x86_64
tar xJf MacOSX10.14.sdk.tar.xz -C build-darwin/cmake/toolchain/darwin-x86_64 --strip-components=1
```
# Build ClickHouse {#build-clickhouse}
``` bash
cd ClickHouse
mkdir build-osx
CC=clang-8 CXX=clang++-8 cmake . -Bbuild-osx -DCMAKE_TOOLCHAIN_FILE=cmake/darwin/toolchain-x86_64.cmake \
-DCMAKE_AR:FILEPATH=${CCTOOLS}/bin/x86_64-apple-darwin-ar \
-DCMAKE_RANLIB:FILEPATH=${CCTOOLS}/bin/x86_64-apple-darwin-ranlib \
-DLINKER_NAME=${CCTOOLS}/bin/x86_64-apple-darwin-ld
ninja -C build-osx
```
The resulting binary will have a Mach-O executable format and cant be run on Linux.

View File

@ -1,62 +0,0 @@
---
toc_priority: 66
toc_title: How to Build ClickHouse on Linux for Mac OS X
---
# How to Build ClickHouse on Linux for Mac OS X {#how-to-build-clickhouse-on-linux-for-mac-os-x}
This is for the case when you have Linux machine and want to use it to build `clickhouse` binary that will run on OS X. This is intended for continuous integration checks that run on Linux servers. If you want to build ClickHouse directly on Mac OS X, then proceed with [another instruction](build_osx.md).
The cross-build for Mac OS X is based on the [Build instructions](build.md), follow them first.
# Install Clang-8 {#install-clang-8}
Follow the instructions from https://apt.llvm.org/ for your Ubuntu or Debian setup.
For example the commands for Bionic are like:
``` bash
sudo echo "deb [trusted=yes] http://apt.llvm.org/bionic/ llvm-toolchain-bionic-8 main" >> /etc/apt/sources.list
sudo apt-get install clang-8
```
# Install Cross-Compilation Toolset {#install-cross-compilation-toolset}
Lets remember the path where we install `cctools` as ${CCTOOLS}
``` bash
mkdir ${CCTOOLS}
git clone https://github.com/tpoechtrager/apple-libtapi.git
cd apple-libtapi
INSTALLPREFIX=${CCTOOLS} ./build.sh
./install.sh
cd ..
git clone https://github.com/tpoechtrager/cctools-port.git
cd cctools-port/cctools
./configure --prefix=${CCTOOLS} --with-libtapi=${CCTOOLS} --target=x86_64-apple-darwin
make install
```
Also, we need to download macOS X SDK into the working tree.
``` bash
cd ClickHouse
wget 'https://github.com/phracker/MacOSX-SDKs/releases/download/10.14-beta4/MacOSX10.14.sdk.tar.xz'
mkdir -p build-darwin/cmake/toolchain/darwin-x86_64
tar xJf MacOSX10.14.sdk.tar.xz -C build-darwin/cmake/toolchain/darwin-x86_64 --strip-components=1
```
# Build ClickHouse {#build-clickhouse}
``` bash
cd ClickHouse
mkdir build-osx
CC=clang-8 CXX=clang++-8 cmake . -Bbuild-osx -DCMAKE_TOOLCHAIN_FILE=cmake/darwin/toolchain-x86_64.cmake \
-DCMAKE_AR:FILEPATH=${CCTOOLS}/bin/x86_64-apple-darwin-ar \
-DCMAKE_RANLIB:FILEPATH=${CCTOOLS}/bin/x86_64-apple-darwin-ranlib \
-DLINKER_NAME=${CCTOOLS}/bin/x86_64-apple-darwin-ld
ninja -C build-osx
```
The resulting binary will have a Mach-O executable format and cant be run on Linux.

View File

@ -0,0 +1,285 @@
---
toc_priority: 61
toc_title: The Beginner ClickHouse Developer Instruction
---
Building of ClickHouse is supported on Linux, FreeBSD and Mac OS X.
# If You Use Windows {#if-you-use-windows}
If you use Windows, you need to create a virtual machine with Ubuntu. To start working with a virtual machine please install VirtualBox. You can download Ubuntu from the website: https://www.ubuntu.com/\#download. Please create a virtual machine from the downloaded image (you should reserve at least 4GB of RAM for it). To run a command-line terminal in Ubuntu, please locate a program containing the word “terminal” in its name (gnome-terminal, konsole etc.) or just press Ctrl+Alt+T.
# If You Use a 32-bit System {#if-you-use-a-32-bit-system}
ClickHouse cannot work or build on a 32-bit system. You should acquire access to a 64-bit system and you can continue reading.
# Creating a Repository on GitHub {#creating-a-repository-on-github}
To start working with ClickHouse repository you will need a GitHub account.
You probably already have one, but if you dont, please register at https://github.com. In case you do not have SSH keys, you should generate them and then upload them on GitHub. It is required for sending over your patches. It is also possible to use the same SSH keys that you use with any other SSH servers - probably you already have those.
Create a fork of ClickHouse repository. To do that please click on the “fork” button in the upper right corner at https://github.com/ClickHouse/ClickHouse. It will fork your own copy of ClickHouse/ClickHouse to your account.
The development process consists of first committing the intended changes into your fork of ClickHouse and then creating a “pull request” for these changes to be accepted into the main repository (ClickHouse/ClickHouse).
To work with git repositories, please install `git`.
To do that in Ubuntu you would run in the command line terminal:
sudo apt update
sudo apt install git
A brief manual on using Git can be found here: https://services.github.com/on-demand/downloads/github-git-cheat-sheet.pdf.
For a detailed manual on Git see https://git-scm.com/book/en/v2.
# Cloning a Repository to Your Development Machine {#cloning-a-repository-to-your-development-machine}
Next, you need to download the source files onto your working machine. This is called “to clone a repository” because it creates a local copy of the repository on your working machine.
In the command line terminal run:
git clone --recursive git@github.com:your_github_username/ClickHouse.git
cd ClickHouse
Note: please, substitute *your\_github\_username* with what is appropriate!
This command will create a directory `ClickHouse` containing the working copy of the project.
It is important that the path to the working directory contains no whitespaces as it may lead to problems with running the build system.
Please note that ClickHouse repository uses `submodules`. That is what the references to additional repositories are called (i.e. external libraries on which the project depends). It means that when cloning the repository you need to specify the `--recursive` flag as in the example above. If the repository has been cloned without submodules, to download them you need to run the following:
git submodule init
git submodule update
You can check the status with the command: `git submodule status`.
If you get the following error message:
Permission denied (publickey).
fatal: Could not read from remote repository.
Please make sure you have the correct access rights
and the repository exists.
It generally means that the SSH keys for connecting to GitHub are missing. These keys are normally located in `~/.ssh`. For SSH keys to be accepted you need to upload them in the settings section of GitHub UI.
You can also clone the repository via https protocol:
git clone https://github.com/ClickHouse/ClickHouse.git
This, however, will not let you send your changes to the server. You can still use it temporarily and add the SSH keys later replacing the remote address of the repository with `git remote` command.
You can also add original ClickHouse repos address to your local repository to pull updates from there:
git remote add upstream git@github.com:ClickHouse/ClickHouse.git
After successfully running this command you will be able to pull updates from the main ClickHouse repo by running `git pull upstream master`.
## Working with Submodules {#working-with-submodules}
Working with submodules in git could be painful. Next commands will help to manage it:
# ! each command accepts --recursive
# Update remote URLs for submodules. Barely rare case
git submodule sync
# Add new submodules
git submodule init
# Update existing submodules to the current state
git submodule update
# Two last commands could be merged together
git submodule update --init
The next commands would help you to reset all submodules to the initial state (!WARNING! - any changes inside will be deleted):
# Synchronizes submodules' remote URL with .gitmodules
git submodule sync --recursive
# Update the registered submodules with initialize not yet initialized
git submodule update --init --recursive
# Reset all changes done after HEAD
git submodule foreach git reset --hard
# Clean files from .gitignore
git submodule foreach git clean -xfd
# Repeat last 4 commands for all submodule
git submodule foreach git submodule sync --recursive
git submodule foreach git submodule update --init --recursive
git submodule foreach git submodule foreach git reset --hard
git submodule foreach git submodule foreach git clean -xfd
# Build System {#build-system}
ClickHouse uses CMake and Ninja for building.
CMake - a meta-build system that can generate Ninja files (build tasks).
Ninja - a smaller build system with a focus on the speed used to execute those cmake generated tasks.
To install on Ubuntu, Debian or Mint run `sudo apt install cmake ninja-build`.
On CentOS, RedHat run `sudo yum install cmake ninja-build`.
If you use Arch or Gentoo, you probably know it yourself how to install CMake.
For installing CMake and Ninja on Mac OS X first install Homebrew and then install everything else via brew:
/usr/bin/ruby -e "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/master/install)"
brew install cmake ninja
Next, check the version of CMake: `cmake --version`. If it is below 3.3, you should install a newer version from the website: https://cmake.org/download/.
# Optional External Libraries {#optional-external-libraries}
ClickHouse uses several external libraries for building. All of them do not need to be installed separately as they are built together with ClickHouse from the sources located in the submodules. You can check the list in `contrib`.
# C++ Compiler {#c-compiler}
Compilers GCC starting from version 9 and Clang version 8 or above are supported for building ClickHouse.
Official Yandex builds currently use GCC because it generates machine code of slightly better performance (yielding a difference of up to several percent according to our benchmarks). And Clang is more convenient for development usually. Though, our continuous integration (CI) platform runs checks for about a dozen of build combinations.
To install GCC on Ubuntu run: `sudo apt install gcc g++`
Check the version of gcc: `gcc --version`. If it is below 9, then follow the instruction here: https://clickhouse.tech/docs/en/development/build/\#install-gcc-9.
Mac OS X build is supported only for Clang. Just run `brew install llvm`
If you decide to use Clang, you can also install `libc++` and `lld`, if you know what it is. Using `ccache` is also recommended.
# The Building Process {#the-building-process}
Now that you are ready to build ClickHouse we recommend you to create a separate directory `build` inside `ClickHouse` that will contain all of the build artefacts:
mkdir build
cd build
You can have several different directories (build\_release, build\_debug, etc.) for different types of build.
While inside the `build` directory, configure your build by running CMake. Before the first run, you need to define environment variables that specify compiler (version 9 gcc compiler in this example).
Linux:
export CC=gcc-9 CXX=g++-9
cmake ..
Mac OS X:
export CC=clang CXX=clang++
cmake ..
The `CC` variable specifies the compiler for C (short for C Compiler), and `CXX` variable instructs which C++ compiler is to be used for building.
For a faster build, you can resort to the `debug` build type - a build with no optimizations. For that supply the following parameter `-D CMAKE_BUILD_TYPE=Debug`:
cmake -D CMAKE_BUILD_TYPE=Debug ..
You can change the type of build by running this command in the `build` directory.
Run ninja to build:
ninja clickhouse-server clickhouse-client
Only the required binaries are going to be built in this example.
If you require to build all the binaries (utilities and tests), you should run ninja with no parameters:
ninja
Full build requires about 30GB of free disk space or 15GB to build the main binaries.
When a large amount of RAM is available on build machine you should limit the number of build tasks run in parallel with `-j` param:
ninja -j 1 clickhouse-server clickhouse-client
On machines with 4GB of RAM, it is recommended to specify 1, for 8GB of RAM `-j 2` is recommended.
If you get the message: `ninja: error: loading 'build.ninja': No such file or directory`, it means that generating a build configuration has failed and you need to inspect the message above.
Upon the successful start of the building process, youll see the build progress - the number of processed tasks and the total number of tasks.
While building messages about protobuf files in libhdfs2 library like `libprotobuf WARNING` may show up. They affect nothing and are safe to be ignored.
Upon successful build you get an executable file `ClickHouse/<build_dir>/programs/clickhouse`:
ls -l programs/clickhouse
# Running the Built Executable of ClickHouse {#running-the-built-executable-of-clickhouse}
To run the server under the current user you need to navigate to `ClickHouse/programs/server/` (located outside of `build`) and run:
../../../build/programs/clickhouse server
In this case, ClickHouse will use config files located in the current directory. You can run `clickhouse server` from any directory specifying the path to a config file as a command-line parameter `--config-file`.
To connect to ClickHouse with clickhouse-client in another terminal navigate to `ClickHouse/build/programs/` and run `clickhouse client`.
If you get `Connection refused` message on Mac OS X or FreeBSD, try specifying host address 127.0.0.1:
clickhouse client --host 127.0.0.1
You can replace the production version of ClickHouse binary installed in your system with your custom-built ClickHouse binary. To do that install ClickHouse on your machine following the instructions from the official website. Next, run the following:
sudo service clickhouse-server stop
sudo cp ClickHouse/build/programs/clickhouse /usr/bin/
sudo service clickhouse-server start
Note that `clickhouse-client`, `clickhouse-server` and others are symlinks to the commonly shared `clickhouse` binary.
You can also run your custom-built ClickHouse binary with the config file from the ClickHouse package installed on your system:
sudo service clickhouse-server stop
sudo -u clickhouse ClickHouse/build/programs/clickhouse server --config-file /etc/clickhouse-server/config.xml
# IDE (Integrated Development Environment) {#ide-integrated-development-environment}
If you do not know which IDE to use, we recommend that you use CLion. CLion is commercial software, but it offers 30 days free trial period. It is also free of charge for students. CLion can be used both on Linux and on Mac OS X.
KDevelop and QTCreator are other great alternatives of an IDE for developing ClickHouse. KDevelop comes in as a very handy IDE although unstable. If KDevelop crashes after a while upon opening project, you should click “Stop All” button as soon as it has opened the list of projects files. After doing so KDevelop should be fine to work with.
As simple code editors, you can use Sublime Text or Visual Studio Code, or Kate (all of which are available on Linux).
Just in case, it is worth mentioning that CLion creates `build` path on its own, it also on its own selects `debug` for build type, for configuration it uses a version of CMake that is defined in CLion and not the one installed by you, and finally, CLion will use `make` to run build tasks instead of `ninja`. This is normal behaviour, just keep that in mind to avoid confusion.
# Writing Code {#writing-code}
The description of ClickHouse architecture can be found here: https://clickhouse.tech/docs/en/development/architecture/
The Code Style Guide: https://clickhouse.tech/docs/en/development/style/
Writing tests: https://clickhouse.tech/docs/en/development/tests/
List of tasks: https://github.com/ClickHouse/ClickHouse/blob/master/testsructions/easy\_tasks\_sorted\_en.md
# Test Data {#test-data}
Developing ClickHouse often requires loading realistic datasets. It is particularly important for performance testing. We have a specially prepared set of anonymized data from Yandex.Metrica. It requires additionally some 3GB of free disk space. Note that this data is not required to accomplish most of the development tasks.
sudo apt install wget xz-utils
wget https://clickhouse-datasets.s3.yandex.net/hits/tsv/hits_v1.tsv.xz
wget https://clickhouse-datasets.s3.yandex.net/visits/tsv/visits_v1.tsv.xz
xz -v -d hits_v1.tsv.xz
xz -v -d visits_v1.tsv.xz
clickhouse-client
CREATE DATABASE IF NOT EXISTS test
CREATE TABLE test.hits ( WatchID UInt64, JavaEnable UInt8, Title String, GoodEvent Int16, EventTime DateTime, EventDate Date, CounterID UInt32, ClientIP UInt32, ClientIP6 FixedString(16), RegionID UInt32, UserID UInt64, CounterClass Int8, OS UInt8, UserAgent UInt8, URL String, Referer String, URLDomain String, RefererDomain String, Refresh UInt8, IsRobot UInt8, RefererCategories Array(UInt16), URLCategories Array(UInt16), URLRegions Array(UInt32), RefererRegions Array(UInt32), ResolutionWidth UInt16, ResolutionHeight UInt16, ResolutionDepth UInt8, FlashMajor UInt8, FlashMinor UInt8, FlashMinor2 String, NetMajor UInt8, NetMinor UInt8, UserAgentMajor UInt16, UserAgentMinor FixedString(2), CookieEnable UInt8, JavascriptEnable UInt8, IsMobile UInt8, MobilePhone UInt8, MobilePhoneModel String, Params String, IPNetworkID UInt32, TraficSourceID Int8, SearchEngineID UInt16, SearchPhrase String, AdvEngineID UInt8, IsArtifical UInt8, WindowClientWidth UInt16, WindowClientHeight UInt16, ClientTimeZone Int16, ClientEventTime DateTime, SilverlightVersion1 UInt8, SilverlightVersion2 UInt8, SilverlightVersion3 UInt32, SilverlightVersion4 UInt16, PageCharset String, CodeVersion UInt32, IsLink UInt8, IsDownload UInt8, IsNotBounce UInt8, FUniqID UInt64, HID UInt32, IsOldCounter UInt8, IsEvent UInt8, IsParameter UInt8, DontCountHits UInt8, WithHash UInt8, HitColor FixedString(1), UTCEventTime DateTime, Age UInt8, Sex UInt8, Income UInt8, Interests UInt16, Robotness UInt8, GeneralInterests Array(UInt16), RemoteIP UInt32, RemoteIP6 FixedString(16), WindowName Int32, OpenerName Int32, HistoryLength Int16, BrowserLanguage FixedString(2), BrowserCountry FixedString(2), SocialNetwork String, SocialAction String, HTTPError UInt16, SendTiming Int32, DNSTiming Int32, ConnectTiming Int32, ResponseStartTiming Int32, ResponseEndTiming Int32, FetchTiming Int32, RedirectTiming Int32, DOMInteractiveTiming Int32, DOMContentLoadedTiming Int32, DOMCompleteTiming Int32, LoadEventStartTiming Int32, LoadEventEndTiming Int32, NSToDOMContentLoadedTiming Int32, FirstPaintTiming Int32, RedirectCount Int8, SocialSourceNetworkID UInt8, SocialSourcePage String, ParamPrice Int64, ParamOrderID String, ParamCurrency FixedString(3), ParamCurrencyID UInt16, GoalsReached Array(UInt32), OpenstatServiceName String, OpenstatCampaignID String, OpenstatAdID String, OpenstatSourceID String, UTMSource String, UTMMedium String, UTMCampaign String, UTMContent String, UTMTerm String, FromTag String, HasGCLID UInt8, RefererHash UInt64, URLHash UInt64, CLID UInt32, YCLID UInt64, ShareService String, ShareURL String, ShareTitle String, `ParsedParams.Key1` Array(String), `ParsedParams.Key2` Array(String), `ParsedParams.Key3` Array(String), `ParsedParams.Key4` Array(String), `ParsedParams.Key5` Array(String), `ParsedParams.ValueDouble` Array(Float64), IslandID FixedString(16), RequestNum UInt32, RequestTry UInt8) ENGINE = MergeTree PARTITION BY toYYYYMM(EventDate) SAMPLE BY intHash32(UserID) ORDER BY (CounterID, EventDate, intHash32(UserID), EventTime);
CREATE TABLE test.visits ( CounterID UInt32, StartDate Date, Sign Int8, IsNew UInt8, VisitID UInt64, UserID UInt64, StartTime DateTime, Duration UInt32, UTCStartTime DateTime, PageViews Int32, Hits Int32, IsBounce UInt8, Referer String, StartURL String, RefererDomain String, StartURLDomain String, EndURL String, LinkURL String, IsDownload UInt8, TraficSourceID Int8, SearchEngineID UInt16, SearchPhrase String, AdvEngineID UInt8, PlaceID Int32, RefererCategories Array(UInt16), URLCategories Array(UInt16), URLRegions Array(UInt32), RefererRegions Array(UInt32), IsYandex UInt8, GoalReachesDepth Int32, GoalReachesURL Int32, GoalReachesAny Int32, SocialSourceNetworkID UInt8, SocialSourcePage String, MobilePhoneModel String, ClientEventTime DateTime, RegionID UInt32, ClientIP UInt32, ClientIP6 FixedString(16), RemoteIP UInt32, RemoteIP6 FixedString(16), IPNetworkID UInt32, SilverlightVersion3 UInt32, CodeVersion UInt32, ResolutionWidth UInt16, ResolutionHeight UInt16, UserAgentMajor UInt16, UserAgentMinor UInt16, WindowClientWidth UInt16, WindowClientHeight UInt16, SilverlightVersion2 UInt8, SilverlightVersion4 UInt16, FlashVersion3 UInt16, FlashVersion4 UInt16, ClientTimeZone Int16, OS UInt8, UserAgent UInt8, ResolutionDepth UInt8, FlashMajor UInt8, FlashMinor UInt8, NetMajor UInt8, NetMinor UInt8, MobilePhone UInt8, SilverlightVersion1 UInt8, Age UInt8, Sex UInt8, Income UInt8, JavaEnable UInt8, CookieEnable UInt8, JavascriptEnable UInt8, IsMobile UInt8, BrowserLanguage UInt16, BrowserCountry UInt16, Interests UInt16, Robotness UInt8, GeneralInterests Array(UInt16), Params Array(String), `Goals.ID` Array(UInt32), `Goals.Serial` Array(UInt32), `Goals.EventTime` Array(DateTime), `Goals.Price` Array(Int64), `Goals.OrderID` Array(String), `Goals.CurrencyID` Array(UInt32), WatchIDs Array(UInt64), ParamSumPrice Int64, ParamCurrency FixedString(3), ParamCurrencyID UInt16, ClickLogID UInt64, ClickEventID Int32, ClickGoodEvent Int32, ClickEventTime DateTime, ClickPriorityID Int32, ClickPhraseID Int32, ClickPageID Int32, ClickPlaceID Int32, ClickTypeID Int32, ClickResourceID Int32, ClickCost UInt32, ClickClientIP UInt32, ClickDomainID UInt32, ClickURL String, ClickAttempt UInt8, ClickOrderID UInt32, ClickBannerID UInt32, ClickMarketCategoryID UInt32, ClickMarketPP UInt32, ClickMarketCategoryName String, ClickMarketPPName String, ClickAWAPSCampaignName String, ClickPageName String, ClickTargetType UInt16, ClickTargetPhraseID UInt64, ClickContextType UInt8, ClickSelectType Int8, ClickOptions String, ClickGroupBannerID Int32, OpenstatServiceName String, OpenstatCampaignID String, OpenstatAdID String, OpenstatSourceID String, UTMSource String, UTMMedium String, UTMCampaign String, UTMContent String, UTMTerm String, FromTag String, HasGCLID UInt8, FirstVisit DateTime, PredLastVisit Date, LastVisit Date, TotalVisits UInt32, `TraficSource.ID` Array(Int8), `TraficSource.SearchEngineID` Array(UInt16), `TraficSource.AdvEngineID` Array(UInt8), `TraficSource.PlaceID` Array(UInt16), `TraficSource.SocialSourceNetworkID` Array(UInt8), `TraficSource.Domain` Array(String), `TraficSource.SearchPhrase` Array(String), `TraficSource.SocialSourcePage` Array(String), Attendance FixedString(16), CLID UInt32, YCLID UInt64, NormalizedRefererHash UInt64, SearchPhraseHash UInt64, RefererDomainHash UInt64, NormalizedStartURLHash UInt64, StartURLDomainHash UInt64, NormalizedEndURLHash UInt64, TopLevelDomain UInt64, URLScheme UInt64, OpenstatServiceNameHash UInt64, OpenstatCampaignIDHash UInt64, OpenstatAdIDHash UInt64, OpenstatSourceIDHash UInt64, UTMSourceHash UInt64, UTMMediumHash UInt64, UTMCampaignHash UInt64, UTMContentHash UInt64, UTMTermHash UInt64, FromHash UInt64, WebVisorEnabled UInt8, WebVisorActivity UInt32, `ParsedParams.Key1` Array(String), `ParsedParams.Key2` Array(String), `ParsedParams.Key3` Array(String), `ParsedParams.Key4` Array(String), `ParsedParams.Key5` Array(String), `ParsedParams.ValueDouble` Array(Float64), `Market.Type` Array(UInt8), `Market.GoalID` Array(UInt32), `Market.OrderID` Array(String), `Market.OrderPrice` Array(Int64), `Market.PP` Array(UInt32), `Market.DirectPlaceID` Array(UInt32), `Market.DirectOrderID` Array(UInt32), `Market.DirectBannerID` Array(UInt32), `Market.GoodID` Array(String), `Market.GoodName` Array(String), `Market.GoodQuantity` Array(Int32), `Market.GoodPrice` Array(Int64), IslandID FixedString(16)) ENGINE = CollapsingMergeTree(Sign) PARTITION BY toYYYYMM(StartDate) SAMPLE BY intHash32(UserID) ORDER BY (CounterID, StartDate, intHash32(UserID), VisitID);
clickhouse-client --max_insert_block_size 100000 --query "INSERT INTO test.hits FORMAT TSV" < hits_v1.tsv
clickhouse-client --max_insert_block_size 100000 --query "INSERT INTO test.visits FORMAT TSV" < visits_v1.tsv
# Creating Pull Request {#creating-pull-request}
Navigate to your fork repository in GitHubs UI. If you have been developing in a branch, you need to select that branch. There will be a “Pull request” button located on the screen. In essence, this means “create a request for accepting my changes into the main repository”.
A pull request can be created even if the work is not completed yet. In this case please put the word “WIP” (work in progress) at the beginning of the title, it can be changed later. This is useful for cooperative reviewing and discussion of changes as well as for running all of the available tests. It is important that you provide a brief description of your changes, it will later be used for generating release changelogs.
Testing will commence as soon as Yandex employees label your PR with a tag “can be tested”. The results of some first checks (e.g. code style) will come in within several minutes. Build check results will arrive within half an hour. And the main set of tests will report itself within an hour.
The system will prepare ClickHouse binary builds for your pull request individually. To retrieve these builds click the “Details” link next to “ClickHouse build check” entry in the list of checks. There you will find direct links to the built .deb packages of ClickHouse which you can deploy even on your production servers (if you have no fear).
Most probably some of the builds will fail at first times. This is due to the fact that we check builds both with gcc as well as with clang, with almost all of existing warnings (always with the `-Werror` flag) enabled for clang. On that same page, you can find all of the build logs so that you do not have to build ClickHouse in all of the possible ways.

View File

@ -1,285 +0,0 @@
---
toc_priority: 61
toc_title: The Beginner ClickHouse Developer Instruction
---
Building of ClickHouse is supported on Linux, FreeBSD and Mac OS X.
# If You Use Windows {#if-you-use-windows}
If you use Windows, you need to create a virtual machine with Ubuntu. To start working with a virtual machine please install VirtualBox. You can download Ubuntu from the website: https://www.ubuntu.com/\#download. Please create a virtual machine from the downloaded image (you should reserve at least 4GB of RAM for it). To run a command-line terminal in Ubuntu, please locate a program containing the word “terminal” in its name (gnome-terminal, konsole etc.) or just press Ctrl+Alt+T.
# If You Use a 32-bit System {#if-you-use-a-32-bit-system}
ClickHouse cannot work or build on a 32-bit system. You should acquire access to a 64-bit system and you can continue reading.
# Creating a Repository on GitHub {#creating-a-repository-on-github}
To start working with ClickHouse repository you will need a GitHub account.
You probably already have one, but if you dont, please register at https://github.com. In case you do not have SSH keys, you should generate them and then upload them on GitHub. It is required for sending over your patches. It is also possible to use the same SSH keys that you use with any other SSH servers - probably you already have those.
Create a fork of ClickHouse repository. To do that please click on the “fork” button in the upper right corner at https://github.com/ClickHouse/ClickHouse. It will fork your own copy of ClickHouse/ClickHouse to your account.
The development process consists of first committing the intended changes into your fork of ClickHouse and then creating a “pull request” for these changes to be accepted into the main repository (ClickHouse/ClickHouse).
To work with git repositories, please install `git`.
To do that in Ubuntu you would run in the command line terminal:
sudo apt update
sudo apt install git
A brief manual on using Git can be found here: https://services.github.com/on-demand/downloads/github-git-cheat-sheet.pdf.
For a detailed manual on Git see https://git-scm.com/book/en/v2.
# Cloning a Repository to Your Development Machine {#cloning-a-repository-to-your-development-machine}
Next, you need to download the source files onto your working machine. This is called “to clone a repository” because it creates a local copy of the repository on your working machine.
In the command line terminal run:
git clone --recursive git@guthub.com:your_github_username/ClickHouse.git
cd ClickHouse
Note: please, substitute *your\_github\_username* with what is appropriate!
This command will create a directory `ClickHouse` containing the working copy of the project.
It is important that the path to the working directory contains no whitespaces as it may lead to problems with running the build system.
Please note that ClickHouse repository uses `submodules`. That is what the references to additional repositories are called (i.e. external libraries on which the project depends). It means that when cloning the repository you need to specify the `--recursive` flag as in the example above. If the repository has been cloned without submodules, to download them you need to run the following:
git submodule init
git submodule update
You can check the status with the command: `git submodule status`.
If you get the following error message:
Permission denied (publickey).
fatal: Could not read from remote repository.
Please make sure you have the correct access rights
and the repository exists.
It generally means that the SSH keys for connecting to GitHub are missing. These keys are normally located in `~/.ssh`. For SSH keys to be accepted you need to upload them in the settings section of GitHub UI.
You can also clone the repository via https protocol:
git clone https://github.com/ClickHouse/ClickHouse.git
This, however, will not let you send your changes to the server. You can still use it temporarily and add the SSH keys later replacing the remote address of the repository with `git remote` command.
You can also add original ClickHouse repos address to your local repository to pull updates from there:
git remote add upstream git@github.com:ClickHouse/ClickHouse.git
After successfully running this command you will be able to pull updates from the main ClickHouse repo by running `git pull upstream master`.
## Working with Submodules {#working-with-submodules}
Working with submodules in git could be painful. Next commands will help to manage it:
# ! each command accepts --recursive
# Update remote URLs for submodules. Barely rare case
git submodule sync
# Add new submodules
git submodule init
# Update existing submodules to the current state
git submodule update
# Two last commands could be merged together
git submodule update --init
The next commands would help you to reset all submodules to the initial state (!WARNING! - any changes inside will be deleted):
# Synchronizes submodules' remote URL with .gitmodules
git submodule sync --recursive
# Update the registered submodules with initialize not yet initialized
git submodule update --init --recursive
# Reset all changes done after HEAD
git submodule foreach git reset --hard
# Clean files from .gitignore
git submodule foreach git clean -xfd
# Repeat last 4 commands for all submodule
git submodule foreach git submodule sync --recursive
git submodule foreach git submodule update --init --recursive
git submodule foreach git submodule foreach git reset --hard
git submodule foreach git submodule foreach git clean -xfd
# Build System {#build-system}
ClickHouse uses CMake and Ninja for building.
CMake - a meta-build system that can generate Ninja files (build tasks).
Ninja - a smaller build system with a focus on the speed used to execute those cmake generated tasks.
To install on Ubuntu, Debian or Mint run `sudo apt install cmake ninja-build`.
On CentOS, RedHat run `sudo yum install cmake ninja-build`.
If you use Arch or Gentoo, you probably know it yourself how to install CMake.
For installing CMake and Ninja on Mac OS X first install Homebrew and then install everything else via brew:
/usr/bin/ruby -e "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/master/install)"
brew install cmake ninja
Next, check the version of CMake: `cmake --version`. If it is below 3.3, you should install a newer version from the website: https://cmake.org/download/.
# Optional External Libraries {#optional-external-libraries}
ClickHouse uses several external libraries for building. All of them do not need to be installed separately as they are built together with ClickHouse from the sources located in the submodules. You can check the list in `contrib`.
# C++ Compiler {#c-compiler}
Compilers GCC starting from version 9 and Clang version 8 or above are supported for building ClickHouse.
Official Yandex builds currently use GCC because it generates machine code of slightly better performance (yielding a difference of up to several percent according to our benchmarks). And Clang is more convenient for development usually. Though, our continuous integration (CI) platform runs checks for about a dozen of build combinations.
To install GCC on Ubuntu run: `sudo apt install gcc g++`
Check the version of gcc: `gcc --version`. If it is below 9, then follow the instruction here: https://clickhouse.tech/docs/en/development/build/\#install-gcc-9.
Mac OS X build is supported only for Clang. Just run `brew install llvm`
If you decide to use Clang, you can also install `libc++` and `lld`, if you know what it is. Using `ccache` is also recommended.
# The Building Process {#the-building-process}
Now that you are ready to build ClickHouse we recommend you to create a separate directory `build` inside `ClickHouse` that will contain all of the build artefacts:
mkdir build
cd build
You can have several different directories (build\_release, build\_debug, etc.) for different types of build.
While inside the `build` directory, configure your build by running CMake. Before the first run, you need to define environment variables that specify compiler (version 9 gcc compiler in this example).
Linux:
export CC=gcc-9 CXX=g++-9
cmake ..
Mac OS X:
export CC=clang CXX=clang++
cmake ..
The `CC` variable specifies the compiler for C (short for C Compiler), and `CXX` variable instructs which C++ compiler is to be used for building.
For a faster build, you can resort to the `debug` build type - a build with no optimizations. For that supply the following parameter `-D CMAKE_BUILD_TYPE=Debug`:
cmake -D CMAKE_BUILD_TYPE=Debug ..
You can change the type of build by running this command in the `build` directory.
Run ninja to build:
ninja clickhouse-server clickhouse-client
Only the required binaries are going to be built in this example.
If you require to build all the binaries (utilities and tests), you should run ninja with no parameters:
ninja
Full build requires about 30GB of free disk space or 15GB to build the main binaries.
When a large amount of RAM is available on build machine you should limit the number of build tasks run in parallel with `-j` param:
ninja -j 1 clickhouse-server clickhouse-client
On machines with 4GB of RAM, it is recommended to specify 1, for 8GB of RAM `-j 2` is recommended.
If you get the message: `ninja: error: loading 'build.ninja': No such file or directory`, it means that generating a build configuration has failed and you need to inspect the message above.
Upon the successful start of the building process, youll see the build progress - the number of processed tasks and the total number of tasks.
While building messages about protobuf files in libhdfs2 library like `libprotobuf WARNING` may show up. They affect nothing and are safe to be ignored.
Upon successful build you get an executable file `ClickHouse/<build_dir>/programs/clickhouse`:
ls -l programs/clickhouse
# Running the Built Executable of ClickHouse {#running-the-built-executable-of-clickhouse}
To run the server under the current user you need to navigate to `ClickHouse/programs/server/` (located outside of `build`) and run:
../../../build/programs/clickhouse server
In this case, ClickHouse will use config files located in the current directory. You can run `clickhouse server` from any directory specifying the path to a config file as a command-line parameter `--config-file`.
To connect to ClickHouse with clickhouse-client in another terminal navigate to `ClickHouse/build/programs/` and run `clickhouse client`.
If you get `Connection refused` message on Mac OS X or FreeBSD, try specifying host address 127.0.0.1:
clickhouse client --host 127.0.0.1
You can replace the production version of ClickHouse binary installed in your system with your custom-built ClickHouse binary. To do that install ClickHouse on your machine following the instructions from the official website. Next, run the following:
sudo service clickhouse-server stop
sudo cp ClickHouse/build/programs/clickhouse /usr/bin/
sudo service clickhouse-server start
Note that `clickhouse-client`, `clickhouse-server` and others are symlinks to the commonly shared `clickhouse` binary.
You can also run your custom-built ClickHouse binary with the config file from the ClickHouse package installed on your system:
sudo service clickhouse-server stop
sudo -u clickhouse ClickHouse/build/programs/clickhouse server --config-file /etc/clickhouse-server/config.xml
# IDE (Integrated Development Environment) {#ide-integrated-development-environment}
If you do not know which IDE to use, we recommend that you use CLion. CLion is commercial software, but it offers 30 days free trial period. It is also free of charge for students. CLion can be used both on Linux and on Mac OS X.
KDevelop and QTCreator are other great alternatives of an IDE for developing ClickHouse. KDevelop comes in as a very handy IDE although unstable. If KDevelop crashes after a while upon opening project, you should click “Stop All” button as soon as it has opened the list of projects files. After doing so KDevelop should be fine to work with.
As simple code editors, you can use Sublime Text or Visual Studio Code, or Kate (all of which are available on Linux).
Just in case, it is worth mentioning that CLion creates `build` path on its own, it also on its own selects `debug` for build type, for configuration it uses a version of CMake that is defined in CLion and not the one installed by you, and finally, CLion will use `make` to run build tasks instead of `ninja`. This is normal behaviour, just keep that in mind to avoid confusion.
# Writing Code {#writing-code}
The description of ClickHouse architecture can be found here: https://clickhouse.tech/docs/en/development/architecture/
The Code Style Guide: https://clickhouse.tech/docs/en/development/style/
Writing tests: https://clickhouse.tech/docs/en/development/tests/
List of tasks: https://github.com/ClickHouse/ClickHouse/blob/master/testsructions/easy\_tasks\_sorted\_en.md
# Test Data {#test-data}
Developing ClickHouse often requires loading realistic datasets. It is particularly important for performance testing. We have a specially prepared set of anonymized data from Yandex.Metrica. It requires additionally some 3GB of free disk space. Note that this data is not required to accomplish most of the development tasks.
sudo apt install wget xz-utils
wget https://clickhouse-datasets.s3.yandex.net/hits/tsv/hits_v1.tsv.xz
wget https://clickhouse-datasets.s3.yandex.net/visits/tsv/visits_v1.tsv.xz
xz -v -d hits_v1.tsv.xz
xz -v -d visits_v1.tsv.xz
clickhouse-client
CREATE DATABASE IF NOT EXISTS test
CREATE TABLE test.hits ( WatchID UInt64, JavaEnable UInt8, Title String, GoodEvent Int16, EventTime DateTime, EventDate Date, CounterID UInt32, ClientIP UInt32, ClientIP6 FixedString(16), RegionID UInt32, UserID UInt64, CounterClass Int8, OS UInt8, UserAgent UInt8, URL String, Referer String, URLDomain String, RefererDomain String, Refresh UInt8, IsRobot UInt8, RefererCategories Array(UInt16), URLCategories Array(UInt16), URLRegions Array(UInt32), RefererRegions Array(UInt32), ResolutionWidth UInt16, ResolutionHeight UInt16, ResolutionDepth UInt8, FlashMajor UInt8, FlashMinor UInt8, FlashMinor2 String, NetMajor UInt8, NetMinor UInt8, UserAgentMajor UInt16, UserAgentMinor FixedString(2), CookieEnable UInt8, JavascriptEnable UInt8, IsMobile UInt8, MobilePhone UInt8, MobilePhoneModel String, Params String, IPNetworkID UInt32, TraficSourceID Int8, SearchEngineID UInt16, SearchPhrase String, AdvEngineID UInt8, IsArtifical UInt8, WindowClientWidth UInt16, WindowClientHeight UInt16, ClientTimeZone Int16, ClientEventTime DateTime, SilverlightVersion1 UInt8, SilverlightVersion2 UInt8, SilverlightVersion3 UInt32, SilverlightVersion4 UInt16, PageCharset String, CodeVersion UInt32, IsLink UInt8, IsDownload UInt8, IsNotBounce UInt8, FUniqID UInt64, HID UInt32, IsOldCounter UInt8, IsEvent UInt8, IsParameter UInt8, DontCountHits UInt8, WithHash UInt8, HitColor FixedString(1), UTCEventTime DateTime, Age UInt8, Sex UInt8, Income UInt8, Interests UInt16, Robotness UInt8, GeneralInterests Array(UInt16), RemoteIP UInt32, RemoteIP6 FixedString(16), WindowName Int32, OpenerName Int32, HistoryLength Int16, BrowserLanguage FixedString(2), BrowserCountry FixedString(2), SocialNetwork String, SocialAction String, HTTPError UInt16, SendTiming Int32, DNSTiming Int32, ConnectTiming Int32, ResponseStartTiming Int32, ResponseEndTiming Int32, FetchTiming Int32, RedirectTiming Int32, DOMInteractiveTiming Int32, DOMContentLoadedTiming Int32, DOMCompleteTiming Int32, LoadEventStartTiming Int32, LoadEventEndTiming Int32, NSToDOMContentLoadedTiming Int32, FirstPaintTiming Int32, RedirectCount Int8, SocialSourceNetworkID UInt8, SocialSourcePage String, ParamPrice Int64, ParamOrderID String, ParamCurrency FixedString(3), ParamCurrencyID UInt16, GoalsReached Array(UInt32), OpenstatServiceName String, OpenstatCampaignID String, OpenstatAdID String, OpenstatSourceID String, UTMSource String, UTMMedium String, UTMCampaign String, UTMContent String, UTMTerm String, FromTag String, HasGCLID UInt8, RefererHash UInt64, URLHash UInt64, CLID UInt32, YCLID UInt64, ShareService String, ShareURL String, ShareTitle String, `ParsedParams.Key1` Array(String), `ParsedParams.Key2` Array(String), `ParsedParams.Key3` Array(String), `ParsedParams.Key4` Array(String), `ParsedParams.Key5` Array(String), `ParsedParams.ValueDouble` Array(Float64), IslandID FixedString(16), RequestNum UInt32, RequestTry UInt8) ENGINE = MergeTree PARTITION BY toYYYYMM(EventDate) SAMPLE BY intHash32(UserID) ORDER BY (CounterID, EventDate, intHash32(UserID), EventTime);
CREATE TABLE test.visits ( CounterID UInt32, StartDate Date, Sign Int8, IsNew UInt8, VisitID UInt64, UserID UInt64, StartTime DateTime, Duration UInt32, UTCStartTime DateTime, PageViews Int32, Hits Int32, IsBounce UInt8, Referer String, StartURL String, RefererDomain String, StartURLDomain String, EndURL String, LinkURL String, IsDownload UInt8, TraficSourceID Int8, SearchEngineID UInt16, SearchPhrase String, AdvEngineID UInt8, PlaceID Int32, RefererCategories Array(UInt16), URLCategories Array(UInt16), URLRegions Array(UInt32), RefererRegions Array(UInt32), IsYandex UInt8, GoalReachesDepth Int32, GoalReachesURL Int32, GoalReachesAny Int32, SocialSourceNetworkID UInt8, SocialSourcePage String, MobilePhoneModel String, ClientEventTime DateTime, RegionID UInt32, ClientIP UInt32, ClientIP6 FixedString(16), RemoteIP UInt32, RemoteIP6 FixedString(16), IPNetworkID UInt32, SilverlightVersion3 UInt32, CodeVersion UInt32, ResolutionWidth UInt16, ResolutionHeight UInt16, UserAgentMajor UInt16, UserAgentMinor UInt16, WindowClientWidth UInt16, WindowClientHeight UInt16, SilverlightVersion2 UInt8, SilverlightVersion4 UInt16, FlashVersion3 UInt16, FlashVersion4 UInt16, ClientTimeZone Int16, OS UInt8, UserAgent UInt8, ResolutionDepth UInt8, FlashMajor UInt8, FlashMinor UInt8, NetMajor UInt8, NetMinor UInt8, MobilePhone UInt8, SilverlightVersion1 UInt8, Age UInt8, Sex UInt8, Income UInt8, JavaEnable UInt8, CookieEnable UInt8, JavascriptEnable UInt8, IsMobile UInt8, BrowserLanguage UInt16, BrowserCountry UInt16, Interests UInt16, Robotness UInt8, GeneralInterests Array(UInt16), Params Array(String), `Goals.ID` Array(UInt32), `Goals.Serial` Array(UInt32), `Goals.EventTime` Array(DateTime), `Goals.Price` Array(Int64), `Goals.OrderID` Array(String), `Goals.CurrencyID` Array(UInt32), WatchIDs Array(UInt64), ParamSumPrice Int64, ParamCurrency FixedString(3), ParamCurrencyID UInt16, ClickLogID UInt64, ClickEventID Int32, ClickGoodEvent Int32, ClickEventTime DateTime, ClickPriorityID Int32, ClickPhraseID Int32, ClickPageID Int32, ClickPlaceID Int32, ClickTypeID Int32, ClickResourceID Int32, ClickCost UInt32, ClickClientIP UInt32, ClickDomainID UInt32, ClickURL String, ClickAttempt UInt8, ClickOrderID UInt32, ClickBannerID UInt32, ClickMarketCategoryID UInt32, ClickMarketPP UInt32, ClickMarketCategoryName String, ClickMarketPPName String, ClickAWAPSCampaignName String, ClickPageName String, ClickTargetType UInt16, ClickTargetPhraseID UInt64, ClickContextType UInt8, ClickSelectType Int8, ClickOptions String, ClickGroupBannerID Int32, OpenstatServiceName String, OpenstatCampaignID String, OpenstatAdID String, OpenstatSourceID String, UTMSource String, UTMMedium String, UTMCampaign String, UTMContent String, UTMTerm String, FromTag String, HasGCLID UInt8, FirstVisit DateTime, PredLastVisit Date, LastVisit Date, TotalVisits UInt32, `TraficSource.ID` Array(Int8), `TraficSource.SearchEngineID` Array(UInt16), `TraficSource.AdvEngineID` Array(UInt8), `TraficSource.PlaceID` Array(UInt16), `TraficSource.SocialSourceNetworkID` Array(UInt8), `TraficSource.Domain` Array(String), `TraficSource.SearchPhrase` Array(String), `TraficSource.SocialSourcePage` Array(String), Attendance FixedString(16), CLID UInt32, YCLID UInt64, NormalizedRefererHash UInt64, SearchPhraseHash UInt64, RefererDomainHash UInt64, NormalizedStartURLHash UInt64, StartURLDomainHash UInt64, NormalizedEndURLHash UInt64, TopLevelDomain UInt64, URLScheme UInt64, OpenstatServiceNameHash UInt64, OpenstatCampaignIDHash UInt64, OpenstatAdIDHash UInt64, OpenstatSourceIDHash UInt64, UTMSourceHash UInt64, UTMMediumHash UInt64, UTMCampaignHash UInt64, UTMContentHash UInt64, UTMTermHash UInt64, FromHash UInt64, WebVisorEnabled UInt8, WebVisorActivity UInt32, `ParsedParams.Key1` Array(String), `ParsedParams.Key2` Array(String), `ParsedParams.Key3` Array(String), `ParsedParams.Key4` Array(String), `ParsedParams.Key5` Array(String), `ParsedParams.ValueDouble` Array(Float64), `Market.Type` Array(UInt8), `Market.GoalID` Array(UInt32), `Market.OrderID` Array(String), `Market.OrderPrice` Array(Int64), `Market.PP` Array(UInt32), `Market.DirectPlaceID` Array(UInt32), `Market.DirectOrderID` Array(UInt32), `Market.DirectBannerID` Array(UInt32), `Market.GoodID` Array(String), `Market.GoodName` Array(String), `Market.GoodQuantity` Array(Int32), `Market.GoodPrice` Array(Int64), IslandID FixedString(16)) ENGINE = CollapsingMergeTree(Sign) PARTITION BY toYYYYMM(StartDate) SAMPLE BY intHash32(UserID) ORDER BY (CounterID, StartDate, intHash32(UserID), VisitID);
clickhouse-client --max_insert_block_size 100000 --query "INSERT INTO test.hits FORMAT TSV" < hits_v1.tsv
clickhouse-client --max_insert_block_size 100000 --query "INSERT INTO test.visits FORMAT TSV" < visits_v1.tsv
# Creating Pull Request {#creating-pull-request}
Navigate to your fork repository in GitHubs UI. If you have been developing in a branch, you need to select that branch. There will be a “Pull request” button located on the screen. In essence, this means “create a request for accepting my changes into the main repository”.
A pull request can be created even if the work is not completed yet. In this case please put the word “WIP” (work in progress) at the beginning of the title, it can be changed later. This is useful for cooperative reviewing and discussion of changes as well as for running all of the available tests. It is important that you provide a brief description of your changes, it will later be used for generating release changelogs.
Testing will commence as soon as Yandex employees label your PR with a tag “can be tested”. The results of some first checks (e.g. code style) will come in within several minutes. Build check results will arrive within half an hour. And the main set of tests will report itself within an hour.
The system will prepare ClickHouse binary builds for your pull request individually. To retrieve these builds click the “Details” link next to “ClickHouse build check” entry in the list of checks. There you will find direct links to the built .deb packages of ClickHouse which you can deploy even on your production servers (if you have no fear).
Most probably some of the builds will fail at first times. This is due to the fact that we check builds both with gcc as well as with clang, with almost all of existing warnings (always with the `-Werror` flag) enabled for clang. On that same page, you can find all of the build logs so that you do not have to build ClickHouse in all of the possible ways.

View File

@ -0,0 +1,19 @@
---
toc_folder_title: Database Engines
toc_priority: 27
toc_title: Introduction
---
# Database Engines {#database-engines}
Database engines allow you to work with tables.
By default, ClickHouse uses its native database engine, which provides configurable [table engines](../../engines/table-engines/index.md) and an [SQL dialect](../../sql-reference/syntax.md).
You can also use the following database engines:
- [MySQL](mysql.md)
- [Lazy](lazy.md)
[Original article](https://clickhouse.tech/docs/en/database_engines/) <!--hide-->

View File

@ -0,0 +1,133 @@
---
toc_priority: 30
toc_title: MySQL
---
# MySQL {#mysql}
Allows to connect to databases on a remote MySQL server and perform `INSERT` and `SELECT` queries to exchange data between ClickHouse and MySQL.
The `MySQL` database engine translate queries to the MySQL server so you can perform operations such as `SHOW TABLES` or `SHOW CREATE TABLE`.
You cannot perform the following queries:
- `RENAME`
- `CREATE TABLE`
- `ALTER`
## Creating a Database {#creating-a-database}
``` sql
CREATE DATABASE [IF NOT EXISTS] db_name [ON CLUSTER cluster]
ENGINE = MySQL('host:port', ['database' | database], 'user', 'password')
```
**Engine Parameters**
- `host:port` — MySQL server address.
- `database` — Remote database name.
- `user` — MySQL user.
- `password` — User password.
## Data Types Support {#data_types-support}
| MySQL | ClickHouse |
|----------------------------------|--------------------------------------------------------------|
| UNSIGNED TINYINT | [UInt8](../../sql-reference/data-types/int-uint.md) |
| TINYINT | [Int8](../../sql-reference/data-types/int-uint.md) |
| UNSIGNED SMALLINT | [UInt16](../../sql-reference/data-types/int-uint.md) |
| SMALLINT | [Int16](../../sql-reference/data-types/int-uint.md) |
| UNSIGNED INT, UNSIGNED MEDIUMINT | [UInt32](../../sql-reference/data-types/int-uint.md) |
| INT, MEDIUMINT | [Int32](../../sql-reference/data-types/int-uint.md) |
| UNSIGNED BIGINT | [UInt64](../../sql-reference/data-types/int-uint.md) |
| BIGINT | [Int64](../../sql-reference/data-types/int-uint.md) |
| FLOAT | [Float32](../../sql-reference/data-types/float.md) |
| DOUBLE | [Float64](../../sql-reference/data-types/float.md) |
| DATE | [Date](../../sql-reference/data-types/date.md) |
| DATETIME, TIMESTAMP | [DateTime](../../sql-reference/data-types/datetime.md) |
| BINARY | [FixedString](../../sql-reference/data-types/fixedstring.md) |
All other MySQL data types are converted into [String](../../sql-reference/data-types/string.md).
[Nullable](../../sql-reference/data-types/nullable.md) is supported.
## Examples of Use {#examples-of-use}
Table in MySQL:
``` text
mysql> USE test;
Database changed
mysql> CREATE TABLE `mysql_table` (
-> `int_id` INT NOT NULL AUTO_INCREMENT,
-> `float` FLOAT NOT NULL,
-> PRIMARY KEY (`int_id`));
Query OK, 0 rows affected (0,09 sec)
mysql> insert into mysql_table (`int_id`, `float`) VALUES (1,2);
Query OK, 1 row affected (0,00 sec)
mysql> select * from mysql_table;
+------+-----+
| int_id | value |
+------+-----+
| 1 | 2 |
+------+-----+
1 row in set (0,00 sec)
```
Database in ClickHouse, exchanging data with the MySQL server:
``` sql
CREATE DATABASE mysql_db ENGINE = MySQL('localhost:3306', 'test', 'my_user', 'user_password')
```
``` sql
SHOW DATABASES
```
``` text
┌─name─────┐
│ default │
│ mysql_db │
│ system │
└──────────┘
```
``` sql
SHOW TABLES FROM mysql_db
```
``` text
┌─name─────────┐
│ mysql_table │
└──────────────┘
```
``` sql
SELECT * FROM mysql_db.mysql_table
```
``` text
┌─int_id─┬─value─┐
│ 1 │ 2 │
└────────┴───────┘
```
``` sql
INSERT INTO mysql_db.mysql_table VALUES (3,4)
```
``` sql
SELECT * FROM mysql_db.mysql_table
```
``` text
┌─int_id─┬─value─┐
│ 1 │ 2 │
│ 3 │ 4 │
└────────┴───────┘
```
[Original article](https://clickhouse.tech/docs/en/database_engines/mysql/) <!--hide-->

View File

@ -1,19 +0,0 @@
---
toc_folder_title: Database Engines
toc_priority: 27
toc_title: Introduction
---
# Database Engines {#database-engines}
Database engines allow you to work with tables.
By default, ClickHouse uses its native database engine, which provides configurable [table engines](../../engines/table_engines/index.md) and an [SQL dialect](../../sql_reference/syntax.md).
You can also use the following database engines:
- [MySQL](mysql.md)
- [Lazy](lazy.md)
[Original article](https://clickhouse.tech/docs/en/database_engines/) <!--hide-->

View File

@ -1,133 +0,0 @@
---
toc_priority: 30
toc_title: MySQL
---
# MySQL {#mysql}
Allows to connect to databases on a remote MySQL server and perform `INSERT` and `SELECT` queries to exchange data between ClickHouse and MySQL.
The `MySQL` database engine translate queries to the MySQL server so you can perform operations such as `SHOW TABLES` or `SHOW CREATE TABLE`.
You cannot perform the following queries:
- `RENAME`
- `CREATE TABLE`
- `ALTER`
## Creating a Database {#creating-a-database}
``` sql
CREATE DATABASE [IF NOT EXISTS] db_name [ON CLUSTER cluster]
ENGINE = MySQL('host:port', ['database' | database], 'user', 'password')
```
**Engine Parameters**
- `host:port` — MySQL server address.
- `database` — Remote database name.
- `user` — MySQL user.
- `password` — User password.
## Data Types Support {#data_types-support}
| MySQL | ClickHouse |
|----------------------|--------------------------------------|
| UNSIGNED TINYINT | [UInt8](../../sql_reference/data_types/int_uint.md) |
| TINYINT | [Int8](../../sql_reference/data_types/int_uint.md) |
| UNSIGNED SMALLINT | [UInt16](../../sql_reference/data_types/int_uint.md) |
| SMALLINT | [Int16](../../sql_reference/data_types/int_uint.md) |
| UNSIGNED INT, UNSIGNED MEDIUMINT | [UInt32](../../sql_reference/data_types/int_uint.md) |
| INT, MEDIUMINT | [Int32](../../sql_reference/data_types/int_uint.md) |
| UNSIGNED BIGINT | [UInt64](../../sql_reference/data_types/int_uint.md) |
| BIGINT | [Int64](../../sql_reference/data_types/int_uint.md) |
| FLOAT | [Float32](../../sql_reference/data_types/float.md) |
| DOUBLE | [Float64](../../sql_reference/data_types/float.md) |
| DATE | [Date](../../sql_reference/data_types/date.md) |
| DATETIME, TIMESTAMP | [DateTime](../../sql_reference/data_types/datetime.md) |
| BINARY | [FixedString](../../sql_reference/data_types/fixedstring.md) |
All other MySQL data types are converted into [String](../../sql_reference/data_types/string.md).
[Nullable](../../sql_reference/data_types/nullable.md) is supported.
## Examples Of Use {#examples-of-use}
Table in MySQL:
``` text
mysql> USE test;
Database changed
mysql> CREATE TABLE `mysql_table` (
-> `int_id` INT NOT NULL AUTO_INCREMENT,
-> `float` FLOAT NOT NULL,
-> PRIMARY KEY (`int_id`));
Query OK, 0 rows affected (0,09 sec)
mysql> insert into mysql_table (`int_id`, `float`) VALUES (1,2);
Query OK, 1 row affected (0,00 sec)
mysql> select * from mysql_table;
+------+-----+
| int_id | value |
+------+-----+
| 1 | 2 |
+------+-----+
1 row in set (0,00 sec)
```
Database in ClickHouse, exchanging data with the MySQL server:
``` sql
CREATE DATABASE mysql_db ENGINE = MySQL('localhost:3306', 'test', 'my_user', 'user_password')
```
``` sql
SHOW DATABASES
```
``` text
┌─name─────┐
│ default │
│ mysql_db │
│ system │
└──────────┘
```
``` sql
SHOW TABLES FROM mysql_db
```
``` text
┌─name─────────┐
│ mysql_table │
└──────────────┘
```
``` sql
SELECT * FROM mysql_db.mysql_table
```
``` text
┌─int_id─┬─value─┐
│ 1 │ 2 │
└────────┴───────┘
```
``` sql
INSERT INTO mysql_db.mysql_table VALUES (3,4)
```
``` sql
SELECT * FROM mysql_db.mysql_table
```
``` text
┌─int_id─┬─value─┐
│ 1 │ 2 │
│ 3 │ 4 │
└────────┴───────┘
```
[Original article](https://clickhouse.tech/docs/en/database_engines/mysql/) <!--hide-->

View File

@ -0,0 +1,83 @@
---
toc_folder_title: Table Engines
toc_priority: 26
toc_title: Introduction
---
# Table Engines {#table_engines}
The table engine (type of table) determines:
- How and where data is stored, where to write it to, and where to read it from.
- Which queries are supported, and how.
- Concurrent data access.
- Use of indexes, if present.
- Whether multithreaded request execution is possible.
- Data replication parameters.
## Engine Families {#engine-families}
### MergeTree {#mergetree}
The most universal and functional table engines for high-load tasks. The property shared by these engines is quick data insertion with subsequent background data processing. `MergeTree` family engines support data replication (with [Replicated\*](mergetree-family/replication.md) versions of engines), partitioning, and other features not supported in other engines.
Engines in the family:
- [MergeTree](mergetree-family/mergetree.md)
- [ReplacingMergeTree](mergetree-family/replacingmergetree.md)
- [SummingMergeTree](mergetree-family/summingmergetree.md)
- [AggregatingMergeTree](mergetree-family/aggregatingmergetree.md)
- [CollapsingMergeTree](mergetree-family/collapsingmergetree.md)
- [VersionedCollapsingMergeTree](mergetree-family/versionedcollapsingmergetree.md)
- [GraphiteMergeTree](mergetree-family/graphitemergetree.md)
### Log {#log}
Lightweight [engines](log-family/index.md) with minimum functionality. Theyre the most effective when you need to quickly write many small tables (up to approximately 1 million rows) and read them later as a whole.
Engines in the family:
- [TinyLog](log-family/tinylog.md)
- [StripeLog](log-family/stripelog.md)
- [Log](log-family/log.md)
### Integration Engines {#integration-engines}
Engines for communicating with other data storage and processing systems.
Engines in the family:
- [Kafka](integrations/kafka.md)
- [MySQL](integrations/mysql.md)
- [ODBC](integrations/odbc.md)
- [JDBC](integrations/jdbc.md)
- [HDFS](integrations/hdfs.md)
### Special Engines {#special-engines}
Engines in the family:
- [Distributed](special/distributed.md)
- [MaterializedView](special/materializedview.md)
- [Dictionary](special/dictionary.md)
- [Merge](special/merge.md)
- [File](special/file.md)
- [Null](special/null.md)
- [Set](special/set.md)
- [Join](special/join.md)
- [URL](special/url.md)
- [View](special/view.md)
- [Memory](special/memory.md)
- [Buffer](special/buffer.md)
## Virtual Columns {#table_engines-virtual-columns}
Virtual column is an integral table engine attribute that is defined in the engine source code.
You shouldnt specify virtual columns in the `CREATE TABLE` query and you cant see them in `SHOW CREATE TABLE` and `DESCRIBE TABLE` query results. Virtual columns are also read-only, so you cant insert data into virtual columns.
To select data from a virtual column, you must specify its name in the `SELECT` query. `SELECT *` doesnt return values from virtual columns.
If you create a table with a column that has the same name as one of the table virtual columns, the virtual column becomes inaccessible. We dont recommend doing this. To help avoid conflicts, virtual column names are usually prefixed with an underscore.
[Original article](https://clickhouse.tech/docs/en/operations/table_engines/) <!--hide-->

View File

@ -0,0 +1,121 @@
---
toc_priority: 36
toc_title: HDFS
---
# HDFS {#table_engines-hdfs}
This engine provides integration with [Apache Hadoop](https://en.wikipedia.org/wiki/Apache_Hadoop) ecosystem by allowing to manage data on [HDFS](https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-hdfs/HdfsDesign.html)via ClickHouse. This engine is similar
to the [File](../special/file.md) and [URL](../special/url.md) engines, but provides Hadoop-specific features.
## Usage {#usage}
``` sql
ENGINE = HDFS(URI, format)
```
The `URI` parameter is the whole file URI in HDFS.
The `format` parameter specifies one of the available file formats. To perform
`SELECT` queries, the format must be supported for input, and to perform
`INSERT` queries for output. The available formats are listed in the
[Formats](../../../interfaces/formats.md#formats) section.
The path part of `URI` may contain globs. In this case the table would be readonly.
**Example:**
**1.** Set up the `hdfs_engine_table` table:
``` sql
CREATE TABLE hdfs_engine_table (name String, value UInt32) ENGINE=HDFS('hdfs://hdfs1:9000/other_storage', 'TSV')
```
**2.** Fill file:
``` sql
INSERT INTO hdfs_engine_table VALUES ('one', 1), ('two', 2), ('three', 3)
```
**3.** Query the data:
``` sql
SELECT * FROM hdfs_engine_table LIMIT 2
```
``` text
┌─name─┬─value─┐
│ one │ 1 │
│ two │ 2 │
└──────┴───────┘
```
## Implementation Details {#implementation-details}
- Reads and writes can be parallel
- Not supported:
- `ALTER` and `SELECT...SAMPLE` operations.
- Indexes.
- Replication.
**Globs in path**
Multiple path components can have globs. For being processed file should exists and matches to the whole path pattern. Listing of files determines during `SELECT` (not at `CREATE` moment).
- `*` — Substitutes any number of any characters except `/` including empty string.
- `?` — Substitutes any single character.
- `{some_string,another_string,yet_another_one}` — Substitutes any of strings `'some_string', 'another_string', 'yet_another_one'`.
- `{N..M}` — Substitutes any number in range from N to M including both borders.
Constructions with `{}` are similar to the [remote](../../../sql-reference/table-functions/remote.md) table function.
**Example**
1. Suppose we have several files in TSV format with the following URIs on HDFS:
- hdfs://hdfs1:9000/some\_dir/some\_file\_1
- hdfs://hdfs1:9000/some\_dir/some\_file\_2
- hdfs://hdfs1:9000/some\_dir/some\_file\_3
- hdfs://hdfs1:9000/another\_dir/some\_file\_1
- hdfs://hdfs1:9000/another\_dir/some\_file\_2
- hdfs://hdfs1:9000/another\_dir/some\_file\_3
1. There are several ways to make a table consisting of all six files:
<!-- -->
``` sql
CREATE TABLE table_with_range (name String, value UInt32) ENGINE = HDFS('hdfs://hdfs1:9000/{some,another}_dir/some_file_{1..3}', 'TSV')
```
Another way:
``` sql
CREATE TABLE table_with_question_mark (name String, value UInt32) ENGINE = HDFS('hdfs://hdfs1:9000/{some,another}_dir/some_file_?', 'TSV')
```
Table consists of all the files in both directories (all files should satisfy format and schema described in query):
``` sql
CREATE TABLE table_with_asterisk (name String, value UInt32) ENGINE = HDFS('hdfs://hdfs1:9000/{some,another}_dir/*', 'TSV')
```
!!! warning "Warning"
If the listing of files contains number ranges with leading zeros, use the construction with braces for each digit separately or use `?`.
**Example**
Create table with files named `file000`, `file001`, … , `file999`:
``` sql
CREARE TABLE big_table (name String, value UInt32) ENGINE = HDFS('hdfs://hdfs1:9000/big_dir/file{0..9}{0..9}{0..9}', 'CSV')
```
## Virtual Columns {#virtual-columns}
- `_path` — Path to the file.
- `_file` — Name of the file.
**See Also**
- [Virtual columns](../index.md#table_engines-virtual_columns)
[Original article](https://clickhouse.tech/docs/en/operations/table_engines/hdfs/) <!--hide-->

View File

@ -0,0 +1,6 @@
---
toc_folder_title: Integrations
toc_priority: 30
---

View File

@ -0,0 +1,88 @@
---
toc_priority: 34
toc_title: JDBC
---
# JDBC {#table-engine-jdbc}
Allows ClickHouse to connect to external databases via [JDBC](https://en.wikipedia.org/wiki/Java_Database_Connectivity).
To implement the JDBC connection, ClickHouse uses the separate program [clickhouse-jdbc-bridge](https://github.com/alex-krash/clickhouse-jdbc-bridge) that should run as a daemon.
This engine supports the [Nullable](../../../sql-reference/data-types/nullable.md) data type.
## Creating a Table {#creating-a-table}
``` sql
CREATE TABLE [IF NOT EXISTS] [db.]table_name
(
columns list...
)
ENGINE = JDBC(dbms_uri, external_database, external_table)
```
**Engine Parameters**
- `dbms_uri` — URI of an external DBMS.
Format: `jdbc:<driver_name>://<host_name>:<port>/?user=<username>&password=<password>`.
Example for MySQL: `jdbc:mysql://localhost:3306/?user=root&password=root`.
- `external_database` — Database in an external DBMS.
- `external_table` — Name of the table in `external_database`.
## Usage Example {#usage-example}
Creating a table in MySQL server by connecting directly with its console client:
``` text
mysql> CREATE TABLE `test`.`test` (
-> `int_id` INT NOT NULL AUTO_INCREMENT,
-> `int_nullable` INT NULL DEFAULT NULL,
-> `float` FLOAT NOT NULL,
-> `float_nullable` FLOAT NULL DEFAULT NULL,
-> PRIMARY KEY (`int_id`));
Query OK, 0 rows affected (0,09 sec)
mysql> insert into test (`int_id`, `float`) VALUES (1,2);
Query OK, 1 row affected (0,00 sec)
mysql> select * from test;
+------+----------+-----+----------+
| int_id | int_nullable | float | float_nullable |
+------+----------+-----+----------+
| 1 | NULL | 2 | NULL |
+------+----------+-----+----------+
1 row in set (0,00 sec)
```
Creating a table in ClickHouse server and selecting data from it:
``` sql
CREATE TABLE jdbc_table
(
`int_id` Int32,
`int_nullable` Nullable(Int32),
`float` Float32,
`float_nullable` Nullable(Float32)
)
ENGINE JDBC('jdbc:mysql://localhost:3306/?user=root&password=root', 'test', 'test')
```
``` sql
SELECT *
FROM jdbc_table
```
``` text
┌─int_id─┬─int_nullable─┬─float─┬─float_nullable─┐
│ 1 │ ᴺᵁᴸᴸ │ 2 │ ᴺᵁᴸᴸ │
└────────┴──────────────┴───────┴────────────────┘
```
## See Also {#see-also}
- [JDBC table function](../../../sql-reference/table-functions/jdbc.md).
[Original article](https://clickhouse.tech/docs/en/operations/table_engines/jdbc/) <!--hide-->

View File

@ -0,0 +1,174 @@
---
toc_priority: 32
toc_title: Kafka
---
# Kafka {#kafka}
This engine works with [Apache Kafka](http://kafka.apache.org/).
Kafka lets you:
- Publish or subscribe to data flows.
- Organize fault-tolerant storage.
- Process streams as they become available.
## Creating a Table {#table_engine-kafka-creating-a-table}
``` sql
CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
(
name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1],
name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2],
...
) ENGINE = Kafka()
SETTINGS
kafka_broker_list = 'host:port',
kafka_topic_list = 'topic1,topic2,...',
kafka_group_name = 'group_name',
kafka_format = 'data_format'[,]
[kafka_row_delimiter = 'delimiter_symbol',]
[kafka_schema = '',]
[kafka_num_consumers = N,]
[kafka_skip_broken_messages = N]
```
Required parameters:
- `kafka_broker_list` A comma-separated list of brokers (for example, `localhost:9092`).
- `kafka_topic_list` A list of Kafka topics.
- `kafka_group_name` A group of Kafka consumers. Reading margins are tracked for each group separately. If you dont want messages to be duplicated in the cluster, use the same group name everywhere.
- `kafka_format` Message format. Uses the same notation as the SQL `FORMAT` function, such as `JSONEachRow`. For more information, see the [Formats](../../../interfaces/formats.md) section.
Optional parameters:
- `kafka_row_delimiter` Delimiter character, which ends the message.
- `kafka_schema` Parameter that must be used if the format requires a schema definition. For example, [Capn Proto](https://capnproto.org/) requires the path to the schema file and the name of the root `schema.capnp:Message` object.
- `kafka_num_consumers` The number of consumers per table. Default: `1`. Specify more consumers if the throughput of one consumer is insufficient. The total number of consumers should not exceed the number of partitions in the topic, since only one consumer can be assigned per partition.
- `kafka_skip_broken_messages` Kafka message parser tolerance to schema-incompatible messages per block. Default: `0`. If `kafka_skip_broken_messages = N` then the engine skips *N* Kafka messages that cannot be parsed (a message equals a row of data).
Examples:
``` sql
CREATE TABLE queue (
timestamp UInt64,
level String,
message String
) ENGINE = Kafka('localhost:9092', 'topic', 'group1', 'JSONEachRow');
SELECT * FROM queue LIMIT 5;
CREATE TABLE queue2 (
timestamp UInt64,
level String,
message String
) ENGINE = Kafka SETTINGS kafka_broker_list = 'localhost:9092',
kafka_topic_list = 'topic',
kafka_group_name = 'group1',
kafka_format = 'JSONEachRow',
kafka_num_consumers = 4;
CREATE TABLE queue2 (
timestamp UInt64,
level String,
message String
) ENGINE = Kafka('localhost:9092', 'topic', 'group1')
SETTINGS kafka_format = 'JSONEachRow',
kafka_num_consumers = 4;
```
<details markdown="1">
<summary>Deprecated Method for Creating a Table</summary>
!!! attention "Attention"
Do not use this method in new projects. If possible, switch old projects to the method described above.
``` sql
Kafka(kafka_broker_list, kafka_topic_list, kafka_group_name, kafka_format
[, kafka_row_delimiter, kafka_schema, kafka_num_consumers, kafka_skip_broken_messages])
```
</details>
## Description {#description}
The delivered messages are tracked automatically, so each message in a group is only counted once. If you want to get the data twice, then create a copy of the table with another group name.
Groups are flexible and synced on the cluster. For instance, if you have 10 topics and 5 copies of a table in a cluster, then each copy gets 2 topics. If the number of copies changes, the topics are redistributed across the copies automatically. Read more about this at http://kafka.apache.org/intro.
`SELECT` is not particularly useful for reading messages (except for debugging), because each message can be read only once. It is more practical to create real-time threads using materialized views. To do this:
1. Use the engine to create a Kafka consumer and consider it a data stream.
2. Create a table with the desired structure.
3. Create a materialized view that converts data from the engine and puts it into a previously created table.
When the `MATERIALIZED VIEW` joins the engine, it starts collecting data in the background. This allows you to continually receive messages from Kafka and convert them to the required format using `SELECT`.
One kafka table can have as many materialized views as you like, they do not read data from the kafka table directly, but receive new records (in blocks), this way you can write to several tables with different detail level (with grouping - aggregation and without).
Example:
``` sql
CREATE TABLE queue (
timestamp UInt64,
level String,
message String
) ENGINE = Kafka('localhost:9092', 'topic', 'group1', 'JSONEachRow');
CREATE TABLE daily (
day Date,
level String,
total UInt64
) ENGINE = SummingMergeTree(day, (day, level), 8192);
CREATE MATERIALIZED VIEW consumer TO daily
AS SELECT toDate(toDateTime(timestamp)) AS day, level, count() as total
FROM queue GROUP BY day, level;
SELECT level, sum(total) FROM daily GROUP BY level;
```
To improve performance, received messages are grouped into blocks the size of [max\_insert\_block\_size](../../../operations/server-configuration-parameters/settings.md#settings-max_insert_block_size). If the block wasnt formed within [stream\_flush\_interval\_ms](../../../operations/server-configuration-parameters/settings.md) milliseconds, the data will be flushed to the table regardless of the completeness of the block.
To stop receiving topic data or to change the conversion logic, detach the materialized view:
``` sql
DETACH TABLE consumer;
ATTACH TABLE consumer;
```
If you want to change the target table by using `ALTER`, we recommend disabling the material view to avoid discrepancies between the target table and the data from the view.
## Configuration {#configuration}
Similar to GraphiteMergeTree, the Kafka engine supports extended configuration using the ClickHouse config file. There are two configuration keys that you can use: global (`kafka`) and topic-level (`kafka_*`). The global configuration is applied first, and then the topic-level configuration is applied (if it exists).
``` xml
<!-- Global configuration options for all tables of Kafka engine type -->
<kafka>
<debug>cgrp</debug>
<auto_offset_reset>smallest</auto_offset_reset>
</kafka>
<!-- Configuration specific for topic "logs" -->
<kafka_logs>
<retry_backoff_ms>250</retry_backoff_ms>
<fetch_min_bytes>100000</fetch_min_bytes>
</kafka_logs>
```
For a list of possible configuration options, see the [librdkafka configuration reference](https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md). Use the underscore (`_`) instead of a dot in the ClickHouse configuration. For example, `check.crcs=true` will be `<check_crcs>true</check_crcs>`.
## Virtual Columns {#virtual-columns}
- `_topic` — Kafka topic.
- `_key` — Key of the message.
- `_offset` — Offset of the message.
- `_timestamp` — Timestamp of the message.
- `_partition` — Partition of Kafka topic.
**See Also**
- [Virtual columns](../index.md#table_engines-virtual_columns)
[Original article](https://clickhouse.tech/docs/en/operations/table_engines/kafka/) <!--hide-->

View File

@ -0,0 +1,103 @@
---
toc_priority: 33
toc_title: MySQL
---
# Mysql {#mysql}
The MySQL engine allows you to perform `SELECT` queries on data that is stored on a remote MySQL server.
## Creating a Table {#creating-a-table}
``` sql
CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
(
name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1] [TTL expr1],
name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2] [TTL expr2],
...
) ENGINE = MySQL('host:port', 'database', 'table', 'user', 'password'[, replace_query, 'on_duplicate_clause']);
```
See a detailed description of the [CREATE TABLE](../../../sql-reference/statements/create.md#create-table-query) query.
The table structure can differ from the original MySQL table structure:
- Column names should be the same as in the original MySQL table, but you can use just some of these columns and in any order.
- Column types may differ from those in the original MySQL table. ClickHouse tries to [cast](../../../sql-reference/functions/type-conversion-functions.md#type_conversion_function-cast) values to the ClickHouse data types.
**Engine Parameters**
- `host:port` — MySQL server address.
- `database` — Remote database name.
- `table` — Remote table name.
- `user` — MySQL user.
- `password` — User password.
- `replace_query` — Flag that converts `INSERT INTO` queries to `REPLACE INTO`. If `replace_query=1`, the query is substituted.
- `on_duplicate_clause` — The `ON DUPLICATE KEY on_duplicate_clause` expression that is added to the `INSERT` query.
Example: `INSERT INTO t (c1,c2) VALUES ('a', 2) ON DUPLICATE KEY UPDATE c2 = c2 + 1`, where `on_duplicate_clause` is `UPDATE c2 = c2 + 1`. See the [MySQL documentation](https://dev.mysql.com/doc/refman/8.0/en/insert-on-duplicate.html) to find which `on_duplicate_clause` you can use with the `ON DUPLICATE KEY` clause.
To specify `on_duplicate_clause` you need to pass `0` to the `replace_query` parameter. If you simultaneously pass `replace_query = 1` and `on_duplicate_clause`, ClickHouse generates an exception.
Simple `WHERE` clauses such as `=, !=, >, >=, <, <=` are executed on the MySQL server.
The rest of the conditions and the `LIMIT` sampling constraint are executed in ClickHouse only after the query to MySQL finishes.
## Usage Example {#usage-example}
Table in MySQL:
``` text
mysql> CREATE TABLE `test`.`test` (
-> `int_id` INT NOT NULL AUTO_INCREMENT,
-> `int_nullable` INT NULL DEFAULT NULL,
-> `float` FLOAT NOT NULL,
-> `float_nullable` FLOAT NULL DEFAULT NULL,
-> PRIMARY KEY (`int_id`));
Query OK, 0 rows affected (0,09 sec)
mysql> insert into test (`int_id`, `float`) VALUES (1,2);
Query OK, 1 row affected (0,00 sec)
mysql> select * from test;
+------+----------+-----+----------+
| int_id | int_nullable | float | float_nullable |
+------+----------+-----+----------+
| 1 | NULL | 2 | NULL |
+------+----------+-----+----------+
1 row in set (0,00 sec)
```
Table in ClickHouse, retrieving data from the MySQL table created above:
``` sql
CREATE TABLE mysql_table
(
`float_nullable` Nullable(Float32),
`int_id` Int32
)
ENGINE = MySQL('localhost:3306', 'test', 'test', 'bayonet', '123')
```
``` sql
SELECT * FROM mysql_table
```
``` text
┌─float_nullable─┬─int_id─┐
│ ᴺᵁᴸᴸ │ 1 │
└────────────────┴────────┘
```
## See Also {#see-also}
- [The mysql table function](../../../sql-reference/table-functions/mysql.md)
- [Using MySQL as a source of external dictionary](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md#dicts-external_dicts_dict_sources-mysql)
[Original article](https://clickhouse.tech/docs/en/operations/table_engines/mysql/) <!--hide-->

View File

@ -0,0 +1,130 @@
---
toc_priority: 35
toc_title: ODBC
---
# ODBC {#table-engine-odbc}
Allows ClickHouse to connect to external databases via [ODBC](https://en.wikipedia.org/wiki/Open_Database_Connectivity).
To safely implement ODBC connections, ClickHouse uses a separate program `clickhouse-odbc-bridge`. If the ODBC driver is loaded directly from `clickhouse-server`, driver problems can crash the ClickHouse server. ClickHouse automatically starts `clickhouse-odbc-bridge` when it is required. The ODBC bridge program is installed from the same package as the `clickhouse-server`.
This engine supports the [Nullable](../../../sql-reference/data-types/nullable.md) data type.
## Creating a Table {#creating-a-table}
``` sql
CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
(
name1 [type1],
name2 [type2],
...
)
ENGINE = ODBC(connection_settings, external_database, external_table)
```
See a detailed description of the [CREATE TABLE](../../../sql-reference/statements/create.md#create-table-query) query.
The table structure can differ from the source table structure:
- Column names should be the same as in the source table, but you can use just some of these columns and in any order.
- Column types may differ from those in the source table. ClickHouse tries to [cast](../../../sql-reference/functions/type-conversion-functions.md#type_conversion_function-cast) values to the ClickHouse data types.
**Engine Parameters**
- `connection_settings` — Name of the section with connection settings in the `odbc.ini` file.
- `external_database` — Name of a database in an external DBMS.
- `external_table` — Name of a table in the `external_database`.
## Usage Example {#usage-example}
**Retrieving data from the local MySQL installation via ODBC**
This example is checked for Ubuntu Linux 18.04 and MySQL server 5.7.
Ensure that unixODBC and MySQL Connector are installed.
By default (if installed from packages), ClickHouse starts as user `clickhouse`. Thus, you need to create and configure this user in the MySQL server.
``` bash
$ sudo mysql
```
``` sql
mysql> CREATE USER 'clickhouse'@'localhost' IDENTIFIED BY 'clickhouse';
mysql> GRANT ALL PRIVILEGES ON *.* TO 'clickhouse'@'clickhouse' WITH GRANT OPTION;
```
Then configure the connection in `/etc/odbc.ini`.
``` bash
$ cat /etc/odbc.ini
[mysqlconn]
DRIVER = /usr/local/lib/libmyodbc5w.so
SERVER = 127.0.0.1
PORT = 3306
DATABASE = test
USERNAME = clickhouse
PASSWORD = clickhouse
```
You can check the connection using the `isql` utility from the unixODBC installation.
``` bash
$ isql -v mysqlconn
+-------------------------+
| Connected! |
| |
...
```
Table in MySQL:
``` text
mysql> CREATE TABLE `test`.`test` (
-> `int_id` INT NOT NULL AUTO_INCREMENT,
-> `int_nullable` INT NULL DEFAULT NULL,
-> `float` FLOAT NOT NULL,
-> `float_nullable` FLOAT NULL DEFAULT NULL,
-> PRIMARY KEY (`int_id`));
Query OK, 0 rows affected (0,09 sec)
mysql> insert into test (`int_id`, `float`) VALUES (1,2);
Query OK, 1 row affected (0,00 sec)
mysql> select * from test;
+------+----------+-----+----------+
| int_id | int_nullable | float | float_nullable |
+------+----------+-----+----------+
| 1 | NULL | 2 | NULL |
+------+----------+-----+----------+
1 row in set (0,00 sec)
```
Table in ClickHouse, retrieving data from the MySQL table:
``` sql
CREATE TABLE odbc_t
(
`int_id` Int32,
`float_nullable` Nullable(Float32)
)
ENGINE = ODBC('DSN=mysqlconn', 'test', 'test')
```
``` sql
SELECT * FROM odbc_t
```
``` text
┌─int_id─┬─float_nullable─┐
│ 1 │ ᴺᵁᴸᴸ │
└────────┴────────────────┘
```
## See Also {#see-also}
- [ODBC external dictionaries](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md#dicts-external_dicts_dict_sources-odbc)
- [ODBC table function](../../../sql-reference/table-functions/odbc.md)
[Original article](https://clickhouse.tech/docs/en/operations/table_engines/odbc/) <!--hide-->

View File

@ -0,0 +1,6 @@
---
toc_folder_title: Log Family
toc_priority: 29
---

View File

@ -0,0 +1,44 @@
---
toc_priority: 31
toc_title: Introduction
---
# Log Engine Family {#log-engine-family}
These engines were developed for scenarios when you need to quickly write many small tables (up to about 1 million rows) and read them later as a whole.
Engines of the family:
- [StripeLog](stripelog.md)
- [Log](log.md)
- [TinyLog](tinylog.md)
## Common Properties {#common-properties}
Engines:
- Store data on a disk.
- Append data to the end of file when writing.
- Support locks for concurrent data access.
During `INSERT` queries, the table is locked, and other queries for reading and writing data both wait for the table to unlock. If there are no data writing queries, any number of data reading queries can be performed concurrently.
- Do not support [mutation](../../../sql-reference/statements/alter.md#alter-mutations) operations.
- Do not support indexes.
This means that `SELECT` queries for ranges of data are not efficient.
- Do not write data atomically.
You can get a table with corrupted data if something breaks the write operation, for example, abnormal server shutdown.
## Differences {#differences}
The `TinyLog` engine is the simplest in the family and provides the poorest functionality and lowest efficiency. The `TinyLog` engine doesnt support parallel data reading by several threads. It reads data slower than other engines in the family that support parallel reading and it uses almost as many descriptors as the `Log` engine because it stores each column in a separate file. Use it in simple low-load scenarios.
The `Log` and `StripeLog` engines support parallel data reading. When reading data, ClickHouse uses multiple threads. Each thread processes a separate data block. The `Log` engine uses a separate file for each column of the table. `StripeLog` stores all the data in one file. As a result, the `StripeLog` engine uses fewer descriptors in the operating system, but the `Log` engine provides higher efficiency when reading data.
[Original article](https://clickhouse.tech/docs/en/operations/table_engines/log_family/) <!--hide-->

View File

@ -0,0 +1,14 @@
---
toc_priority: 33
toc_title: Log
---
# Log {#log}
Engine belongs to the family of log engines. See the common properties of log engines and their differences in the [Log Engine Family](log-family.md) article.
Log differs from [TinyLog](tinylog.md) in that a small file of “marks” resides with the column files. These marks are written on every data block and contain offsets that indicate where to start reading the file in order to skip the specified number of rows. This makes it possible to read table data in multiple threads.
For concurrent data access, the read operations can be performed simultaneously, while write operations block reads and each other.
The Log engine does not support indexes. Similarly, if writing to a table failed, the table is broken, and reading from it returns an error. The Log engine is appropriate for temporary data, write-once tables, and for testing or demonstration purposes.
[Original article](https://clickhouse.tech/docs/en/operations/table_engines/log/) <!--hide-->

View File

@ -0,0 +1,93 @@
---
toc_priority: 32
toc_title: StripeLog
---
# Stripelog {#stripelog}
This engine belongs to the family of log engines. See the common properties of log engines and their differences in the [Log Engine Family](log-family.md) article.
Use this engine in scenarios when you need to write many tables with a small amount of data (less than 1 million rows).
## Creating a Table {#table_engines-stripelog-creating-a-table}
``` sql
CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
(
column1_name [type1] [DEFAULT|MATERIALIZED|ALIAS expr1],
column2_name [type2] [DEFAULT|MATERIALIZED|ALIAS expr2],
...
) ENGINE = StripeLog
```
See the detailed description of the [CREATE TABLE](../../../sql-reference/statements/create.md#create-table-query) query.
## Writing the Data {#table_engines-stripelog-writing-the-data}
The `StripeLog` engine stores all the columns in one file. For each `INSERT` query, ClickHouse appends the data block to the end of a table file, writing columns one by one.
For each table ClickHouse writes the files:
- `data.bin` — Data file.
- `index.mrk` — File with marks. Marks contain offsets for each column of each data block inserted.
The `StripeLog` engine does not support the `ALTER UPDATE` and `ALTER DELETE` operations.
## Reading the Data {#table_engines-stripelog-reading-the-data}
The file with marks allows ClickHouse to parallelize the reading of data. This means that a `SELECT` query returns rows in an unpredictable order. Use the `ORDER BY` clause to sort rows.
## Example of Use {#table_engines-stripelog-example-of-use}
Creating a table:
``` sql
CREATE TABLE stripe_log_table
(
timestamp DateTime,
message_type String,
message String
)
ENGINE = StripeLog
```
Inserting data:
``` sql
INSERT INTO stripe_log_table VALUES (now(),'REGULAR','The first regular message')
INSERT INTO stripe_log_table VALUES (now(),'REGULAR','The second regular message'),(now(),'WARNING','The first warning message')
```
We used two `INSERT` queries to create two data blocks inside the `data.bin` file.
ClickHouse uses multiple threads when selecting data. Each thread reads a separate data block and returns resulting rows independently as it finishes. As a result, the order of blocks of rows in the output does not match the order of the same blocks in the input in most cases. For example:
``` sql
SELECT * FROM stripe_log_table
```
``` text
┌───────────timestamp─┬─message_type─┬─message────────────────────┐
│ 2019-01-18 14:27:32 │ REGULAR │ The second regular message │
│ 2019-01-18 14:34:53 │ WARNING │ The first warning message │
└─────────────────────┴──────────────┴────────────────────────────┘
┌───────────timestamp─┬─message_type─┬─message───────────────────┐
│ 2019-01-18 14:23:43 │ REGULAR │ The first regular message │
└─────────────────────┴──────────────┴───────────────────────────┘
```
Sorting the results (ascending order by default):
``` sql
SELECT * FROM stripe_log_table ORDER BY timestamp
```
``` text
┌───────────timestamp─┬─message_type─┬─message────────────────────┐
│ 2019-01-18 14:23:43 │ REGULAR │ The first regular message │
│ 2019-01-18 14:27:32 │ REGULAR │ The second regular message │
│ 2019-01-18 14:34:53 │ WARNING │ The first warning message │
└─────────────────────┴──────────────┴────────────────────────────┘
```
[Original article](https://clickhouse.tech/docs/en/operations/table_engines/stripelog/) <!--hide-->

View File

@ -0,0 +1,14 @@
---
toc_priority: 34
toc_title: TinyLog
---
# TinyLog {#tinylog}
The engine belongs to the log engine family. See [Log Engine Family](log-family.md) for common properties of log engines and their differences.
This table engine is typically used with the write-once method: write data one time, then read it as many times as necessary. For example, you can use `TinyLog`-type tables for intermediary data that is processed in small batches. Note that storing data in a large number of small tables is inefficient.
Queries are executed in a single stream. In other words, this engine is intended for relatively small tables (up to about 1,000,000 rows). It makes sense to use this table engine if you have many small tables, since its simpler than the [Log](log.md) engine (fewer files need to be opened).
[Original article](https://clickhouse.tech/docs/en/operations/table_engines/tinylog/) <!--hide-->

View File

@ -0,0 +1,103 @@
---
toc_priority: 35
toc_title: AggregatingMergeTree
---
# Aggregatingmergetree {#aggregatingmergetree}
The engine inherits from [MergeTree](mergetree.md#table_engines-mergetree), altering the logic for data parts merging. ClickHouse replaces all rows with the same primary key (or more accurately, with the same [sorting key](mergetree.md)) with a single row (within a one data part) that stores a combination of states of aggregate functions.
You can use `AggregatingMergeTree` tables for incremental data aggregation, including for aggregated materialized views.
The engine processes all columns with the following types:
- [AggregateFunction](../../../sql-reference/data-types/aggregatefunction.md)
- [SimpleAggregateFunction](../../../sql-reference/data-types/simpleaggregatefunction.md)
It is appropriate to use `AggregatingMergeTree` if it reduces the number of rows by orders.
## Creating a Table {#creating-a-table}
``` sql
CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
(
name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1],
name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2],
...
) ENGINE = AggregatingMergeTree()
[PARTITION BY expr]
[ORDER BY expr]
[SAMPLE BY expr]
[TTL expr]
[SETTINGS name=value, ...]
```
For a description of request parameters, see [request description](../../../sql-reference/statements/create.md).
**Query clauses**
When creating a `AggregatingMergeTree` table the same [clauses](mergetree.md) are required, as when creating a `MergeTree` table.
<details markdown="1">
<summary>Deprecated Method for Creating a Table</summary>
!!! attention "Attention"
Do not use this method in new projects and, if possible, switch the old projects to the method described above.
``` sql
CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
(
name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1],
name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2],
...
) ENGINE [=] AggregatingMergeTree(date-column [, sampling_expression], (primary, key), index_granularity)
```
All of the parameters have the same meaning as in `MergeTree`.
</details>
## SELECT and INSERT {#select-and-insert}
To insert data, use [INSERT SELECT](../../../sql-reference/statements/insert-into.md) query with aggregate -State- functions.
When selecting data from `AggregatingMergeTree` table, use `GROUP BY` clause and the same aggregate functions as when inserting data, but using `-Merge` suffix.
In the results of `SELECT` query, the values of `AggregateFunction` type have implementation-specific binary representation for all of the ClickHouse output formats. If dump data into, for example, `TabSeparated` format with `SELECT` query then this dump can be loaded back using `INSERT` query.
## Example of an Aggregated Materialized View {#example-of-an-aggregated-materialized-view}
`AggregatingMergeTree` materialized view that watches the `test.visits` table:
``` sql
CREATE MATERIALIZED VIEW test.basic
ENGINE = AggregatingMergeTree() PARTITION BY toYYYYMM(StartDate) ORDER BY (CounterID, StartDate)
AS SELECT
CounterID,
StartDate,
sumState(Sign) AS Visits,
uniqState(UserID) AS Users
FROM test.visits
GROUP BY CounterID, StartDate;
```
Inserting data into the `test.visits` table.
``` sql
INSERT INTO test.visits ...
```
The data are inserted in both the table and view `test.basic` that will perform the aggregation.
To get the aggregated data, we need to execute a query such as `SELECT ... GROUP BY ...` from the view `test.basic`:
``` sql
SELECT
StartDate,
sumMerge(Visits) AS Visits,
uniqMerge(Users) AS Users
FROM test.basic
GROUP BY StartDate
ORDER BY StartDate;
```
[Original article](https://clickhouse.tech/docs/en/operations/table_engines/aggregatingmergetree/) <!--hide-->

View File

@ -0,0 +1,307 @@
---
toc_priority: 36
toc_title: CollapsingMergeTree
---
# CollapsingMergeTree {#table_engine-collapsingmergetree}
The engine inherits from [MergeTree](mergetree.md) and adds the logic of rows collapsing to data parts merge algorithm.
`CollapsingMergeTree` asynchronously deletes (collapses) pairs of rows if all of the fields in a sorting key (`ORDER BY`) are equivalent excepting the particular field `Sign` which can have `1` and `-1` values. Rows without a pair are kept. For more details see the [Collapsing](#table_engine-collapsingmergetree-collapsing) section of the document.
The engine may significantly reduce the volume of storage and increase the efficiency of `SELECT` query as a consequence.
## Creating a Table {#creating-a-table}
``` sql
CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
(
name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1],
name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2],
...
) ENGINE = CollapsingMergeTree(sign)
[PARTITION BY expr]
[ORDER BY expr]
[SAMPLE BY expr]
[SETTINGS name=value, ...]
```
For a description of query parameters, see [query description](../../../sql-reference/statements/create.md).
**CollapsingMergeTree Parameters**
- `sign` — Name of the column with the type of row: `1` is a “state” row, `-1` is a “cancel” row.
Column data type — `Int8`.
**Query clauses**
When creating a `CollapsingMergeTree` table, the same [query clauses](mergetree.md#table_engine-mergetree-creating-a-table) are required, as when creating a `MergeTree` table.
<details markdown="1">
<summary>Deprecated Method for Creating a Table</summary>
!!! attention "Attention"
Do not use this method in new projects and, if possible, switch the old projects to the method described above.
``` sql
CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
(
name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1],
name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2],
...
) ENGINE [=] CollapsingMergeTree(date-column [, sampling_expression], (primary, key), index_granularity, sign)
```
All of the parameters excepting `sign` have the same meaning as in `MergeTree`.
- `sign` — Name of the column with the type of row: `1` — “state” row, `-1` — “cancel” row.
Column Data Type — `Int8`.
</details>
## Collapsing {#table_engine-collapsingmergetree-collapsing}
### Data {#data}
Consider the situation where you need to save continually changing data for some object. It sounds logical to have one row for an object and update it at any change, but update operation is expensive and slow for DBMS because it requires rewriting of the data in the storage. If you need to write data quickly, update not acceptable, but you can write the changes of an object sequentially as follows.
Use the particular column `Sign`. If `Sign = 1` it means that the row is a state of an object, lets call it “state” row. If `Sign = -1` it means the cancellation of the state of an object with the same attributes, lets call it “cancel” row.
For example, we want to calculate how much pages users checked at some site and how long they were there. At some moment we write the following row with the state of user activity:
``` text
┌──────────────UserID─┬─PageViews─┬─Duration─┬─Sign─┐
│ 4324182021466249494 │ 5 │ 146 │ 1 │
└─────────────────────┴───────────┴──────────┴──────┘
```
At some moment later we register the change of user activity and write it with the following two rows.
``` text
┌──────────────UserID─┬─PageViews─┬─Duration─┬─Sign─┐
│ 4324182021466249494 │ 5 │ 146 │ -1 │
│ 4324182021466249494 │ 6 │ 185 │ 1 │
└─────────────────────┴───────────┴──────────┴──────┘
```
The first row cancels the previous state of the object (user). It should copy the sorting key fields of the cancelled state excepting `Sign`.
The second row contains the current state.
As we need only the last state of user activity, the rows
``` text
┌──────────────UserID─┬─PageViews─┬─Duration─┬─Sign─┐
│ 4324182021466249494 │ 5 │ 146 │ 1 │
│ 4324182021466249494 │ 5 │ 146 │ -1 │
└─────────────────────┴───────────┴──────────┴──────┘
```
can be deleted collapsing the invalid (old) state of an object. `CollapsingMergeTree` does this while merging of the data parts.
Why we need 2 rows for each change read in the [Algorithm](#table_engine-collapsingmergetree-collapsing-algorithm) paragraph.
**Peculiar properties of such approach**
1. The program that writes the data should remember the state of an object to be able to cancel it. “Cancel” string should contain copies of the sorting key fields of the “state” string and the opposite `Sign`. It increases the initial size of storage but allows to write the data quickly.
2. Long growing arrays in columns reduce the efficiency of the engine due to load for writing. The more straightforward data, the higher the efficiency.
3. The `SELECT` results depend strongly on the consistency of object changes history. Be accurate when preparing data for inserting. You can get unpredictable results in inconsistent data, for example, negative values for non-negative metrics such as session depth.
### Algorithm {#table_engine-collapsingmergetree-collapsing-algorithm}
When ClickHouse merges data parts, each group of consecutive rows with the same sorting key (`ORDER BY`) is reduced to not more than two rows, one with `Sign = 1` (“state” row) and another with `Sign = -1` (“cancel” row). In other words, entries collapse.
For each resulting data part ClickHouse saves:
1. The first “cancel” and the last “state” rows, if the number of “state” and “cancel” rows matches and the last row is a “state” row.
2. The last “state” row, if there are more “state” rows than “cancel” rows.
3. The first “cancel” row, if there are more “cancel” rows than “state” rows.
4. None of the rows, in all other cases.
Also when there are at least 2 more “state” rows than “cancel” rows, or at least 2 more “cancel” rows then “state” rows, the merge continues, but ClickHouse treats this situation as a logical error and records it in the server log. This error can occur if the same data were inserted more than once.
Thus, collapsing should not change the results of calculating statistics.
Changes gradually collapsed so that in the end only the last state of almost every object left.
The `Sign` is required because the merging algorithm doesnt guarantee that all of the rows with the same sorting key will be in the same resulting data part and even on the same physical server. ClickHouse process `SELECT` queries with multiple threads, and it can not predict the order of rows in the result. The aggregation is required if there is a need to get completely “collapsed” data from `CollapsingMergeTree` table.
To finalize collapsing, write a query with `GROUP BY` clause and aggregate functions that account for the sign. For example, to calculate quantity, use `sum(Sign)` instead of `count()`. To calculate the sum of something, use `sum(Sign * x)` instead of `sum(x)`, and so on, and also add `HAVING sum(Sign) > 0`.
The aggregates `count`, `sum` and `avg` could be calculated this way. The aggregate `uniq` could be calculated if an object has at least one state not collapsed. The aggregates `min` and `max` could not be calculated because `CollapsingMergeTree` does not save the values history of the collapsed states.
If you need to extract data without aggregation (for example, to check whether rows are present whose newest values match certain conditions), you can use the `FINAL` modifier for the `FROM` clause. This approach is significantly less efficient.
## Example of Use {#example-of-use}
Example data:
``` text
┌──────────────UserID─┬─PageViews─┬─Duration─┬─Sign─┐
│ 4324182021466249494 │ 5 │ 146 │ 1 │
│ 4324182021466249494 │ 5 │ 146 │ -1 │
│ 4324182021466249494 │ 6 │ 185 │ 1 │
└─────────────────────┴───────────┴──────────┴──────┘
```
Creation of the table:
``` sql
CREATE TABLE UAct
(
UserID UInt64,
PageViews UInt8,
Duration UInt8,
Sign Int8
)
ENGINE = CollapsingMergeTree(Sign)
ORDER BY UserID
```
Insertion of the data:
``` sql
INSERT INTO UAct VALUES (4324182021466249494, 5, 146, 1)
```
``` sql
INSERT INTO UAct VALUES (4324182021466249494, 5, 146, -1),(4324182021466249494, 6, 185, 1)
```
We use two `INSERT` queries to create two different data parts. If we insert the data with one query ClickHouse creates one data part and will not perform any merge ever.
Getting the data:
``` sql
SELECT * FROM UAct
```
``` text
┌──────────────UserID─┬─PageViews─┬─Duration─┬─Sign─┐
│ 4324182021466249494 │ 5 │ 146 │ -1 │
│ 4324182021466249494 │ 6 │ 185 │ 1 │
└─────────────────────┴───────────┴──────────┴──────┘
┌──────────────UserID─┬─PageViews─┬─Duration─┬─Sign─┐
│ 4324182021466249494 │ 5 │ 146 │ 1 │
└─────────────────────┴───────────┴──────────┴──────┘
```
What do we see and where is collapsing?
With two `INSERT` queries, we created 2 data parts. The `SELECT` query was performed in 2 threads, and we got a random order of rows. Collapsing not occurred because there was no merge of the data parts yet. ClickHouse merges data part in an unknown moment which we can not predict.
Thus we need aggregation:
``` sql
SELECT
UserID,
sum(PageViews * Sign) AS PageViews,
sum(Duration * Sign) AS Duration
FROM UAct
GROUP BY UserID
HAVING sum(Sign) > 0
```
``` text
┌──────────────UserID─┬─PageViews─┬─Duration─┐
│ 4324182021466249494 │ 6 │ 185 │
└─────────────────────┴───────────┴──────────┘
```
If we do not need aggregation and want to force collapsing, we can use `FINAL` modifier for `FROM` clause.
``` sql
SELECT * FROM UAct FINAL
```
``` text
┌──────────────UserID─┬─PageViews─┬─Duration─┬─Sign─┐
│ 4324182021466249494 │ 6 │ 185 │ 1 │
└─────────────────────┴───────────┴──────────┴──────┘
```
This way of selecting the data is very inefficient. Dont use it for big tables.
## Example of Another Approach {#example-of-another-approach}
Example data:
``` text
┌──────────────UserID─┬─PageViews─┬─Duration─┬─Sign─┐
│ 4324182021466249494 │ 5 │ 146 │ 1 │
│ 4324182021466249494 │ -5 │ -146 │ -1 │
│ 4324182021466249494 │ 6 │ 185 │ 1 │
└─────────────────────┴───────────┴──────────┴──────┘
```
The idea is that merges take into account only key fields. And in the “Cancel” line we can specify negative values that equalize the previous version of the row when summing without using the Sign column. For this approach, it is necessary to change the data type `PageViews`,`Duration` to store negative values of UInt8 -\> Int16.
``` sql
CREATE TABLE UAct
(
UserID UInt64,
PageViews Int16,
Duration Int16,
Sign Int8
)
ENGINE = CollapsingMergeTree(Sign)
ORDER BY UserID
```
Lets test the approach:
``` sql
insert into UAct values(4324182021466249494, 5, 146, 1);
insert into UAct values(4324182021466249494, -5, -146, -1);
insert into UAct values(4324182021466249494, 6, 185, 1);
select * from UAct final; // avoid using final in production (just for a test or small tables)
```
``` text
┌──────────────UserID─┬─PageViews─┬─Duration─┬─Sign─┐
│ 4324182021466249494 │ 6 │ 185 │ 1 │
└─────────────────────┴───────────┴──────────┴──────┘
```
``` sql
SELECT
UserID,
sum(PageViews) AS PageViews,
sum(Duration) AS Duration
FROM UAct
GROUP BY UserID
```text
┌──────────────UserID─┬─PageViews─┬─Duration─┐
│ 4324182021466249494 │ 6 │ 185 │
└─────────────────────┴───────────┴──────────┘
```
``` sqk
select count() FROM UAct
```
``` text
┌─count()─┐
│ 3 │
└─────────┘
```
``` sql
optimize table UAct final;
select * FROM UAct
```
``` text
┌──────────────UserID─┬─PageViews─┬─Duration─┬─Sign─┐
│ 4324182021466249494 │ 6 │ 185 │ 1 │
└─────────────────────┴───────────┴──────────┴──────┘
```
[Original article](https://clickhouse.tech/docs/en/operations/table_engines/collapsingmergetree/) <!--hide-->

View File

@ -0,0 +1,125 @@
---
toc_priority: 32
toc_title: Custom Partitioning Key
---
# Custom Partitioning Key {#custom-partitioning-key}
Partitioning is available for the [MergeTree](mergetree.md) family tables (including [replicated](replication.md) tables). [Materialized views](../special/materializedview.md) based on MergeTree tables support partitioning, as well.
A partition is a logical combination of records in a table by a specified criterion. You can set a partition by an arbitrary criterion, such as by month, by day, or by event type. Each partition is stored separately to simplify manipulations of this data. When accessing the data, ClickHouse uses the smallest subset of partitions possible.
The partition is specified in the `PARTITION BY expr` clause when [creating a table](mergetree.md#table_engine-mergetree-creating-a-table). The partition key can be any expression from the table columns. For example, to specify partitioning by month, use the expression `toYYYYMM(date_column)`:
``` sql
CREATE TABLE visits
(
VisitDate Date,
Hour UInt8,
ClientID UUID
)
ENGINE = MergeTree()
PARTITION BY toYYYYMM(VisitDate)
ORDER BY Hour;
```
The partition key can also be a tuple of expressions (similar to the [primary key](mergetree.md#primary-keys-and-indexes-in-queries)). For example:
``` sql
ENGINE = ReplicatedCollapsingMergeTree('/clickhouse/tables/name', 'replica1', Sign)
PARTITION BY (toMonday(StartDate), EventType)
ORDER BY (CounterID, StartDate, intHash32(UserID));
```
In this example, we set partitioning by the event types that occurred during the current week.
When inserting new data to a table, this data is stored as a separate part (chunk) sorted by the primary key. In 10-15 minutes after inserting, the parts of the same partition are merged into the entire part.
!!! info "Info"
A merge only works for data parts that have the same value for the partitioning expression. This means **you shouldnt make overly granular partitions** (more than about a thousand partitions). Otherwise, the `SELECT` query performs poorly because of an unreasonably large number of files in the file system and open file descriptors.
Use the [system.parts](../../../operations/system-tables.md#system_tables-parts) table to view the table parts and partitions. For example, lets assume that we have a `visits` table with partitioning by month. Lets perform the `SELECT` query for the `system.parts` table:
``` sql
SELECT
partition,
name,
active
FROM system.parts
WHERE table = 'visits'
```
``` text
┌─partition─┬─name───────────┬─active─┐
│ 201901 │ 201901_1_3_1 │ 0 │
│ 201901 │ 201901_1_9_2 │ 1 │
│ 201901 │ 201901_8_8_0 │ 0 │
│ 201901 │ 201901_9_9_0 │ 0 │
│ 201902 │ 201902_4_6_1 │ 1 │
│ 201902 │ 201902_10_10_0 │ 1 │
│ 201902 │ 201902_11_11_0 │ 1 │
└───────────┴────────────────┴────────┘
```
The `partition` column contains the names of the partitions. There are two partitions in this example: `201901` and `201902`. You can use this column value to specify the partition name in [ALTER … PARTITION](#alter_manipulations-with-partitions) queries.
The `name` column contains the names of the partition data parts. You can use this column to specify the name of the part in the [ALTER ATTACH PART](#alter_attach-partition) query.
Lets break down the name of the first part: `201901_1_3_1`:
- `201901` is the partition name.
- `1` is the minimum number of the data block.
- `3` is the maximum number of the data block.
- `1` is the chunk level (the depth of the merge tree it is formed from).
!!! info "Info"
The parts of old-type tables have the name: `20190117_20190123_2_2_0` (minimum date - maximum date - minimum block number - maximum block number - level).
The `active` column shows the status of the part. `1` is active; `0` is inactive. The inactive parts are, for example, source parts remaining after merging to a larger part. The corrupted data parts are also indicated as inactive.
As you can see in the example, there are several separated parts of the same partition (for example, `201901_1_3_1` and `201901_1_9_2`). This means that these parts are not merged yet. ClickHouse merges the inserted parts of data periodically, approximately 15 minutes after inserting. In addition, you can perform a non-scheduled merge using the [OPTIMIZE](../../../sql-reference/statements/misc.md#misc_operations-optimize) query. Example:
``` sql
OPTIMIZE TABLE visits PARTITION 201902;
```
``` text
┌─partition─┬─name───────────┬─active─┐
│ 201901 │ 201901_1_3_1 │ 0 │
│ 201901 │ 201901_1_9_2 │ 1 │
│ 201901 │ 201901_8_8_0 │ 0 │
│ 201901 │ 201901_9_9_0 │ 0 │
│ 201902 │ 201902_4_6_1 │ 0 │
│ 201902 │ 201902_4_11_2 │ 1 │
│ 201902 │ 201902_10_10_0 │ 0 │
│ 201902 │ 201902_11_11_0 │ 0 │
└───────────┴────────────────┴────────┘
```
Inactive parts will be deleted approximately 10 minutes after merging.
Another way to view a set of parts and partitions is to go into the directory of the table: `/var/lib/clickhouse/data/<database>/<table>/`. For example:
``` bash
/var/lib/clickhouse/data/default/visits$ ls -l
total 40
drwxr-xr-x 2 clickhouse clickhouse 4096 Feb 1 16:48 201901_1_3_1
drwxr-xr-x 2 clickhouse clickhouse 4096 Feb 5 16:17 201901_1_9_2
drwxr-xr-x 2 clickhouse clickhouse 4096 Feb 5 15:52 201901_8_8_0
drwxr-xr-x 2 clickhouse clickhouse 4096 Feb 5 15:52 201901_9_9_0
drwxr-xr-x 2 clickhouse clickhouse 4096 Feb 5 16:17 201902_10_10_0
drwxr-xr-x 2 clickhouse clickhouse 4096 Feb 5 16:17 201902_11_11_0
drwxr-xr-x 2 clickhouse clickhouse 4096 Feb 5 16:19 201902_4_11_2
drwxr-xr-x 2 clickhouse clickhouse 4096 Feb 5 12:09 201902_4_6_1
drwxr-xr-x 2 clickhouse clickhouse 4096 Feb 1 16:48 detached
```
The folders 201901\_1\_1\_0, 201901\_1\_7\_1 and so on are the directories of the parts. Each part relates to a corresponding partition and contains data just for a certain month (the table in this example has partitioning by month).
The `detached` directory contains parts that were detached from the table using the [DETACH](#alter_detach-partition) query. The corrupted parts are also moved to this directory, instead of being deleted. The server does not use the parts from the `detached` directory. You can add, delete, or modify the data in this directory at any time the server will not know about this until you run the [ATTACH](../../../sql-reference/statements/alter.md#alter_attach-partition) query.
Note that on the operating server, you cannot manually change the set of parts or their data on the file system, since the server will not know about it. For non-replicated tables, you can do this when the server is stopped, but it isnt recommended. For replicated tables, the set of parts cannot be changed in any case.
ClickHouse allows you to perform operations with the partitions: delete them, copy from one table to another, or create a backup. See the list of all operations in the section [Manipulations With Partitions and Parts](../../../sql-reference/statements/alter.md#alter_manipulations-with-partitions).
[Original article](https://clickhouse.tech/docs/en/operations/table_engines/custom_partitioning_key/) <!--hide-->

View File

@ -0,0 +1,172 @@
---
toc_priority: 38
toc_title: GraphiteMergeTree
---
# GraphiteMergeTree {#graphitemergetree}
This engine is designed for thinning and aggregating/averaging (rollup) [Graphite](http://graphite.readthedocs.io/en/latest/index.html) data. It may be helpful to developers who want to use ClickHouse as a data store for Graphite.
You can use any ClickHouse table engine to store the Graphite data if you dont need rollup, but if you need a rollup use `GraphiteMergeTree`. The engine reduces the volume of storage and increases the efficiency of queries from Graphite.
The engine inherits properties from [MergeTree](mergetree.md).
## Creating a Table {#creating-table}
``` sql
CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
(
Path String,
Time DateTime,
Value <Numeric_type>,
Version <Numeric_type>
...
) ENGINE = GraphiteMergeTree(config_section)
[PARTITION BY expr]
[ORDER BY expr]
[SAMPLE BY expr]
[SETTINGS name=value, ...]
```
See a detailed description of the [CREATE TABLE](../../../sql-reference/statements/create.md#create-table-query) query.
A table for the Graphite data should have the following columns for the following data:
- Metric name (Graphite sensor). Data type: `String`.
- Time of measuring the metric. Data type: `DateTime`.
- Value of the metric. Data type: any numeric.
- Version of the metric. Data type: any numeric.
ClickHouse saves the rows with the highest version or the last written if versions are the same. Other rows are deleted during the merge of data parts.
The names of these columns should be set in the rollup configuration.
**GraphiteMergeTree parameters**
- `config_section` — Name of the section in the configuration file, where are the rules of rollup set.
**Query clauses**
When creating a `GraphiteMergeTree` table, the same [clauses](mergetree.md#table_engine-mergetree-creating-a-table) are required, as when creating a `MergeTree` table.
<details markdown="1">
<summary>Deprecated Method for Creating a Table</summary>
!!! attention "Attention"
Do not use this method in new projects and, if possible, switch the old projects to the method described above.
``` sql
CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
(
EventDate Date,
Path String,
Time DateTime,
Value <Numeric_type>,
Version <Numeric_type>
...
) ENGINE [=] GraphiteMergeTree(date-column [, sampling_expression], (primary, key), index_granularity, config_section)
```
All of the parameters excepting `config_section` have the same meaning as in `MergeTree`.
- `config_section` — Name of the section in the configuration file, where are the rules of rollup set.
</details>
## Rollup Configuration {#rollup-configuration}
The settings for rollup are defined by the [graphite\_rollup](../../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-graphite_rollup) parameter in the server configuration. The name of the parameter could be any. You can create several configurations and use them for different tables.
Rollup configuration structure:
required-columns
patterns
### Required Columns {#required-columns}
- `path_column_name` — The name of the column storing the metric name (Graphite sensor). Default value: `Path`.
- `time_column_name` — The name of the column storing the time of measuring the metric. Default value: `Time`.
- `value_column_name` — The name of the column storing the value of the metric at the time set in `time_column_name`. Default value: `Value`.
- `version_column_name` — The name of the column storing the version of the metric. Default value: `Timestamp`.
### Patterns {#patterns}
Structure of the `patterns` section:
``` text
pattern
regexp
function
pattern
regexp
age + precision
...
pattern
regexp
function
age + precision
...
pattern
...
default
function
age + precision
...
```
!!! warning "Attention"
Patterns must be strictly ordered:
1. Patterns without `function` or `retention`.
1. Patterns with both `function` and `retention`.
1. Pattern `default`.
When processing a row, ClickHouse checks the rules in the `pattern` sections. Each of `pattern` (including `default`) sections can contain `function` parameter for aggregation, `retention` parameters or both. If the metric name matches the `regexp`, the rules from the `pattern` section (or sections) are applied; otherwise, the rules from the `default` section are used.
Fields for `pattern` and `default` sections:
- `regexp` A pattern for the metric name.
- `age` The minimum age of the data in seconds.
- `precision` How precisely to define the age of the data in seconds. Should be a divisor for 86400 (seconds in a day).
- `function` The name of the aggregating function to apply to data whose age falls within the range `[age, age + precision]`.
### Configuration Example {#configuration-example}
``` xml
<graphite_rollup>
<version_column_name>Version</version_column_name>
<pattern>
<regexp>click_cost</regexp>
<function>any</function>
<retention>
<age>0</age>
<precision>5</precision>
</retention>
<retention>
<age>86400</age>
<precision>60</precision>
</retention>
</pattern>
<default>
<function>max</function>
<retention>
<age>0</age>
<precision>60</precision>
</retention>
<retention>
<age>3600</age>
<precision>300</precision>
</retention>
<retention>
<age>86400</age>
<precision>3600</precision>
</retention>
</default>
</graphite_rollup>
```
[Original article](https://clickhouse.tech/docs/en/operations/table_engines/graphitemergetree/) <!--hide-->

View File

@ -0,0 +1,6 @@
---
toc_folder_title: MergeTree Family
toc_priority: 28
---

View File

@ -0,0 +1,652 @@
---
toc_priority: 30
toc_title: MergeTree
---
# MergeTree {#table_engines-mergetree}
The `MergeTree` engine and other engines of this family (`*MergeTree`) are the most robust ClickHouse table engines.
Engines in the `MergeTree` family are designed for inserting a very large amount of data into a table. The data is quickly written to the table part by part, then rules are applied for merging the parts in the background. This method is much more efficient than continually rewriting the data in storage during insert.
Main features:
- Stores data sorted by primary key.
This allows you to create a small sparse index that helps find data faster.
- Partitions can be used if the [partitioning key](custom-partitioning-key.md) is specified.
ClickHouse supports certain operations with partitions that are more effective than general operations on the same data with the same result. ClickHouse also automatically cuts off the partition data where the partitioning key is specified in the query. This also improves query performance.
- Data replication support.
The family of `ReplicatedMergeTree` tables provides data replication. For more information, see [Data replication](replication.md).
- Data sampling support.
If necessary, you can set the data sampling method in the table.
!!! info "Info"
The [Merge](../special/merge.md) engine does not belong to the `*MergeTree` family.
## Creating a Table {#table_engine-mergetree-creating-a-table}
``` sql
CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
(
name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1] [TTL expr1],
name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2] [TTL expr2],
...
INDEX index_name1 expr1 TYPE type1(...) GRANULARITY value1,
INDEX index_name2 expr2 TYPE type2(...) GRANULARITY value2
) ENGINE = MergeTree()
[PARTITION BY expr]
[ORDER BY expr]
[PRIMARY KEY expr]
[SAMPLE BY expr]
[TTL expr [DELETE|TO DISK 'xxx'|TO VOLUME 'xxx'], ...]
[SETTINGS name=value, ...]
```
For a description of parameters, see the [CREATE query description](../../../sql-reference/statements/create.md).
!!! note "Note"
`INDEX` is an experimental feature, see [Data Skipping Indexes](#table_engine-mergetree-data_skipping-indexes).
### Query Clauses {#mergetree-query-clauses}
- `ENGINE` — Name and parameters of the engine. `ENGINE = MergeTree()`. The `MergeTree` engine does not have parameters.
- `PARTITION BY` — The [partitioning key](custom-partitioning-key.md).
For partitioning by month, use the `toYYYYMM(date_column)` expression, where `date_column` is a column with a date of the type [Date](../../../sql-reference/data-types/date.md). The partition names here have the `"YYYYMM"` format.
- `ORDER BY` — The sorting key.
A tuple of columns or arbitrary expressions. Example: `ORDER BY (CounterID, EventDate)`.
- `PRIMARY KEY` — The primary key if it [differs from the sorting key](#choosing-a-primary-key-that-differs-from-the-sorting-key).
By default the primary key is the same as the sorting key (which is specified by the `ORDER BY` clause). Thus in most cases it is unnecessary to specify a separate `PRIMARY KEY` clause.
- `SAMPLE BY` — An expression for sampling.
If a sampling expression is used, the primary key must contain it. Example: `SAMPLE BY intHash32(UserID) ORDER BY (CounterID, EventDate, intHash32(UserID))`.
- `TTL` — A list of rules specifying storage duration of rows and defining logic of automatic parts movement [between disks and volumes](#table_engine-mergetree-multiple-volumes).
Expression must have one `Date` or `DateTime` column as a result. Example:
`TTL date + INTERVAL 1 DAY`
Type of the rule `DELETE|TO DISK 'xxx'|TO VOLUME 'xxx'` specifies an action to be done with the part if the expression is satisfied (reaches current time): removal of expired rows, moving a part (if expression is satisfied for all rows in a part) to specified disk (`TO DISK 'xxx'`) or to volume (`TO VOLUME 'xxx'`). Default type of the rule is removal (`DELETE`). List of multiple rules can specified, but there should be no more than one `DELETE` rule.
For more details, see [TTL for columns and tables](#table_engine-mergetree-ttl)
- `SETTINGS` — Additional parameters that control the behavior of the `MergeTree`:
- `index_granularity` — Maximum number of data rows between the marks of an index. Default value: 8192. See [Data Storage](#mergetree-data-storage).
- `index_granularity_bytes` — Maximum size of data granules in bytes. Default value: 10Mb. To restrict the granule size only by number of rows, set to 0 (not recommended). See [Data Storage](#mergetree-data-storage).
- `enable_mixed_granularity_parts` — Enables or disables transitioning to control the granule size with the `index_granularity_bytes` setting. Before version 19.11, there was only the `index_granularity` setting for restricting granule size. The `index_granularity_bytes` setting improves ClickHouse performance when selecting data from tables with big rows (tens and hundreds of megabytes). If you have tables with big rows, you can enable this setting for the tables to improve the efficiency of `SELECT` queries.
- `use_minimalistic_part_header_in_zookeeper` — Storage method of the data parts headers in ZooKeeper. If `use_minimalistic_part_header_in_zookeeper=1`, then ZooKeeper stores less data. For more information, see the [setting description](../../../operations/server-configuration-parameters/settings.md#server-settings-use_minimalistic_part_header_in_zookeeper) in “Server configuration parameters”.
- `min_merge_bytes_to_use_direct_io` — The minimum data volume for merge operation that is required for using direct I/O access to the storage disk. When merging data parts, ClickHouse calculates the total storage volume of all the data to be merged. If the volume exceeds `min_merge_bytes_to_use_direct_io` bytes, ClickHouse reads and writes the data to the storage disk using the direct I/O interface (`O_DIRECT` option). If `min_merge_bytes_to_use_direct_io = 0`, then direct I/O is disabled. Default value: `10 * 1024 * 1024 * 1024` bytes.
<a name="mergetree_setting-merge_with_ttl_timeout"></a>
- `merge_with_ttl_timeout` — Minimum delay in seconds before repeating a merge with TTL. Default value: 86400 (1 day).
- `write_final_mark` — Enables or disables writing the final index mark at the end of data part (after the last byte). Default value: 1. Dont turn it off.
- `merge_max_block_size` — Maximum number of rows in block for merge operations. Default value: 8192.
- `storage_policy` — Storage policy. See [Using Multiple Block Devices for Data Storage](#table_engine-mergetree-multiple-volumes).
**Example of Sections Setting**
``` sql
ENGINE MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity=8192
```
In the example, we set partitioning by month.
We also set an expression for sampling as a hash by the user ID. This allows you to pseudorandomize the data in the table for each `CounterID` and `EventDate`. If you define a [SAMPLE](../../../sql-reference/statements/select.md#select-sample-clause) clause when selecting the data, ClickHouse will return an evenly pseudorandom data sample for a subset of users.
The `index_granularity` setting can be omitted because 8192 is the default value.
<details markdown="1">
<summary>Deprecated Method for Creating a Table</summary>
!!! attention "Attention"
Do not use this method in new projects. If possible, switch old projects to the method described above.
``` sql
CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
(
name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1],
name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2],
...
) ENGINE [=] MergeTree(date-column [, sampling_expression], (primary, key), index_granularity)
```
**MergeTree() Parameters**
- `date-column` — The name of a column of the [Date](../../../sql-reference/data-types/date.md) type. ClickHouse automatically creates partitions by month based on this column. The partition names are in the `"YYYYMM"` format.
- `sampling_expression` — An expression for sampling.
- `(primary, key)` — Primary key. Type: [Tuple()](../../../sql-reference/data-types/tuple.md)
- `index_granularity` — The granularity of an index. The number of data rows between the “marks” of an index. The value 8192 is appropriate for most tasks.
**Example**
``` sql
MergeTree(EventDate, intHash32(UserID), (CounterID, EventDate, intHash32(UserID)), 8192)
```
The `MergeTree` engine is configured in the same way as in the example above for the main engine configuration method.
</details>
## Data Storage {#mergetree-data-storage}
A table consists of data parts sorted by primary key.
When data is inserted in a table, separate data parts are created and each of them is lexicographically sorted by primary key. For example, if the primary key is `(CounterID, Date)`, the data in the part is sorted by `CounterID`, and within each `CounterID`, it is ordered by `Date`.
Data belonging to different partitions are separated into different parts. In the background, ClickHouse merges data parts for more efficient storage. Parts belonging to different partitions are not merged. The merge mechanism does not guarantee that all rows with the same primary key will be in the same data part.
Each data part is logically divided into granules. A granule is the smallest indivisible data set that ClickHouse reads when selecting data. ClickHouse doesnt split rows or values, so each granule always contains an integer number of rows. The first row of a granule is marked with the value of the primary key for the row. For each data part, ClickHouse creates an index file that stores the marks. For each column, whether its in the primary key or not, ClickHouse also stores the same marks. These marks let you find data directly in column files.
The granule size is restricted by the `index_granularity` and `index_granularity_bytes` settings of the table engine. The number of rows in a granule lays in the `[1, index_granularity]` range, depending on the size of the rows. The size of a granule can exceed `index_granularity_bytes` if the size of a single row is greater than the value of the setting. In this case, the size of the granule equals the size of the row.
## Primary Keys and Indexes in Queries {#primary-keys-and-indexes-in-queries}
Take the `(CounterID, Date)` primary key as an example. In this case, the sorting and index can be illustrated as follows:
Whole data: [---------------------------------------------]
CounterID: [aaaaaaaaaaaaaaaaaabbbbcdeeeeeeeeeeeeefgggggggghhhhhhhhhiiiiiiiiikllllllll]
Date: [1111111222222233331233211111222222333211111112122222223111112223311122333]
Marks: | | | | | | | | | | |
a,1 a,2 a,3 b,3 e,2 e,3 g,1 h,2 i,1 i,3 l,3
Marks numbers: 0 1 2 3 4 5 6 7 8 9 10
If the data query specifies:
- `CounterID in ('a', 'h')`, the server reads the data in the ranges of marks `[0, 3)` and `[6, 8)`.
- `CounterID IN ('a', 'h') AND Date = 3`, the server reads the data in the ranges of marks `[1, 3)` and `[7, 8)`.
- `Date = 3`, the server reads the data in the range of marks `[1, 10]`.
The examples above show that it is always more effective to use an index than a full scan.
A sparse index allows extra data to be read. When reading a single range of the primary key, up to `index_granularity * 2` extra rows in each data block can be read.
Sparse indexes allow you to work with a very large number of table rows, because in most cases, such indexes fit in the computers RAM.
ClickHouse does not require a unique primary key. You can insert multiple rows with the same primary key.
### Selecting the Primary Key {#selecting-the-primary-key}
The number of columns in the primary key is not explicitly limited. Depending on the data structure, you can include more or fewer columns in the primary key. This may:
- Improve the performance of an index.
If the primary key is `(a, b)`, then adding another column `c` will improve the performance if the following conditions are met:
- There are queries with a condition on column `c`.
- Long data ranges (several times longer than the `index_granularity`) with identical values for `(a, b)` are common. In other words, when adding another column allows you to skip quite long data ranges.
- Improve data compression.
ClickHouse sorts data by primary key, so the higher the consistency, the better the compression.
- Provide additional logic when merging data parts in the [CollapsingMergeTree](collapsingmergetree.md#table_engine-collapsingmergetree) and [SummingMergeTree](summingmergetree.md) engines.
In this case it makes sense to specify the *sorting key* that is different from the primary key.
A long primary key will negatively affect the insert performance and memory consumption, but extra columns in the primary key do not affect ClickHouse performance during `SELECT` queries.
### Choosing a Primary Key that Differs from the Sorting Key {#choosing-a-primary-key-that-differs-from-the-sorting-key}
It is possible to specify a primary key (an expression with values that are written in the index file for each mark) that is different from the sorting key (an expression for sorting the rows in data parts). In this case the primary key expression tuple must be a prefix of the sorting key expression tuple.
This feature is helpful when using the [SummingMergeTree](summingmergetree.md) and
[AggregatingMergeTree](aggregatingmergetree.md) table engines. In a common case when using these engines, the table has two types of columns: *dimensions* and *measures*. Typical queries aggregate values of measure columns with arbitrary `GROUP BY` and filtering by dimensions. Because SummingMergeTree and AggregatingMergeTree aggregate rows with the same value of the sorting key, it is natural to add all dimensions to it. As a result, the key expression consists of a long list of columns and this list must be frequently updated with newly added dimensions.
In this case it makes sense to leave only a few columns in the primary key that will provide efficient range scans and add the remaining dimension columns to the sorting key tuple.
[ALTER](../../../sql-reference/statements/alter.md) of the sorting key is a lightweight operation because when a new column is simultaneously added to the table and to the sorting key, existing data parts dont need to be changed. Since the old sorting key is a prefix of the new sorting key and there is no data in the newly added column, the data is sorted by both the old and new sorting keys at the moment of table modification.
### Use of Indexes and Partitions in Queries {#use-of-indexes-and-partitions-in-queries}
For `SELECT` queries, ClickHouse analyzes whether an index can be used. An index can be used if the `WHERE/PREWHERE` clause has an expression (as one of the conjunction elements, or entirely) that represents an equality or inequality comparison operation, or if it has `IN` or `LIKE` with a fixed prefix on columns or expressions that are in the primary key or partitioning key, or on certain partially repetitive functions of these columns, or logical relationships of these expressions.
Thus, it is possible to quickly run queries on one or many ranges of the primary key. In this example, queries will be fast when run for a specific tracking tag, for a specific tag and date range, for a specific tag and date, for multiple tags with a date range, and so on.
Lets look at the engine configured as follows:
ENGINE MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate) SETTINGS index_granularity=8192
In this case, in queries:
``` sql
SELECT count() FROM table WHERE EventDate = toDate(now()) AND CounterID = 34
SELECT count() FROM table WHERE EventDate = toDate(now()) AND (CounterID = 34 OR CounterID = 42)
SELECT count() FROM table WHERE ((EventDate >= toDate('2014-01-01') AND EventDate <= toDate('2014-01-31')) OR EventDate = toDate('2014-05-01')) AND CounterID IN (101500, 731962, 160656) AND (CounterID = 101500 OR EventDate != toDate('2014-05-01'))
```
ClickHouse will use the primary key index to trim improper data and the monthly partitioning key to trim partitions that are in improper date ranges.
The queries above show that the index is used even for complex expressions. Reading from the table is organized so that using the index cant be slower than a full scan.
In the example below, the index cant be used.
``` sql
SELECT count() FROM table WHERE CounterID = 34 OR URL LIKE '%upyachka%'
```
To check whether ClickHouse can use the index when running a query, use the settings [force\_index\_by\_date](../../../operations/settings/settings.md#settings-force_index_by_date) and [force\_primary\_key](../../../operations/settings/settings.md).
The key for partitioning by month allows reading only those data blocks which contain dates from the proper range. In this case, the data block may contain data for many dates (up to an entire month). Within a block, data is sorted by primary key, which might not contain the date as the first column. Because of this, using a query with only a date condition that does not specify the primary key prefix will cause more data to be read than for a single date.
### Use of Index for Partially-monotonic Primary Keys {#use-of-index-for-partially-monotonic-primary-keys}
Consider, for example, the days of the month. They form a [monotonic sequence](https://en.wikipedia.org/wiki/Monotonic_function) for one month, but not monotonic for more extended periods. This is a partially-monotonic sequence. If a user creates the table with partially-monotonic primary key, ClickHouse creates a sparse index as usual. When a user selects data from this kind of table, ClickHouse analyzes the query conditions. If the user wants to get data between two marks of the index and both these marks fall within one month, ClickHouse can use the index in this particular case because it can calculate the distance between the parameters of a query and index marks.
ClickHouse cannot use an index if the values of the primary key in the query parameter range dont represent a monotonic sequence. In this case, ClickHouse uses the full scan method.
ClickHouse uses this logic not only for days of the month sequences, but for any primary key that represents a partially-monotonic sequence.
### Data Skipping Indexes (experimental) {#table_engine-mergetree-data_skipping-indexes}
The index declaration is in the columns section of the `CREATE` query.
``` sql
INDEX index_name expr TYPE type(...) GRANULARITY granularity_value
```
For tables from the `*MergeTree` family, data skipping indices can be specified.
These indices aggregate some information about the specified expression on blocks, which consist of `granularity_value` granules (the size of the granule is specified using the `index_granularity` setting in the table engine). Then these aggregates are used in `SELECT` queries for reducing the amount of data to read from the disk by skipping big blocks of data where the `where` query cannot be satisfied.
**Example**
``` sql
CREATE TABLE table_name
(
u64 UInt64,
i32 Int32,
s String,
...
INDEX a (u64 * i32, s) TYPE minmax GRANULARITY 3,
INDEX b (u64 * length(s)) TYPE set(1000) GRANULARITY 4
) ENGINE = MergeTree()
...
```
Indices from the example can be used by ClickHouse to reduce the amount of data to read from disk in the following queries:
``` sql
SELECT count() FROM table WHERE s < 'z'
SELECT count() FROM table WHERE u64 * i32 == 10 AND u64 * length(s) >= 1234
```
#### Available Types of Indices {#available-types-of-indices}
- `minmax`
Stores extremes of the specified expression (if the expression is `tuple`, then it stores extremes for each element of `tuple`), uses stored info for skipping blocks of data like the primary key.
- `set(max_rows)`
Stores unique values of the specified expression (no more than `max_rows` rows, `max_rows=0` means “no limits”). Uses the values to check if the `WHERE` expression is not satisfiable on a block of data.
- `ngrambf_v1(n, size_of_bloom_filter_in_bytes, number_of_hash_functions, random_seed)`
Stores a [Bloom filter](https://en.wikipedia.org/wiki/Bloom_filter) that contains all ngrams from a block of data. Works only with strings. Can be used for optimization of `equals`, `like` and `in` expressions.
- `n` — ngram size,
- `size_of_bloom_filter_in_bytes` — Bloom filter size in bytes (you can use large values here, for example, 256 or 512, because it can be compressed well).
- `number_of_hash_functions` — The number of hash functions used in the Bloom filter.
- `random_seed` — The seed for Bloom filter hash functions.
- `tokenbf_v1(size_of_bloom_filter_in_bytes, number_of_hash_functions, random_seed)`
The same as `ngrambf_v1`, but stores tokens instead of ngrams. Tokens are sequences separated by non-alphanumeric characters.
- `bloom_filter([false_positive])` — Stores a [Bloom filter](https://en.wikipedia.org/wiki/Bloom_filter) for the specified columns.
The optional `false_positive` parameter is the probability of receiving a false positive response from the filter. Possible values: (0, 1). Default value: 0.025.
Supported data types: `Int*`, `UInt*`, `Float*`, `Enum`, `Date`, `DateTime`, `String`, `FixedString`, `Array`, `LowCardinality`, `Nullable`.
The following functions can use it: [equals](../../../sql-reference/functions/comparison-functions.md), [notEquals](../../../sql-reference/functions/comparison-functions.md), [in](../../../sql-reference/functions/in-functions.md), [notIn](../../../sql-reference/functions/in-functions.md), [has](../../../sql-reference/functions/array-functions.md).
<!-- -->
``` sql
INDEX sample_index (u64 * length(s)) TYPE minmax GRANULARITY 4
INDEX sample_index2 (u64 * length(str), i32 + f64 * 100, date, str) TYPE set(100) GRANULARITY 4
INDEX sample_index3 (lower(str), str) TYPE ngrambf_v1(3, 256, 2, 0) GRANULARITY 4
```
#### Functions Support {#functions-support}
Conditions in the `WHERE` clause contains calls of the functions that operate with columns. If the column is a part of an index, ClickHouse tries to use this index when performing the functions. ClickHouse supports different subsets of functions for using indexes.
The `set` index can be used with all functions. Function subsets for other indexes are shown in the table below.
| Function (operator) / Index | primary key | minmax | ngrambf\_v1 | tokenbf\_v1 | bloom\_filter |
|------------------------------------------------------------------------------------------------------------|-------------|--------|-------------|-------------|---------------|
| [equals (=, ==)](../../../sql-reference/functions/comparison-functions.md#function-equals) | ✔ | ✔ | ✔ | ✔ | ✔ |
| [notEquals(!=, \<\>)](../../../sql-reference/functions/comparison-functions.md#function-notequals) | ✔ | ✔ | ✔ | ✔ | ✔ |
| [like](../../../sql-reference/functions/string-search-functions.md#function-like) | ✔ | ✔ | ✔ | ✗ | ✗ |
| [notLike](../../../sql-reference/functions/string-search-functions.md#function-notlike) | ✔ | ✔ | ✔ | ✗ | ✗ |
| [startsWith](../../../sql-reference/functions/string-functions.md#startswith) | ✔ | ✔ | ✔ | ✔ | ✗ |
| [endsWith](../../../sql-reference/functions/string-functions.md#endswith) | ✗ | ✗ | ✔ | ✔ | ✗ |
| [multiSearchAny](../../../sql-reference/functions/string-search-functions.md#function-multisearchany) | ✗ | ✗ | ✔ | ✗ | ✗ |
| [in](../../../sql-reference/functions/in-functions.md#in-functions) | ✔ | ✔ | ✔ | ✔ | ✔ |
| [notIn](../../../sql-reference/functions/in-functions.md#in-functions) | ✔ | ✔ | ✔ | ✔ | ✔ |
| [less (\<)](../../../sql-reference/functions/comparison-functions.md#function-less) | ✔ | ✔ | ✗ | ✗ | ✗ |
| [greater (\>)](../../../sql-reference/functions/comparison-functions.md#function-greater) | ✔ | ✔ | ✗ | ✗ | ✗ |
| [lessOrEquals (\<=)](../../../sql-reference/functions/comparison-functions.md#function-lessorequals) | ✔ | ✔ | ✗ | ✗ | ✗ |
| [greaterOrEquals (\>=)](../../../sql-reference/functions/comparison-functions.md#function-greaterorequals) | ✔ | ✔ | ✗ | ✗ | ✗ |
| [empty](../../../sql-reference/functions/array-functions.md#function-empty) | ✔ | ✔ | ✗ | ✗ | ✗ |
| [notEmpty](../../../sql-reference/functions/array-functions.md#function-notempty) | ✔ | ✔ | ✗ | ✗ | ✗ |
| hasToken | ✗ | ✗ | ✗ | ✔ | ✗ |
Functions with a constant argument that is less than ngram size cant be used by `ngrambf_v1` for query optimization.
Bloom filters can have false positive matches, so the `ngrambf_v1`, `tokenbf_v1`, and `bloom_filter` indexes cant be used for optimizing queries where the result of a function is expected to be false, for example:
- Can be optimized:
- `s LIKE '%test%'`
- `NOT s NOT LIKE '%test%'`
- `s = 1`
- `NOT s != 1`
- `startsWith(s, 'test')`
- Cant be optimized:
- `NOT s LIKE '%test%'`
- `s NOT LIKE '%test%'`
- `NOT s = 1`
- `s != 1`
- `NOT startsWith(s, 'test')`
## Concurrent Data Access {#concurrent-data-access}
For concurrent table access, we use multi-versioning. In other words, when a table is simultaneously read and updated, data is read from a set of parts that is current at the time of the query. There are no lengthy locks. Inserts do not get in the way of read operations.
Reading from a table is automatically parallelized.
## TTL for Columns and Tables {#table_engine-mergetree-ttl}
Determines the lifetime of values.
The `TTL` clause can be set for the whole table and for each individual column. Table-level TTL can also specify logic of automatic move of data between disks and volumes.
Expressions must evaluate to [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md) data type.
Example:
``` sql
TTL time_column
TTL time_column + interval
```
To define `interval`, use [time interval](../../../sql-reference/operators.md#operators-datetime) operators.
``` sql
TTL date_time + INTERVAL 1 MONTH
TTL date_time + INTERVAL 15 HOUR
```
### Column TTL {#mergetree-column-ttl}
When the values in the column expire, ClickHouse replaces them with the default values for the column data type. If all the column values in the data part expire, ClickHouse deletes this column from the data part in a filesystem.
The `TTL` clause cant be used for key columns.
Examples:
Creating a table with TTL
``` sql
CREATE TABLE example_table
(
d DateTime,
a Int TTL d + INTERVAL 1 MONTH,
b Int TTL d + INTERVAL 1 MONTH,
c String
)
ENGINE = MergeTree
PARTITION BY toYYYYMM(d)
ORDER BY d;
```
Adding TTL to a column of an existing table
``` sql
ALTER TABLE example_table
MODIFY COLUMN
c String TTL d + INTERVAL 1 DAY;
```
Altering TTL of the column
``` sql
ALTER TABLE example_table
MODIFY COLUMN
c String TTL d + INTERVAL 1 MONTH;
```
### Table TTL {#mergetree-table-ttl}
Table can have an expression for removal of expired rows, and multiple expressions for automatic move of parts between [disks or volumes](#table_engine-mergetree-multiple-volumes). When rows in the table expire, ClickHouse deletes all corresponding rows. For parts moving feature, all rows of a part must satisfy the movement expression criteria.
``` sql
TTL expr [DELETE|TO DISK 'aaa'|TO VOLUME 'bbb'], ...
```
Type of TTL rule may follow each TTL expression. It affects an action which is to be done once the expression is satisfied (reaches current time):
- `DELETE` - delete expired rows (default action);
- `TO DISK 'aaa'` - move part to the disk `aaa`;
- `TO VOLUME 'bbb'` - move part to the disk `bbb`.
Examples:
Creating a table with TTL
``` sql
CREATE TABLE example_table
(
d DateTime,
a Int
)
ENGINE = MergeTree
PARTITION BY toYYYYMM(d)
ORDER BY d
TTL d + INTERVAL 1 MONTH [DELETE],
d + INTERVAL 1 WEEK TO VOLUME 'aaa',
d + INTERVAL 2 WEEK TO DISK 'bbb';
```
Altering TTL of the table
``` sql
ALTER TABLE example_table
MODIFY TTL d + INTERVAL 1 DAY;
```
**Removing Data**
Data with an expired TTL is removed when ClickHouse merges data parts.
When ClickHouse see that data is expired, it performs an off-schedule merge. To control the frequency of such merges, you can set [merge\_with\_ttl\_timeout](#mergetree_setting-merge_with_ttl_timeout). If the value is too low, it will perform many off-schedule merges that may consume a lot of resources.
If you perform the `SELECT` query between merges, you may get expired data. To avoid it, use the [OPTIMIZE](../../../sql-reference/statements/misc.md#misc_operations-optimize) query before `SELECT`.
## Using Multiple Block Devices for Data Storage {#table_engine-mergetree-multiple-volumes}
### Introduction {#introduction}
`MergeTree` family table engines can store data on multiple block devices. For example, it can be useful when the data of a certain table are implicitly split into “hot” and “cold”. The most recent data is regularly requested but requires only a small amount of space. On the contrary, the fat-tailed historical data is requested rarely. If several disks are available, the “hot” data may be located on fast disks (for example, NVMe SSDs or in memory), while the “cold” data - on relatively slow ones (for example, HDD).
Data part is the minimum movable unit for `MergeTree`-engine tables. The data belonging to one part are stored on one disk. Data parts can be moved between disks in the background (according to user settings) as well as by means of the [ALTER](../../../sql-reference/statements/alter.md#alter_move-partition) queries.
### Terms {#terms}
- Disk — Block device mounted to the filesystem.
- Default disk — Disk that stores the path specified in the [path](../../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-path) server setting.
- Volume — Ordered set of equal disks (similar to [JBOD](https://en.wikipedia.org/wiki/Non-RAID_drive_architectures)).
- Storage policy — Set of volumes and the rules for moving data between them.
The names given to the described entities can be found in the system tables, [system.storage\_policies](../../../operations/system-tables.md#system_tables-storage_policies) and [system.disks](../../../operations/system-tables.md#system_tables-disks). To apply one of the configured storage policies for a table, use the `storage_policy` setting of `MergeTree`-engine family tables.
### Configuration {#table_engine-mergetree-multiple-volumes-configure}
Disks, volumes and storage policies should be declared inside the `<storage_configuration>` tag either in the main file `config.xml` or in a distinct file in the `config.d` directory.
Configuration structure:
``` xml
<storage_configuration>
<disks>
<disk_name_1> <!-- disk name -->
<path>/mnt/fast_ssd/clickhouse/</path>
</disk_name_1>
<disk_name_2>
<path>/mnt/hdd1/clickhouse/</path>
<keep_free_space_bytes>10485760</keep_free_space_bytes>
</disk_name_2>
<disk_name_3>
<path>/mnt/hdd2/clickhouse/</path>
<keep_free_space_bytes>10485760</keep_free_space_bytes>
</disk_name_3>
...
</disks>
...
</storage_configuration>
```
Tags:
- `<disk_name_N>` — Disk name. Names must be different for all disks.
- `path` — path under which a server will store data (`data` and `shadow` folders), should be terminated with /.
- `keep_free_space_bytes` — the amount of free disk space to be reserved.
The order of the disk definition is not important.
Storage policies configuration markup:
``` xml
<storage_configuration>
...
<policies>
<policy_name_1>
<volumes>
<volume_name_1>
<disk>disk_name_from_disks_configuration</disk>
<max_data_part_size_bytes>1073741824</max_data_part_size_bytes>
</volume_name_1>
<volume_name_2>
<!-- configuration -->
</volume_name_2>
<!-- more volumes -->
</volumes>
<move_factor>0.2</move_factor>
</policy_name_1>
<policy_name_2>
<!-- configuration -->
</policy_name_2>
<!-- more policies -->
</policies>
...
</storage_configuration>
```
Tags:
- `policy_name_N` — Policy name. Policy names must be unique.
- `volume_name_N` — Volume name. Volume names must be unique.
- `disk` — a disk within a volume.
- `max_data_part_size_bytes` — the maximum size of a part that can be stored on any of the volumes disks.
- `move_factor` — when the amount of available space gets lower than this factor, data automatically start to move on the next volume if any (by default, 0.1).
Cofiguration examples:
``` xml
<storage_configuration>
...
<policies>
<hdd_in_order> <!-- policy name -->
<volumes>
<single> <!-- volume name -->
<disk>disk1</disk>
<disk>disk2</disk>
</single>
</volumes>
</hdd_in_order>
<moving_from_ssd_to_hdd>
<volumes>
<hot>
<disk>fast_ssd</disk>
<max_data_part_size_bytes>1073741824</max_data_part_size_bytes>
</hot>
<cold>
<disk>disk1</disk>
</cold>
</volumes>
<move_factor>0.2</move_factor>
</moving_from_ssd_to_hdd>
</policies>
...
</storage_configuration>
```
In given example, the `hdd_in_order` policy implements the [round-robin](https://en.wikipedia.org/wiki/Round-robin_scheduling) approach. Thus this policy defines only one volume (`single`), the data parts are stored on all its disks in circular order. Such policy can be quite useful if there are several similar disks are mounted to the system, but RAID is not configured. Keep in mind that each individual disk drive is not reliable and you might want to compensate it with replication factor of 3 or more.
If there are different kinds of disks available in the system, `moving_from_ssd_to_hdd` policy can be used instead. The volume `hot` consists of an SSD disk (`fast_ssd`), and the maximum size of a part that can be stored on this volume is 1GB. All the parts with the size larger than 1GB will be stored directly on the `cold` volume, which contains an HDD disk `disk1`.
Also, once the disk `fast_ssd` gets filled by more than 80%, data will be transferred to the `disk1` by a background process.
The order of volume enumeration within a storage policy is important. Once a volume is overfilled, data are moved to the next one. The order of disk enumeration is important as well because data are stored on them in turns.
When creating a table, one can apply one of the configured storage policies to it:
``` sql
CREATE TABLE table_with_non_default_policy (
EventDate Date,
OrderID UInt64,
BannerID UInt64,
SearchPhrase String
) ENGINE = MergeTree
ORDER BY (OrderID, BannerID)
PARTITION BY toYYYYMM(EventDate)
SETTINGS storage_policy = 'moving_from_ssd_to_hdd'
```
The `default` storage policy implies using only one volume, which consists of only one disk given in `<path>`. Once a table is created, its storage policy cannot be changed.
### Details {#details}
In the case of `MergeTree` tables, data is getting to disk in different ways:
- As a result of an insert (`INSERT` query).
- During background merges and [mutations](../../../sql-reference/statements/alter.md#alter-mutations).
- When downloading from another replica.
- As a result of partition freezing [ALTER TABLE … FREEZE PARTITION](../../../sql-reference/statements/alter.md#alter_freeze-partition).
In all these cases except for mutations and partition freezing, a part is stored on a volume and a disk according to the given storage policy:
1. The first volume (in the order of definition) that has enough disk space for storing a part (`unreserved_space > current_part_size`) and allows for storing parts of a given size (`max_data_part_size_bytes > current_part_size`) is chosen.
2. Within this volume, that disk is chosen that follows the one, which was used for storing the previous chunk of data, and that has free space more than the part size (`unreserved_space - keep_free_space_bytes > current_part_size`).
Under the hood, mutations and partition freezing make use of [hard links](https://en.wikipedia.org/wiki/Hard_link). Hard links between different disks are not supported, therefore in such cases the resulting parts are stored on the same disks as the initial ones.
In the background, parts are moved between volumes on the basis of the amount of free space (`move_factor` parameter) according to the order the volumes are declared in the configuration file.
Data is never transferred from the last one and into the first one. One may use system tables [system.part\_log](../../../operations/system-tables.md#system_tables-part-log) (field `type = MOVE_PART`) and [system.parts](../../../operations/system-tables.md#system_tables-parts) (fields `path` and `disk`) to monitor background moves. Also, the detailed information can be found in server logs.
User can force moving a part or a partition from one volume to another using the query [ALTER TABLE … MOVE PART\|PARTITION … TO VOLUME\|DISK …](../../../sql-reference/statements/alter.md#alter_move-partition), all the restrictions for background operations are taken into account. The query initiates a move on its own and does not wait for background operations to be completed. User will get an error message if not enough free space is available or if any of the required conditions are not met.
Moving data does not interfere with data replication. Therefore, different storage policies can be specified for the same table on different replicas.
After the completion of background merges and mutations, old parts are removed only after a certain amount of time (`old_parts_lifetime`).
During this time, they are not moved to other volumes or disks. Therefore, until the parts are finally removed, they are still taken into account for evaluation of the occupied disk space.
[Original article](https://clickhouse.tech/docs/ru/operations/table_engines/mergetree/) <!--hide-->

View File

@ -0,0 +1,67 @@
---
toc_priority: 33
toc_title: ReplacingMergeTree
---
# ReplacingMergeTree {#replacingmergetree}
The engine differs from [MergeTree](mergetree.md#table_engines-mergetree) in that it removes duplicate entries with the same primary key value (or more accurately, with the same [sorting key](mergetree.md) value).
Data deduplication occurs only during a merge. Merging occurs in the background at an unknown time, so you cant plan for it. Some of the data may remain unprocessed. Although you can run an unscheduled merge using the `OPTIMIZE` query, dont count on using it, because the `OPTIMIZE` query will read and write a large amount of data.
Thus, `ReplacingMergeTree` is suitable for clearing out duplicate data in the background in order to save space, but it doesnt guarantee the absence of duplicates.
## Creating a Table {#creating-a-table}
``` sql
CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
(
name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1],
name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2],
...
) ENGINE = ReplacingMergeTree([ver])
[PARTITION BY expr]
[ORDER BY expr]
[PRIMARY KEY expr]
[SAMPLE BY expr]
[SETTINGS name=value, ...]
```
For a description of request parameters, see [request description](../../../sql-reference/statements/create.md).
**ReplacingMergeTree Parameters**
- `ver` — column with version. Type `UInt*`, `Date` or `DateTime`. Optional parameter.
When merging, `ReplacingMergeTree` from all the rows with the same primary key leaves only one:
- Last in the selection, if `ver` not set.
- With the maximum version, if `ver` specified.
**Query clauses**
When creating a `ReplacingMergeTree` table the same [clauses](mergetree.md) are required, as when creating a `MergeTree` table.
<details markdown="1">
<summary>Deprecated Method for Creating a Table</summary>
!!! attention "Attention"
Do not use this method in new projects and, if possible, switch the old projects to the method described above.
``` sql
CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
(
name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1],
name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2],
...
) ENGINE [=] ReplacingMergeTree(date-column [, sampling_expression], (primary, key), index_granularity, [ver])
```
All of the parameters excepting `ver` have the same meaning as in `MergeTree`.
- `ver` - column with the version. Optional parameter. For a description, see the text above.
</details>
[Original article](https://clickhouse.tech/docs/en/operations/table_engines/replacingmergetree/) <!--hide-->

View File

@ -0,0 +1,216 @@
---
toc_priority: 31
toc_title: Data Replication
---
# Data Replication {#table_engines-replication}
Replication is only supported for tables in the MergeTree family:
- ReplicatedMergeTree
- ReplicatedSummingMergeTree
- ReplicatedReplacingMergeTree
- ReplicatedAggregatingMergeTree
- ReplicatedCollapsingMergeTree
- ReplicatedVersionedCollapsingMergeTree
- ReplicatedGraphiteMergeTree
Replication works at the level of an individual table, not the entire server. A server can store both replicated and non-replicated tables at the same time.
Replication does not depend on sharding. Each shard has its own independent replication.
Compressed data for `INSERT` and `ALTER` queries is replicated (for more information, see the documentation for [ALTER](../../../sql-reference/statements/alter.md#query_language_queries_alter)).
`CREATE`, `DROP`, `ATTACH`, `DETACH` and `RENAME` queries are executed on a single server and are not replicated:
- The `CREATE TABLE` query creates a new replicatable table on the server where the query is run. If this table already exists on other servers, it adds a new replica.
- The `DROP TABLE` query deletes the replica located on the server where the query is run.
- The `RENAME` query renames the table on one of the replicas. In other words, replicated tables can have different names on different replicas.
ClickHouse uses [Apache ZooKeeper](https://zookeeper.apache.org) for storing replicas meta information. Use ZooKeeper version 3.4.5 or newer.
To use replication, set parameters in the [zookeeper](../../../operations/server-configuration-parameters/settings.md#server-settings_zookeeper) server configuration section.
!!! attention "Attention"
Dont neglect the security setting. ClickHouse supports the `digest` [ACL scheme](https://zookeeper.apache.org/doc/current/zookeeperProgrammers.html#sc_ZooKeeperAccessControl) of the ZooKeeper security subsystem.
Example of setting the addresses of the ZooKeeper cluster:
``` xml
<zookeeper>
<node index="1">
<host>example1</host>
<port>2181</port>
</node>
<node index="2">
<host>example2</host>
<port>2181</port>
</node>
<node index="3">
<host>example3</host>
<port>2181</port>
</node>
</zookeeper>
```
You can specify any existing ZooKeeper cluster and the system will use a directory on it for its own data (the directory is specified when creating a replicatable table).
If ZooKeeper isnt set in the config file, you cant create replicated tables, and any existing replicated tables will be read-only.
ZooKeeper is not used in `SELECT` queries because replication does not affect the performance of `SELECT` and queries run just as fast as they do for non-replicated tables. When querying distributed replicated tables, ClickHouse behavior is controlled by the settings [max\_replica\_delay\_for\_distributed\_queries](../../../operations/settings/settings.md#settings-max_replica_delay_for_distributed_queries) and [fallback\_to\_stale\_replicas\_for\_distributed\_queries](../../../operations/settings/settings.md#settings-fallback_to_stale_replicas_for_distributed_queries).
For each `INSERT` query, approximately ten entries are added to ZooKeeper through several transactions. (To be more precise, this is for each inserted block of data; an INSERT query contains one block or one block per `max_insert_block_size = 1048576` rows.) This leads to slightly longer latencies for `INSERT` compared to non-replicated tables. But if you follow the recommendations to insert data in batches of no more than one `INSERT` per second, it doesnt create any problems. The entire ClickHouse cluster used for coordinating one ZooKeeper cluster has a total of several hundred `INSERTs` per second. The throughput on data inserts (the number of rows per second) is just as high as for non-replicated data.
For very large clusters, you can use different ZooKeeper clusters for different shards. However, this hasnt proven necessary on the Yandex.Metrica cluster (approximately 300 servers).
Replication is asynchronous and multi-master. `INSERT` queries (as well as `ALTER`) can be sent to any available server. Data is inserted on the server where the query is run, and then it is copied to the other servers. Because it is asynchronous, recently inserted data appears on the other replicas with some latency. If part of the replicas are not available, the data is written when they become available. If a replica is available, the latency is the amount of time it takes to transfer the block of compressed data over the network.
By default, an INSERT query waits for confirmation of writing the data from only one replica. If the data was successfully written to only one replica and the server with this replica ceases to exist, the stored data will be lost. To enable getting confirmation of data writes from multiple replicas, use the `insert_quorum` option.
Each block of data is written atomically. The INSERT query is divided into blocks up to `max_insert_block_size = 1048576` rows. In other words, if the `INSERT` query has less than 1048576 rows, it is made atomically.
Data blocks are deduplicated. For multiple writes of the same data block (data blocks of the same size containing the same rows in the same order), the block is only written once. The reason for this is in case of network failures when the client application doesnt know if the data was written to the DB, so the `INSERT` query can simply be repeated. It doesnt matter which replica INSERTs were sent to with identical data. `INSERTs` are idempotent. Deduplication parameters are controlled by [merge\_tree](../../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-merge_tree) server settings.
During replication, only the source data to insert is transferred over the network. Further data transformation (merging) is coordinated and performed on all the replicas in the same way. This minimizes network usage, which means that replication works well when replicas reside in different datacenters. (Note that duplicating data in different datacenters is the main goal of replication.)
You can have any number of replicas of the same data. Yandex.Metrica uses double replication in production. Each server uses RAID-5 or RAID-6, and RAID-10 in some cases. This is a relatively reliable and convenient solution.
The system monitors data synchronicity on replicas and is able to recover after a failure. Failover is automatic (for small differences in data) or semi-automatic (when data differs too much, which may indicate a configuration error).
## Creating Replicated Tables {#creating-replicated-tables}
The `Replicated` prefix is added to the table engine name. For example:`ReplicatedMergeTree`.
**Replicated\*MergeTree parameters**
- `zoo_path` — The path to the table in ZooKeeper.
- `replica_name` — The replica name in ZooKeeper.
Example:
``` sql
CREATE TABLE table_name
(
EventDate DateTime,
CounterID UInt32,
UserID UInt32
) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{layer}-{shard}/table_name', '{replica}')
PARTITION BY toYYYYMM(EventDate)
ORDER BY (CounterID, EventDate, intHash32(UserID))
SAMPLE BY intHash32(UserID)
```
<details markdown="1">
<summary>Example in deprecated syntax</summary>
``` sql
CREATE TABLE table_name
(
EventDate DateTime,
CounterID UInt32,
UserID UInt32
) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{layer}-{shard}/table_name', '{replica}', EventDate, intHash32(UserID), (CounterID, EventDate, intHash32(UserID), EventTime), 8192)
```
</details>
As the example shows, these parameters can contain substitutions in curly brackets. The substituted values are taken from the macros section of the configuration file. Example:
``` xml
<macros>
<layer>05</layer>
<shard>02</shard>
<replica>example05-02-1.yandex.ru</replica>
</macros>
```
The path to the table in ZooKeeper should be unique for each replicated table. Tables on different shards should have different paths.
In this case, the path consists of the following parts:
`/clickhouse/tables/` is the common prefix. We recommend using exactly this one.
`{layer}-{shard}` is the shard identifier. In this example it consists of two parts, since the Yandex.Metrica cluster uses bi-level sharding. For most tasks, you can leave just the {shard} substitution, which will be expanded to the shard identifier.
`table_name` is the name of the node for the table in ZooKeeper. It is a good idea to make it the same as the table name. It is defined explicitly, because in contrast to the table name, it doesnt change after a RENAME query.
*HINT*: you could add a database name in front of `table_name` as well. E.g. `db_name.table_name`
The replica name identifies different replicas of the same table. You can use the server name for this, as in the example. The name only needs to be unique within each shard.
You can define the parameters explicitly instead of using substitutions. This might be convenient for testing and for configuring small clusters. However, you cant use distributed DDL queries (`ON CLUSTER`) in this case.
When working with large clusters, we recommend using substitutions because they reduce the probability of error.
Run the `CREATE TABLE` query on each replica. This query creates a new replicated table, or adds a new replica to an existing one.
If you add a new replica after the table already contains some data on other replicas, the data will be copied from the other replicas to the new one after running the query. In other words, the new replica syncs itself with the others.
To delete a replica, run `DROP TABLE`. However, only one replica is deleted the one that resides on the server where you run the query.
## Recovery After Failures {#recovery-after-failures}
If ZooKeeper is unavailable when a server starts, replicated tables switch to read-only mode. The system periodically attempts to connect to ZooKeeper.
If ZooKeeper is unavailable during an `INSERT`, or an error occurs when interacting with ZooKeeper, an exception is thrown.
After connecting to ZooKeeper, the system checks whether the set of data in the local file system matches the expected set of data (ZooKeeper stores this information). If there are minor inconsistencies, the system resolves them by syncing data with the replicas.
If the system detects broken data parts (with the wrong size of files) or unrecognized parts (parts written to the file system but not recorded in ZooKeeper), it moves them to the `detached` subdirectory (they are not deleted). Any missing parts are copied from the replicas.
Note that ClickHouse does not perform any destructive actions such as automatically deleting a large amount of data.
When the server starts (or establishes a new session with ZooKeeper), it only checks the quantity and sizes of all files. If the file sizes match but bytes have been changed somewhere in the middle, this is not detected immediately, but only when attempting to read the data for a `SELECT` query. The query throws an exception about a non-matching checksum or size of a compressed block. In this case, data parts are added to the verification queue and copied from the replicas if necessary.
If the local set of data differs too much from the expected one, a safety mechanism is triggered. The server enters this in the log and refuses to launch. The reason for this is that this case may indicate a configuration error, such as if a replica on a shard was accidentally configured like a replica on a different shard. However, the thresholds for this mechanism are set fairly low, and this situation might occur during normal failure recovery. In this case, data is restored semi-automatically - by “pushing a button”.
To start recovery, create the node `/path_to_table/replica_name/flags/force_restore_data` in ZooKeeper with any content, or run the command to restore all replicated tables:
``` bash
sudo -u clickhouse touch /var/lib/clickhouse/flags/force_restore_data
```
Then restart the server. On start, the server deletes these flags and starts recovery.
## Recovery After Complete Data Loss {#recovery-after-complete-data-loss}
If all data and metadata disappeared from one of the servers, follow these steps for recovery:
1. Install ClickHouse on the server. Define substitutions correctly in the config file that contains the shard identifier and replicas, if you use them.
2. If you had unreplicated tables that must be manually duplicated on the servers, copy their data from a replica (in the directory `/var/lib/clickhouse/data/db_name/table_name/`).
3. Copy table definitions located in `/var/lib/clickhouse/metadata/` from a replica. If a shard or replica identifier is defined explicitly in the table definitions, correct it so that it corresponds to this replica. (Alternatively, start the server and make all the `ATTACH TABLE` queries that should have been in the .sql files in `/var/lib/clickhouse/metadata/`.)
4. To start recovery, create the ZooKeeper node `/path_to_table/replica_name/flags/force_restore_data` with any content, or run the command to restore all replicated tables: `sudo -u clickhouse touch /var/lib/clickhouse/flags/force_restore_data`
Then start the server (restart, if it is already running). Data will be downloaded from replicas.
An alternative recovery option is to delete information about the lost replica from ZooKeeper (`/path_to_table/replica_name`), then create the replica again as described in “[Creating replicated tables](#creating-replicated-tables)”.
There is no restriction on network bandwidth during recovery. Keep this in mind if you are restoring many replicas at once.
## Converting from MergeTree to ReplicatedMergeTree {#converting-from-mergetree-to-replicatedmergetree}
We use the term `MergeTree` to refer to all table engines in the `MergeTree family`, the same as for `ReplicatedMergeTree`.
If you had a `MergeTree` table that was manually replicated, you can convert it to a replicated table. You might need to do this if you have already collected a large amount of data in a `MergeTree` table and now you want to enable replication.
If the data differs on various replicas, first sync it, or delete this data on all the replicas except one.
Rename the existing MergeTree table, then create a `ReplicatedMergeTree` table with the old name.
Move the data from the old table to the `detached` subdirectory inside the directory with the new table data (`/var/lib/clickhouse/data/db_name/table_name/`).
Then run `ALTER TABLE ATTACH PARTITION` on one of the replicas to add these data parts to the working set.
## Converting from ReplicatedMergeTree to MergeTree {#converting-from-replicatedmergetree-to-mergetree}
Create a MergeTree table with a different name. Move all the data from the directory with the `ReplicatedMergeTree` table data to the new tables data directory. Then delete the `ReplicatedMergeTree` table and restart the server.
If you want to get rid of a `ReplicatedMergeTree` table without launching the server:
- Delete the corresponding `.sql` file in the metadata directory (`/var/lib/clickhouse/metadata/`).
- Delete the corresponding path in ZooKeeper (`/path_to_table/replica_name`).
After this, you can launch the server, create a `MergeTree` table, move the data to its directory, and then restart the server.
## Recovery When Metadata in the Zookeeper Cluster Is Lost or Damaged {#recovery-when-metadata-in-the-zookeeper-cluster-is-lost-or-damaged}
If the data in ZooKeeper was lost or damaged, you can save data by moving it to an unreplicated table as described above.
[Original article](https://clickhouse.tech/docs/en/operations/table_engines/replication/) <!--hide-->

View File

@ -0,0 +1,139 @@
---
toc_priority: 34
toc_title: SummingMergeTree
---
# SummingMergeTree {#summingmergetree}
The engine inherits from [MergeTree](mergetree.md#table_engines-mergetree). The difference is that when merging data parts for `SummingMergeTree` tables ClickHouse replaces all the rows with the same primary key (or more accurately, with the same [sorting key](mergetree.md)) with one row which contains summarized values for the columns with the numeric data type. If the sorting key is composed in a way that a single key value corresponds to large number of rows, this significantly reduces storage volume and speeds up data selection.
We recommend to use the engine together with `MergeTree`. Store complete data in `MergeTree` table, and use `SummingMergeTree` for aggregated data storing, for example, when preparing reports. Such an approach will prevent you from losing valuable data due to an incorrectly composed primary key.
## Creating a Table {#creating-a-table}
``` sql
CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
(
name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1],
name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2],
...
) ENGINE = SummingMergeTree([columns])
[PARTITION BY expr]
[ORDER BY expr]
[SAMPLE BY expr]
[SETTINGS name=value, ...]
```
For a description of request parameters, see [request description](../../../sql-reference/statements/create.md).
**Parameters of SummingMergeTree**
- `columns` - a tuple with the names of columns where values will be summarized. Optional parameter.
The columns must be of a numeric type and must not be in the primary key.
If `columns` not specified, ClickHouse summarizes the values in all columns with a numeric data type that are not in the primary key.
**Query clauses**
When creating a `SummingMergeTree` table the same [clauses](mergetree.md) are required, as when creating a `MergeTree` table.
<details markdown="1">
<summary>Deprecated Method for Creating a Table</summary>
!!! attention "Attention"
Do not use this method in new projects and, if possible, switch the old projects to the method described above.
``` sql
CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
(
name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1],
name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2],
...
) ENGINE [=] SummingMergeTree(date-column [, sampling_expression], (primary, key), index_granularity, [columns])
```
All of the parameters excepting `columns` have the same meaning as in `MergeTree`.
- `columns` — tuple with names of columns values of which will be summarized. Optional parameter. For a description, see the text above.
</details>
## Usage Example {#usage-example}
Consider the following table:
``` sql
CREATE TABLE summtt
(
key UInt32,
value UInt32
)
ENGINE = SummingMergeTree()
ORDER BY key
```
Insert data to it:
``` sql
INSERT INTO summtt Values(1,1),(1,2),(2,1)
```
ClickHouse may sum all the rows not completely ([see below](#data-processing)), so we use an aggregate function `sum` and `GROUP BY` clause in the query.
``` sql
SELECT key, sum(value) FROM summtt GROUP BY key
```
``` text
┌─key─┬─sum(value)─┐
│ 2 │ 1 │
│ 1 │ 3 │
└─────┴────────────┘
```
## Data Processing {#data-processing}
When data are inserted into a table, they are saved as-is. ClickHouse merges the inserted parts of data periodically and this is when rows with the same primary key are summed and replaced with one for each resulting part of data.
ClickHouse can merge the data parts so that different resulting parts of data cat consist rows with the same primary key, i.e. the summation will be incomplete. Therefore (`SELECT`) an aggregate function [sum()](../../../sql-reference/aggregate-functions/reference.md#agg_function-sum) and `GROUP BY` clause should be used in a query as described in the example above.
### Common Rules for Summation {#common-rules-for-summation}
The values in the columns with the numeric data type are summarized. The set of columns is defined by the parameter `columns`.
If the values were 0 in all of the columns for summation, the row is deleted.
If column is not in the primary key and is not summarized, an arbitrary value is selected from the existing ones.
The values are not summarized for columns in the primary key.
### The Summation in the Aggregatefunction Columns {#the-summation-in-the-aggregatefunction-columns}
For columns of [AggregateFunction type](../../../sql-reference/data-types/aggregatefunction.md) ClickHouse behaves as [AggregatingMergeTree](aggregatingmergetree.md) engine aggregating according to the function.
### Nested Structures {#nested-structures}
Table can have nested data structures that are processed in a special way.
If the name of a nested table ends with `Map` and it contains at least two columns that meet the following criteria:
- the first column is numeric `(*Int*, Date, DateTime)` or a string `(String, FixedString)`, lets call it `key`,
- the other columns are arithmetic `(*Int*, Float32/64)`, lets call it `(values...)`,
then this nested table is interpreted as a mapping of `key => (values...)`, and when merging its rows, the elements of two data sets are merged by `key` with a summation of the corresponding `(values...)`.
Examples:
``` text
[(1, 100)] + [(2, 150)] -> [(1, 100), (2, 150)]
[(1, 100)] + [(1, 150)] -> [(1, 250)]
[(1, 100)] + [(1, 150), (2, 150)] -> [(1, 250), (2, 150)]
[(1, 100), (2, 150)] + [(1, -100)] -> [(2, 150)]
```
When requesting data, use the [sumMap(key, value)](../../../sql-reference/aggregate-functions/reference.md) function for aggregation of `Map`.
For nested data structure, you do not need to specify its columns in the tuple of columns for summation.
[Original article](https://clickhouse.tech/docs/en/operations/table_engines/summingmergetree/) <!--hide-->

View File

@ -0,0 +1,236 @@
---
toc_priority: 37
toc_title: VersionedCollapsingMergeTree
---
# VersionedCollapsingMergeTree {#versionedcollapsingmergetree}
This engine:
- Allows quick writing of object states that are continually changing.
- Deletes old object states in the background. This significantly reduces the volume of storage.
See the section [Collapsing](#table_engines_versionedcollapsingmergetree) for details.
The engine inherits from [MergeTree](mergetree.md#table_engines-mergetree) and adds the logic for collapsing rows to the algorithm for merging data parts. `VersionedCollapsingMergeTree` serves the same purpose as [CollapsingMergeTree](collapsingmergetree.md) but uses a different collapsing algorithm that allows inserting the data in any order with multiple threads. In particular, the `Version` column helps to collapse the rows properly even if they are inserted in the wrong order. In contrast, `CollapsingMergeTree` allows only strictly consecutive insertion.
## Creating a Table {#creating-a-table}
``` sql
CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
(
name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1],
name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2],
...
) ENGINE = VersionedCollapsingMergeTree(sign, version)
[PARTITION BY expr]
[ORDER BY expr]
[SAMPLE BY expr]
[SETTINGS name=value, ...]
```
For a description of query parameters, see the [query description](../../../sql-reference/statements/create.md).
**Engine Parameters**
``` sql
VersionedCollapsingMergeTree(sign, version)
```
- `sign` — Name of the column with the type of row: `1` is a “state” row, `-1` is a “cancel” row.
The column data type should be `Int8`.
- `version` — Name of the column with the version of the object state.
The column data type should be `UInt*`.
**Query Clauses**
When creating a `VersionedCollapsingMergeTree` table, the same [clauses](mergetree.md) are required as when creating a `MergeTree` table.
<details markdown="1">
<summary>Deprecated Method for Creating a Table</summary>
!!! attention "Attention"
Do not use this method in new projects. If possible, switch the old projects to the method described above.
``` sql
CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
(
name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1],
name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2],
...
) ENGINE [=] VersionedCollapsingMergeTree(date-column [, sampling_expression], (primary, key), index_granularity, sign, version)
```
All of the parameters except `sign` and `version` have the same meaning as in `MergeTree`.
- `sign` — Name of the column with the type of row: `1` is a “state” row, `-1` is a “cancel” row.
Column Data Type — `Int8`.
- `version` — Name of the column with the version of the object state.
The column data type should be `UInt*`.
</details>
## Collapsing {#table_engines-versionedcollapsingmergetree}
### Data {#data}
Consider a situation where you need to save continually changing data for some object. It is reasonable to have one row for an object and update the row whenever there are changes. However, the update operation is expensive and slow for a DBMS because it requires rewriting the data in the storage. Update is not acceptable if you need to write data quickly, but you can write the changes to an object sequentially as follows.
Use the `Sign` column when writing the row. If `Sign = 1` it means that the row is a state of an object (lets call it the “state” row). If `Sign = -1` it indicates the cancellation of the state of an object with the same attributes (lets call it the “cancel” row). Also use the `Version` column, which should identify each state of an object with a separate number.
For example, we want to calculate how many pages users visited on some site and how long they were there. At some point in time we write the following row with the state of user activity:
``` text
┌──────────────UserID─┬─PageViews─┬─Duration─┬─Sign─┬─Version─┐
│ 4324182021466249494 │ 5 │ 146 │ 1 │ 1 |
└─────────────────────┴───────────┴──────────┴──────┴─────────┘
```
At some point later we register the change of user activity and write it with the following two rows.
``` text
┌──────────────UserID─┬─PageViews─┬─Duration─┬─Sign─┬─Version─┐
│ 4324182021466249494 │ 5 │ 146 │ -1 │ 1 |
│ 4324182021466249494 │ 6 │ 185 │ 1 │ 2 |
└─────────────────────┴───────────┴──────────┴──────┴─────────┘
```
The first row cancels the previous state of the object (user). It should copy all of the fields of the canceled state except `Sign`.
The second row contains the current state.
Because we need only the last state of user activity, the rows
``` text
┌──────────────UserID─┬─PageViews─┬─Duration─┬─Sign─┬─Version─┐
│ 4324182021466249494 │ 5 │ 146 │ 1 │ 1 |
│ 4324182021466249494 │ 5 │ 146 │ -1 │ 1 |
└─────────────────────┴───────────┴──────────┴──────┴─────────┘
```
can be deleted, collapsing the invalid (old) state of the object. `VersionedCollapsingMergeTree` does this while merging the data parts.
To find out why we need two rows for each change, see [Algorithm](#table_engines-versionedcollapsingmergetree-algorithm).
**Notes on Usage**
1. The program that writes the data should remember the state of an object in order to cancel it. The “cancel” string should be a copy of the “state” string with the opposite `Sign`. This increases the initial size of storage but allows to write the data quickly.
2. Long growing arrays in columns reduce the efficiency of the engine due to the load for writing. The more straightforward the data, the better the efficiency.
3. `SELECT` results depend strongly on the consistency of the history of object changes. Be accurate when preparing data for inserting. You can get unpredictable results with inconsistent data, such as negative values for non-negative metrics like session depth.
### Algorithm {#table_engines-versionedcollapsingmergetree-algorithm}
When ClickHouse merges data parts, it deletes each pair of rows that have the same primary key and version and different `Sign`. The order of rows does not matter.
When ClickHouse inserts data, it orders rows by the primary key. If the `Version` column is not in the primary key, ClickHouse adds it to the primary key implicitly as the last field and uses it for ordering.
## Selecting Data {#selecting-data}
ClickHouse doesnt guarantee that all of the rows with the same primary key will be in the same resulting data part or even on the same physical server. This is true both for writing the data and for subsequent merging of the data parts. In addition, ClickHouse processes `SELECT` queries with multiple threads, and it cannot predict the order of rows in the result. This means that aggregation is required if there is a need to get completely “collapsed” data from a `VersionedCollapsingMergeTree` table.
To finalize collapsing, write a query with a `GROUP BY` clause and aggregate functions that account for the sign. For example, to calculate quantity, use `sum(Sign)` instead of `count()`. To calculate the sum of something, use `sum(Sign * x)` instead of `sum(x)`, and add `HAVING sum(Sign) > 0`.
The aggregates `count`, `sum` and `avg` can be calculated this way. The aggregate `uniq` can be calculated if an object has at least one non-collapsed state. The aggregates `min` and `max` cant be calculated because `VersionedCollapsingMergeTree` does not save the history of values of collapsed states.
If you need to extract the data with “collapsing” but without aggregation (for example, to check whether rows are present whose newest values match certain conditions), you can use the `FINAL` modifier for the `FROM` clause. This approach is inefficient and should not be used with large tables.
## Example of Use {#example-of-use}
Example data:
``` text
┌──────────────UserID─┬─PageViews─┬─Duration─┬─Sign─┬─Version─┐
│ 4324182021466249494 │ 5 │ 146 │ 1 │ 1 |
│ 4324182021466249494 │ 5 │ 146 │ -1 │ 1 |
│ 4324182021466249494 │ 6 │ 185 │ 1 │ 2 |
└─────────────────────┴───────────┴──────────┴──────┴─────────┘
```
Creating the table:
``` sql
CREATE TABLE UAct
(
UserID UInt64,
PageViews UInt8,
Duration UInt8,
Sign Int8,
Version UInt8
)
ENGINE = VersionedCollapsingMergeTree(Sign, Version)
ORDER BY UserID
```
Inserting the data:
``` sql
INSERT INTO UAct VALUES (4324182021466249494, 5, 146, 1, 1)
```
``` sql
INSERT INTO UAct VALUES (4324182021466249494, 5, 146, -1, 1),(4324182021466249494, 6, 185, 1, 2)
```
We use two `INSERT` queries to create two different data parts. If we insert the data with a single query, ClickHouse creates one data part and will never perform any merge.
Getting the data:
``` sql
SELECT * FROM UAct
```
``` text
┌──────────────UserID─┬─PageViews─┬─Duration─┬─Sign─┬─Version─┐
│ 4324182021466249494 │ 5 │ 146 │ 1 │ 1 │
└─────────────────────┴───────────┴──────────┴──────┴─────────┘
┌──────────────UserID─┬─PageViews─┬─Duration─┬─Sign─┬─Version─┐
│ 4324182021466249494 │ 5 │ 146 │ -1 │ 1 │
│ 4324182021466249494 │ 6 │ 185 │ 1 │ 2 │
└─────────────────────┴───────────┴──────────┴──────┴─────────┘
```
What do we see here and where are the collapsed parts?
We created two data parts using two `INSERT` queries. The `SELECT` query was performed in two threads, and the result is a random order of rows.
Collapsing did not occur because the data parts have not been merged yet. ClickHouse merges data parts at an unknown point in time which we cannot predict.
This is why we need aggregation:
``` sql
SELECT
UserID,
sum(PageViews * Sign) AS PageViews,
sum(Duration * Sign) AS Duration,
Version
FROM UAct
GROUP BY UserID, Version
HAVING sum(Sign) > 0
```
``` text
┌──────────────UserID─┬─PageViews─┬─Duration─┬─Version─┐
│ 4324182021466249494 │ 6 │ 185 │ 2 │
└─────────────────────┴───────────┴──────────┴─────────┘
```
If we dont need aggregation and want to force collapsing, we can use the `FINAL` modifier for the `FROM` clause.
``` sql
SELECT * FROM UAct FINAL
```
``` text
┌──────────────UserID─┬─PageViews─┬─Duration─┬─Sign─┬─Version─┐
│ 4324182021466249494 │ 6 │ 185 │ 1 │ 2 │
└─────────────────────┴───────────┴──────────┴──────┴─────────┘
```
This is a very inefficient way to select data. Dont use it for large tables.
[Original article](https://clickhouse.tech/docs/en/operations/table_engines/versionedcollapsingmergetree/) <!--hide-->

View File

@ -0,0 +1,95 @@
---
toc_priority: 35
toc_title: Dictionary
---
# Dictionary {#dictionary}
The `Dictionary` engine displays the [dictionary](../../../sql-reference/dictionaries/external-dictionaries/external-dicts.md) data as a ClickHouse table.
As an example, consider a dictionary of `products` with the following configuration:
``` xml
<dictionaries>
<dictionary>
<name>products</name>
<source>
<odbc>
<table>products</table>
<connection_string>DSN=some-db-server</connection_string>
</odbc>
</source>
<lifetime>
<min>300</min>
<max>360</max>
</lifetime>
<layout>
<flat/>
</layout>
<structure>
<id>
<name>product_id</name>
</id>
<attribute>
<name>title</name>
<type>String</type>
<null_value></null_value>
</attribute>
</structure>
</dictionary>
</dictionaries>
```
Query the dictionary data:
``` sql
SELECT
name,
type,
key,
attribute.names,
attribute.types,
bytes_allocated,
element_count,
source
FROM system.dictionaries
WHERE name = 'products'
```
``` text
┌─name─────┬─type─┬─key────┬─attribute.names─┬─attribute.types─┬─bytes_allocated─┬─element_count─┬─source──────────┐
│ products │ Flat │ UInt64 │ ['title'] │ ['String'] │ 23065376 │ 175032 │ ODBC: .products │
└──────────┴──────┴────────┴─────────────────┴─────────────────┴─────────────────┴───────────────┴─────────────────┘
```
You can use the [dictGet\*](../../../sql-reference/functions/ext-dict-functions.md#ext_dict_functions) function to get the dictionary data in this format.
This view isnt helpful when you need to get raw data, or when performing a `JOIN` operation. For these cases, you can use the `Dictionary` engine, which displays the dictionary data in a table.
Syntax:
``` sql
CREATE TABLE %table_name% (%fields%) engine = Dictionary(%dictionary_name%)`
```
Usage example:
``` sql
create table products (product_id UInt64, title String) Engine = Dictionary(products);
```
Ok
Take a look at whats in the table.
``` sql
select * from products limit 1;
```
``` text
┌────product_id─┬─title───────────┐
│ 152689 │ Some item │
└───────────────┴─────────────────┘
```
[Original article](https://clickhouse.tech/docs/en/operations/table_engines/dictionary/) <!--hide-->

View File

@ -0,0 +1,150 @@
---
toc_priority: 33
toc_title: Distributed
---
# Distributed {#distributed}
**Tables with Distributed engine do not store any data by themself**, but allow distributed query processing on multiple servers.
Reading is automatically parallelized. During a read, the table indexes on remote servers are used, if there are any.
The Distributed engine accepts parameters:
- the cluster name in the servers config file
- the name of a remote database
- the name of a remote table
- (optionally) sharding key
- (optionally) policy name, it will be used to store temporary files for async send
See also:
- `insert_distributed_sync` setting
- [MergeTree](../mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes) for the examples
Example:
``` sql
Distributed(logs, default, hits[, sharding_key[, policy_name]])
```
Data will be read from all servers in the logs cluster, from the default.hits table located on every server in the cluster.
Data is not only read but is partially processed on the remote servers (to the extent that this is possible).
For example, for a query with GROUP BY, data will be aggregated on remote servers, and the intermediate states of aggregate functions will be sent to the requestor server. Then data will be further aggregated.
Instead of the database name, you can use a constant expression that returns a string. For example: currentDatabase().
logs The cluster name in the servers config file.
Clusters are set like this:
``` xml
<remote_servers>
<logs>
<shard>
<!-- Optional. Shard weight when writing data. Default: 1. -->
<weight>1</weight>
<!-- Optional. Whether to write data to just one of the replicas. Default: false (write data to all replicas). -->
<internal_replication>false</internal_replication>
<replica>
<host>example01-01-1</host>
<port>9000</port>
</replica>
<replica>
<host>example01-01-2</host>
<port>9000</port>
</replica>
</shard>
<shard>
<weight>2</weight>
<internal_replication>false</internal_replication>
<replica>
<host>example01-02-1</host>
<port>9000</port>
</replica>
<replica>
<host>example01-02-2</host>
<secure>1</secure>
<port>9440</port>
</replica>
</shard>
</logs>
</remote_servers>
```
Here a cluster is defined with the name logs that consists of two shards, each of which contains two replicas.
Shards refer to the servers that contain different parts of the data (in order to read all the data, you must access all the shards).
Replicas are duplicating servers (in order to read all the data, you can access the data on any one of the replicas).
Cluster names must not contain dots.
The parameters `host`, `port`, and optionally `user`, `password`, `secure`, `compression` are specified for each server:
- `host` The address of the remote server. You can use either the domain or the IPv4 or IPv6 address. If you specify the domain, the server makes a DNS request when it starts, and the result is stored as long as the server is running. If the DNS request fails, the server doesnt start. If you change the DNS record, restart the server.
- `port` The TCP port for messenger activity (tcp\_port in the config, usually set to 9000). Do not confuse it with http\_port.
- `user` Name of the user for connecting to a remote server. Default value: default. This user must have access to connect to the specified server. Access is configured in the users.xml file. For more information, see the section [Access rights](../../../operations/access-rights.md).
- `password` The password for connecting to a remote server (not masked). Default value: empty string.
- `secure` - Use ssl for connection, usually you also should define `port` = 9440. Server should listen on <tcp_port_secure>9440</tcp_port_secure> and have correct certificates.
- `compression` - Use data compression. Default value: true.
When specifying replicas, one of the available replicas will be selected for each of the shards when reading. You can configure the algorithm for load balancing (the preference for which replica to access) see the [load\_balancing](../../../operations/settings/settings.md#settings-load_balancing) setting.
If the connection with the server is not established, there will be an attempt to connect with a short timeout. If the connection failed, the next replica will be selected, and so on for all the replicas. If the connection attempt failed for all the replicas, the attempt will be repeated the same way, several times.
This works in favour of resiliency, but does not provide complete fault tolerance: a remote server might accept the connection, but might not work, or work poorly.
You can specify just one of the shards (in this case, query processing should be called remote, rather than distributed) or up to any number of shards. In each shard, you can specify from one to any number of replicas. You can specify a different number of replicas for each shard.
You can specify as many clusters as you wish in the configuration.
To view your clusters, use the system.clusters table.
The Distributed engine allows working with a cluster like a local server. However, the cluster is inextensible: you must write its configuration in the server config file (even better, for all the clusters servers).
The Distributed engine requires writing clusters to the config file. Clusters from the config file are updated on the fly, without restarting the server. If you need to send a query to an unknown set of shards and replicas each time, you dont need to create a Distributed table use the remote table function instead. See the section [Table functions](../../../sql-reference/table-functions/index.md).
There are two methods for writing data to a cluster:
First, you can define which servers to write which data to and perform the write directly on each shard. In other words, perform INSERT in the tables that the distributed table “looks at”. This is the most flexible solution as you can use any sharding scheme, which could be non-trivial due to the requirements of the subject area. This is also the most optimal solution since data can be written to different shards completely independently.
Second, you can perform INSERT in a Distributed table. In this case, the table will distribute the inserted data across the servers itself. In order to write to a Distributed table, it must have a sharding key set (the last parameter). In addition, if there is only one shard, the write operation works without specifying the sharding key, since it doesnt mean anything in this case.
Each shard can have a weight defined in the config file. By default, the weight is equal to one. Data is distributed across shards in the amount proportional to the shard weight. For example, if there are two shards and the first has a weight of 9 while the second has a weight of 10, the first will be sent 9 / 19 parts of the rows, and the second will be sent 10 / 19.
Each shard can have the internal\_replication parameter defined in the config file.
If this parameter is set to true, the write operation selects the first healthy replica and writes data to it. Use this alternative if the Distributed table “looks at” replicated tables. In other words, if the table where data will be written is going to replicate them itself.
If it is set to false (the default), data is written to all replicas. In essence, this means that the Distributed table replicates data itself. This is worse than using replicated tables, because the consistency of replicas is not checked, and over time they will contain slightly different data.
To select the shard that a row of data is sent to, the sharding expression is analyzed, and its remainder is taken from dividing it by the total weight of the shards. The row is sent to the shard that corresponds to the half-interval of the remainders from prev\_weight to prev\_weights + weight, where prev\_weights is the total weight of the shards with the smallest number, and weight is the weight of this shard. For example, if there are two shards, and the first has a weight of 9 while the second has a weight of 10, the row will be sent to the first shard for the remainders from the range \[0, 9), and to the second for the remainders from the range \[9, 19).
The sharding expression can be any expression from constants and table columns that returns an integer. For example, you can use the expression rand() for random distribution of data, or UserID for distribution by the remainder from dividing the users ID (then the data of a single user will reside on a single shard, which simplifies running IN and JOIN by users). If one of the columns is not distributed evenly enough, you can wrap it in a hash function: intHash64(UserID).
A simple reminder from the division is a limited solution for sharding and isnt always appropriate. It works for medium and large volumes of data (dozens of servers), but not for very large volumes of data (hundreds of servers or more). In the latter case, use the sharding scheme required by the subject area, rather than using entries in Distributed tables.
SELECT queries are sent to all the shards and work regardless of how data is distributed across the shards (they can be distributed completely randomly). When you add a new shard, you dont have to transfer the old data to it. You can write new data with a heavier weight the data will be distributed slightly unevenly, but queries will work correctly and efficiently.
You should be concerned about the sharding scheme in the following cases:
- Queries are used that require joining data (IN or JOIN) by a specific key. If data is sharded by this key, you can use local IN or JOIN instead of GLOBAL IN or GLOBAL JOIN, which is much more efficient.
- A large number of servers is used (hundreds or more) with a large number of small queries (queries of individual clients - websites, advertisers, or partners). In order for the small queries to not affect the entire cluster, it makes sense to locate data for a single client on a single shard. Alternatively, as weve done in Yandex.Metrica, you can set up bi-level sharding: divide the entire cluster into “layers”, where a layer may consist of multiple shards. Data for a single client is located on a single layer, but shards can be added to a layer as necessary, and data is randomly distributed within them. Distributed tables are created for each layer, and a single shared distributed table is created for global queries.
Data is written asynchronously. When inserted in the table, the data block is just written to the local file system. The data is sent to the remote servers in the background as soon as possible. The period for sending data is managed by the [distributed\_directory\_monitor\_sleep\_time\_ms](../../../operations/settings/settings.md#distributed_directory_monitor_sleep_time_ms) and [distributed\_directory\_monitor\_max\_sleep\_time\_ms](../../../operations/settings/settings.md#distributed_directory_monitor_max_sleep_time_ms) settings. The `Distributed` engine sends each file with inserted data separately, but you can enable batch sending of files with the [distributed\_directory\_monitor\_batch\_inserts](../../../operations/settings/settings.md#distributed_directory_monitor_batch_inserts) setting. This setting improves cluster performance by better utilizing local server and network resources. You should check whether data is sent successfully by checking the list of files (data waiting to be sent) in the table directory: `/var/lib/clickhouse/data/database/table/`.
If the server ceased to exist or had a rough restart (for example, after a device failure) after an INSERT to a Distributed table, the inserted data might be lost. If a damaged data part is detected in the table directory, it is transferred to the broken subdirectory and no longer used.
When the max\_parallel\_replicas option is enabled, query processing is parallelized across all replicas within a single shard. For more information, see the section [max\_parallel\_replicas](../../../operations/settings/settings.md#settings-max_parallel_replicas).
## Virtual Columns {#virtual-columns}
- `_shard_num` — Contains the `shard_num` (from `system.clusters`). Type: [UInt32](../../../sql-reference/data-types/int-uint.md).
!!! note "Note"
Since [`remote`](../../../sql-reference/table-functions/remote.md)/`cluster` table functions internally create temporary instance of the same Distributed engine, `_shard_num` is available there too.
**See Also**
- [Virtual columns](index.md#table_engines-virtual_columns)
[Original article](https://clickhouse.tech/docs/en/operations/table_engines/distributed/) <!--hide-->

View File

@ -0,0 +1,88 @@
---
toc_priority: 37
toc_title: File
---
# File {#table_engines-file}
The File table engine keeps the data in a file in one of the supported [file
formats](../../../interfaces/formats.md#formats) (TabSeparated, Native, etc.).
Usage examples:
- Data export from ClickHouse to file.
- Convert data from one format to another.
- Updating data in ClickHouse via editing a file on a disk.
## Usage in ClickHouse Server {#usage-in-clickhouse-server}
``` sql
File(Format)
```
The `Format` parameter specifies one of the available file formats. To perform
`SELECT` queries, the format must be supported for input, and to perform
`INSERT` queries for output. The available formats are listed in the
[Formats](../../../interfaces/formats.md#formats) section.
ClickHouse does not allow to specify filesystem path for`File`. It will use folder defined by [path](../../../operations/server-configuration-parameters/settings.md) setting in server configuration.
When creating table using `File(Format)` it creates empty subdirectory in that folder. When data is written to that table, its put into `data.Format` file in that subdirectory.
You may manually create this subfolder and file in server filesystem and then [ATTACH](../../../sql-reference/statements/misc.md) it to table information with matching name, so you can query data from that file.
!!! warning "Warning"
Be careful with this functionality, because ClickHouse does not keep track of external changes to such files. The result of simultaneous writes via ClickHouse and outside of ClickHouse is undefined.
**Example:**
**1.** Set up the `file_engine_table` table:
``` sql
CREATE TABLE file_engine_table (name String, value UInt32) ENGINE=File(TabSeparated)
```
By default ClickHouse will create folder `/var/lib/clickhouse/data/default/file_engine_table`.
**2.** Manually create `/var/lib/clickhouse/data/default/file_engine_table/data.TabSeparated` containing:
``` bash
$ cat data.TabSeparated
one 1
two 2
```
**3.** Query the data:
``` sql
SELECT * FROM file_engine_table
```
``` text
┌─name─┬─value─┐
│ one │ 1 │
│ two │ 2 │
└──────┴───────┘
```
## Usage in ClickHouse-local {#usage-in-clickhouse-local}
In [clickhouse-local](../../../operations/utilities/clickhouse-local.md) File engine accepts file path in addition to `Format`. Default input/output streams can be specified using numeric or human-readable names like `0` or `stdin`, `1` or `stdout`.
**Example:**
``` bash
$ echo -e "1,2\n3,4" | clickhouse-local -q "CREATE TABLE table (a Int64, b Int64) ENGINE = File(CSV, stdin); SELECT a, b FROM table; DROP TABLE table"
```
## Details of Implementation {#details-of-implementation}
- Multiple `SELECT` queries can be performed concurrently, but `INSERT` queries will wait each other.
- Supported creating new file by `INSERT` query.
- If file exists, `INSERT` would append new values in it.
- Not supported:
- `ALTER`
- `SELECT ... SAMPLE`
- Indices
- Replication
[Original article](https://clickhouse.tech/docs/en/operations/table_engines/file/) <!--hide-->

View File

@ -0,0 +1,59 @@
---
toc_priority: 46
toc_title: GenerateRandom
---
# Generaterandom {#table_engines-generate}
The GenerateRandom table engine produces random data for given table schema.
Usage examples:
- Use in test to populate reproducible large table.
- Generate random input for fuzzing tests.
## Usage in ClickHouse Server {#usage-in-clickhouse-server}
``` sql
ENGINE = GenerateRandom(random_seed, max_string_length, max_array_length)
```
The `max_array_length` and `max_string_length` parameters specify maximum length of all
array columns and strings correspondingly in generated data.
Generate table engine supports only `SELECT` queries.
It supports all [DataTypes](../../../sql-reference/data-types/index.md) that can be stored in a table except `LowCardinality` and `AggregateFunction`.
**Example:**
**1.** Set up the `generate_engine_table` table:
``` sql
CREATE TABLE generate_engine_table (name String, value UInt32) ENGINE = GenerateRandom(1, 5, 3)
```
**2.** Query the data:
``` sql
SELECT * FROM generate_engine_table LIMIT 3
```
``` text
┌─name─┬──────value─┐
│ c4xJ │ 1412771199 │
│ r │ 1791099446 │
│ 7#$ │ 124312908 │
└──────┴────────────┘
```
## Details of Implementation {#details-of-implementation}
- Not supported:
- `ALTER`
- `SELECT ... SAMPLE`
- `INSERT`
- Indices
- Replication
[Original article](https://clickhouse.tech/docs/en/operations/table_engines/generate/) <!--hide-->

View File

@ -0,0 +1,6 @@
---
toc_folder_title: Special
toc_priority: 31
---

View File

@ -0,0 +1,109 @@
---
toc_priority: 40
toc_title: Join
---
# Join {#join}
Prepared data structure for using in [JOIN](../../../sql-reference/statements/select.md#select-join) operations.
## Creating a Table {#creating-a-table}
``` sql
CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
(
name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1] [TTL expr1],
name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2] [TTL expr2],
) ENGINE = Join(join_strictness, join_type, k1[, k2, ...])
```
See the detailed description of the [CREATE TABLE](../../../sql-reference/statements/create.md#create-table-query) query.
**Engine Parameters**
- `join_strictness` [JOIN strictness](../../../sql-reference/statements/select.md#select-join-strictness).
- `join_type` [JOIN type](../../../sql-reference/statements/select.md#select-join-types).
- `k1[, k2, ...]` Key columns from the `USING` clause that the `JOIN` operation is made with.
Enter `join_strictness` and `join_type` parameters without quotes, for example, `Join(ANY, LEFT, col1)`. They must match the `JOIN` operation that the table will be used for. If the parameters dont match, ClickHouse doesnt throw an exception and may return incorrect data.
## Table Usage {#table-usage}
### Example {#example}
Creating the left-side table:
``` sql
CREATE TABLE id_val(`id` UInt32, `val` UInt32) ENGINE = TinyLog
```
``` sql
INSERT INTO id_val VALUES (1,11)(2,12)(3,13)
```
Creating the right-side `Join` table:
``` sql
CREATE TABLE id_val_join(`id` UInt32, `val` UInt8) ENGINE = Join(ANY, LEFT, id)
```
``` sql
INSERT INTO id_val_join VALUES (1,21)(1,22)(3,23)
```
Joining the tables:
``` sql
SELECT * FROM id_val ANY LEFT JOIN id_val_join USING (id) SETTINGS join_use_nulls = 1
```
``` text
┌─id─┬─val─┬─id_val_join.val─┐
│ 1 │ 11 │ 21 │
│ 2 │ 12 │ ᴺᵁᴸᴸ │
│ 3 │ 13 │ 23 │
└────┴─────┴─────────────────┘
```
As an alternative, you can retrieve data from the `Join` table, specifying the join key value:
``` sql
SELECT joinGet('id_val_join', 'val', toUInt32(1))
```
``` text
┌─joinGet('id_val_join', 'val', toUInt32(1))─┐
│ 21 │
└────────────────────────────────────────────┘
```
### Selecting and Inserting Data {#selecting-and-inserting-data}
You can use `INSERT` queries to add data to the `Join`-engine tables. If the table was created with the `ANY` strictness, data for duplicate keys are ignored. With the `ALL` strictness, all rows are added.
You cannot perform a `SELECT` query directly from the table. Instead, use one of the following methods:
- Place the table to the right side in a `JOIN` clause.
- Call the [joinGet](../../../sql-reference/functions/other-functions.md#joinget) function, which lets you extract data from the table the same way as from a dictionary.
### Limitations and Settings {#join-limitations-and-settings}
When creating a table, the following settings are applied:
- [join\_use\_nulls](../../../operations/settings/settings.md#join_use_nulls)
- [max\_rows\_in\_join](../../../operations/settings/query-complexity.md#settings-max_rows_in_join)
- [max\_bytes\_in\_join](../../../operations/settings/query-complexity.md#settings-max_bytes_in_join)
- [join\_overflow\_mode](../../../operations/settings/query-complexity.md#settings-join_overflow_mode)
- [join\_any\_take\_last\_row](../../../operations/settings/settings.md#settings-join_any_take_last_row)
The `Join`-engine tables cant be used in `GLOBAL JOIN` operations.
The `Join`-engine allows use [join\_use\_nulls](../../../operations/settings/settings.md#join_use_nulls) setting in the `CREATE TABLE` statement. And [SELECT](../../../sql-reference/statements/select.md) query allows use `join_use_nulls` too. If you have different `join_use_nulls` settings, you can get an error joining table. It depends on kind of JOIN. When you use [joinGet](../../../sql-reference/functions/other-functions.md#joinget) function, you have to use the same `join_use_nulls` setting in `CRATE TABLE` and `SELECT` statements.
## Data Storage {#data-storage}
`Join` table data is always located in the RAM. When inserting rows into a table, ClickHouse writes data blocks to the directory on the disk so that they can be restored when the server restarts.
If the server restarts incorrectly, the data block on the disk might get lost or damaged. In this case, you may need to manually delete the file with damaged data.
[Original article](https://clickhouse.tech/docs/en/operations/table_engines/join/) <!--hide-->

View File

@ -0,0 +1,10 @@
---
toc_priority: 43
toc_title: MaterializedView
---
# Materializedview {#materializedview}
Used for implementing materialized views (for more information, see [CREATE TABLE](../../../sql-reference/statements/create.md)). For storing data, it uses a different engine that was specified when creating the view. When reading from a table, it just uses this engine.
[Original article](https://clickhouse.tech/docs/en/operations/table_engines/materializedview/) <!--hide-->

View File

@ -0,0 +1,68 @@
---
toc_priority: 36
toc_title: Merge
---
# Merge {#merge}
The `Merge` engine (not to be confused with `MergeTree`) does not store data itself, but allows reading from any number of other tables simultaneously.
Reading is automatically parallelized. Writing to a table is not supported. When reading, the indexes of tables that are actually being read are used, if they exist.
The `Merge` engine accepts parameters: the database name and a regular expression for tables.
Example:
``` sql
Merge(hits, '^WatchLog')
```
Data will be read from the tables in the `hits` database that have names that match the regular expression `^WatchLog`.
Instead of the database name, you can use a constant expression that returns a string. For example, `currentDatabase()`.
Regular expressions — [re2](https://github.com/google/re2) (supports a subset of PCRE), case-sensitive.
See the notes about escaping symbols in regular expressions in the “match” section.
When selecting tables to read, the `Merge` table itself will not be selected, even if it matches the regex. This is to avoid loops.
It is possible to create two `Merge` tables that will endlessly try to read each others data, but this is not a good idea.
The typical way to use the `Merge` engine is for working with a large number of `TinyLog` tables as if with a single table.
Example 2:
Lets say you have a old table (WatchLog\_old) and decided to change partitioning without moving data to a new table (WatchLog\_new) and you need to see data from both tables.
``` sql
CREATE TABLE WatchLog_old(date Date, UserId Int64, EventType String, Cnt UInt64)
ENGINE=MergeTree(date, (UserId, EventType), 8192);
INSERT INTO WatchLog_old VALUES ('2018-01-01', 1, 'hit', 3);
CREATE TABLE WatchLog_new(date Date, UserId Int64, EventType String, Cnt UInt64)
ENGINE=MergeTree PARTITION BY date ORDER BY (UserId, EventType) SETTINGS index_granularity=8192;
INSERT INTO WatchLog_new VALUES ('2018-01-02', 2, 'hit', 3);
CREATE TABLE WatchLog as WatchLog_old ENGINE=Merge(currentDatabase(), '^WatchLog');
SELECT *
FROM WatchLog
```
``` text
┌───────date─┬─UserId─┬─EventType─┬─Cnt─┐
│ 2018-01-01 │ 1 │ hit │ 3 │
└────────────┴────────┴───────────┴─────┘
┌───────date─┬─UserId─┬─EventType─┬─Cnt─┐
│ 2018-01-02 │ 2 │ hit │ 3 │
└────────────┴────────┴───────────┴─────┘
```
## Virtual Columns {#virtual-columns}
- `_table` — Contains the name of the table from which data was read. Type: [String](../../../sql-reference/data-types/string.md).
You can set the constant conditions on `_table` in the `WHERE/PREWHERE` clause (for example, `WHERE _table='xyz'`). In this case the read operation is performed only for that tables where the condition on `_table` is satisfied, so the `_table` column acts as an index.
**See Also**
- [Virtual columns](index.md#table_engines-virtual_columns)
[Original article](https://clickhouse.tech/docs/en/operations/table_engines/merge/) <!--hide-->

View File

@ -0,0 +1,80 @@
---
toc_priority: 41
toc_title: URL
---
# URL(URL, Format) {#table_engines-url}
Manages data on a remote HTTP/HTTPS server. This engine is similar
to the [File](file.md) engine.
## Using the Engine in the ClickHouse Server {#using-the-engine-in-the-clickhouse-server}
The `format` must be one that ClickHouse can use in
`SELECT` queries and, if necessary, in `INSERTs`. For the full list of supported formats, see
[Formats](../../../interfaces/formats.md#formats).
The `URL` must conform to the structure of a Uniform Resource Locator. The specified URL must point to a server
that uses HTTP or HTTPS. This does not require any
additional headers for getting a response from the server.
`INSERT` and `SELECT` queries are transformed to `POST` and `GET` requests,
respectively. For processing `POST` requests, the remote server must support
[Chunked transfer encoding](https://en.wikipedia.org/wiki/Chunked_transfer_encoding).
You can limit the maximum number of HTTP GET redirect hops using the [max\_http\_get\_redirects](../../../operations/settings/settings.md#setting-max_http_get_redirects) setting.
**Example:**
**1.** Create a `url_engine_table` table on the server :
``` sql
CREATE TABLE url_engine_table (word String, value UInt64)
ENGINE=URL('http://127.0.0.1:12345/', CSV)
```
**2.** Create a basic HTTP server using the standard Python 3 tools and
start it:
``` python3
from http.server import BaseHTTPRequestHandler, HTTPServer
class CSVHTTPServer(BaseHTTPRequestHandler):
def do_GET(self):
self.send_response(200)
self.send_header('Content-type', 'text/csv')
self.end_headers()
self.wfile.write(bytes('Hello,1\nWorld,2\n', "utf-8"))
if __name__ == "__main__":
server_address = ('127.0.0.1', 12345)
HTTPServer(server_address, CSVHTTPServer).serve_forever()
```
``` bash
$ python3 server.py
```
**3.** Request data:
``` sql
SELECT * FROM url_engine_table
```
``` text
┌─word──┬─value─┐
│ Hello │ 1 │
│ World │ 2 │
└───────┴───────┘
```
## Details of Implementation {#details-of-implementation}
- Reads and writes can be parallel
- Not supported:
- `ALTER` and `SELECT...SAMPLE` operations.
- Indexes.
- Replication.
[Original article](https://clickhouse.tech/docs/en/operations/table_engines/url/) <!--hide-->

View File

@ -1,83 +0,0 @@
---
toc_folder_title: Table Engines
toc_priority: 26
toc_title: Introduction
---
# Table Engines {#table_engines}
The table engine (type of table) determines:
- How and where data is stored, where to write it to, and where to read it from.
- Which queries are supported, and how.
- Concurrent data access.
- Use of indexes, if present.
- Whether multithreaded request execution is possible.
- Data replication parameters.
## Engine Families {#engine-families}
### MergeTree {#mergetree}
The most universal and functional table engines for high-load tasks. The property shared by these engines is quick data insertion with subsequent background data processing. `MergeTree` family engines support data replication (with [Replicated\*](mergetree_family/replication.md) versions of engines), partitioning, and other features not supported in other engines.
Engines in the family:
- [MergeTree](mergetree_family/mergetree.md)
- [ReplacingMergeTree](mergetree_family/replacingmergetree.md)
- [SummingMergeTree](mergetree_family/summingmergetree.md)
- [AggregatingMergeTree](mergetree_family/aggregatingmergetree.md)
- [CollapsingMergeTree](mergetree_family/collapsingmergetree.md)
- [VersionedCollapsingMergeTree](mergetree_family/versionedcollapsingmergetree.md)
- [GraphiteMergeTree](mergetree_family/graphitemergetree.md)
### Log {#log}
Lightweight [engines](log_family/index.md) with minimum functionality. Theyre the most effective when you need to quickly write many small tables (up to approximately 1 million rows) and read them later as a whole.
Engines in the family:
- [TinyLog](log_family/tinylog.md)
- [StripeLog](log_family/stripelog.md)
- [Log](log_family/log.md)
### Integration Engines {#integration-engines}
Engines for communicating with other data storage and processing systems.
Engines in the family:
- [Kafka](integrations/kafka.md)
- [MySQL](integrations/mysql.md)
- [ODBC](integrations/odbc.md)
- [JDBC](integrations/jdbc.md)
- [HDFS](integrations/hdfs.md)
### Special Engines {#special-engines}
Engines in the family:
- [Distributed](special/distributed.md)
- [MaterializedView](special/materializedview.md)
- [Dictionary](special/dictionary.md)
- [Merge](special/merge.md)
- [File](special/file.md)
- [Null](special/null.md)
- [Set](special/set.md)
- [Join](special/join.md)
- [URL](special/url.md)
- [View](special/view.md)
- [Memory](special/memory.md)
- [Buffer](special/buffer.md)
## Virtual Columns {#table_engines-virtual-columns}
Virtual column is an integral table engine attribute that is defined in the engine source code.
You shouldnt specify virtual columns in the `CREATE TABLE` query and you cant see them in `SHOW CREATE TABLE` and `DESCRIBE TABLE` query results. Virtual columns are also read-only, so you cant insert data into virtual columns.
To select data from a virtual column, you must specify its name in the `SELECT` query. `SELECT *` doesnt return values from virtual columns.
If you create a table with a column that has the same name as one of the table virtual columns, the virtual column becomes inaccessible. We dont recommend doing this. To help avoid conflicts, virtual column names are usually prefixed with an underscore.
[Original article](https://clickhouse.tech/docs/en/operations/table_engines/) <!--hide-->

View File

@ -1,121 +0,0 @@
---
toc_priority: 36
toc_title: HDFS
---
# HDFS {#table_engines-hdfs}
This engine provides integration with [Apache Hadoop](https://en.wikipedia.org/wiki/Apache_Hadoop) ecosystem by allowing to manage data on [HDFS](https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-hdfs/HdfsDesign.html)via ClickHouse. This engine is similar
to the [File](../special/file.md) and [URL](../special/url.md) engines, but provides Hadoop-specific features.
## Usage {#usage}
``` sql
ENGINE = HDFS(URI, format)
```
The `URI` parameter is the whole file URI in HDFS.
The `format` parameter specifies one of the available file formats. To perform
`SELECT` queries, the format must be supported for input, and to perform
`INSERT` queries for output. The available formats are listed in the
[Formats](../../../interfaces/formats.md#formats) section.
The path part of `URI` may contain globs. In this case the table would be readonly.
**Example:**
**1.** Set up the `hdfs_engine_table` table:
``` sql
CREATE TABLE hdfs_engine_table (name String, value UInt32) ENGINE=HDFS('hdfs://hdfs1:9000/other_storage', 'TSV')
```
**2.** Fill file:
``` sql
INSERT INTO hdfs_engine_table VALUES ('one', 1), ('two', 2), ('three', 3)
```
**3.** Query the data:
``` sql
SELECT * FROM hdfs_engine_table LIMIT 2
```
``` text
┌─name─┬─value─┐
│ one │ 1 │
│ two │ 2 │
└──────┴───────┘
```
## Implementation Details {#implementation-details}
- Reads and writes can be parallel
- Not supported:
- `ALTER` and `SELECT...SAMPLE` operations.
- Indexes.
- Replication.
**Globs in path**
Multiple path components can have globs. For being processed file should exists and matches to the whole path pattern. Listing of files determines during `SELECT` (not at `CREATE` moment).
- `*` — Substitutes any number of any characters except `/` including empty string.
- `?` — Substitutes any single character.
- `{some_string,another_string,yet_another_one}` — Substitutes any of strings `'some_string', 'another_string', 'yet_another_one'`.
- `{N..M}` — Substitutes any number in range from N to M including both borders.
Constructions with `{}` are similar to the [remote](../../../sql_reference/table_functions/remote.md) table function.
**Example**
1. Suppose we have several files in TSV format with the following URIs on HDFS:
- hdfs://hdfs1:9000/some\_dir/some\_file\_1
- hdfs://hdfs1:9000/some\_dir/some\_file\_2
- hdfs://hdfs1:9000/some\_dir/some\_file\_3
- hdfs://hdfs1:9000/another\_dir/some\_file\_1
- hdfs://hdfs1:9000/another\_dir/some\_file\_2
- hdfs://hdfs1:9000/another\_dir/some\_file\_3
1. There are several ways to make a table consisting of all six files:
<!-- -->
``` sql
CREATE TABLE table_with_range (name String, value UInt32) ENGINE = HDFS('hdfs://hdfs1:9000/{some,another}_dir/some_file_{1..3}', 'TSV')
```
Another way:
``` sql
CREATE TABLE table_with_question_mark (name String, value UInt32) ENGINE = HDFS('hdfs://hdfs1:9000/{some,another}_dir/some_file_?', 'TSV')
```
Table consists of all the files in both directories (all files should satisfy format and schema described in query):
``` sql
CREATE TABLE table_with_asterisk (name String, value UInt32) ENGINE = HDFS('hdfs://hdfs1:9000/{some,another}_dir/*', 'TSV')
```
!!! warning "Warning"
If the listing of files contains number ranges with leading zeros, use the construction with braces for each digit separately or use `?`.
**Example**
Create table with files named `file000`, `file001`, … , `file999`:
``` sql
CREARE TABLE big_table (name String, value UInt32) ENGINE = HDFS('hdfs://hdfs1:9000/big_dir/file{0..9}{0..9}{0..9}', 'CSV')
```
## Virtual Columns {#virtual-columns}
- `_path` — Path to the file.
- `_file` — Name of the file.
**See Also**
- [Virtual columns](../index.md#table_engines-virtual_columns)
[Original article](https://clickhouse.tech/docs/en/operations/table_engines/hdfs/) <!--hide-->

View File

@ -1,5 +0,0 @@
---
toc_folder_title: Integrations
toc_priority: 30
---

View File

@ -1,88 +0,0 @@
---
toc_priority: 34
toc_title: JDBC
---
# JDBC {#table-engine-jdbc}
Allows ClickHouse to connect to external databases via [JDBC](https://en.wikipedia.org/wiki/Java_Database_Connectivity).
To implement the JDBC connection, ClickHouse uses the separate program [clickhouse-jdbc-bridge](https://github.com/alex-krash/clickhouse-jdbc-bridge) that should run as a daemon.
This engine supports the [Nullable](../../../sql_reference/data_types/nullable.md) data type.
## Creating a Table {#creating-a-table}
``` sql
CREATE TABLE [IF NOT EXISTS] [db.]table_name
(
columns list...
)
ENGINE = JDBC(dbms_uri, external_database, external_table)
```
**Engine Parameters**
- `dbms_uri` — URI of an external DBMS.
Format: `jdbc:<driver_name>://<host_name>:<port>/?user=<username>&password=<password>`.
Example for MySQL: `jdbc:mysql://localhost:3306/?user=root&password=root`.
- `external_database` — Database in an external DBMS.
- `external_table` — Name of the table in `external_database`.
## Usage Example {#usage-example}
Creating a table in MySQL server by connecting directly with its console client:
``` text
mysql> CREATE TABLE `test`.`test` (
-> `int_id` INT NOT NULL AUTO_INCREMENT,
-> `int_nullable` INT NULL DEFAULT NULL,
-> `float` FLOAT NOT NULL,
-> `float_nullable` FLOAT NULL DEFAULT NULL,
-> PRIMARY KEY (`int_id`));
Query OK, 0 rows affected (0,09 sec)
mysql> insert into test (`int_id`, `float`) VALUES (1,2);
Query OK, 1 row affected (0,00 sec)
mysql> select * from test;
+------+----------+-----+----------+
| int_id | int_nullable | float | float_nullable |
+------+----------+-----+----------+
| 1 | NULL | 2 | NULL |
+------+----------+-----+----------+
1 row in set (0,00 sec)
```
Creating a table in ClickHouse server and selecting data from it:
``` sql
CREATE TABLE jdbc_table
(
`int_id` Int32,
`int_nullable` Nullable(Int32),
`float` Float32,
`float_nullable` Nullable(Float32)
)
ENGINE JDBC('jdbc:mysql://localhost:3306/?user=root&password=root', 'test', 'test')
```
``` sql
SELECT *
FROM jdbc_table
```
``` text
┌─int_id─┬─int_nullable─┬─float─┬─float_nullable─┐
│ 1 │ ᴺᵁᴸᴸ │ 2 │ ᴺᵁᴸᴸ │
└────────┴──────────────┴───────┴────────────────┘
```
## See Also {#see-also}
- [JDBC table function](../../../sql_reference/table_functions/jdbc.md).
[Original article](https://clickhouse.tech/docs/en/operations/table_engines/jdbc/) <!--hide-->

View File

@ -1,174 +0,0 @@
---
toc_priority: 32
toc_title: Kafka
---
# Kafka {#kafka}
This engine works with [Apache Kafka](http://kafka.apache.org/).
Kafka lets you:
- Publish or subscribe to data flows.
- Organize fault-tolerant storage.
- Process streams as they become available.
## Creating a Table {#table_engine-kafka-creating-a-table}
``` sql
CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
(
name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1],
name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2],
...
) ENGINE = Kafka()
SETTINGS
kafka_broker_list = 'host:port',
kafka_topic_list = 'topic1,topic2,...',
kafka_group_name = 'group_name',
kafka_format = 'data_format'[,]
[kafka_row_delimiter = 'delimiter_symbol',]
[kafka_schema = '',]
[kafka_num_consumers = N,]
[kafka_skip_broken_messages = N]
```
Required parameters:
- `kafka_broker_list` A comma-separated list of brokers (for example, `localhost:9092`).
- `kafka_topic_list` A list of Kafka topics.
- `kafka_group_name` A group of Kafka consumers. Reading margins are tracked for each group separately. If you dont want messages to be duplicated in the cluster, use the same group name everywhere.
- `kafka_format` Message format. Uses the same notation as the SQL `FORMAT` function, such as `JSONEachRow`. For more information, see the [Formats](../../../interfaces/formats.md) section.
Optional parameters:
- `kafka_row_delimiter` Delimiter character, which ends the message.
- `kafka_schema` Parameter that must be used if the format requires a schema definition. For example, [Capn Proto](https://capnproto.org/) requires the path to the schema file and the name of the root `schema.capnp:Message` object.
- `kafka_num_consumers` The number of consumers per table. Default: `1`. Specify more consumers if the throughput of one consumer is insufficient. The total number of consumers should not exceed the number of partitions in the topic, since only one consumer can be assigned per partition.
- `kafka_skip_broken_messages` Kafka message parser tolerance to schema-incompatible messages per block. Default: `0`. If `kafka_skip_broken_messages = N` then the engine skips *N* Kafka messages that cannot be parsed (a message equals a row of data).
Examples:
``` sql
CREATE TABLE queue (
timestamp UInt64,
level String,
message String
) ENGINE = Kafka('localhost:9092', 'topic', 'group1', 'JSONEachRow');
SELECT * FROM queue LIMIT 5;
CREATE TABLE queue2 (
timestamp UInt64,
level String,
message String
) ENGINE = Kafka SETTINGS kafka_broker_list = 'localhost:9092',
kafka_topic_list = 'topic',
kafka_group_name = 'group1',
kafka_format = 'JSONEachRow',
kafka_num_consumers = 4;
CREATE TABLE queue2 (
timestamp UInt64,
level String,
message String
) ENGINE = Kafka('localhost:9092', 'topic', 'group1')
SETTINGS kafka_format = 'JSONEachRow',
kafka_num_consumers = 4;
```
<details markdown="1">
<summary>Deprecated Method for Creating a Table</summary>
!!! attention "Attention"
Do not use this method in new projects. If possible, switch old projects to the method described above.
``` sql
Kafka(kafka_broker_list, kafka_topic_list, kafka_group_name, kafka_format
[, kafka_row_delimiter, kafka_schema, kafka_num_consumers, kafka_skip_broken_messages])
```
</details>
## Description {#description}
The delivered messages are tracked automatically, so each message in a group is only counted once. If you want to get the data twice, then create a copy of the table with another group name.
Groups are flexible and synced on the cluster. For instance, if you have 10 topics and 5 copies of a table in a cluster, then each copy gets 2 topics. If the number of copies changes, the topics are redistributed across the copies automatically. Read more about this at http://kafka.apache.org/intro.
`SELECT` is not particularly useful for reading messages (except for debugging), because each message can be read only once. It is more practical to create real-time threads using materialized views. To do this:
1. Use the engine to create a Kafka consumer and consider it a data stream.
2. Create a table with the desired structure.
3. Create a materialized view that converts data from the engine and puts it into a previously created table.
When the `MATERIALIZED VIEW` joins the engine, it starts collecting data in the background. This allows you to continually receive messages from Kafka and convert them to the required format using `SELECT`.
One kafka table can have as many materialized views as you like, they do not read data from the kafka table directly, but receive new records (in blocks), this way you can write to several tables with different detail level (with grouping - aggregation and without).
Example:
``` sql
CREATE TABLE queue (
timestamp UInt64,
level String,
message String
) ENGINE = Kafka('localhost:9092', 'topic', 'group1', 'JSONEachRow');
CREATE TABLE daily (
day Date,
level String,
total UInt64
) ENGINE = SummingMergeTree(day, (day, level), 8192);
CREATE MATERIALIZED VIEW consumer TO daily
AS SELECT toDate(toDateTime(timestamp)) AS day, level, count() as total
FROM queue GROUP BY day, level;
SELECT level, sum(total) FROM daily GROUP BY level;
```
To improve performance, received messages are grouped into blocks the size of [max\_insert\_block\_size](../../../operations/server_configuration_parameters/settings.md#settings-max_insert_block_size). If the block wasnt formed within [stream\_flush\_interval\_ms](../../../operations/server_configuration_parameters/settings.md) milliseconds, the data will be flushed to the table regardless of the completeness of the block.
To stop receiving topic data or to change the conversion logic, detach the materialized view:
``` sql
DETACH TABLE consumer;
ATTACH TABLE consumer;
```
If you want to change the target table by using `ALTER`, we recommend disabling the material view to avoid discrepancies between the target table and the data from the view.
## Configuration {#configuration}
Similar to GraphiteMergeTree, the Kafka engine supports extended configuration using the ClickHouse config file. There are two configuration keys that you can use: global (`kafka`) and topic-level (`kafka_*`). The global configuration is applied first, and then the topic-level configuration is applied (if it exists).
``` xml
<!-- Global configuration options for all tables of Kafka engine type -->
<kafka>
<debug>cgrp</debug>
<auto_offset_reset>smallest</auto_offset_reset>
</kafka>
<!-- Configuration specific for topic "logs" -->
<kafka_logs>
<retry_backoff_ms>250</retry_backoff_ms>
<fetch_min_bytes>100000</fetch_min_bytes>
</kafka_logs>
```
For a list of possible configuration options, see the [librdkafka configuration reference](https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md). Use the underscore (`_`) instead of a dot in the ClickHouse configuration. For example, `check.crcs=true` will be `<check_crcs>true</check_crcs>`.
## Virtual Columns {#virtual-columns}
- `_topic` — Kafka topic.
- `_key` — Key of the message.
- `_offset` — Offset of the message.
- `_timestamp` — Timestamp of the message.
- `_partition` — Partition of Kafka topic.
**See Also**
- [Virtual columns](../index.md#table_engines-virtual_columns)
[Original article](https://clickhouse.tech/docs/en/operations/table_engines/kafka/) <!--hide-->

View File

@ -1,103 +0,0 @@
---
toc_priority: 33
toc_title: MySQL
---
# Mysql {#mysql}
The MySQL engine allows you to perform `SELECT` queries on data that is stored on a remote MySQL server.
## Creating a Table {#creating-a-table}
``` sql
CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
(
name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1] [TTL expr1],
name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2] [TTL expr2],
...
) ENGINE = MySQL('host:port', 'database', 'table', 'user', 'password'[, replace_query, 'on_duplicate_clause']);
```
See a detailed description of the [CREATE TABLE](../../../sql_reference/statements/create.md#create-table-query) query.
The table structure can differ from the original MySQL table structure:
- Column names should be the same as in the original MySQL table, but you can use just some of these columns and in any order.
- Column types may differ from those in the original MySQL table. ClickHouse tries to [cast](../../../sql_reference/functions/type_conversion_functions.md#type_conversion_function-cast) values to the ClickHouse data types.
**Engine Parameters**
- `host:port` — MySQL server address.
- `database` — Remote database name.
- `table` — Remote table name.
- `user` — MySQL user.
- `password` — User password.
- `replace_query` — Flag that converts `INSERT INTO` queries to `REPLACE INTO`. If `replace_query=1`, the query is substituted.
- `on_duplicate_clause` — The `ON DUPLICATE KEY on_duplicate_clause` expression that is added to the `INSERT` query.
Example: `INSERT INTO t (c1,c2) VALUES ('a', 2) ON DUPLICATE KEY UPDATE c2 = c2 + 1`, where `on_duplicate_clause` is `UPDATE c2 = c2 + 1`. See the [MySQL documentation](https://dev.mysql.com/doc/refman/8.0/en/insert-on-duplicate.html) to find which `on_duplicate_clause` you can use with the `ON DUPLICATE KEY` clause.
To specify `on_duplicate_clause` you need to pass `0` to the `replace_query` parameter. If you simultaneously pass `replace_query = 1` and `on_duplicate_clause`, ClickHouse generates an exception.
Simple `WHERE` clauses such as `=, !=, >, >=, <, <=` are executed on the MySQL server.
The rest of the conditions and the `LIMIT` sampling constraint are executed in ClickHouse only after the query to MySQL finishes.
## Usage Example {#usage-example}
Table in MySQL:
``` text
mysql> CREATE TABLE `test`.`test` (
-> `int_id` INT NOT NULL AUTO_INCREMENT,
-> `int_nullable` INT NULL DEFAULT NULL,
-> `float` FLOAT NOT NULL,
-> `float_nullable` FLOAT NULL DEFAULT NULL,
-> PRIMARY KEY (`int_id`));
Query OK, 0 rows affected (0,09 sec)
mysql> insert into test (`int_id`, `float`) VALUES (1,2);
Query OK, 1 row affected (0,00 sec)
mysql> select * from test;
+------+----------+-----+----------+
| int_id | int_nullable | float | float_nullable |
+------+----------+-----+----------+
| 1 | NULL | 2 | NULL |
+------+----------+-----+----------+
1 row in set (0,00 sec)
```
Table in ClickHouse, retrieving data from the MySQL table created above:
``` sql
CREATE TABLE mysql_table
(
`float_nullable` Nullable(Float32),
`int_id` Int32
)
ENGINE = MySQL('localhost:3306', 'test', 'test', 'bayonet', '123')
```
``` sql
SELECT * FROM mysql_table
```
``` text
┌─float_nullable─┬─int_id─┐
│ ᴺᵁᴸᴸ │ 1 │
└────────────────┴────────┘
```
## See Also {#see-also}
- [The mysql table function](../../../sql_reference/table_functions/mysql.md)
- [Using MySQL as a source of external dictionary](../../../sql_reference/dictionaries/external_dictionaries/external_dicts_dict_sources.md#dicts-external_dicts_dict_sources-mysql)
[Original article](https://clickhouse.tech/docs/en/operations/table_engines/mysql/) <!--hide-->

View File

@ -1,130 +0,0 @@
---
toc_priority: 35
toc_title: ODBC
---
# ODBC {#table-engine-odbc}
Allows ClickHouse to connect to external databases via [ODBC](https://en.wikipedia.org/wiki/Open_Database_Connectivity).
To safely implement ODBC connections, ClickHouse uses a separate program `clickhouse-odbc-bridge`. If the ODBC driver is loaded directly from `clickhouse-server`, driver problems can crash the ClickHouse server. ClickHouse automatically starts `clickhouse-odbc-bridge` when it is required. The ODBC bridge program is installed from the same package as the `clickhouse-server`.
This engine supports the [Nullable](../../../sql_reference/data_types/nullable.md) data type.
## Creating a Table {#creating-a-table}
``` sql
CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
(
name1 [type1],
name2 [type2],
...
)
ENGINE = ODBC(connection_settings, external_database, external_table)
```
See a detailed description of the [CREATE TABLE](../../../sql_reference/statements/create.md#create-table-query) query.
The table structure can differ from the source table structure:
- Column names should be the same as in the source table, but you can use just some of these columns and in any order.
- Column types may differ from those in the source table. ClickHouse tries to [cast](../../../sql_reference/functions/type_conversion_functions.md#type_conversion_function-cast) values to the ClickHouse data types.
**Engine Parameters**
- `connection_settings` — Name of the section with connection settings in the `odbc.ini` file.
- `external_database` — Name of a database in an external DBMS.
- `external_table` — Name of a table in the `external_database`.
## Usage Example {#usage-example}
**Retrieving data from the local MySQL installation via ODBC**
This example is checked for Ubuntu Linux 18.04 and MySQL server 5.7.
Ensure that unixODBC and MySQL Connector are installed.
By default (if installed from packages), ClickHouse starts as user `clickhouse`. Thus, you need to create and configure this user in the MySQL server.
``` bash
$ sudo mysql
```
``` sql
mysql> CREATE USER 'clickhouse'@'localhost' IDENTIFIED BY 'clickhouse';
mysql> GRANT ALL PRIVILEGES ON *.* TO 'clickhouse'@'clickhouse' WITH GRANT OPTION;
```
Then configure the connection in `/etc/odbc.ini`.
``` bash
$ cat /etc/odbc.ini
[mysqlconn]
DRIVER = /usr/local/lib/libmyodbc5w.so
SERVER = 127.0.0.1
PORT = 3306
DATABASE = test
USERNAME = clickhouse
PASSWORD = clickhouse
```
You can check the connection using the `isql` utility from the unixODBC installation.
``` bash
$ isql -v mysqlconn
+-------------------------+
| Connected! |
| |
...
```
Table in MySQL:
``` text
mysql> CREATE TABLE `test`.`test` (
-> `int_id` INT NOT NULL AUTO_INCREMENT,
-> `int_nullable` INT NULL DEFAULT NULL,
-> `float` FLOAT NOT NULL,
-> `float_nullable` FLOAT NULL DEFAULT NULL,
-> PRIMARY KEY (`int_id`));
Query OK, 0 rows affected (0,09 sec)
mysql> insert into test (`int_id`, `float`) VALUES (1,2);
Query OK, 1 row affected (0,00 sec)
mysql> select * from test;
+------+----------+-----+----------+
| int_id | int_nullable | float | float_nullable |
+------+----------+-----+----------+
| 1 | NULL | 2 | NULL |
+------+----------+-----+----------+
1 row in set (0,00 sec)
```
Table in ClickHouse, retrieving data from the MySQL table:
``` sql
CREATE TABLE odbc_t
(
`int_id` Int32,
`float_nullable` Nullable(Float32)
)
ENGINE = ODBC('DSN=mysqlconn', 'test', 'test')
```
``` sql
SELECT * FROM odbc_t
```
``` text
┌─int_id─┬─float_nullable─┐
│ 1 │ ᴺᵁᴸᴸ │
└────────┴────────────────┘
```
## See Also {#see-also}
- [ODBC external dictionaries](../../../sql_reference/dictionaries/external_dictionaries/external_dicts_dict_sources.md#dicts-external_dicts_dict_sources-odbc)
- [ODBC table function](../../../sql_reference/table_functions/odbc.md)
[Original article](https://clickhouse.tech/docs/en/operations/table_engines/odbc/) <!--hide-->

View File

@ -1,5 +0,0 @@
---
toc_folder_title: Log Family
toc_priority: 29
---

View File

@ -1,14 +0,0 @@
---
toc_priority: 33
toc_title: Log
---
# Log {#log}
Engine belongs to the family of log engines. See the common properties of log engines and their differences in the [Log Engine Family](log_family.md) article.
Log differs from [TinyLog](tinylog.md) in that a small file of “marks” resides with the column files. These marks are written on every data block and contain offsets that indicate where to start reading the file in order to skip the specified number of rows. This makes it possible to read table data in multiple threads.
For concurrent data access, the read operations can be performed simultaneously, while write operations block reads and each other.
The Log engine does not support indexes. Similarly, if writing to a table failed, the table is broken, and reading from it returns an error. The Log engine is appropriate for temporary data, write-once tables, and for testing or demonstration purposes.
[Original article](https://clickhouse.tech/docs/en/operations/table_engines/log/) <!--hide-->

View File

@ -1,44 +0,0 @@
---
toc_priority: 31
toc_title: Introduction
---
# Log Engine Family {#log-engine-family}
These engines were developed for scenarios when you need to quickly write many small tables (up to about 1 million rows) and read them later as a whole.
Engines of the family:
- [StripeLog](stripelog.md)
- [Log](log.md)
- [TinyLog](tinylog.md)
## Common Properties {#common-properties}
Engines:
- Store data on a disk.
- Append data to the end of file when writing.
- Support locks for concurrent data access.
During `INSERT` queries, the table is locked, and other queries for reading and writing data both wait for the table to unlock. If there are no data writing queries, any number of data reading queries can be performed concurrently.
- Do not support [mutation](../../../sql_reference/statements/alter.md#alter-mutations) operations.
- Do not support indexes.
This means that `SELECT` queries for ranges of data are not efficient.
- Do not write data atomically.
You can get a table with corrupted data if something breaks the write operation, for example, abnormal server shutdown.
## Differences {#differences}
The `TinyLog` engine is the simplest in the family and provides the poorest functionality and lowest efficiency. The `TinyLog` engine doesnt support parallel data reading by several threads. It reads data slower than other engines in the family that support parallel reading and it uses almost as many descriptors as the `Log` engine because it stores each column in a separate file. Use it in simple low-load scenarios.
The `Log` and `StripeLog` engines support parallel data reading. When reading data, ClickHouse uses multiple threads. Each thread processes a separate data block. The `Log` engine uses a separate file for each column of the table. `StripeLog` stores all the data in one file. As a result, the `StripeLog` engine uses fewer descriptors in the operating system, but the `Log` engine provides higher efficiency when reading data.
[Original article](https://clickhouse.tech/docs/en/operations/table_engines/log_family/) <!--hide-->

View File

@ -1,93 +0,0 @@
---
toc_priority: 32
toc_title: StripeLog
---
# Stripelog {#stripelog}
This engine belongs to the family of log engines. See the common properties of log engines and their differences in the [Log Engine Family](log_family.md) article.
Use this engine in scenarios when you need to write many tables with a small amount of data (less than 1 million rows).
## Creating a Table {#table_engines-stripelog-creating-a-table}
``` sql
CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
(
column1_name [type1] [DEFAULT|MATERIALIZED|ALIAS expr1],
column2_name [type2] [DEFAULT|MATERIALIZED|ALIAS expr2],
...
) ENGINE = StripeLog
```
See the detailed description of the [CREATE TABLE](../../../sql_reference/statements/create.md#create-table-query) query.
## Writing the Data {#table_engines-stripelog-writing-the-data}
The `StripeLog` engine stores all the columns in one file. For each `INSERT` query, ClickHouse appends the data block to the end of a table file, writing columns one by one.
For each table ClickHouse writes the files:
- `data.bin` — Data file.
- `index.mrk` — File with marks. Marks contain offsets for each column of each data block inserted.
The `StripeLog` engine does not support the `ALTER UPDATE` and `ALTER DELETE` operations.
## Reading the Data {#table_engines-stripelog-reading-the-data}
The file with marks allows ClickHouse to parallelize the reading of data. This means that a `SELECT` query returns rows in an unpredictable order. Use the `ORDER BY` clause to sort rows.
## Example Of Use {#table_engines-stripelog-example-of-use}
Creating a table:
``` sql
CREATE TABLE stripe_log_table
(
timestamp DateTime,
message_type String,
message String
)
ENGINE = StripeLog
```
Inserting data:
``` sql
INSERT INTO stripe_log_table VALUES (now(),'REGULAR','The first regular message')
INSERT INTO stripe_log_table VALUES (now(),'REGULAR','The second regular message'),(now(),'WARNING','The first warning message')
```
We used two `INSERT` queries to create two data blocks inside the `data.bin` file.
ClickHouse uses multiple threads when selecting data. Each thread reads a separate data block and returns resulting rows independently as it finishes. As a result, the order of blocks of rows in the output does not match the order of the same blocks in the input in most cases. For example:
``` sql
SELECT * FROM stripe_log_table
```
``` text
┌───────────timestamp─┬─message_type─┬─message────────────────────┐
│ 2019-01-18 14:27:32 │ REGULAR │ The second regular message │
│ 2019-01-18 14:34:53 │ WARNING │ The first warning message │
└─────────────────────┴──────────────┴────────────────────────────┘
┌───────────timestamp─┬─message_type─┬─message───────────────────┐
│ 2019-01-18 14:23:43 │ REGULAR │ The first regular message │
└─────────────────────┴──────────────┴───────────────────────────┘
```
Sorting the results (ascending order by default):
``` sql
SELECT * FROM stripe_log_table ORDER BY timestamp
```
``` text
┌───────────timestamp─┬─message_type─┬─message────────────────────┐
│ 2019-01-18 14:23:43 │ REGULAR │ The first regular message │
│ 2019-01-18 14:27:32 │ REGULAR │ The second regular message │
│ 2019-01-18 14:34:53 │ WARNING │ The first warning message │
└─────────────────────┴──────────────┴────────────────────────────┘
```
[Original article](https://clickhouse.tech/docs/en/operations/table_engines/stripelog/) <!--hide-->

View File

@ -1,14 +0,0 @@
---
toc_priority: 34
toc_title: TinyLog
---
# TinyLog {#tinylog}
The engine belongs to the log engine family. See [Log Engine Family](log_family.md) for common properties of log engines and their differences.
This table engine is typically used with the write-once method: write data one time, then read it as many times as necessary. For example, you can use `TinyLog`-type tables for intermediary data that is processed in small batches. Note that storing data in a large number of small tables is inefficient.
Queries are executed in a single stream. In other words, this engine is intended for relatively small tables (up to about 1,000,000 rows). It makes sense to use this table engine if you have many small tables, since its simpler than the [Log](log.md) engine (fewer files need to be opened).
[Original article](https://clickhouse.tech/docs/en/operations/table_engines/tinylog/) <!--hide-->

View File

@ -1,103 +0,0 @@
---
toc_priority: 35
toc_title: AggregatingMergeTree
---
# Aggregatingmergetree {#aggregatingmergetree}
The engine inherits from [MergeTree](mergetree.md#table_engines-mergetree), altering the logic for data parts merging. ClickHouse replaces all rows with the same primary key (or more accurately, with the same [sorting key](mergetree.md)) with a single row (within a one data part) that stores a combination of states of aggregate functions.
You can use `AggregatingMergeTree` tables for incremental data aggregation, including for aggregated materialized views.
The engine processes all columns with the following types:
- [AggregateFunction](../../../sql_reference/data_types/aggregatefunction.md)
- [SimpleAggregateFunction](../../../sql_reference/data_types/simpleaggregatefunction.md)
It is appropriate to use `AggregatingMergeTree` if it reduces the number of rows by orders.
## Creating a Table {#creating-a-table}
``` sql
CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
(
name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1],
name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2],
...
) ENGINE = AggregatingMergeTree()
[PARTITION BY expr]
[ORDER BY expr]
[SAMPLE BY expr]
[TTL expr]
[SETTINGS name=value, ...]
```
For a description of request parameters, see [request description](../../../sql_reference/statements/create.md).
**Query clauses**
When creating a `AggregatingMergeTree` table the same [clauses](mergetree.md) are required, as when creating a `MergeTree` table.
<details markdown="1">
<summary>Deprecated Method for Creating a Table</summary>
!!! attention "Attention"
Do not use this method in new projects and, if possible, switch the old projects to the method described above.
``` sql
CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
(
name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1],
name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2],
...
) ENGINE [=] AggregatingMergeTree(date-column [, sampling_expression], (primary, key), index_granularity)
```
All of the parameters have the same meaning as in `MergeTree`.
</details>
## SELECT and INSERT {#select-and-insert}
To insert data, use [INSERT SELECT](../../../sql_reference/statements/insert_into.md) query with aggregate -State- functions.
When selecting data from `AggregatingMergeTree` table, use `GROUP BY` clause and the same aggregate functions as when inserting data, but using `-Merge` suffix.
In the results of `SELECT` query, the values of `AggregateFunction` type have implementation-specific binary representation for all of the ClickHouse output formats. If dump data into, for example, `TabSeparated` format with `SELECT` query then this dump can be loaded back using `INSERT` query.
## Example Of an Aggregated Materialized View {#example-of-an-aggregated-materialized-view}
`AggregatingMergeTree` materialized view that watches the `test.visits` table:
``` sql
CREATE MATERIALIZED VIEW test.basic
ENGINE = AggregatingMergeTree() PARTITION BY toYYYYMM(StartDate) ORDER BY (CounterID, StartDate)
AS SELECT
CounterID,
StartDate,
sumState(Sign) AS Visits,
uniqState(UserID) AS Users
FROM test.visits
GROUP BY CounterID, StartDate;
```
Inserting data into the `test.visits` table.
``` sql
INSERT INTO test.visits ...
```
The data are inserted in both the table and view `test.basic` that will perform the aggregation.
To get the aggregated data, we need to execute a query such as `SELECT ... GROUP BY ...` from the view `test.basic`:
``` sql
SELECT
StartDate,
sumMerge(Visits) AS Visits,
uniqMerge(Users) AS Users
FROM test.basic
GROUP BY StartDate
ORDER BY StartDate;
```
[Original article](https://clickhouse.tech/docs/en/operations/table_engines/aggregatingmergetree/) <!--hide-->

View File

@ -1,307 +0,0 @@
---
toc_priority: 36
toc_title: CollapsingMergeTree
---
# CollapsingMergeTree {#table_engine-collapsingmergetree}
The engine inherits from [MergeTree](mergetree.md) and adds the logic of rows collapsing to data parts merge algorithm.
`CollapsingMergeTree` asynchronously deletes (collapses) pairs of rows if all of the fields in a sorting key (`ORDER BY`) are equivalent excepting the particular field `Sign` which can have `1` and `-1` values. Rows without a pair are kept. For more details see the [Collapsing](#table_engine-collapsingmergetree-collapsing) section of the document.
The engine may significantly reduce the volume of storage and increase the efficiency of `SELECT` query as a consequence.
## Creating a Table {#creating-a-table}
``` sql
CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
(
name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1],
name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2],
...
) ENGINE = CollapsingMergeTree(sign)
[PARTITION BY expr]
[ORDER BY expr]
[SAMPLE BY expr]
[SETTINGS name=value, ...]
```
For a description of query parameters, see [query description](../../../sql_reference/statements/create.md).
**CollapsingMergeTree Parameters**
- `sign` — Name of the column with the type of row: `1` is a “state” row, `-1` is a “cancel” row.
Column data type — `Int8`.
**Query clauses**
When creating a `CollapsingMergeTree` table, the same [query clauses](mergetree.md#table_engine-mergetree-creating-a-table) are required, as when creating a `MergeTree` table.
<details markdown="1">
<summary>Deprecated Method for Creating a Table</summary>
!!! attention "Attention"
Do not use this method in new projects and, if possible, switch the old projects to the method described above.
``` sql
CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
(
name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1],
name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2],
...
) ENGINE [=] CollapsingMergeTree(date-column [, sampling_expression], (primary, key), index_granularity, sign)
```
All of the parameters excepting `sign` have the same meaning as in `MergeTree`.
- `sign` — Name of the column with the type of row: `1` — “state” row, `-1` — “cancel” row.
Column Data Type — `Int8`.
</details>
## Collapsing {#table_engine-collapsingmergetree-collapsing}
### Data {#data}
Consider the situation where you need to save continually changing data for some object. It sounds logical to have one row for an object and update it at any change, but update operation is expensive and slow for DBMS because it requires rewriting of the data in the storage. If you need to write data quickly, update not acceptable, but you can write the changes of an object sequentially as follows.
Use the particular column `Sign`. If `Sign = 1` it means that the row is a state of an object, lets call it “state” row. If `Sign = -1` it means the cancellation of the state of an object with the same attributes, lets call it “cancel” row.
For example, we want to calculate how much pages users checked at some site and how long they were there. At some moment we write the following row with the state of user activity:
``` text
┌──────────────UserID─┬─PageViews─┬─Duration─┬─Sign─┐
│ 4324182021466249494 │ 5 │ 146 │ 1 │
└─────────────────────┴───────────┴──────────┴──────┘
```
At some moment later we register the change of user activity and write it with the following two rows.
``` text
┌──────────────UserID─┬─PageViews─┬─Duration─┬─Sign─┐
│ 4324182021466249494 │ 5 │ 146 │ -1 │
│ 4324182021466249494 │ 6 │ 185 │ 1 │
└─────────────────────┴───────────┴──────────┴──────┘
```
The first row cancels the previous state of the object (user). It should copy the sorting key fields of the cancelled state excepting `Sign`.
The second row contains the current state.
As we need only the last state of user activity, the rows
``` text
┌──────────────UserID─┬─PageViews─┬─Duration─┬─Sign─┐
│ 4324182021466249494 │ 5 │ 146 │ 1 │
│ 4324182021466249494 │ 5 │ 146 │ -1 │
└─────────────────────┴───────────┴──────────┴──────┘
```
can be deleted collapsing the invalid (old) state of an object. `CollapsingMergeTree` does this while merging of the data parts.
Why we need 2 rows for each change read in the [Algorithm](#table_engine-collapsingmergetree-collapsing-algorithm) paragraph.
**Peculiar properties of such approach**
1. The program that writes the data should remember the state of an object to be able to cancel it. “Cancel” string should contain copies of the sorting key fields of the “state” string and the opposite `Sign`. It increases the initial size of storage but allows to write the data quickly.
2. Long growing arrays in columns reduce the efficiency of the engine due to load for writing. The more straightforward data, the higher the efficiency.
3. The `SELECT` results depend strongly on the consistency of object changes history. Be accurate when preparing data for inserting. You can get unpredictable results in inconsistent data, for example, negative values for non-negative metrics such as session depth.
### Algorithm {#table_engine-collapsingmergetree-collapsing-algorithm}
When ClickHouse merges data parts, each group of consecutive rows with the same sorting key (`ORDER BY`) is reduced to not more than two rows, one with `Sign = 1` (“state” row) and another with `Sign = -1` (“cancel” row). In other words, entries collapse.
For each resulting data part ClickHouse saves:
1. The first “cancel” and the last “state” rows, if the number of “state” and “cancel” rows matches and the last row is a “state” row.
2. The last “state” row, if there are more “state” rows than “cancel” rows.
3. The first “cancel” row, if there are more “cancel” rows than “state” rows.
4. None of the rows, in all other cases.
Also when there are at least 2 more “state” rows than “cancel” rows, or at least 2 more “cancel” rows then “state” rows, the merge continues, but ClickHouse treats this situation as a logical error and records it in the server log. This error can occur if the same data were inserted more than once.
Thus, collapsing should not change the results of calculating statistics.
Changes gradually collapsed so that in the end only the last state of almost every object left.
The `Sign` is required because the merging algorithm doesnt guarantee that all of the rows with the same sorting key will be in the same resulting data part and even on the same physical server. ClickHouse process `SELECT` queries with multiple threads, and it can not predict the order of rows in the result. The aggregation is required if there is a need to get completely “collapsed” data from `CollapsingMergeTree` table.
To finalize collapsing, write a query with `GROUP BY` clause and aggregate functions that account for the sign. For example, to calculate quantity, use `sum(Sign)` instead of `count()`. To calculate the sum of something, use `sum(Sign * x)` instead of `sum(x)`, and so on, and also add `HAVING sum(Sign) > 0`.
The aggregates `count`, `sum` and `avg` could be calculated this way. The aggregate `uniq` could be calculated if an object has at least one state not collapsed. The aggregates `min` and `max` could not be calculated because `CollapsingMergeTree` does not save the values history of the collapsed states.
If you need to extract data without aggregation (for example, to check whether rows are present whose newest values match certain conditions), you can use the `FINAL` modifier for the `FROM` clause. This approach is significantly less efficient.
## Example Of Use {#example-of-use}
Example data:
``` text
┌──────────────UserID─┬─PageViews─┬─Duration─┬─Sign─┐
│ 4324182021466249494 │ 5 │ 146 │ 1 │
│ 4324182021466249494 │ 5 │ 146 │ -1 │
│ 4324182021466249494 │ 6 │ 185 │ 1 │
└─────────────────────┴───────────┴──────────┴──────┘
```
Creation of the table:
``` sql
CREATE TABLE UAct
(
UserID UInt64,
PageViews UInt8,
Duration UInt8,
Sign Int8
)
ENGINE = CollapsingMergeTree(Sign)
ORDER BY UserID
```
Insertion of the data:
``` sql
INSERT INTO UAct VALUES (4324182021466249494, 5, 146, 1)
```
``` sql
INSERT INTO UAct VALUES (4324182021466249494, 5, 146, -1),(4324182021466249494, 6, 185, 1)
```
We use two `INSERT` queries to create two different data parts. If we insert the data with one query ClickHouse creates one data part and will not perform any merge ever.
Getting the data:
``` sql
SELECT * FROM UAct
```
``` text
┌──────────────UserID─┬─PageViews─┬─Duration─┬─Sign─┐
│ 4324182021466249494 │ 5 │ 146 │ -1 │
│ 4324182021466249494 │ 6 │ 185 │ 1 │
└─────────────────────┴───────────┴──────────┴──────┘
┌──────────────UserID─┬─PageViews─┬─Duration─┬─Sign─┐
│ 4324182021466249494 │ 5 │ 146 │ 1 │
└─────────────────────┴───────────┴──────────┴──────┘
```
What do we see and where is collapsing?
With two `INSERT` queries, we created 2 data parts. The `SELECT` query was performed in 2 threads, and we got a random order of rows. Collapsing not occurred because there was no merge of the data parts yet. ClickHouse merges data part in an unknown moment which we can not predict.
Thus we need aggregation:
``` sql
SELECT
UserID,
sum(PageViews * Sign) AS PageViews,
sum(Duration * Sign) AS Duration
FROM UAct
GROUP BY UserID
HAVING sum(Sign) > 0
```
``` text
┌──────────────UserID─┬─PageViews─┬─Duration─┐
│ 4324182021466249494 │ 6 │ 185 │
└─────────────────────┴───────────┴──────────┘
```
If we do not need aggregation and want to force collapsing, we can use `FINAL` modifier for `FROM` clause.
``` sql
SELECT * FROM UAct FINAL
```
``` text
┌──────────────UserID─┬─PageViews─┬─Duration─┬─Sign─┐
│ 4324182021466249494 │ 6 │ 185 │ 1 │
└─────────────────────┴───────────┴──────────┴──────┘
```
This way of selecting the data is very inefficient. Dont use it for big tables.
## Example Of Another Approach {#example-of-another-approach}
Example data:
``` text
┌──────────────UserID─┬─PageViews─┬─Duration─┬─Sign─┐
│ 4324182021466249494 │ 5 │ 146 │ 1 │
│ 4324182021466249494 │ -5 │ -146 │ -1 │
│ 4324182021466249494 │ 6 │ 185 │ 1 │
└─────────────────────┴───────────┴──────────┴──────┘
```
The idea is that merges take into account only key fields. And in the “Cancel” line we can specify negative values that equalize the previous version of the row when summing without using the Sign column. For this approach, it is necessary to change the data type `PageViews`,`Duration` to store negative values of UInt8 -\> Int16.
``` sql
CREATE TABLE UAct
(
UserID UInt64,
PageViews Int16,
Duration Int16,
Sign Int8
)
ENGINE = CollapsingMergeTree(Sign)
ORDER BY UserID
```
Lets test the approach:
``` sql
insert into UAct values(4324182021466249494, 5, 146, 1);
insert into UAct values(4324182021466249494, -5, -146, -1);
insert into UAct values(4324182021466249494, 6, 185, 1);
select * from UAct final; // avoid using final in production (just for a test or small tables)
```
``` text
┌──────────────UserID─┬─PageViews─┬─Duration─┬─Sign─┐
│ 4324182021466249494 │ 6 │ 185 │ 1 │
└─────────────────────┴───────────┴──────────┴──────┘
```
``` sql
SELECT
UserID,
sum(PageViews) AS PageViews,
sum(Duration) AS Duration
FROM UAct
GROUP BY UserID
```text
┌──────────────UserID─┬─PageViews─┬─Duration─┐
│ 4324182021466249494 │ 6 │ 185 │
└─────────────────────┴───────────┴──────────┘
```
``` sqk
select count() FROM UAct
```
``` text
┌─count()─┐
│ 3 │
└─────────┘
```
``` sql
optimize table UAct final;
select * FROM UAct
```
``` text
┌──────────────UserID─┬─PageViews─┬─Duration─┬─Sign─┐
│ 4324182021466249494 │ 6 │ 185 │ 1 │
└─────────────────────┴───────────┴──────────┴──────┘
```
[Original article](https://clickhouse.tech/docs/en/operations/table_engines/collapsingmergetree/) <!--hide-->

View File

@ -1,125 +0,0 @@
---
toc_priority: 32
toc_title: Custom Partitioning Key
---
# Custom Partitioning Key {#custom-partitioning-key}
Partitioning is available for the [MergeTree](mergetree.md) family tables (including [replicated](replication.md) tables). [Materialized views](../special/materializedview.md) based on MergeTree tables support partitioning, as well.
A partition is a logical combination of records in a table by a specified criterion. You can set a partition by an arbitrary criterion, such as by month, by day, or by event type. Each partition is stored separately to simplify manipulations of this data. When accessing the data, ClickHouse uses the smallest subset of partitions possible.
The partition is specified in the `PARTITION BY expr` clause when [creating a table](mergetree.md#table_engine-mergetree-creating-a-table). The partition key can be any expression from the table columns. For example, to specify partitioning by month, use the expression `toYYYYMM(date_column)`:
``` sql
CREATE TABLE visits
(
VisitDate Date,
Hour UInt8,
ClientID UUID
)
ENGINE = MergeTree()
PARTITION BY toYYYYMM(VisitDate)
ORDER BY Hour;
```
The partition key can also be a tuple of expressions (similar to the [primary key](mergetree.md#primary-keys-and-indexes-in-queries)). For example:
``` sql
ENGINE = ReplicatedCollapsingMergeTree('/clickhouse/tables/name', 'replica1', Sign)
PARTITION BY (toMonday(StartDate), EventType)
ORDER BY (CounterID, StartDate, intHash32(UserID));
```
In this example, we set partitioning by the event types that occurred during the current week.
When inserting new data to a table, this data is stored as a separate part (chunk) sorted by the primary key. In 10-15 minutes after inserting, the parts of the same partition are merged into the entire part.
!!! info "Info"
A merge only works for data parts that have the same value for the partitioning expression. This means **you shouldnt make overly granular partitions** (more than about a thousand partitions). Otherwise, the `SELECT` query performs poorly because of an unreasonably large number of files in the file system and open file descriptors.
Use the [system.parts](../../../operations/system_tables.md#system_tables-parts) table to view the table parts and partitions. For example, lets assume that we have a `visits` table with partitioning by month. Lets perform the `SELECT` query for the `system.parts` table:
``` sql
SELECT
partition,
name,
active
FROM system.parts
WHERE table = 'visits'
```
``` text
┌─partition─┬─name───────────┬─active─┐
│ 201901 │ 201901_1_3_1 │ 0 │
│ 201901 │ 201901_1_9_2 │ 1 │
│ 201901 │ 201901_8_8_0 │ 0 │
│ 201901 │ 201901_9_9_0 │ 0 │
│ 201902 │ 201902_4_6_1 │ 1 │
│ 201902 │ 201902_10_10_0 │ 1 │
│ 201902 │ 201902_11_11_0 │ 1 │
└───────────┴────────────────┴────────┘
```
The `partition` column contains the names of the partitions. There are two partitions in this example: `201901` and `201902`. You can use this column value to specify the partition name in [ALTER … PARTITION](#alter_manipulations-with-partitions) queries.
The `name` column contains the names of the partition data parts. You can use this column to specify the name of the part in the [ALTER ATTACH PART](#alter_attach-partition) query.
Lets break down the name of the first part: `201901_1_3_1`:
- `201901` is the partition name.
- `1` is the minimum number of the data block.
- `3` is the maximum number of the data block.
- `1` is the chunk level (the depth of the merge tree it is formed from).
!!! info "Info"
The parts of old-type tables have the name: `20190117_20190123_2_2_0` (minimum date - maximum date - minimum block number - maximum block number - level).
The `active` column shows the status of the part. `1` is active; `0` is inactive. The inactive parts are, for example, source parts remaining after merging to a larger part. The corrupted data parts are also indicated as inactive.
As you can see in the example, there are several separated parts of the same partition (for example, `201901_1_3_1` and `201901_1_9_2`). This means that these parts are not merged yet. ClickHouse merges the inserted parts of data periodically, approximately 15 minutes after inserting. In addition, you can perform a non-scheduled merge using the [OPTIMIZE](../../../sql_reference/statements/misc.md#misc_operations-optimize) query. Example:
``` sql
OPTIMIZE TABLE visits PARTITION 201902;
```
``` text
┌─partition─┬─name───────────┬─active─┐
│ 201901 │ 201901_1_3_1 │ 0 │
│ 201901 │ 201901_1_9_2 │ 1 │
│ 201901 │ 201901_8_8_0 │ 0 │
│ 201901 │ 201901_9_9_0 │ 0 │
│ 201902 │ 201902_4_6_1 │ 0 │
│ 201902 │ 201902_4_11_2 │ 1 │
│ 201902 │ 201902_10_10_0 │ 0 │
│ 201902 │ 201902_11_11_0 │ 0 │
└───────────┴────────────────┴────────┘
```
Inactive parts will be deleted approximately 10 minutes after merging.
Another way to view a set of parts and partitions is to go into the directory of the table: `/var/lib/clickhouse/data/<database>/<table>/`. For example:
``` bash
/var/lib/clickhouse/data/default/visits$ ls -l
total 40
drwxr-xr-x 2 clickhouse clickhouse 4096 Feb 1 16:48 201901_1_3_1
drwxr-xr-x 2 clickhouse clickhouse 4096 Feb 5 16:17 201901_1_9_2
drwxr-xr-x 2 clickhouse clickhouse 4096 Feb 5 15:52 201901_8_8_0
drwxr-xr-x 2 clickhouse clickhouse 4096 Feb 5 15:52 201901_9_9_0
drwxr-xr-x 2 clickhouse clickhouse 4096 Feb 5 16:17 201902_10_10_0
drwxr-xr-x 2 clickhouse clickhouse 4096 Feb 5 16:17 201902_11_11_0
drwxr-xr-x 2 clickhouse clickhouse 4096 Feb 5 16:19 201902_4_11_2
drwxr-xr-x 2 clickhouse clickhouse 4096 Feb 5 12:09 201902_4_6_1
drwxr-xr-x 2 clickhouse clickhouse 4096 Feb 1 16:48 detached
```
The folders 201901\_1\_1\_0, 201901\_1\_7\_1 and so on are the directories of the parts. Each part relates to a corresponding partition and contains data just for a certain month (the table in this example has partitioning by month).
The `detached` directory contains parts that were detached from the table using the [DETACH](#alter_detach-partition) query. The corrupted parts are also moved to this directory, instead of being deleted. The server does not use the parts from the `detached` directory. You can add, delete, or modify the data in this directory at any time the server will not know about this until you run the [ATTACH](../../../sql_reference/statements/alter.md#alter_attach-partition) query.
Note that on the operating server, you cannot manually change the set of parts or their data on the file system, since the server will not know about it. For non-replicated tables, you can do this when the server is stopped, but it isnt recommended. For replicated tables, the set of parts cannot be changed in any case.
ClickHouse allows you to perform operations with the partitions: delete them, copy from one table to another, or create a backup. See the list of all operations in the section [Manipulations With Partitions and Parts](../../../sql_reference/statements/alter.md#alter_manipulations-with-partitions).
[Original article](https://clickhouse.tech/docs/en/operations/table_engines/custom_partitioning_key/) <!--hide-->

View File

@ -1,172 +0,0 @@
---
toc_priority: 38
toc_title: GraphiteMergeTree
---
# GraphiteMergeTree {#graphitemergetree}
This engine is designed for thinning and aggregating/averaging (rollup) [Graphite](http://graphite.readthedocs.io/en/latest/index.html) data. It may be helpful to developers who want to use ClickHouse as a data store for Graphite.
You can use any ClickHouse table engine to store the Graphite data if you dont need rollup, but if you need a rollup use `GraphiteMergeTree`. The engine reduces the volume of storage and increases the efficiency of queries from Graphite.
The engine inherits properties from [MergeTree](mergetree.md).
## Creating a Table {#creating-table}
``` sql
CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
(
Path String,
Time DateTime,
Value <Numeric_type>,
Version <Numeric_type>
...
) ENGINE = GraphiteMergeTree(config_section)
[PARTITION BY expr]
[ORDER BY expr]
[SAMPLE BY expr]
[SETTINGS name=value, ...]
```
See a detailed description of the [CREATE TABLE](../../../sql_reference/statements/create.md#create-table-query) query.
A table for the Graphite data should have the following columns for the following data:
- Metric name (Graphite sensor). Data type: `String`.
- Time of measuring the metric. Data type: `DateTime`.
- Value of the metric. Data type: any numeric.
- Version of the metric. Data type: any numeric.
ClickHouse saves the rows with the highest version or the last written if versions are the same. Other rows are deleted during the merge of data parts.
The names of these columns should be set in the rollup configuration.
**GraphiteMergeTree parameters**
- `config_section` — Name of the section in the configuration file, where are the rules of rollup set.
**Query clauses**
When creating a `GraphiteMergeTree` table, the same [clauses](mergetree.md#table_engine-mergetree-creating-a-table) are required, as when creating a `MergeTree` table.
<details markdown="1">
<summary>Deprecated Method for Creating a Table</summary>
!!! attention "Attention"
Do not use this method in new projects and, if possible, switch the old projects to the method described above.
``` sql
CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
(
EventDate Date,
Path String,
Time DateTime,
Value <Numeric_type>,
Version <Numeric_type>
...
) ENGINE [=] GraphiteMergeTree(date-column [, sampling_expression], (primary, key), index_granularity, config_section)
```
All of the parameters excepting `config_section` have the same meaning as in `MergeTree`.
- `config_section` — Name of the section in the configuration file, where are the rules of rollup set.
</details>
## Rollup Configuration {#rollup-configuration}
The settings for rollup are defined by the [graphite\_rollup](../../../operations/server_configuration_parameters/settings.md#server_configuration_parameters-graphite_rollup) parameter in the server configuration. The name of the parameter could be any. You can create several configurations and use them for different tables.
Rollup configuration structure:
required-columns
patterns
### Required Columns {#required-columns}
- `path_column_name` — The name of the column storing the metric name (Graphite sensor). Default value: `Path`.
- `time_column_name` — The name of the column storing the time of measuring the metric. Default value: `Time`.
- `value_column_name` — The name of the column storing the value of the metric at the time set in `time_column_name`. Default value: `Value`.
- `version_column_name` — The name of the column storing the version of the metric. Default value: `Timestamp`.
### Patterns {#patterns}
Structure of the `patterns` section:
``` text
pattern
regexp
function
pattern
regexp
age + precision
...
pattern
regexp
function
age + precision
...
pattern
...
default
function
age + precision
...
```
!!! warning "Attention"
Patterns must be strictly ordered:
1. Patterns without `function` or `retention`.
1. Patterns with both `function` and `retention`.
1. Pattern `default`.
When processing a row, ClickHouse checks the rules in the `pattern` sections. Each of `pattern` (including `default`) sections can contain `function` parameter for aggregation, `retention` parameters or both. If the metric name matches the `regexp`, the rules from the `pattern` section (or sections) are applied; otherwise, the rules from the `default` section are used.
Fields for `pattern` and `default` sections:
- `regexp` A pattern for the metric name.
- `age` The minimum age of the data in seconds.
- `precision` How precisely to define the age of the data in seconds. Should be a divisor for 86400 (seconds in a day).
- `function` The name of the aggregating function to apply to data whose age falls within the range `[age, age + precision]`.
### Configuration Example {#configuration-example}
``` xml
<graphite_rollup>
<version_column_name>Version</version_column_name>
<pattern>
<regexp>click_cost</regexp>
<function>any</function>
<retention>
<age>0</age>
<precision>5</precision>
</retention>
<retention>
<age>86400</age>
<precision>60</precision>
</retention>
</pattern>
<default>
<function>max</function>
<retention>
<age>0</age>
<precision>60</precision>
</retention>
<retention>
<age>3600</age>
<precision>300</precision>
</retention>
<retention>
<age>86400</age>
<precision>3600</precision>
</retention>
</default>
</graphite_rollup>
```
[Original article](https://clickhouse.tech/docs/en/operations/table_engines/graphitemergetree/) <!--hide-->

View File

@ -1,5 +0,0 @@
---
toc_folder_title: MergeTree Family
toc_priority: 28
---

Some files were not shown because too many files have changed in this diff Show More