Merge branch 'master' into orc_output_format

Nikita Mikhaylov 2020-06-23 15:07:05 +04:00 committed by GitHub
commit 717f63923a
1472 changed files with 33756 additions and 13699 deletions

View File

@ -13,9 +13,3 @@ ClickHouse is an open-source column-oriented database management system that all
* [Yandex.Messenger channel](https://yandex.ru/chat/#/join/20e380d9-c7be-4123-ab06-e95fb946975e) shares announcements and useful links in Russian.
* [Contacts](https://clickhouse.tech/#contacts) can help to get your questions answered if there are any.
* You can also [fill this form](https://clickhouse.tech/#meet) to meet Yandex ClickHouse team in person.
## Upcoming Events
* [ClickHouse Online Meetup (in Russian)](https://events.yandex.ru/events/click-house-onlajn-vs-18-06-2020) on June 18, 2020.
* [ClickHouse Workshop in Novosibirsk](https://2020.codefest.ru/lecture/1628) on TBD date.
* [Yandex C++ Open-Source Sprints in Moscow](https://events.yandex.ru/events/otkrytyj-kod-v-yandek-28-03-2020) on TBD date.

View File

@ -75,7 +75,7 @@ std::string determineDefaultTimeZone()
try
{
tz_database_path = fs::canonical(tz_database_path);
tz_database_path = fs::weakly_canonical(tz_database_path);
/// The tzdata file exists. If it is inside the tz_database_dir,
/// then the relative path is the time zone id.
@ -91,7 +91,7 @@ std::string determineDefaultTimeZone()
if (!tz_file_path.is_absolute())
tz_file_path = tz_database_path / tz_file_path;
tz_file_path = fs::canonical(tz_file_path);
tz_file_path = fs::weakly_canonical(tz_file_path);
fs::path relative_path = tz_file_path.lexically_relative(tz_database_path);
if (!relative_path.empty() && *relative_path.begin() != ".." && *relative_path.begin() != ".")

View File

@ -38,6 +38,7 @@
#include <common/argsToConfig.h>
#include <common/getThreadId.h>
#include <common/coverage.h>
#include <common/sleep.h>
#include <IO/WriteBufferFromFile.h>
#include <IO/WriteBufferFromFileDescriptorDiscardOnFailure.h>
@ -50,6 +51,7 @@
#include <Common/getMultipleKeysFromConfig.h>
#include <Common/ClickHouseRevision.h>
#include <Common/Config/ConfigProcessor.h>
#include <Common/SymbolIndex.h>
#if !defined(ARCADIA_BUILD)
# include <Common/config_version.h>
@ -83,7 +85,8 @@ static const size_t signal_pipe_buf_size =
+ sizeof(ucontext_t)
+ sizeof(StackTrace)
+ sizeof(UInt32)
+ max_query_id_size + 1; /// query_id + varint encoded length
+ max_query_id_size + 1 /// query_id + varint encoded length
+ sizeof(void*);
using signal_function = void(int, siginfo_t*, void*);
@ -133,13 +136,14 @@ static void signalHandler(int sig, siginfo_t * info, void * context)
DB::writePODBinary(stack_trace, out);
DB::writeBinary(UInt32(getThreadId()), out);
DB::writeStringBinary(query_id, out);
DB::writePODBinary(DB::current_thread, out);
out.next();
if (sig != SIGTSTP) /// This signal is used for debugging.
{
/// The time that is usually enough for separate thread to print info into log.
::sleep(10);
sleepForSeconds(10);
call_default_signal_handler(sig);
}
@ -216,16 +220,18 @@ public:
StackTrace stack_trace(NoCapture{});
UInt32 thread_num;
std::string query_id;
DB::ThreadStatus * thread_ptr{};
DB::readPODBinary(info, in);
DB::readPODBinary(context, in);
DB::readPODBinary(stack_trace, in);
DB::readBinary(thread_num, in);
DB::readBinary(query_id, in);
DB::readPODBinary(thread_ptr, in);
/// This allows to receive more signals if failure happens inside onFault function.
/// Example: segfault while symbolizing stack trace.
std::thread([=, this] { onFault(sig, info, context, stack_trace, thread_num, query_id); }).detach();
std::thread([=, this] { onFault(sig, info, context, stack_trace, thread_num, query_id, thread_ptr); }).detach();
}
}
}
@ -236,7 +242,8 @@ private:
void onTerminate(const std::string & message, UInt32 thread_num) const
{
LOG_FATAL(log, "(version {}{}) (from thread {}) {}", VERSION_STRING, VERSION_OFFICIAL, thread_num, message);
LOG_FATAL(log, "(version {}{}, {}) (from thread {}) {}",
VERSION_STRING, VERSION_OFFICIAL, daemon.build_id_info, thread_num, message);
}
void onFault(
@ -245,21 +252,30 @@ private:
const ucontext_t & context,
const StackTrace & stack_trace,
UInt32 thread_num,
const std::string & query_id) const
const std::string & query_id,
DB::ThreadStatus * thread_ptr) const
{
DB::ThreadStatus thread_status;
/// Send logs from this thread to client if possible.
/// It will allow client to see failure messages directly.
if (thread_ptr)
{
if (auto logs_queue = thread_ptr->getInternalTextLogsQueue())
DB::CurrentThread::attachInternalTextLogsQueue(logs_queue, DB::LogsLevel::trace);
}
LOG_FATAL(log, "########################################");
if (query_id.empty())
{
std::stringstream message;
message << "(version " << VERSION_STRING << VERSION_OFFICIAL << ")";
message << " (from thread " << thread_num << ")";
if (query_id.empty())
message << " (no query)";
else
message << " (query_id: " << query_id << ")";
message << " Received signal " << strsignal(sig) << " (" << sig << ").";
LOG_FATAL(log, message.str());
LOG_FATAL(log, "(version {}{}, {}) (from thread {}) (no query) Received signal {} ({})",
VERSION_STRING, VERSION_OFFICIAL, daemon.build_id_info, thread_num, strsignal(sig), sig);
}
else
{
LOG_FATAL(log, "(version {}{}, {}) (from thread {}) (query_id: {}) Received signal {} ({})",
VERSION_STRING, VERSION_OFFICIAL, daemon.build_id_info, thread_num, query_id, strsignal(sig), sig);
}
LOG_FATAL(log, signalToErrorMessage(sig, info, context));
@ -279,6 +295,10 @@ private:
/// Write symbolized stack trace line by line for better grep-ability.
stack_trace.toStringEveryLine([&](const std::string & s) { LOG_FATAL(log, s); });
/// When everything is done, we will try to send these error messages to client.
if (thread_ptr)
thread_ptr->onFatalError();
}
};
@ -292,17 +312,15 @@ static void sanitizerDeathCallback()
StringRef query_id = DB::CurrentThread::getQueryId(); /// This is signal safe.
if (query_id.size == 0)
{
std::stringstream message;
message << "(version " << VERSION_STRING << VERSION_OFFICIAL << ")";
message << " (from thread " << getThreadId() << ")";
if (query_id.size == 0)
message << " (no query)";
else
message << " (query_id: " << query_id << ")";
message << " Sanitizer trap.";
LOG_FATAL(log, message.str());
LOG_FATAL(log, "(version {}{}) (from thread {}) (no query) Sanitizer trap.",
VERSION_STRING, VERSION_OFFICIAL, getThreadId());
}
else
{
LOG_FATAL(log, "(version {}{}) (from thread {}) (query_id: {}) Sanitizer trap.",
VERSION_STRING, VERSION_OFFICIAL, getThreadId(), query_id);
}
/// Just in case print our own stack trace. In case when llvm-symbolizer does not work.
@ -711,12 +729,23 @@ void BaseDaemon::initializeTerminationAndSignalProcessing()
signal_listener = std::make_unique<SignalListener>(*this);
signal_listener_thread.start(*signal_listener);
#if defined(__ELF__) && !defined(__FreeBSD__)
String build_id_hex = DB::SymbolIndex::instance().getBuildIDHex();
if (build_id_hex.empty())
build_id_info = "no build id";
else
build_id_info = "build id: " + build_id_hex;
#else
build_id_info = "no build id";
#endif
}
void BaseDaemon::logRevision() const
{
Poco::Logger::root().information("Starting " + std::string{VERSION_FULL}
+ " with revision " + std::to_string(ClickHouseRevision::get())
+ ", " + build_id_info
+ ", PID " + std::to_string(getpid()));
}

View File

@ -198,6 +198,8 @@ protected:
std::string config_path;
DB::ConfigProcessor::LoadedConfig loaded_config;
Poco::Util::AbstractConfiguration * last_configuration = nullptr;
String build_id_info;
};

View File

@ -1,9 +1,9 @@
# This strings autochanged from release_lib.sh:
SET(VERSION_REVISION 54435)
SET(VERSION_REVISION 54436)
SET(VERSION_MAJOR 20)
SET(VERSION_MINOR 5)
SET(VERSION_MINOR 6)
SET(VERSION_PATCH 1)
SET(VERSION_GITHASH 91df18a906dcffdbee6816e5389df6c65f86e35f)
SET(VERSION_DESCRIBE v20.5.1.1-prestable)
SET(VERSION_STRING 20.5.1.1)
SET(VERSION_GITHASH efc57fb063b3fb4df968d916720ec4d4ced4642e)
SET(VERSION_DESCRIBE v20.6.1.1-prestable)
SET(VERSION_STRING 20.6.1.1)
# end of autochange

View File

@ -18,7 +18,7 @@ message(STATUS "Default libraries: ${DEFAULT_LIBS}")
set(CMAKE_CXX_STANDARD_LIBRARIES ${DEFAULT_LIBS})
set(CMAKE_C_STANDARD_LIBRARIES ${DEFAULT_LIBS})
# glibc-compatibility library relies to fixed version of libc headers
# glibc-compatibility library relies to constant version of libc headers
# (because minor changes in function attributes between different glibc versions will introduce incompatibilities)
# This is for x86_64. For other architectures we have separate toolchains.
if (ARCH_AMD64 AND NOT_UNBUNDLED)

contrib/hyperscan vendored (2 changed lines)

@ -1 +1 @@
Subproject commit 3058c9c20cba3accdf92544d8513a26240c4ff70
Subproject commit 3907fd00ee8b2538739768fa9533f8635a276531

View File

@ -219,7 +219,9 @@ if (ENABLE_HYPERSCAN)
target_compile_definitions (hyperscan PUBLIC USE_HYPERSCAN=1)
target_compile_options (hyperscan
PRIVATE -g0 -march=corei7 # library has too much debug information
PRIVATE -g0 # Library has too much debug information
-march=corei7 -O2 -fno-strict-aliasing -fno-omit-frame-pointer -fvisibility=hidden # The options from original build system
-fno-sanitize=undefined # Assume the library takes care of itself
)
target_include_directories (hyperscan
PRIVATE

debian/changelog vendored (4 changed lines)
View File

@ -1,5 +1,5 @@
clickhouse (20.5.1.1) unstable; urgency=low
clickhouse (20.6.1.1) unstable; urgency=low
* Modified source code
-- clickhouse-release <clickhouse-release@yandex-team.ru> Tue, 28 Apr 2020 20:12:13 +0300
-- clickhouse-release <clickhouse-release@yandex-team.ru> Mon, 22 Jun 2020 20:40:23 +0300

debian/control vendored (2 changed lines)
View File

@ -28,7 +28,7 @@ Description: Client binary for ClickHouse
Package: clickhouse-common-static
Architecture: any
Depends: ${shlibs:Depends}, ${misc:Depends}, tzdata
Depends: ${shlibs:Depends}, ${misc:Depends}
Suggests: clickhouse-common-static-dbg
Replaces: clickhouse-common, clickhouse-server-base
Provides: clickhouse-common, clickhouse-server-base

View File

@ -1,7 +1,7 @@
FROM ubuntu:18.04
ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/"
ARG version=20.5.1.*
ARG version=20.6.1.*
RUN apt-get update \
&& apt-get install --yes --no-install-recommends \
@ -17,7 +17,6 @@ RUN apt-get update \
clickhouse-client=$version \
clickhouse-common-static=$version \
locales \
tzdata \
&& rm -rf /var/lib/apt/lists/* /var/cache/debconf \
&& apt-get clean

View File

@ -1,7 +1,7 @@
FROM ubuntu:18.04
ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/"
ARG version=20.5.1.*
ARG version=20.6.1.*
ARG gosu_ver=1.10
RUN apt-get update \
@ -21,7 +21,6 @@ RUN apt-get update \
locales \
ca-certificates \
wget \
tzdata \
&& rm -rf \
/var/lib/apt/lists/* \
/var/cache/debconf \

View File

@ -1,7 +1,7 @@
FROM ubuntu:18.04
ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/"
ARG version=20.5.1.*
ARG version=20.6.1.*
RUN apt-get update && \
apt-get install -y apt-transport-https dirmngr && \

View File

@ -17,8 +17,7 @@ RUN apt-get update \
odbc-postgresql \
sqlite3 \
curl \
tar \
tzdata
tar
RUN rm -rf \
/var/lib/apt/lists/* \
/var/cache/debconf \

View File

@ -7,3 +7,7 @@ services:
POSTGRES_PASSWORD: mysecretpassword
ports:
- 5432:5432
networks:
default:
aliases:
- postgre-sql.local

View File

@ -22,7 +22,7 @@ function configure
echo all killed
set -m # Spawn temporary in its own process groups
left/clickhouse-server --config-file=left/config/config.xml -- --path db0 &> setup-server-log.log &
left/clickhouse-server --config-file=left/config/config.xml -- --path db0 --user_files_path db0/user_files &> setup-server-log.log &
left_pid=$!
kill -0 $left_pid
disown $left_pid
@ -59,12 +59,12 @@ function restart
set -m # Spawn servers in their own process groups
left/clickhouse-server --config-file=left/config/config.xml -- --path left/db &>> left-server-log.log &
left/clickhouse-server --config-file=left/config/config.xml -- --path left/db --user_files_path left/db/user_files &>> left-server-log.log &
left_pid=$!
kill -0 $left_pid
disown $left_pid
right/clickhouse-server --config-file=right/config/config.xml -- --path right/db &>> right-server-log.log &
right/clickhouse-server --config-file=right/config/config.xml -- --path right/db --user_files_path right/db/user_files &>> right-server-log.log &
right_pid=$!
kill -0 $right_pid
disown $right_pid

View File

@ -17,7 +17,7 @@ function find_reference_sha
# If not master, try to fetch pull/.../{head,merge}
if [ "$PR_TO_TEST" != "0" ]
then
git -C ch fetch origin "refs/pull/$PR_TO_TEST/*:refs/heads/pr/*"
git -C ch fetch origin "refs/pull/$PR_TO_TEST/*:refs/heads/pull/$PR_TO_TEST/*"
fi
# Go back from the revision to be tested, trying to find the closest published
@ -28,9 +28,9 @@ function find_reference_sha
# and SHA_TO_TEST, but a revision that is merged with recent master, given
# by pull/.../merge ref.
# Master is the first parent of the pull/.../merge.
if git -C ch rev-parse pr/merge
if git -C ch rev-parse "pull/$PR_TO_TEST/merge"
then
start_ref=pr/merge~
start_ref="pull/$PR_TO_TEST/merge~"
fi
while :
@ -73,11 +73,11 @@ if [ "$REF_PR" == "" ]; then echo Reference PR is not specified ; exit 1 ; fi
(
git -C ch log -1 --decorate "$SHA_TO_TEST" ||:
if git -C ch rev-parse pr/merge &> /dev/null
if git -C ch rev-parse "pull/$PR_TO_TEST/merge" &> /dev/null
then
echo
echo Real tested commit is:
git -C ch log -1 --decorate pr/merge
git -C ch log -1 --decorate "pull/$PR_TO_TEST/merge"
fi
) | tee right-commit.txt
@ -131,5 +131,8 @@ done
dmesg -T > dmesg.log
7z a '-x!*/tmp' /output/output.7z ./*.{log,tsv,html,txt,rep,svg,columns} {right,left}/{performance,db/preprocessed_configs,scripts} report analyze benchmark
7z a '-x!*/tmp' /output/output.7z ./*.{log,tsv,html,txt,rep,svg,columns} \
{right,left}/{performance,scripts} {{right,left}/db,db0}/preprocessed_configs \
report analyze benchmark
cp compare.log /output

View File

@ -20,9 +20,9 @@ RUN apt-get --allow-unauthenticated update -y \
# apt-get --allow-unauthenticated install --yes --no-install-recommends \
# pvs-studio
ENV PKG_VERSION="pvs-studio-7.07.38234.46-amd64.deb"
ENV PKG_VERSION="pvs-studio-7.07.38234.48-amd64.deb"
RUN wget "http://files.viva64.com/$PKG_VERSION"
RUN wget "https://files.viva64.com/$PKG_VERSION"
RUN sudo dpkg -i "$PKG_VERSION"
CMD cd /repo_folder && pvs-studio-analyzer credentials $LICENCE_NAME $LICENCE_KEY -o ./licence.lic \

View File

@ -1,4 +1,4 @@
## function-name {#function-name-in-lower-case}
## functionName {#functionname-in-lower-case}
Short description.

View File

@ -1,4 +1,4 @@
## setting-name {#setting-name-in-lower-case}
## setting_name {#setting_name}
Description.

View File

@ -7,7 +7,7 @@ toc_title: How to Build ClickHouse on Linux for AARCH64 (ARM64)
This is for the case when you have Linux machine and want to use it to build `clickhouse` binary that will run on another Linux machine with AARCH64 CPU architecture. This is intended for continuous integration checks that run on Linux servers.
The cross-build for AARCH64 is based on the [Build instructions](build.md), follow them first.
The cross-build for AARCH64 is based on the [Build instructions](../development/build.md), follow them first.
# Install Clang-8 {#install-clang-8}

View File

@ -5,9 +5,9 @@ toc_title: How to Build ClickHouse on Linux for Mac OS X
# How to Build ClickHouse on Linux for Mac OS X {#how-to-build-clickhouse-on-linux-for-mac-os-x}
This is for the case when you have Linux machine and want to use it to build `clickhouse` binary that will run on OS X. This is intended for continuous integration checks that run on Linux servers. If you want to build ClickHouse directly on Mac OS X, then proceed with [another instruction](build-osx.md).
This is for the case when you have Linux machine and want to use it to build `clickhouse` binary that will run on OS X. This is intended for continuous integration checks that run on Linux servers. If you want to build ClickHouse directly on Mac OS X, then proceed with [another instruction](../development/build-osx.md).
The cross-build for Mac OS X is based on the [Build instructions](build.md), follow them first.
The cross-build for Mac OS X is based on the [Build instructions](../development/build.md), follow them first.
# Install Clang-8 {#install-clang-8}

View File

@ -28,10 +28,9 @@ There are several ways to do this.
### Install from Repository {#install-from-repository}
On Ubuntu 19.10 or newer:
```
$ sudo apt-get update
$ sudo apt-get install gcc-9 g++-9
```
$ sudo apt-get update
$ sudo apt-get install gcc-9 g++-9
### Install from a PPA Package {#install-from-a-ppa-package}

View File

@ -3,11 +3,11 @@ toc_priority: 61
toc_title: For Beginners
---
# The Beginner ClickHouse Developer Instruction
# The Beginner ClickHouse Developer Instruction {#the-beginner-clickhouse-developer-instruction}
Building of ClickHouse is supported on Linux, FreeBSD and Mac OS X.
If you use Windows, you need to create a virtual machine with Ubuntu. To start working with a virtual machine please install VirtualBox. You can download Ubuntu from the website: https://www.ubuntu.com/#download. Please create a virtual machine from the downloaded image (you should reserve at least 4GB of RAM for it). To run a command-line terminal in Ubuntu, please locate a program containing the word “terminal” in its name (gnome-terminal, konsole etc.) or just press Ctrl+Alt+T.
If you use Windows, you need to create a virtual machine with Ubuntu. To start working with a virtual machine please install VirtualBox. You can download Ubuntu from the website: https://www.ubuntu.com/\#download. Please create a virtual machine from the downloaded image (you should reserve at least 4GB of RAM for it). To run a command-line terminal in Ubuntu, please locate a program containing the word “terminal” in its name (gnome-terminal, konsole etc.) or just press Ctrl+Alt+T.
ClickHouse cannot work or build on a 32-bit system. You should acquire access to a 64-bit system and you can continue reading.
@ -137,7 +137,7 @@ Official Yandex builds currently use GCC because it generates machine code of sl
To install GCC on Ubuntu run: `sudo apt install gcc g++`
Check the version of gcc: `gcc --version`. If it is below 9, then follow the instruction here: https://clickhouse.tech/docs/en/development/build/#install-gcc-9.
Check the version of gcc: `gcc --version`. If it is below 9, then follow the instruction here: https://clickhouse.tech/docs/en/development/build/\#install-gcc-9.
Mac OS X build is supported only for Clang. Just run `brew install llvm`

View File

@ -200,7 +200,7 @@ Debug version of `jemalloc` is used for debug build.
ClickHouse fuzzing is implemented both using [libFuzzer](https://llvm.org/docs/LibFuzzer.html) and random SQL queries.
All the fuzz testing should be performed with sanitizers (Address and Undefined).
LibFuzzer is used for isolated fuzz testing of library code. Fuzzers are implemented as part of test code and have "\_fuzzer" name postfixes.
LibFuzzer is used for isolated fuzz testing of library code. Fuzzers are implemented as part of test code and have “\_fuzzer” name postfixes.
Fuzzer example can be found at `src/Parsers/tests/lexer_fuzzer.cpp`. LibFuzzer-specific configs, dictionaries and corpus are stored at `tests/fuzz`.
We encourage you to write fuzz tests for every functionality that handles user input.
@ -211,7 +211,6 @@ Google OSS-Fuzz can be found at `docker/fuzz`.
We also use simple fuzz test to generate random SQL queries and to check that the server doesn't die executing them.
You can find it in `00746_sql_fuzzy.pl`. This test should be run continuously (overnight and longer).
## Security Audit {#security-audit}
People from Yandex Security Team do some basic overview of ClickHouse capabilities from the security standpoint.

View File

@ -12,8 +12,8 @@ By default, ClickHouse uses its native database engine, which provides configura
You can also use the following database engines:
- [MySQL](mysql.md)
- [MySQL](../../engines/database-engines/mysql.md)
- [Lazy](lazy.md)
- [Lazy](../../engines/database-engines/lazy.md)
[Original article](https://clickhouse.tech/docs/en/database_engines/) <!--hide-->

View File

@ -1,8 +1,15 @@
---
toc_folder_title: Engines
toc_hidden: true
toc_priority: 25
toc_title: hidden
toc_hidden: true
---
# ClickHouse Engines
There are two key engine kinds in ClickHouse:
- [Table engines](table-engines/index.md)
- [Database engines](database-engines/index.md)
{## [Original article](https://clickhouse.tech/docs/en/engines/) ##}

View File

@ -19,27 +19,27 @@ The table engine (type of table) determines:
### MergeTree {#mergetree}
The most universal and functional table engines for high-load tasks. The property shared by these engines is quick data insertion with subsequent background data processing. `MergeTree` family engines support data replication (with [Replicated*](mergetree-family/replication.md#table_engines-replication) versions of engines), partitioning, and other features not supported in other engines.
The most universal and functional table engines for high-load tasks. The property shared by these engines is quick data insertion with subsequent background data processing. `MergeTree` family engines support data replication (with [Replicated\*](../../engines/table-engines/mergetree-family/replication.md#table_engines-replication) versions of engines), partitioning, secondary data-skipping indexes, and other features not supported in other engines.
Engines in the family:
- [MergeTree](mergetree-family/mergetree.md#mergetree)
- [ReplacingMergeTree](mergetree-family/replacingmergetree.md#replacingmergetree)
- [SummingMergeTree](mergetree-family/summingmergetree.md#summingmergetree)
- [AggregatingMergeTree](mergetree-family/aggregatingmergetree.md#aggregatingmergetree)
- [CollapsingMergeTree](mergetree-family/collapsingmergetree.md#table_engine-collapsingmergetree)
- [VersionedCollapsingMergeTree](mergetree-family/versionedcollapsingmergetree.md#versionedcollapsingmergetree)
- [GraphiteMergeTree](mergetree-family/graphitemergetree.md#graphitemergetree)
- [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md#mergetree)
- [ReplacingMergeTree](../../engines/table-engines/mergetree-family/replacingmergetree.md#replacingmergetree)
- [SummingMergeTree](../../engines/table-engines/mergetree-family/summingmergetree.md#summingmergetree)
- [AggregatingMergeTree](../../engines/table-engines/mergetree-family/aggregatingmergetree.md#aggregatingmergetree)
- [CollapsingMergeTree](../../engines/table-engines/mergetree-family/collapsingmergetree.md#table_engine-collapsingmergetree)
- [VersionedCollapsingMergeTree](../../engines/table-engines/mergetree-family/versionedcollapsingmergetree.md#versionedcollapsingmergetree)
- [GraphiteMergeTree](../../engines/table-engines/mergetree-family/graphitemergetree.md#graphitemergetree)
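As a rough illustration of what the family shares, a minimal table on the base `MergeTree` engine with a partition key and a sorting key; the table and column names are hypothetical and not part of this change:

``` sql
-- Illustrative only: a basic MergeTree table with partitioning and a sorting key.
CREATE TABLE hits_example
(
    EventDate Date,
    CounterID UInt32,
    UserID    UInt64,
    URL       String
)
ENGINE = MergeTree()
PARTITION BY toYYYYMM(EventDate)
ORDER BY (CounterID, EventDate);
```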
### Log {#log}
Lightweight [engines](log-family/index.md) with minimum functionality. They're the most effective when you need to quickly write many small tables (up to approximately 1 million rows) and read them later as a whole.
Lightweight [engines](../../engines/table-engines/log-family/index.md) with minimum functionality. They're the most effective when you need to quickly write many small tables (up to approximately 1 million rows) and read them later as a whole.
Engines in the family:
- [TinyLog](log-family/tinylog.md#tinylog)
- [StripeLog](log-family/stripelog.md#stripelog)
- [Log](log-family/log.md#log)
- [TinyLog](../../engines/table-engines/log-family/tinylog.md#tinylog)
- [StripeLog](../../engines/table-engines/log-family/stripelog.md#stripelog)
- [Log](../../engines/table-engines/log-family/log.md#log)
### Integration Engines {#integration-engines}
@ -47,28 +47,28 @@ Engines for communicating with other data storage and processing systems.
Engines in the family:
- [Kafka](integrations/kafka.md#kafka)
- [MySQL](integrations/mysql.md#mysql)
- [ODBC](integrations/odbc.md#table-engine-odbc)
- [JDBC](integrations/jdbc.md#table-engine-jdbc)
- [HDFS](integrations/hdfs.md#hdfs)
- [Kafka](../../engines/table-engines/integrations/kafka.md#kafka)
- [MySQL](../../engines/table-engines/integrations/mysql.md#mysql)
- [ODBC](../../engines/table-engines/integrations/odbc.md#table-engine-odbc)
- [JDBC](../../engines/table-engines/integrations/jdbc.md#table-engine-jdbc)
- [HDFS](../../engines/table-engines/integrations/hdfs.md#hdfs)
### Special Engines {#special-engines}
Engines in the family:
- [Distributed](special/distributed.md#distributed)
- [MaterializedView](special/materializedview.md#materializedview)
- [Dictionary](special/dictionary.md#dictionary)
- [Merge](special/merge.md#merge)
- [File](special/file.md#file)
- [Null](special/null.md#null)
- [Set](special/set.md#set)
- [Join](special/join.md#join)
- [URL](special/url.md#table_engines-url)
- [View](special/view.md#table_engines-view)
- [Memory](special/memory.md#memory)
- [Buffer](special/buffer.md#buffer)
- [Distributed](../../engines/table-engines/special/distributed.md#distributed)
- [MaterializedView](../../engines/table-engines/special/materializedview.md#materializedview)
- [Dictionary](../../engines/table-engines/special/dictionary.md#dictionary)
- [Merge](../../engines/table-engines/special/merge.md#merge)
- [File](../../engines/table-engines/special/file.md#file)
- [Null](../../engines/table-engines/special/null.md#null)
- [Set](../../engines/table-engines/special/set.md#set)
- [Join](../../engines/table-engines/special/join.md#join)
- [URL](../../engines/table-engines/special/url.md#table_engines-url)
- [View](../../engines/table-engines/special/view.md#table_engines-view)
- [Memory](../../engines/table-engines/special/memory.md#memory)
- [Buffer](../../engines/table-engines/special/buffer.md#buffer)
## Virtual Columns {#table_engines-virtual_columns}
@ -80,4 +80,4 @@ To select data from a virtual column, you must specify its name in the `SELECT`
If you create a table with a column that has the same name as one of the table virtual columns, the virtual column becomes inaccessible. We don't recommend doing this. To help avoid conflicts, virtual column names are usually prefixed with an underscore.
[Original article](https://clickhouse.tech/docs/en/operations/table_engines/) <!--hide-->
[Original article](https://clickhouse.tech/docs/en/engines/table-engines/) <!--hide-->

View File

@ -6,7 +6,7 @@ toc_title: HDFS
# HDFS {#table_engines-hdfs}
This engine provides integration with [Apache Hadoop](https://en.wikipedia.org/wiki/Apache_Hadoop) ecosystem by allowing to manage data on [HDFS](https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-hdfs/HdfsDesign.html) via ClickHouse. This engine is similar
to the [File](../special/file.md#table_engines-file) and [URL](../special/url.md#table_engines-url) engines, but provides Hadoop-specific features.
to the [File](../../../engines/table-engines/special/file.md#table_engines-file) and [URL](../../../engines/table-engines/special/url.md#table_engines-url) engines, but provides Hadoop-specific features.
## Usage {#usage}
@ -116,6 +116,6 @@ CREATE TABLE big_table (name String, value UInt32) ENGINE = HDFS('hdfs://hdfs1:9
**See Also**
- [Virtual columns](../index.md#table_engines-virtual_columns)
- [Virtual columns](../../../engines/table-engines/index.md#table_engines-virtual_columns)
[Original article](https://clickhouse.tech/docs/en/operations/table_engines/hdfs/) <!--hide-->

View File

@ -173,7 +173,7 @@ For a list of possible configuration options, see the [librdkafka configuration
**See Also**
- [Virtual columns](../index.md#table_engines-virtual_columns)
- [background_schedule_pool_size](../../../operations/settings/settings.md#background_schedule_pool_size)
- [Virtual columns](../../../engines/table-engines/index.md#table_engines-virtual_columns)
- [background\_schedule\_pool\_size](../../../operations/settings/settings.md#background_schedule_pool_size)
[Original article](https://clickhouse.tech/docs/en/operations/table_engines/kafka/) <!--hide-->

View File

@ -9,9 +9,9 @@ These engines were developed for scenarios when you need to quickly write many s
Engines of the family:
- [StripeLog](stripelog.md)
- [Log](log.md)
- [TinyLog](tinylog.md)
- [StripeLog](../../../engines/table-engines/log-family/stripelog.md)
- [Log](../../../engines/table-engines/log-family/log.md)
- [TinyLog](../../../engines/table-engines/log-family/tinylog.md)
## Common Properties {#common-properties}

View File

@ -5,9 +5,9 @@ toc_title: Log
# Log {#log}
Engine belongs to the family of log engines. See the common properties of log engines and their differences in the [Log Engine Family](log-family.md) article.
Engine belongs to the family of log engines. See the common properties of log engines and their differences in the [Log Engine Family](../../../engines/table-engines/log-family/log-family.md) article.
Log differs from [TinyLog](tinylog.md) in that a small file of “marks” resides with the column files. These marks are written on every data block and contain offsets that indicate where to start reading the file in order to skip the specified number of rows. This makes it possible to read table data in multiple threads.
Log differs from [TinyLog](../../../engines/table-engines/log-family/tinylog.md) in that a small file of “marks” resides with the column files. These marks are written on every data block and contain offsets that indicate where to start reading the file in order to skip the specified number of rows. This makes it possible to read table data in multiple threads.
For concurrent data access, the read operations can be performed simultaneously, while write operations block reads and each other.
The Log engine does not support indexes. Similarly, if writing to a table failed, the table is broken, and reading from it returns an error. The Log engine is appropriate for temporary data, write-once tables, and for testing or demonstration purposes.

View File

@ -5,7 +5,7 @@ toc_title: StripeLog
# Stripelog {#stripelog}
This engine belongs to the family of log engines. See the common properties of log engines and their differences in the [Log Engine Family](log-family.md) article.
This engine belongs to the family of log engines. See the common properties of log engines and their differences in the [Log Engine Family](../../../engines/table-engines/log-family/log-family.md) article.
Use this engine in scenarios when you need to write many tables with a small amount of data (less than 1 million rows).

View File

@ -5,10 +5,10 @@ toc_title: TinyLog
# TinyLog {#tinylog}
The engine belongs to the log engine family. See [Log Engine Family](log-family.md) for common properties of log engines and their differences.
The engine belongs to the log engine family. See [Log Engine Family](../../../engines/table-engines/log-family/log-family.md) for common properties of log engines and their differences.
This table engine is typically used with the write-once method: write data one time, then read it as many times as necessary. For example, you can use `TinyLog`-type tables for intermediary data that is processed in small batches. Note that storing data in a large number of small tables is inefficient.
Queries are executed in a single stream. In other words, this engine is intended for relatively small tables (up to about 1,000,000 rows). It makes sense to use this table engine if you have many small tables, since it's simpler than the [Log](log.md) engine (fewer files need to be opened).
Queries are executed in a single stream. In other words, this engine is intended for relatively small tables (up to about 1,000,000 rows). It makes sense to use this table engine if you have many small tables, since it's simpler than the [Log](../../../engines/table-engines/log-family/log.md) engine (fewer files need to be opened).
[Original article](https://clickhouse.tech/docs/en/operations/table_engines/tinylog/) <!--hide-->

View File

@ -5,7 +5,7 @@ toc_title: AggregatingMergeTree
# Aggregatingmergetree {#aggregatingmergetree}
The engine inherits from [MergeTree](mergetree.md#table_engines-mergetree), altering the logic for data parts merging. ClickHouse replaces all rows with the same primary key (or more accurately, with the same [sorting key](mergetree.md)) with a single row (within a one data part) that stores a combination of states of aggregate functions.
The engine inherits from [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md#table_engines-mergetree), altering the logic for data parts merging. ClickHouse replaces all rows with the same primary key (or more accurately, with the same [sorting key](../../../engines/table-engines/mergetree-family/mergetree.md)) with a single row (within a one data part) that stores a combination of states of aggregate functions.
You can use `AggregatingMergeTree` tables for incremental data aggregation, including for aggregated materialized views.
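A hedged sketch of that incremental-aggregation pattern through a materialized view: states are produced with `-State` combinators and finalized with `-Merge` on read. All table, view and column names below are hypothetical:

``` sql
CREATE MATERIALIZED VIEW visits_agg
ENGINE = AggregatingMergeTree()
ORDER BY (CounterID, StartDate)
AS SELECT
    CounterID,
    StartDate,
    sumState(Sign)    AS Visits,
    uniqState(UserID) AS Users
FROM visits_raw
GROUP BY CounterID, StartDate;

-- Aggregate states must be finalized with -Merge combinators when reading.
SELECT StartDate, sumMerge(Visits) AS Visits, uniqMerge(Users) AS Users
FROM visits_agg
GROUP BY StartDate;
```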
@ -36,7 +36,7 @@ For a description of request parameters, see [request description](../../../sql-
**Query clauses**
When creating a `AggregatingMergeTree` table the same [clauses](mergetree.md) are required, as when creating a `MergeTree` table.
When creating a `AggregatingMergeTree` table the same [clauses](../../../engines/table-engines/mergetree-family/mergetree.md) are required, as when creating a `MergeTree` table.
<details markdown="1">

View File

@ -5,7 +5,7 @@ toc_title: CollapsingMergeTree
# CollapsingMergeTree {#table_engine-collapsingmergetree}
The engine inherits from [MergeTree](mergetree.md) and adds the logic of rows collapsing to data parts merge algorithm.
The engine inherits from [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md) and adds the logic of rows collapsing to data parts merge algorithm.
`CollapsingMergeTree` asynchronously deletes (collapses) pairs of rows if all of the fields in a sorting key (`ORDER BY`) are equivalent excepting the particular field `Sign` which can have `1` and `-1` values. Rows without a pair are kept. For more details see the [Collapsing](#table_engine-collapsingmergetree-collapsing) section of the document.
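By way of example, a minimal sketch of the `Sign` mechanics (the names and values are hypothetical):

``` sql
CREATE TABLE user_actions
(
    UserID    UInt64,
    PageViews UInt8,
    Duration  UInt8,
    Sign      Int8
)
ENGINE = CollapsingMergeTree(Sign)
ORDER BY UserID;

-- A "state" row (Sign = 1) and its later cancellation (Sign = -1) collapse
-- during a background merge, leaving only the most recent state row.
INSERT INTO user_actions VALUES (4324182021466249494, 5, 146, 1);
INSERT INTO user_actions VALUES (4324182021466249494, 5, 146, -1), (4324182021466249494, 6, 185, 1);
```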
@ -36,7 +36,7 @@ For a description of query parameters, see [query description](../../../sql-refe
**Query clauses**
When creating a `CollapsingMergeTree` table, the same [query clauses](mergetree.md#table_engine-mergetree-creating-a-table) are required, as when creating a `MergeTree` table.
When creating a `CollapsingMergeTree` table, the same [query clauses](../../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) are required, as when creating a `MergeTree` table.
<details markdown="1">

View File

@ -5,11 +5,11 @@ toc_title: Custom Partitioning Key
# Custom Partitioning Key {#custom-partitioning-key}
Partitioning is available for the [MergeTree](mergetree.md) family tables (including [replicated](replication.md) tables). [Materialized views](../special/materializedview.md#materializedview) based on MergeTree tables support partitioning, as well.
Partitioning is available for the [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md) family tables (including [replicated](../../../engines/table-engines/mergetree-family/replication.md) tables). [Materialized views](../../../engines/table-engines/special/materializedview.md#materializedview) based on MergeTree tables support partitioning, as well.
A partition is a logical combination of records in a table by a specified criterion. You can set a partition by an arbitrary criterion, such as by month, by day, or by event type. Each partition is stored separately to simplify manipulations of this data. When accessing the data, ClickHouse uses the smallest subset of partitions possible.
The partition is specified in the `PARTITION BY expr` clause when [creating a table](mergetree.md#table_engine-mergetree-creating-a-table). The partition key can be any expression from the table columns. For example, to specify partitioning by month, use the expression `toYYYYMM(date_column)`:
The partition is specified in the `PARTITION BY expr` clause when [creating a table](../../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table). The partition key can be any expression from the table columns. For example, to specify partitioning by month, use the expression `toYYYYMM(date_column)`:
``` sql
CREATE TABLE visits
@ -23,7 +23,7 @@ PARTITION BY toYYYYMM(VisitDate)
ORDER BY Hour;
```
The partition key can also be a tuple of expressions (similar to the [primary key](mergetree.md#primary-keys-and-indexes-in-queries)). For example:
The partition key can also be a tuple of expressions (similar to the [primary key](../../../engines/table-engines/mergetree-family/mergetree.md#primary-keys-and-indexes-in-queries)). For example:
``` sql
ENGINE = ReplicatedCollapsingMergeTree('/clickhouse/tables/name', 'replica1', Sign)
@ -38,7 +38,7 @@ When inserting new data to a table, this data is stored as a separate part (chun
!!! info "Info"
A merge only works for data parts that have the same value for the partitioning expression. This means **you shouldn't make overly granular partitions** (more than about a thousand partitions). Otherwise, the `SELECT` query performs poorly because of an unreasonably large number of files in the file system and open file descriptors.
Use the [system.parts](../../../operations/system-tables.md#system_tables-parts) table to view the table parts and partitions. For example, let's assume that we have a `visits` table with partitioning by month. Let's perform the `SELECT` query for the `system.parts` table:
Use the [system.parts](../../../operations/system-tables/parts.md#system_tables-parts) table to view the table parts and partitions. For example, let's assume that we have a `visits` table with partitioning by month. Let's perform the `SELECT` query for the `system.parts` table:
``` sql
SELECT

View File

@ -9,7 +9,7 @@ This engine is designed for thinning and aggregating/averaging (rollup) [Graphit
You can use any ClickHouse table engine to store the Graphite data if you don't need rollup, but if you need a rollup use `GraphiteMergeTree`. The engine reduces the volume of storage and increases the efficiency of queries from Graphite.
The engine inherits properties from [MergeTree](mergetree.md).
The engine inherits properties from [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md).
## Creating a Table {#creating-table}
@ -50,7 +50,7 @@ The names of these columns should be set in the rollup configuration.
**Query clauses**
When creating a `GraphiteMergeTree` table, the same [clauses](mergetree.md#table_engine-mergetree-creating-a-table) are required, as when creating a `MergeTree` table.
When creating a `GraphiteMergeTree` table, the same [clauses](../../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) are required, as when creating a `MergeTree` table.
<details markdown="1">

View File

@ -15,20 +15,20 @@ Main features:
This allows you to create a small sparse index that helps find data faster.
- Partitions can be used if the [partitioning key](custom-partitioning-key.md) is specified.
- Partitions can be used if the [partitioning key](../../../engines/table-engines/mergetree-family/custom-partitioning-key.md) is specified.
ClickHouse supports certain operations with partitions that are more effective than general operations on the same data with the same result. ClickHouse also automatically cuts off the partition data where the partitioning key is specified in the query. This also improves query performance.
- Data replication support.
The family of `ReplicatedMergeTree` tables provides data replication. For more information, see [Data replication](replication.md).
The family of `ReplicatedMergeTree` tables provides data replication. For more information, see [Data replication](../../../engines/table-engines/mergetree-family/replication.md).
- Data sampling support.
If necessary, you can set the data sampling method in the table.
!!! info "Info"
The [Merge](../special/merge.md#merge) engine does not belong to the `*MergeTree` family.
The [Merge](../../../engines/table-engines/special/merge.md#merge) engine does not belong to the `*MergeTree` family.
## Creating a Table {#table_engine-mergetree-creating-a-table}
@ -51,9 +51,6 @@ ORDER BY expr
For a description of parameters, see the [CREATE query description](../../../sql-reference/statements/create.md).
!!! note "Note"
`INDEX` is an experimental feature, see [Data Skipping Indexes](#table_engine-mergetree-data_skipping-indexes).
### Query Clauses {#mergetree-query-clauses}
- `ENGINE` — Name and parameters of the engine. `ENGINE = MergeTree()`. The `MergeTree` engine does not have parameters.
@ -62,11 +59,11 @@ For a description of parameters, see the [CREATE query description](../../../sql
A tuple of column names or arbitrary expressions. Example: `ORDER BY (CounterID, EventDate)`.
ClickHouse uses the sorting key as a primary key if the primary key is not defined obviously by the `PRIMARY KEY` clause.
Use the `ORDER BY tuple()` syntax, if you don't need sorting. See [Selecting the Primary Key](#selecting-the-primary-key).
ClickHouse uses the sorting key as a primary key if the primary key is not defined obviously by the `PRIMARY KEY` clause.
- `PARTITION BY` — The [partitioning key](custom-partitioning-key.md). Optional.
Use the `ORDER BY tuple()` syntax, if you don't need sorting. See [Selecting the Primary Key](#selecting-the-primary-key).
- `PARTITION BY` — The [partitioning key](../../../engines/table-engines/mergetree-family/custom-partitioning-key.md). Optional.
For partitioning by month, use the `toYYYYMM(date_column)` expression, where `date_column` is a column with a date of the type [Date](../../../sql-reference/data-types/date.md). The partition names here have the `"YYYYMM"` format.
@ -196,22 +193,22 @@ The number of columns in the primary key is not explicitly limited. Depending on
ClickHouse sorts data by primary key, so the higher the consistency, the better the compression.
- Provide additional logic when merging data parts in the [CollapsingMergeTree](collapsingmergetree.md#table_engine-collapsingmergetree) and [SummingMergeTree](summingmergetree.md) engines.
- Provide additional logic when merging data parts in the [CollapsingMergeTree](../../../engines/table-engines/mergetree-family/collapsingmergetree.md#table_engine-collapsingmergetree) and [SummingMergeTree](../../../engines/table-engines/mergetree-family/summingmergetree.md) engines.
In this case it makes sense to specify the *sorting key* that is different from the primary key.
A long primary key will negatively affect the insert performance and memory consumption, but extra columns in the primary key do not affect ClickHouse performance during `SELECT` queries.
You can create a table without a primary key using the `ORDER BY tuple()` syntax. In this case, ClickHouse stores data in the order of inserting. If you want to save data order when inserting data by `INSERT ... SELECT` queries, set [max_insert_threads = 1](../../../operations/settings/settings.md#settings-max-insert-threads).
You can create a table without a primary key using the `ORDER BY tuple()` syntax. In this case, ClickHouse stores data in the order of inserting. If you want to save data order when inserting data by `INSERT ... SELECT` queries, set [max\_insert\_threads = 1](../../../operations/settings/settings.md#settings-max-insert-threads).
To select data in the initial order, use [single-threaded](../../../operations/settings/settings.md#settings-max_threads) `SELECT` queries.
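For illustration, a hedged sketch of a table without a sorting key that keeps rows in insertion order; the names and values are hypothetical:

``` sql
CREATE TABLE events_unordered
(
    EventTime DateTime,
    Message   String
)
ENGINE = MergeTree()
ORDER BY tuple();   -- no sorting key, rows are kept in insertion order

SET max_insert_threads = 1;   -- preserve order for INSERT ... SELECT
INSERT INTO events_unordered SELECT now(), toString(number) FROM numbers(10);

-- Read back in the original order with a single-threaded SELECT.
SELECT * FROM events_unordered SETTINGS max_threads = 1;
```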
### Choosing a Primary Key that Differs from the Sorting Key {#choosing-a-primary-key-that-differs-from-the-sorting-key}
It is possible to specify a primary key (an expression with values that are written in the index file for each mark) that is different from the sorting key (an expression for sorting the rows in data parts). In this case the primary key expression tuple must be a prefix of the sorting key expression tuple.
This feature is helpful when using the [SummingMergeTree](summingmergetree.md) and
[AggregatingMergeTree](aggregatingmergetree.md) table engines. In a common case when using these engines, the table has two types of columns: *dimensions* and *measures*. Typical queries aggregate values of measure columns with arbitrary `GROUP BY` and filtering by dimensions. Because SummingMergeTree and AggregatingMergeTree aggregate rows with the same value of the sorting key, it is natural to add all dimensions to it. As a result, the key expression consists of a long list of columns and this list must be frequently updated with newly added dimensions.
This feature is helpful when using the [SummingMergeTree](../../../engines/table-engines/mergetree-family/summingmergetree.md) and
[AggregatingMergeTree](../../../engines/table-engines/mergetree-family/aggregatingmergetree.md) table engines. In a common case when using these engines, the table has two types of columns: *dimensions* and *measures*. Typical queries aggregate values of measure columns with arbitrary `GROUP BY` and filtering by dimensions. Because SummingMergeTree and AggregatingMergeTree aggregate rows with the same value of the sorting key, it is natural to add all dimensions to it. As a result, the key expression consists of a long list of columns and this list must be frequently updated with newly added dimensions.
In this case it makes sense to leave only a few columns in the primary key that will provide efficient range scans and add the remaining dimension columns to the sorting key tuple.
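A minimal sketch of that layout, with a short primary key that is a prefix of a longer sorting key (the names are hypothetical):

``` sql
CREATE TABLE metrics
(
    Date    Date,
    Region  String,
    Device  String,
    Browser String,
    Hits    UInt64
)
ENGINE = SummingMergeTree()
PARTITION BY toYYYYMM(Date)
PRIMARY KEY (Date, Region)                 -- short prefix, enough for efficient range scans
ORDER BY (Date, Region, Device, Browser);  -- all dimensions, so rows with equal keys are summed
```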
@ -257,7 +254,7 @@ ClickHouse cannot use an index if the values of the primary key in the query par
ClickHouse uses this logic not only for days of the month sequences, but for any primary key that represents a partially-monotonic sequence.
### Data Skipping Indexes (experimental) {#table_engine-mergetree-data_skipping-indexes}
### Data Skipping Indexes {#table_engine-mergetree-data_skipping-indexes}
The index declaration is in the columns section of the `CREATE` query.
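As a rough illustration, a skipping index declared in the columns section and another one added later; the names and index parameters are hypothetical:

``` sql
CREATE TABLE logs
(
    EventDate Date,
    Level     String,
    Message   String,
    INDEX level_idx Level TYPE set(100) GRANULARITY 4
)
ENGINE = MergeTree()
ORDER BY EventDate;

-- A skipping index can also be added to an existing table.
ALTER TABLE logs ADD INDEX msg_idx Message TYPE tokenbf_v1(2048, 3, 0) GRANULARITY 4;
```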
@ -487,7 +484,7 @@ When ClickHouse see that data is expired, it performs an off-schedule merge. To
If you perform the `SELECT` query between merges, you may get expired data. To avoid it, use the [OPTIMIZE](../../../sql-reference/statements/misc.md#misc_operations-optimize) query before `SELECT`.
## Using Multiple Block Devices for Data Storage {#table_engine-mergetree-multiple-volumes}
## Using Multiple Block Devices for Data Storage {#table_engine-mergetree-multiple-volumes}
### Introduction {#introduction}
@ -502,7 +499,7 @@ Data part is the minimum movable unit for `MergeTree`-engine tables. The data be
- Volume — Ordered set of equal disks (similar to [JBOD](https://en.wikipedia.org/wiki/Non-RAID_drive_architectures)).
- Storage policy — Set of volumes and the rules for moving data between them.
The names given to the described entities can be found in the system tables, [system.storage\_policies](../../../operations/system-tables.md#system_tables-storage_policies) and [system.disks](../../../operations/system-tables.md#system_tables-disks). To apply one of the configured storage policies for a table, use the `storage_policy` setting of `MergeTree`-engine family tables.
The names given to the described entities can be found in the system tables, [system.storage\_policies](../../../operations/system-tables/storage_policies.md#system_tables-storage_policies) and [system.disks](../../../operations/system-tables/disks.md#system_tables-disks). To apply one of the configured storage policies for a table, use the `storage_policy` setting of `MergeTree`-engine family tables.
### Configuration {#table_engine-mergetree-multiple-volumes_configure}
@ -632,7 +629,7 @@ SETTINGS storage_policy = 'moving_from_ssd_to_hdd'
The `default` storage policy implies using only one volume, which consists of only one disk given in `<path>`. Once a table is created, its storage policy cannot be changed.
The number of threads performing background moves of data parts can be changed by [background_move_pool_size](../../../operations/settings/settings.md#background_move_pool_size) setting.
The number of threads performing background moves of data parts can be changed by [background\_move\_pool\_size](../../../operations/settings/settings.md#background_move_pool_size) setting.
### Details {#details}
@ -651,7 +648,7 @@ In all these cases except for mutations and partition freezing, a part is stored
Under the hood, mutations and partition freezing make use of [hard links](https://en.wikipedia.org/wiki/Hard_link). Hard links between different disks are not supported, therefore in such cases the resulting parts are stored on the same disks as the initial ones.
In the background, parts are moved between volumes on the basis of the amount of free space (`move_factor` parameter) according to the order the volumes are declared in the configuration file.
Data is never transferred from the last one and into the first one. One may use system tables [system.part\_log](../../../operations/system-tables.md#system_tables-part-log) (field `type = MOVE_PART`) and [system.parts](../../../operations/system-tables.md#system_tables-parts) (fields `path` and `disk`) to monitor background moves. Also, the detailed information can be found in server logs.
Data is never transferred from the last one and into the first one. One may use system tables [system.part\_log](../../../operations/system-tables/part_log.md#system_tables-part-log) (field `type = MOVE_PART`) and [system.parts](../../../operations/system-tables/parts.md#system_tables-parts) (fields `path` and `disk`) to monitor background moves. Also, the detailed information can be found in server logs.
User can force moving a part or a partition from one volume to another using the query [ALTER TABLE … MOVE PART\|PARTITION … TO VOLUME\|DISK …](../../../sql-reference/statements/alter.md#alter_move-partition), all the restrictions for background operations are taken into account. The query initiates a move on its own and does not wait for background operations to be completed. User will get an error message if not enough free space is available or if any of the required conditions are not met.
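For illustration, a hedged sketch of assigning a storage policy and forcing a move; the policy and volume names are examples only and must already exist in the server configuration:

``` sql
CREATE TABLE big_events
(
    EventDate Date,
    Payload   String
)
ENGINE = MergeTree()
PARTITION BY toYYYYMM(EventDate)
ORDER BY EventDate
SETTINGS storage_policy = 'moving_from_ssd_to_hdd';

-- Force a partition onto a particular volume without waiting for a background move.
ALTER TABLE big_events MOVE PARTITION 202006 TO VOLUME 'external';
```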

View File

@ -5,7 +5,7 @@ toc_title: ReplacingMergeTree
# ReplacingMergeTree {#replacingmergetree}
The engine differs from [MergeTree](mergetree.md#table_engines-mergetree) in that it removes duplicate entries with the same primary key value (or more accurately, with the same [sorting key](mergetree.md) value).
The engine differs from [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md#table_engines-mergetree) in that it removes duplicate entries with the same [sorting key](../../../engines/table-engines/mergetree-family/mergetree.md) value.
Data deduplication occurs only during a merge. Merging occurs in the background at an unknown time, so you can't plan for it. Some of the data may remain unprocessed. Although you can run an unscheduled merge using the `OPTIMIZE` query, don't count on using it, because the `OPTIMIZE` query will read and write a large amount of data.
@ -33,14 +33,14 @@ For a description of request parameters, see [request description](../../../sql-
- `ver` — column with version. Type `UInt*`, `Date` or `DateTime`. Optional parameter.
When merging, `ReplacingMergeTree` from all the rows with the same primary key leaves only one:
When merging, `ReplacingMergeTree` from all the rows with the same sorting key leaves only one:
- Last in the selection, if `ver` not set.
- With the maximum version, if `ver` specified.
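A hedged sketch of deduplication by the `ver` column (the names are hypothetical):

``` sql
CREATE TABLE user_state
(
    UserID  UInt64,
    Payload String,
    Version UInt32
)
ENGINE = ReplacingMergeTree(Version)
ORDER BY UserID;

INSERT INTO user_state VALUES (1, 'old', 1), (1, 'new', 2);

-- Force an unscheduled merge only for the demonstration; normally
-- deduplication happens in the background at an unknown time.
OPTIMIZE TABLE user_state FINAL;
SELECT * FROM user_state;   -- one row per sorting key, with the maximum Version
```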
**Query clauses**
When creating a `ReplacingMergeTree` table the same [clauses](mergetree.md) are required, as when creating a `MergeTree` table.
When creating a `ReplacingMergeTree` table the same [clauses](../../../engines/table-engines/mergetree-family/mergetree.md) are required, as when creating a `MergeTree` table.
<details markdown="1">

View File

@ -63,7 +63,7 @@ For each `INSERT` query, approximately ten entries are added to ZooKeeper throug
For very large clusters, you can use different ZooKeeper clusters for different shards. However, this hasn't proven necessary on the Yandex.Metrica cluster (approximately 300 servers).
Replication is asynchronous and multi-master. `INSERT` queries (as well as `ALTER`) can be sent to any available server. Data is inserted on the server where the query is run, and then it is copied to the other servers. Because it is asynchronous, recently inserted data appears on the other replicas with some latency. If part of the replicas are not available, the data is written when they become available. If a replica is available, the latency is the amount of time it takes to transfer the block of compressed data over the network. The number of threads performing background tasks for replicated tables can be set by [background_schedule_pool_size](../../../operations/settings/settings.md#background_schedule_pool_size) setting.
Replication is asynchronous and multi-master. `INSERT` queries (as well as `ALTER`) can be sent to any available server. Data is inserted on the server where the query is run, and then it is copied to the other servers. Because it is asynchronous, recently inserted data appears on the other replicas with some latency. If part of the replicas are not available, the data is written when they become available. If a replica is available, the latency is the amount of time it takes to transfer the block of compressed data over the network. The number of threads performing background tasks for replicated tables can be set by [background\_schedule\_pool\_size](../../../operations/settings/settings.md#background_schedule_pool_size) setting.
By default, an INSERT query waits for confirmation of writing the data from only one replica. If the data was successfully written to only one replica and the server with this replica ceases to exist, the stored data will be lost. To enable getting confirmation of data writes from multiple replicas, use the `insert_quorum` option.
@ -217,6 +217,6 @@ If the data in ZooKeeper was lost or damaged, you can save data by moving it to
**See also**
- [background_schedule_pool_size](../../../operations/settings/settings.md#background_schedule_pool_size)
- [background\_schedule\_pool\_size](../../../operations/settings/settings.md#background_schedule_pool_size)
[Original article](https://clickhouse.tech/docs/en/operations/table_engines/replication/) <!--hide-->

View File

@ -5,7 +5,7 @@ toc_title: SummingMergeTree
# SummingMergeTree {#summingmergetree}
The engine inherits from [MergeTree](mergetree.md#table_engines-mergetree). The difference is that when merging data parts for `SummingMergeTree` tables ClickHouse replaces all the rows with the same primary key (or more accurately, with the same [sorting key](mergetree.md)) with one row which contains summarized values for the columns with the numeric data type. If the sorting key is composed in a way that a single key value corresponds to large number of rows, this significantly reduces storage volume and speeds up data selection.
The engine inherits from [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md#table_engines-mergetree). The difference is that when merging data parts for `SummingMergeTree` tables ClickHouse replaces all the rows with the same primary key (or more accurately, with the same [sorting key](../../../engines/table-engines/mergetree-family/mergetree.md)) with one row which contains summarized values for the columns with the numeric data type. If the sorting key is composed in a way that a single key value corresponds to large number of rows, this significantly reduces storage volume and speeds up data selection.
We recommend to use the engine together with `MergeTree`. Store complete data in `MergeTree` table, and use `SummingMergeTree` for aggregated data storing, for example, when preparing reports. Such an approach will prevent you from losing valuable data due to an incorrectly composed primary key.
@ -35,7 +35,7 @@ For a description of request parameters, see [request description](../../../sql-
**Query clauses**
When creating a `SummingMergeTree` table the same [clauses](mergetree.md) are required, as when creating a `MergeTree` table.
When creating a `SummingMergeTree` table the same [clauses](../../../engines/table-engines/mergetree-family/mergetree.md) are required, as when creating a `MergeTree` table.
<details markdown="1">
@ -96,7 +96,7 @@ SELECT key, sum(value) FROM summtt GROUP BY key
When data are inserted into a table, they are saved as-is. ClickHouse merges the inserted parts of data periodically and this is when rows with the same primary key are summed and replaced with one for each resulting part of data.
ClickHouse can merge the data parts so that different resulting parts of data cat consist rows with the same primary key, i.e. the summation will be incomplete. Therefore (`SELECT`) an aggregate function [sum()](../../../sql-reference/aggregate-functions/reference.md#agg_function-sum) and `GROUP BY` clause should be used in a query as described in the example above.
ClickHouse can merge the data parts so that different resulting parts of data can consist of rows with the same primary key, i.e. the summation will be incomplete. Therefore, an aggregate function [sum()](../../../sql-reference/aggregate-functions/reference/sum.md#agg_function-sum) and a `GROUP BY` clause should be used in `SELECT` queries, as described in the example above.
### Common Rules for Summation {#common-rules-for-summation}
@ -110,7 +110,7 @@ The values are not summarized for columns in the primary key.
### The Summation in the Aggregatefunction Columns {#the-summation-in-the-aggregatefunction-columns}
For columns of [AggregateFunction type](../../../sql-reference/data-types/aggregatefunction.md) ClickHouse behaves as [AggregatingMergeTree](aggregatingmergetree.md) engine aggregating according to the function.
For columns of [AggregateFunction type](../../../sql-reference/data-types/aggregatefunction.md) ClickHouse behaves as [AggregatingMergeTree](../../../engines/table-engines/mergetree-family/aggregatingmergetree.md) engine aggregating according to the function.
### Nested Structures {#nested-structures}
@ -132,7 +132,7 @@ Examples:
[(1, 100), (2, 150)] + [(1, -100)] -> [(2, 150)]
```
When requesting data, use the [sumMap(key, value)](../../../sql-reference/aggregate-functions/reference.md) function for aggregation of `Map`.
When requesting data, use the [sumMap(key, value)](../../../sql-reference/aggregate-functions/reference/summap.md) function for aggregation of `Map`.
For a nested data structure, you do not need to specify its columns in the tuple of columns for summation.
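As a hedged sketch (table and column names are hypothetical), a nested structure that qualifies as a `Map` and its aggregation with `sumMap` could look like this:

``` sql
-- The nested column name ends with 'Map' and its first column is a numeric key,
-- so SummingMergeTree sums Requests for equal Status values during merges.
CREATE TABLE nested_sums
(
    key UInt32,
    StatusMap Nested(
        Status UInt16,
        Requests UInt64
    )
)
ENGINE = SummingMergeTree()
ORDER BY key;

-- sumMap aggregates the arrays across parts, summing values for equal keys.
SELECT key, sumMap(StatusMap.Status, StatusMap.Requests)
FROM nested_sums
GROUP BY key;
```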

View File

@ -12,7 +12,7 @@ This engine:
See the section [Collapsing](#table_engines_versionedcollapsingmergetree) for details.
The engine inherits from [MergeTree](mergetree.md#table_engines-mergetree) and adds the logic for collapsing rows to the algorithm for merging data parts. `VersionedCollapsingMergeTree` serves the same purpose as [CollapsingMergeTree](collapsingmergetree.md) but uses a different collapsing algorithm that allows inserting the data in any order with multiple threads. In particular, the `Version` column helps to collapse the rows properly even if they are inserted in the wrong order. In contrast, `CollapsingMergeTree` allows only strictly consecutive insertion.
The engine inherits from [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md#table_engines-mergetree) and adds the logic for collapsing rows to the algorithm for merging data parts. `VersionedCollapsingMergeTree` serves the same purpose as [CollapsingMergeTree](../../../engines/table-engines/mergetree-family/collapsingmergetree.md) but uses a different collapsing algorithm that allows inserting the data in any order with multiple threads. In particular, the `Version` column helps to collapse the rows properly even if they are inserted in the wrong order. In contrast, `CollapsingMergeTree` allows only strictly consecutive insertion.
## Creating a Table {#creating-a-table}
@ -47,7 +47,7 @@ VersionedCollapsingMergeTree(sign, version)
**Query Clauses**
When creating a `VersionedCollapsingMergeTree` table, the same [clauses](mergetree.md) are required as when creating a `MergeTree` table.
When creating a `VersionedCollapsingMergeTree` table, the same [clauses](../../../engines/table-engines/mergetree-family/mergetree.md) are required as when creating a `MergeTree` table.
<details markdown="1">

View File

@ -7,7 +7,7 @@ toc_title: Dictionary
The `Dictionary` engine displays the [dictionary](../../../sql-reference/dictionaries/external-dictionaries/external-dicts.md) data as a ClickHouse table.
## Example
## Example {#example}
As an example, consider a dictionary of `products` with the following configuration:

View File

@ -10,7 +10,7 @@ Reading is automatically parallelized. During a read, the table indexes on remot
The Distributed engine accepts parameters:
- the cluster name in the server's config file
- the cluster name in the server’s config file
- the name of a remote database
@ -23,7 +23,7 @@ The Distributed engine accepts parameters:
See also:
- `insert_distributed_sync` setting
- [MergeTree](../mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes) for the examples
- [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes) for the examples
Example:
@ -37,7 +37,7 @@ For example, for a query with GROUP BY, data will be aggregated on remote server
Instead of the database name, you can use a constant expression that returns a string. For example: currentDatabase().
logs The cluster name in the server's config file.
logs – The cluster name in the server’s config file.
Clusters are set like this:
@ -82,7 +82,7 @@ Replicas are duplicating servers (in order to read all the data, you can access
Cluster names must not contain dots.
The parameters `host`, `port`, and optionally `user`, `password`, `secure`, `compression` are specified for each server:
- `host` The address of the remote server. You can use either the domain or the IPv4 or IPv6 address. If you specify the domain, the server makes a DNS request when it starts, and the result is stored as long as the server is running. If the DNS request fails, the server doesn't start. If you change the DNS record, restart the server.
- `host` – The address of the remote server. You can use either the domain or the IPv4 or IPv6 address. If you specify the domain, the server makes a DNS request when it starts, and the result is stored as long as the server is running. If the DNS request fails, the server doesn’t start. If you change the DNS record, restart the server.
- `port` – The TCP port for messenger activity (`tcp_port` in the config, usually set to 9000). Do not confuse it with http\_port.
- `user` – Name of the user for connecting to a remote server. Default value: default. This user must have access to connect to the specified server. Access is configured in the users.xml file. For more information, see the section [Access rights](../../../operations/access-rights.md).
- `password` – The password for connecting to a remote server (not masked). Default value: empty string.
@ -99,38 +99,38 @@ You can specify as many clusters as you wish in the configuration.
To view your clusters, use the `system.clusters` table.
The Distributed engine allows working with a cluster like a local server. However, the cluster is inextensible: you must write its configuration in the server config file (even better, for all the cluster's servers).
The Distributed engine allows working with a cluster like a local server. However, the cluster is inextensible: you must write its configuration in the server config file (even better, for all the cluster’s servers).
The Distributed engine requires writing clusters to the config file. Clusters from the config file are updated on the fly, without restarting the server. If you need to send a query to an unknown set of shards and replicas each time, you don't need to create a Distributed table use the `remote` table function instead. See the section [Table functions](../../../sql-reference/table-functions/index.md).
The Distributed engine requires writing clusters to the config file. Clusters from the config file are updated on the fly, without restarting the server. If you need to send a query to an unknown set of shards and replicas each time, you don’t need to create a Distributed table – use the `remote` table function instead. See the section [Table functions](../../../sql-reference/table-functions/index.md).
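For example, a one-off query against a single remote server can be written with the `remote` table function instead of defining a Distributed table (the host, database, and table names below are hypothetical):

``` sql
-- Ad-hoc query to a remote ClickHouse server; no Distributed table is needed.
SELECT count() FROM remote('example01-01-1:9000', default, hits);
```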
There are two methods for writing data to a cluster:
First, you can define which servers to write which data to and perform the write directly on each shard. In other words, perform INSERT in the tables that the distributed table "looks at". This is the most flexible solution as you can use any sharding scheme, which could be non-trivial due to the requirements of the subject area. This is also the most optimal solution since data can be written to different shards completely independently.
First, you can define which servers to write which data to and perform the write directly on each shard. In other words, perform INSERT in the tables that the distributed table “looks at”. This is the most flexible solution as you can use any sharding scheme, which could be non-trivial due to the requirements of the subject area. This is also the most optimal solution since data can be written to different shards completely independently.
Second, you can perform INSERT in a Distributed table. In this case, the table will distribute the inserted data across the servers itself. In order to write to a Distributed table, it must have a sharding key set (the last parameter). In addition, if there is only one shard, the write operation works without specifying the sharding key, since it doesn't mean anything in this case.
Second, you can perform INSERT in a Distributed table. In this case, the table will distribute the inserted data across the servers itself. In order to write to a Distributed table, it must have a sharding key set (the last parameter). In addition, if there is only one shard, the write operation works without specifying the sharding key, since it doesn’t mean anything in this case.
Each shard can have a weight defined in the config file. By default, the weight is equal to one. Data is distributed across shards in the amount proportional to the shard weight. For example, if there are two shards and the first has a weight of 9 while the second has a weight of 10, the first will be sent 9 / 19 parts of the rows, and the second will be sent 10 / 19.
Each shard can have the `internal_replication` parameter defined in the config file.
If this parameter is set to `true`, the write operation selects the first healthy replica and writes data to it. Use this alternative if the Distributed table "looks at" replicated tables. In other words, if the table where data will be written is going to replicate them itself.
If this parameter is set to `true`, the write operation selects the first healthy replica and writes data to it. Use this alternative if the Distributed table “looks at” replicated tables. In other words, if the table where data will be written is going to replicate them itself.
If it is set to `false` (the default), data is written to all replicas. In essence, this means that the Distributed table replicates data itself. This is worse than using replicated tables, because the consistency of replicas is not checked, and over time they will contain slightly different data.
To select the shard that a row of data is sent to, the sharding expression is analyzed, and its remainder is taken from dividing it by the total weight of the shards. The row is sent to the shard that corresponds to the half-interval of the remainders from `prev_weights` to `prev_weights + weight`, where `prev_weights` is the total weight of the shards with the smallest number, and `weight` is the weight of this shard. For example, if there are two shards, and the first has a weight of 9 while the second has a weight of 10, the row will be sent to the first shard for the remainders from the range \[0, 9), and to the second for the remainders from the range \[9, 19).
The sharding expression can be any expression from constants and table columns that returns an integer. For example, you can use the expression `rand()` for random distribution of data, or `UserID` for distribution by the remainder from dividing the user's ID (then the data of a single user will reside on a single shard, which simplifies running IN and JOIN by users). If one of the columns is not distributed evenly enough, you can wrap it in a hash function: intHash64(UserID).
The sharding expression can be any expression from constants and table columns that returns an integer. For example, you can use the expression `rand()` for random distribution of data, or `UserID` for distribution by the remainder from dividing the user’s ID (then the data of a single user will reside on a single shard, which simplifies running IN and JOIN by users). If one of the columns is not distributed evenly enough, you can wrap it in a hash function: intHash64(UserID).
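For example, a sketch of a Distributed table over the `logs` cluster, sharded by a hash of the user ID (the database and local table names are hypothetical):

``` sql
-- Assumes a local table default.hits with a UserID column exists on every shard.
CREATE TABLE hits_all AS hits
ENGINE = Distributed(logs, default, hits, intHash64(UserID));
```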
A simple reminder from the division is a limited solution for sharding and isn't always appropriate. It works for medium and large volumes of data (dozens of servers), but not for very large volumes of data (hundreds of servers or more). In the latter case, use the sharding scheme required by the subject area, rather than using entries in Distributed tables.
A simple remainder from the division is a limited solution for sharding and isn’t always appropriate. It works for medium and large volumes of data (dozens of servers), but not for very large volumes of data (hundreds of servers or more). In the latter case, use the sharding scheme required by the subject area, rather than using entries in Distributed tables.
SELECT queries are sent to all the shards and work regardless of how data is distributed across the shards (they can be distributed completely randomly). When you add a new shard, you don't have to transfer the old data to it. You can write new data with a heavier weight the data will be distributed slightly unevenly, but queries will work correctly and efficiently.
SELECT queries are sent to all the shards and work regardless of how data is distributed across the shards (they can be distributed completely randomly). When you add a new shard, you don’t have to transfer the old data to it. You can write new data with a heavier weight – the data will be distributed slightly unevenly, but queries will work correctly and efficiently.
You should be concerned about the sharding scheme in the following cases:
- Queries are used that require joining data (IN or JOIN) by a specific key. If data is sharded by this key, you can use local IN or JOIN instead of GLOBAL IN or GLOBAL JOIN, which is much more efficient.
- A large number of servers is used (hundreds or more) with a large number of small queries (queries of individual clients - websites, advertisers, or partners). In order for the small queries to not affect the entire cluster, it makes sense to locate data for a single client on a single shard. Alternatively, as we've done in Yandex.Metrica, you can set up bi-level sharding: divide the entire cluster into "layers", where a layer may consist of multiple shards. Data for a single client is located on a single layer, but shards can be added to a layer as necessary, and data is randomly distributed within them. Distributed tables are created for each layer, and a single shared distributed table is created for global queries.
- A large number of servers is used (hundreds or more) with a large number of small queries (queries of individual clients - websites, advertisers, or partners). In order for the small queries to not affect the entire cluster, it makes sense to locate data for a single client on a single shard. Alternatively, as we’ve done in Yandex.Metrica, you can set up bi-level sharding: divide the entire cluster into “layers”, where a layer may consist of multiple shards. Data for a single client is located on a single layer, but shards can be added to a layer as necessary, and data is randomly distributed within them. Distributed tables are created for each layer, and a single shared distributed table is created for global queries.
Data is written asynchronously. When inserted in the table, the data block is just written to the local file system. The data is sent to the remote servers in the background as soon as possible. The period for sending data is managed by the [distributed\_directory\_monitor\_sleep\_time\_ms](../../../operations/settings/settings.md#distributed_directory_monitor_sleep_time_ms) and [distributed\_directory\_monitor\_max\_sleep\_time\_ms](../../../operations/settings/settings.md#distributed_directory_monitor_max_sleep_time_ms) settings. The `Distributed` engine sends each file with inserted data separately, but you can enable batch sending of files with the [distributed\_directory\_monitor\_batch\_inserts](../../../operations/settings/settings.md#distributed_directory_monitor_batch_inserts) setting. This setting improves cluster performance by better utilizing local server and network resources. You should check whether data is sent successfully by checking the list of files (data waiting to be sent) in the table directory: `/var/lib/clickhouse/data/database/table/`. The number of threads performing background tasks can be set by [background_distributed_schedule_pool_size](../../../operations/settings/settings.md#background_distributed_schedule_pool_size) setting.
Data is written asynchronously. When inserted in the table, the data block is just written to the local file system. The data is sent to the remote servers in the background as soon as possible. The period for sending data is managed by the [distributed\_directory\_monitor\_sleep\_time\_ms](../../../operations/settings/settings.md#distributed_directory_monitor_sleep_time_ms) and [distributed\_directory\_monitor\_max\_sleep\_time\_ms](../../../operations/settings/settings.md#distributed_directory_monitor_max_sleep_time_ms) settings. The `Distributed` engine sends each file with inserted data separately, but you can enable batch sending of files with the [distributed\_directory\_monitor\_batch\_inserts](../../../operations/settings/settings.md#distributed_directory_monitor_batch_inserts) setting. This setting improves cluster performance by better utilizing local server and network resources. You should check whether data is sent successfully by checking the list of files (data waiting to be sent) in the table directory: `/var/lib/clickhouse/data/database/table/`. The number of threads performing background tasks can be set by [background\_distributed\_schedule\_pool\_size](../../../operations/settings/settings.md#background_distributed_schedule_pool_size) setting.
If the server ceased to exist or had a rough restart (for example, after a device failure) after an INSERT to a Distributed table, the inserted data might be lost. If a damaged data part is detected in the table directory, it is transferred to the `broken` subdirectory and no longer used.
@ -145,7 +145,7 @@ When the `max_parallel_replicas` option is enabled, query processing is parallel
**See Also**
- [Virtual columns](index.md#table_engines-virtual_columns)
- [background_distributed_schedule_pool_size](../../../operations/settings/settings.md#background_distributed_schedule_pool_size)
- [Virtual columns](../../../engines/table-engines/special/index.md#table_engines-virtual_columns)
- [background\_distributed\_schedule\_pool\_size](../../../operations/settings/settings.md#background_distributed_schedule_pool_size)
[Original article](https://clickhouse.tech/docs/en/operations/table_engines/distributed/) <!--hide-->

View File

@ -33,7 +33,7 @@ You may manually create this subfolder and file in server filesystem and then [A
!!! warning "Warning"
Be careful with this functionality, because ClickHouse does not keep track of external changes to such files. The result of simultaneous writes via ClickHouse and outside of ClickHouse is undefined.
## Example
## Example {#example}
**1.** Set up the `file_engine_table` table:

View File

@ -25,7 +25,7 @@ Generate table engine supports only `SELECT` queries.
It supports all [DataTypes](../../../sql-reference/data-types/index.md) that can be stored in a table except `LowCardinality` and `AggregateFunction`.
## Example
## Example {#example}
**1.** Set up the `generate_engine_table` table:

View File

@ -11,7 +11,7 @@ Reading is automatically parallelized. Writing to a table is not supported. When
The `Merge` engine accepts parameters: the database name and a regular expression for tables.
## Examples
## Examples {#examples}
Example 1:
@ -67,6 +67,6 @@ FROM WatchLog
**See Also**
- [Virtual columns](index.md#table_engines-virtual_columns)
- [Virtual columns](../../../engines/table-engines/special/index.md#table_engines-virtual_columns)
[Original article](https://clickhouse.tech/docs/en/operations/table_engines/merge/) <!--hide-->

View File

@ -5,7 +5,7 @@ toc_title: URL
# URL Table Engine {#table_engines-url}
Queries data to/from a remote HTTP/HTTPS server. This engine is similar to the [File](file.md) engine.
Queries data to/from a remote HTTP/HTTPS server. This engine is similar to the [File](../../../engines/table-engines/special/file.md) engine.
Syntax: `URL(URL, Format)`
@ -25,7 +25,7 @@ respectively. For processing `POST` requests, the remote server must support
You can limit the maximum number of HTTP GET redirect hops using the [max\_http\_get\_redirects](../../../operations/settings/settings.md#setting-max_http_get_redirects) setting.
## Example
## Example {#example}
**1.** Create a `url_engine_table` table on the server :

View File

@ -1,58 +0,0 @@
---
toc_priority: 78
toc_title: General Questions
---
# General Questions {#general-questions}
## Why Not Use Something Like MapReduce? {#why-not-use-something-like-mapreduce}
We can refer to systems like MapReduce as distributed computing systems in which the reduce operation is based on distributed sorting. The most common open-source solution in this class is [Apache Hadoop](http://hadoop.apache.org). Yandex uses its in-house solution, YT.
These systems arent appropriate for online queries due to their high latency. In other words, they cant be used as the back-end for a web interface. These types of systems arent useful for real-time data updates. Distributed sorting isnt the best way to perform reduce operations if the result of the operation and all the intermediate results (if there are any) are located in the RAM of a single server, which is usually the case for online queries. In such a case, a hash table is an optimal way to perform reduce operations. A common approach to optimizing map-reduce tasks is pre-aggregation (partial reduce) using a hash table in RAM. The user performs this optimization manually. Distributed sorting is one of the main causes of reduced performance when running simple map-reduce tasks.
Most MapReduce implementations allow you to execute arbitrary code on a cluster. But a declarative query language is better suited to OLAP to run experiments quickly. For example, Hadoop has Hive and Pig. Also consider Cloudera Impala or Shark (outdated) for Spark, as well as Spark SQL, Presto, and Apache Drill. Performance when running such tasks is highly sub-optimal compared to specialized systems, but relatively high latency makes it unrealistic to use these systems as the backend for a web interface.
## What If I Have a Problem with Encodings When Using Oracle Through ODBC? {#oracle-odbc-encodings}
If you use Oracle through the ODBC driver as a source of external dictionaries, you need to set the correct value for the `NLS_LANG` environment variable in `/etc/default/clickhouse`. For more information, see the [Oracle NLS\_LANG FAQ](https://www.oracle.com/technetwork/products/globalization/nls-lang-099431.html).
**Example**
``` sql
NLS_LANG=RUSSIAN_RUSSIA.UTF8
```
## How Do I Export Data from ClickHouse to a File? {#how-to-export-to-file}
### Using INTO OUTFILE Clause {#using-into-outfile-clause}
Add an [INTO OUTFILE](../sql-reference/statements/select/into-outfile.md#into-outfile-clause) clause to your query.
For example:
``` sql
SELECT * FROM table INTO OUTFILE 'file'
```
By default, ClickHouse uses the [TabSeparated](../interfaces/formats.md#tabseparated) format for output data. To select the [data format](../interfaces/formats.md), use the [FORMAT clause](../sql-reference/statements/select/format.md#format-clause).
For example:
``` sql
SELECT * FROM table INTO OUTFILE 'file' FORMAT CSV
```
### Using a File-Engine Table {#using-a-file-engine-table}
See [File](../engines/table-engines/special/file.md).
### Using Command-Line Redirection {#using-command-line-redirection}
``` sql
$ clickhouse-client --query "SELECT * from table" --format FormatName > result.txt
```
See [clickhouse-client](../interfaces/cli.md).
{## [Original article](https://clickhouse.tech/docs/en/faq/general/) ##}

View File

@ -0,0 +1,11 @@
---
toc_hidden: true
toc_priority: 10
---
# What Does “ClickHouse” Mean? {#what-does-clickhouse-mean}
It’s a combination of “**Click**stream” and “Data ware**house**”. It comes from the original use case at Yandex.Metrica, where ClickHouse was supposed to keep records of all clicks by people from all over the Internet, and it still does the job. You can read more about this use case on the [ClickHouse history](../../introduction/history.md) page.
!!! info "Fun fact"
Many years after ClickHouse got its name, this approach of combining two words that are meaningful on their own was highlighted as the best way to name a database in [research by Andy Pavlo](https://www.cs.cmu.edu/~pavlo/blog/2020/03/on-naming-a-database-management-system.html), an Associate Professor of Databases at Carnegie Mellon University. ClickHouse shared his “best database name of all time” award with Postgres.

View File

@ -0,0 +1,18 @@
---
toc_hidden_folder: true
toc_priority: 1
toc_title: General
---
# General Questions About ClickHouse {#general-questions}
Questions:
- [What does “ClickHouse” mean?](../../faq/general/dbms-naming.md)
- [What does “Не тормозит” mean?](../../faq/general/ne-tormozit.md)
- [Why not use something like MapReduce?](../../faq/general/mapreduce.md)
!!! info “Don’t see what you were looking for?”
Check out [other F.A.Q. categories](../../faq/index.md) or browse around main documentation articles found in the left sidebar.
{## [Original article](https://clickhouse.tech/docs/en/faq/general/) ##}

View File

@ -0,0 +1,12 @@
---
toc_hidden: true
toc_priority: 20
---
# Why Not Use Something Like MapReduce? {#why-not-use-something-like-mapreduce}
We can refer to systems like MapReduce as distributed computing systems in which the reduce operation is based on distributed sorting. The most common open-source solution in this class is [Apache Hadoop](http://hadoop.apache.org). Yandex uses its in-house solution, YT.
These systems aren’t appropriate for online queries due to their high latency. In other words, they can’t be used as the back-end for a web interface. These types of systems aren’t useful for real-time data updates. Distributed sorting isn’t the best way to perform reduce operations if the result of the operation and all the intermediate results (if there are any) are located in the RAM of a single server, which is usually the case for online queries. In such a case, a hash table is an optimal way to perform reduce operations. A common approach to optimizing map-reduce tasks is pre-aggregation (partial reduce) using a hash table in RAM. The user performs this optimization manually. Distributed sorting is one of the main causes of reduced performance when running simple map-reduce tasks.
Most MapReduce implementations allow you to execute arbitrary code on a cluster. But a declarative query language is better suited to OLAP to run experiments quickly. For example, Hadoop has Hive and Pig. Also consider Cloudera Impala or Shark (outdated) for Spark, as well as Spark SQL, Presto, and Apache Drill. Performance when running such tasks is highly sub-optimal compared to specialized systems, but relatively high latency makes it unrealistic to use these systems as the backend for a web interface.

View File

@ -0,0 +1,24 @@
---
toc_hidden: true
toc_priority: 11
---
# What Does “Не тормозит” Mean? {#what-does-ne-tormozit-mean}
This question usually arises when people see official ClickHouse t-shirts. They have large words **“ClickHouse не тормозит”** on the front.
Before ClickHouse became open-source, it had been developed as an in-house storage system by the largest Russian IT company, [Yandex](https://yandex.com/company/). That’s why it initially got its slogan in Russian, which is “не тормозит”. After the open-source release, we first produced some of those t-shirts for events in Russia, and it was a no-brainer to use the slogan as-is.
One of the following batches of those t-shirts was supposed to be given away at events outside of Russia, and we tried to make an English version of the slogan. Unfortunately, the Russian language is kind of elegant in terms of expressing stuff, and there was limited space on a t-shirt, so we failed to come up with a good enough translation (most options appeared to be either long or inaccurate) and decided to keep the slogan in Russian even on t-shirts produced for international events. It appeared to be a great decision because people all over the world get positively surprised and curious when they see it.
So, what does it mean? Here are some ways to translate *“не тормозит”*:
- If you translate it literally, it’d be something like *“ClickHouse doesn’t press the brake pedal”*.
- If you’d want to express it as close to how it sounds to a Russian person with an IT background, it’d be something like *“If your larger system lags, it’s not because it uses ClickHouse”*.
- Shorter, but not so precise versions could be *“ClickHouse is not slow”*, *“ClickHouse doesn’t lag”* or just *“ClickHouse is fast”*.
If you haven’t seen one of those t-shirts in person, you can check them out online in many ClickHouse-related videos. For example, this one:
![iframe](https://www.youtube.com/embed/bSyQahMVZ7w)
P.S. These t-shirts are not for sale; they are given away for free at most [ClickHouse Meetups](https://clickhouse.tech/#meet), usually for the best questions or other forms of active participation.

View File

@ -1,9 +1,17 @@
---
toc_folder_title: F.A.Q.
toc_priority: 76
toc_title: hidden
toc_hidden: true
toc_priority: 76
---
# ClickHouse F.A.Q {#clickhouse-f-a-q}
This section of the documentation is a place to collect answers to ClickHouse-related questions that arise often.
Categories:
- [General](../faq/general/index.md)
- [Operations](../faq/operations/index.md)
- [Integration](../faq/integration/index.md)
{## [Original article](https://clickhouse.tech/docs/en/faq) ##}

View File

@ -0,0 +1,36 @@
---
toc_hidden: true
toc_priority: 10
---
# How Do I Export Data from ClickHouse to a File? {#how-to-export-to-file}
## Using INTO OUTFILE Clause {#using-into-outfile-clause}
Add an [INTO OUTFILE](../../sql-reference/statements/select/into-outfile.md#into-outfile-clause) clause to your query.
For example:
``` sql
SELECT * FROM table INTO OUTFILE 'file'
```
By default, ClickHouse uses the [TabSeparated](../../interfaces/formats.md#tabseparated) format for output data. To select the [data format](../../interfaces/formats.md), use the [FORMAT clause](../../sql-reference/statements/select/format.md#format-clause).
For example:
``` sql
SELECT * FROM table INTO OUTFILE 'file' FORMAT CSV
```
## Using a File-Engine Table {#using-a-file-engine-table}
See [File](../../engines/table-engines/special/file.md) table engine.
## Using Command-Line Redirection {#using-command-line-redirection}
``` bash
$ clickhouse-client --query "SELECT * from table" --format FormatName > result.txt
```
See [clickhouse-client](../../interfaces/cli.md).

View File

@ -0,0 +1,17 @@
---
toc_hidden_folder: true
toc_priority: 3
toc_title: Integration
---
# Questions About Integrating ClickHouse and Other Systems {#question-about-integrating-clickhouse-and-other-systems}
Questions:
- [How do I export data from ClickHouse to a file?](../../faq/integration/file-export.md)
- [What if I have a problem with encodings when connecting to Oracle via ODBC?](../../faq/integration/oracle-odbc.md)
!!! info “Don’t see what you were looking for?”
Check out [other F.A.Q. categories](../../faq/index.md) or browse around main documentation articles found in the left sidebar.
{## [Original article](https://clickhouse.tech/docs/en/faq/integration/) ##}

View File

@ -0,0 +1,14 @@
---
toc_hidden: true
toc_priority: 20
---
# What If I Have a Problem with Encodings When Using Oracle Via ODBC? {#oracle-odbc-encodings}
If you use Oracle as a source of ClickHouse external dictionaries via Oracle ODBC driver, you need to set the correct value for the `NLS_LANG` environment variable in `/etc/default/clickhouse`. For more information, see the [Oracle NLS\_LANG FAQ](https://www.oracle.com/technetwork/products/globalization/nls-lang-099431.html).
**Example**
``` bash
NLS_LANG=RUSSIAN_RUSSIA.UTF8
```

View File

@ -0,0 +1,16 @@
---
toc_hidden_folder: true
toc_priority: 2
toc_title: Operations
---
# Questions About Operating ClickHouse Servers and Clusters {#question-about-operating-clickhouse-servers-and-clusters}
Questions:
- [Which ClickHouse version to use in production?](../../faq/operations/production.md)
!!! info “Don’t see what you were looking for?”
Check out [other F.A.Q. categories](../../faq/index.md) or browse around main documentation articles found in the left sidebar.
{## [Original article](https://clickhouse.tech/docs/en/faq/production/) ##}

View File

@ -0,0 +1,69 @@
---
toc_hidden: true
toc_priority: 10
---
# Which ClickHouse Version to Use in Production? {#which-clickhouse-version-to-use-in-production}
First of all, let’s discuss why people ask this question in the first place. There are two key reasons:
1. ClickHouse is developed with pretty high velocity, and usually there are 10+ stable releases per year. That makes for a wide range of releases to choose from, which is not a trivial choice.
2. Some users want to avoid spending time figuring out which version works best for their use case and just follow someone else’s advice.
The second reason is more fundamental, so we’ll start with it and then get back to navigating through various ClickHouse releases.
## Which ClickHouse Version Do You Recommend? {#which-clickhouse-version-do-you-recommend}
It’s tempting to hire consultants or trust some known experts to get rid of responsibility for your production environment. You install some specific ClickHouse version that someone else recommended; now if there’s some issue with it, it’s not your fault, it’s someone else’s. This line of reasoning is a big trap. No external person knows better than you what’s going on in your company’s production environment.
So how to properly choose which ClickHouse version to upgrade to? Or how to choose your first ClickHouse version? First of all, you need to invest in setting up a **realistic pre-production environment**. In an ideal world, it could be a completely identical shadow copy, but that’s usually expensive.
Here are some key points for getting reasonable fidelity in a pre-production environment without overly high costs:
- The pre-production environment needs to run a set of queries as close as possible to what you intend to run in production:
- Don’t make it read-only with some frozen data.
- Don’t make it write-only with just copying data without building some typical reports.
- Don’t wipe it clean instead of applying schema migrations.
- Use a sample of real production data and queries. Try to choose a sample that’s still representative and makes `SELECT` queries return reasonable results. Use obfuscation if your data is sensitive and internal policies don’t allow it to leave the production environment.
- Make sure that pre-production is covered by your monitoring and alerting software the same way as your production environment is.
- If your production spans multiple datacenters or regions, make sure your pre-production does the same.
- If your production uses complex features like replication, distributed tables, or cascading materialized views, make sure they are configured similarly in pre-production.
- There’s a trade-off between using roughly the same number of servers or VMs in pre-production as in production but of a smaller size, or using far fewer of them but of the same size. The first option might catch extra network-related issues, while the latter is easier to manage.
The second area to invest in is **automated testing infrastructure**. Don’t assume that if some kind of query has executed successfully once, it’ll continue to do so forever. It’s ok to have some unit tests where ClickHouse is mocked, but make sure your product has a reasonable set of automated tests that are run against real ClickHouse and check that all important use cases are still working as expected.
An extra step forward could be contributing those automated tests to [ClickHouse’s open-source test infrastructure](https://github.com/ClickHouse/ClickHouse/tree/master/tests) that’s continuously used in its day-to-day development. It will definitely take some additional time and effort to learn [how to run it](../../development/tests.md) and how to adapt your tests to this framework, but it’ll pay off by ensuring that ClickHouse releases are already tested against them when they are announced stable, instead of repeatedly losing time reporting an issue after the fact and then waiting for a bugfix to be implemented, backported, and released. Some companies even have an internal policy requiring such test contributions for the infrastructure they use; most notably, it’s called [Beyonce’s Rule](https://www.oreilly.com/library/view/software-engineering-at/9781492082781/ch01.html#policies_that_scale_well) at Google.
When you have your pre-production environment and testing infrastructure in place, choosing the best version is straightforward:
1. Routinely run your automated tests against new ClickHouse releases. You can do it even for ClickHouse releases that are marked as `testing`, but going forward to the next steps with them is not recommended.
2. Deploy the ClickHouse release that passed the tests to pre-production and check that all processes are running as expected.
3. Report any issues you discovered to [ClickHouse GitHub Issues](https://github.com/ClickHouse/ClickHouse/issues).
4. If there were no major issues, it should be safe to start deploying the ClickHouse release to your production environment. Investing in gradual release automation that implements an approach similar to [canary releases](https://martinfowler.com/bliki/CanaryRelease.html) or [blue-green deployments](https://martinfowler.com/bliki/BlueGreenDeployment.html) might further reduce the risk of issues in production.
As you might have noticed, there’s nothing specific to ClickHouse in the approach described above; people do that for any piece of infrastructure they rely on if they take their production environment seriously.
## How to Choose Between ClickHouse Releases? {#how-to-choose-between-clickhouse-releases}
If you look into the contents of the ClickHouse package repository, you’ll see four kinds of packages:
1. `testing`
2. `prestable`
3. `stable`
4. `lts` (long-term support)
As was mentioned earlier, `testing` is good mostly for noticing issues early; running such releases in production is not recommended because they are not tested as thoroughly as the other kinds of packages.
`prestable` is a release candidate which generally looks promising and is likely to be announced as `stable` soon. You can try them out in pre-production and report issues if you see any.
For production use, there are two key options: `stable` and `lts`. Here is some guidance on how to choose between them:
- `stable` is the kind of package we recommend by default. They are released roughly monthly (and thus provide new features with a reasonable delay), and the three latest stable releases are supported in terms of diagnostics and backporting of bugfixes.
- `lts` are released twice a year and are supported for a year after their initial release. You might prefer them over `stable` in the following cases:
- Your company has some internal policies that don’t allow for frequent upgrades or using non-LTS software.
- You are using ClickHouse in some secondary products that either don’t require any complex ClickHouse features or don’t have enough resources to keep it updated.
Many teams who initially thought that `lts` was the way to go often switch to `stable` anyway because of some recent feature that’s important for their product.
!!! warning "Important"
One more thing to keep in mind when upgrading ClickHouse: we’re always keeping an eye on compatibility across releases, but sometimes it’s not reasonable to keep it and some minor details might change. So make sure you check the [changelog](../../whats-new/changelog/index.md) before upgrading to see if there are any notes about backward-incompatible changes.

View File

@ -9,12 +9,12 @@ toc_title: Introduction
This section describes how to obtain example datasets and import them into ClickHouse.
For some datasets example queries are also available.
- [Anonymized Yandex.Metrica Dataset](metrica.md)
- [Star Schema Benchmark](star-schema.md)
- [WikiStat](wikistat.md)
- [Terabyte of Click Logs from Criteo](criteo.md)
- [AMPLab Big Data Benchmark](amplab-benchmark.md)
- [New York Taxi Data](nyc-taxi.md)
- [OnTime](ontime.md)
- [Anonymized Yandex.Metrica Dataset](../../getting-started/example-datasets/metrica.md)
- [Star Schema Benchmark](../../getting-started/example-datasets/star-schema.md)
- [WikiStat](../../getting-started/example-datasets/wikistat.md)
- [Terabyte of Click Logs from Criteo](../../getting-started/example-datasets/criteo.md)
- [AMPLab Big Data Benchmark](../../getting-started/example-datasets/amplab-benchmark.md)
- [New York Taxi Data](../../getting-started/example-datasets/nyc-taxi.md)
- [OnTime](../../getting-started/example-datasets/ontime.md)
[Original article](https://clickhouse.tech/docs/en/getting_started/example_datasets) <!--hide-->

View File

@ -7,7 +7,7 @@ toc_title: Yandex.Metrica Data
Dataset consists of two tables containing anonymized data about hits (`hits_v1`) and visits (`visits_v1`) of Yandex.Metrica. You can read more about Yandex.Metrica in [ClickHouse history](../../introduction/history.md) section.
The dataset consists of two tables, either of them can be downloaded as a compressed `tsv.xz` file or as prepared partitions. In addition to that, an extended version of the `hits` table containing 100 million rows is available as TSV at https://clickhouse-datasets.s3.yandex.net/hits/tsv/hits_100m_obfuscated_v1.tsv.xz and as prepared partitions at https://clickhouse-datasets.s3.yandex.net/hits/partitions/hits_100m_obfuscated_v1.tar.xz.
The dataset consists of two tables, either of them can be downloaded as a compressed `tsv.xz` file or as prepared partitions. In addition to that, an extended version of the `hits` table containing 100 million rows is available as TSV at https://clickhouse-datasets.s3.yandex.net/hits/tsv/hits\_100m\_obfuscated\_v1.tsv.xz and as prepared partitions at https://clickhouse-datasets.s3.yandex.net/hits/partitions/hits\_100m\_obfuscated\_v1.tar.xz.
## Obtaining Tables from Prepared Partitions {#obtaining-tables-from-prepared-partitions}

View File

@ -7,9 +7,9 @@ toc_title: hidden
# Getting Started {#getting-started}
If you are new to ClickHouse and want to get a hands-on feeling of its performance, first of all, you need to go through the [installation process](install.md). After that you can:
If you are new to ClickHouse and want to get a hands-on feeling of its performance, first of all, you need to go through the [installation process](../getting-started/install.md). After that you can:
- [Go through detailed tutorial](tutorial.md)
- [Experiment with example datasets](example-datasets/ontime.md)
- [Go through detailed tutorial](../getting-started/tutorial.md)
- [Experiment with example datasets](../getting-started/example-datasets/ontime.md)
[Original article](https://clickhouse.tech/docs/en/getting_started/) <!--hide-->

View File

@ -6,13 +6,13 @@ toc_title: Playground
# ClickHouse Playground {#clickhouse-playground}
[ClickHouse Playground](https://play.clickhouse.tech) allows people to experiment with ClickHouse by running queries instantly, without setting up their server or cluster.
Several example datasets are available in the Playground as well as sample queries that show ClickHouse features. There's also a selection of ClickHouse LTS releases to experiment with.
Several example datasets are available in the Playground as well as sample queries that show ClickHouse features. There’s also a selection of ClickHouse LTS releases to experiment with.
ClickHouse Playground gives the experience of an m2.small [Managed Service for ClickHouse](https://cloud.yandex.com/services/managed-clickhouse) instance (4 vCPU, 32 GB RAM) hosted in [Yandex.Cloud](https://cloud.yandex.com/). More information about [cloud providers](../commercial/cloud.md).
You can make queries to playground using any HTTP client, for example [curl](https://curl.haxx.se) or [wget](https://www.gnu.org/software/wget/), or set up a connection using [JDBC](../interfaces/jdbc.md) or [ODBC](../interfaces/odbc.md) drivers. More information about software products that support ClickHouse is available [here](../interfaces/index.md).
## Credentials
## Credentials {#credentials}
| Parameter | Value |
|:--------------------|:----------------------------------------|
@ -23,13 +23,13 @@ You can make queries to playground using any HTTP client, for example [curl](htt
There are additional endpoints with specific ClickHouse releases to experiment with their differences (ports and user/password are the same as above):
* 20.3 LTS: `play-api-v20-3.clickhouse.tech`
* 19.14 LTS: `play-api-v19-14.clickhouse.tech`
- 20.3 LTS: `play-api-v20-3.clickhouse.tech`
- 19.14 LTS: `play-api-v19-14.clickhouse.tech`
!!! note "Note"
All these endpoints require a secure TLS connection.
## Limitations
## Limitations {#limitations}
The queries are executed as a read-only user. It implies some limitations:
@ -37,12 +37,12 @@ The queries are executed as a read-only user. It implies some limitations:
- INSERT queries are not allowed
The following settings are also enforced:
- [max_result_bytes=10485760](../operations/settings/query_complexity/#max-result-bytes)
- [max_result_rows=2000](../operations/settings/query_complexity/#setting-max_result_rows)
- [result_overflow_mode=break](../operations/settings/query_complexity/#result-overflow-mode)
- [max_execution_time=60000](../operations/settings/query_complexity/#max-execution-time)
- [max\_result\_bytes=10485760](../operations/settings/query_complexity/#max-result-bytes)
- [max\_result\_rows=2000](../operations/settings/query_complexity/#setting-max_result_rows)
- [result\_overflow\_mode=break](../operations/settings/query_complexity/#result-overflow-mode)
- [max\_execution\_time=60000](../operations/settings/query_complexity/#max-execution-time)
## Examples
## Examples {#examples}
HTTPS endpoint example with `curl`:
@ -51,11 +51,12 @@ curl "https://play-api.clickhouse.tech:8443/?query=SELECT+'Play+ClickHouse!';&us
```
TCP endpoint example with [CLI](../interfaces/cli.md):
``` bash
clickhouse client --secure -h play-api.clickhouse.tech --port 9440 -u playground --password clickhouse -q "SELECT 'Play ClickHouse!'"
```
## Implementation Details
## Implementation Details {#implementation-details}
ClickHouse Playground web interface makes requests via ClickHouse [HTTP API](../interfaces/http.md).
The Playground backend is just a ClickHouse cluster without any additional server-side application. As mentioned above, ClickHouse HTTPS and TCP/TLS endpoints are also publicly available as a part of the Playground; both are proxied through [Cloudflare Spectrum](https://www.cloudflare.com/products/cloudflare-spectrum/) to add an extra layer of protection and improve global connectivity.

View File

@ -11,7 +11,7 @@ By going through this tutorial, youll learn how to set up a simple ClickHouse
## Single Node Setup {#single-node-setup}
To postpone the complexities of a distributed environment, well start with deploying ClickHouse on a single server or virtual machine. ClickHouse is usually installed from [deb](install.md#install-from-deb-packages) or [rpm](install.md#from-rpm-packages) packages, but there are [alternatives](install.md#from-docker-image) for the operating systems that do no support them.
To postpone the complexities of a distributed environment, we’ll start with deploying ClickHouse on a single server or virtual machine. ClickHouse is usually installed from [deb](../getting-started/install.md#install-from-deb-packages) or [rpm](../getting-started/install.md#from-rpm-packages) packages, but there are [alternatives](../getting-started/install.md#from-docker-image) for the operating systems that do not support them.
For example, you have chosen `deb` packages and executed:
@ -80,7 +80,7 @@ clickhouse-client --query='INSERT INTO table FORMAT TabSeparated' < data.tsv
## Import Sample Dataset {#import-sample-dataset}
Now its time to fill our ClickHouse server with some sample data. In this tutorial, well use the anonymized data of Yandex.Metrica, the first service that runs ClickHouse in production way before it became open-source (more on that in [history section](../introduction/history.md)). There are [multiple ways to import Yandex.Metrica dataset](example-datasets/metrica.md), and for the sake of the tutorial, well go with the most realistic one.
Now it’s time to fill our ClickHouse server with some sample data. In this tutorial, we’ll use the anonymized data of Yandex.Metrica, the first service that runs ClickHouse in production, way before it became open-source (more on that in the [history section](../introduction/history.md)). There are [multiple ways to import the Yandex.Metrica dataset](../getting-started/example-datasets/metrica.md), and for the sake of the tutorial, we’ll go with the most realistic one.
### Download and Extract Table Data {#download-and-extract-table-data}

View File

@ -232,6 +232,6 @@ FROM
```
!!! note "Note"
More info about [avg()](../sql-reference/aggregate-functions/reference.md#agg_function-avg) and [log()](../sql-reference/functions/math-functions.md) functions.
More info about [avg()](../sql-reference/aggregate-functions/reference/avg.md#agg_function-avg) and [log()](../sql-reference/functions/math-functions.md) functions.
[Original article](https://clickhouse.tech/docs/en/guides/apply_catboost_model/) <!--hide-->

View File

@ -9,6 +9,6 @@ toc_title: Overview
List of detailed step-by-step instructions that help to solve various tasks using ClickHouse:
- [Tutorial on simple cluster set-up](../getting-started/tutorial.md)
- [Applying a CatBoost model in ClickHouse](apply-catboost-model.md)
- [Applying a CatBoost model in ClickHouse](../guides/apply-catboost-model.md)
[Original article](https://clickhouse.tech/docs/en/guides/) <!--hide-->

View File

@ -1147,11 +1147,11 @@ To exchange data with Hadoop, you can use [HDFS table engine](../engines/table-e
[Apache Arrow](https://arrow.apache.org/) comes with two built-in columnar storage formats. ClickHouse supports read and write operations for these formats.
`Arrow` is Apache Arrow's "file mode" format. It is designed for in-memory random access.
`Arrow` is Apache Arrow’s “file mode” format. It is designed for in-memory random access.
## ArrowStream {#data-format-arrow-stream}
`ArrowStream` is Apache Arrow's "stream mode" format. It is designed for in-memory stream processing.
`ArrowStream` is Apache Arrow’s “stream mode” format. It is designed for in-memory stream processing.
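For illustration (the table and file names are hypothetical), query results can be written in these formats from `clickhouse-client`:

``` sql
-- Writes the result to a local file in the Arrow "file mode" format;
-- FORMAT ArrowStream can be used in the same way for the streaming format.
SELECT * FROM some_table
INTO OUTFILE 'data.arrow'
FORMAT Arrow
```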
## ORC {#data-format-orc}

View File

@ -275,7 +275,7 @@ Use buffering to avoid situations where a query processing error occurred after
### Queries with Parameters {#cli-queries-with-parameters}
You can create a query with parameters and pass values for them from the corresponding HTTP request parameters. For more information, see [Queries with Parameters for CLI](cli.md#cli-queries-with-parameters).
You can create a query with parameters and pass values for them from the corresponding HTTP request parameters. For more information, see [Queries with Parameters for CLI](../interfaces/cli.md#cli-queries-with-parameters).
### Example {#example}
@ -291,7 +291,7 @@ ClickHouse supports specific queries through the HTTP interface. For example, yo
$ echo '(4),(5),(6)' | curl 'http://localhost:8123/?query=INSERT%20INTO%20t%20VALUES' --data-binary @-
```
ClickHouse also supports Predefined HTTP Interface which can help you more easy integration with third party tools like [Prometheus exporter](https://github.com/percona-lab/clickhouse_exporter).
ClickHouse also supports Predefined HTTP Interface which can help you more easily integrate with third-party tools like [Prometheus exporter](https://github.com/percona-lab/clickhouse_exporter).
Example:
@ -314,7 +314,7 @@ Example:
</http_handlers>
```
- You can now request the url directly for data in the Prometheus format:
- You can now request the URL directly for data in the Prometheus format:
<!-- -->
@ -361,41 +361,40 @@ $ curl -v 'http://localhost:8123/predefined_query'
* Connection #0 to host localhost left intact
* Connection #0 to host localhost left intact
```
As you can see from the example, if `<http_handlers>` is configured in the config.xml file and `<http_handlers>` can contain many `<rule>s`. ClickHouse will match the HTTP requests received to the predefined type in `<rule>` and the first matched runs the handler. Then ClickHouse will execute the corresponding predefined query if the match is successful.
As you can see from the example, `http_handlers` is configured in the config.xml file and can contain many `rules`. ClickHouse matches the received HTTP requests against the predefined type in each `rule`; the first match runs the handler, and ClickHouse then executes the corresponding predefined query if the match is successful.
> Now `<rule>` can configure `<method>`, `<headers>`, `<url>`,`<handler>`:
> `<method>` is responsible for matching the method part of the HTTP request. `<method>` fully conforms to the definition of [method](https://developer.mozilla.org/en-US/docs/Web/HTTP/Methods) in the HTTP protocol. It is an optional configuration. If it is not defined in the configuration file, it does not match the method portion of the HTTP request.
>
> `<url>` is responsible for matching the url part of the HTTP request. It is compatible with [RE2](https://github.com/google/re2)s regular expressions. It is an optional configuration. If it is not defined in the configuration file, it does not match the url portion of the HTTP request.
>
> `<headers>` is responsible for matching the header part of the HTTP request. It is compatible with RE2s regular expressions. It is an optional configuration. If it is not defined in the configuration file, it does not match the header portion of the HTTP request.
>
> `<handler>` contains the main processing part. Now `<handler>` can configure `<type>`, `<status>`, `<content_type>`, `<response_content>`, `<query>`, `<query_param_name>`.
> \> `<type>` currently supports three types: **predefined\_query\_handler**, **dynamic\_query\_handler**, **static**.
> \>
> \> `<query>` - use with predefined\_query\_handler type, executes query when the handler is called.
> \>
> \> `<query_param_name>` - use with dynamic\_query\_handler type, extracts and executes the value corresponding to the `<query_param_name>` value in HTTP request params.
> \>
> \> `<status>` - use with static type, response status code.
> \>
> \> `<content_type>` - use with static type, response [content-type](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Type).
> \>
> \> `<response_content>` - use with static type, Response content sent to client, when using the prefix file:// or config://, find the content from the file or configuration send to client.
Now `rule` can configure `method`, `headers`, `url`, `handler`:
- `method` is responsible for matching the method part of the HTTP request. `method` fully conforms to the definition of [method](https://developer.mozilla.org/en-US/docs/Web/HTTP/Methods) in the HTTP protocol. It is an optional configuration. If it is not defined in the configuration file, it does not match the method portion of the HTTP request.
Next are the configuration methods for the different `<type>`.
- `url` is responsible for matching the URL part of the HTTP request. It is compatible with [RE2](https://github.com/google/re2)’s regular expressions. It is an optional configuration. If it is not defined in the configuration file, it does not match the URL portion of the HTTP request.
## predefined\_query\_handler {#predefined_query_handler}
- `headers` are responsible for matching the header part of the HTTP request. It is compatible with RE2’s regular expressions. It is an optional configuration. If it is not defined in the configuration file, it does not match the header portion of the HTTP request.
`<predefined_query_handler>` supports setting Settings and query\_params values. You can configure `<query>` in the type of `<predefined_query_handler>`.
- `handler` contains the main processing part. Now `handler` can configure `type`, `status`, `content_type`, `response_content`, `query`, `query_param_name`.
`type` currently supports three types: [predefined_query_handler](#predefined_query_handler), [dynamic_query_handler](#dynamic_query_handler), [static](#static).
- `query` — use with `predefined_query_handler` type, executes the query when the handler is called.
- `query_param_name` — use with `dynamic_query_handler` type, extracts and executes the value corresponding to the `query_param_name` value in HTTP request params.
- `status` — use with `static` type, response status code.
- `content_type` — use with `static` type, response [content-type](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Type).
`<query>` value is a predefined query of `<predefined_query_handler>`, which is executed by ClickHouse when an HTTP request is matched and the result of the query is returned. It is a must configuration.
- `response_content` — use with `static` type, response content sent to the client; when using the prefix file:// or config://, the content is taken from the file or configuration and sent to the client.
The following example defines the values of `max_threads` and `max_alter_threads` settings, then queries the system table to check whether these settings were set successfully.
Next are the configuration methods for different `type`.
### predefined_query_handler {#predefined_query_handler}
`predefined_query_handler` supports setting `Settings` and `query_params` values. You can configure `query` in a handler of the `predefined_query_handler` type.
The `query` value is a predefined query of `predefined_query_handler`, which is executed by ClickHouse when an HTTP request is matched, and the result of the query is returned. It is a mandatory configuration.
The following example defines the values of [max_threads](../operations/settings/settings.md#settings-max_threads) and `max_alter_threads` settings, then queries the system table to check whether these settings were set successfully.
Example:
@ -424,15 +423,15 @@ max_alter_threads 2
```
!!! note "caution"
One `<predefined_query_handler>` supports only one `<query>` of an insert type.
One `predefined_query_handler` supports only one `query` of an insert type.
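A minimal illustrative sketch of such a rule follows; the URL and the query against `system.settings` are illustrative assumptions, not the configuration from the example above:

```xml
<http_handlers>
    <rule>
        <url>/get_max_threads</url>
        <method>GET</method>
        <handler>
            <type>predefined_query_handler</type>
            <!-- The predefined query is fixed in the configuration and runs on every matching request. -->
            <query>SELECT name, value FROM system.settings WHERE name = 'max_threads'</query>
        </handler>
    </rule>
</http_handlers>
```

With such a rule, a `GET` request to `/get_max_threads` on the HTTP port (8123 by default) returns the current value of the setting.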
## dynamic\_query\_handler {#dynamic_query_handler}
### dynamic_query_handler {#dynamic_query_handler}
In `<dynamic_query_handler>`, query is written in the form of param of the HTTP request. The difference is that in `<predefined_query_handler>`, query is wrote in the configuration file. You can configure `<query_param_name>` in `<dynamic_query_handler>`.
In `dynamic_query_handler`, the query is written in the form of a parameter of the HTTP request. The difference is that in `predefined_query_handler`, the query is written in the configuration file. You can configure `query_param_name` in `dynamic_query_handler`.
ClickHouse extracts and executes the value corresponding to the `<query_param_name>` value in the url of the HTTP request. The default value of `<query_param_name>` is `/query` . It is an optional configuration. If there is no definition in the configuration file, the param is not passed in.
ClickHouse extracts and executes the value corresponding to the `query_param_name` value in the URL of the HTTP request. The default value of `query_param_name` is `/query`. It is an optional configuration. If there is no definition in the configuration file, the param is not passed in.
To experiment with this functionality, the example defines the values of max\_threads and max\_alter\_threads and queries whether the Settings were set successfully.
To experiment with this functionality, the example defines the values of [max_threads](../operations/settings/settings.md#settings-max_threads) and `max_alter_threads` and queries whether the settings were set successfully.
Example:
@ -455,9 +454,9 @@ max_threads 1
max_alter_threads 2
```
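A minimal hypothetical sketch of a `dynamic_query_handler` rule (the URL and the parameter name are illustrative):

```xml
<http_handlers>
    <rule>
        <url>/run</url>
        <handler>
            <type>dynamic_query_handler</type>
            <!-- The query text is taken from this request parameter instead of the configuration file. -->
            <query_param_name>query</query_param_name>
        </handler>
    </rule>
</http_handlers>
```

A request such as `curl 'http://localhost:8123/run?query=SELECT%201&max_threads=1'` would then execute the query passed in the `query` parameter, while additional parameters such as `max_threads` are applied as settings.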
## static {#static}
### static {#static}
`<static>` can return [content\_type](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Type), [status](https://developer.mozilla.org/en-US/docs/Web/HTTP/Status) and response\_content. response\_content can return the specified content
`static` can return [content_type](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Type), [status](https://developer.mozilla.org/en-US/docs/Web/HTTP/Status) and `response_content`. `response_content` can return the specified content.
Example:
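A minimal hypothetical `static` rule could look like this (the path, status code, and content are illustrative):

```xml
<http_handlers>
    <rule>
        <url>/hi</url>
        <method>GET</method>
        <handler>
            <type>static</type>
            <status>200</status>
            <content_type>text/plain; charset=UTF-8</content_type>
            <!-- With a file:// or config:// prefix, the content would instead be read from a file or from the configuration. -->
            <response_content>Hello from ClickHouse!</response_content>
        </handler>
    </rule>
</http_handlers>
```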

View File

@ -9,19 +9,19 @@ toc_title: Introduction
ClickHouse provides two network interfaces (both can be optionally wrapped in TLS for additional security):
- [HTTP](http.md), which is documented and easy to use directly.
- [Native TCP](tcp.md), which has less overhead.
- [Native TCP](../interfaces/tcp.md), which has less overhead.
In most cases it is recommended to use an appropriate tool or library instead of interacting with those directly. Officially supported by Yandex are the following:
- [Command-line client](cli.md)
- [JDBC driver](jdbc.md)
- [ODBC driver](odbc.md)
- [C++ client library](cpp.md)
- [Command-line client](../interfaces/cli.md)
- [JDBC driver](../interfaces/jdbc.md)
- [ODBC driver](../interfaces/odbc.md)
- [C++ client library](../interfaces/cpp.md)
There is also a wide range of third-party libraries for working with ClickHouse:
- [Client libraries](third-party/client-libraries.md)
- [Integrations](third-party/integrations.md)
- [Visual interfaces](third-party/gui.md)
- [Client libraries](../interfaces/third-party/client-libraries.md)
- [Integrations](../interfaces/third-party/integrations.md)
- [Visual interfaces](../interfaces/third-party/gui.md)
[Original article](https://clickhouse.tech/docs/en/interfaces/) <!--hide-->

View File

@ -5,6 +5,6 @@ toc_title: Native Interface (TCP)
# Native Interface (TCP) {#native-interface-tcp}
The native protocol is used in the [command-line client](cli.md), for inter-server communication during distributed query processing, and also in other C++ programs. Unfortunately, native ClickHouse protocol does not have formal specification yet, but it can be reverse-engineered from ClickHouse source code (starting [around here](https://github.com/ClickHouse/ClickHouse/tree/master/src/Client)) and/or by intercepting and analyzing TCP traffic.
The native protocol is used in the [command-line client](../interfaces/cli.md), for inter-server communication during distributed query processing, and also in other C++ programs. Unfortunately, the native ClickHouse protocol does not have a formal specification yet, but it can be reverse-engineered from the ClickHouse source code (starting [around here](https://github.com/ClickHouse/ClickHouse/tree/master/src/Client)) and/or by intercepting and analyzing TCP traffic.
[Original article](https://clickhouse.tech/docs/en/interfaces/tcp/) <!--hide-->

View File

@ -12,6 +12,7 @@ toc_title: Integrations
- Relational database management systems
- [MySQL](https://www.mysql.com)
- [mysql2ch](https://github.com/long2ice/mysql2ch)
- [ProxySQL](https://github.com/sysown/proxysql/wiki/ClickHouse-Support)
- [clickhouse-mysql-data-reader](https://github.com/Altinity/clickhouse-mysql-data-reader)
- [horgh-replicator](https://github.com/larsnovikov/horgh-replicator)
@ -97,5 +98,12 @@ toc_title: Integrations
- Elixir
- [Ecto](https://github.com/elixir-ecto/ecto)
- [clickhouse\_ecto](https://github.com/appodeal/clickhouse_ecto)
- Ruby
- [Ruby on Rails](https://rubyonrails.org/)
- [activecube](https://github.com/bitquery/activecube)
- [ActiveRecord](https://github.com/PNixx/clickhouse-activerecord)
- [GraphQL](https://github.com/graphql)
- [activecube-graphql](https://github.com/bitquery/activecube-graphql)
[Original article](https://clickhouse.tech/docs/en/interfaces/third-party/integrations/) <!--hide-->

View File

@ -8,78 +8,78 @@ toc_title: Adopters
!!! warning "Disclaimer"
The following list of companies using ClickHouse and their success stories is assembled from public sources, thus might differ from current reality. We'd appreciate it if you share the story of adopting ClickHouse in your company and [add it to the list](https://github.com/ClickHouse/ClickHouse/edit/master/docs/en/introduction/adopters.md), but please make sure you won't have any NDA issues by doing so. Providing updates with publications from other companies is also useful.
| Company | Industry | Usecase | Cluster Size | (Un)Compressed Data Size<abbr title="of single replica"><sup>\*</sup></abbr> | Reference |
|---------------------------------------------------------------------|---------------------------------|-----------------------|------------------------------------------------------------|------------------------------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| [2gis](https://2gis.ru){.favicon} | Maps | Monitoring | — | — | [Talk in Russian, July 2019](https://youtu.be/58sPkXfq6nw) |
| [Aloha&nbsp;Browser](https://alohabrowser.com/){.favicon} | Mobile App | Browser backend | — | — | [Slides in Russian, May 2019](https://presentations.clickhouse.tech/meetup22/aloha.pdf) |
| [Amadeus](https://amadeus.com/){.favicon} | Travel | Analytics | — | — | [Press Release, April 2018](https://www.altinity.com/blog/2018/4/5/amadeus-technologies-launches-investment-and-insights-tool-based-on-machine-learning-and-strategy-algorithms) |
| [Appsflyer](https://www.appsflyer.com){.favicon} | Mobile analytics | Main product | — | — | [Talk in Russian, July 2019](https://www.youtube.com/watch?v=M3wbRlcpBbY) |
| [ArenaData](https://arenadata.tech/){.favicon} | Data Platform | Main product | — | — | [Slides in Russian, December 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup38/indexes.pdf) |
| [Badoo](https://badoo.com){.favicon} | Dating | Timeseries | — | — | [Slides in Russian, December 2019](https://presentations.clickhouse.tech/meetup38/forecast.pdf) |
| [Benocs](https://www.benocs.com/){.favicon} | Network Telemetry and Analytics | Main Product | — | — | [Slides in English, October 2017](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup9/lpm.pdf) |
| [Bloomberg](https://www.bloomberg.com/){.favicon} | Finance, Media | Monitoring | 102 servers | — | [Slides, May 2018](https://www.slideshare.net/Altinity/http-analytics-for-6m-requests-per-second-using-clickhouse-by-alexander-bocharov) |
| [Bloxy](https://bloxy.info){.favicon} | Blockchain | Analytics | — | — | [Slides in Russian, August 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup17/4_bloxy.pptx) |
| [Dataliance for China Telecom](https://www.chinatelecomglobal.com/){.favicon} | Telecom | Analytics | — | — | [Slides in Chinese, January 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup12/telecom.pdf) |
| [CARTO](https://carto.com/){.favicon} | Business Intelligence | Geo analytics | — | — | [Geospatial processing with ClickHouse](https://carto.com/blog/geospatial-processing-with-clickhouse/) |
| [CERN](http://public.web.cern.ch/public/){.favicon} | Research | Experiment | — | — | [Press release, April 2012](https://www.yandex.com/company/press_center/press_releases/2012/2012-04-10/) |
| [Cisco](http://cisco.com/){.favicon} | Networking | Traffic analysis | — | — | [Lightning talk, October 2019](https://youtu.be/-hI1vDR2oPY?t=5057) |
| [Citadel&nbsp;Securities](https://www.citadelsecurities.com/){.favicon} | Finance | — | — | — | [Contribution, March 2019](https://github.com/ClickHouse/ClickHouse/pull/4774) |
| [Citymobil](https://city-mobil.ru){.favicon} | Taxi | Analytics | — | — | [Blog Post in Russian, March 2020](https://habr.com/en/company/citymobil/blog/490660/) |
| [ContentSquare](https://contentsquare.com){.favicon} | Web analytics | Main product | — | — | [Blog post in French, November 2018](http://souslecapot.net/2018/11/21/patrick-chatain-vp-engineering-chez-contentsquare-penser-davantage-amelioration-continue-que-revolution-constante/) |
| [Cloudflare](https://cloudflare.com){.favicon} | CDN | Traffic analysis | 36 servers | — | [Blog post, May 2017](https://blog.cloudflare.com/how-cloudflare-analyzes-1m-dns-queries-per-second/), [Blog post, March 2018](https://blog.cloudflare.com/http-analytics-for-6m-requests-per-second-using-clickhouse/) |
| [Corunet](https://coru.net/){.favicon} | Analytics | Main product | — | — | [Slides in English, April 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup21/predictive_models.pdf) |
| [CraiditX 氪信](https://www.creditx.com){.favicon} | Finance AI | Analysis | — | — | [Slides in English, November 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup33/udf.pptx) |
| [Criteo](https://www.criteo.com/){.favicon} | Retail | Main product | — | — | [Slides in English, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup18/3_storetail.pptx) |
| [Deutsche&nbsp;Bank](https://db.com){.favicon} | Finance | BI Analytics | — | — | [Slides in English, October 2019](https://bigdatadays.ru/wp-content/uploads/2019/10/D2-H3-3_Yakunin-Goihburg.pdf) |
| [Diva-e](https://www.diva-e.com){.favicon} | Digital consulting | Main Product | — | — | [Slides in English, September 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup29/ClickHouse-MeetUp-Unusual-Applications-sd-2019-09-17.pdf) |
| [Exness](https://www.exness.com){.favicon} | Trading | Metrics, Logging | — | — | [Talk in Russian, May 2019](https://youtu.be/_rpU-TvSfZ8?t=3215) |
| [Geniee](https://geniee.co.jp){.favicon} | Ad network | Main product | — | — | [Blog post in Japanese, July 2017](https://tech.geniee.co.jp/entry/2017/07/20/160100) |
| [HUYA](https://www.huya.com/){.favicon} | Video Streaming | Analytics | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/7.%20ClickHouse万亿数据分析实践%20李本旺(sundy-li)%20虎牙.pdf) |
| [Idealista](https://www.idealista.com){.favicon} | Real Estate | Analytics | — | — | [Blog Post in English, April 2019](https://clickhouse.yandex/blog/en/clickhouse-meetup-in-madrid-on-april-2-2019) |
| [Infovista](https://www.infovista.com/){.favicon} | Networks | Analytics | — | — | [Slides in English, October 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup30/infovista.pdf) |
| [InnoGames](https://www.innogames.com){.favicon} | Games | Metrics, Logging | — | — | [Slides in Russian, September 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup28/graphite_and_clickHouse.pdf) |
| [Integros](https://integros.com){.favicon} | Platform for video services | Analytics | — | — | [Slides in Russian, May 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup22/strategies.pdf) |
| [Kodiak Data](https://www.kodiakdata.com/){.favicon} | Clouds | Main product | — | — | [Slides in Engish, April 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup13/kodiak_data.pdf) |
| [Kontur](https://kontur.ru){.favicon} | Software Development | Metrics | — | — | [Talk in Russian, November 2018](https://www.youtube.com/watch?v=U4u4Bd0FtrY) |
| [Lawrence Berkeley National Laboratory](https://www.lbl.gov){.favicon} | Research | Traffic analysis | 1 server | 11.8 TiB | [Slides in English, April 2019](https://www.smitasin.com/presentations/2019-04-17_DOE-NSM.pdf) |
| [LifeStreet](https://lifestreet.com/){.favicon} | Ad network | Main product | 75 servers (3 replicas) | 5.27 PiB | [Blog post in Russian, February 2017](https://habr.com/en/post/322620/) |
| [Mail.ru Cloud Solutions](https://mcs.mail.ru/){.favicon} | Cloud services | Main product | — | — | [Article in Russian](https://mcs.mail.ru/help/db-create/clickhouse#) |
| [MessageBird](https://www.messagebird.com){.favicon} | Telecommunications | Statistics | — | — | [Slides in English, November 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup20/messagebird.pdf) |
| [MGID](https://www.mgid.com/){.favicon} | Ad network | Web-analytics | — | — | [Blog post in Russian, April 2020](http://gs-studio.com/news-about-it/32777----clickhouse---c) |
| [OneAPM](https://www.oneapm.com/){.favicon} | Monitorings and Data Analysis | Main product | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/8.%20clickhouse在OneAPM的应用%20杜龙.pdf) |
| [Pragma&nbsp;Innovation](http://www.pragma-innovation.fr/){.favicon} | Telemetry and Big Data Analysis | Main product | — | — | [Slides in English, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup18/4_pragma_innovation.pdf) |
| [QINGCLOUD](https://www.qingcloud.com/){.favicon} | Cloud services | Main product | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/4.%20Cloud%20%2B%20TSDB%20for%20ClickHouse%20张健%20QingCloud.pdf) |
| [Qrator](https://qrator.net){.favicon} | DDoS protection | Main product | — | — | [Blog Post, March 2019](https://blog.qrator.net/en/clickhouse-ddos-mitigation_37/) |
| [Percent 百分点](https://www.percent.cn/){.favicon} | Analytics | Main Product | — | — | [Slides in Chinese, June 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup24/4.%20ClickHouse万亿数据双中心的设计与实践%20.pdf) |
| [Rambler](https://rambler.ru){.favicon} | Internet services | Analytics | — | — | [Talk in Russian, April 2018](https://medium.com/@ramblertop/разработка-api-clickhouse-для-рамблер-топ-100-f4c7e56f3141) |
| [Tencent](https://www.tencent.com){.favicon} | Messaging | Logging | — | — | [Talk in Chinese, November 2019](https://youtu.be/T-iVQRuw-QY?t=5050) |
| [Traffic&nbsp;Stars](https://trafficstars.com/){.favicon} | AD network | — | — | — | [Slides in Russian, May 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup15/lightning/ninja.pdf) |
| [S7&nbsp;Airlines](https://www.s7.ru){.favicon} | Airlines | Metrics, Logging | — | — | [Talk in Russian, March 2019](https://www.youtube.com/watch?v=nwG68klRpPg&t=15s) |
| [SEMrush](https://www.semrush.com/){.favicon} | Marketing | Main product | — | — | [Slides in Russian, August 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup17/5_semrush.pdf) |
| [scireum&nbsp;GmbH](https://www.scireum.de/){.favicon} | e-Commerce | Main product | — | — | [Talk in German, February 2020](https://www.youtube.com/watch?v=7QWAn5RbyR4) |
| [Sentry](https://sentry.io/){.favicon} | Software Development | Main product | — | — | [Blog Post in English, May 2019](https://blog.sentry.io/2019/05/16/introducing-snuba-sentrys-new-search-infrastructure) |
| [SGK](http://www.sgk.gov.tr/wps/portal/sgk/tr){.favicon} | Goverment Social Security | Analytics | — | — | [Slides in English, November 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup35/ClickHouse%20Meetup-Ramazan%20POLAT.pdf) |
| [seo.do](https://seo.do/){.favicon} | Analytics | Main product | — | — | [Slides in English, November 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup35/CH%20Presentation-%20Metehan%20Çetinkaya.pdf) |
| [Sina](http://english.sina.com/index.html){.favicon} | News | — | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/6.%20ClickHouse最佳实践%20高鹏_新浪.pdf) |
| [SMI2](https://smi2.ru/){.favicon} | News | Analytics | — | — | [Blog Post in Russian, November 2017](https://habr.com/ru/company/smi2/blog/314558/) |
| [Splunk](https://www.splunk.com/){.favicon} | Business Analytics | Main product | — | — | [Slides in English, January 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup12/splunk.pdf) |
| [Spotify](https://www.spotify.com){.favicon} | Music | Experimentation | — | — | [Slides, July 2018](https://www.slideshare.net/glebus/using-clickhouse-for-experimentation-104247173) |
| [Tencent](https://www.tencent.com){.favicon} | Big Data | Data processing | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/5.%20ClickHouse大数据集群应用_李俊飞腾讯网媒事业部.pdf) |
| [Uber](https://www.uber.com){.favicon} | Taxi | Logging | — | — | [Slides, February 2020](https://presentations.clickhouse.tech/meetup40/uber.pdf) |
| [VKontakte](https://vk.com){.favicon} | Social Network | Statistics, Logging | — | — | [Slides in Russian, August 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup17/3_vk.pdf) |
| [Wisebits](https://wisebits.com/){.favicon} | IT Solutions | Analytics | — | — | [Slides in Russian, May 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup22/strategies.pdf) |
| [Xiaoxin&nbsp;Tech](http://www.xiaoxintech.cn/){.favicon} | Education | Common purpose | — | — | [Slides in English, November 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup33/sync-clickhouse-with-mysql-mongodb.pptx) |
| [Ximalaya](https://www.ximalaya.com/){.favicon} | Audio sharing | OLAP | — | — | [Slides in English, November 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup33/ximalaya.pdf) |
| [Yandex&nbsp;Cloud](https://cloud.yandex.ru/services/managed-clickhouse){.favicon} | Public Cloud | Main product | — | — | [Talk in Russian, December 2019](https://www.youtube.com/watch?v=pgnak9e_E0o) |
| [Yandex&nbsp;DataLens](https://cloud.yandex.ru/services/datalens){.favicon} | Business Intelligence | Main product | — | — | [Slides in Russian, December 2019](https://presentations.clickhouse.tech/meetup38/datalens.pdf) |
| [Yandex&nbsp;Market](https://market.yandex.ru/){.favicon} | e-Commerce | Metrics, Logging | — | — | [Talk in Russian, January 2019](https://youtu.be/_l1qP0DyBcA?t=478) |
| [Yandex&nbsp;Metrica](https://metrica.yandex.com){.favicon} | Web analytics | Main product | 360 servers in one cluster, 1862 servers in one department | 66.41 PiB / 5.68 PiB | [Slides, February 2020](https://presentations.clickhouse.tech/meetup40/introduction/#13) |
| [ЦВТ](https://htc-cs.ru/){.favicon} | Software Development | Metrics, Logging | — | — | [Blog Post, March 2019, in Russian](https://vc.ru/dev/62715-kak-my-stroili-monitoring-na-prometheus-clickhouse-i-elk) |
| [МКБ](https://mkb.ru/){.favicon} | Bank | Web-system monitoring | — | — | [Slides in Russian, September 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup28/mkb.pdf) |
| [Jinshuju 金数据](https://jinshuju.net){.favicon} | BI Analytics | Main product | — | — | [Slides in Chinese, October 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup24/3.%20金数据数据架构调整方案Public.pdf) |
| [Instana](https://www.instana.com){.favicon} | APM Platform | Main product | — | — | [Twitter post](https://twitter.com/mieldonkers/status/1248884119158882304) |
| [Wargaming](https://wargaming.com/en/){.favicon} | Games | | — | — | [Interview](https://habr.com/en/post/496954/) |
| [Crazypanda](https://crazypanda.ru/en/){.favicon} | Games | | — | — | Live session on ClickHouse meetup |
| [FunCorp](https://fun.co/rp){.favicon} | Games | | — | — | [Article](https://www.altinity.com/blog/migrating-from-redshift-to-clickhouse) |
| Company | Industry | Usecase | Cluster Size | (Un)Compressed Data Size<abbr title="of single replica"><sup>\*</sup></abbr> | Reference |
|------------------------------------------------------------------------------------------------|---------------------------------|-----------------------|------------------------------------------------------------|------------------------------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| <a href="https://2gis.ru" class="favicon">2gis</a> | Maps | Monitoring | — | — | [Talk in Russian, July 2019](https://youtu.be/58sPkXfq6nw) |
| <a href="https://alohabrowser.com/" class="favicon">Aloha Browser</a> | Mobile App | Browser backend | — | — | [Slides in Russian, May 2019](https://presentations.clickhouse.tech/meetup22/aloha.pdf) |
| <a href="https://amadeus.com/" class="favicon">Amadeus</a> | Travel | Analytics | — | — | [Press Release, April 2018](https://www.altinity.com/blog/2018/4/5/amadeus-technologies-launches-investment-and-insights-tool-based-on-machine-learning-and-strategy-algorithms) |
| <a href="https://www.appsflyer.com" class="favicon">Appsflyer</a> | Mobile analytics | Main product | — | — | [Talk in Russian, July 2019](https://www.youtube.com/watch?v=M3wbRlcpBbY) |
| <a href="https://arenadata.tech/" class="favicon">ArenaData</a> | Data Platform | Main product | — | — | [Slides in Russian, December 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup38/indexes.pdf) |
| <a href="https://badoo.com" class="favicon">Badoo</a> | Dating | Timeseries | — | — | [Slides in Russian, December 2019](https://presentations.clickhouse.tech/meetup38/forecast.pdf) |
| <a href="https://www.benocs.com/" class="favicon">Benocs</a> | Network Telemetry and Analytics | Main Product | — | — | [Slides in English, October 2017](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup9/lpm.pdf) |
| <a href="https://www.bloomberg.com/" class="favicon">Bloomberg</a> | Finance, Media | Monitoring | 102 servers | — | [Slides, May 2018](https://www.slideshare.net/Altinity/http-analytics-for-6m-requests-per-second-using-clickhouse-by-alexander-bocharov) |
| <a href="https://bloxy.info" class="favicon">Bloxy</a> | Blockchain | Analytics | — | — | [Slides in Russian, August 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup17/4_bloxy.pptx) |
| <a href="https://www.chinatelecomglobal.com/" class="favicon">Dataliance for China Telecom</a> | Telecom | Analytics | — | — | [Slides in Chinese, January 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup12/telecom.pdf) |
| <a href="https://carto.com/" class="favicon">CARTO</a> | Business Intelligence | Geo analytics | — | — | [Geospatial processing with ClickHouse](https://carto.com/blog/geospatial-processing-with-clickhouse/) |
| <a href="http://public.web.cern.ch/public/" class="favicon">CERN</a> | Research | Experiment | — | — | [Press release, April 2012](https://www.yandex.com/company/press_center/press_releases/2012/2012-04-10/) |
| <a href="http://cisco.com/" class="favicon">Cisco</a> | Networking | Traffic analysis | — | — | [Lightning talk, October 2019](https://youtu.be/-hI1vDR2oPY?t=5057) |
| <a href="https://www.citadelsecurities.com/" class="favicon">Citadel Securities</a> | Finance | — | — | — | [Contribution, March 2019](https://github.com/ClickHouse/ClickHouse/pull/4774) |
| <a href="https://city-mobil.ru" class="favicon">Citymobil</a> | Taxi | Analytics | — | — | [Blog Post in Russian, March 2020](https://habr.com/en/company/citymobil/blog/490660/) |
| <a href="https://contentsquare.com" class="favicon">ContentSquare</a> | Web analytics | Main product | — | — | [Blog post in French, November 2018](http://souslecapot.net/2018/11/21/patrick-chatain-vp-engineering-chez-contentsquare-penser-davantage-amelioration-continue-que-revolution-constante/) |
| <a href="https://cloudflare.com" class="favicon">Cloudflare</a> | CDN | Traffic analysis | 36 servers | — | [Blog post, May 2017](https://blog.cloudflare.com/how-cloudflare-analyzes-1m-dns-queries-per-second/), [Blog post, March 2018](https://blog.cloudflare.com/http-analytics-for-6m-requests-per-second-using-clickhouse/) |
| <a href="https://coru.net/" class="favicon">Corunet</a> | Analytics | Main product | — | — | [Slides in English, April 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup21/predictive_models.pdf) |
| <a href="https://www.creditx.com" class="favicon">CraiditX 氪信</a> | Finance AI | Analysis | — | — | [Slides in English, November 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup33/udf.pptx) |
| <a href="https://www.criteo.com/" class="favicon">Criteo</a> | Retail | Main product | — | — | [Slides in English, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup18/3_storetail.pptx) |
| <a href="https://db.com" class="favicon">Deutsche Bank</a> | Finance | BI Analytics | — | — | [Slides in English, October 2019](https://bigdatadays.ru/wp-content/uploads/2019/10/D2-H3-3_Yakunin-Goihburg.pdf) |
| <a href="https://www.diva-e.com" class="favicon">Diva-e</a> | Digital consulting | Main Product | — | — | [Slides in English, September 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup29/ClickHouse-MeetUp-Unusual-Applications-sd-2019-09-17.pdf) |
| <a href="https://www.exness.com" class="favicon">Exness</a> | Trading | Metrics, Logging | — | — | [Talk in Russian, May 2019](https://youtu.be/_rpU-TvSfZ8?t=3215) |
| <a href="https://geniee.co.jp" class="favicon">Geniee</a> | Ad network | Main product | — | — | [Blog post in Japanese, July 2017](https://tech.geniee.co.jp/entry/2017/07/20/160100) |
| <a href="https://www.huya.com/" class="favicon">HUYA</a> | Video Streaming | Analytics | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/7.%20ClickHouse万亿数据分析实践%20李本旺(sundy-li)%20虎牙.pdf) |
| <a href="https://www.idealista.com" class="favicon">Idealista</a> | Real Estate | Analytics | — | — | [Blog Post in English, April 2019](https://clickhouse.tech/blog/en/clickhouse-meetup-in-madrid-on-april-2-2019) |
| <a href="https://www.infovista.com/" class="favicon">Infovista</a> | Networks | Analytics | — | — | [Slides in English, October 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup30/infovista.pdf) |
| <a href="https://www.innogames.com" class="favicon">InnoGames</a> | Games | Metrics, Logging | — | — | [Slides in Russian, September 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup28/graphite_and_clickHouse.pdf) |
| <a href="https://integros.com" class="favicon">Integros</a> | Platform for video services | Analytics | — | — | [Slides in Russian, May 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup22/strategies.pdf) |
| <a href="https://www.kodiakdata.com/" class="favicon">Kodiak Data</a> | Clouds | Main product | — | — | [Slides in Engish, April 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup13/kodiak_data.pdf) |
| <a href="https://kontur.ru" class="favicon">Kontur</a> | Software Development | Metrics | — | — | [Talk in Russian, November 2018](https://www.youtube.com/watch?v=U4u4Bd0FtrY) |
| <a href="https://www.lbl.gov" class="favicon">Lawrence Berkeley National Laboratory</a> | Research | Traffic analysis | 1 server | 11.8 TiB | [Slides in English, April 2019](https://www.smitasin.com/presentations/2019-04-17_DOE-NSM.pdf) |
| <a href="https://lifestreet.com/" class="favicon">LifeStreet</a> | Ad network | Main product | 75 servers (3 replicas) | 5.27 PiB | [Blog post in Russian, February 2017](https://habr.com/en/post/322620/) |
| <a href="https://mcs.mail.ru/" class="favicon">Mail.ru Cloud Solutions</a> | Cloud services | Main product | — | — | [Article in Russian](https://mcs.mail.ru/help/db-create/clickhouse#) |
| <a href="https://www.messagebird.com" class="favicon">MessageBird</a> | Telecommunications | Statistics | — | — | [Slides in English, November 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup20/messagebird.pdf) |
| <a href="https://www.mgid.com/" class="favicon">MGID</a> | Ad network | Web-analytics | — | — | [Blog post in Russian, April 2020](http://gs-studio.com/news-about-it/32777----clickhouse---c) |
| <a href="https://www.oneapm.com/" class="favicon">OneAPM</a> | Monitorings and Data Analysis | Main product | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/8.%20clickhouse在OneAPM的应用%20杜龙.pdf) |
| <a href="http://www.pragma-innovation.fr/" class="favicon">Pragma Innovation</a> | Telemetry and Big Data Analysis | Main product | — | — | [Slides in English, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup18/4_pragma_innovation.pdf) |
| <a href="https://www.qingcloud.com/" class="favicon">QINGCLOUD</a> | Cloud services | Main product | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/4.%20Cloud%20%2B%20TSDB%20for%20ClickHouse%20张健%20QingCloud.pdf) |
| <a href="https://qrator.net" class="favicon">Qrator</a> | DDoS protection | Main product | — | — | [Blog Post, March 2019](https://blog.qrator.net/en/clickhouse-ddos-mitigation_37/) |
| <a href="https://www.percent.cn/" class="favicon">Percent 百分点</a> | Analytics | Main Product | — | — | [Slides in Chinese, June 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup24/4.%20ClickHouse万亿数据双中心的设计与实践%20.pdf) |
| <a href="https://rambler.ru" class="favicon">Rambler</a> | Internet services | Analytics | — | — | [Talk in Russian, April 2018](https://medium.com/@ramblertop/разработка-api-clickhouse-для-рамблер-топ-100-f4c7e56f3141) |
| <a href="https://www.tencent.com" class="favicon">Tencent</a> | Messaging | Logging | — | — | [Talk in Chinese, November 2019](https://youtu.be/T-iVQRuw-QY?t=5050) |
| <a href="https://trafficstars.com/" class="favicon">Traffic Stars</a> | AD network | — | — | — | [Slides in Russian, May 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup15/lightning/ninja.pdf) |
| <a href="https://www.s7.ru" class="favicon">S7 Airlines</a> | Airlines | Metrics, Logging | — | — | [Talk in Russian, March 2019](https://www.youtube.com/watch?v=nwG68klRpPg&t=15s) |
| <a href="https://www.semrush.com/" class="favicon">SEMrush</a> | Marketing | Main product | — | — | [Slides in Russian, August 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup17/5_semrush.pdf) |
| <a href="https://www.scireum.de/" class="favicon">scireum GmbH</a> | e-Commerce | Main product | — | — | [Talk in German, February 2020](https://www.youtube.com/watch?v=7QWAn5RbyR4) |
| <a href="https://sentry.io/" class="favicon">Sentry</a> | Software Development | Main product | — | — | [Blog Post in English, May 2019](https://blog.sentry.io/2019/05/16/introducing-snuba-sentrys-new-search-infrastructure) |
| <a href="http://www.sgk.gov.tr/wps/portal/sgk/tr" class="favicon">SGK</a> | Goverment Social Security | Analytics | — | — | [Slides in English, November 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup35/ClickHouse%20Meetup-Ramazan%20POLAT.pdf) |
| <a href="https://seo.do/" class="favicon">seo.do</a> | Analytics | Main product | — | — | [Slides in English, November 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup35/CH%20Presentation-%20Metehan%20Çetinkaya.pdf) |
| <a href="http://english.sina.com/index.html" class="favicon">Sina</a> | News | — | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/6.%20ClickHouse最佳实践%20高鹏_新浪.pdf) |
| <a href="https://smi2.ru/" class="favicon">SMI2</a> | News | Analytics | — | — | [Blog Post in Russian, November 2017](https://habr.com/ru/company/smi2/blog/314558/) |
| <a href="https://www.splunk.com/" class="favicon">Splunk</a> | Business Analytics | Main product | — | — | [Slides in English, January 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup12/splunk.pdf) |
| <a href="https://www.spotify.com" class="favicon">Spotify</a> | Music | Experimentation | — | — | [Slides, July 2018](https://www.slideshare.net/glebus/using-clickhouse-for-experimentation-104247173) |
| <a href="https://www.tencent.com" class="favicon">Tencent</a> | Big Data | Data processing | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/5.%20ClickHouse大数据集群应用_李俊飞腾讯网媒事业部.pdf) |
| <a href="https://www.uber.com" class="favicon">Uber</a> | Taxi | Logging | — | — | [Slides, February 2020](https://presentations.clickhouse.tech/meetup40/uber.pdf) |
| <a href="https://vk.com" class="favicon">VKontakte</a> | Social Network | Statistics, Logging | — | — | [Slides in Russian, August 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup17/3_vk.pdf) |
| <a href="https://wisebits.com/" class="favicon">Wisebits</a> | IT Solutions | Analytics | — | — | [Slides in Russian, May 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup22/strategies.pdf) |
| <a href="http://www.xiaoxintech.cn/" class="favicon">Xiaoxin Tech</a> | Education | Common purpose | — | — | [Slides in English, November 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup33/sync-clickhouse-with-mysql-mongodb.pptx) |
| <a href="https://www.ximalaya.com/" class="favicon">Ximalaya</a> | Audio sharing | OLAP | — | — | [Slides in English, November 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup33/ximalaya.pdf) |
| <a href="https://cloud.yandex.ru/services/managed-clickhouse" class="favicon">Yandex Cloud</a> | Public Cloud | Main product | — | — | [Talk in Russian, December 2019](https://www.youtube.com/watch?v=pgnak9e_E0o) |
| <a href="https://cloud.yandex.ru/services/datalens" class="favicon">Yandex DataLens</a> | Business Intelligence | Main product | — | — | [Slides in Russian, December 2019](https://presentations.clickhouse.tech/meetup38/datalens.pdf) |
| <a href="https://market.yandex.ru/" class="favicon">Yandex Market</a> | e-Commerce | Metrics, Logging | — | — | [Talk in Russian, January 2019](https://youtu.be/_l1qP0DyBcA?t=478) |
| <a href="https://metrica.yandex.com" class="favicon">Yandex Metrica</a> | Web analytics | Main product | 360 servers in one cluster, 1862 servers in one department | 66.41 PiB / 5.68 PiB | [Slides, February 2020](https://presentations.clickhouse.tech/meetup40/introduction/#13) |
| <a href="https://htc-cs.ru/" class="favicon">ЦВТ</a> | Software Development | Metrics, Logging | — | — | [Blog Post, March 2019, in Russian](https://vc.ru/dev/62715-kak-my-stroili-monitoring-na-prometheus-clickhouse-i-elk) |
| <a href="https://mkb.ru/" class="favicon">МКБ</a> | Bank | Web-system monitoring | — | — | [Slides in Russian, September 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup28/mkb.pdf) |
| <a href="https://jinshuju.net" class="favicon">Jinshuju 金数据</a> | BI Analytics | Main product | — | — | [Slides in Chinese, October 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup24/3.%20金数据数据架构调整方案Public.pdf) |
| <a href="https://www.instana.com" class="favicon">Instana</a> | APM Platform | Main product | — | — | [Twitter post](https://twitter.com/mieldonkers/status/1248884119158882304) |
| <a href="https://wargaming.com/en/" class="favicon">Wargaming</a> | Games | | — | — | [Interview](https://habr.com/en/post/496954/) |
| <a href="https://crazypanda.ru/en/" class="favicon">Crazypanda</a> | Games | | — | — | Live session on ClickHouse meetup |
| <a href="https://fun.co/rp" class="favicon">FunCorp</a> | Games | | — | — | [Article](https://www.altinity.com/blog/migrating-from-redshift-to-clickhouse) |
[Original article](https://clickhouse.tech/docs/en/introduction/adopters/) <!--hide-->

View File

@ -5,7 +5,7 @@ toc_title: Distinctive Features
# Distinctive Features of ClickHouse {#distinctive-features-of-clickhouse}
## True Column-Oriented DBMS {#true-column-oriented-dbms}
## True Column-Oriented Database Management System {#true-column-oriented-dbms}
In a true column-oriented DBMS, no extra data is stored with the values. Among other things, this means that constant-length values must be supported, to avoid storing their length “number” next to the values. As an example, a billion UInt8-type values should consume around 1 GB uncompressed; otherwise this would strongly affect the CPU use. It is essential to store data compactly (without any “garbage”) even when uncompressed, since the speed of decompression (CPU usage) depends mainly on the volume of uncompressed data.
@ -15,11 +15,15 @@ Its also worth noting that ClickHouse is a database management system, not a
## Data Compression {#data-compression}
Some column-oriented DBMSs (InfiniDB CE and MonetDB) do not use data compression. However, data compression does play a key role in achieving excellent performance.
Some column-oriented DBMSs do not use data compression. However, data compression does play a key role in achieving excellent performance.
In addition to efficient general-purpose compression codecs with different trade-offs between disk space and CPU consumption, ClickHouse provides [specialized codecs](../sql-reference/statements/create.md#create-query-specialized-codecs) for specific kinds of data, which allow ClickHouse to compete with and outperform more niche databases, like time-series ones.
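As a sketch of how such codecs are declared (the table and column names here are illustrative, not taken from the text above):

```sql
CREATE TABLE sensor_readings
(
    -- DoubleDelta suits monotonically growing timestamps, Gorilla suits slowly changing gauges;
    -- a general-purpose codec (LZ4/ZSTD) can be chained after the specialized one.
    `time` DateTime CODEC(DoubleDelta, LZ4),
    `value` Float64 CODEC(Gorilla, ZSTD)
)
ENGINE = MergeTree
ORDER BY time;
```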
## Disk Storage of Data {#disk-storage-of-data}
Keeping data physically sorted by primary key makes it possible to extract data for its specific values or value ranges with low latency, less than a few dozen milliseconds. Some column-oriented DBMSs (such as SAP HANA and Google PowerDrill) can only work in RAM. This approach encourages the allocation of a larger hardware budget than is necessary for real-time analysis. ClickHouse is designed to work on regular hard drives, which means the cost per GB of data storage is low, but SSD and additional RAM are also fully used if available.
Keeping data physically sorted by primary key makes it possible to extract data for its specific values or value ranges with low latency, less than a few dozen milliseconds. Some column-oriented DBMSs (such as SAP HANA and Google PowerDrill) can only work in RAM. This approach encourages the allocation of a larger hardware budget than is necessary for real-time analysis.
ClickHouse is designed to work on regular hard drives, which means the cost per GB of data storage is low, but SSD and additional RAM are also fully used if available.
## Parallel Processing on Multiple Cores {#parallel-processing-on-multiple-cores}
@ -28,15 +32,18 @@ Large queries are parallelized naturally, taking all the necessary resources ava
## Distributed Processing on Multiple Servers {#distributed-processing-on-multiple-servers}
Almost none of the columnar DBMSs mentioned above have support for distributed query processing.
In ClickHouse, data can reside on different shards. Each shard can be a group of replicas used for fault tolerance. All shards are used to run a query in parallel, transparently for the user.
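A minimal sketch of how this looks for the user, assuming a cluster named `my_cluster` is defined in the server configuration and a local table `hits_local` exists on every shard:

```sql
-- The Distributed table stores no data itself; it fans queries out to
-- hits_local on every shard of my_cluster and merges the partial results.
CREATE TABLE hits_all AS hits_local
ENGINE = Distributed(my_cluster, default, hits_local, rand());

SELECT count() FROM hits_all;  -- runs on all shards in parallel
```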
## SQL Support {#sql-support}
ClickHouse supports a declarative query language based on SQL that is identical to the SQL standard in many cases.
Supported queries include GROUP BY, ORDER BY, subqueries in FROM, IN, and JOIN clauses, and scalar subqueries.
Dependent subqueries and window functions are not supported.
ClickHouse supports a [declarative query language based on SQL](../sql-reference/index.md) that is identical to the ANSI SQL standard in [many cases](../sql-reference/ansi.md).
## Vector Engine {#vector-engine}
Supported queries include [GROUP BY](../sql-reference/statements/select/group-by.md), [ORDER BY](../sql-reference/statements/select/order-by.md), subqueries in [FROM](../sql-reference/statements/select/from.md), [JOIN](../sql-reference/statements/select/join.md) clause, [IN](../sql-reference/operators/in.md) operator, and scalar subqueries.
Correlated (dependent) subqueries and window functions are not supported at the time of writing but might become available in the future.
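For illustration, a query combining several of these constructs might look as follows (the table and column names are hypothetical):

```sql
SELECT
    user_id,
    count() AS pageviews
FROM hits
WHERE counter_id IN (SELECT counter_id FROM counters WHERE owner = 'shop')
GROUP BY user_id
ORDER BY pageviews DESC
LIMIT 10;
```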
## Vector Computation Engine {#vector-engine}
Data is not only stored by columns but is processed by vectors (parts of columns), which allows achieving high CPU efficiency.
@ -44,13 +51,19 @@ Data is not only stored by columns but is processed by vectors (parts of columns
ClickHouse supports tables with a primary key. To quickly perform queries on the range of the primary key, the data is sorted incrementally using the merge tree. Due to this, data can continually be added to the table. No locks are taken when new data is ingested.
## Index {#index}
## Primary Index {#primary-index}
Having data physically sorted by primary key makes it possible to extract data for its specific values or value ranges with low latency, less than a few dozen milliseconds.
## Secondary Indexes {#secondary-indexes}
Unlike other database management systems, secondary indexes in ClickHouse do not point to specific rows or row ranges. Instead, they allow the database to know in advance that all rows in some data parts would not match the query filtering conditions and skip reading them entirely, which is why they are called [data skipping indexes](../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-data_skipping-indexes).
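A small sketch showing both kinds of indexes together (all names and the index type are illustrative):

```sql
CREATE TABLE events
(
    `event_date` Date,
    `user_id` UInt64,
    `duration_ms` UInt32,
    -- Data skipping index: stores min/max of duration_ms per block of granules.
    INDEX duration_idx duration_ms TYPE minmax GRANULARITY 4
)
ENGINE = MergeTree
ORDER BY (event_date, user_id);

-- The primary key (ORDER BY) serves the range conditions on event_date and user_id;
-- the minmax index lets ClickHouse skip granules where duration_ms cannot exceed 10000.
SELECT count()
FROM events
WHERE event_date >= today() - 7 AND user_id = 42 AND duration_ms > 10000;
```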
## Suitable for Online Queries {#suitable-for-online-queries}
Low latency means that queries can be processed without delay and without trying to prepare an answer in advance, right at the same moment while the user interface page is loading. In other words, online.
Most OLAP database management systems don't aim for online queries with sub-second latencies. In alternative systems, report building time of tens of seconds or even minutes is often considered acceptable. Sometimes it takes even longer, which forces preparing reports offline (in advance or by responding with "come back later").
In ClickHouse low latency means that queries can be processed without delay and without trying to prepare an answer in advance, right at the same moment while the user interface page is loading. In other words, online.
## Support for Approximated Calculations {#support-for-approximated-calculations}
@ -60,16 +73,24 @@ ClickHouse provides various ways to trade accuracy for performance:
2. Running a query based on a part (sample) of data and getting an approximated result. In this case, proportionally less data is retrieved from the disk. A sketch of such a query is shown after this list.
3. Running an aggregation for a limited number of random keys, instead of for all keys. Under certain conditions for key distribution in the data, this provides a reasonably accurate result while using fewer resources.
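A sketch of the sampling approach from point 2, assuming the table was created with a `SAMPLE BY` clause (the table name and filter are illustrative):

```sql
-- Reads roughly 10% of the data and scales the aggregate back up.
SELECT count() * 10 AS approx_pageviews
FROM hits
SAMPLE 1 / 10
WHERE url LIKE '%clickhouse%';
```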
## Adaptive Join Algorithm
ClickHouse adaptively chooses how to [JOIN](../sql-reference/statements/select/join.md) multiple tables, by preferring hash-join algorithm and falling back to the merge-join algorithm if there's more than one large table.
## Data Replication and Data Integrity Support {#data-replication-and-data-integrity-support}
ClickHouse uses asynchronous multi-master replication. After being written to any available replica, all the remaining replicas retrieve their copy in the background. The system maintains identical data on different replicas. Recovery after most failures is performed automatically, or semi-automatically in complex cases.
For more information, see the section [Data replication](../engines/table-engines/mergetree-family/replication.md).
## Role-Based Access Control
ClickHouse implements user account management using SQL queries and allows for [role-based access control configuration](../operations/access-rights.md) similar to what can be found in ANSI SQL standard and popular relational database management systems.
## Features that Can Be Considered Disadvantages {#clickhouse-features-that-can-be-considered-disadvantages}
1. No full-fledged transactions.
2. Lack of ability to modify or delete already inserted data with high rate and low latency. There are batch deletes and updates available to clean up or modify data, for example to comply with [GDPR](https://gdpr-info.eu).
3. The sparse index makes ClickHouse not so suitable for point queries retrieving single rows by their keys.
2. Lack of ability to modify or delete already inserted data with a high rate and low latency. There are batch deletes and updates available to clean up or modify data, for example, to comply with [GDPR](https://gdpr-info.eu).
3. The sparse index makes ClickHouse not so efficient for point queries retrieving single rows by their keys.
[Original article](https://clickhouse.tech/docs/en/introduction/distinctive_features/) <!--hide-->
[Original article](https://clickhouse.tech/docs/en/introduction/distinctive-features/) <!--hide-->

View File

@ -16,17 +16,16 @@ ClickHouse access entities:
You can configure access entities using:
- SQL-driven workflow.
- SQL-driven workflow.
You need to [enable](#enabling-access-control) this functionality.
- Server [configuration files](configuration-files.md) `users.xml` and `config.xml`.
- Server [configuration files](../operations/configuration-files.md) `users.xml` and `config.xml`.
We recommend using the SQL-driven workflow. Both of the configuration methods work simultaneously, so if you use the server configuration files for managing accounts and access rights, you can smoothly switch to the SQL-driven workflow.
!!! note "Warning"
You can't manage the same access entity by both configuration methods simultaneously.
You can't manage the same access entity by both configuration methods simultaneously.
## Usage {#access-control-usage}
@ -34,45 +33,44 @@ By default, the ClickHouse server provides the `default` user account which is n
If you just started using ClickHouse, consider the following scenario:
1. [Enable](#enabling-access-control) SQL-driven access control and account management for the `default` user.
2. Log in to the `default` user account and create all the required users. Don't forget to create an administrator account (`GRANT ALL ON *.* WITH GRANT OPTION TO admin_user_account`).
3. [Restrict permissions](settings/permissions-for-queries.md#permissions_for_queries) for the `default` user and disable SQL-driven access control and account management for it.
1. [Enable](#enabling-access-control) SQL-driven access control and account management for the `default` user.
2. Log in to the `default` user account and create all the required users (see the sketch after this list). Don't forget to create an administrator account (`GRANT ALL ON *.* WITH GRANT OPTION TO admin_user_account`).
3. [Restrict permissions](../operations/settings/permissions-for-queries.md#permissions_for_queries) for the `default` user and disable SQL-driven access control and account management for it.
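Step 2 could look roughly like the following (the account name and password are placeholders):

```sql
-- Run while logged in as the `default` user.
CREATE USER admin_user_account IDENTIFIED WITH sha256_password BY 'admin_password';
GRANT ALL ON *.* TO admin_user_account WITH GRANT OPTION;
```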
### Properties of Current Solution {#access-control-properties}
- You can grant permissions for databases and tables even if they do not exist.
- If a table was deleted, all the privileges that correspond to this table are not revoked. This means that even if you create a new table with the same name later, all the privileges remain valid. To revoke privileges corresponding to the deleted table, you need to execute, for example, the `REVOKE ALL PRIVILEGES ON db.table FROM ALL` query.
- There are no lifetime settings for privileges.
- You can grant permissions for databases and tables even if they do not exist.
- If a table was deleted, all the privileges that correspond to this table are not revoked. This means that even if you create a new table with the same name later, all the privileges remain valid. To revoke privileges corresponding to the deleted table, you need to execute, for example, the `REVOKE ALL PRIVILEGES ON db.table FROM ALL` query.
- There are no lifetime settings for privileges.
## User account {#user-account-management}
## User Account {#user-account-management}
A user account is an access entity that allows you to authorize someone in ClickHouse. A user account contains:
- Identification information.
- [Privileges](../sql-reference/statements/grant.md#grant-privileges) that define a scope of queries the user can execute.
- Hosts allowed to connect to the ClickHouse server.
- Assigned and default roles.
- Settings with their constraints applied by default at user login.
- Assigned settings profiles.
- Identification information.
- [Privileges](../sql-reference/statements/grant.md#grant-privileges) that define a scope of queries the user can execute.
- Hosts allowed to connect to the ClickHouse server.
- Assigned and default roles.
- Settings with their constraints applied by default at user login.
- Assigned settings profiles.
Privileges can be granted to a user account by the [GRANT](../sql-reference/statements/grant.md) query or by assigning [roles](#role-management). To revoke privileges from a user, ClickHouse provides the [REVOKE](../sql-reference/statements/revoke.md) query. To list privileges for a user, use the [SHOW GRANTS](../sql-reference/statements/show.md#show-grants-statement) statement.
Management queries:
- [CREATE USER](../sql-reference/statements/create.md#create-user-statement)
- [ALTER USER](../sql-reference/statements/alter.md#alter-user-statement)
- [DROP USER](../sql-reference/statements/misc.md#drop-user-statement)
- [SHOW CREATE USER](../sql-reference/statements/show.md#show-create-user-statement)
- [CREATE USER](../sql-reference/statements/create.md#create-user-statement)
- [ALTER USER](../sql-reference/statements/alter.md#alter-user-statement)
- [DROP USER](../sql-reference/statements/misc.md#drop-user-statement)
- [SHOW CREATE USER](../sql-reference/statements/show.md#show-create-user-statement)
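A hypothetical sequence of these management queries (the user name, database, and host restriction are illustrative):

```sql
CREATE USER analyst IDENTIFIED WITH sha256_password BY 'analyst_password' HOST IP '10.0.0.0/8';
GRANT SELECT ON analytics.* TO analyst;
SHOW CREATE USER analyst;
ALTER USER analyst SETTINGS max_memory_usage = 10000000000;
DROP USER analyst;
```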
### Settings Applying {#access-control-settings-applying}
Settings can be configured differently: for a user account, in its granted roles and in settings profiles. At user login, if a setting is configured for different access entities, the value and constraints of this setting are applied as follows (from higher to lower priority):
1. User account settings.
2. The settings of default roles of the user account. If a setting is configured in some roles, then order of the setting application is undefined.
3. The settings from settings profiles assigned to a user or to its default roles. If a setting is configured in some profiles, then order of setting application is undefined.
4. Settings applied to all the server by default or from the [default profile](server-configuration-parameters/settings.md#default-profile).
1. User account settings.
2. The settings of default roles of the user account. If a setting is configured in some roles, then the order of the setting application is undefined.
3. The settings from settings profiles assigned to a user or to its default roles. If a setting is configured in some profiles, then the order of setting application is undefined.
4. Settings applied to all the server by default or from the [default profile](../operations/server-configuration-parameters/settings.md#default-profile).
## Role {#role-management}
@ -80,18 +78,18 @@ Role is a container for access entities that can be granted to a user account.
Role contains:
- [Privileges](../sql-reference/statements/grant.md#grant-privileges)
- Settings and constraints
- List of assigned roles
- [Privileges](../sql-reference/statements/grant.md#grant-privileges)
- Settings and constraints
- List of assigned roles
Management queries:
- [CREATE ROLE](../sql-reference/statements/create.md#create-role-statement)
- [ALTER ROLE](../sql-reference/statements/alter.md#alter-role-statement)
- [DROP ROLE](../sql-reference/statements/misc.md#drop-role-statement)
- [SET ROLE](../sql-reference/statements/misc.md#set-role-statement)
- [SET DEFAULT ROLE](../sql-reference/statements/misc.md#set-default-role-statement)
- [SHOW CREATE ROLE](../sql-reference/statements/show.md#show-create-role-statement)
- [CREATE ROLE](../sql-reference/statements/create.md#create-role-statement)
- [ALTER ROLE](../sql-reference/statements/alter.md#alter-role-statement)
- [DROP ROLE](../sql-reference/statements/misc.md#drop-role-statement)
- [SET ROLE](../sql-reference/statements/misc.md#set-role-statement)
- [SET DEFAULT ROLE](../sql-reference/statements/misc.md#set-default-role-statement)
- [SHOW CREATE ROLE](../sql-reference/statements/show.md#show-create-role-statement)
Privileges can be granted to a role by the [GRANT](../sql-reference/statements/grant.md) query. To revoke privileges from a role, ClickHouse provides the [REVOKE](../sql-reference/statements/revoke.md) query.
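For example, a role could be created, granted privileges, and assigned to a user roughly like this (all names are illustrative):

```sql
CREATE ROLE accountant;
GRANT SELECT ON accounting.* TO accountant;
GRANT accountant TO mira;
SET DEFAULT ROLE accountant TO mira;
```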
@ -101,47 +99,43 @@ Row policy is a filter that defines which of the rows are available to a user or
Management queries:
- [CREATE ROW POLICY](../sql-reference/statements/create.md#create-row-policy-statement)
- [ALTER ROW POLICY](../sql-reference/statements/alter.md#alter-row-policy-statement)
- [DROP ROW POLICY](../sql-reference/statements/misc.md#drop-row-policy-statement)
- [SHOW CREATE ROW POLICY](../sql-reference/statements/show.md#show-create-row-policy-statement)
- [CREATE ROW POLICY](../sql-reference/statements/create.md#create-row-policy-statement)
- [ALTER ROW POLICY](../sql-reference/statements/alter.md#alter-row-policy-statement)
- [DROP ROW POLICY](../sql-reference/statements/misc.md#drop-row-policy-statement)
- [SHOW CREATE ROW POLICY](../sql-reference/statements/show.md#show-create-row-policy-statement)
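A minimal sketch of a row policy (the table, column and role names are hypothetical):

```sql
-- Users with the analyst role only see rows where region = 'EU'.
CREATE ROW POLICY IF NOT EXISTS eu_only ON reports.events
    FOR SELECT USING region = 'EU' TO analyst;
```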
## Settings Profile {#settings-profiles-management}
Settings profile is a collection of [settings](settings/index.md). Settings profile contains settings and constraints, as well as a list of roles and/or users to which this profile is applied.
Settings profile is a collection of [settings](../operations/settings/index.md). Settings profile contains settings and constraints, as well as a list of roles and/or users to which this profile is applied.
Management queries:
- [CREATE SETTINGS PROFILE](../sql-reference/statements/create.md#create-settings-profile-statement)
- [ALTER SETTINGS PROFILE](../sql-reference/statements/alter.md#alter-settings-profile-statement)
- [DROP SETTINGS PROFILE](../sql-reference/statements/misc.md#drop-settings-profile-statement)
- [SHOW CREATE SETTINGS PROFILE](../sql-reference/statements/show.md#show-create-settings-profile-statement)
- [CREATE SETTINGS PROFILE](../sql-reference/statements/create.md#create-settings-profile-statement)
- [ALTER SETTINGS PROFILE](../sql-reference/statements/alter.md#alter-settings-profile-statement)
- [DROP SETTINGS PROFILE](../sql-reference/statements/misc.md#drop-settings-profile-statement)
- [SHOW CREATE SETTINGS PROFILE](../sql-reference/statements/show.md#show-create-settings-profile-statement)
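A minimal sketch of a profile that sets a value together with a constraint (names and numbers are illustrative only):

```sql
-- The assigned account gets 10 GB as the default limit and may change it
-- only within the MIN/MAX bounds.
CREATE SETTINGS PROFILE IF NOT EXISTS web_profile
    SETTINGS max_memory_usage = 10000000000 MIN 5000000000 MAX 20000000000
    TO report_user;
```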
## Quota {#quotas-management}
Quota limits resource usage. See [Quotas](quotas.md).
Quota limits resource usage. See [Quotas](../operations/quotas.md).
Quota contains a set of limits for some durations, as well as a list of roles and/or users which should use this quota.
Management queries:
- [CREATE QUOTA](../sql-reference/statements/create.md#create-quota-statement)
- [ALTER QUOTA](../sql-reference/statements/alter.md#alter-quota-statement)
- [DROP QUOTA](../sql-reference/statements/misc.md#drop-quota-statement)
- [SHOW CREATE QUOTA](../sql-reference/statements/show.md#show-create-quota-statement)
- [CREATE QUOTA](../sql-reference/statements/create.md#create-quota-statement)
- [ALTER QUOTA](../sql-reference/statements/alter.md#alter-quota-statement)
- [DROP QUOTA](../sql-reference/statements/misc.md#drop-quota-statement)
- [SHOW CREATE QUOTA](../sql-reference/statements/show.md#show-create-quota-statement)
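A minimal sketch (the exact quota clause syntax differs slightly between ClickHouse versions; the name, limit and account are illustrative only):

```sql
-- Allow at most 100 queries per hour for the account.
CREATE QUOTA IF NOT EXISTS hourly_limit
    FOR INTERVAL 1 hour MAX queries = 100
    TO report_user;
```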
## Enabling SQL-driven Access Control and Account Management {#enabling-access-control}
- Set up a directory for configuration storage.
- Set up a directory for configuration storage.
ClickHouse stores access entity configurations in the folder set in the [access_control_path](server-configuration-parameters/settings.md#access_control_path) server configuration parameter.
ClickHouse stores access entity configurations in the folder set in the [access\_control\_path](../operations/server-configuration-parameters/settings.md#access_control_path) server configuration parameter.
- Enable SQL-driven access control and account management for at least one user account.
By default, SQL-driven access control and account management is disabled for all users. You need to configure at least one user in the `users.xml` configuration file and set the value of the [access_management](settings/settings-users.md#access_management-user-setting) setting to 1.
- Enable SQL-driven access control and account management for at least one user account.
By default, SQL-driven access control and account management is disabled for all users. You need to configure at least one user in the `users.xml` configuration file and set the value of the [access\_management](../operations/settings/settings-users.md#access_management-user-setting) setting to 1.
[Original article](https://clickhouse.tech/docs/en/operations/access_rights/) <!--hide-->

View File

@ -24,7 +24,7 @@ Some local filesystems provide snapshot functionality (for example, [ZFS](https:
## clickhouse-copier {#clickhouse-copier}
[clickhouse-copier](utilities/clickhouse-copier.md) is a versatile tool that was initially created to re-shard petabyte-sized tables. It can also be used for backup and restore purposes because it reliably copies data between ClickHouse tables and clusters.
[clickhouse-copier](../operations/utilities/clickhouse-copier.md) is a versatile tool that was initially created to re-shard petabyte-sized tables. It can also be used for backup and restore purposes because it reliably copies data between ClickHouse tables and clusters.
For smaller volumes of data, a simple `INSERT INTO ... SELECT ...` to remote tables might work as well.
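For example, a sketch of such a copy using the `remote` table function (the host and table names are hypothetical; network access and credentials are assumed to be configured, and the target table must already exist):

```sql
INSERT INTO FUNCTION remote('backup-host:9000', backup_db.hits)
SELECT * FROM default.hits;
```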

View File

@ -9,7 +9,7 @@ ClickHouse supports multi-file configuration management. The main server configu
All the configuration files should be in XML format. Also, they should have the same root element, usually `<yandex>`.
## Override
## Override {#override}
Some settings specified in the main configuration file can be overridden in other configuration files:
@ -18,13 +18,13 @@ Some settings specified in the main configuration file can be overridden in othe
- If `replace` is specified, it replaces the entire element with the specified one.
- If `remove` is specified, it deletes the element.
## Substitution
## Substitution {#substitution}
The config can also define "substitutions". If an element has the `incl` attribute, the corresponding substitution from the file will be used as the value. By default, the path to the file with substitutions is `/etc/metrika.xml`. This can be changed in the [include\_from](server-configuration-parameters/settings.md#server_configuration_parameters-include_from) element in the server config. The substitution values are specified in `/yandex/substitution_name` elements in this file. If a substitution specified in `incl` does not exist, it is recorded in the log. To prevent ClickHouse from logging missing substitutions, specify the `optional="true"` attribute (for example, settings for [macros](server-configuration-parameters/settings.md)).
The config can also define “substitutions”. If an element has the `incl` attribute, the corresponding substitution from the file will be used as the value. By default, the path to the file with substitutions is `/etc/metrika.xml`. This can be changed in the [include\_from](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-include_from) element in the server config. The substitution values are specified in `/yandex/substitution_name` elements in this file. If a substitution specified in `incl` does not exist, it is recorded in the log. To prevent ClickHouse from logging missing substitutions, specify the `optional="true"` attribute (for example, settings for [macros](../operations/server-configuration-parameters/settings.md)).
Substitutions can also be performed from ZooKeeper. To do this, specify the attribute `from_zk = "/path/to/node"`. The element value is replaced with the contents of the node at `/path/to/node` in ZooKeeper. You can also put an entire XML subtree on the ZooKeeper node and it will be fully inserted into the source element.
## User Settings
## User Settings {#user-settings}
The `config.xml` file can specify a separate config with user settings, profiles, and quotas. The relative path to this config is set in the `users_config` element. By default, it is `users.xml`. If `users_config` is omitted, the user settings, profiles, and quotas are specified directly in `config.xml`.
@ -32,7 +32,7 @@ Users configuration can be split into separate files similar to `config.xml`
The directory name is defined as the `users_config` setting value without the `.xml` postfix, concatenated with `.d`.
Directory `users.d` is used by default, as `users_config` defaults to `users.xml`.
## Example
## Example {#example}
For example, you can have separate config file for each user like this:
@ -55,7 +55,7 @@ $ cat /etc/clickhouse-server/users.d/alice.xml
</yandex>
```
## Implementation Details
## Implementation Details {#implementation-details}
For each config file, the server also generates `file-preprocessed.xml` files when starting. These files contain all the completed substitutions and overrides, and they are intended for informational use. If ZooKeeper substitutions were used in the config files but ZooKeeper is not available on the server start, the server loads the configuration from the preprocessed file.

View File

@ -8,19 +8,19 @@ toc_title: Introduction
ClickHouse operations manual consists of the following major sections:
- [Requirements](requirements.md)
- [Monitoring](monitoring.md)
- [Troubleshooting](troubleshooting.md)
- [Usage Recommendations](tips.md)
- [Update Procedure](update.md)
- [Access Rights](access-rights.md)
- [Data Backup](backup.md)
- [Configuration Files](configuration-files.md)
- [Quotas](quotas.md)
- [System Tables](system-tables.md)
- [Server Configuration Parameters](server-configuration-parameters/index.md)
- [How To Test Your Hardware With ClickHouse](performance-test.md)
- [Settings](settings/index.md)
- [Utilities](utilities/index.md)
- [Requirements](../operations/requirements.md)
- [Monitoring](../operations/monitoring.md)
- [Troubleshooting](../operations/troubleshooting.md)
- [Usage Recommendations](../operations/tips.md)
- [Update Procedure](../operations/update.md)
- [Access Rights](../operations/access-rights.md)
- [Data Backup](../operations/backup.md)
- [Configuration Files](../operations/configuration-files.md)
- [Quotas](../operations/quotas.md)
- [System Tables](../operations/system-tables/index.md)
- [Server Configuration Parameters](../operations/server-configuration-parameters/index.md)
- [How To Test Your Hardware With ClickHouse](../operations/performance-test.md)
- [Settings](../operations/settings/index.md)
- [Utilities](../operations/utilities/index.md)
{## [Original article](https://clickhouse.tech/docs/en/operations/) ##}

View File

@ -26,19 +26,19 @@ It is highly recommended to set up monitoring for:
ClickHouse server has embedded instruments for self-state monitoring.
To track server events use server logs. See the [logger](server-configuration-parameters/settings.md#server_configuration_parameters-logger) section of the configuration file.
To track server events use server logs. See the [logger](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-logger) section of the configuration file.
ClickHouse collects:
- Different metrics of how the server uses computational resources.
- Common statistics on query processing.
You can find metrics in the [system.metrics](../operations/system-tables.md#system_tables-metrics), [system.events](../operations/system-tables.md#system_tables-events), and [system.asynchronous\_metrics](../operations/system-tables.md#system_tables-asynchronous_metrics) tables.
You can find metrics in the [system.metrics](../operations/system-tables/metrics.md#system_tables-metrics), [system.events](../operations/system-tables/events.md#system_tables-events), and [system.asynchronous\_metrics](../operations/system-tables/asynchronous_metrics.md#system_tables-asynchronous_metrics) tables.
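For example, a quick look at the current values (a sketch; the exact set of metrics and events depends on the server version):

```sql
SELECT metric, value FROM system.metrics ORDER BY value DESC LIMIT 10;
SELECT event, value FROM system.events WHERE event LIKE '%Query%' LIMIT 10;
```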
You can configure ClickHouse to export metrics to [Graphite](https://github.com/graphite-project). See the [Graphite section](server-configuration-parameters/settings.md#server_configuration_parameters-graphite) in the ClickHouse server configuration file. Before configuring export of metrics, you should set up Graphite by following their official [guide](https://graphite.readthedocs.io/en/latest/install.html).
You can configure ClickHouse to export metrics to [Graphite](https://github.com/graphite-project). See the [Graphite section](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-graphite) in the ClickHouse server configuration file. Before configuring export of metrics, you should set up Graphite by following their official [guide](https://graphite.readthedocs.io/en/latest/install.html).
You can configure ClickHouse to export metrics to [Prometheus](https://prometheus.io). See the [Prometheus section](server-configuration-parameters/settings.md#server_configuration_parameters-prometheus) in the ClickHouse server configuration file. Before configuring export of metrics, you should set up Prometheus by following their official [guide](https://prometheus.io/docs/prometheus/latest/installation/).
You can configure ClickHouse to export metrics to [Prometheus](https://prometheus.io). See the [Prometheus section](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-prometheus) in the ClickHouse server configuration file. Before configuring export of metrics, you should set up Prometheus by following their official [guide](https://prometheus.io/docs/prometheus/latest/installation/).
Additionally, you can monitor server availability through the HTTP API. Send the `HTTP GET` request to `/ping`. If the server is available, it responds with `200 OK`.
To monitor servers in a cluster configuration, you should set the [max\_replica\_delay\_for\_distributed\_queries](settings/settings.md#settings-max_replica_delay_for_distributed_queries) parameter and use the HTTP resource `/replicas_status`. A request to `/replicas_status` returns `200 OK` if the replica is available and is not delayed behind the other replicas. If a replica is delayed, it returns `503 HTTP_SERVICE_UNAVAILABLE` with information about the gap.
To monitor servers in a cluster configuration, you should set the [max\_replica\_delay\_for\_distributed\_queries](../operations/settings/settings.md#settings-max_replica_delay_for_distributed_queries) parameter and use the HTTP resource `/replicas_status`. A request to `/replicas_status` returns `200 OK` if the replica is available and is not delayed behind the other replicas. If a replica is delayed, it returns `503 HTTP_SERVICE_UNAVAILABLE` with information about the gap.

View File

@ -9,11 +9,11 @@ ClickHouse runs sampling profiler that allows analyzing query execution. Using p
To use profiler:
- Set up the [trace\_log](../server-configuration-parameters/settings.md#server_configuration_parameters-trace_log) section of the server configuration.
- Set up the [trace\_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-trace_log) section of the server configuration.
This section configures the [trace\_log](../../operations/system-tables.md#system_tables-trace_log) system table containing the results of the profiler functioning. It is configured by default. Remember that data in this table is valid only for a running server. After the server restart, ClickHouse doesn't clean up the table, and all the stored virtual memory addresses may become invalid.
This section configures the [trace\_log](../../operations/system-tables/trace_log.md#system_tables-trace_log) system table containing the results of the profiler functioning. It is configured by default. Remember that data in this table is valid only for a running server. After the server restart, ClickHouse doesn't clean up the table, and all the stored virtual memory addresses may become invalid.
- Set up the [query\_profiler\_cpu\_time\_period\_ns](../settings/settings.md#query_profiler_cpu_time_period_ns) or [query\_profiler\_real\_time\_period\_ns](../settings/settings.md#query_profiler_real_time_period_ns) settings. Both settings can be used simultaneously.
- Set up the [query\_profiler\_cpu\_time\_period\_ns](../../operations/settings/settings.md#query_profiler_cpu_time_period_ns) or [query\_profiler\_real\_time\_period\_ns](../../operations/settings/settings.md#query_profiler_real_time_period_ns) settings. Both settings can be used simultaneously.
These settings allow you to configure profiler timers. As these are the session settings, you can get different sampling frequency for the whole server, individual users or user profiles, for your interactive session, and for each individual query.
@ -23,7 +23,7 @@ To analyze the `trace_log` system table:
- Install the `clickhouse-common-static-dbg` package. See [Install from DEB Packages](../../getting-started/install.md#install-from-deb-packages).
- Allow introspection functions by the [allow\_introspection\_functions](../settings/settings.md#settings-allow_introspection_functions) setting.
- Allow introspection functions by the [allow\_introspection\_functions](../../operations/settings/settings.md#settings-allow_introspection_functions) setting.
For security reasons, introspection functions are disabled by default.
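Once introspection functions are allowed, the collected stack traces can be symbolized, for example (a sketch; the `query_id` value is a placeholder):

```sql
SET allow_introspection_functions = 1;
SELECT
    count() AS samples,
    arrayStringConcat(arrayMap(x -> demangle(addressToSymbol(x)), trace), '\n') AS stack
FROM system.trace_log
WHERE query_id = '<your-query-id>'
GROUP BY trace
ORDER BY samples DESC
LIMIT 5;
```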

View File

@ -24,7 +24,7 @@ With this instruction you can run basic ClickHouse performance test on any serve
# Then do:
chmod a+x clickhouse
5. Download configs:
1. Download configs:
<!-- -->
@ -34,7 +34,7 @@ With this instruction you can run basic ClickHouse performance test on any serve
wget https://raw.githubusercontent.com/ClickHouse/ClickHouse/master/programs/server/config.d/path.xml -O config.d/path.xml
wget https://raw.githubusercontent.com/ClickHouse/ClickHouse/master/programs/server/config.d/log_to_console.xml -O config.d/log_to_console.xml
6. Download benchmark files:
1. Download benchmark files:
<!-- -->
@ -42,7 +42,7 @@ With this instruction you can run basic ClickHouse performance test on any serve
chmod a+x benchmark-new.sh
wget https://raw.githubusercontent.com/ClickHouse/ClickHouse/master/benchmark/clickhouse/queries.sql
7. Download test data according to the [Yandex.Metrica dataset](../getting-started/example-datasets/metrica.md) instruction (“hits” table containing 100 million rows).
1. Download test data according to the [Yandex.Metrica dataset](../getting-started/example-datasets/metrica.md) instruction (“hits” table containing 100 million rows).
<!-- -->
@ -50,31 +50,31 @@ With this instruction you can run basic ClickHouse performance test on any serve
tar xvf hits_100m_obfuscated_v1.tar.xz -C .
mv hits_100m_obfuscated_v1/* .
8. Run the server:
1. Run the server:
<!-- -->
./clickhouse server
9. Check the data: ssh to the server in another terminal
1. Check the data: ssh to the server in another terminal
<!-- -->
./clickhouse client --query "SELECT count() FROM hits_100m_obfuscated"
100000000
10. Edit the benchmark-new.sh, change `clickhouse-client` to `./clickhouse client` and add `-max_memory_usage 100000000000` parameter.
1. Edit the benchmark-new.sh, change `clickhouse-client` to `./clickhouse client` and add `-max_memory_usage 100000000000` parameter.
<!-- -->
mcedit benchmark-new.sh
11. Run the benchmark:
1. Run the benchmark:
<!-- -->
./benchmark-new.sh hits_100m_obfuscated
12. Send the numbers and the info about your hardware configuration to clickhouse-feedback@yandex-team.com
1. Send the numbers and the info about your hardware configuration to clickhouse-feedback@yandex-team.com
All the results are published here: https://clickhouse.tech/benchmark/hardware/

View File

@ -10,8 +10,8 @@ This section contains descriptions of server settings that cannot be changed at
These settings are stored in the `config.xml` file on the ClickHouse server.
Other settings are described in the “[Settings](../settings/index.md#session-settings-intro)” section.
Other settings are described in the “[Settings](../../operations/settings/index.md#session-settings-intro)” section.
Before studying the settings, read the [Configuration files](../configuration-files.md#configuration_files) section and note the use of substitutions (the `incl` and `optional` attributes).
Before studying the settings, read the [Configuration files](../../operations/configuration-files.md#configuration_files) section and note the use of substitutions (the `incl` and `optional` attributes).
[Original article](https://clickhouse.tech/docs/en/operations/server_configuration_parameters/) <!--hide-->

View File

@ -145,10 +145,10 @@ Settings:
- interval – The interval for sending, in seconds.
- timeout – The timeout for sending data, in seconds.
- root\_path – Prefix for keys.
- metrics – Sending data from the [system.metrics](../../operations/system-tables.md#system_tables-metrics) table.
- events – Sending deltas data accumulated for the time period from the [system.events](../../operations/system-tables.md#system_tables-events) table.
- events\_cumulative – Sending cumulative data from the [system.events](../../operations/system-tables.md#system_tables-events) table.
- asynchronous\_metrics – Sending data from the [system.asynchronous\_metrics](../../operations/system-tables.md#system_tables-asynchronous_metrics) table.
- metrics – Sending data from the [system.metrics](../../operations/system-tables/metrics.md#system_tables-metrics) table.
- events – Sending deltas data accumulated for the time period from the [system.events](../../operations/system-tables/events.md#system_tables-events) table.
- events\_cumulative – Sending cumulative data from the [system.events](../../operations/system-tables/events.md#system_tables-events) table.
- asynchronous\_metrics – Sending data from the [system.asynchronous\_metrics](../../operations/system-tables/asynchronous_metrics.md#system_tables-asynchronous_metrics) table.
You can configure multiple `<graphite>` clauses. For instance, you can use this for sending different data at different intervals.
@ -229,7 +229,7 @@ Opens `https://tabix.io/` when accessing `http://localhost: http_port`.
The path to the file with substitutions.
For more information, see the section “[Configuration files](../configuration-files.md#configuration_files)”.
For more information, see the section “[Configuration files](../../operations/configuration-files.md#configuration_files)”.
**Example**
@ -503,7 +503,7 @@ Keys for server/client settings:
Logging events that are associated with [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md). For instance, adding or merging data. You can use the log to simulate merge algorithms and compare their characteristics. You can visualize the merge process.
Queries are logged in the [system.part\_log](../../operations/system-tables.md#system_tables-part-log) table, not in a separate file. You can configure the name of this table in the `table` parameter (see below).
Queries are logged in the [system.part\_log](../../operations/system-tables/part_log.md#system_tables-part-log) table, not in a separate file. You can configure the name of this table in the `table` parameter (see below).
Use the following parameters to configure logging:
@ -544,9 +544,9 @@ Settings:
- `endpoint` – HTTP endpoint for scraping metrics by the Prometheus server. Start from '/'.
- `port` – Port for `endpoint`.
- `metrics` – Flag that enables exposing metrics from the [system.metrics](../system-tables.md#system_tables-metrics) table.
- `events` – Flag that enables exposing metrics from the [system.events](../system-tables.md#system_tables-events) table.
- `asynchronous_metrics` – Flag that enables exposing current metric values from the [system.asynchronous\_metrics](../system-tables.md#system_tables-asynchronous_metrics) table.
- `metrics` – Flag that enables exposing metrics from the [system.metrics](../../operations/system-tables/metrics.md#system_tables-metrics) table.
- `events` – Flag that enables exposing metrics from the [system.events](../../operations/system-tables/events.md#system_tables-events) table.
- `asynchronous_metrics` – Flag that enables exposing current metric values from the [system.asynchronous\_metrics](../../operations/system-tables/asynchronous_metrics.md#system_tables-asynchronous_metrics) table.
**Example**
@ -562,9 +562,9 @@ Settings:
## query\_log {#server_configuration_parameters-query-log}
Setting for logging queries received with the [log\_queries=1](../settings/settings.md) setting.
Setting for logging queries received with the [log\_queries=1](../../operations/settings/settings.md) setting.
Queries are logged in the [system.query\_log](../../operations/system-tables.md#system_tables-query_log) table, not in a separate file. You can change the name of the table in the `table` parameter (see below).
Queries are logged in the [system.query\_log](../../operations/system-tables/query_log.md#system_tables-query_log) table, not in a separate file. You can change the name of the table in the `table` parameter (see below).
Use the following parameters to configure logging:
@ -588,9 +588,9 @@ If the table doesn't exist, ClickHouse will create it. If the structure of the
## query\_thread\_log {#server_configuration_parameters-query_thread_log}
Setting for logging threads of queries received with the [log\_query\_threads=1](../settings/settings.md#settings-log-query-threads) setting.
Setting for logging threads of queries received with the [log\_query\_threads=1](../../operations/settings/settings.md#settings-log-query-threads) setting.
Queries are logged in the [system.query\_thread\_log](../../operations/system-tables.md#system_tables-query_thread_log) table, not in a separate file. You can change the name of the table in the `table` parameter (see below).
Queries are logged in the [system.query\_thread\_log](../../operations/system-tables/query_thread_log.md#system_tables-query_thread_log) table, not in a separate file. You can change the name of the table in the `table` parameter (see below).
Use the following parameters to configure logging:
@ -614,7 +614,7 @@ If the table doesn't exist, ClickHouse will create it. If the structure of the
## trace\_log {#server_configuration_parameters-trace_log}
Settings for the [trace\_log](../../operations/system-tables.md#system_tables-trace_log) system table operation.
Settings for the [trace\_log](../../operations/system-tables/trace_log.md#system_tables-trace_log) system table operation.
Parameters:
@ -675,11 +675,11 @@ Configuration of clusters used by the [Distributed](../../engines/table-engines/
<remote_servers incl="clickhouse_remote_servers" />
```
For the value of the `incl` attribute, see the section “[Configuration files](../configuration-files.md#configuration_files)”.
For the value of the `incl` attribute, see the section “[Configuration files](../../operations/configuration-files.md#configuration_files)”.
**See Also**
- [skip\_unavailable\_shards](../settings/settings.md#settings-skip_unavailable_shards)
- [skip\_unavailable\_shards](../../operations/settings/settings.md#settings-skip_unavailable_shards)
## timezone {#server_configuration_parameters-timezone}
@ -705,7 +705,7 @@ Port for communicating with clients over the TCP protocol.
<tcp_port>9000</tcp_port>
```
## tcp_port_secure {#server_configuration_parameters-tcp_port_secure}
## tcp\_port\_secure {#server_configuration_parameters-tcp_port_secure}
TCP port for secure communication with clients. Use it with [OpenSSL](#server_configuration_parameters-openssl) settings.
@ -733,7 +733,7 @@ Example
<mysql_port>9004</mysql_port>
```
## tmp_path {#tmp-path}
## tmp\_path {#tmp-path}
Path to temporary data for processing large queries.
@ -746,23 +746,23 @@ Path to temporary data for processing large queries.
<tmp_path>/var/lib/clickhouse/tmp/</tmp_path>
```
## tmp_policy {#tmp-policy}
## tmp\_policy {#tmp-policy}
Policy from [storage_configuration](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes) to store temporary files.
Policy from [storage\_configuration](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes) to store temporary files.
If not set, [tmp_path](#tmp-path) is used, otherwise it is ignored.
If not set, [tmp\_path](#tmp-path) is used, otherwise it is ignored.
!!! note "Note"
- `move_factor` is ignored.
- `keep_free_space_bytes` is ignored.
- `max_data_part_size_bytes` is ignored.
- You must have exactly one volume in that policy.
- `keep_free_space_bytes` is ignored.
- `max_data_part_size_bytes` is ignored.
- You must have exactly one volume in that policy.
## uncompressed\_cache\_size {#server-settings-uncompressed_cache_size}
Cache size (in bytes) for uncompressed data used by table engines from the [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md).
There is one shared cache for the server. Memory is allocated on demand. The cache is used if the option [use\_uncompressed\_cache](../settings/settings.md#setting-use_uncompressed_cache) is enabled.
There is one shared cache for the server. Memory is allocated on demand. The cache is used if the option [use\_uncompressed\_cache](../../operations/settings/settings.md#setting-use_uncompressed_cache) is enabled.
The uncompressed cache is advantageous for very short queries in individual cases.
@ -894,9 +894,9 @@ The update is performed asynchronously, in a separate system thread.
**See also**
- [background_schedule_pool_size](../settings/settings.md#background_schedule_pool_size)
- [background\_schedule\_pool\_size](../../operations/settings/settings.md#background_schedule_pool_size)
## access_control_path {#access_control_path}
## access\_control\_path {#access_control_path}
Path to a folder where a ClickHouse server stores user and role configurations created by SQL commands.
@ -904,6 +904,6 @@ Default value: `/var/lib/clickhouse/access/`.
**See also**
- [Access Control and Account Management](../access-rights.md#access-control)
- [Access Control and Account Management](../../operations/access-rights.md#access-control)
[Original article](https://clickhouse.tech/docs/en/operations/server_configuration_parameters/settings/) <!--hide-->

View File

@ -37,7 +37,7 @@ After setting `readonly = 1`, the user can't change `readonly` and `allow_ddl`
When using the `GET` method in the [HTTP interface](../../interfaces/http.md), `readonly = 1` is set automatically. To modify data, use the `POST` method.
Setting `readonly = 1` prohibits the user from changing all the settings. There is a way to prohibit the user
from changing only specific settings, for details see [constraints on settings](constraints-on-settings.md).
from changing only specific settings, for details see [constraints on settings](../../operations/settings/constraints-on-settings.md).
Default value: 0
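A minimal sketch of the behaviour (the commented-out queries use hypothetical names):

```sql
SET readonly = 1;
SELECT count() FROM system.tables;  -- reads are still allowed
-- INSERT INTO events VALUES (1);   -- data-modifying queries are rejected in readonly mode
-- SET max_threads = 1;             -- so are further settings changes
```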

View File

@ -113,7 +113,7 @@ Limit on the number of bytes in the result. The same as the previous setting.
What to do if the volume of the result exceeds one of the limits: throw or break. By default, throw.
Using break is similar to using LIMIT. `Break` interrupts execution only at the block level. This means that the number of returned rows is greater than [max\_result\_rows](#setting-max_result_rows), is a multiple of [max\_block\_size](settings.md#setting-max_block_size), and depends on [max_threads](settings.md#settings-max_threads).
Using break is similar to using LIMIT. `Break` interrupts execution only at the block level. This means that the number of returned rows is greater than [max\_result\_rows](#setting-max_result_rows), is a multiple of [max\_block\_size](../../operations/settings/settings.md#setting-max_block_size), and depends on [max\_threads](../../operations/settings/settings.md#settings-max_threads).
Example:

View File

@ -8,8 +8,7 @@ toc_title: Settings Profiles
A settings profile is a collection of settings grouped under the same name.
!!! note "Information"
ClickHouse also supports [SQL-driven workflow](../access-rights.md#access-control) for managing settings profiles. We recommend using it.
ClickHouse also supports [SQL-driven workflow](../../operations/access-rights.md#access-control) for managing settings profiles. We recommend using it.
The profile can have any name. You can specify the same profile for different users. The most important thing you can write in the settings profile is `readonly=1`, which ensures read-only access.
@ -71,9 +70,9 @@ Example:
</profiles>
```
The example specifies two profiles: `default` and `web`.
The example specifies two profiles: `default` and `web`.
The `default` profile has a special purpose: it must always be present and is applied when starting the server. In other words, the `default` profile contains default settings.
The `default` profile has a special purpose: it must always be present and is applied when starting the server. In other words, the `default` profile contains default settings.
The `web` profile is a regular profile that can be set using the `SET` query or using a URL parameter in an HTTP query.
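For example, a sketch of switching the current session to the `web` profile described above:

```sql
SET profile = 'web';
```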

View File

@ -8,8 +8,7 @@ toc_title: User Settings
The `users` section of the `user.xml` configuration file contains user settings.
!!! note "Information"
ClickHouse also supports [SQL-driven workflow](../access-rights.md#access-control) for managing users. We recommend using it.
ClickHouse also supports [SQL-driven workflow](../../operations/access-rights.md#access-control) for managing users. We recommend using it.
Structure of the `users` section:
@ -74,14 +73,14 @@ Password can be specified in plaintext or in SHA256 (hex format).
The first line of the result is the password. The second line is the corresponding double SHA1 hash.
### access_management {#access_management-user-setting}
### access\_management {#access_management-user-setting}
This setting enables or disables using of SQL-driven [access control and account management](../access-rights.md#access-control) for the user.
This setting enables or disables using of SQL-driven [access control and account management](../../operations/access-rights.md#access-control) for the user.
Possible values:
- 0 — Disabled.
- 1 — Enabled.
- 0 — Disabled.
- 1 — Enabled.
Default value: 0.
@ -129,14 +128,14 @@ To open access only from localhost, specify:
### user\_name/profile {#user-nameprofile}
You can assign a settings profile for the user. Settings profiles are configured in a separate section of the `users.xml` file. For more information, see [Profiles of Settings](settings-profiles.md).
You can assign a settings profile for the user. Settings profiles are configured in a separate section of the `users.xml` file. For more information, see [Profiles of Settings](../../operations/settings/settings-profiles.md).
### user\_name/quota {#user-namequota}
Quotas allow you to track or limit resource usage over a period of time. Quotas are configured in the `quotas`
section of the `users.xml` configuration file.
You can assign a quotas set for the user. For a detailed description of quotas configuration, see [Quotas](../quotas.md#quotas).
You can assign a quotas set for the user. For a detailed description of quotas configuration, see [Quotas](../../operations/quotas.md#quotas).
### user\_name/databases {#user-namedatabases}

View File

@ -404,11 +404,11 @@ Possible values:
Default value: 0.
## partial_merge_join_optimizations {#partial_merge_join_optimizations}
## partial\_merge\_join\_optimizations {#partial_merge_join_optimizations}
Disables optimizations in partial merge join algorithm for [JOIN](../../sql-reference/statements/select/join.md) queries.
By default, this setting enables improvements that could lead to wrong results. If you see suspicious results in your queries, disable optimizations by this setting. Optimizations can be different in different versions of the ClickHouse server.
By default, this setting enables improvements that could lead to wrong results. If you see suspicious results in your queries, disable optimizations by this setting. Optimizations can be different in different versions of the ClickHouse server.
Possible values:
@ -417,35 +417,35 @@ Possible values:
Default value: 1.
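For example, a sketch of turning the optimizations off for the current session if results look suspicious:

```sql
SET partial_merge_join_optimizations = 0;
```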
## partial_merge_join_rows_in_right_blocks {#partial_merge_join_rows_in_right_blocks}
## partial\_merge\_join\_rows\_in\_right\_blocks {#partial_merge_join_rows_in_right_blocks}
Limits sizes of right-hand join data blocks in partial merge join algorithm for [JOIN](../../sql-reference/statements/select/join.md) queries.
ClickHouse server:
1. Splits right-hand join data into blocks with up to the specified number of rows.
2. Indexes each block with its minimum and maximum values.
3. Unloads prepared blocks to disk if possible.
1. Splits right-hand join data into blocks with up to the specified number of rows.
2. Indexes each block with its minimum and maximum values.
3. Unloads prepared blocks to disk if possible.
Possible values:
- Any positive integer. Recommended range of values: [1000, 100000].
- Any positive integer. Recommended range of values: \[1000, 100000\].
Default value: 65536.
## join_on_disk_max_files_to_merge {#join_on_disk_max_files_to_merge}
## join\_on\_disk\_max\_files\_to\_merge {#join_on_disk_max_files_to_merge}
Limits the number of files allowed for parallel sorting in MergeJoin operations when they are executed on disk.
Limits the number of files allowed for parallel sorting in MergeJoin operations when they are executed on disk.
The bigger the value of the setting, the more RAM is used and the less disk I/O is needed.
Possible values:
- Any positive integer, starting from 2.
- Any positive integer, starting from 2.
Default value: 64.
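A sketch of tuning these on-disk merge-join settings for a single heavy query (the table names and values are illustrative only):

```sql
SELECT l.key, count()
FROM big_left AS l
INNER JOIN big_right AS r ON l.key = r.key
GROUP BY l.key
SETTINGS join_algorithm = 'partial_merge',
         partial_merge_join_rows_in_right_blocks = 32768,
         join_on_disk_max_files_to_merge = 128;
```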
## any_join_distinct_right_table_keys {#any_join_distinct_right_table_keys}
## any\_join\_distinct\_right\_table\_keys {#any_join_distinct_right_table_keys}
Enables legacy ClickHouse server behavior in `ANY INNER|LEFT JOIN` operations.
@ -454,19 +454,18 @@ Enables legacy ClickHouse server behavior in `ANY INNER|LEFT JOIN` operations.
When the legacy behavior enabled:
- Results of `t1 ANY LEFT JOIN t2` and `t2 ANY RIGHT JOIN t1` operations are not equal because ClickHouse uses the logic with many-to-one left-to-right table keys mapping.
- Results of `ANY INNER JOIN` operations contain all rows from the left table like the `SEMI LEFT JOIN` operations do.
- Results of `t1 ANY LEFT JOIN t2` and `t2 ANY RIGHT JOIN t1` operations are not equal because ClickHouse uses the logic with many-to-one left-to-right table keys mapping.
- Results of `ANY INNER JOIN` operations contain all rows from the left table like the `SEMI LEFT JOIN` operations do.
When the legacy behavior disabled:
- Results of `t1 ANY LEFT JOIN t2` and `t2 ANY RIGHT JOIN t1` operations are equal because ClickHouse uses the logic which provides one-to-many keys mapping in `ANY RIGHT JOIN` operations.
- Results of `ANY INNER JOIN` operations contain one row per key from both left and right tables.
- Results of `t1 ANY LEFT JOIN t2` and `t2 ANY RIGHT JOIN t1` operations are equal because ClickHouse uses the logic which provides one-to-many keys mapping in `ANY RIGHT JOIN` operations.
- Results of `ANY INNER JOIN` operations contain one row per key from both left and right tables.
Possible values:
- 0 — Legacy behavior is disabled.
- 1 — Legacy behavior is enabled.
- 0 — Legacy behavior is disabled.
- 1 — Legacy behavior is enabled.
Default value: 0.
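A sketch of re-enabling the legacy behavior for a single query (the tables `t1`, `t2` and the column `key` are hypothetical):

```sql
SELECT * FROM t1 ANY LEFT JOIN t2 USING (key)
SETTINGS any_join_distinct_right_table_keys = 1;
```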
@ -474,19 +473,17 @@ See also:
- [JOIN strictness](../../sql-reference/statements/select/join.md#select-join-strictness)
## temporary_files_codec {#temporary_files_codec}
## temporary\_files\_codec {#temporary_files_codec}
Sets compression codec for temporary files used in sorting and joining operations on disk.
Possible values:
Possible values:
- LZ4 — [LZ4](https://en.wikipedia.org/wiki/LZ4_(compression_algorithm)) compression is applied.
- NONE — No compression is applied.
- LZ4 — [LZ4](https://en.wikipedia.org/wiki/LZ4_(compression_algorithm)) compression is applied.
- NONE — No compression is applied.
Default value: LZ4.
## max\_block\_size {#setting-max_block_size}
In ClickHouse, data is processed by blocks (sets of column parts). The internal processing cycles for a single block are efficient enough, but there are noticeable expenditures on each block. The `max_block_size` setting is a recommendation for what size of the block (in a count of rows) to load from tables. The block size shouldn't be too small, so that the expenditures on each block are still noticeable, but not too large so that the query with LIMIT that is completed after the first block is processed quickly. The goal is to avoid consuming too much memory when extracting a large number of columns in multiple threads and to preserve at least some cache locality.
@ -555,7 +552,7 @@ Default value: 8.
If ClickHouse should read more than `merge_tree_max_rows_to_use_cache` rows in one query, it doesn't use the cache of uncompressed blocks.
The cache of uncompressed blocks stores data extracted for queries. ClickHouse uses this cache to speed up responses to repeated small queries. This setting protects the cache from trashing by queries that read a large amount of data. The [uncompressed\_cache\_size](../server-configuration-parameters/settings.md#server-settings-uncompressed_cache_size) server setting defines the size of the cache of uncompressed blocks.
The cache of uncompressed blocks stores data extracted for queries. ClickHouse uses this cache to speed up responses to repeated small queries. This setting protects the cache from trashing by queries that read a large amount of data. The [uncompressed\_cache\_size](../../operations/server-configuration-parameters/settings.md#server-settings-uncompressed_cache_size) server setting defines the size of the cache of uncompressed blocks.
Possible values:
@ -567,7 +564,7 @@ Default value: 128 ✕ 8192.
If ClickHouse should read more than `merge_tree_max_bytes_to_use_cache` bytes in one query, it doesn't use the cache of uncompressed blocks.
The cache of uncompressed blocks stores data extracted for queries. ClickHouse uses this cache to speed up responses to repeated small queries. This setting protects the cache from trashing by queries that read a large amount of data. The [uncompressed\_cache\_size](../server-configuration-parameters/settings.md#server-settings-uncompressed_cache_size) server setting defines the size of the cache of uncompressed blocks.
The cache of uncompressed blocks stores data extracted for queries. ClickHouse uses this cache to speed up responses to repeated small queries. This setting protects the cache from trashing by queries that read a large amount of data. The [uncompressed\_cache\_size](../../operations/server-configuration-parameters/settings.md#server-settings-uncompressed_cache_size) server setting defines the size of the cache of uncompressed blocks.
Possible value:
@ -592,7 +589,7 @@ Default value: 0.
Setting up query logging.
Queries sent to ClickHouse with this setup are logged according to the rules in the [query\_log](../server-configuration-parameters/settings.md#server_configuration_parameters-query-log) server configuration parameter.
Queries sent to ClickHouse with this setup are logged according to the rules in the [query\_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query-log) server configuration parameter.
Example:
@ -622,7 +619,7 @@ log_queries_min_type='EXCEPTION_WHILE_PROCESSING'
Setting up query threads logging.
Query threads run by ClickHouse with this setup are logged according to the rules in the [query\_thread\_log](../server-configuration-parameters/settings.md#server_configuration_parameters-query_thread_log) server configuration parameter.
Query threads run by ClickHouse with this setup are logged according to the rules in the [query\_thread\_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query_thread_log) server configuration parameter.
Example:
@ -789,7 +786,7 @@ For more information, see the section “Extreme values”.
## use\_uncompressed\_cache {#setting-use_uncompressed_cache}
Whether to use a cache of uncompressed blocks. Accepts 0 or 1. By default, 0 (disabled).
Using the uncompressed cache (only for tables in the MergeTree family) can significantly reduce latency and increase throughput when working with a large number of short queries. Enable this setting for users who send frequent short requests. Also pay attention to the [uncompressed\_cache\_size](../server-configuration-parameters/settings.md#server-settings-uncompressed_cache_size) configuration parameter (only set in the config file) – the size of uncompressed cache blocks. By default, it is 8 GiB. The uncompressed cache is filled in as needed and the least-used data is automatically deleted.
Using the uncompressed cache (only for tables in the MergeTree family) can significantly reduce latency and increase throughput when working with a large number of short queries. Enable this setting for users who send frequent short requests. Also pay attention to the [uncompressed\_cache\_size](../../operations/server-configuration-parameters/settings.md#server-settings-uncompressed_cache_size) configuration parameter (only set in the config file) – the size of uncompressed cache blocks. By default, it is 8 GiB. The uncompressed cache is filled in as needed and the least-used data is automatically deleted.
For queries that read at least a somewhat large volume of data (one million rows or more), the uncompressed cache is disabled automatically to save space for truly small queries. This means that you can keep the use\_uncompressed\_cache setting always set to 1.
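A sketch for a session that issues many short queries (the table and filter are hypothetical):

```sql
SET use_uncompressed_cache = 1;
SELECT count() FROM hits WHERE CounterID = 62;  -- repeated short queries benefit from the cache
```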
@ -822,6 +819,11 @@ ClickHouse supports the following algorithms of choosing replicas:
- [Nearest hostname](#load_balancing-nearest_hostname)
- [In order](#load_balancing-in_order)
- [First or random](#load_balancing-first_or_random)
- [Round robin](#load_balancing-round_robin)
See also:
- [distributed\_replica\_max\_ignored\_errors](#settings-distributed_replica_max_ignored_errors)
### Random (by Default) {#load_balancing-random}
@ -865,6 +867,14 @@ This algorithm chooses the first replica in the set or a random replica if the f
The `first_or_random` algorithm solves the problem of the `in_order` algorithm. With `in_order`, if one replica goes down, the next one gets a double load while the remaining replicas handle the usual amount of traffic. When using the `first_or_random` algorithm, the load is evenly distributed among replicas that are still available.
### Round Robin {#load_balancing-round_robin}
``` sql
load_balancing = round_robin
```
This algorithm uses a round-robin policy across replicas with the same number of errors (only the queries with the `round_robin` policy are accounted for).
## prefer\_localhost\_replica {#settings-prefer-localhost-replica}
Enables/disables preferable using the localhost replica when processing distributed queries.
@ -955,10 +965,10 @@ ClickHouse generates an exception
See also:
- [insert_quorum_timeout](#settings-insert_quorum_timeout)
- [select_sequential_consistency](#settings-select_sequential_consistency)
- [insert\_quorum\_timeout](#settings-insert_quorum_timeout)
- [select\_sequential\_consistency](#settings-select_sequential_consistency)
## insert_quorum_timeout {#settings-insert_quorum_timeout}
## insert\_quorum\_timeout {#settings-insert_quorum_timeout}
Write to quorum timeout in seconds. If the timeout has passed and no write has taken place yet, ClickHouse will generate an exception and the client must repeat the query to write the same block to the same or any other replica.
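A sketch of quorum writes combined with sequential-consistency reads (the replicated table is hypothetical):

```sql
SET insert_quorum = 2;                  -- wait for 2 replicas to acknowledge the write
INSERT INTO replicated_hits VALUES (1, 'a');
SET select_sequential_consistency = 1;  -- read only data written by completed quorum inserts
SELECT count() FROM replicated_hits;
```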
@ -986,8 +996,8 @@ When sequential consistency is enabled, ClickHouse allows the client to execute
See also:
- [insert_quorum](#settings-insert_quorum)
- [insert_quorum_timeout](#settings-insert_quorum_timeout)
- [insert\_quorum](#settings-insert_quorum)
- [insert\_quorum\_timeout](#settings-insert_quorum_timeout)
## insert\_deduplicate {#settings-insert-deduplicate}
@ -1002,7 +1012,6 @@ Default value: 1.
By default, blocks inserted into replicated tables by the `INSERT` statement are deduplicated (see [Data Replication](../../engines/table-engines/mergetree-family/replication.md)).
## deduplicate\_blocks\_in\_dependent\_materialized\_views {#settings-deduplicate-blocks-in-dependent-materialized-views}
Enables or disables the deduplication check for materialized views that receive data from Replicated\* tables.
@ -1067,15 +1076,15 @@ Default value: 0.
## count\_distinct\_implementation {#settings-count_distinct_implementation}
Specifies which of the `uniq*` functions should be used to perform the [COUNT(DISTINCT …)](../../sql-reference/aggregate-functions/reference.md#agg_function-count) construction.
Specifies which of the `uniq*` functions should be used to perform the [COUNT(DISTINCT …)](../../sql-reference/aggregate-functions/reference/count.md#agg_function-count) construction.
Possible values:
- [uniq](../../sql-reference/aggregate-functions/reference.md#agg_function-uniq)
- [uniqCombined](../../sql-reference/aggregate-functions/reference.md#agg_function-uniqcombined)
- [uniqCombined64](../../sql-reference/aggregate-functions/reference.md#agg_function-uniqcombined64)
- [uniqHLL12](../../sql-reference/aggregate-functions/reference.md#agg_function-uniqhll12)
- [uniqExact](../../sql-reference/aggregate-functions/reference.md#agg_function-uniqexact)
- [uniq](../../sql-reference/aggregate-functions/reference/uniq.md#agg_function-uniq)
- [uniqCombined](../../sql-reference/aggregate-functions/reference/uniqcombined.md#agg_function-uniqcombined)
- [uniqCombined64](../../sql-reference/aggregate-functions/reference/uniqcombined64.md#agg_function-uniqcombined64)
- [uniqHLL12](../../sql-reference/aggregate-functions/reference/uniqhll12.md#agg_function-uniqhll12)
- [uniqExact](../../sql-reference/aggregate-functions/reference/uniqexact.md#agg_function-uniqexact)
Default value: `uniqExact`.
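For example, a sketch of switching `COUNT(DISTINCT …)` to an approximate implementation (the table and column are hypothetical):

```sql
SET count_distinct_implementation = 'uniqCombined';
SELECT count(DISTINCT UserID) FROM hits;  -- now executed as uniqCombined(UserID)
```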
@ -1109,24 +1118,24 @@ Possible values:
Default value: 0.
## optimize_skip_unused_shards {#optimize-skip-unused-shards}
## optimize\_skip\_unused\_shards {#optimize-skip-unused-shards}
Enables or disables skipping of unused shards for [SELECT](../../sql-reference/statements/select/index.md) queries that have sharding key condition in `WHERE/PREWHERE` (assuming that the data is distributed by sharding key, otherwise does nothing).
Possible values:
- 0 — Disabled.
- 1 — Enabled.
- 0 — Disabled.
- 1 — Enabled.
Default value: 0
## force_optimize_skip_unused_shards {#force-optimize-skip-unused-shards}
## force\_optimize\_skip\_unused\_shards {#force-optimize-skip-unused-shards}
Enables or disables query execution if [optimize_skip_unused_shards](#optimize-skip-unused-shards) is enabled and skipping of unused shards is not possible. If the skipping is not possible and the setting is enabled, an exception will be thrown.
Enables or disables query execution if [optimize\_skip\_unused\_shards](#optimize-skip-unused-shards) is enabled and skipping of unused shards is not possible. If the skipping is not possible and the setting is enabled, an exception will be thrown.
Possible values:
- 0 — Disabled. ClickHouse doesn't throw an exception.
- 0 — Disabled. ClickHouse doesn't throw an exception.
- 1 — Enabled. Query execution is disabled only if the table has a sharding key.
- 2 — Enabled. Query execution is disabled regardless of whether a sharding key is defined for the table.
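A sketch of shard pruning on a Distributed table (the table is hypothetical and assumed to be sharded by `UserID`):

```sql
SET optimize_skip_unused_shards = 1;
SET force_optimize_skip_unused_shards = 1;  -- fail instead of silently querying all shards
SELECT count() FROM distributed_hits WHERE UserID = 12345;
```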
@ -1165,8 +1174,10 @@ Controls how fast errors in distributed tables are zeroed. If a replica is unava
See also:
- [load\_balancing](#load_balancing-round_robin)
- [Table engine Distributed](../../engines/table-engines/special/distributed.md)
- [distributed\_replica\_error\_cap](#settings-distributed_replica_error_cap)
- [distributed\_replica\_max\_ignored\_errors](#settings-distributed_replica_max_ignored_errors)
## distributed\_replica\_error\_cap {#settings-distributed_replica_error_cap}
@ -1177,8 +1188,24 @@ Error count of each replica is capped at this value, preventing a single replica
See also:
- [load\_balancing](#load_balancing-round_robin)
- [Table engine Distributed](../../engines/table-engines/special/distributed.md)
- [distributed\_replica\_error\_half\_life](#settings-distributed_replica_error_half_life)
- [distributed\_replica\_max\_ignored\_errors](#settings-distributed_replica_max_ignored_errors)
## distributed\_replica\_max\_ignored\_errors {#settings-distributed_replica_max_ignored_errors}
- Type: unsigned int
- Default value: 0
The number of errors that will be ignored while choosing replicas (according to the `load_balancing` algorithm).
See also:
- [load\_balancing](#load_balancing-round_robin)
- [Table engine Distributed](../../engines/table-engines/special/distributed.md)
- [distributed\_replica\_error\_cap](#settings-distributed_replica_error_cap)
- [distributed\_replica\_error\_half\_life](#settings-distributed_replica_error_half_life)
## distributed\_directory\_monitor\_sleep\_time\_ms {#distributed_directory_monitor_sleep_time_ms}
@ -1249,7 +1276,7 @@ Default value: 1000000000 nanoseconds (once a second).
See also:
- System table [trace\_log](../../operations/system-tables.md#system_tables-trace_log)
- System table [trace\_log](../../operations/system-tables/trace_log.md#system_tables-trace_log)
## query\_profiler\_cpu\_time\_period\_ns {#query_profiler_cpu_time_period_ns}
@ -1272,7 +1299,7 @@ Default value: 1000000000 nanoseconds.
See also:
- System table [trace\_log](../../operations/system-tables.md#system_tables-trace_log)
- System table [trace\_log](../../operations/system-tables/trace_log.md#system_tables-trace_log)
## allow\_introspection\_functions {#settings-allow_introspection_functions}
@ -1287,8 +1314,8 @@ Default value: 0.
**See Also**
- [Sampling Query Profiler](../optimizing-performance/sampling-query-profiler.md)
- System table [trace\_log](../../operations/system-tables.md#system_tables-trace_log)
- [Sampling Query Profiler](../../operations/optimizing-performance/sampling-query-profiler.md)
- System table [trace\_log](../../operations/system-tables/trace_log.md#system_tables-trace_log)
## input\_format\_parallel\_parsing {#input-format-parallel-parsing}
@ -1336,7 +1363,7 @@ Type: URL
Default value: Empty
## background_pool_size {#background_pool_size}
## background\_pool\_size {#background_pool_size}
Sets the number of threads performing background operations in table engines (for example, merges in [MergeTree engine](../../engines/table-engines/mergetree-family/index.md) tables). This setting is applied from the `default` profile at ClickHouse server start and can't be changed in a user session. By adjusting this setting, you manage CPU and disk load. A smaller pool size utilizes less CPU and disk resources, but background processes advance more slowly, which might eventually impact query performance.
@ -1348,9 +1375,9 @@ Possible values:
Default value: 16.
## background_buffer_flush_schedule_pool_size {#background_buffer_flush_schedule_pool_size}
## background\_buffer\_flush\_schedule\_pool\_size {#background_buffer_flush_schedule_pool_size}
Sets the number of threads performing background flush in [Buffer](../../engines/table-engines/special/buffer.md)-engine tables. This setting is applied at ClickHouse server start and can't be changed in a user session.
Sets the number of threads performing background flush in [Buffer](../../engines/table-engines/special/buffer.md)-engine tables. This setting is applied at ClickHouse server start and can't be changed in a user session.
Possible values:
@ -1358,7 +1385,7 @@ Possible values:
Default value: 16.
## background_move_pool_size {#background_move_pool_size}
## background\_move\_pool\_size {#background_move_pool_size}
Sets the number of threads performing background moves of data parts for [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes)-engine tables. This setting is applied at ClickHouse server start and can't be changed in a user session.
@ -1368,9 +1395,9 @@ Possible values:
Default value: 8.
## background_schedule_pool_size {#background_schedule_pool_size}
## background\_schedule\_pool\_size {#background_schedule_pool_size}
Sets the number of threads performing background tasks for [replicated](../../engines/table-engines/mergetree-family/replication.md) tables, [Kafka](../../engines/table-engines/integrations/kafka.md) streaming, [DNS cache updates](../server-configuration-parameters/settings.md#server-settings-dns-cache-update-period). This setting is applied at ClickHouse server start and can't be changed in a user session.
Sets the number of threads performing background tasks for [replicated](../../engines/table-engines/mergetree-family/replication.md) tables, [Kafka](../../engines/table-engines/integrations/kafka.md) streaming, [DNS cache updates](../../operations/server-configuration-parameters/settings.md#server-settings-dns-cache-update-period). This setting is applied at ClickHouse server start and can't be changed in a user session.
Possible values:
@ -1378,7 +1405,7 @@ Possible values:
Default value: 16.
## background_distributed_schedule_pool_size {#background_distributed_schedule_pool_size}
## background\_distributed\_schedule\_pool\_size {#background_distributed_schedule_pool_size}
Sets the number of threads performing background tasks for [distributed](../../engines/table-engines/special/distributed.md) sends. This setting is applied at ClickHouse server start and cant be changed in a user session.
@ -1388,9 +1415,68 @@ Possible values:
Default value: 16.
## low_cardinality_max_dictionary_size {#low_cardinality_max_dictionary_size}
## transform_null_in {#transform_null_in}
Sets a maximum size in rows of a shared global dictionary for the [LowCardinality](../../sql-reference/data-types/lowcardinality.md) data type that can be written to a storage file system. This setting prevents issues with RAM in case of unlimited dictionary growth. All the data that can't be encoded due to maximum dictionary size limitation ClickHouse writes in an ordinary method.
Enables equality of [NULL](../../sql-reference/syntax.md#null-literal) values for the [IN](../../sql-reference/operators/in.md) operator.
By default, `NULL` values can't be compared because `NULL` means an undefined value. Thus, the comparison `expr = NULL` must always return `false`. With this setting, `NULL = NULL` returns `true` for the `IN` operator.
Possible values:
- 0 — Comparison of `NULL` values in `IN` operator returns `false`.
- 1 — Comparison of `NULL` values in `IN` operator returns `true`.
Default value: 0.
**Example**
Consider the `null_in` table:
```text
┌──idx─┬─────i─┐
│ 1 │ 1 │
│ 2 │ NULL │
│ 3 │ 3 │
└──────┴───────┘
```
Query:
```sql
SELECT idx, i FROM null_in WHERE i IN (1, NULL) SETTINGS transform_null_in = 0;
```
Result:
```text
┌──idx─┬────i─┐
│ 1 │ 1 │
└──────┴──────┘
```
Query:
```sql
SELECT idx, i FROM null_in WHERE i IN (1, NULL) SETTINGS transform_null_in = 1;
```
Result:
```text
┌──idx─┬─────i─┐
│ 1 │ 1 │
│ 2 │ NULL │
└──────┴───────┘
```
**See Also**
- [NULL Processing in IN Operators](../../sql-reference/operators/in.md#in-null-processing)
## low\_cardinality\_max\_dictionary\_size {#low_cardinality_max_dictionary_size}
Sets a maximum size in rows of a shared global dictionary for the [LowCardinality](../../sql-reference/data-types/lowcardinality.md) data type that can be written to a storage file system. This setting prevents issues with RAM in case of unlimited dictionary growth. ClickHouse writes all data that can’t be encoded due to the maximum dictionary size limitation using the ordinary method.
Possible values:
@ -1398,7 +1484,7 @@ Possible values:
Default value: 8192.
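As an illustration (a sketch only; the table name and data are made up), the limit can be tightened for a single `INSERT` so that oversized dictionaries fall back to the ordinary encoding sooner:
```sql
-- Hypothetical table with a LowCardinality column.
CREATE TABLE lc_example (s LowCardinality(String)) ENGINE = MergeTree ORDER BY s;
-- Lower the dictionary size limit for this INSERT only.
INSERT INTO lc_example
SELECT toString(number % 50000) FROM numbers(1000000)
SETTINGS low_cardinality_max_dictionary_size = 4096;
```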
## low_cardinality_use_single_dictionary_for_part {#low_cardinality_use_single_dictionary_for_part}
## low\_cardinality\_use\_single\_dictionary\_for\_part {#low_cardinality_use_single_dictionary_for_part}
Turns on or turns off the use of a single dictionary for the data part.
@ -1406,44 +1492,73 @@ By default, ClickHouse server monitors the size of dictionaries and if a diction
Possible values:
- 1 — Creating several dictionaries for the data part is prohibited.
- 0 — Creating several dictionaries for the data part is not prohibited.
- 1 — Creating several dictionaries for the data part is prohibited.
- 0 — Creating several dictionaries for the data part is not prohibited.
Default value: 0.
## low_cardinality_allow_in_native_format {#low_cardinality_allow_in_native_format}
## low\_cardinality\_allow\_in\_native\_format {#low_cardinality_allow_in_native_format}
Allows or restricts using the [LowCardinality](../../sql-reference/data-types/lowcardinality.md) data type with the [Native](../../interfaces/formats.md#native) format.
If usage of `LowCardinality` is restricted, ClickHouse server converts `LowCardinality` columns to ordinary ones for `SELECT` queries, and converts ordinary columns to `LowCardinality` columns for `INSERT` queries.
This setting is required mainly for third-party clients which don't support `LowCardinality` data type.
This setting is required mainly for third-party clients which dont support `LowCardinality` data type.
Possible values:
- 1 — Usage of `LowCardinality` is not restricted.
- 0 — Usage of `LowCardinality` is restricted.
- 1 — Usage of `LowCardinality` is not restricted.
- 0 — Usage of `LowCardinality` is restricted.
Default value: 1.
## allow_suspicious_low_cardinality_types {#allow_suspicious_low_cardinality_types}
## allow\_suspicious\_low\_cardinality\_types {#allow_suspicious_low_cardinality_types}
Allows or restricts using [LowCardinality](../../sql-reference/data-types/lowcardinality.md) with data types with a fixed size of 8 bytes or less: numeric data types and `FixedString(8_bytes_or_less)`.
For small fixed values, using `LowCardinality` is usually inefficient, because ClickHouse stores a numeric index for each row. As a result:
- Disk space usage can rise.
- RAM consumption can be higher, depending on a dictionary size.
- Some functions can work slower due to extra coding/encoding operations.
- Disk space usage can rise.
- RAM consumption can be higher, depending on a dictionary size.
- Some functions can work slower due to extra coding/encoding operations.
Merge times in [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md)-engine tables can grow due to all the reasons described above.
Possible values:
- 1 — Usage of `LowCardinality` is not restricted.
- 0 — Usage of `LowCardinality` is restricted.
- 1 — Usage of `LowCardinality` is not restricted.
- 0 — Usage of `LowCardinality` is restricted.
Default value: 0.
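For illustration (a sketch; the table name is made up), creating a `LowCardinality` column over a small fixed-size type is rejected while the setting is 0 and allowed once it is enabled:
```sql
-- Throws an exception with the default value of 0.
CREATE TABLE lc_suspicious (x LowCardinality(UInt8)) ENGINE = MergeTree ORDER BY x;
-- Works after lifting the restriction for the session.
SET allow_suspicious_low_cardinality_types = 1;
CREATE TABLE lc_suspicious (x LowCardinality(UInt8)) ENGINE = MergeTree ORDER BY x;
```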
## min_insert_block_size_rows_for_materialized_views {#min-insert-block-size-rows-for-materialized-views}
Sets the minimum number of rows in a block that can be inserted into a table by an `INSERT` query. Smaller-sized blocks are squashed into bigger ones. This setting is applied only for blocks inserted into a [materialized view](../../sql-reference/statements/create.md#create-view). By adjusting this setting, you control block squashing when pushing to the materialized view and avoid excessive memory usage.
Possible values:
- Any positive integer.
- 0 — Squashing disabled.
Default value: 1048576.
**See Also**
- [min_insert_block_size_rows](#min-insert-block-size-rows)
## min_insert_block_size_bytes_for_materialized_views {#min-insert-block-size-bytes-for-materialized-views}
Sets the minimum number of bytes in a block that can be inserted into a table by an `INSERT` query. Smaller-sized blocks are squashed into bigger ones. This setting is applied only for blocks inserted into a [materialized view](../../sql-reference/statements/create.md#create-view). By adjusting this setting, you control block squashing when pushing to the materialized view and avoid excessive memory usage.
Possible values:
- Any positive integer.
- 0 — Squashing disabled.
Default value: 268435456.
**See also**
- [min_insert_block_size_bytes](#min-insert-block-size-bytes)
[Original article](https://clickhouse.tech/docs/en/operations/settings/settings/) <!-- hide -->
File diff suppressed because it is too large
View File
@ -0,0 +1,3 @@
## system.asynchronous\_metric\_log {#system-tables-async-log}
Contains historical values of [system.asynchronous\_metrics](asynchronous_metrics.md#system_tables-asynchronous_metrics), which are saved periodically.
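A quick way to look at recent entries (a sketch; the table is present only if asynchronous metric logging is enabled in the server configuration, and `SELECT *` is used because the exact column set may vary between versions):
``` sql
SELECT * FROM system.asynchronous_metric_log LIMIT 10
```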
View File
@ -0,0 +1,36 @@
# system.asynchronous\_metrics {#system_tables-asynchronous_metrics}
Contains metrics that are calculated periodically in the background. For example, the amount of RAM in use.
Columns:
- `metric` ([String](../../sql-reference/data-types/string.md)) — Metric name.
- `value` ([Float64](../../sql-reference/data-types/float.md)) — Metric value.
**Example**
``` sql
SELECT * FROM system.asynchronous_metrics LIMIT 10
```
``` text
┌─metric──────────────────────────────────┬──────value─┐
│ jemalloc.background_thread.run_interval │ 0 │
│ jemalloc.background_thread.num_runs │ 0 │
│ jemalloc.background_thread.num_threads │ 0 │
│ jemalloc.retained │ 422551552 │
│ jemalloc.mapped │ 1682989056 │
│ jemalloc.resident │ 1656446976 │
│ jemalloc.metadata_thp │ 0 │
│ jemalloc.metadata │ 10226856 │
│ UncompressedCacheCells │ 0 │
│ MarkCacheFiles │ 0 │
└─────────────────────────────────────────┴────────────┘
```
**See Also**
- [Monitoring](../../operations/monitoring.md) — Base concepts of ClickHouse monitoring.
- [system.metrics](metrics.md#system_tables-metrics) — Contains instantly calculated metrics.
- [system.events](events.md#system_tables-events) — Contains a number of events that have occurred.
- [system.metric\_log](metric_log.md#system_tables-metric_log) — Contains a history of metrics values from the tables `system.metrics` and `system.events`.
View File
@ -0,0 +1,24 @@
# system.clusters {#system-clusters}
Contains information about clusters available in the config file and the servers in them.
Columns:
- `cluster` (String) — The cluster name.
- `shard_num` (UInt32) — The shard number in the cluster, starting from 1.
- `shard_weight` (UInt32) — The relative weight of the shard when writing data.
- `replica_num` (UInt32) — The replica number in the shard, starting from 1.
- `host_name` (String) — The host name, as specified in the config.
- `host_address` (String) — The host IP address obtained from DNS.
- `port` (UInt16) — The port to use for connecting to the server.
- `user` (String) — The name of the user for connecting to the server.
- `errors_count` (UInt32) — The number of times this host failed to reach a replica.
- `estimated_recovery_time` (UInt32) — Seconds left until the replica error count is zeroed and it is considered to be back to normal.
Please note that `errors_count` is updated once per query to the cluster, but `estimated_recovery_time` is recalculated on demand. So there could be a case of non-zero `errors_count` and zero `estimated_recovery_time`; the next query will then zero `errors_count` and try to use the replica as if it had no errors.
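For example, the configured clusters and their replicas can be listed like this (a sketch):
``` sql
SELECT cluster, shard_num, replica_num, host_name, port, errors_count
FROM system.clusters
```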
**See also**
- [Table engine Distributed](../../engines/table-engines/special/distributed.md)
- [distributed\_replica\_error\_cap setting](../../operations/settings/settings.md#settings-distributed_replica_error_cap)
- [distributed\_replica\_error\_half\_life setting](../../operations/settings/settings.md#settings-distributed_replica_error_half_life)
View File
@ -0,0 +1,22 @@
# system.columns {#system-columns}
Contains information about columns in all the tables.
You can use this table to get information similar to the [DESCRIBE TABLE](../../sql-reference/statements/misc.md#misc-describe-table) query, but for multiple tables at once.
The `system.columns` table contains the following columns (the column type is shown in brackets):
- `database` (String) — Database name.
- `table` (String) — Table name.
- `name` (String) — Column name.
- `type` (String) — Column type.
- `default_kind` (String) — Expression type (`DEFAULT`, `MATERIALIZED`, `ALIAS`) for the default value, or an empty string if it is not defined.
- `default_expression` (String) — Expression for the default value, or an empty string if it is not defined.
- `data_compressed_bytes` (UInt64) — The size of compressed data, in bytes.
- `data_uncompressed_bytes` (UInt64) — The size of decompressed data, in bytes.
- `marks_bytes` (UInt64) — The size of marks, in bytes.
- `comment` (String) — Comment on the column, or an empty string if it is not defined.
- `is_in_partition_key` (UInt8) — Flag that indicates whether the column is in the partition expression.
- `is_in_sorting_key` (UInt8) — Flag that indicates whether the column is in the sorting key expression.
- `is_in_primary_key` (UInt8) — Flag that indicates whether the column is in the primary key expression.
- `is_in_sampling_key` (UInt8) — Flag that indicates whether the column is in the sampling key expression.
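For example (a sketch), the columns of one particular table can be inspected with a query like this:
``` sql
SELECT name, type, default_kind, default_expression
FROM system.columns
WHERE database = 'system' AND table = 'columns'
```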
View File
@ -0,0 +1,40 @@
# system.contributors {#system-contributors}
Contains information about contributors. The order is random at query execution time.
Columns:
- `name` (String) — Contributor (author) name from git log.
**Example**
``` sql
SELECT * FROM system.contributors LIMIT 10
```
``` text
┌─name─────────────┐
│ Olga Khvostikova │
│ Max Vetrov │
│ LiuYangkuan │
│ svladykin │
│ zamulla │
│ Šimon Podlipský │
│ BayoNet │
│ Ilya Khomutov │
│ Amy Krishnevsky │
│ Loud_Scream │
└──────────────────┘
```
To find yourself in the table, use this query:
``` sql
SELECT * FROM system.contributors WHERE name = 'Olga Khvostikova'
```
``` text
┌─name─────────────┐
│ Olga Khvostikova │
└──────────────────┘
```
View File
@ -0,0 +1,34 @@
# system.data\_type\_families {#system_tables-data_type_families}
Contains information about supported [data types](../../sql-reference/data-types/).
Columns:
- `name` ([String](../../sql-reference/data-types/string.md)) — Data type name.
- `case_insensitive` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Property that shows whether you can use a data type name in a query in a case-insensitive manner. For example, `Date` and `date` are both valid.
- `alias_to` ([String](../../sql-reference/data-types/string.md)) — Data type name for which `name` is an alias.
**Example**
``` sql
SELECT * FROM system.data_type_families WHERE alias_to = 'String'
```
``` text
┌─name───────┬─case_insensitive─┬─alias_to─┐
│ LONGBLOB │ 1 │ String │
│ LONGTEXT │ 1 │ String │
│ TINYTEXT │ 1 │ String │
│ TEXT │ 1 │ String │
│ VARCHAR │ 1 │ String │
│ MEDIUMBLOB │ 1 │ String │
│ BLOB │ 1 │ String │
│ TINYBLOB │ 1 │ String │
│ CHAR │ 1 │ String │
│ MEDIUMTEXT │ 1 │ String │
└────────────┴──────────────────┴──────────┘
```
**See Also**
- [Syntax](../../sql-reference/syntax.md) — Information about supported syntax.
View File
@ -0,0 +1,7 @@
# system.databases {#system-databases}
This table contains a single String column called `name`, which is the name of a database.
Each database that the server knows about has a corresponding entry in the table.
This system table is used for implementing the `SHOW DATABASES` query.
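The following query returns the same list as `SHOW DATABASES` (a minimal sketch):
``` sql
SELECT name FROM system.databases
```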
View File
@ -0,0 +1,9 @@
# system.detached\_parts {#system_tables-detached_parts}
Contains information about detached parts of [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) tables. The `reason` column specifies why the part was detached.
For user-detached parts, the reason is empty. Such parts can be attached with the [ALTER TABLE ATTACH PARTITION\|PART](../../sql-reference/statements/alter.md#alter_attach-partition) command.
For the description of other columns, see [system.parts](parts.md#system_tables-parts).
If a part name is invalid, the values of some columns may be `NULL`. Such parts can be deleted with [ALTER TABLE DROP DETACHED PART](../../sql-reference/statements/alter.md#alter_drop-detached).
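For example (a sketch; the table and partition in the `ALTER` statement are hypothetical):
``` sql
-- List detached parts and the reason they were detached.
SELECT database, table, partition_id, name, reason FROM system.detached_parts;
-- Re-attach a user-detached partition (hypothetical table and partition ID).
ALTER TABLE my_db.my_table ATTACH PARTITION 202006;
```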
View File
@ -0,0 +1,61 @@
# system.dictionaries {#system_tables-dictionaries}
Contains information about [external dictionaries](../../sql-reference/dictionaries/external-dictionaries/external-dicts.md).
Columns:
- `database` ([String](../../sql-reference/data-types/string.md)) — Name of the database containing the dictionary created by DDL query. Empty string for other dictionaries.
- `name` ([String](../../sql-reference/data-types/string.md)) — [Dictionary name](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict.md).
- `status` ([Enum8](../../sql-reference/data-types/enum.md)) — Dictionary status. Possible values:
- `NOT_LOADED` — Dictionary was not loaded because it was not used.
- `LOADED` — Dictionary loaded successfully.
- `FAILED` — Unable to load the dictionary as a result of an error.
- `LOADING` — Dictionary is loading now.
- `LOADED_AND_RELOADING` — Dictionary is loaded successfully, and is being reloaded right now (frequent reasons: [SYSTEM RELOAD DICTIONARY](../../sql-reference/statements/system.md#query_language-system-reload-dictionary) query, timeout, dictionary config has changed).
- `FAILED_AND_RELOADING` — Could not load the dictionary as a result of an error and is loading now.
- `origin` ([String](../../sql-reference/data-types/string.md)) — Path to the configuration file that describes the dictionary.
- `type` ([String](../../sql-reference/data-types/string.md)) — Type of a dictionary allocation. [Storing Dictionaries in Memory](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md).
- `key` — [Key type](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md#ext_dict_structure-key): Numeric Key ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) or Composite key ([String](../../sql-reference/data-types/string.md)) — form “(type 1, type 2, …, type n)”.
- `attribute.names` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — Array of [attribute names](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md#ext_dict_structure-attributes) provided by the dictionary.
- `attribute.types` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — Corresponding array of [attribute types](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md#ext_dict_structure-attributes) that are provided by the dictionary.
- `bytes_allocated` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Amount of RAM allocated for the dictionary.
- `query_count` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Number of queries since the dictionary was loaded or since the last successful reboot.
- `hit_rate` ([Float64](../../sql-reference/data-types/float.md)) — For cache dictionaries, the percentage of uses for which the value was in the cache.
- `element_count` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Number of items stored in the dictionary.
- `load_factor` ([Float64](../../sql-reference/data-types/float.md)) — Percentage filled in the dictionary (for a hashed dictionary, the percentage filled in the hash table).
- `source` ([String](../../sql-reference/data-types/string.md)) — Text describing the [data source](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md) for the dictionary.
- `lifetime_min` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Minimum [lifetime](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md) of the dictionary in memory, after which ClickHouse tries to reload the dictionary (if `invalidate_query` is set, then only if it has changed). Set in seconds.
- `lifetime_max` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Maximum [lifetime](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md) of the dictionary in memory, after which ClickHouse tries to reload the dictionary (if `invalidate_query` is set, then only if it has changed). Set in seconds.
- `loading_start_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Start time for loading the dictionary.
- `last_successful_update_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — End time for loading or updating the dictionary. Helps to monitor some troubles with external sources and investigate causes.
- `loading_duration` ([Float32](../../sql-reference/data-types/float.md)) — Duration of a dictionary loading.
- `last_exception` ([String](../../sql-reference/data-types/string.md)) — Text of the error that occurs when creating or reloading the dictionary if the dictionary couldnt be created.
**Example**
Configure the dictionary.
``` sql
CREATE DICTIONARY dictdb.dict
(
`key` Int64 DEFAULT -1,
`value_default` String DEFAULT 'world',
`value_expression` String DEFAULT 'xxx' EXPRESSION 'toString(127 * 172)'
)
PRIMARY KEY key
SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'dicttbl' DB 'dictdb'))
LIFETIME(MIN 0 MAX 1)
LAYOUT(FLAT())
```
Make sure that the dictionary is loaded.
``` sql
SELECT * FROM system.dictionaries
```
``` text
┌─database─┬─name─┬─status─┬─origin──────┬─type─┬─key────┬─attribute.names──────────────────────┬─attribute.types─────┬─bytes_allocated─┬─query_count─┬─hit_rate─┬─element_count─┬───────────load_factor─┬─source─────────────────────┬─lifetime_min─┬─lifetime_max─┬──loading_start_time─┬──last_successful_update_time─┬──────loading_duration─┬─last_exception─┐
│ dictdb │ dict │ LOADED │ dictdb.dict │ Flat │ UInt64 │ ['value_default','value_expression'] │ ['String','String'] │ 74032 │ 0 │ 1 │ 1 │ 0.0004887585532746823 │ ClickHouse: dictdb.dicttbl │ 0 │ 1 │ 2020-03-04 04:17:34 │ 2020-03-04 04:30:34 │ 0.002 │ │
└──────────┴──────┴────────┴─────────────┴──────┴────────┴──────────────────────────────────────┴─────────────────────┴─────────────────┴─────────────┴──────────┴───────────────┴───────────────────────┴────────────────────────────┴──────────────┴──────────────┴─────────────────────┴──────────────────────────────┴───────────────────────┴────────────────┘
```
View File
@ -0,0 +1,26 @@
# system.disks {#system_tables-disks}
Contains information about disks defined in the [server configuration](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes_configure).
Columns:
- `name` ([String](../../sql-reference/data-types/string.md)) — Name of a disk in the server configuration.
- `path` ([String](../../sql-reference/data-types/string.md)) — Path to the mount point in the file system.
- `free_space` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Free space on disk in bytes.
- `total_space` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Disk volume in bytes.
- `keep_free_space` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Amount of disk space that should stay free on disk in bytes. Defined in the `keep_free_space_bytes` parameter of disk configuration.
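For example (a sketch):
``` sql
SELECT name, path, formatReadableSize(free_space) AS free, formatReadableSize(total_space) AS total
FROM system.disks
```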
## system.storage\_policies {#system_tables-storage_policies}
Contains information about storage policies and volumes defined in the [server configuration](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes_configure).
Columns:
- `policy_name` ([String](../../sql-reference/data-types/string.md)) — Name of the storage policy.
- `volume_name` ([String](../../sql-reference/data-types/string.md)) — Volume name defined in the storage policy.
- `volume_priority` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Volume order number in the configuration.
- `disks` ([Array(String)](../../sql-reference/data-types/array.md)) — Disk names, defined in the storage policy.
- `max_data_part_size` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Maximum size of a data part that can be stored on volume disks (0 — no limit).
- `move_factor` ([Float64](../../sql-reference/data-types/float.md)) — Ratio of free disk space. When the ratio exceeds the value of the configuration parameter, ClickHouse starts to move data to the next volume in order.
If the storage policy contains more than one volume, then the information for each volume is stored in an individual row of the table.
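For example (a sketch):
``` sql
SELECT policy_name, volume_name, disks, max_data_part_size, move_factor
FROM system.storage_policies
```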
View File
@ -0,0 +1,32 @@
# system.events {#system_tables-events}
Contains information about the number of events that have occurred in the system. For example, in the table, you can find how many `SELECT` queries were processed since the ClickHouse server started.
Columns:
- `event` ([String](../../sql-reference/data-types/string.md)) — Event name.
- `value` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Number of events occurred.
- `description` ([String](../../sql-reference/data-types/string.md)) — Event description.
**Example**
``` sql
SELECT * FROM system.events LIMIT 5
```
``` text
┌─event─────────────────────────────────┬─value─┬─description────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐
│ Query │ 12 │ Number of queries to be interpreted and potentially executed. Does not include queries that failed to parse or were rejected due to AST size limits, quota limits or limits on the number of simultaneously running queries. May include internal queries initiated by ClickHouse itself. Does not count subqueries. │
│ SelectQuery │ 8 │ Same as Query, but only for SELECT queries. │
│ FileOpen │ 73 │ Number of files opened. │
│ ReadBufferFromFileDescriptorRead │ 155 │ Number of reads (read/pread) from a file descriptor. Does not include sockets. │
│ ReadBufferFromFileDescriptorReadBytes │ 9931 │ Number of bytes read from file descriptors. If the file is compressed, this will show the compressed data size. │
└───────────────────────────────────────┴───────┴────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
```
**See Also**
- [system.asynchronous\_metrics](asynchronous_metrics.md#system_tables-asynchronous_metrics) — Contains periodically calculated metrics.
- [system.metrics](metrics.md#system_tables-metrics) — Contains instantly calculated metrics.
- [system.metric\_log](metric_log.md#system_tables-metric_log) — Contains a history of metrics values from the tables `system.metrics` and `system.events`.
- [Monitoring](../../operations/monitoring.md) — Base concepts of ClickHouse monitoring.
Some files were not shown because too many files have changed in this diff