Merge branch 'master' of github.com:ClickHouse/ClickHouse into sentry

This commit is contained in:
Ivan Blinkov 2020-06-22 10:06:36 +03:00
commit 2c0ff29c48
917 changed files with 21628 additions and 9344 deletions

View File

@ -16,6 +16,5 @@ ClickHouse is an open-source column-oriented database management system that all
## Upcoming Events
* [ClickHouse Online Meetup (in Russian)](https://events.yandex.ru/events/click-house-onlajn-vs-18-06-2020) on June 18, 2020.
* [ClickHouse Workshop in Novosibirsk](https://2020.codefest.ru/lecture/1628) on TBD date.
* [Yandex C++ Open-Source Sprints in Moscow](https://events.yandex.ru/events/otkrytyj-kod-v-yandek-28-03-2020) on TBD date.

View File

@ -75,7 +75,7 @@ std::string determineDefaultTimeZone()
try
{
tz_database_path = fs::canonical(tz_database_path);
tz_database_path = fs::weakly_canonical(tz_database_path);
/// The tzdata file exists. If it is inside the tz_database_dir,
/// then the relative path is the time zone id.
@ -91,7 +91,7 @@ std::string determineDefaultTimeZone()
if (!tz_file_path.is_absolute())
tz_file_path = tz_database_path / tz_file_path;
tz_file_path = fs::canonical(tz_file_path);
tz_file_path = fs::weakly_canonical(tz_file_path);
fs::path relative_path = tz_file_path.lexically_relative(tz_database_path);
if (!relative_path.empty() && *relative_path.begin() != ".." && *relative_path.begin() != ".")

View File

@ -39,6 +39,7 @@
#include <common/argsToConfig.h>
#include <common/getThreadId.h>
#include <common/coverage.h>
#include <common/sleep.h>
#include <IO/WriteBufferFromFile.h>
#include <IO/WriteBufferFromFileDescriptorDiscardOnFailure.h>
@ -140,7 +141,7 @@ static void signalHandler(int sig, siginfo_t * info, void * context)
if (sig != SIGTSTP) /// This signal is used for debugging.
{
/// The time that is usually enough for a separate thread to print info into the log.
::sleep(10);
sleepForSeconds(10);
call_default_signal_handler(sig);
}

contrib/hyperscan vendored

@ -1 +1 @@
Subproject commit 3058c9c20cba3accdf92544d8513a26240c4ff70
Subproject commit 3907fd00ee8b2538739768fa9533f8635a276531

View File

@ -219,7 +219,9 @@ if (ENABLE_HYPERSCAN)
target_compile_definitions (hyperscan PUBLIC USE_HYPERSCAN=1)
target_compile_options (hyperscan
PRIVATE -g0 -march=corei7 # library has too much debug information
PRIVATE -g0 # Library has too much debug information
-march=corei7 -O2 -fno-strict-aliasing -fno-omit-frame-pointer -fvisibility=hidden # The options from original build system
-fno-sanitize=undefined # Assume the library takes care of itself
)
target_include_directories (hyperscan
PRIVATE

debian/control vendored
View File

@ -28,7 +28,7 @@ Description: Client binary for ClickHouse
Package: clickhouse-common-static
Architecture: any
Depends: ${shlibs:Depends}, ${misc:Depends}, tzdata
Depends: ${shlibs:Depends}, ${misc:Depends}
Suggests: clickhouse-common-static-dbg
Replaces: clickhouse-common, clickhouse-server-base
Provides: clickhouse-common, clickhouse-server-base

View File

@ -17,7 +17,6 @@ RUN apt-get update \
clickhouse-client=$version \
clickhouse-common-static=$version \
locales \
tzdata \
&& rm -rf /var/lib/apt/lists/* /var/cache/debconf \
&& apt-get clean

View File

@ -21,7 +21,6 @@ RUN apt-get update \
locales \
ca-certificates \
wget \
tzdata \
&& rm -rf \
/var/lib/apt/lists/* \
/var/cache/debconf \

View File

@ -17,8 +17,7 @@ RUN apt-get update \
odbc-postgresql \
sqlite3 \
curl \
tar \
tzdata
tar
RUN rm -rf \
/var/lib/apt/lists/* \
/var/cache/debconf \

View File

@ -22,7 +22,7 @@ function configure
echo all killed
set -m # Spawn temporary in its own process groups
left/clickhouse-server --config-file=left/config/config.xml -- --path db0 &> setup-server-log.log &
left/clickhouse-server --config-file=left/config/config.xml -- --path db0 --user_files_path db0/user_files &> setup-server-log.log &
left_pid=$!
kill -0 $left_pid
disown $left_pid
@ -59,12 +59,12 @@ function restart
set -m # Spawn servers in their own process groups
left/clickhouse-server --config-file=left/config/config.xml -- --path left/db &>> left-server-log.log &
left/clickhouse-server --config-file=left/config/config.xml -- --path left/db --user_files_path left/db/user_files &>> left-server-log.log &
left_pid=$!
kill -0 $left_pid
disown $left_pid
right/clickhouse-server --config-file=right/config/config.xml -- --path right/db &>> right-server-log.log &
right/clickhouse-server --config-file=right/config/config.xml -- --path right/db --user_files_path right/db/user_files &>> right-server-log.log &
right_pid=$!
kill -0 $right_pid
disown $right_pid

View File

@ -131,5 +131,8 @@ done
dmesg -T > dmesg.log
7z a '-x!*/tmp' /output/output.7z ./*.{log,tsv,html,txt,rep,svg,columns} {right,left}/{performance,db/preprocessed_configs,scripts} report analyze benchmark
7z a '-x!*/tmp' /output/output.7z ./*.{log,tsv,html,txt,rep,svg,columns} \
{right,left}/{performance,scripts} {{right,left}/db,db0}/preprocessed_configs \
report analyze benchmark
cp compare.log /output

View File

@ -20,9 +20,9 @@ RUN apt-get --allow-unauthenticated update -y \
# apt-get --allow-unauthenticated install --yes --no-install-recommends \
# pvs-studio
ENV PKG_VERSION="pvs-studio-7.07.38234.46-amd64.deb"
ENV PKG_VERSION="pvs-studio-7.07.38234.48-amd64.deb"
RUN wget "http://files.viva64.com/$PKG_VERSION"
RUN wget "https://files.viva64.com/$PKG_VERSION"
RUN sudo dpkg -i "$PKG_VERSION"
CMD cd /repo_folder && pvs-studio-analyzer credentials $LICENCE_NAME $LICENCE_KEY -o ./licence.lic \

View File

@ -7,7 +7,7 @@ toc_title: How to Build ClickHouse on Linux for AARCH64 (ARM64)
This is for the case when you have a Linux machine and want to use it to build a `clickhouse` binary that will run on another Linux machine with the AARCH64 CPU architecture. This is intended for continuous integration checks that run on Linux servers.
The cross-build for AARCH64 is based on the [Build instructions](build.md), follow them first.
The cross-build for AARCH64 is based on the [Build instructions](../development/build.md), follow them first.
# Install Clang-8 {#install-clang-8}

View File

@ -5,9 +5,9 @@ toc_title: How to Build ClickHouse on Linux for Mac OS X
# How to Build ClickHouse on Linux for Mac OS X {#how-to-build-clickhouse-on-linux-for-mac-os-x}
This is for the case when you have a Linux machine and want to use it to build a `clickhouse` binary that will run on OS X. This is intended for continuous integration checks that run on Linux servers. If you want to build ClickHouse directly on Mac OS X, then proceed with [another instruction](build-osx.md).
This is for the case when you have a Linux machine and want to use it to build a `clickhouse` binary that will run on OS X. This is intended for continuous integration checks that run on Linux servers. If you want to build ClickHouse directly on Mac OS X, then proceed with [another instruction](../development/build-osx.md).
The cross-build for Mac OS X is based on the [Build instructions](build.md), follow them first.
The cross-build for Mac OS X is based on the [Build instructions](../development/build.md), follow them first.
# Install Clang-8 {#install-clang-8}

View File

@ -28,10 +28,9 @@ There are several ways to do this.
### Install from Repository {#install-from-repository}
On Ubuntu 19.10 or newer:
```
$ sudo apt-get update
$ sudo apt-get install gcc-9 g++-9
```
$ sudo apt-get update
$ sudo apt-get install gcc-9 g++-9
### Install from a PPA Package {#install-from-a-ppa-package}

View File

@ -3,11 +3,11 @@ toc_priority: 61
toc_title: For Beginners
---
# The Beginner ClickHouse Developer Instruction
# The Beginner ClickHouse Developer Instruction {#the-beginner-clickhouse-developer-instruction}
Building of ClickHouse is supported on Linux, FreeBSD and Mac OS X.
If you use Windows, you need to create a virtual machine with Ubuntu. To start working with a virtual machine please install VirtualBox. You can download Ubuntu from the website: https://www.ubuntu.com/#download. Please create a virtual machine from the downloaded image (you should reserve at least 4GB of RAM for it). To run a command-line terminal in Ubuntu, please locate a program containing the word “terminal” in its name (gnome-terminal, konsole etc.) or just press Ctrl+Alt+T.
If you use Windows, you need to create a virtual machine with Ubuntu. To start working with a virtual machine please install VirtualBox. You can download Ubuntu from the website: https://www.ubuntu.com/\#download. Please create a virtual machine from the downloaded image (you should reserve at least 4GB of RAM for it). To run a command-line terminal in Ubuntu, please locate a program containing the word “terminal” in its name (gnome-terminal, konsole etc.) or just press Ctrl+Alt+T.
ClickHouse cannot work or build on a 32-bit system. You should acquire access to a 64-bit system and you can continue reading.
@ -137,7 +137,7 @@ Official Yandex builds currently use GCC because it generates machine code of sl
To install GCC on Ubuntu run: `sudo apt install gcc g++`
Check the version of gcc: `gcc --version`. If it is below 9, then follow the instruction here: https://clickhouse.tech/docs/en/development/build/#install-gcc-9.
Check the version of gcc: `gcc --version`. If it is below 9, then follow the instruction here: https://clickhouse.tech/docs/en/development/build/\#install-gcc-9.
Mac OS X build is supported only for Clang. Just run `brew install llvm`

View File

@ -200,7 +200,7 @@ Debug version of `jemalloc` is used for debug build.
ClickHouse fuzzing is implemented both using [libFuzzer](https://llvm.org/docs/LibFuzzer.html) and random SQL queries.
All the fuzz testing should be performed with sanitizers (Address and Undefined).
LibFuzzer is used for isolated fuzz testing of library code. Fuzzers are implemented as part of test code and have "\_fuzzer" name postfixes.
LibFuzzer is used for isolated fuzz testing of library code. Fuzzers are implemented as part of test code and have “\_fuzzer” name postfixes.
Fuzzer example can be found at `src/Parsers/tests/lexer_fuzzer.cpp`. LibFuzzer-specific configs, dictionaries and corpus are stored at `tests/fuzz`.
We encourage you to write fuzz tests for every functionality that handles user input.
@ -211,7 +211,6 @@ Google OSS-Fuzz can be found at `docker/fuzz`.
We also use a simple fuzz test to generate random SQL queries and to check that the server doesn’t die executing them.
You can find it in `00746_sql_fuzzy.pl`. This test should be run continuously (overnight and longer).
## Security Audit {#security-audit}
People from the Yandex Security Team perform a basic review of ClickHouse capabilities from the security standpoint.

View File

@ -12,8 +12,8 @@ By default, ClickHouse uses its native database engine, which provides configura
You can also use the following database engines:
- [MySQL](mysql.md)
- [MySQL](../../engines/database-engines/mysql.md)
- [Lazy](lazy.md)
- [Lazy](../../engines/database-engines/lazy.md)
[Original article](https://clickhouse.tech/docs/en/database_engines/) <!--hide-->
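
For illustration, a database backed by the MySQL engine listed above can be declared as follows (a minimal sketch; the host, database name and credentials are hypothetical):

``` sql
-- Hypothetical connection parameters; replace with real values.
CREATE DATABASE mysql_db ENGINE = MySQL('mysql-host:3306', 'source_db', 'user', 'password');
```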

View File

@ -1,8 +1,8 @@
---
toc_folder_title: Engines
toc_hidden: true
toc_priority: 25
toc_title: hidden
toc_hidden: true
---
{## [Original article](https://clickhouse.tech/docs/en/engines/) ##}

View File

@ -19,27 +19,27 @@ The table engine (type of table) determines:
### MergeTree {#mergetree}
The most universal and functional table engines for high-load tasks. The property shared by these engines is quick data insertion with subsequent background data processing. `MergeTree` family engines support data replication (with [Replicated*](mergetree-family/replication.md#table_engines-replication) versions of engines), partitioning, and other features not supported in other engines.
The most universal and functional table engines for high-load tasks. The property shared by these engines is quick data insertion with subsequent background data processing. `MergeTree` family engines support data replication (with [Replicated\*](../../engines/table-engines/mergetree-family/replication.md#table_engines-replication) versions of engines), partitioning, and other features not supported in other engines.
Engines in the family:
- [MergeTree](mergetree-family/mergetree.md#mergetree)
- [ReplacingMergeTree](mergetree-family/replacingmergetree.md#replacingmergetree)
- [SummingMergeTree](mergetree-family/summingmergetree.md#summingmergetree)
- [AggregatingMergeTree](mergetree-family/aggregatingmergetree.md#aggregatingmergetree)
- [CollapsingMergeTree](mergetree-family/collapsingmergetree.md#table_engine-collapsingmergetree)
- [VersionedCollapsingMergeTree](mergetree-family/versionedcollapsingmergetree.md#versionedcollapsingmergetree)
- [GraphiteMergeTree](mergetree-family/graphitemergetree.md#graphitemergetree)
- [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md#mergetree)
- [ReplacingMergeTree](../../engines/table-engines/mergetree-family/replacingmergetree.md#replacingmergetree)
- [SummingMergeTree](../../engines/table-engines/mergetree-family/summingmergetree.md#summingmergetree)
- [AggregatingMergeTree](../../engines/table-engines/mergetree-family/aggregatingmergetree.md#aggregatingmergetree)
- [CollapsingMergeTree](../../engines/table-engines/mergetree-family/collapsingmergetree.md#table_engine-collapsingmergetree)
- [VersionedCollapsingMergeTree](../../engines/table-engines/mergetree-family/versionedcollapsingmergetree.md#versionedcollapsingmergetree)
- [GraphiteMergeTree](../../engines/table-engines/mergetree-family/graphitemergetree.md#graphitemergetree)
### Log {#log}
Lightweight [engines](log-family/index.md) with minimum functionality. They’re the most effective when you need to quickly write many small tables (up to approximately 1 million rows) and read them later as a whole.
Lightweight [engines](../../engines/table-engines/log-family/index.md) with minimum functionality. They’re the most effective when you need to quickly write many small tables (up to approximately 1 million rows) and read them later as a whole.
Engines in the family:
- [TinyLog](log-family/tinylog.md#tinylog)
- [StripeLog](log-family/stripelog.md#stripelog)
- [Log](log-family/log.md#log)
- [TinyLog](../../engines/table-engines/log-family/tinylog.md#tinylog)
- [StripeLog](../../engines/table-engines/log-family/stripelog.md#stripelog)
- [Log](../../engines/table-engines/log-family/log.md#log)
### Integration Engines {#integration-engines}
@ -47,28 +47,28 @@ Engines for communicating with other data storage and processing systems.
Engines in the family:
- [Kafka](integrations/kafka.md#kafka)
- [MySQL](integrations/mysql.md#mysql)
- [ODBC](integrations/odbc.md#table-engine-odbc)
- [JDBC](integrations/jdbc.md#table-engine-jdbc)
- [HDFS](integrations/hdfs.md#hdfs)
- [Kafka](../../engines/table-engines/integrations/kafka.md#kafka)
- [MySQL](../../engines/table-engines/integrations/mysql.md#mysql)
- [ODBC](../../engines/table-engines/integrations/odbc.md#table-engine-odbc)
- [JDBC](../../engines/table-engines/integrations/jdbc.md#table-engine-jdbc)
- [HDFS](../../engines/table-engines/integrations/hdfs.md#hdfs)
### Special Engines {#special-engines}
Engines in the family:
- [Distributed](special/distributed.md#distributed)
- [MaterializedView](special/materializedview.md#materializedview)
- [Dictionary](special/dictionary.md#dictionary)
- [Merge](special/merge.md#merge)
- [File](special/file.md#file)
- [Null](special/null.md#null)
- [Set](special/set.md#set)
- [Join](special/join.md#join)
- [URL](special/url.md#table_engines-url)
- [View](special/view.md#table_engines-view)
- [Memory](special/memory.md#memory)
- [Buffer](special/buffer.md#buffer)
- [Distributed](../../engines/table-engines/special/distributed.md#distributed)
- [MaterializedView](../../engines/table-engines/special/materializedview.md#materializedview)
- [Dictionary](../../engines/table-engines/special/dictionary.md#dictionary)
- [Merge](../../engines/table-engines/special/merge.md#merge)
- [File](../../engines/table-engines/special/file.md#file)
- [Null](../../engines/table-engines/special/null.md#null)
- [Set](../../engines/table-engines/special/set.md#set)
- [Join](../../engines/table-engines/special/join.md#join)
- [URL](../../engines/table-engines/special/url.md#table_engines-url)
- [View](../../engines/table-engines/special/view.md#table_engines-view)
- [Memory](../../engines/table-engines/special/memory.md#memory)
- [Buffer](../../engines/table-engines/special/buffer.md#buffer)
## Virtual Columns {#table_engines-virtual_columns}

View File

@ -6,7 +6,7 @@ toc_title: HDFS
# HDFS {#table_engines-hdfs}
This engine provides integration with the [Apache Hadoop](https://en.wikipedia.org/wiki/Apache_Hadoop) ecosystem by allowing you to manage data on [HDFS](https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-hdfs/HdfsDesign.html) via ClickHouse. This engine is similar
to the [File](../special/file.md#table_engines-file) and [URL](../special/url.md#table_engines-url) engines, but provides Hadoop-specific features.
to the [File](../../../engines/table-engines/special/file.md#table_engines-file) and [URL](../../../engines/table-engines/special/url.md#table_engines-url) engines, but provides Hadoop-specific features.
## Usage {#usage}
@ -116,6 +116,6 @@ CREARE TABLE big_table (name String, value UInt32) ENGINE = HDFS('hdfs://hdfs1:9
**See Also**
- [Virtual columns](../index.md#table_engines-virtual_columns)
- [Virtual columns](../../../engines/table-engines/index.md#table_engines-virtual_columns)
[Original article](https://clickhouse.tech/docs/en/operations/table_engines/hdfs/) <!--hide-->
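
For illustration, a minimal usage sketch of the engine described above; the HDFS URI, path and format are illustrative (the hunk above references a similar `hdfs://hdfs1:9000/...` address):

``` sql
CREATE TABLE hdfs_table (name String, value UInt32)
ENGINE = HDFS('hdfs://hdfs1:9000/some_dir/some_file', 'TSV');

INSERT INTO hdfs_table VALUES ('one', 1), ('two', 2);
SELECT * FROM hdfs_table;
```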

View File

@ -173,7 +173,7 @@ For a list of possible configuration options, see the [librdkafka configuration
**See Also**
- [Virtual columns](../index.md#table_engines-virtual_columns)
- [background_schedule_pool_size](../../../operations/settings/settings.md#background_schedule_pool_size)
- [Virtual columns](../../../engines/table-engines/index.md#table_engines-virtual_columns)
- [background\_schedule\_pool\_size](../../../operations/settings/settings.md#background_schedule_pool_size)
[Original article](https://clickhouse.tech/docs/en/operations/table_engines/kafka/) <!--hide-->

View File

@ -9,9 +9,9 @@ These engines were developed for scenarios when you need to quickly write many s
Engines of the family:
- [StripeLog](stripelog.md)
- [Log](log.md)
- [TinyLog](tinylog.md)
- [StripeLog](../../../engines/table-engines/log-family/stripelog.md)
- [Log](../../../engines/table-engines/log-family/log.md)
- [TinyLog](../../../engines/table-engines/log-family/tinylog.md)
## Common Properties {#common-properties}

View File

@ -5,9 +5,9 @@ toc_title: Log
# Log {#log}
Engine belongs to the family of log engines. See the common properties of log engines and their differences in the [Log Engine Family](log-family.md) article.
Engine belongs to the family of log engines. See the common properties of log engines and their differences in the [Log Engine Family](../../../engines/table-engines/log-family/log-family.md) article.
Log differs from [TinyLog](tinylog.md) in that a small file of “marks” resides with the column files. These marks are written on every data block and contain offsets that indicate where to start reading the file in order to skip the specified number of rows. This makes it possible to read table data in multiple threads.
Log differs from [TinyLog](../../../engines/table-engines/log-family/tinylog.md) in that a small file of “marks” resides with the column files. These marks are written on every data block and contain offsets that indicate where to start reading the file in order to skip the specified number of rows. This makes it possible to read table data in multiple threads.
For concurrent data access, the read operations can be performed simultaneously, while write operations block reads and each other.
The Log engine does not support indexes. Similarly, if writing to a table failed, the table is broken, and reading from it returns an error. The Log engine is appropriate for temporary data, write-once tables, and for testing or demonstration purposes.
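
A minimal sketch of a `Log` table (the table and column names are hypothetical); reads can run concurrently, while a write blocks both other writes and reads:

``` sql
CREATE TABLE log_example (id UInt64, msg String) ENGINE = Log;

INSERT INTO log_example VALUES (1, 'first'), (2, 'second');
SELECT * FROM log_example;
```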

View File

@ -5,7 +5,7 @@ toc_title: StripeLog
# StripeLog {#stripelog}
This engine belongs to the family of log engines. See the common properties of log engines and their differences in the [Log Engine Family](log-family.md) article.
This engine belongs to the family of log engines. See the common properties of log engines and their differences in the [Log Engine Family](../../../engines/table-engines/log-family/log-family.md) article.
Use this engine in scenarios when you need to write many tables with a small amount of data (less than 1 million rows).

View File

@ -5,10 +5,10 @@ toc_title: TinyLog
# TinyLog {#tinylog}
The engine belongs to the log engine family. See [Log Engine Family](log-family.md) for common properties of log engines and their differences.
The engine belongs to the log engine family. See [Log Engine Family](../../../engines/table-engines/log-family/log-family.md) for common properties of log engines and their differences.
This table engine is typically used with the write-once method: write data one time, then read it as many times as necessary. For example, you can use `TinyLog`-type tables for intermediary data that is processed in small batches. Note that storing data in a large number of small tables is inefficient.
Queries are executed in a single stream. In other words, this engine is intended for relatively small tables (up to about 1,000,000 rows). It makes sense to use this table engine if you have many small tables, since it’s simpler than the [Log](log.md) engine (fewer files need to be opened).
Queries are executed in a single stream. In other words, this engine is intended for relatively small tables (up to about 1,000,000 rows). It makes sense to use this table engine if you have many small tables, since it’s simpler than the [Log](../../../engines/table-engines/log-family/log.md) engine (fewer files need to be opened).
[Original article](https://clickhouse.tech/docs/en/operations/table_engines/tinylog/) <!--hide-->

View File

@ -5,7 +5,7 @@ toc_title: AggregatingMergeTree
# AggregatingMergeTree {#aggregatingmergetree}
The engine inherits from [MergeTree](mergetree.md#table_engines-mergetree), altering the logic for data parts merging. ClickHouse replaces all rows with the same primary key (or more accurately, with the same [sorting key](mergetree.md)) with a single row (within one data part) that stores a combination of states of aggregate functions.
The engine inherits from [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md#table_engines-mergetree), altering the logic for data parts merging. ClickHouse replaces all rows with the same primary key (or more accurately, with the same [sorting key](../../../engines/table-engines/mergetree-family/mergetree.md)) with a single row (within one data part) that stores a combination of states of aggregate functions.
You can use `AggregatingMergeTree` tables for incremental data aggregation, including for aggregated materialized views.
@ -36,7 +36,7 @@ For a description of request parameters, see [request description](../../../sql-
**Query clauses**
When creating an `AggregatingMergeTree` table the same [clauses](mergetree.md) are required, as when creating a `MergeTree` table.
When creating an `AggregatingMergeTree` table the same [clauses](../../../engines/table-engines/mergetree-family/mergetree.md) are required, as when creating a `MergeTree` table.
<details markdown="1">
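
For illustration, a minimal `AggregatingMergeTree` sketch with hypothetical table and column names; the columns store aggregate-function states, so reading finishes the aggregation with the `-Merge` combinators and `GROUP BY`:

``` sql
CREATE TABLE agg_visits
(
    StartDate Date,
    CounterID UInt32,
    Visits AggregateFunction(sum, UInt32),
    Users AggregateFunction(uniq, UInt32)
)
ENGINE = AggregatingMergeTree()
ORDER BY (StartDate, CounterID);

-- Each data part holds partially aggregated states, so finish the aggregation on read.
SELECT StartDate, sumMerge(Visits) AS Visits, uniqMerge(Users) AS Users
FROM agg_visits
GROUP BY StartDate;
```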

View File

@ -5,7 +5,7 @@ toc_title: CollapsingMergeTree
# CollapsingMergeTree {#table_engine-collapsingmergetree}
The engine inherits from [MergeTree](mergetree.md) and adds the logic of rows collapsing to data parts merge algorithm.
The engine inherits from [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md) and adds the logic of rows collapsing to data parts merge algorithm.
`CollapsingMergeTree` asynchronously deletes (collapses) pairs of rows if all of the fields in a sorting key (`ORDER BY`) are equivalent excepting the particular field `Sign` which can have `1` and `-1` values. Rows without a pair are kept. For more details see the [Collapsing](#table_engine-collapsingmergetree-collapsing) section of the document.
@ -36,7 +36,7 @@ For a description of query parameters, see [query description](../../../sql-refe
**Query clauses**
When creating a `CollapsingMergeTree` table, the same [query clauses](mergetree.md#table_engine-mergetree-creating-a-table) are required, as when creating a `MergeTree` table.
When creating a `CollapsingMergeTree` table, the same [query clauses](../../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) are required, as when creating a `MergeTree` table.
<details markdown="1">
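
A minimal sketch of the collapsing workflow (hypothetical table and values): the old state of a row is cancelled by re-inserting it with `Sign = -1`, and the corrected state is written with `Sign = 1`:

``` sql
CREATE TABLE page_views
(
    UserID UInt64,
    PageViews UInt32,
    Sign Int8
)
ENGINE = CollapsingMergeTree(Sign)
ORDER BY UserID;

INSERT INTO page_views VALUES (4324182021466249494, 5, 1);
-- Cancel the previous state and write the corrected one.
INSERT INTO page_views VALUES (4324182021466249494, 5, -1), (4324182021466249494, 6, 1);
```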

View File

@ -5,11 +5,11 @@ toc_title: Custom Partitioning Key
# Custom Partitioning Key {#custom-partitioning-key}
Partitioning is available for the [MergeTree](mergetree.md) family tables (including [replicated](replication.md) tables). [Materialized views](../special/materializedview.md#materializedview) based on MergeTree tables support partitioning, as well.
Partitioning is available for the [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md) family tables (including [replicated](../../../engines/table-engines/mergetree-family/replication.md) tables). [Materialized views](../../../engines/table-engines/special/materializedview.md#materializedview) based on MergeTree tables support partitioning, as well.
A partition is a logical combination of records in a table by a specified criterion. You can set a partition by an arbitrary criterion, such as by month, by day, or by event type. Each partition is stored separately to simplify manipulations of this data. When accessing the data, ClickHouse uses the smallest subset of partitions possible.
The partition is specified in the `PARTITION BY expr` clause when [creating a table](mergetree.md#table_engine-mergetree-creating-a-table). The partition key can be any expression from the table columns. For example, to specify partitioning by month, use the expression `toYYYYMM(date_column)`:
The partition is specified in the `PARTITION BY expr` clause when [creating a table](../../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table). The partition key can be any expression from the table columns. For example, to specify partitioning by month, use the expression `toYYYYMM(date_column)`:
``` sql
CREATE TABLE visits
@ -23,7 +23,7 @@ PARTITION BY toYYYYMM(VisitDate)
ORDER BY Hour;
```
The partition key can also be a tuple of expressions (similar to the [primary key](mergetree.md#primary-keys-and-indexes-in-queries)). For example:
The partition key can also be a tuple of expressions (similar to the [primary key](../../../engines/table-engines/mergetree-family/mergetree.md#primary-keys-and-indexes-in-queries)). For example:
``` sql
ENGINE = ReplicatedCollapsingMergeTree('/clickhouse/tables/name', 'replica1', Sign)
@ -38,7 +38,7 @@ When inserting new data to a table, this data is stored as a separate part (chun
!!! info "Info"
A merge only works for data parts that have the same value for the partitioning expression. This means **you shouldn’t make overly granular partitions** (more than about a thousand partitions). Otherwise, the `SELECT` query performs poorly because of an unreasonably large number of files in the file system and open file descriptors.
Use the [system.parts](../../../operations/system-tables.md#system_tables-parts) table to view the table parts and partitions. For example, let’s assume that we have a `visits` table with partitioning by month. Let’s perform the `SELECT` query for the `system.parts` table:
Use the [system.parts](../../../operations/system-tables/parts.md#system_tables-parts) table to view the table parts and partitions. For example, let’s assume that we have a `visits` table with partitioning by month. Let’s perform the `SELECT` query for the `system.parts` table:
``` sql
SELECT

View File

@ -9,7 +9,7 @@ This engine is designed for thinning and aggregating/averaging (rollup) [Graphit
You can use any ClickHouse table engine to store the Graphite data if you don’t need rollup, but if you need a rollup, use `GraphiteMergeTree`. The engine reduces the volume of storage and increases the efficiency of queries from Graphite.
The engine inherits properties from [MergeTree](mergetree.md).
The engine inherits properties from [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md).
## Creating a Table {#creating-table}
@ -50,7 +50,7 @@ The names of these columns should be set in the rollup configuration.
**Query clauses**
When creating a `GraphiteMergeTree` table, the same [clauses](mergetree.md#table_engine-mergetree-creating-a-table) are required, as when creating a `MergeTree` table.
When creating a `GraphiteMergeTree` table, the same [clauses](../../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) are required, as when creating a `MergeTree` table.
<details markdown="1">

View File

@ -15,20 +15,20 @@ Main features:
This allows you to create a small sparse index that helps find data faster.
- Partitions can be used if the [partitioning key](custom-partitioning-key.md) is specified.
- Partitions can be used if the [partitioning key](../../../engines/table-engines/mergetree-family/custom-partitioning-key.md) is specified.
ClickHouse supports certain operations with partitions that are more effective than general operations on the same data with the same result. ClickHouse also automatically cuts off the partition data where the partitioning key is specified in the query. This also improves query performance.
- Data replication support.
The family of `ReplicatedMergeTree` tables provides data replication. For more information, see [Data replication](replication.md).
The family of `ReplicatedMergeTree` tables provides data replication. For more information, see [Data replication](../../../engines/table-engines/mergetree-family/replication.md).
- Data sampling support.
If necessary, you can set the data sampling method in the table.
!!! info "Info"
The [Merge](../special/merge.md#merge) engine does not belong to the `*MergeTree` family.
The [Merge](../../../engines/table-engines/special/merge.md#merge) engine does not belong to the `*MergeTree` family.
## Creating a Table {#table_engine-mergetree-creating-a-table}
@ -51,9 +51,6 @@ ORDER BY expr
For a description of parameters, see the [CREATE query description](../../../sql-reference/statements/create.md).
!!! note "Note"
`INDEX` is an experimental feature, see [Data Skipping Indexes](#table_engine-mergetree-data_skipping-indexes).
### Query Clauses {#mergetree-query-clauses}
- `ENGINE` — Name and parameters of the engine. `ENGINE = MergeTree()`. The `MergeTree` engine does not have parameters.
@ -62,11 +59,11 @@ For a description of parameters, see the [CREATE query description](../../../sql
A tuple of column names or arbitrary expressions. Example: `ORDER BY (CounterID, EventDate)`.
ClickHouse uses the sorting key as a primary key if the primary key is not defined obviously by the `PRIMARY KEY` clause.
Use the `ORDER BY tuple()` syntax, if you don't need sorting. See [Selecting the Primary Key](#selecting-the-primary-key).
ClickHouse uses the sorting key as a primary key if the primary key is not defined obviously by the `PRIMARY KEY` clause.
- `PARTITION BY` — The [partitioning key](custom-partitioning-key.md). Optional.
Use the `ORDER BY tuple()` syntax, if you don’t need sorting. See [Selecting the Primary Key](#selecting-the-primary-key).
- `PARTITION BY` — The [partitioning key](../../../engines/table-engines/mergetree-family/custom-partitioning-key.md). Optional.
For partitioning by month, use the `toYYYYMM(date_column)` expression, where `date_column` is a column with a date of the type [Date](../../../sql-reference/data-types/date.md). The partition names here have the `"YYYYMM"` format.
@ -196,22 +193,22 @@ The number of columns in the primary key is not explicitly limited. Depending on
ClickHouse sorts data by primary key, so the higher the consistency, the better the compression.
- Provide additional logic when merging data parts in the [CollapsingMergeTree](collapsingmergetree.md#table_engine-collapsingmergetree) and [SummingMergeTree](summingmergetree.md) engines.
- Provide additional logic when merging data parts in the [CollapsingMergeTree](../../../engines/table-engines/mergetree-family/collapsingmergetree.md#table_engine-collapsingmergetree) and [SummingMergeTree](../../../engines/table-engines/mergetree-family/summingmergetree.md) engines.
In this case it makes sense to specify the *sorting key* that is different from the primary key.
A long primary key will negatively affect the insert performance and memory consumption, but extra columns in the primary key do not affect ClickHouse performance during `SELECT` queries.
You can create a table without a primary key using the `ORDER BY tuple()` syntax. In this case, ClickHouse stores data in the order of inserting. If you want to save data order when inserting data by `INSERT ... SELECT` queries, set [max_insert_threads = 1](../../../operations/settings/settings.md#settings-max-insert-threads).
You can create a table without a primary key using the `ORDER BY tuple()` syntax. In this case, ClickHouse stores data in the order of inserting. If you want to save data order when inserting data by `INSERT ... SELECT` queries, set [max\_insert\_threads = 1](../../../operations/settings/settings.md#settings-max-insert-threads).
To select data in the initial order, use [single-threaded](../../../operations/settings/settings.md#settings-max_threads) `SELECT` queries.
### Choosing a Primary Key that Differs from the Sorting Key {#choosing-a-primary-key-that-differs-from-the-sorting-key}
It is possible to specify a primary key (an expression with values that are written in the index file for each mark) that is different from the sorting key (an expression for sorting the rows in data parts). In this case the primary key expression tuple must be a prefix of the sorting key expression tuple.
This feature is helpful when using the [SummingMergeTree](summingmergetree.md) and
[AggregatingMergeTree](aggregatingmergetree.md) table engines. In a common case when using these engines, the table has two types of columns: *dimensions* and *measures*. Typical queries aggregate values of measure columns with arbitrary `GROUP BY` and filtering by dimensions. Because SummingMergeTree and AggregatingMergeTree aggregate rows with the same value of the sorting key, it is natural to add all dimensions to it. As a result, the key expression consists of a long list of columns and this list must be frequently updated with newly added dimensions.
This feature is helpful when using the [SummingMergeTree](../../../engines/table-engines/mergetree-family/summingmergetree.md) and
[AggregatingMergeTree](../../../engines/table-engines/mergetree-family/aggregatingmergetree.md) table engines. In a common case when using these engines, the table has two types of columns: *dimensions* and *measures*. Typical queries aggregate values of measure columns with arbitrary `GROUP BY` and filtering by dimensions. Because SummingMergeTree and AggregatingMergeTree aggregate rows with the same value of the sorting key, it is natural to add all dimensions to it. As a result, the key expression consists of a long list of columns and this list must be frequently updated with newly added dimensions.
In this case it makes sense to leave only a few columns in the primary key that will provide efficient range scans and add the remaining dimension columns to the sorting key tuple.
@ -257,7 +254,7 @@ ClickHouse cannot use an index if the values of the primary key in the query par
ClickHouse uses this logic not only for days of the month sequences, but for any primary key that represents a partially-monotonic sequence.
### Data Skipping Indexes (experimental) {#table_engine-mergetree-data_skipping-indexes}
### Data Skipping Indexes {#table_engine-mergetree-data_skipping-indexes}
The index declaration is in the columns section of the `CREATE` query.
@ -487,7 +484,7 @@ When ClickHouse see that data is expired, it performs an off-schedule merge. To
If you perform the `SELECT` query between merges, you may get expired data. To avoid it, use the [OPTIMIZE](../../../sql-reference/statements/misc.md#misc_operations-optimize) query before `SELECT`.
## Using Multiple Block Devices for Data Storage {#table_engine-mergetree-multiple-volumes}
## Using Multiple Block Devices for Data Storage {#table_engine-mergetree-multiple-volumes}
### Introduction {#introduction}
@ -502,7 +499,7 @@ Data part is the minimum movable unit for `MergeTree`-engine tables. The data be
- Volume — Ordered set of equal disks (similar to [JBOD](https://en.wikipedia.org/wiki/Non-RAID_drive_architectures)).
- Storage policy — Set of volumes and the rules for moving data between them.
The names given to the described entities can be found in the system tables, [system.storage\_policies](../../../operations/system-tables.md#system_tables-storage_policies) and [system.disks](../../../operations/system-tables.md#system_tables-disks). To apply one of the configured storage policies for a table, use the `storage_policy` setting of `MergeTree`-engine family tables.
The names given to the described entities can be found in the system tables, [system.storage\_policies](../../../operations/system-tables/storage_policies.md#system_tables-storage_policies) and [system.disks](../../../operations/system-tables/disks.md#system_tables-disks). To apply one of the configured storage policies for a table, use the `storage_policy` setting of `MergeTree`-engine family tables.
### Configuration {#table_engine-mergetree-multiple-volumes_configure}
@ -632,7 +629,7 @@ SETTINGS storage_policy = 'moving_from_ssd_to_hdd'
The `default` storage policy implies using only one volume, which consists of only one disk given in `<path>`. Once a table is created, its storage policy cannot be changed.
The number of threads performing background moves of data parts can be changed by [background_move_pool_size](../../../operations/settings/settings.md#background_move_pool_size) setting.
The number of threads performing background moves of data parts can be changed by [background\_move\_pool\_size](../../../operations/settings/settings.md#background_move_pool_size) setting.
### Details {#details}
@ -651,7 +648,7 @@ In all these cases except for mutations and partition freezing, a part is stored
Under the hood, mutations and partition freezing make use of [hard links](https://en.wikipedia.org/wiki/Hard_link). Hard links between different disks are not supported, therefore in such cases the resulting parts are stored on the same disks as the initial ones.
In the background, parts are moved between volumes on the basis of the amount of free space (`move_factor` parameter) according to the order the volumes are declared in the configuration file.
Data is never transferred from the last one and into the first one. One may use system tables [system.part\_log](../../../operations/system-tables.md#system_tables-part-log) (field `type = MOVE_PART`) and [system.parts](../../../operations/system-tables.md#system_tables-parts) (fields `path` and `disk`) to monitor background moves. Also, the detailed information can be found in server logs.
Data is never transferred from the last one and into the first one. One may use system tables [system.part\_log](../../../operations/system-tables/part_log.md#system_tables-part-log) (field `type = MOVE_PART`) and [system.parts](../../../operations/system-tables/parts.md#system_tables-parts) (fields `path` and `disk`) to monitor background moves. Also, the detailed information can be found in server logs.
User can force moving a part or a partition from one volume to another using the query [ALTER TABLE … MOVE PART\|PARTITION … TO VOLUME\|DISK …](../../../sql-reference/statements/alter.md#alter_move-partition), all the restrictions for background operations are taken into account. The query initiates a move on its own and does not wait for background operations to be completed. User will get an error message if not enough free space is available or if any of the required conditions are not met.
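
To connect the pieces above: a table is bound to a storage policy when it is created, and a part or partition can then be moved by hand with `ALTER TABLE ... MOVE`. A minimal sketch reusing the `moving_from_ssd_to_hdd` policy named earlier on this page; the table, partition value and volume name are hypothetical:

``` sql
CREATE TABLE hits
(
    EventDate Date,
    UserID UInt64
)
ENGINE = MergeTree()
PARTITION BY toYYYYMM(EventDate)
ORDER BY UserID
SETTINGS storage_policy = 'moving_from_ssd_to_hdd';

-- Force one partition onto a specific volume without waiting for background moves.
ALTER TABLE hits MOVE PARTITION 202006 TO VOLUME 'external';
```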

View File

@ -5,7 +5,7 @@ toc_title: ReplacingMergeTree
# ReplacingMergeTree {#replacingmergetree}
The engine differs from [MergeTree](mergetree.md#table_engines-mergetree) in that it removes duplicate entries with the same primary key value (or more accurately, with the same [sorting key](mergetree.md) value).
The engine differs from [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md#table_engines-mergetree) in that it removes duplicate entries with the same [sorting key](../../../engines/table-engines/mergetree-family/mergetree.md) value.
Data deduplication occurs only during a merge. Merging occurs in the background at an unknown time, so you can’t plan for it. Some of the data may remain unprocessed. Although you can run an unscheduled merge using the `OPTIMIZE` query, don’t count on using it, because the `OPTIMIZE` query will read and write a large amount of data.
@ -33,14 +33,14 @@ For a description of request parameters, see [request description](../../../sql-
- `ver` — column with version. Type `UInt*`, `Date` or `DateTime`. Optional parameter.
When merging, `ReplacingMergeTree` from all the rows with the same primary key leaves only one:
When merging, `ReplacingMergeTree` from all the rows with the same sorting key leaves only one:
- Last in the selection, if `ver` not set.
- With the maximum version, if `ver` specified.
**Query clauses**
When creating a `ReplacingMergeTree` table the same [clauses](mergetree.md) are required, as when creating a `MergeTree` table.
When creating a `ReplacingMergeTree` table the same [clauses](../../../engines/table-engines/mergetree-family/mergetree.md) are required, as when creating a `MergeTree` table.
<details markdown="1">
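
A minimal sketch with hypothetical names, showing the `ver` column and the `OPTIMIZE` caveat mentioned above:

``` sql
CREATE TABLE hits_dedup
(
    UserID UInt64,
    PageViews UInt32,
    Version UInt32
)
ENGINE = ReplacingMergeTree(Version)
ORDER BY UserID;

-- Duplicates by sorting key survive until a merge; force one only for checks,
-- since OPTIMIZE reads and writes a large amount of data.
OPTIMIZE TABLE hits_dedup FINAL;
```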

View File

@ -63,7 +63,7 @@ For each `INSERT` query, approximately ten entries are added to ZooKeeper throug
For very large clusters, you can use different ZooKeeper clusters for different shards. However, this hasn’t proven necessary on the Yandex.Metrica cluster (approximately 300 servers).
Replication is asynchronous and multi-master. `INSERT` queries (as well as `ALTER`) can be sent to any available server. Data is inserted on the server where the query is run, and then it is copied to the other servers. Because it is asynchronous, recently inserted data appears on the other replicas with some latency. If part of the replicas are not available, the data is written when they become available. If a replica is available, the latency is the amount of time it takes to transfer the block of compressed data over the network. The number of threads performing background tasks for replicated tables can be set by [background_schedule_pool_size](../../../operations/settings/settings.md#background_schedule_pool_size) setting.
Replication is asynchronous and multi-master. `INSERT` queries (as well as `ALTER`) can be sent to any available server. Data is inserted on the server where the query is run, and then it is copied to the other servers. Because it is asynchronous, recently inserted data appears on the other replicas with some latency. If part of the replicas are not available, the data is written when they become available. If a replica is available, the latency is the amount of time it takes to transfer the block of compressed data over the network. The number of threads performing background tasks for replicated tables can be set by [background\_schedule\_pool\_size](../../../operations/settings/settings.md#background_schedule_pool_size) setting.
By default, an INSERT query waits for confirmation of writing the data from only one replica. If the data was successfully written to only one replica and the server with this replica ceases to exist, the stored data will be lost. To enable getting confirmation of data writes from multiple replicas, use the `insert_quorum` option.
@ -217,6 +217,6 @@ If the data in ZooKeeper was lost or damaged, you can save data by moving it to
**See also**
- [background_schedule_pool_size](../../../operations/settings/settings.md#background_schedule_pool_size)
- [background\_schedule\_pool\_size](../../../operations/settings/settings.md#background_schedule_pool_size)
[Original article](https://clickhouse.tech/docs/en/operations/table_engines/replication/) <!--hide-->
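
For illustration, a replicated table declaration together with the `insert_quorum` option mentioned above (a sketch; the ZooKeeper path and the `{shard}`/`{replica}` macros are assumed to be configured on the servers):

``` sql
CREATE TABLE hits_replicated
(
    EventDate Date,
    UserID UInt64
)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/hits', '{replica}')
PARTITION BY toYYYYMM(EventDate)
ORDER BY UserID;

-- Wait for acknowledgement of the write from two replicas instead of one.
SET insert_quorum = 2;
```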

View File

@ -5,7 +5,7 @@ toc_title: SummingMergeTree
# SummingMergeTree {#summingmergetree}
The engine inherits from [MergeTree](mergetree.md#table_engines-mergetree). The difference is that when merging data parts for `SummingMergeTree` tables ClickHouse replaces all the rows with the same primary key (or more accurately, with the same [sorting key](mergetree.md)) with one row which contains summarized values for the columns with the numeric data type. If the sorting key is composed in a way that a single key value corresponds to large number of rows, this significantly reduces storage volume and speeds up data selection.
The engine inherits from [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md#table_engines-mergetree). The difference is that when merging data parts for `SummingMergeTree` tables ClickHouse replaces all the rows with the same primary key (or more accurately, with the same [sorting key](../../../engines/table-engines/mergetree-family/mergetree.md)) with one row which contains summarized values for the columns with the numeric data type. If the sorting key is composed in a way that a single key value corresponds to large number of rows, this significantly reduces storage volume and speeds up data selection.
We recommend using the engine together with `MergeTree`. Store complete data in a `MergeTree` table, and use `SummingMergeTree` for storing aggregated data, for example, when preparing reports. Such an approach will prevent you from losing valuable data due to an incorrectly composed primary key.
@ -35,7 +35,7 @@ For a description of request parameters, see [request description](../../../sql-
**Query clauses**
When creating a `SummingMergeTree` table the same [clauses](mergetree.md) are required, as when creating a `MergeTree` table.
When creating a `SummingMergeTree` table the same [clauses](../../../engines/table-engines/mergetree-family/mergetree.md) are required, as when creating a `MergeTree` table.
<details markdown="1">
@ -96,7 +96,7 @@ SELECT key, sum(value) FROM summtt GROUP BY key
When data are inserted into a table, they are saved as-is. ClickHouse merges the inserted parts of data periodically and this is when rows with the same primary key are summed and replaced with one for each resulting part of data.
ClickHouse can merge the data parts so that different resulting parts of data can consist of rows with the same primary key, i.e. the summation will be incomplete. Therefore, when selecting data (`SELECT`), an aggregate function [sum()](../../../sql-reference/aggregate-functions/reference.md#agg_function-sum) and a `GROUP BY` clause should be used in a query as described in the example above.
ClickHouse can merge the data parts so that different resulting parts of data can consist of rows with the same primary key, i.e. the summation will be incomplete. Therefore, when selecting data (`SELECT`), an aggregate function [sum()](../../../sql-reference/aggregate-functions/reference/sum.md#agg_function-sum) and a `GROUP BY` clause should be used in a query as described in the example above.
### Common Rules for Summation {#common-rules-for-summation}
@ -110,7 +110,7 @@ The values are not summarized for columns in the primary key.
### The Summation in the Aggregatefunction Columns {#the-summation-in-the-aggregatefunction-columns}
For columns of [AggregateFunction type](../../../sql-reference/data-types/aggregatefunction.md) ClickHouse behaves as [AggregatingMergeTree](aggregatingmergetree.md) engine aggregating according to the function.
For columns of [AggregateFunction type](../../../sql-reference/data-types/aggregatefunction.md) ClickHouse behaves as [AggregatingMergeTree](../../../engines/table-engines/mergetree-family/aggregatingmergetree.md) engine aggregating according to the function.
### Nested Structures {#nested-structures}
@ -132,7 +132,7 @@ Examples:
[(1, 100), (2, 150)] + [(1, -100)] -> [(2, 150)]
```
When requesting data, use the [sumMap(key, value)](../../../sql-reference/aggregate-functions/reference.md) function for aggregation of `Map`.
When requesting data, use the [sumMap(key, value)](../../../sql-reference/aggregate-functions/reference/summap.md) function for aggregation of `Map`.
For nested data structure, you do not need to specify its columns in the tuple of columns for summation.
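
A compact sketch of the `summtt` example referenced above; because summation may be incomplete until parts are merged, reads go through `sum()` with `GROUP BY`:

``` sql
CREATE TABLE summtt
(
    key UInt32,
    value UInt32
)
ENGINE = SummingMergeTree()
ORDER BY key;

INSERT INTO summtt VALUES (1, 1), (1, 2), (2, 1);

-- Aggregate explicitly instead of relying on background merges.
SELECT key, sum(value) FROM summtt GROUP BY key;
```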

View File

@ -12,7 +12,7 @@ This engine:
See the section [Collapsing](#table_engines_versionedcollapsingmergetree) for details.
The engine inherits from [MergeTree](mergetree.md#table_engines-mergetree) and adds the logic for collapsing rows to the algorithm for merging data parts. `VersionedCollapsingMergeTree` serves the same purpose as [CollapsingMergeTree](collapsingmergetree.md) but uses a different collapsing algorithm that allows inserting the data in any order with multiple threads. In particular, the `Version` column helps to collapse the rows properly even if they are inserted in the wrong order. In contrast, `CollapsingMergeTree` allows only strictly consecutive insertion.
The engine inherits from [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md#table_engines-mergetree) and adds the logic for collapsing rows to the algorithm for merging data parts. `VersionedCollapsingMergeTree` serves the same purpose as [CollapsingMergeTree](../../../engines/table-engines/mergetree-family/collapsingmergetree.md) but uses a different collapsing algorithm that allows inserting the data in any order with multiple threads. In particular, the `Version` column helps to collapse the rows properly even if they are inserted in the wrong order. In contrast, `CollapsingMergeTree` allows only strictly consecutive insertion.
## Creating a Table {#creating-a-table}
@ -47,7 +47,7 @@ VersionedCollapsingMergeTree(sign, version)
**Query Clauses**
When creating a `VersionedCollapsingMergeTree` table, the same [clauses](mergetree.md) are required as when creating a `MergeTree` table.
When creating a `VersionedCollapsingMergeTree` table, the same [clauses](../../../engines/table-engines/mergetree-family/mergetree.md) are required as when creating a `MergeTree` table.
<details markdown="1">
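
A minimal sketch matching the `VersionedCollapsingMergeTree(sign, version)` signature shown above; the table and column names are hypothetical:

``` sql
CREATE TABLE user_state
(
    UserID UInt64,
    PageViews UInt32,
    Sign Int8,
    Version UInt8
)
ENGINE = VersionedCollapsingMergeTree(Sign, Version)
ORDER BY UserID;
```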

View File

@ -7,7 +7,7 @@ toc_title: Dictionary
The `Dictionary` engine displays the [dictionary](../../../sql-reference/dictionaries/external-dictionaries/external-dicts.md) data as a ClickHouse table.
## Example
## Example {#example}
As an example, consider a dictionary of `products` with the following configuration:

View File

@ -10,7 +10,7 @@ Reading is automatically parallelized. During a read, the table indexes on remot
The Distributed engine accepts parameters:
- the cluster name in the server's config file
- the cluster name in the server’s config file
- the name of a remote database
@ -23,7 +23,7 @@ The Distributed engine accepts parameters:
See also:
- `insert_distributed_sync` setting
- [MergeTree](../mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes) for the examples
- [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes) for the examples
Example:
@ -37,7 +37,7 @@ For example, for a query with GROUP BY, data will be aggregated on remote server
Instead of the database name, you can use a constant expression that returns a string. For example: currentDatabase().
logs – The cluster name in the server's config file.
logs – The cluster name in the server’s config file.
Clusters are set like this:
@ -82,7 +82,7 @@ Replicas are duplicating servers (in order to read all the data, you can access
Cluster names must not contain dots.
The parameters `host`, `port`, and optionally `user`, `password`, `secure`, `compression` are specified for each server:
- `host` – The address of the remote server. You can use either the domain or the IPv4 or IPv6 address. If you specify the domain, the server makes a DNS request when it starts, and the result is stored as long as the server is running. If the DNS request fails, the server doesn't start. If you change the DNS record, restart the server.
- `host` – The address of the remote server. You can use either the domain or the IPv4 or IPv6 address. If you specify the domain, the server makes a DNS request when it starts, and the result is stored as long as the server is running. If the DNS request fails, the server doesn’t start. If you change the DNS record, restart the server.
- `port` – The TCP port for messenger activity (`tcp_port` in the config, usually set to 9000). Do not confuse it with http\_port.
- `user` – Name of the user for connecting to a remote server. Default value: default. This user must have access to connect to the specified server. Access is configured in the users.xml file. For more information, see the section [Access rights](../../../operations/access-rights.md).
- `password` – The password for connecting to a remote server (not masked). Default value: empty string.
@ -99,38 +99,38 @@ You can specify as many clusters as you wish in the configuration.
To view your clusters, use the `system.clusters` table.
The Distributed engine allows working with a cluster like a local server. However, the cluster is inextensible: you must write its configuration in the server config file (even better, for all the cluster's servers).
The Distributed engine allows working with a cluster like a local server. However, the cluster is inextensible: you must write its configuration in the server config file (even better, for all the cluster’s servers).
The Distributed engine requires writing clusters to the config file. Clusters from the config file are updated on the fly, without restarting the server. If you need to send a query to an unknown set of shards and replicas each time, you don't need to create a Distributed table – use the `remote` table function instead. See the section [Table functions](../../../sql-reference/table-functions/index.md).
The Distributed engine requires writing clusters to the config file. Clusters from the config file are updated on the fly, without restarting the server. If you need to send a query to an unknown set of shards and replicas each time, you don’t need to create a Distributed table – use the `remote` table function instead. See the section [Table functions](../../../sql-reference/table-functions/index.md).
There are two methods for writing data to a cluster:
First, you can define which servers to write which data to and perform the write directly on each shard. In other words, perform INSERT in the tables that the distributed table "looks at". This is the most flexible solution as you can use any sharding scheme, which could be non-trivial due to the requirements of the subject area. This is also the most optimal solution since data can be written to different shards completely independently.
First, you can define which servers to write which data to and perform the write directly on each shard. In other words, perform INSERT in the tables that the distributed table “looks at”. This is the most flexible solution as you can use any sharding scheme, which could be non-trivial due to the requirements of the subject area. This is also the most optimal solution since data can be written to different shards completely independently.
Second, you can perform INSERT in a Distributed table. In this case, the table will distribute the inserted data across the servers itself. In order to write to a Distributed table, it must have a sharding key set (the last parameter). In addition, if there is only one shard, the write operation works without specifying the sharding key, since it doesn't mean anything in this case.
Second, you can perform INSERT in a Distributed table. In this case, the table will distribute the inserted data across the servers itself. In order to write to a Distributed table, it must have a sharding key set (the last parameter). In addition, if there is only one shard, the write operation works without specifying the sharding key, since it doesn’t mean anything in this case.
Each shard can have a weight defined in the config file. By default, the weight is equal to one. Data is distributed across shards in the amount proportional to the shard weight. For example, if there are two shards and the first has a weight of 9 while the second has a weight of 10, the first will be sent 9 / 19 parts of the rows, and the second will be sent 10 / 19.
Each shard can have the `internal_replication` parameter defined in the config file.
If this parameter is set to `true`, the write operation selects the first healthy replica and writes data to it. Use this alternative if the Distributed table "looks at" replicated tables. In other words, if the table where data will be written is going to replicate them itself.
If this parameter is set to `true`, the write operation selects the first healthy replica and writes data to it. Use this alternative if the Distributed table “looks at” replicated tables. In other words, if the table where data will be written is going to replicate them itself.
If it is set to `false` (the default), data is written to all replicas. In essence, this means that the Distributed table replicates data itself. This is worse than using replicated tables, because the consistency of replicas is not checked, and over time they will contain slightly different data.
To select the shard that a row of data is sent to, the sharding expression is analyzed, and its remainder is taken from dividing it by the total weight of the shards. The row is sent to the shard that corresponds to the half-interval of the remainders from `prev_weight` to `prev_weights + weight`, where `prev_weights` is the total weight of the shards with the smallest number, and `weight` is the weight of this shard. For example, if there are two shards, and the first has a weight of 9 while the second has a weight of 10, the row will be sent to the first shard for the remainders from the range \[0, 9), and to the second for the remainders from the range \[9, 19).
The sharding expression can be any expression from constants and table columns that returns an integer. For example, you can use the expression `rand()` for random distribution of data, or `UserID` for distribution by the remainder from dividing the user's ID (then the data of a single user will reside on a single shard, which simplifies running IN and JOIN by users). If one of the columns is not distributed evenly enough, you can wrap it in a hash function: intHash64(UserID).
The sharding expression can be any expression from constants and table columns that returns an integer. For example, you can use the expression `rand()` for random distribution of data, or `UserID` for distribution by the remainder from dividing the user’s ID (then the data of a single user will reside on a single shard, which simplifies running IN and JOIN by users). If one of the columns is not distributed evenly enough, you can wrap it in a hash function: intHash64(UserID).
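Reusing the hypothetical names from the sketch above, a sharding key based on the user ID could look like this:

``` sql
-- Rows with the same UserID always map to the same shard,
-- so IN and JOIN by user can be executed locally on each shard.
CREATE TABLE hits_by_user AS hits_local
ENGINE = Distributed(example_cluster, default, hits_local, intHash64(UserID));
```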
A simple remainder from the division is a limited solution for sharding and isn't always appropriate. It works for medium and large volumes of data (dozens of servers), but not for very large volumes of data (hundreds of servers or more). In the latter case, use the sharding scheme required by the subject area, rather than using entries in Distributed tables.
A simple remainder from the division is a limited solution for sharding and isn’t always appropriate. It works for medium and large volumes of data (dozens of servers), but not for very large volumes of data (hundreds of servers or more). In the latter case, use the sharding scheme required by the subject area, rather than using entries in Distributed tables.
SELECT queries are sent to all the shards and work regardless of how data is distributed across the shards (they can be distributed completely randomly). When you add a new shard, you don't have to transfer the old data to it. You can write new data with a heavier weight – the data will be distributed slightly unevenly, but queries will work correctly and efficiently.
SELECT queries are sent to all the shards and work regardless of how data is distributed across the shards (they can be distributed completely randomly). When you add a new shard, you don’t have to transfer the old data to it. You can write new data with a heavier weight – the data will be distributed slightly unevenly, but queries will work correctly and efficiently.
You should be concerned about the sharding scheme in the following cases:
- Queries are used that require joining data (IN or JOIN) by a specific key. If data is sharded by this key, you can use local IN or JOIN instead of GLOBAL IN or GLOBAL JOIN, which is much more efficient.
- A large number of servers is used (hundreds or more) with a large number of small queries (queries of individual clients - websites, advertisers, or partners). In order for the small queries to not affect the entire cluster, it makes sense to locate data for a single client on a single shard. Alternatively, as we've done in Yandex.Metrica, you can set up bi-level sharding: divide the entire cluster into "layers", where a layer may consist of multiple shards. Data for a single client is located on a single layer, but shards can be added to a layer as necessary, and data is randomly distributed within them. Distributed tables are created for each layer, and a single shared distributed table is created for global queries.
- A large number of servers is used (hundreds or more) with a large number of small queries (queries of individual clients - websites, advertisers, or partners). In order for the small queries to not affect the entire cluster, it makes sense to locate data for a single client on a single shard. Alternatively, as we’ve done in Yandex.Metrica, you can set up bi-level sharding: divide the entire cluster into “layers”, where a layer may consist of multiple shards. Data for a single client is located on a single layer, but shards can be added to a layer as necessary, and data is randomly distributed within them. Distributed tables are created for each layer, and a single shared distributed table is created for global queries.
Data is written asynchronously. When inserted in the table, the data block is just written to the local file system. The data is sent to the remote servers in the background as soon as possible. The period for sending data is managed by the [distributed\_directory\_monitor\_sleep\_time\_ms](../../../operations/settings/settings.md#distributed_directory_monitor_sleep_time_ms) and [distributed\_directory\_monitor\_max\_sleep\_time\_ms](../../../operations/settings/settings.md#distributed_directory_monitor_max_sleep_time_ms) settings. The `Distributed` engine sends each file with inserted data separately, but you can enable batch sending of files with the [distributed\_directory\_monitor\_batch\_inserts](../../../operations/settings/settings.md#distributed_directory_monitor_batch_inserts) setting. This setting improves cluster performance by better utilizing local server and network resources. You should check whether data is sent successfully by checking the list of files (data waiting to be sent) in the table directory: `/var/lib/clickhouse/data/database/table/`. The number of threads performing background tasks can be set by [background_distributed_schedule_pool_size](../../../operations/settings/settings.md#background_distributed_schedule_pool_size) setting.
Data is written asynchronously. When inserted in the table, the data block is just written to the local file system. The data is sent to the remote servers in the background as soon as possible. The period for sending data is managed by the [distributed\_directory\_monitor\_sleep\_time\_ms](../../../operations/settings/settings.md#distributed_directory_monitor_sleep_time_ms) and [distributed\_directory\_monitor\_max\_sleep\_time\_ms](../../../operations/settings/settings.md#distributed_directory_monitor_max_sleep_time_ms) settings. The `Distributed` engine sends each file with inserted data separately, but you can enable batch sending of files with the [distributed\_directory\_monitor\_batch\_inserts](../../../operations/settings/settings.md#distributed_directory_monitor_batch_inserts) setting. This setting improves cluster performance by better utilizing local server and network resources. You should check whether data is sent successfully by checking the list of files (data waiting to be sent) in the table directory: `/var/lib/clickhouse/data/database/table/`. The number of threads performing background tasks can be set by [background\_distributed\_schedule\_pool\_size](../../../operations/settings/settings.md#background_distributed_schedule_pool_size) setting.
If the server ceased to exist or had a rough restart (for example, after a device failure) after an INSERT to a Distributed table, the inserted data might be lost. If a damaged data part is detected in the table directory, it is transferred to the `broken` subdirectory and no longer used.
@ -145,7 +145,7 @@ When the `max_parallel_replicas` option is enabled, query processing is parallel
**See Also**
- [Virtual columns](index.md#table_engines-virtual_columns)
- [background_distributed_schedule_pool_size](../../../operations/settings/settings.md#background_distributed_schedule_pool_size)
- [Virtual columns](../../../engines/table-engines/special/index.md#table_engines-virtual_columns)
- [background\_distributed\_schedule\_pool\_size](../../../operations/settings/settings.md#background_distributed_schedule_pool_size)
[Original article](https://clickhouse.tech/docs/en/operations/table_engines/distributed/) <!--hide-->
View File
@ -33,7 +33,7 @@ You may manually create this subfolder and file in server filesystem and then [A
!!! warning "Warning"
Be careful with this functionality, because ClickHouse does not keep track of external changes to such files. The result of simultaneous writes via ClickHouse and outside of ClickHouse is undefined.
## Example
## Example {#example}
**1.** Set up the `file_engine_table` table:
View File
@ -25,7 +25,7 @@ Generate table engine supports only `SELECT` queries.
It supports all [DataTypes](../../../sql-reference/data-types/index.md) that can be stored in a table except `LowCardinality` and `AggregateFunction`.
## Example
## Example {#example}
**1.** Set up the `generate_engine_table` table:
View File
@ -11,7 +11,7 @@ Reading is automatically parallelized. Writing to a table is not supported. When
The `Merge` engine accepts parameters: the database name and a regular expression for tables.
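Its syntax can be sketched as follows (the column set and the `^WatchLog` name pattern are illustrative); fuller examples follow below:

``` sql
-- Reads, in parallel, from every table of the current database
-- whose name matches the regular expression '^WatchLog'.
CREATE TABLE all_watch_logs (date Date, UserID UInt64, EventType String)
ENGINE = Merge(currentDatabase(), '^WatchLog');
```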
## Examples
## Examples {#examples}
Example 1:
@ -67,6 +67,6 @@ FROM WatchLog
**See Also**
- [Virtual columns](index.md#table_engines-virtual_columns)
- [Virtual columns](../../../engines/table-engines/special/index.md#table_engines-virtual_columns)
[Original article](https://clickhouse.tech/docs/en/operations/table_engines/merge/) <!--hide-->
View File
@ -5,7 +5,7 @@ toc_title: URL
# URL Table Engine {#table_engines-url}
Queries data to/from a remote HTTP/HTTPS server. This engine is similar to the [File](file.md) engine.
Queries data to/from a remote HTTP/HTTPS server. This engine is similar to the [File](../../../engines/table-engines/special/file.md) engine.
Syntax: `URL(URL, Format)`
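A minimal sketch (the address, port and column layout are placeholders):

``` sql
-- The HTTP server at this address must answer GET requests (for SELECT)
-- and POST requests (for INSERT) with data in the CSV format.
CREATE TABLE url_engine_table (word String, value UInt64)
ENGINE = URL('http://127.0.0.1:12345/', CSV);
```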
@ -25,7 +25,7 @@ respectively. For processing `POST` requests, the remote server must support
You can limit the maximum number of HTTP GET redirect hops using the [max\_http\_get\_redirects](../../../operations/settings/settings.md#setting-max_http_get_redirects) setting.
## Example
## Example {#example}
**1.** Create a `url_engine_table` table on the server:
View File
@ -1,9 +1,8 @@
---
toc_folder_title: F.A.Q.
toc_hidden: true
toc_priority: 76
toc_title: hidden
toc_hidden: true
---
{## [Original article](https://clickhouse.tech/docs/en/faq) ##}
View File
@ -9,12 +9,12 @@ toc_title: Introduction
This section describes how to obtain example datasets and import them into ClickHouse.
For some datasets example queries are also available.
- [Anonymized Yandex.Metrica Dataset](metrica.md)
- [Star Schema Benchmark](star-schema.md)
- [WikiStat](wikistat.md)
- [Terabyte of Click Logs from Criteo](criteo.md)
- [AMPLab Big Data Benchmark](amplab-benchmark.md)
- [New York Taxi Data](nyc-taxi.md)
- [OnTime](ontime.md)
- [Anonymized Yandex.Metrica Dataset](../../getting-started/example-datasets/metrica.md)
- [Star Schema Benchmark](../../getting-started/example-datasets/star-schema.md)
- [WikiStat](../../getting-started/example-datasets/wikistat.md)
- [Terabyte of Click Logs from Criteo](../../getting-started/example-datasets/criteo.md)
- [AMPLab Big Data Benchmark](../../getting-started/example-datasets/amplab-benchmark.md)
- [New York Taxi Data](../../getting-started/example-datasets/nyc-taxi.md)
- [OnTime](../../getting-started/example-datasets/ontime.md)
[Original article](https://clickhouse.tech/docs/en/getting_started/example_datasets) <!--hide-->
View File
@ -7,7 +7,7 @@ toc_title: Yandex.Metrica Data
Dataset consists of two tables containing anonymized data about hits (`hits_v1`) and visits (`visits_v1`) of Yandex.Metrica. You can read more about Yandex.Metrica in [ClickHouse history](../../introduction/history.md) section.
The dataset consists of two tables, either of them can be downloaded as a compressed `tsv.xz` file or as prepared partitions. In addition to that, an extended version of the `hits` table containing 100 million rows is available as TSV at https://clickhouse-datasets.s3.yandex.net/hits/tsv/hits_100m_obfuscated_v1.tsv.xz and as prepared partitions at https://clickhouse-datasets.s3.yandex.net/hits/partitions/hits_100m_obfuscated_v1.tar.xz.
The dataset consists of two tables, either of them can be downloaded as a compressed `tsv.xz` file or as prepared partitions. In addition to that, an extended version of the `hits` table containing 100 million rows is available as TSV at https://clickhouse-datasets.s3.yandex.net/hits/tsv/hits\_100m\_obfuscated\_v1.tsv.xz and as prepared partitions at https://clickhouse-datasets.s3.yandex.net/hits/partitions/hits\_100m\_obfuscated\_v1.tar.xz.
## Obtaining Tables from Prepared Partitions {#obtaining-tables-from-prepared-partitions}
View File
@ -7,9 +7,9 @@ toc_title: hidden
# Getting Started {#getting-started}
If you are new to ClickHouse and want to get a hands-on feeling of its performance, first of all, you need to go through the [installation process](install.md). After that you can:
If you are new to ClickHouse and want to get a hands-on feeling of its performance, first of all, you need to go through the [installation process](../getting-started/install.md). After that you can:
- [Go through detailed tutorial](tutorial.md)
- [Experiment with example datasets](example-datasets/ontime.md)
- [Go through detailed tutorial](../getting-started/tutorial.md)
- [Experiment with example datasets](../getting-started/example-datasets/ontime.md)
[Original article](https://clickhouse.tech/docs/en/getting_started/) <!--hide-->
View File
@ -6,13 +6,13 @@ toc_title: Playground
# ClickHouse Playground {#clickhouse-playground}
[ClickHouse Playground](https://play.clickhouse.tech) allows people to experiment with ClickHouse by running queries instantly, without setting up their server or cluster.
Several example datasets are available in the Playground as well as sample queries that show ClickHouse features. There's also a selection of ClickHouse LTS releases to experiment with.
Several example datasets are available in the Playground as well as sample queries that show ClickHouse features. There’s also a selection of ClickHouse LTS releases to experiment with.
ClickHouse Playground gives the experience of m2.small [Managed Service for ClickHouse](https://cloud.yandex.com/services/managed-clickhouse) instance (4 vCPU, 32 GB RAM) hosted in [Yandex.Cloud](https://cloud.yandex.com/). More information about [cloud providers](../commercial/cloud.md).
You can make queries to playground using any HTTP client, for example [curl](https://curl.haxx.se) or [wget](https://www.gnu.org/software/wget/), or set up a connection using [JDBC](../interfaces/jdbc.md) or [ODBC](../interfaces/odbc.md) drivers. More information about software products that support ClickHouse is available [here](../interfaces/index.md).
## Credentials
## Credentials {#credentials}
| Parameter | Value |
|:--------------------|:----------------------------------------|
@ -23,13 +23,13 @@ You can make queries to playground using any HTTP client, for example [curl](htt
There are additional endpoints with specific ClickHouse releases to experiment with their differences (ports and user/password are the same as above):
* 20.3 LTS: `play-api-v20-3.clickhouse.tech`
* 19.14 LTS: `play-api-v19-14.clickhouse.tech`
- 20.3 LTS: `play-api-v20-3.clickhouse.tech`
- 19.14 LTS: `play-api-v19-14.clickhouse.tech`
!!! note "Note"
All these endpoints require a secure TLS connection.
## Limitations
## Limitations {#limitations}
The queries are executed as a read-only user. It implies some limitations:
@ -37,12 +37,12 @@ The queries are executed as a read-only user. It implies some limitations:
- INSERT queries are not allowed
The following settings are also enforced:
- [max_result_bytes=10485760](../operations/settings/query_complexity/#max-result-bytes)
- [max_result_rows=2000](../operations/settings/query_complexity/#setting-max_result_rows)
- [result_overflow_mode=break](../operations/settings/query_complexity/#result-overflow-mode)
- [max_execution_time=60000](../operations/settings/query_complexity/#max-execution-time)
- [max\_result\_bytes=10485760](../operations/settings/query_complexity/#max-result-bytes)
- [max\_result\_rows=2000](../operations/settings/query_complexity/#setting-max_result_rows)
- [result\_overflow\_mode=break](../operations/settings/query_complexity/#result-overflow-mode)
- [max\_execution\_time=60000](../operations/settings/query_complexity/#max-execution-time)
## Examples
## Examples {#examples}
HTTPS endpoint example with `curl`:
@ -51,11 +51,12 @@ curl "https://play-api.clickhouse.tech:8443/?query=SELECT+'Play+ClickHouse!';&us
```
TCP endpoint example with [CLI](../interfaces/cli.md):
``` bash
clickhouse client --secure -h play-api.clickhouse.tech --port 9440 -u playground --password clickhouse -q "SELECT 'Play ClickHouse!'"
```
## Implementation Details
## Implementation Details {#implementation-details}
ClickHouse Playground web interface makes requests via ClickHouse [HTTP API](../interfaces/http.md).
The Playground backend is just a ClickHouse cluster without any additional server-side application. As mentioned above, ClickHouse HTTPS and TCP/TLS endpoints are also publicly available as a part of the Playground; both are proxied through [Cloudflare Spectrum](https://www.cloudflare.com/products/cloudflare-spectrum/) to add an extra layer of protection and improve global connectivity.
View File
@ -11,7 +11,7 @@ By going through this tutorial, you’ll learn how to set up a simple ClickHouse
## Single Node Setup {#single-node-setup}
To postpone the complexities of a distributed environment, we’ll start with deploying ClickHouse on a single server or virtual machine. ClickHouse is usually installed from [deb](install.md#install-from-deb-packages) or [rpm](install.md#from-rpm-packages) packages, but there are [alternatives](install.md#from-docker-image) for the operating systems that do not support them.
To postpone the complexities of a distributed environment, we’ll start with deploying ClickHouse on a single server or virtual machine. ClickHouse is usually installed from [deb](../getting-started/install.md#install-from-deb-packages) or [rpm](../getting-started/install.md#from-rpm-packages) packages, but there are [alternatives](../getting-started/install.md#from-docker-image) for the operating systems that do not support them.
For example, you have chosen `deb` packages and executed:
@ -80,7 +80,7 @@ clickhouse-client --query='INSERT INTO table FORMAT TabSeparated' < data.tsv
## Import Sample Dataset {#import-sample-dataset}
Now it’s time to fill our ClickHouse server with some sample data. In this tutorial, we’ll use the anonymized data of Yandex.Metrica, the first service that runs ClickHouse in production way before it became open-source (more on that in [history section](../introduction/history.md)). There are [multiple ways to import Yandex.Metrica dataset](example-datasets/metrica.md), and for the sake of the tutorial, we’ll go with the most realistic one.
Now it’s time to fill our ClickHouse server with some sample data. In this tutorial, we’ll use the anonymized data of Yandex.Metrica, the first service that runs ClickHouse in production way before it became open-source (more on that in [history section](../introduction/history.md)). There are [multiple ways to import Yandex.Metrica dataset](../getting-started/example-datasets/metrica.md), and for the sake of the tutorial, we’ll go with the most realistic one.
### Download and Extract Table Data {#download-and-extract-table-data}
View File
@ -232,6 +232,6 @@ FROM
```
!!! note "Note"
More info about [avg()](../sql-reference/aggregate-functions/reference.md#agg_function-avg) and [log()](../sql-reference/functions/math-functions.md) functions.
More info about [avg()](../sql-reference/aggregate-functions/reference/avg.md#agg_function-avg) and [log()](../sql-reference/functions/math-functions.md) functions.
[Original article](https://clickhouse.tech/docs/en/guides/apply_catboost_model/) <!--hide-->
View File
@ -9,6 +9,6 @@ toc_title: Overview
List of detailed step-by-step instructions that help to solve various tasks using ClickHouse:
- [Tutorial on simple cluster set-up](../getting-started/tutorial.md)
- [Applying a CatBoost model in ClickHouse](apply-catboost-model.md)
- [Applying a CatBoost model in ClickHouse](../guides/apply-catboost-model.md)
[Original article](https://clickhouse.tech/docs/en/guides/) <!--hide-->
View File
@ -1147,11 +1147,11 @@ To exchange data with Hadoop, you can use [HDFS table engine](../engines/table-e
[Apache Arrow](https://arrow.apache.org/) comes with two built-in columnar storage formats. ClickHouse supports read and write operations for these formats.
`Arrow` is Apache Arrow's "file mode" format. It is designed for in-memory random access.
`Arrow` is Apache Arrow’s “file mode” format. It is designed for in-memory random access.
## ArrowStream {#data-format-arrow-stream}
`ArrowStream` is Apache Arrow's "stream mode" format. It is designed for in-memory stream processing.
`ArrowStream` is Apache Arrow’s “stream mode” format. It is designed for in-memory stream processing.
## ORC {#data-format-orc}
View File
@ -275,7 +275,7 @@ Use buffering to avoid situations where a query processing error occurred after
### Queries with Parameters {#cli-queries-with-parameters}
You can create a query with parameters and pass values for them from the corresponding HTTP request parameters. For more information, see [Queries with Parameters for CLI](cli.md#cli-queries-with-parameters).
You can create a query with parameters and pass values for them from the corresponding HTTP request parameters. For more information, see [Queries with Parameters for CLI](../interfaces/cli.md#cli-queries-with-parameters).
### Example {#example}
@ -291,7 +291,7 @@ ClickHouse supports specific queries through the HTTP interface. For example, yo
$ echo '(4),(5),(6)' | curl 'http://localhost:8123/?query=INSERT%20INTO%20t%20VALUES' --data-binary @-
```
ClickHouse also supports Predefined HTTP Interface which can help you more easy integration with third party tools like [Prometheus exporter](https://github.com/percona-lab/clickhouse_exporter).
ClickHouse also supports Predefined HTTP Interface which can help you more easily integrate with third-party tools like [Prometheus exporter](https://github.com/percona-lab/clickhouse_exporter).
Example:
@ -314,7 +314,7 @@ Example:
</http_handlers>
```
- You can now request the url directly for data in the Prometheus format:
- You can now request the URL directly for data in the Prometheus format:
<!-- -->
@ -361,41 +361,40 @@ $ curl -v 'http://localhost:8123/predefined_query'
* Connection #0 to host localhost left intact
* Connection #0 to host localhost left intact
```
As you can see from the example, if `<http_handlers>` is configured in the config.xml file and `<http_handlers>` can contain many `<rule>s`. ClickHouse will match the HTTP requests received to the predefined type in `<rule>` and the first matched runs the handler. Then ClickHouse will execute the corresponding predefined query if the match is successful.
As you can see from the example, if `http_handlers` is configured in the config.xml file, it can contain many `rules`. ClickHouse will match the HTTP requests received to the predefined type in `rule` and the first matched runs the handler. Then ClickHouse will execute the corresponding predefined query if the match is successful.
> Now `<rule>` can configure `<method>`, `<headers>`, `<url>`,`<handler>`:
> `<method>` is responsible for matching the method part of the HTTP request. `<method>` fully conforms to the definition of [method](https://developer.mozilla.org/en-US/docs/Web/HTTP/Methods) in the HTTP protocol. It is an optional configuration. If it is not defined in the configuration file, it does not match the method portion of the HTTP request.
>
> `<url>` is responsible for matching the url part of the HTTP request. It is compatible with [RE2](https://github.com/google/re2)’s regular expressions. It is an optional configuration. If it is not defined in the configuration file, it does not match the url portion of the HTTP request.
>
> `<headers>` is responsible for matching the header part of the HTTP request. It is compatible with RE2’s regular expressions. It is an optional configuration. If it is not defined in the configuration file, it does not match the header portion of the HTTP request.
>
> `<handler>` contains the main processing part. Now `<handler>` can configure `<type>`, `<status>`, `<content_type>`, `<response_content>`, `<query>`, `<query_param_name>`.
> \> `<type>` currently supports three types: **predefined\_query\_handler**, **dynamic\_query\_handler**, **static**.
> \>
> \> `<query>` - use with predefined\_query\_handler type, executes query when the handler is called.
> \>
> \> `<query_param_name>` - use with dynamic\_query\_handler type, extracts and executes the value corresponding to the `<query_param_name>` value in HTTP request params.
> \>
> \> `<status>` - use with static type, response status code.
> \>
> \> `<content_type>` - use with static type, response [content-type](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Type).
> \>
> \> `<response_content>` - use with static type, Response content sent to client, when using the prefix file:// or config://, find the content from the file or configuration send to client.
Now `rule` can configure `method`, `headers`, `url`, `handler`:
- `method` is responsible for matching the method part of the HTTP request. `method` fully conforms to the definition of [method](https://developer.mozilla.org/en-US/docs/Web/HTTP/Methods) in the HTTP protocol. It is an optional configuration. If it is not defined in the configuration file, it does not match the method portion of the HTTP request.
Next are the configuration methods for the different `<type>`.
- `url` is responsible for matching the URL part of the HTTP request. It is compatible with [RE2](https://github.com/google/re2)’s regular expressions. It is an optional configuration. If it is not defined in the configuration file, it does not match the URL portion of the HTTP request.
## predefined\_query\_handler {#predefined_query_handler}
- `headers` are responsible for matching the header part of the HTTP request. It is compatible with RE2’s regular expressions. It is an optional configuration. If it is not defined in the configuration file, it does not match the header portion of the HTTP request.
`<predefined_query_handler>` supports setting Settings and query\_params values. You can configure `<query>` in the type of `<predefined_query_handler>`.
- `handler` contains the main processing part. Now `handler` can configure `type`, `status`, `content_type`, `response_content`, `query`, `query_param_name`.
`type` currently supports three types: [predefined_query_handler](#predefined_query_handler), [dynamic_query_handler](#dynamic_query_handler), [static](#static).
- `query` — use with `predefined_query_handler` type, executes query when the handler is called.
- `query_param_name` — use with `dynamic_query_handler` type, extracts and executes the value corresponding to the `query_param_name` value in HTTP request params.
- `status` — use with `static` type, response status code.
- `content_type` — use with `static` type, response [content-type](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Type).
`<query>` value is a predefined query of `<predefined_query_handler>`, which is executed by ClickHouse when an HTTP request is matched and the result of the query is returned. It is a must configuration.
- `response_content` — use with `static` type, response content sent to the client; when using the prefix file:// or config://, the content is taken from the file or configuration and sent to the client.
The following example defines the values of `max_threads` and `max_alter_threads` settings, then queries the system table to check whether these settings were set successfully.
Next are the configuration methods for different `type`.
### predefined_query_handler {#predefined_query_handler}
`predefined_query_handler` supports setting `Settings` and `query_params` values. You can configure `query` in the type of `predefined_query_handler`.
The `query` value is a predefined query of `predefined_query_handler`, which is executed by ClickHouse when an HTTP request is matched and the result of the query is returned. It is a required configuration.
The following example defines the values of [max_threads](../operations/settings/settings.md#settings-max_threads) and `max_alter_threads` settings, then queries the system table to check whether these settings were set successfully.
Example:
@ -424,15 +423,15 @@ max_alter_threads 2
```
!!! note "caution"
In one `<predefined_query_handler>` only supports one `<query>` of an insert type.
One `predefined_query_handler` supports only one `query` of an insert type.
## dynamic\_query\_handler {#dynamic_query_handler}
### dynamic_query_handler {#dynamic_query_handler}
In `<dynamic_query_handler>`, query is written in the form of param of the HTTP request. The difference is that in `<predefined_query_handler>`, query is wrote in the configuration file. You can configure `<query_param_name>` in `<dynamic_query_handler>`.
In `dynamic_query_handler`, the query is written in the form of a parameter of the HTTP request. The difference is that in `predefined_query_handler`, the query is written in the configuration file. You can configure `query_param_name` in `dynamic_query_handler`.
ClickHouse extracts and executes the value corresponding to the `<query_param_name>` value in the url of the HTTP request. The default value of `<query_param_name>` is `/query` . It is an optional configuration. If there is no definition in the configuration file, the param is not passed in.
ClickHouse extracts and executes the value corresponding to the `query_param_name` value in the URL of the HTTP request. The default value of `query_param_name` is `/query`. It is an optional configuration. If there is no definition in the configuration file, the param is not passed in.
To experiment with this functionality, the example defines the values of max\_threads and max\_alter\_threads and queries whether the Settings were set successfully.
To experiment with this functionality, the example defines the values of [max_threads](../operations/settings/settings.md#settings-max_threads) and `max_alter_threads` and queries whether the settings were set successfully.
Example:
@ -455,9 +454,9 @@ max_threads 1
max_alter_threads 2
```
## static {#static}
### static {#static}
`<static>` can return [content\_type](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Type), [status](https://developer.mozilla.org/en-US/docs/Web/HTTP/Status) and response\_content. response\_content can return the specified content
`static` can return [content_type](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Type), [status](https://developer.mozilla.org/en-US/docs/Web/HTTP/Status) and `response_content`. `response_content` can return the specified content.
Example:
View File
@ -9,19 +9,19 @@ toc_title: Introduction
ClickHouse provides two network interfaces (both can be optionally wrapped in TLS for additional security):
- [HTTP](http.md), which is documented and easy to use directly.
- [Native TCP](tcp.md), which has less overhead.
- [Native TCP](../interfaces/tcp.md), which has less overhead.
In most cases it is recommended to use an appropriate tool or library instead of interacting with those directly. Officially supported by Yandex are the following:
- [Command-line client](cli.md)
- [JDBC driver](jdbc.md)
- [ODBC driver](odbc.md)
- [C++ client library](cpp.md)
- [Command-line client](../interfaces/cli.md)
- [JDBC driver](../interfaces/jdbc.md)
- [ODBC driver](../interfaces/odbc.md)
- [C++ client library](../interfaces/cpp.md)
There are also a wide range of third-party libraries for working with ClickHouse:
- [Client libraries](third-party/client-libraries.md)
- [Integrations](third-party/integrations.md)
- [Visual interfaces](third-party/gui.md)
- [Client libraries](../interfaces/third-party/client-libraries.md)
- [Integrations](../interfaces/third-party/integrations.md)
- [Visual interfaces](../interfaces/third-party/gui.md)
[Original article](https://clickhouse.tech/docs/en/interfaces/) <!--hide-->
View File
@ -5,6 +5,6 @@ toc_title: Native Interface (TCP)
# Native Interface (TCP) {#native-interface-tcp}
The native protocol is used in the [command-line client](cli.md), for inter-server communication during distributed query processing, and also in other C++ programs. Unfortunately, native ClickHouse protocol does not have formal specification yet, but it can be reverse-engineered from ClickHouse source code (starting [around here](https://github.com/ClickHouse/ClickHouse/tree/master/src/Client)) and/or by intercepting and analyzing TCP traffic.
The native protocol is used in the [command-line client](../interfaces/cli.md), for inter-server communication during distributed query processing, and also in other C++ programs. Unfortunately, the native ClickHouse protocol does not have a formal specification yet, but it can be reverse-engineered from ClickHouse source code (starting [around here](https://github.com/ClickHouse/ClickHouse/tree/master/src/Client)) and/or by intercepting and analyzing TCP traffic.
[Original article](https://clickhouse.tech/docs/en/interfaces/tcp/) <!--hide-->
View File
@ -98,5 +98,12 @@ toc_title: Integrations
- Elixir
- [Ecto](https://github.com/elixir-ecto/ecto)
- [clickhouse\_ecto](https://github.com/appodeal/clickhouse_ecto)
- Ruby
- [Ruby on Rails](https://rubyonrails.org/)
- [activecube](https://github.com/bitquery/activecube)
- [ActiveRecord](https://github.com/PNixx/clickhouse-activerecord)
- [GraphQL](https://github.com/graphql)
- [activecube-graphql](https://github.com/bitquery/activecube-graphql)
[Original article](https://clickhouse.tech/docs/en/interfaces/third-party/integrations/) <!--hide-->
View File
@ -8,78 +8,78 @@ toc_title: Adopters
!!! warning "Disclaimer"
The following list of companies using ClickHouse and their success stories is assembled from public sources, thus might differ from current reality. We’d appreciate it if you share the story of adopting ClickHouse in your company and [add it to the list](https://github.com/ClickHouse/ClickHouse/edit/master/docs/en/introduction/adopters.md), but please make sure you won’t have any NDA issues by doing so. Providing updates with publications from other companies is also useful.
| Company | Industry | Usecase | Cluster Size | (Un)Compressed Data Size<abbr title="of single replica"><sup>\*</sup></abbr> | Reference |
|---------------------------------------------------------------------|---------------------------------|-----------------------|------------------------------------------------------------|------------------------------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| [2gis](https://2gis.ru){.favicon} | Maps | Monitoring | — | — | [Talk in Russian, July 2019](https://youtu.be/58sPkXfq6nw) |
| [Aloha&nbsp;Browser](https://alohabrowser.com/){.favicon} | Mobile App | Browser backend | — | — | [Slides in Russian, May 2019](https://presentations.clickhouse.tech/meetup22/aloha.pdf) |
| [Amadeus](https://amadeus.com/){.favicon} | Travel | Analytics | — | — | [Press Release, April 2018](https://www.altinity.com/blog/2018/4/5/amadeus-technologies-launches-investment-and-insights-tool-based-on-machine-learning-and-strategy-algorithms) |
| [Appsflyer](https://www.appsflyer.com){.favicon} | Mobile analytics | Main product | — | — | [Talk in Russian, July 2019](https://www.youtube.com/watch?v=M3wbRlcpBbY) |
| [ArenaData](https://arenadata.tech/){.favicon} | Data Platform | Main product | — | — | [Slides in Russian, December 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup38/indexes.pdf) |
| [Badoo](https://badoo.com){.favicon} | Dating | Timeseries | — | — | [Slides in Russian, December 2019](https://presentations.clickhouse.tech/meetup38/forecast.pdf) |
| [Benocs](https://www.benocs.com/){.favicon} | Network Telemetry and Analytics | Main Product | — | — | [Slides in English, October 2017](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup9/lpm.pdf) |
| [Bloomberg](https://www.bloomberg.com/){.favicon} | Finance, Media | Monitoring | 102 servers | — | [Slides, May 2018](https://www.slideshare.net/Altinity/http-analytics-for-6m-requests-per-second-using-clickhouse-by-alexander-bocharov) |
| [Bloxy](https://bloxy.info){.favicon} | Blockchain | Analytics | — | — | [Slides in Russian, August 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup17/4_bloxy.pptx) |
| [Dataliance for China Telecom](https://www.chinatelecomglobal.com/){.favicon} | Telecom | Analytics | — | — | [Slides in Chinese, January 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup12/telecom.pdf) |
| [CARTO](https://carto.com/){.favicon} | Business Intelligence | Geo analytics | — | — | [Geospatial processing with ClickHouse](https://carto.com/blog/geospatial-processing-with-clickhouse/) |
| [CERN](http://public.web.cern.ch/public/){.favicon} | Research | Experiment | — | — | [Press release, April 2012](https://www.yandex.com/company/press_center/press_releases/2012/2012-04-10/) |
| [Cisco](http://cisco.com/){.favicon} | Networking | Traffic analysis | — | — | [Lightning talk, October 2019](https://youtu.be/-hI1vDR2oPY?t=5057) |
| [Citadel&nbsp;Securities](https://www.citadelsecurities.com/){.favicon} | Finance | — | — | — | [Contribution, March 2019](https://github.com/ClickHouse/ClickHouse/pull/4774) |
| [Citymobil](https://city-mobil.ru){.favicon} | Taxi | Analytics | — | — | [Blog Post in Russian, March 2020](https://habr.com/en/company/citymobil/blog/490660/) |
| [ContentSquare](https://contentsquare.com){.favicon} | Web analytics | Main product | — | — | [Blog post in French, November 2018](http://souslecapot.net/2018/11/21/patrick-chatain-vp-engineering-chez-contentsquare-penser-davantage-amelioration-continue-que-revolution-constante/) |
| [Cloudflare](https://cloudflare.com){.favicon} | CDN | Traffic analysis | 36 servers | — | [Blog post, May 2017](https://blog.cloudflare.com/how-cloudflare-analyzes-1m-dns-queries-per-second/), [Blog post, March 2018](https://blog.cloudflare.com/http-analytics-for-6m-requests-per-second-using-clickhouse/) |
| [Corunet](https://coru.net/){.favicon} | Analytics | Main product | — | — | [Slides in English, April 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup21/predictive_models.pdf) |
| [CraiditX 氪信](https://www.creditx.com){.favicon} | Finance AI | Analysis | — | — | [Slides in English, November 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup33/udf.pptx) |
| [Criteo](https://www.criteo.com/){.favicon} | Retail | Main product | — | — | [Slides in English, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup18/3_storetail.pptx) |
| [Deutsche&nbsp;Bank](https://db.com){.favicon} | Finance | BI Analytics | — | — | [Slides in English, October 2019](https://bigdatadays.ru/wp-content/uploads/2019/10/D2-H3-3_Yakunin-Goihburg.pdf) |
| [Diva-e](https://www.diva-e.com){.favicon} | Digital consulting | Main Product | — | — | [Slides in English, September 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup29/ClickHouse-MeetUp-Unusual-Applications-sd-2019-09-17.pdf) |
| [Exness](https://www.exness.com){.favicon} | Trading | Metrics, Logging | — | — | [Talk in Russian, May 2019](https://youtu.be/_rpU-TvSfZ8?t=3215) |
| [Geniee](https://geniee.co.jp){.favicon} | Ad network | Main product | — | — | [Blog post in Japanese, July 2017](https://tech.geniee.co.jp/entry/2017/07/20/160100) |
| [HUYA](https://www.huya.com/){.favicon} | Video Streaming | Analytics | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/7.%20ClickHouse万亿数据分析实践%20李本旺(sundy-li)%20虎牙.pdf) |
| [Idealista](https://www.idealista.com){.favicon} | Real Estate | Analytics | — | — | [Blog Post in English, April 2019](https://clickhouse.tech/blog/en/clickhouse-meetup-in-madrid-on-april-2-2019) |
| [Infovista](https://www.infovista.com/){.favicon} | Networks | Analytics | — | — | [Slides in English, October 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup30/infovista.pdf) |
| [InnoGames](https://www.innogames.com){.favicon} | Games | Metrics, Logging | — | — | [Slides in Russian, September 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup28/graphite_and_clickHouse.pdf) |
| [Integros](https://integros.com){.favicon} | Platform for video services | Analytics | — | — | [Slides in Russian, May 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup22/strategies.pdf) |
| [Kodiak Data](https://www.kodiakdata.com/){.favicon} | Clouds | Main product | — | — | [Slides in Engish, April 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup13/kodiak_data.pdf) |
| [Kontur](https://kontur.ru){.favicon} | Software Development | Metrics | — | — | [Talk in Russian, November 2018](https://www.youtube.com/watch?v=U4u4Bd0FtrY) |
| [Lawrence Berkeley National Laboratory](https://www.lbl.gov){.favicon} | Research | Traffic analysis | 1 server | 11.8 TiB | [Slides in English, April 2019](https://www.smitasin.com/presentations/2019-04-17_DOE-NSM.pdf) |
| [LifeStreet](https://lifestreet.com/){.favicon} | Ad network | Main product | 75 servers (3 replicas) | 5.27 PiB | [Blog post in Russian, February 2017](https://habr.com/en/post/322620/) |
| [Mail.ru Cloud Solutions](https://mcs.mail.ru/){.favicon} | Cloud services | Main product | — | — | [Article in Russian](https://mcs.mail.ru/help/db-create/clickhouse#) |
| [MessageBird](https://www.messagebird.com){.favicon} | Telecommunications | Statistics | — | — | [Slides in English, November 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup20/messagebird.pdf) |
| [MGID](https://www.mgid.com/){.favicon} | Ad network | Web-analytics | — | — | [Blog post in Russian, April 2020](http://gs-studio.com/news-about-it/32777----clickhouse---c) |
| [OneAPM](https://www.oneapm.com/){.favicon} | Monitorings and Data Analysis | Main product | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/8.%20clickhouse在OneAPM的应用%20杜龙.pdf) |
| [Pragma&nbsp;Innovation](http://www.pragma-innovation.fr/){.favicon} | Telemetry and Big Data Analysis | Main product | — | — | [Slides in English, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup18/4_pragma_innovation.pdf) |
| [QINGCLOUD](https://www.qingcloud.com/){.favicon} | Cloud services | Main product | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/4.%20Cloud%20%2B%20TSDB%20for%20ClickHouse%20张健%20QingCloud.pdf) |
| [Qrator](https://qrator.net){.favicon} | DDoS protection | Main product | — | — | [Blog Post, March 2019](https://blog.qrator.net/en/clickhouse-ddos-mitigation_37/) |
| [Percent 百分点](https://www.percent.cn/){.favicon} | Analytics | Main Product | — | — | [Slides in Chinese, June 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup24/4.%20ClickHouse万亿数据双中心的设计与实践%20.pdf) |
| [Rambler](https://rambler.ru){.favicon} | Internet services | Analytics | — | — | [Talk in Russian, April 2018](https://medium.com/@ramblertop/разработка-api-clickhouse-для-рамблер-топ-100-f4c7e56f3141) |
| [Tencent](https://www.tencent.com){.favicon} | Messaging | Logging | — | — | [Talk in Chinese, November 2019](https://youtu.be/T-iVQRuw-QY?t=5050) |
| [Traffic&nbsp;Stars](https://trafficstars.com/){.favicon} | AD network | — | — | — | [Slides in Russian, May 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup15/lightning/ninja.pdf) |
| [S7&nbsp;Airlines](https://www.s7.ru){.favicon} | Airlines | Metrics, Logging | — | — | [Talk in Russian, March 2019](https://www.youtube.com/watch?v=nwG68klRpPg&t=15s) |
| [SEMrush](https://www.semrush.com/){.favicon} | Marketing | Main product | — | — | [Slides in Russian, August 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup17/5_semrush.pdf) |
| [scireum&nbsp;GmbH](https://www.scireum.de/){.favicon} | e-Commerce | Main product | — | — | [Talk in German, February 2020](https://www.youtube.com/watch?v=7QWAn5RbyR4) |
| [Sentry](https://sentry.io/){.favicon} | Software Development | Main product | — | — | [Blog Post in English, May 2019](https://blog.sentry.io/2019/05/16/introducing-snuba-sentrys-new-search-infrastructure) |
| [SGK](http://www.sgk.gov.tr/wps/portal/sgk/tr){.favicon} | Goverment Social Security | Analytics | — | — | [Slides in English, November 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup35/ClickHouse%20Meetup-Ramazan%20POLAT.pdf) |
| [seo.do](https://seo.do/){.favicon} | Analytics | Main product | — | — | [Slides in English, November 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup35/CH%20Presentation-%20Metehan%20Çetinkaya.pdf) |
| [Sina](http://english.sina.com/index.html){.favicon} | News | — | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/6.%20ClickHouse最佳实践%20高鹏_新浪.pdf) |
| [SMI2](https://smi2.ru/){.favicon} | News | Analytics | — | — | [Blog Post in Russian, November 2017](https://habr.com/ru/company/smi2/blog/314558/) |
| [Splunk](https://www.splunk.com/){.favicon} | Business Analytics | Main product | — | — | [Slides in English, January 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup12/splunk.pdf) |
| [Spotify](https://www.spotify.com){.favicon} | Music | Experimentation | — | — | [Slides, July 2018](https://www.slideshare.net/glebus/using-clickhouse-for-experimentation-104247173) |
| [Tencent](https://www.tencent.com){.favicon} | Big Data | Data processing | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/5.%20ClickHouse大数据集群应用_李俊飞腾讯网媒事业部.pdf) |
| [Uber](https://www.uber.com){.favicon} | Taxi | Logging | — | — | [Slides, February 2020](https://presentations.clickhouse.tech/meetup40/uber.pdf) |
| [VKontakte](https://vk.com){.favicon} | Social Network | Statistics, Logging | — | — | [Slides in Russian, August 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup17/3_vk.pdf) |
| [Wisebits](https://wisebits.com/){.favicon} | IT Solutions | Analytics | — | — | [Slides in Russian, May 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup22/strategies.pdf) |
| [Xiaoxin&nbsp;Tech](http://www.xiaoxintech.cn/){.favicon} | Education | Common purpose | — | — | [Slides in English, November 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup33/sync-clickhouse-with-mysql-mongodb.pptx) |
| [Ximalaya](https://www.ximalaya.com/){.favicon} | Audio sharing | OLAP | — | — | [Slides in English, November 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup33/ximalaya.pdf) |
| [Yandex&nbsp;Cloud](https://cloud.yandex.ru/services/managed-clickhouse){.favicon} | Public Cloud | Main product | — | — | [Talk in Russian, December 2019](https://www.youtube.com/watch?v=pgnak9e_E0o) |
| [Yandex&nbsp;DataLens](https://cloud.yandex.ru/services/datalens){.favicon} | Business Intelligence | Main product | — | — | [Slides in Russian, December 2019](https://presentations.clickhouse.tech/meetup38/datalens.pdf) |
| [Yandex&nbsp;Market](https://market.yandex.ru/){.favicon} | e-Commerce | Metrics, Logging | — | — | [Talk in Russian, January 2019](https://youtu.be/_l1qP0DyBcA?t=478) |
| [Yandex&nbsp;Metrica](https://metrica.yandex.com){.favicon} | Web analytics | Main product | 360 servers in one cluster, 1862 servers in one department | 66.41 PiB / 5.68 PiB | [Slides, February 2020](https://presentations.clickhouse.tech/meetup40/introduction/#13) |
| [ЦВТ](https://htc-cs.ru/){.favicon} | Software Development | Metrics, Logging | — | — | [Blog Post, March 2019, in Russian](https://vc.ru/dev/62715-kak-my-stroili-monitoring-na-prometheus-clickhouse-i-elk) |
| [МКБ](https://mkb.ru/){.favicon} | Bank | Web-system monitoring | — | — | [Slides in Russian, September 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup28/mkb.pdf) |
| [Jinshuju 金数据](https://jinshuju.net){.favicon} | BI Analytics | Main product | — | — | [Slides in Chinese, October 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup24/3.%20金数据数据架构调整方案Public.pdf) |
| [Instana](https://www.instana.com){.favicon} | APM Platform | Main product | — | — | [Twitter post](https://twitter.com/mieldonkers/status/1248884119158882304) |
| [Wargaming](https://wargaming.com/en/){.favicon} | Games | | — | — | [Interview](https://habr.com/en/post/496954/) |
| [Crazypanda](https://crazypanda.ru/en/){.favicon} | Games | | — | — | Live session on ClickHouse meetup |
| [FunCorp](https://fun.co/rp){.favicon} | Games | | — | — | [Article](https://www.altinity.com/blog/migrating-from-redshift-to-clickhouse) |
| Company | Industry | Usecase | Cluster Size | (Un)Compressed Data Size<abbr title="of single replica"><sup>\*</sup></abbr> | Reference |
|------------------------------------------------------------------------------------------------|---------------------------------|-----------------------|------------------------------------------------------------|------------------------------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| <a href="https://2gis.ru" class="favicon">2gis</a> | Maps | Monitoring | — | — | [Talk in Russian, July 2019](https://youtu.be/58sPkXfq6nw) |
| <a href="https://alohabrowser.com/" class="favicon">Aloha Browser</a> | Mobile App | Browser backend | — | — | [Slides in Russian, May 2019](https://presentations.clickhouse.tech/meetup22/aloha.pdf) |
| <a href="https://amadeus.com/" class="favicon">Amadeus</a> | Travel | Analytics | — | — | [Press Release, April 2018](https://www.altinity.com/blog/2018/4/5/amadeus-technologies-launches-investment-and-insights-tool-based-on-machine-learning-and-strategy-algorithms) |
| <a href="https://www.appsflyer.com" class="favicon">Appsflyer</a> | Mobile analytics | Main product | — | — | [Talk in Russian, July 2019](https://www.youtube.com/watch?v=M3wbRlcpBbY) |
| <a href="https://arenadata.tech/" class="favicon">ArenaData</a> | Data Platform | Main product | — | — | [Slides in Russian, December 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup38/indexes.pdf) |
| <a href="https://badoo.com" class="favicon">Badoo</a> | Dating | Timeseries | — | — | [Slides in Russian, December 2019](https://presentations.clickhouse.tech/meetup38/forecast.pdf) |
| <a href="https://www.benocs.com/" class="favicon">Benocs</a> | Network Telemetry and Analytics | Main Product | — | — | [Slides in English, October 2017](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup9/lpm.pdf) |
| <a href="https://www.bloomberg.com/" class="favicon">Bloomberg</a> | Finance, Media | Monitoring | 102 servers | — | [Slides, May 2018](https://www.slideshare.net/Altinity/http-analytics-for-6m-requests-per-second-using-clickhouse-by-alexander-bocharov) |
| <a href="https://bloxy.info" class="favicon">Bloxy</a> | Blockchain | Analytics | — | — | [Slides in Russian, August 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup17/4_bloxy.pptx) |
| <a href="https://www.chinatelecomglobal.com/" class="favicon">Dataliance for China Telecom</a> | Telecom | Analytics | — | — | [Slides in Chinese, January 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup12/telecom.pdf) |
| <a href="https://carto.com/" class="favicon">CARTO</a> | Business Intelligence | Geo analytics | — | — | [Geospatial processing with ClickHouse](https://carto.com/blog/geospatial-processing-with-clickhouse/) |
| <a href="http://public.web.cern.ch/public/" class="favicon">CERN</a> | Research | Experiment | — | — | [Press release, April 2012](https://www.yandex.com/company/press_center/press_releases/2012/2012-04-10/) |
| <a href="http://cisco.com/" class="favicon">Cisco</a> | Networking | Traffic analysis | — | — | [Lightning talk, October 2019](https://youtu.be/-hI1vDR2oPY?t=5057) |
| <a href="https://www.citadelsecurities.com/" class="favicon">Citadel Securities</a> | Finance | — | — | — | [Contribution, March 2019](https://github.com/ClickHouse/ClickHouse/pull/4774) |
| <a href="https://city-mobil.ru" class="favicon">Citymobil</a> | Taxi | Analytics | — | — | [Blog Post in Russian, March 2020](https://habr.com/en/company/citymobil/blog/490660/) |
| <a href="https://contentsquare.com" class="favicon">ContentSquare</a> | Web analytics | Main product | — | — | [Blog post in French, November 2018](http://souslecapot.net/2018/11/21/patrick-chatain-vp-engineering-chez-contentsquare-penser-davantage-amelioration-continue-que-revolution-constante/) |
| <a href="https://cloudflare.com" class="favicon">Cloudflare</a> | CDN | Traffic analysis | 36 servers | — | [Blog post, May 2017](https://blog.cloudflare.com/how-cloudflare-analyzes-1m-dns-queries-per-second/), [Blog post, March 2018](https://blog.cloudflare.com/http-analytics-for-6m-requests-per-second-using-clickhouse/) |
| <a href="https://coru.net/" class="favicon">Corunet</a> | Analytics | Main product | — | — | [Slides in English, April 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup21/predictive_models.pdf) |
| <a href="https://www.creditx.com" class="favicon">CraiditX 氪信</a> | Finance AI | Analysis | — | — | [Slides in English, November 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup33/udf.pptx) |
| <a href="https://www.criteo.com/" class="favicon">Criteo</a> | Retail | Main product | — | — | [Slides in English, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup18/3_storetail.pptx) |
| <a href="https://db.com" class="favicon">Deutsche Bank</a> | Finance | BI Analytics | — | — | [Slides in English, October 2019](https://bigdatadays.ru/wp-content/uploads/2019/10/D2-H3-3_Yakunin-Goihburg.pdf) |
| <a href="https://www.diva-e.com" class="favicon">Diva-e</a> | Digital consulting | Main Product | — | — | [Slides in English, September 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup29/ClickHouse-MeetUp-Unusual-Applications-sd-2019-09-17.pdf) |
| <a href="https://www.exness.com" class="favicon">Exness</a> | Trading | Metrics, Logging | — | — | [Talk in Russian, May 2019](https://youtu.be/_rpU-TvSfZ8?t=3215) |
| <a href="https://geniee.co.jp" class="favicon">Geniee</a> | Ad network | Main product | — | — | [Blog post in Japanese, July 2017](https://tech.geniee.co.jp/entry/2017/07/20/160100) |
| <a href="https://www.huya.com/" class="favicon">HUYA</a> | Video Streaming | Analytics | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/7.%20ClickHouse万亿数据分析实践%20李本旺(sundy-li)%20虎牙.pdf) |
| <a href="https://www.idealista.com" class="favicon">Idealista</a> | Real Estate | Analytics | — | — | [Blog Post in English, April 2019](https://clickhouse.tech/blog/en/clickhouse-meetup-in-madrid-on-april-2-2019) |
| <a href="https://www.infovista.com/" class="favicon">Infovista</a> | Networks | Analytics | — | — | [Slides in English, October 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup30/infovista.pdf) |
| <a href="https://www.innogames.com" class="favicon">InnoGames</a> | Games | Metrics, Logging | — | — | [Slides in Russian, September 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup28/graphite_and_clickHouse.pdf) |
| <a href="https://integros.com" class="favicon">Integros</a> | Platform for video services | Analytics | — | — | [Slides in Russian, May 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup22/strategies.pdf) |
| <a href="https://www.kodiakdata.com/" class="favicon">Kodiak Data</a> | Clouds | Main product | — | — | [Slides in Engish, April 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup13/kodiak_data.pdf) |
| <a href="https://kontur.ru" class="favicon">Kontur</a> | Software Development | Metrics | — | — | [Talk in Russian, November 2018](https://www.youtube.com/watch?v=U4u4Bd0FtrY) |
| <a href="https://www.lbl.gov" class="favicon">Lawrence Berkeley National Laboratory</a> | Research | Traffic analysis | 1 server | 11.8 TiB | [Slides in English, April 2019](https://www.smitasin.com/presentations/2019-04-17_DOE-NSM.pdf) |
| <a href="https://lifestreet.com/" class="favicon">LifeStreet</a> | Ad network | Main product | 75 servers (3 replicas) | 5.27 PiB | [Blog post in Russian, February 2017](https://habr.com/en/post/322620/) |
| <a href="https://mcs.mail.ru/" class="favicon">Mail.ru Cloud Solutions</a> | Cloud services | Main product | — | — | [Article in Russian](https://mcs.mail.ru/help/db-create/clickhouse#) |
| <a href="https://www.messagebird.com" class="favicon">MessageBird</a> | Telecommunications | Statistics | — | — | [Slides in English, November 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup20/messagebird.pdf) |
| <a href="https://www.mgid.com/" class="favicon">MGID</a> | Ad network | Web-analytics | — | — | [Blog post in Russian, April 2020](http://gs-studio.com/news-about-it/32777----clickhouse---c) |
| <a href="https://www.oneapm.com/" class="favicon">OneAPM</a> | Monitorings and Data Analysis | Main product | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/8.%20clickhouse在OneAPM的应用%20杜龙.pdf) |
| <a href="http://www.pragma-innovation.fr/" class="favicon">Pragma Innovation</a> | Telemetry and Big Data Analysis | Main product | — | — | [Slides in English, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup18/4_pragma_innovation.pdf) |
| <a href="https://www.qingcloud.com/" class="favicon">QINGCLOUD</a> | Cloud services | Main product | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/4.%20Cloud%20%2B%20TSDB%20for%20ClickHouse%20张健%20QingCloud.pdf) |
| <a href="https://qrator.net" class="favicon">Qrator</a> | DDoS protection | Main product | — | — | [Blog Post, March 2019](https://blog.qrator.net/en/clickhouse-ddos-mitigation_37/) |
| <a href="https://www.percent.cn/" class="favicon">Percent 百分点</a> | Analytics | Main Product | — | — | [Slides in Chinese, June 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup24/4.%20ClickHouse万亿数据双中心的设计与实践%20.pdf) |
| <a href="https://rambler.ru" class="favicon">Rambler</a> | Internet services | Analytics | — | — | [Talk in Russian, April 2018](https://medium.com/@ramblertop/разработка-api-clickhouse-для-рамблер-топ-100-f4c7e56f3141) |
| <a href="https://www.tencent.com" class="favicon">Tencent</a> | Messaging | Logging | — | — | [Talk in Chinese, November 2019](https://youtu.be/T-iVQRuw-QY?t=5050) |
| <a href="https://trafficstars.com/" class="favicon">Traffic Stars</a> | AD network | — | — | — | [Slides in Russian, May 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup15/lightning/ninja.pdf) |
| <a href="https://www.s7.ru" class="favicon">S7 Airlines</a> | Airlines | Metrics, Logging | — | — | [Talk in Russian, March 2019](https://www.youtube.com/watch?v=nwG68klRpPg&t=15s) |
| <a href="https://www.semrush.com/" class="favicon">SEMrush</a> | Marketing | Main product | — | — | [Slides in Russian, August 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup17/5_semrush.pdf) |
| <a href="https://www.scireum.de/" class="favicon">scireum GmbH</a> | e-Commerce | Main product | — | — | [Talk in German, February 2020](https://www.youtube.com/watch?v=7QWAn5RbyR4) |
| <a href="https://sentry.io/" class="favicon">Sentry</a> | Software Development | Main product | — | — | [Blog Post in English, May 2019](https://blog.sentry.io/2019/05/16/introducing-snuba-sentrys-new-search-infrastructure) |
| <a href="http://www.sgk.gov.tr/wps/portal/sgk/tr" class="favicon">SGK</a> | Goverment Social Security | Analytics | — | — | [Slides in English, November 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup35/ClickHouse%20Meetup-Ramazan%20POLAT.pdf) |
| <a href="https://seo.do/" class="favicon">seo.do</a> | Analytics | Main product | — | — | [Slides in English, November 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup35/CH%20Presentation-%20Metehan%20Çetinkaya.pdf) |
| <a href="http://english.sina.com/index.html" class="favicon">Sina</a> | News | — | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/6.%20ClickHouse最佳实践%20高鹏_新浪.pdf) |
| <a href="https://smi2.ru/" class="favicon">SMI2</a> | News | Analytics | — | — | [Blog Post in Russian, November 2017](https://habr.com/ru/company/smi2/blog/314558/) |
| <a href="https://www.splunk.com/" class="favicon">Splunk</a> | Business Analytics | Main product | — | — | [Slides in English, January 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup12/splunk.pdf) |
| <a href="https://www.spotify.com" class="favicon">Spotify</a> | Music | Experimentation | — | — | [Slides, July 2018](https://www.slideshare.net/glebus/using-clickhouse-for-experimentation-104247173) |
| <a href="https://www.tencent.com" class="favicon">Tencent</a> | Big Data | Data processing | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/5.%20ClickHouse大数据集群应用_李俊飞腾讯网媒事业部.pdf) |
| <a href="https://www.uber.com" class="favicon">Uber</a> | Taxi | Logging | — | — | [Slides, February 2020](https://presentations.clickhouse.tech/meetup40/uber.pdf) |
| <a href="https://vk.com" class="favicon">VKontakte</a> | Social Network | Statistics, Logging | — | — | [Slides in Russian, August 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup17/3_vk.pdf) |
| <a href="https://wisebits.com/" class="favicon">Wisebits</a> | IT Solutions | Analytics | — | — | [Slides in Russian, May 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup22/strategies.pdf) |
| <a href="http://www.xiaoxintech.cn/" class="favicon">Xiaoxin Tech</a> | Education | Common purpose | — | — | [Slides in English, November 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup33/sync-clickhouse-with-mysql-mongodb.pptx) |
| <a href="https://www.ximalaya.com/" class="favicon">Ximalaya</a> | Audio sharing | OLAP | — | — | [Slides in English, November 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup33/ximalaya.pdf) |
| <a href="https://cloud.yandex.ru/services/managed-clickhouse" class="favicon">Yandex Cloud</a> | Public Cloud | Main product | — | — | [Talk in Russian, December 2019](https://www.youtube.com/watch?v=pgnak9e_E0o) |
| <a href="https://cloud.yandex.ru/services/datalens" class="favicon">Yandex DataLens</a> | Business Intelligence | Main product | — | — | [Slides in Russian, December 2019](https://presentations.clickhouse.tech/meetup38/datalens.pdf) |
| <a href="https://market.yandex.ru/" class="favicon">Yandex Market</a> | e-Commerce | Metrics, Logging | — | — | [Talk in Russian, January 2019](https://youtu.be/_l1qP0DyBcA?t=478) |
| <a href="https://metrica.yandex.com" class="favicon">Yandex Metrica</a> | Web analytics | Main product | 360 servers in one cluster, 1862 servers in one department | 66.41 PiB / 5.68 PiB | [Slides, February 2020](https://presentations.clickhouse.tech/meetup40/introduction/#13) |
| <a href="https://htc-cs.ru/" class="favicon">ЦВТ</a> | Software Development | Metrics, Logging | — | — | [Blog Post, March 2019, in Russian](https://vc.ru/dev/62715-kak-my-stroili-monitoring-na-prometheus-clickhouse-i-elk) |
| <a href="https://mkb.ru/" class="favicon">МКБ</a> | Bank | Web-system monitoring | — | — | [Slides in Russian, September 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup28/mkb.pdf) |
| <a href="https://jinshuju.net" class="favicon">Jinshuju 金数据</a> | BI Analytics | Main product | — | — | [Slides in Chinese, October 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup24/3.%20金数据数据架构调整方案Public.pdf) |
| <a href="https://www.instana.com" class="favicon">Instana</a> | APM Platform | Main product | — | — | [Twitter post](https://twitter.com/mieldonkers/status/1248884119158882304) |
| <a href="https://wargaming.com/en/" class="favicon">Wargaming</a> | Games | | — | — | [Interview](https://habr.com/en/post/496954/) |
| <a href="https://crazypanda.ru/en/" class="favicon">Crazypanda</a> | Games | | — | — | Live session on ClickHouse meetup |
| <a href="https://fun.co/rp" class="favicon">FunCorp</a> | Games | | — | — | [Article](https://www.altinity.com/blog/migrating-from-redshift-to-clickhouse) |
[Original article](https://clickhouse.tech/docs/en/introduction/adopters/) <!--hide-->

View File

@ -5,7 +5,7 @@ toc_title: Distinctive Features
# Distinctive Features of ClickHouse {#distinctive-features-of-clickhouse}
## True Column-Oriented DBMS {#true-column-oriented-dbms}
## True Column-Oriented Database Management System {#true-column-oriented-dbms}
In a true column-oriented DBMS, no extra data is stored with the values. Among other things, this means that constant-length values must be supported, to avoid storing their length “number” next to the values. As an example, a billion UInt8-type values should consume around 1 GB uncompressed; otherwise this strongly affects CPU use. It is essential to store data compactly (without any “garbage”) even when uncompressed, since the speed of decompression (CPU usage) depends mainly on the volume of uncompressed data.
@ -15,11 +15,15 @@ Its also worth noting that ClickHouse is a database management system, not a
## Data Compression {#data-compression}
Some column-oriented DBMSs (InfiniDB CE and MonetDB) do not use data compression. However, data compression does play a key role in achieving excellent performance.
Some column-oriented DBMSs do not use data compression. However, data compression does play a key role in achieving excellent performance.
In addition to efficient general-purpose compression codecs with different trade-offs between disk space and CPU consumption, ClickHouse provides [specialized codecs](../sql-reference/statements/create.md#create-query-specialized-codecs) for specific kinds of data, which allow ClickHouse to compete with and outperform more niche databases, like time-series ones.
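As a rough sketch of how such specialized codecs are attached, they are declared per column when creating a table; the table and column names below are hypothetical, not taken from this documentation:

```sql
CREATE TABLE sensor_readings
(
    ts    DateTime CODEC(DoubleDelta, LZ4),  -- delta-of-delta encoding suits monotonically growing timestamps
    value Float64  CODEC(Gorilla, ZSTD)      -- Gorilla encoding suits slowly changing gauge values
)
ENGINE = MergeTree
ORDER BY ts;
```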
## Disk Storage of Data {#disk-storage-of-data}
Keeping data physically sorted by primary key makes it possible to extract data for its specific values or value ranges with low latency, less than a few dozen milliseconds. Some column-oriented DBMSs (such as SAP HANA and Google PowerDrill) can only work in RAM. This approach encourages the allocation of a larger hardware budget than is necessary for real-time analysis. ClickHouse is designed to work on regular hard drives, which means the cost per GB of data storage is low, but SSD and additional RAM are also fully used if available.
Keeping data physically sorted by primary key makes it possible to extract data for its specific values or value ranges with low latency, less than a few dozen milliseconds. Some column-oriented DBMSs (such as SAP HANA and Google PowerDrill) can only work in RAM. This approach encourages the allocation of a larger hardware budget than is necessary for real-time analysis.
ClickHouse is designed to work on regular hard drives, which means the cost per GB of data storage is low, but SSD and additional RAM are also fully used if available.
## Parallel Processing on Multiple Cores {#parallel-processing-on-multiple-cores}
@ -28,15 +32,18 @@ Large queries are parallelized naturally, taking all the necessary resources ava
## Distributed Processing on Multiple Servers {#distributed-processing-on-multiple-servers}
Almost none of the columnar DBMSs mentioned above have support for distributed query processing.
In ClickHouse, data can reside on different shards. Each shard can be a group of replicas used for fault tolerance. All shards are used to run a query in parallel, transparently for the user.
## SQL Support {#sql-support}
ClickHouse supports a declarative query language based on SQL that is identical to the SQL standard in many cases.
Supported queries include GROUP BY, ORDER BY, subqueries in FROM, IN, and JOIN clauses, and scalar subqueries.
Dependent subqueries and window functions are not supported.
ClickHouse supports a [declarative query language based on SQL](../sql-reference/index.md) that is identical to the ANSI SQL standard in [many cases](../sql-reference/ansi.md).
## Vector Engine {#vector-engine}
Supported queries include [GROUP BY](../sql-reference/statements/select/group-by.md), [ORDER BY](../sql-reference/statements/select/order-by.md), subqueries in [FROM](../sql-reference/statements/select/from.md), [JOIN](../sql-reference/statements/select/join.md) clause, [IN](../sql-reference/operators/in.md) operator, and scalar subqueries.
Correlated (dependent) subqueries and window functions are not supported at the time of writing but might become available in the future.
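For illustration, a query of the following shape combines several of the supported constructs; the `hits` and `users` tables are made up for this sketch:

```sql
SELECT
    UserID,
    count() AS page_views,
    uniq(URL) AS distinct_urls
FROM hits
WHERE EventDate >= today() - 7
  AND UserID IN (SELECT UserID FROM users WHERE Region = 'EU')  -- IN operator with a subquery
GROUP BY UserID
ORDER BY page_views DESC
LIMIT 10;
```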
## Vector Computation Engine {#vector-engine}
Data is not only stored by columns but is processed by vectors (parts of columns), which allows achieving high CPU efficiency.
@ -44,13 +51,19 @@ Data is not only stored by columns but is processed by vectors (parts of columns
ClickHouse supports tables with a primary key. To quickly perform queries on the range of the primary key, the data is sorted incrementally using the merge tree. Due to this, data can continually be added to the table. No locks are taken when new data is ingested.
## Index {#index}
## Primary Index {#primary-index}
Having the data physically sorted by primary key makes it possible to extract data for its specific values or value ranges with low latency, less than a few dozen milliseconds.
## Secondary Indexes {#secondary-indexes}
Unlike other database management systems, secondary indexes in ClickHouse do not point to specific rows or row ranges. Instead, they allow the database to know in advance that all rows in some data parts would not match the query filtering conditions, so it does not read those parts at all; this is why they are called [data skipping indexes](../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-data_skipping-indexes).
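A hedged sketch showing both kinds of indexes in a single table definition (all names are illustrative):

```sql
CREATE TABLE page_hits
(
    EventDate Date,
    UserID    UInt64,
    URL       String,
    -- data skipping index: whole granule ranges are skipped when the URL cannot match the filter
    INDEX url_bloom URL TYPE bloom_filter GRANULARITY 4
)
ENGINE = MergeTree
ORDER BY (EventDate, UserID);  -- the sparse primary index is built over this sort key
```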
## Suitable for Online Queries {#suitable-for-online-queries}
Low latency means that queries can be processed without delay and without trying to prepare an answer in advance, right at the same moment while the user interface page is loading. In other words, online.
Most OLAP database management systems don't aim for online queries with sub-second latencies. In alternative systems, report building times of tens of seconds or even minutes are often considered acceptable. Sometimes it takes even longer, which forces such systems to prepare reports offline (in advance or by responding with "come back later").
In ClickHouse low latency means that queries can be processed without delay and without trying to prepare an answer in advance, right at the same moment while the user interface page is loading. In other words, online.
## Support for Approximated Calculations {#support-for-approximated-calculations}
@ -60,16 +73,24 @@ ClickHouse provides various ways to trade accuracy for performance:
2. Running a query based on a part (sample) of data and getting an approximated result. In this case, proportionally less data is retrieved from the disk.
3. Running an aggregation for a limited number of random keys, instead of for all keys. Under certain conditions for key distribution in the data, this provides a reasonably accurate result while using fewer resources.
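The second approach above can be sketched with the `SAMPLE` clause, assuming a hypothetical `hits` table whose definition includes a `SAMPLE BY` expression; aggregates have to be scaled back manually or via the `_sample_factor` virtual column:

```sql
-- Reads roughly 10% of the data and scales the count accordingly
SELECT
    CounterID,
    count() * 10 AS estimated_hits
FROM hits
SAMPLE 1 / 10
GROUP BY CounterID
ORDER BY estimated_hits DESC
LIMIT 20;
```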
## Adaptive Join Algorithm
ClickHouse adaptively chooses how to [JOIN](../sql-reference/statements/select/join.md) multiple tables: it prefers the hash-join algorithm and falls back to the merge-join algorithm if there is more than one large table.
## Data Replication and Data Integrity Support {#data-replication-and-data-integrity-support}
ClickHouse uses asynchronous multi-master replication. After data is written to any available replica, all the remaining replicas retrieve their copy in the background. The system maintains identical data on different replicas. Recovery after most failures is performed automatically, or semi-automatically in complex cases.
For more information, see the section [Data replication](../engines/table-engines/mergetree-family/replication.md).
## Role-Based Access Control
ClickHouse implements user account management using SQL queries and allows for [role-based access control configuration](../operations/access-rights.md) similar to what can be found in ANSI SQL standard and popular relational database management systems.
## Features that Can Be Considered Disadvantages {#clickhouse-features-that-can-be-considered-disadvantages}
1. No full-fledged transactions.
2. Lack of ability to modify or delete already inserted data with high rate and low latency. There are batch deletes and updates available to clean up or modify data, for example to comply with [GDPR](https://gdpr-info.eu).
3. The sparse index makes ClickHouse not so suitable for point queries retrieving single rows by their keys.
2. Lack of ability to modify or delete already inserted data with a high rate and low latency. There are batch deletes and updates available to clean up or modify data, for example, to comply with [GDPR](https://gdpr-info.eu).
3. The sparse index makes ClickHouse not so efficient for point queries retrieving single rows by their keys.
[Original article](https://clickhouse.tech/docs/en/introduction/distinctive_features/) <!--hide-->
[Original article](https://clickhouse.tech/docs/en/introduction/distinctive-features/) <!--hide-->

View File

@ -16,17 +16,16 @@ ClickHouse access entities:
You can configure access entities using:
- SQL-driven workflow.
- SQL-driven workflow.
You need to [enable](#enabling-access-control) this functionality.
- Server [configuration files](configuration-files.md) `users.xml` and `config.xml`.
- Server [configuration files](../operations/configuration-files.md) `users.xml` and `config.xml`.
We recommend using SQL-driven workflow. Both of the configuration methods work simultaneously, so if you use the server configuration files for managing accounts and access rights, you can smoothly switch to SQL-driven workflow.
!!! note "Warning"
You can't manage the same access entity by both configuration methods simultaneously.
You can't manage the same access entity by both configuration methods simultaneously.
## Usage {#access-control-usage}
@ -34,45 +33,44 @@ By default, the ClickHouse server provides the `default` user account which is n
If you just started using ClickHouse, consider the following scenario:
1. [Enable](#enabling-access-control) SQL-driven access control and account management for the `default` user.
2. Log in to the `default` user account and create all the required users. Don't forget to create an administrator account (`GRANT ALL ON *.* WITH GRANT OPTION TO admin_user_account`).
3. [Restrict permissions](settings/permissions-for-queries.md#permissions_for_queries) for the `default` user and disable SQL-driven access control and account management for it.
1. [Enable](#enabling-access-control) SQL-driven access control and account management for the `default` user.
2. Log in to the `default` user account and create all the required users. Don't forget to create an administrator account (`GRANT ALL ON *.* WITH GRANT OPTION TO admin_user_account`).
3. [Restrict permissions](../operations/settings/permissions-for-queries.md#permissions_for_queries) for the `default` user and disable SQL-driven access control and account management for it.
### Properties of Current Solution {#access-control-properties}
- You can grant permissions for databases and tables even if they do not exist.
- If a table was deleted, all the privileges that correspond to this table are not revoked. This means that even if you create a new table with the same name later, all the privileges remain valid. To revoke privileges corresponding to the deleted table, you need to execute, for example, the `REVOKE ALL PRIVILEGES ON db.table FROM ALL` query.
- There are no lifetime settings for privileges.
- You can grant permissions for databases and tables even if they do not exist.
- If a table was deleted, all the privileges that correspond to this table are not revoked. This means that even if you create a new table with the same name later, all the privileges remain valid. To revoke privileges corresponding to the deleted table, you need to execute, for example, the `REVOKE ALL PRIVILEGES ON db.table FROM ALL` query.
- There are no lifetime settings for privileges.
## User account {#user-account-management}
## User Account {#user-account-management}
A user account is an access entity that allows authorizing someone in ClickHouse. A user account contains:
- Identification information.
- [Privileges](../sql-reference/statements/grant.md#grant-privileges) that define a scope of queries the user can execute.
- Hosts allowed to connect to the ClickHouse server.
- Assigned and default roles.
- Settings with their constraints applied by default at user login.
- Assigned settings profiles.
- Identification information.
- [Privileges](../sql-reference/statements/grant.md#grant-privileges) that define a scope of queries the user can execute.
- Hosts allowed to connect to the ClickHouse server.
- Assigned and default roles.
- Settings with their constraints applied by default at user login.
- Assigned settings profiles.
Privileges can be granted to a user account by the [GRANT](../sql-reference/statements/grant.md) query or by assigning [roles](#role-management). To revoke privileges from a user, ClickHouse provides the [REVOKE](../sql-reference/statements/revoke.md) query. To list privileges for a user, use the [SHOW GRANTS](../sql-reference/statements/show.md#show-grants-statement) statement.
Management queries:
- [CREATE USER](../sql-reference/statements/create.md#create-user-statement)
- [ALTER USER](../sql-reference/statements/alter.md#alter-user-statement)
- [DROP USER](../sql-reference/statements/misc.md#drop-user-statement)
- [SHOW CREATE USER](../sql-reference/statements/show.md#show-create-user-statement)
- [CREATE USER](../sql-reference/statements/create.md#create-user-statement)
- [ALTER USER](../sql-reference/statements/alter.md#alter-user-statement)
- [DROP USER](../sql-reference/statements/misc.md#drop-user-statement)
- [SHOW CREATE USER](../sql-reference/statements/show.md#show-create-user-statement)
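A minimal sequence tying these queries together; the user name, password, network, and database are placeholders for this sketch:

```sql
CREATE USER mira IDENTIFIED WITH sha256_password BY 'secret' HOST IP '10.0.0.0/8';
GRANT SELECT ON analytics.* TO mira;
SHOW GRANTS FOR mira;
```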
### Settings Applying {#access-control-settings-applying}
Settings can be configured differently: for a user account, in its granted roles and in settings profiles. At user login, if a setting is configured for different access entities, the value and constraints of this setting are applied as follows (from higher to lower priority):
1. User account settings.
2. The settings of default roles of the user account. If a setting is configured in some roles, then order of the setting application is undefined.
3. The settings from settings profiles assigned to a user or to its default roles. If a setting is configured in some profiles, then order of setting application is undefined.
4. Settings applied to all the server by default or from the [default profile](server-configuration-parameters/settings.md#default-profile).
1. User account settings.
2. The settings of default roles of the user account. If a setting is configured in some roles, then order of the setting application is undefined.
3. The settings from settings profiles assigned to a user or to its default roles. If a setting is configured in some profiles, then order of setting application is undefined.
4. Settings applied to all the server by default or from the [default profile](../operations/server-configuration-parameters/settings.md#default-profile).
## Role {#role-management}
@ -80,18 +78,18 @@ Role is a container for access entities that can be granted to a user account.
Role contains:
- [Privileges](../sql-reference/statements/grant.md#grant-privileges)
- Settings and constraints
- List of assigned roles
- [Privileges](../sql-reference/statements/grant.md#grant-privileges)
- Settings and constraints
- List of assigned roles
Management queries:
- [CREATE ROLE](../sql-reference/statements/create.md#create-role-statement)
- [ALTER ROLE](../sql-reference/statements/alter.md#alter-role-statement)
- [DROP ROLE](../sql-reference/statements/misc.md#drop-role-statement)
- [SET ROLE](../sql-reference/statements/misc.md#set-role-statement)
- [SET DEFAULT ROLE](../sql-reference/statements/misc.md#set-default-role-statement)
- [SHOW CREATE ROLE](../sql-reference/statements/show.md#show-create-role-statement)
- [CREATE ROLE](../sql-reference/statements/create.md#create-role-statement)
- [ALTER ROLE](../sql-reference/statements/alter.md#alter-role-statement)
- [DROP ROLE](../sql-reference/statements/misc.md#drop-role-statement)
- [SET ROLE](../sql-reference/statements/misc.md#set-role-statement)
- [SET DEFAULT ROLE](../sql-reference/statements/misc.md#set-default-role-statement)
- [SHOW CREATE ROLE](../sql-reference/statements/show.md#show-create-role-statement)
Privileges can be granted to a role by the [GRANT](../sql-reference/statements/grant.md) query. To revoke privileges from a role, ClickHouse provides the [REVOKE](../sql-reference/statements/revoke.md) query.
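For example, a role can be created, granted privileges, and then handed to a user roughly like this (all names are hypothetical):

```sql
CREATE ROLE accountant;
GRANT SELECT ON finance.* TO accountant;   -- privileges are attached to the role
GRANT accountant TO mira;                  -- the role is granted to a user
SET DEFAULT ROLE accountant TO mira;       -- and activated by default at login
```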
@ -101,47 +99,43 @@ Row policy is a filter that defines which of the rows are available to a user or
Management queries:
- [CREATE ROW POLICY](../sql-reference/statements/create.md#create-row-policy-statement)
- [ALTER ROW POLICY](../sql-reference/statements/alter.md#alter-row-policy-statement)
- [DROP ROW POLICY](../sql-reference/statements/misc.md#drop-row-policy-statement)
- [SHOW CREATE ROW POLICY](../sql-reference/statements/show.md#show-create-row-policy-statement)
- [CREATE ROW POLICY](../sql-reference/statements/create.md#create-row-policy-statement)
- [ALTER ROW POLICY](../sql-reference/statements/alter.md#alter-row-policy-statement)
- [DROP ROW POLICY](../sql-reference/statements/misc.md#drop-row-policy-statement)
- [SHOW CREATE ROW POLICY](../sql-reference/statements/show.md#show-create-row-policy-statement)
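A sketch of a row policy that limits a hypothetical `sales.orders` table to one region for a given role:

```sql
CREATE ROW POLICY eu_only ON sales.orders
FOR SELECT
USING region = 'EU'
TO accountant;
```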
## Settings Profile {#settings-profiles-management}
Settings profile is a collection of [settings](settings/index.md). Settings profile contains settings and constraints, as well as a list of roles and/or users to which this profile is applied.
Settings profile is a collection of [settings](../operations/settings/index.md). Settings profile contains settings and constraints, as well as a list of roles and/or users to which this profile is applied.
Management queries:
- [CREATE SETTINGS PROFILE](../sql-reference/statements/create.md#create-settings-profile-statement)
- [ALTER SETTINGS PROFILE](../sql-reference/statements/alter.md#alter-settings-profile-statement)
- [DROP SETTINGS PROFILE](../sql-reference/statements/misc.md#drop-settings-profile-statement)
- [SHOW CREATE SETTINGS PROFILE](../sql-reference/statements/show.md#show-create-settings-profile-statement)
- [CREATE SETTINGS PROFILE](../sql-reference/statements/create.md#create-settings-profile-statement)
- [ALTER SETTINGS PROFILE](../sql-reference/statements/alter.md#alter-settings-profile-statement)
- [DROP SETTINGS PROFILE](../sql-reference/statements/misc.md#drop-settings-profile-statement)
- [SHOW CREATE SETTINGS PROFILE](../sql-reference/statements/show.md#show-create-settings-profile-statement)
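For instance, a profile that constrains memory usage and is assigned to a role might look as follows; the profile name and limits are illustrative:

```sql
CREATE SETTINGS PROFILE memory_capped
SETTINGS max_memory_usage = 10000000000 MIN 5000000000 MAX 20000000000
TO accountant;
```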
## Quota {#quotas-management}
Quota limits resource usage. See [Quotas](quotas.md).
Quota limits resource usage. See [Quotas](../operations/quotas.md).
Quota contains a set of limits for some durations, as well as a list of roles and/or users which should use this quota.
Management queries:
- [CREATE QUOTA](../sql-reference/statements/create.md#create-quota-statement)
- [ALTER QUOTA](../sql-reference/statements/alter.md#alter-quota-statement)
- [DROP QUOTA](../sql-reference/statements/misc.md#drop-quota-statement)
- [SHOW CREATE QUOTA](../sql-reference/statements/show.md#show-create-quota-statement)
- [CREATE QUOTA](../sql-reference/statements/create.md#create-quota-statement)
- [ALTER QUOTA](../sql-reference/statements/alter.md#alter-quota-statement)
- [DROP QUOTA](../sql-reference/statements/misc.md#drop-quota-statement)
- [SHOW CREATE QUOTA](../sql-reference/statements/show.md#show-create-quota-statement)
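A hedged sketch of a quota limiting the number of queries over a week; the exact clause spelling has varied slightly across ClickHouse versions, so treat this as an approximation:

```sql
CREATE QUOTA weekly_quota
FOR INTERVAL 1 WEEK MAX QUERIES 1000
TO accountant;
```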
## Enabling SQL-driven Access Control and Account Management {#enabling-access-control}
- Set up a directory for configuration storage.
- Set up a directory for configuration storage.
ClickHouse stores access entity configurations in the folder set in the [access_control_path](server-configuration-parameters/settings.md#access_control_path) server configuration parameter.
ClickHouse stores access entity configurations in the folder set in the [access\_control\_path](../operations/server-configuration-parameters/settings.md#access_control_path) server configuration parameter.
- Enable SQL-driven access control and account management for at least one user account.
By default, SQL-driven access control and account management is disabled for all users. You need to configure at least one user in the `users.xml` configuration file and set the value of the [access_management](settings/settings-users.md#access_management-user-setting) setting to 1.
- Enable SQL-driven access control and account management for at least one user account.
By default, SQL-driven access control and account management is disabled for all users. You need to configure at least one user in the `users.xml` configuration file and set the value of the [access\_management](../operations/settings/settings-users.md#access_management-user-setting) setting to 1.
[Original article](https://clickhouse.tech/docs/en/operations/access_rights/) <!--hide-->

View File

@ -24,7 +24,7 @@ Some local filesystems provide snapshot functionality (for example, [ZFS](https:
## clickhouse-copier {#clickhouse-copier}
[clickhouse-copier](utilities/clickhouse-copier.md) is a versatile tool that was initially created to re-shard petabyte-sized tables. It can also be used for backup and restore purposes because it reliably copies data between ClickHouse tables and clusters.
[clickhouse-copier](../operations/utilities/clickhouse-copier.md) is a versatile tool that was initially created to re-shard petabyte-sized tables. It can also be used for backup and restore purposes because it reliably copies data between ClickHouse tables and clusters.
For smaller volumes of data, a simple `INSERT INTO ... SELECT ...` to remote tables might work as well.

View File

@ -9,7 +9,7 @@ ClickHouse supports multi-file configuration management. The main server configu
All the configuration files should be in XML format. Also, they should have the same root element, usually `<yandex>`.
## Override
## Override {#override}
Some settings specified in the main configuration file can be overridden in other configuration files:
@ -18,13 +18,13 @@ Some settings specified in the main configuration file can be overridden in othe
- If `replace` is specified, it replaces the entire element with the specified one.
- If `remove` is specified, it deletes the element.
## Substitution
## Substitution {#substitution}
The config can also define "substitutions". If an element has the `incl` attribute, the corresponding substitution from the file will be used as the value. By default, the path to the file with substitutions is `/etc/metrika.xml`. This can be changed in the [include\_from](server-configuration-parameters/settings.md#server_configuration_parameters-include_from) element in the server config. The substitution values are specified in `/yandex/substitution_name` elements in this file. If a substitution specified in `incl` does not exist, it is recorded in the log. To prevent ClickHouse from logging missing substitutions, specify the `optional="true"` attribute (for example, settings for [macros](server-configuration-parameters/settings.md)).
The config can also define “substitutions”. If an element has the `incl` attribute, the corresponding substitution from the file will be used as the value. By default, the path to the file with substitutions is `/etc/metrika.xml`. This can be changed in the [include\_from](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-include_from) element in the server config. The substitution values are specified in `/yandex/substitution_name` elements in this file. If a substitution specified in `incl` does not exist, it is recorded in the log. To prevent ClickHouse from logging missing substitutions, specify the `optional="true"` attribute (for example, settings for [macros](../operations/server-configuration-parameters/settings.md)).
Substitutions can also be performed from ZooKeeper. To do this, specify the attribute `from_zk = "/path/to/node"`. The element value is replaced with the contents of the node at `/path/to/node` in ZooKeeper. You can also put an entire XML subtree on the ZooKeeper node and it will be fully inserted into the source element.
## User Settings
## User Settings {#user-settings}
The `config.xml` file can specify a separate config with user settings, profiles, and quotas. The relative path to this config is set in the `users_config` element. By default, it is `users.xml`. If `users_config` is omitted, the user settings, profiles, and quotas are specified directly in `config.xml`.
@ -32,7 +32,7 @@ Users configuration can be split into separate files similar to `config.xml`
Directory name is defined as `users_config` setting without `.xml` postfix concatenated with `.d`.
Directory `users.d` is used by default, as `users_config` defaults to `users.xml`.
## Example
## Example {#example}
For example, you can have separate config file for each user like this:
@ -55,7 +55,7 @@ $ cat /etc/clickhouse-server/users.d/alice.xml
</yandex>
```
## Implementation Details
## Implementation Details {#implementation-details}
For each config file, the server also generates `file-preprocessed.xml` files when starting. These files contain all the completed substitutions and overrides, and they are intended for informational use. If ZooKeeper substitutions were used in the config files but ZooKeeper is not available on the server start, the server loads the configuration from the preprocessed file.

View File

@ -8,19 +8,19 @@ toc_title: Introduction
ClickHouse operations manual consists of the following major sections:
- [Requirements](requirements.md)
- [Monitoring](monitoring.md)
- [Troubleshooting](troubleshooting.md)
- [Usage Recommendations](tips.md)
- [Update Procedure](update.md)
- [Access Rights](access-rights.md)
- [Data Backup](backup.md)
- [Configuration Files](configuration-files.md)
- [Quotas](quotas.md)
- [System Tables](system-tables.md)
- [Server Configuration Parameters](server-configuration-parameters/index.md)
- [How To Test Your Hardware With ClickHouse](performance-test.md)
- [Settings](settings/index.md)
- [Utilities](utilities/index.md)
- [Requirements](../operations/requirements.md)
- [Monitoring](../operations/monitoring.md)
- [Troubleshooting](../operations/troubleshooting.md)
- [Usage Recommendations](../operations/tips.md)
- [Update Procedure](../operations/update.md)
- [Access Rights](../operations/access-rights.md)
- [Data Backup](../operations/backup.md)
- [Configuration Files](../operations/configuration-files.md)
- [Quotas](../operations/quotas.md)
- [System Tables](../operations/system-tables/index.md)
- [Server Configuration Parameters](../operations/server-configuration-parameters/index.md)
- [How To Test Your Hardware With ClickHouse](../operations/performance-test.md)
- [Settings](../operations/settings/index.md)
- [Utilities](../operations/utilities/index.md)
{## [Original article](https://clickhouse.tech/docs/en/operations/) ##}

View File

@ -26,19 +26,19 @@ It is highly recommended to set up monitoring for:
ClickHouse server has embedded instruments for self-state monitoring.
To track server events use server logs. See the [logger](server-configuration-parameters/settings.md#server_configuration_parameters-logger) section of the configuration file.
To track server events use server logs. See the [logger](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-logger) section of the configuration file.
ClickHouse collects:
- Different metrics of how the server uses computational resources.
- Common statistics on query processing.
You can find metrics in the [system.metrics](../operations/system-tables.md#system_tables-metrics), [system.events](../operations/system-tables.md#system_tables-events), and [system.asynchronous\_metrics](../operations/system-tables.md#system_tables-asynchronous_metrics) tables.
You can find metrics in the [system.metrics](../operations/system-tables/metrics.md#system_tables-metrics), [system.events](../operations/system-tables/events.md#system_tables-events), and [system.asynchronous\_metrics](../operations/system-tables/asynchronous_metrics.md#system_tables-asynchronous_metrics) tables.
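These tables can be queried directly like any other table, for example:

```sql
SELECT * FROM system.metrics WHERE metric LIKE '%Connection%';
SELECT event, value FROM system.events ORDER BY value DESC LIMIT 10;
```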
You can configure ClickHouse to export metrics to [Graphite](https://github.com/graphite-project). See the [Graphite section](server-configuration-parameters/settings.md#server_configuration_parameters-graphite) in the ClickHouse server configuration file. Before configuring export of metrics, you should set up Graphite by following their official [guide](https://graphite.readthedocs.io/en/latest/install.html).
You can configure ClickHouse to export metrics to [Graphite](https://github.com/graphite-project). See the [Graphite section](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-graphite) in the ClickHouse server configuration file. Before configuring export of metrics, you should set up Graphite by following their official [guide](https://graphite.readthedocs.io/en/latest/install.html).
You can configure ClickHouse to export metrics to [Prometheus](https://prometheus.io). See the [Prometheus section](server-configuration-parameters/settings.md#server_configuration_parameters-prometheus) in the ClickHouse server configuration file. Before configuring export of metrics, you should set up Prometheus by following their official [guide](https://prometheus.io/docs/prometheus/latest/installation/).
You can configure ClickHouse to export metrics to [Prometheus](https://prometheus.io). See the [Prometheus section](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-prometheus) in the ClickHouse server configuration file. Before configuring export of metrics, you should set up Prometheus by following their official [guide](https://prometheus.io/docs/prometheus/latest/installation/).
Additionally, you can monitor server availability through the HTTP API. Send the `HTTP GET` request to `/ping`. If the server is available, it responds with `200 OK`.
To monitor servers in a cluster configuration, you should set the [max\_replica\_delay\_for\_distributed\_queries](settings/settings.md#settings-max_replica_delay_for_distributed_queries) parameter and use the HTTP resource `/replicas_status`. A request to `/replicas_status` returns `200 OK` if the replica is available and is not delayed behind the other replicas. If a replica is delayed, it returns `503 HTTP_SERVICE_UNAVAILABLE` with information about the gap.
To monitor servers in a cluster configuration, you should set the [max\_replica\_delay\_for\_distributed\_queries](../operations/settings/settings.md#settings-max_replica_delay_for_distributed_queries) parameter and use the HTTP resource `/replicas_status`. A request to `/replicas_status` returns `200 OK` if the replica is available and is not delayed behind the other replicas. If a replica is delayed, it returns `503 HTTP_SERVICE_UNAVAILABLE` with information about the gap.

View File

@ -9,11 +9,11 @@ ClickHouse runs sampling profiler that allows analyzing query execution. Using p
To use profiler:
- Set up the [trace\_log](../server-configuration-parameters/settings.md#server_configuration_parameters-trace_log) section of the server configuration.
- Set up the [trace\_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-trace_log) section of the server configuration.
This section configures the [trace\_log](../../operations/system-tables.md#system_tables-trace_log) system table containing the results of the profiler functioning. It is configured by default. Remember that data in this table is valid only for a running server. After the server restart, ClickHouse doesn't clean up the table and all the stored virtual memory addresses may become invalid.
This section configures the [trace\_log](../../operations/system-tables/trace_log.md#system_tables-trace_log) system table containing the results of the profiler functioning. It is configured by default. Remember that data in this table is valid only for a running server. After the server restart, ClickHouse doesn't clean up the table and all the stored virtual memory addresses may become invalid.
- Set up the [query\_profiler\_cpu\_time\_period\_ns](../settings/settings.md#query_profiler_cpu_time_period_ns) or [query\_profiler\_real\_time\_period\_ns](../settings/settings.md#query_profiler_real_time_period_ns) settings. Both settings can be used simultaneously.
- Set up the [query\_profiler\_cpu\_time\_period\_ns](../../operations/settings/settings.md#query_profiler_cpu_time_period_ns) or [query\_profiler\_real\_time\_period\_ns](../../operations/settings/settings.md#query_profiler_real_time_period_ns) settings. Both settings can be used simultaneously.
These settings allow you to configure profiler timers. As these are the session settings, you can get different sampling frequency for the whole server, individual users or user profiles, for your interactive session, and for each individual query.
@ -23,7 +23,7 @@ To analyze the `trace_log` system table:
- Install the `clickhouse-common-static-dbg` package. See [Install from DEB Packages](../../getting-started/install.md#install-from-deb-packages).
- Allow introspection functions by the [allow\_introspection\_functions](../settings/settings.md#settings-allow_introspection_functions) setting.
- Allow introspection functions by the [allow\_introspection\_functions](../../operations/settings/settings.md#settings-allow_introspection_functions) setting.
For security reasons, introspection functions are disabled by default.
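Once introspection functions are enabled, the collected samples can be aggregated into readable stacks with a query along these lines (a sketch that assumes the profiler has already written samples for the current day):

```sql
SELECT
    count() AS samples,
    arrayStringConcat(arrayMap(x -> demangle(addressToSymbol(x)), trace), '\n') AS stack
FROM system.trace_log
WHERE event_date = today()
GROUP BY trace
ORDER BY samples DESC
LIMIT 5;
```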

View File

@ -24,7 +24,7 @@ With this instruction you can run basic ClickHouse performance test on any serve
# Then do:
chmod a+x clickhouse
5. Download configs:
1. Download configs:
<!-- -->
@ -34,7 +34,7 @@ With this instruction you can run basic ClickHouse performance test on any serve
wget https://raw.githubusercontent.com/ClickHouse/ClickHouse/master/programs/server/config.d/path.xml -O config.d/path.xml
wget https://raw.githubusercontent.com/ClickHouse/ClickHouse/master/programs/server/config.d/log_to_console.xml -O config.d/log_to_console.xml
6. Download benchmark files:
1. Download benchmark files:
<!-- -->
@ -42,7 +42,7 @@ With this instruction you can run basic ClickHouse performance test on any serve
chmod a+x benchmark-new.sh
wget https://raw.githubusercontent.com/ClickHouse/ClickHouse/master/benchmark/clickhouse/queries.sql
7. Download test data according to the [Yandex.Metrica dataset](../getting-started/example-datasets/metrica.md) instruction (“hits” table containing 100 million rows).
1. Download test data according to the [Yandex.Metrica dataset](../getting-started/example-datasets/metrica.md) instruction (“hits” table containing 100 million rows).
<!-- -->
@ -50,31 +50,31 @@ With this instruction you can run basic ClickHouse performance test on any serve
tar xvf hits_100m_obfuscated_v1.tar.xz -C .
mv hits_100m_obfuscated_v1/* .
8. Run the server:
1. Run the server:
<!-- -->
./clickhouse server
9. Check the data: ssh to the server in another terminal
1. Check the data: ssh to the server in another terminal
<!-- -->
./clickhouse client --query "SELECT count() FROM hits_100m_obfuscated"
100000000
10. Edit the benchmark-new.sh, change `clickhouse-client` to `./clickhouse client` and add `-max_memory_usage 100000000000` parameter.
1. Edit the benchmark-new.sh, change `clickhouse-client` to `./clickhouse client` and add `-max_memory_usage 100000000000` parameter.
<!-- -->
mcedit benchmark-new.sh
11. Run the benchmark:
1. Run the benchmark:
<!-- -->
./benchmark-new.sh hits_100m_obfuscated
12. Send the numbers and the info about your hardware configuration to clickhouse-feedback@yandex-team.com
1. Send the numbers and the info about your hardware configuration to clickhouse-feedback@yandex-team.com
All the results are published here: https://clickhouse.tech/benchmark/hardware/

View File

@ -10,8 +10,8 @@ This section contains descriptions of server settings that cannot be changed at
These settings are stored in the `config.xml` file on the ClickHouse server.
Other settings are described in the “[Settings](../settings/index.md#session-settings-intro)” section.
Other settings are described in the “[Settings](../../operations/settings/index.md#session-settings-intro)” section.
Before studying the settings, read the [Configuration files](../configuration-files.md#configuration_files) section and note the use of substitutions (the `incl` and `optional` attributes).
Before studying the settings, read the [Configuration files](../../operations/configuration-files.md#configuration_files) section and note the use of substitutions (the `incl` and `optional` attributes).
[Original article](https://clickhouse.tech/docs/en/operations/server_configuration_parameters/) <!--hide-->

View File

@ -145,10 +145,10 @@ Settings:
- interval The interval for sending, in seconds.
- timeout The timeout for sending data, in seconds.
- root\_path Prefix for keys.
- metrics Sending data from the [system.metrics](../../operations/system-tables.md#system_tables-metrics) table.
- events Sending deltas data accumulated for the time period from the [system.events](../../operations/system-tables.md#system_tables-events) table.
- events\_cumulative Sending cumulative data from the [system.events](../../operations/system-tables.md#system_tables-events) table.
- asynchronous\_metrics Sending data from the [system.asynchronous\_metrics](../../operations/system-tables.md#system_tables-asynchronous_metrics) table.
- metrics Sending data from the [system.metrics](../../operations/system-tables/metrics.md#system_tables-metrics) table.
- events Sending deltas data accumulated for the time period from the [system.events](../../operations/system-tables/events.md#system_tables-events) table.
- events\_cumulative Sending cumulative data from the [system.events](../../operations/system-tables/events.md#system_tables-events) table.
- asynchronous\_metrics Sending data from the [system.asynchronous\_metrics](../../operations/system-tables/asynchronous_metrics.md#system_tables-asynchronous_metrics) table.
You can configure multiple `<graphite>` clauses. For instance, you can use this for sending different data at different intervals.
@ -229,7 +229,7 @@ Opens `https://tabix.io/` when accessing `http://localhost: http_port`.
The path to the file with substitutions.
For more information, see the section “[Configuration files](../configuration-files.md#configuration_files)”.
For more information, see the section “[Configuration files](../../operations/configuration-files.md#configuration_files)”.
**Example**
@ -527,7 +527,7 @@ Keys for server/client settings:
Logging events that are associated with [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md). For instance, adding or merging data. You can use the log to simulate merge algorithms and compare their characteristics. You can visualize the merge process.
Queries are logged in the [system.part\_log](../../operations/system-tables.md#system_tables-part-log) table, not in a separate file. You can configure the name of this table in the `table` parameter (see below).
Queries are logged in the [system.part\_log](../../operations/system-tables/part_log.md#system_tables-part-log) table, not in a separate file. You can configure the name of this table in the `table` parameter (see below).
Use the following parameters to configure logging:
@ -568,9 +568,9 @@ Settings:
- `endpoint` HTTP endpoint for scraping metrics by prometheus server. Start from /.
- `port` Port for `endpoint`.
- `metrics` Flag that sets to expose metrics from the [system.metrics](../system-tables.md#system_tables-metrics) table.
- `events` Flag that sets to expose metrics from the [system.events](../system-tables.md#system_tables-events) table.
- `asynchronous_metrics` Flag that sets to expose current metrics values from the [system.asynchronous\_metrics](../system-tables.md#system_tables-asynchronous_metrics) table.
- `metrics` Flag that sets to expose metrics from the [system.metrics](../../operations/system-tables/metrics.md#system_tables-metrics) table.
- `events` Flag that sets to expose metrics from the [system.events](../../operations/system-tables/events.md#system_tables-events) table.
- `asynchronous_metrics` Flag that sets to expose current metrics values from the [system.asynchronous\_metrics](../../operations/system-tables/asynchronous_metrics.md#system_tables-asynchronous_metrics) table.
**Example**
@ -586,9 +586,9 @@ Settings:
## query\_log {#server_configuration_parameters-query-log}
Setting for logging queries received with the [log\_queries=1](../settings/settings.md) setting.
Setting for logging queries received with the [log\_queries=1](../../operations/settings/settings.md) setting.
Queries are logged in the [system.query\_log](../../operations/system-tables.md#system_tables-query_log) table, not in a separate file. You can change the name of the table in the `table` parameter (see below).
Queries are logged in the [system.query\_log](../../operations/system-tables/query_log.md#system_tables-query_log) table, not in a separate file. You can change the name of the table in the `table` parameter (see below).
Use the following parameters to configure logging:
@ -612,9 +612,9 @@ If the table doesn't exist, ClickHouse will create it. If the structure of the
## query\_thread\_log {#server_configuration_parameters-query_thread_log}
Setting for logging threads of queries received with the [log\_query\_threads=1](../settings/settings.md#settings-log-query-threads) setting.
Setting for logging threads of queries received with the [log\_query\_threads=1](../../operations/settings/settings.md#settings-log-query-threads) setting.
Queries are logged in the [system.query\_thread\_log](../../operations/system-tables.md#system_tables-query_thread_log) table, not in a separate file. You can change the name of the table in the `table` parameter (see below).
Queries are logged in the [system.query\_thread\_log](../../operations/system-tables/query_thread_log.md#system_tables-query_thread_log) table, not in a separate file. You can change the name of the table in the `table` parameter (see below).
Use the following parameters to configure logging:
@ -638,7 +638,7 @@ If the table doesn't exist, ClickHouse will create it. If the structure of the
## trace\_log {#server_configuration_parameters-trace_log}
Settings for the [trace\_log](../../operations/system-tables.md#system_tables-trace_log) system table operation.
Settings for the [trace\_log](../../operations/system-tables/trace_log.md#system_tables-trace_log) system table operation.
Parameters:
@ -699,11 +699,11 @@ Configuration of clusters used by the [Distributed](../../engines/table-engines/
<remote_servers incl="clickhouse_remote_servers" />
```
For the value of the `incl` attribute, see the section “[Configuration files](../configuration-files.md#configuration_files)”.
For the value of the `incl` attribute, see the section “[Configuration files](../../operations/configuration-files.md#configuration_files)”.
**See Also**
- [skip\_unavailable\_shards](../settings/settings.md#settings-skip_unavailable_shards)
- [skip\_unavailable\_shards](../../operations/settings/settings.md#settings-skip_unavailable_shards)
## timezone {#server_configuration_parameters-timezone}
@ -729,7 +729,7 @@ Port for communicating with clients over the TCP protocol.
<tcp_port>9000</tcp_port>
```
## tcp_port_secure {#server_configuration_parameters-tcp_port_secure}
## tcp\_port\_secure {#server_configuration_parameters-tcp_port_secure}
TCP port for secure communication with clients. Use it with [OpenSSL](#server_configuration_parameters-openssl) settings.
@ -757,7 +757,7 @@ Example
<mysql_port>9004</mysql_port>
```
## tmp_path {#tmp-path}
## tmp\_path {#tmp-path}
Path to temporary data for processing large queries.
@ -770,23 +770,23 @@ Path to temporary data for processing large queries.
<tmp_path>/var/lib/clickhouse/tmp/</tmp_path>
```
## tmp_policy {#tmp-policy}
## tmp\_policy {#tmp-policy}
Policy from [storage_configuration](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes) to store temporary files.
Policy from [storage\_configuration](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes) to store temporary files.
If not set, [tmp_path](#tmp-path) is used, otherwise it is ignored.
If not set, [tmp\_path](#tmp-path) is used, otherwise it is ignored.
!!! note "Note"
- `move_factor` is ignored.
- `keep_free_space_bytes` is ignored.
- `max_data_part_size_bytes` is ignored.
- You must have exactly one volume in that policy.
- `keep_free_space_bytes` is ignored.
- `max_data_part_size_bytes` is ignored.
- You must have exactly one volume in that policy.
## uncompressed\_cache\_size {#server-settings-uncompressed_cache_size}
Cache size (in bytes) for uncompressed data used by table engines from the [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md).
There is one shared cache for the server. Memory is allocated on demand. The cache is used if the option [use\_uncompressed\_cache](../settings/settings.md#setting-use_uncompressed_cache) is enabled.
There is one shared cache for the server. Memory is allocated on demand. The cache is used if the option [use\_uncompressed\_cache](../../operations/settings/settings.md#setting-use_uncompressed_cache) is enabled.
The uncompressed cache is advantageous for very short queries in individual cases.
@ -918,9 +918,9 @@ The update is performed asynchronously, in a separate system thread.
**See also**
- [background_schedule_pool_size](../settings/settings.md#background_schedule_pool_size)
- [background\_schedule\_pool\_size](../../operations/settings/settings.md#background_schedule_pool_size)
## access_control_path {#access_control_path}
## access\_control\_path {#access_control_path}
Path to a folder where a ClickHouse server stores user and role configurations created by SQL commands.
@ -928,6 +928,6 @@ Default value: `/var/lib/clickhouse/access/`.
**See also**
- [Access Control and Account Management](../access-rights.md#access-control)
- [Access Control and Account Management](../../operations/access-rights.md#access-control)
[Original article](https://clickhouse.tech/docs/en/operations/server_configuration_parameters/settings/) <!--hide-->

View File

@ -37,7 +37,7 @@ After setting `readonly = 1`, the user cant change `readonly` and `allow_ddl`
When using the `GET` method in the [HTTP interface](../../interfaces/http.md), `readonly = 1` is set automatically. To modify data, use the `POST` method.
Setting `readonly = 1` prohibits the user from changing all the settings. There is a way to prohibit the user
from changing only specific settings, for details see [constraints on settings](constraints-on-settings.md).
from changing only specific settings, for details see [constraints on settings](../../operations/settings/constraints-on-settings.md).
Default value: 0

View File

@ -113,7 +113,7 @@ Limit on the number of bytes in the result. The same as the previous setting.
What to do if the volume of the result exceeds one of the limits: throw or break. By default, throw.
Using break is similar to using LIMIT. `Break` interrupts execution only at the block level. This means that the amount of returned rows is greater than [max\_result\_rows](#setting-max_result_rows), is a multiple of [max\_block\_size](settings.md#setting-max_block_size) and depends on [max_threads](settings.md#settings-max_threads).
Using break is similar to using LIMIT. `Break` interrupts execution only at the block level. This means that the amount of returned rows is greater than [max\_result\_rows](#setting-max_result_rows), is a multiple of [max\_block\_size](../../operations/settings/settings.md#setting-max_block_size) and depends on [max\_threads](../../operations/settings/settings.md#settings-max_threads).
Example:

View File

@ -8,8 +8,7 @@ toc_title: Settings Profiles
A settings profile is a collection of settings grouped under the same name.
!!! note "Information"
ClickHouse also supports [SQL-driven workflow](../access-rights.md#access-control) for managing settings profiles. We recommend using it.
ClickHouse also supports [SQL-driven workflow](../../operations/access-rights.md#access-control) for managing settings profiles. We recommend using it.
The profile can have any name. You can specify the same profile for different users. The most important thing you can write in the settings profile is `readonly=1`, which ensures read-only access.
@ -71,9 +70,9 @@ Example:
</profiles>
```
The example specifies two profiles: `default` and `web`.
The example specifies two profiles: `default` and `web`.
The `default` profile has a special purpose: it must always be present and is applied when starting the server. In other words, the `default` profile contains default settings.
The `default` profile has a special purpose: it must always be present and is applied when starting the server. In other words, the `default` profile contains default settings.
The `web` profile is a regular profile that can be set using the `SET` query or using a URL parameter in an HTTP query.

View File

@ -8,8 +8,7 @@ toc_title: User Settings
The `users` section of the `users.xml` configuration file contains user settings.
!!! note "Information"
ClickHouse also supports [SQL-driven workflow](../access-rights.md#access-control) for managing users. We recommend using it.
ClickHouse also supports [SQL-driven workflow](../../operations/access-rights.md#access-control) for managing users. We recommend using it.
Structure of the `users` section:
@ -74,14 +73,14 @@ Password can be specified in plaintext or in SHA256 (hex format).
The first line of the result is the password. The second line is the corresponding double SHA1 hash.
### access_management {#access_management-user-setting}
### access\_management {#access_management-user-setting}
This setting enables or disables using of SQL-driven [access control and account management](../access-rights.md#access-control) for the user.
This setting enables or disables using of SQL-driven [access control and account management](../../operations/access-rights.md#access-control) for the user.
Possible values:
- 0 — Disabled.
- 1 — Enabled.
- 0 — Disabled.
- 1 — Enabled.
Default value: 0.
@ -129,14 +128,14 @@ To open access only from localhost, specify:
### user\_name/profile {#user-nameprofile}
You can assign a settings profile for the user. Settings profiles are configured in a separate section of the `users.xml` file. For more information, see [Profiles of Settings](settings-profiles.md).
You can assign a settings profile for the user. Settings profiles are configured in a separate section of the `users.xml` file. For more information, see [Profiles of Settings](../../operations/settings/settings-profiles.md).
### user\_name/quota {#user-namequota}
Quotas allow you to track or limit resource usage over a period of time. Quotas are configured in the `quotas`
section of the `users.xml` configuration file.
You can assign a quotas set for the user. For a detailed description of quotas configuration, see [Quotas](../quotas.md#quotas).
You can assign a quotas set for the user. For a detailed description of quotas configuration, see [Quotas](../../operations/quotas.md#quotas).
### user\_name/databases {#user-namedatabases}

View File

@ -404,11 +404,11 @@ Possible values:
Default value: 0.
## partial_merge_join_optimizations {#partial_merge_join_optimizations}
## partial\_merge\_join\_optimizations {#partial_merge_join_optimizations}
Disables optimizations in partial merge join algorithm for [JOIN](../../sql-reference/statements/select/join.md) queries.
By default, this setting enables improvements that could lead to wrong results. If you see suspicious results in your queries, disable optimizations by this setting. Optimizations can be different in different versions of the ClickHouse server.
By default, this setting enables improvements that could lead to wrong results. If you see suspicious results in your queries, disable optimizations by this setting. Optimizations can be different in different versions of the ClickHouse server.
Possible values:
@ -417,35 +417,35 @@ Possible values:
Default value: 1.
## partial_merge_join_rows_in_right_blocks {#partial_merge_join_rows_in_right_blocks}
## partial\_merge\_join\_rows\_in\_right\_blocks {#partial_merge_join_rows_in_right_blocks}
Limits sizes of right-hand join data blocks in partial merge join algorithm for [JOIN](../../sql-reference/statements/select/join.md) queries.
ClickHouse server:
1. Splits right-hand join data into blocks with up to the specified number of rows.
2. Indexes each block with their minimum and maximum values
3. Unloads prepared blocks to disk if possible.
1. Splits right-hand join data into blocks with up to the specified number of rows.
2. Indexes each block with its minimum and maximum values.
3. Unloads prepared blocks to disk if possible.
Possible values:
- Any positive integer. Recommended range of values: [1000, 100000].
- Any positive integer. Recommended range of values: \[1000, 100000\].
Default value: 65536.
## join_on_disk_max_files_to_merge {#join_on_disk_max_files_to_merge}
## join\_on\_disk\_max\_files\_to\_merge {#join_on_disk_max_files_to_merge}
Limits the number of files allowed for parallel sorting in MergeJoin operations when they are executed on disk.
Limits the number of files allowed for parallel sorting in MergeJoin operations when they are executed on disk.
The bigger the value of the setting, the more RAM used and the less disk I/O needed.
Possible values:
- Any positive integer, starting from 2.
- Any positive integer, starting from 2.
Default value: 64.
## any_join_distinct_right_table_keys {#any_join_distinct_right_table_keys}
## any\_join\_distinct\_right\_table\_keys {#any_join_distinct_right_table_keys}
Enables legacy ClickHouse server behavior in `ANY INNER|LEFT JOIN` operations.
@ -454,19 +454,18 @@ Enables legacy ClickHouse server behavior in `ANY INNER|LEFT JOIN` operations.
When the legacy behavior enabled:
- Results of `t1 ANY LEFT JOIN t2` and `t2 ANY RIGHT JOIN t1` operations are not equal because ClickHouse uses the logic with many-to-one left-to-right table keys mapping.
- Results of `ANY INNER JOIN` operations contain all rows from the left table like the `SEMI LEFT JOIN` operations do.
- Results of `t1 ANY LEFT JOIN t2` and `t2 ANY RIGHT JOIN t1` operations are not equal because ClickHouse uses the logic with many-to-one left-to-right table keys mapping.
- Results of `ANY INNER JOIN` operations contain all rows from the left table like the `SEMI LEFT JOIN` operations do.
When the legacy behavior disabled:
- Results of `t1 ANY LEFT JOIN t2` and `t2 ANY RIGHT JOIN t1` operations are equal because ClickHouse uses the logic which provides one-to-many keys mapping in `ANY RIGHT JOIN` operations.
- Results of `ANY INNER JOIN` operations contain one row per key from both left and right tables.
- Results of `t1 ANY LEFT JOIN t2` and `t2 ANY RIGHT JOIN t1` operations are equal because ClickHouse uses the logic which provides one-to-many keys mapping in `ANY RIGHT JOIN` operations.
- Results of `ANY INNER JOIN` operations contain one row per key from both left and right tables.
Possible values:
- 0 — Legacy behavior is disabled.
- 1 — Legacy behavior is enabled.
- 0 — Legacy behavior is disabled.
- 1 — Legacy behavior is enabled.
Default value: 0.
@ -474,19 +473,17 @@ See also:
- [JOIN strictness](../../sql-reference/statements/select/join.md#select-join-strictness)
## temporary_files_codec {#temporary_files_codec}
## temporary\_files\_codec {#temporary_files_codec}
Sets compression codec for temporary files used in sorting and joining operations on disk.
Possible values:
Possible values:
- LZ4 — [LZ4](https://en.wikipedia.org/wiki/LZ4_(compression_algorithm)) compression is applied.
- NONE — No compression is applied.
- LZ4 — [LZ4](https://en.wikipedia.org/wiki/LZ4_(compression_algorithm)) compression is applied.
- NONE — No compression is applied.
Default value: LZ4.
## max\_block\_size {#setting-max_block_size}
In ClickHouse, data is processed by blocks (sets of column parts). The internal processing cycles for a single block are efficient enough, but there are noticeable expenditures on each block. The `max_block_size` setting is a recommendation for what size of the block (in a count of rows) to load from tables. The block size shouldn’t be too small, so that the expenditures on each block remain negligible, but it also shouldn’t be too large, so that a query with LIMIT that completes after the first block is still processed quickly. The goal is to avoid consuming too much memory when extracting a large number of columns in multiple threads and to preserve at least some cache locality.
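For example, the recommendation can be overridden for a single heavy scan (a minimal sketch; the `hits` table is hypothetical):

``` sql
-- Read the table in larger blocks to reduce per-block overhead on a full scan.
SELECT count() FROM hits SETTINGS max_block_size = 262144;
```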
@ -555,7 +552,7 @@ Default value: 8.
If ClickHouse should read more than `merge_tree_max_rows_to_use_cache` rows in one query, it doesnt use the cache of uncompressed blocks.
The cache of uncompressed blocks stores data extracted for queries. ClickHouse uses this cache to speed up responses to repeated small queries. This setting protects the cache from thrashing by queries that read a large amount of data. The [uncompressed\_cache\_size](../server-configuration-parameters/settings.md#server-settings-uncompressed_cache_size) server setting defines the size of the cache of uncompressed blocks.
The cache of uncompressed blocks stores data extracted for queries. ClickHouse uses this cache to speed up responses to repeated small queries. This setting protects the cache from thrashing by queries that read a large amount of data. The [uncompressed\_cache\_size](../../operations/server-configuration-parameters/settings.md#server-settings-uncompressed_cache_size) server setting defines the size of the cache of uncompressed blocks.
Possible values:
@ -567,7 +564,7 @@ Default value: 128 ✕ 8192.
If ClickHouse should read more than `merge_tree_max_bytes_to_use_cache` bytes in one query, it doesnt use the cache of uncompressed blocks.
The cache of uncompressed blocks stores data extracted for queries. ClickHouse uses this cache to speed up responses to repeated small queries. This setting protects the cache from thrashing by queries that read a large amount of data. The [uncompressed\_cache\_size](../server-configuration-parameters/settings.md#server-settings-uncompressed_cache_size) server setting defines the size of the cache of uncompressed blocks.
The cache of uncompressed blocks stores data extracted for queries. ClickHouse uses this cache to speed up responses to repeated small queries. This setting protects the cache from thrashing by queries that read a large amount of data. The [uncompressed\_cache\_size](../../operations/server-configuration-parameters/settings.md#server-settings-uncompressed_cache_size) server setting defines the size of the cache of uncompressed blocks.
Possible value:
@ -592,7 +589,7 @@ Default value: 0.
Setting up query logging.
Queries sent to ClickHouse with this setup are logged according to the rules in the [query\_log](../server-configuration-parameters/settings.md#server_configuration_parameters-query-log) server configuration parameter.
Queries sent to ClickHouse with this setup are logged according to the rules in the [query\_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query-log) server configuration parameter.
Example:
@ -622,7 +619,7 @@ log_queries_min_type='EXCEPTION_WHILE_PROCESSING'
Setting up query threads logging.
Query threads run by ClickHouse with this setup are logged according to the rules in the [query\_thread\_log](../server-configuration-parameters/settings.md#server_configuration_parameters-query_thread_log) server configuration parameter.
Query threads run by ClickHouse with this setup are logged according to the rules in the [query\_thread\_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query_thread_log) server configuration parameter.
Example:
@ -789,7 +786,7 @@ For more information, see the section “Extreme values”.
## use\_uncompressed\_cache {#setting-use_uncompressed_cache}
Whether to use a cache of uncompressed blocks. Accepts 0 or 1. By default, 0 (disabled).
Using the uncompressed cache (only for tables in the MergeTree family) can significantly reduce latency and increase throughput when working with a large number of short queries. Enable this setting for users who send frequent short requests. Also pay attention to the [uncompressed\_cache\_size](../server-configuration-parameters/settings.md#server-settings-uncompressed_cache_size) configuration parameter (only set in the config file) the size of uncompressed cache blocks. By default, it is 8 GiB. The uncompressed cache is filled in as needed and the least-used data is automatically deleted.
Using the uncompressed cache (only for tables in the MergeTree family) can significantly reduce latency and increase throughput when working with a large number of short queries. Enable this setting for users who send frequent short requests. Also pay attention to the [uncompressed\_cache\_size](../../operations/server-configuration-parameters/settings.md#server-settings-uncompressed_cache_size) configuration parameter (only set in the config file) the size of uncompressed cache blocks. By default, it is 8 GiB. The uncompressed cache is filled in as needed and the least-used data is automatically deleted.
For queries that read at least a somewhat large volume of data (one million rows or more), the uncompressed cache is disabled automatically to save space for truly small queries. This means that you can keep the use\_uncompressed\_cache setting always set to 1.
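A minimal sketch of enabling it for a session of frequent short queries (the `hits` table and `CounterID` column are hypothetical):

``` sql
SET use_uncompressed_cache = 1;
-- Repeated small point lookups can then be served from the cache of uncompressed blocks.
SELECT count() FROM hits WHERE CounterID = 101500;
```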
@ -822,6 +819,7 @@ ClickHouse supports the following algorithms of choosing replicas:
- [Nearest hostname](#load_balancing-nearest_hostname)
- [In order](#load_balancing-in_order)
- [First or random](#load_balancing-first_or_random)
- [Round robin](#load_balancing-round_robin)
### Random (by Default) {#load_balancing-random}
@ -865,6 +863,14 @@ This algorithm chooses the first replica in the set or a random replica if the f
The `first_or_random` algorithm solves the problem of the `in_order` algorithm. With `in_order`, if one replica goes down, the next one gets a double load while the remaining replicas handle the usual amount of traffic. When using the `first_or_random` algorithm, the load is evenly distributed among replicas that are still available.
### Round Robin {#load_balancing-round_robin}
``` sql
load_balancing = round_robin
```
This algorithm uses a round-robin policy across replicas with the same number of errors (only queries with the `round_robin` policy are counted).
## prefer\_localhost\_replica {#settings-prefer-localhost-replica}
Enables/disables preferable using the localhost replica when processing distributed queries.
@ -955,10 +961,10 @@ ClickHouse generates an exception
See also:
- [insert_quorum_timeout](#settings-insert_quorum_timeout)
- [select_sequential_consistency](#settings-select_sequential_consistency)
- [insert\_quorum\_timeout](#settings-insert_quorum_timeout)
- [select\_sequential\_consistency](#settings-select_sequential_consistency)
## insert_quorum_timeout {#settings-insert_quorum_timeout}
## insert\_quorum\_timeout {#settings-insert_quorum_timeout}
Write to quorum timeout in seconds. If the timeout has passed and no write has taken place yet, ClickHouse will generate an exception and the client must repeat the query to write the same block to the same or any other replica.
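A minimal sketch of how the quorum settings are typically combined (the replicated table `events` is hypothetical):

``` sql
SET insert_quorum = 2;                     -- require acknowledgement from 2 replicas
INSERT INTO events VALUES (1, 'a');
SET select_sequential_consistency = 1;     -- read only data written by a full quorum
SELECT count() FROM events;
```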
@ -986,8 +992,8 @@ When sequential consistency is enabled, ClickHouse allows the client to execute
See also:
- [insert_quorum](#settings-insert_quorum)
- [insert_quorum_timeout](#settings-insert_quorum_timeout)
- [insert\_quorum](#settings-insert_quorum)
- [insert\_quorum\_timeout](#settings-insert_quorum_timeout)
## insert\_deduplicate {#settings-insert-deduplicate}
@ -1002,7 +1008,6 @@ Default value: 1.
By default, blocks inserted into replicated tables by the `INSERT` statement are deduplicated (see [Data Replication](../../engines/table-engines/mergetree-family/replication.md)).
## deduplicate\_blocks\_in\_dependent\_materialized\_views {#settings-deduplicate-blocks-in-dependent-materialized-views}
Enables or disables the deduplication check for materialized views that receive data from Replicated\* tables.
@ -1067,15 +1072,15 @@ Default value: 0.
## count\_distinct\_implementation {#settings-count_distinct_implementation}
Specifies which of the `uniq*` functions should be used to perform the [COUNT(DISTINCT …)](../../sql-reference/aggregate-functions/reference.md#agg_function-count) construction.
Specifies which of the `uniq*` functions should be used to perform the [COUNT(DISTINCT …)](../../sql-reference/aggregate-functions/reference/count.md#agg_function-count) construction.
Possible values:
- [uniq](../../sql-reference/aggregate-functions/reference.md#agg_function-uniq)
- [uniqCombined](../../sql-reference/aggregate-functions/reference.md#agg_function-uniqcombined)
- [uniqCombined64](../../sql-reference/aggregate-functions/reference.md#agg_function-uniqcombined64)
- [uniqHLL12](../../sql-reference/aggregate-functions/reference.md#agg_function-uniqhll12)
- [uniqExact](../../sql-reference/aggregate-functions/reference.md#agg_function-uniqexact)
- [uniq](../../sql-reference/aggregate-functions/reference/uniq.md#agg_function-uniq)
- [uniqCombined](../../sql-reference/aggregate-functions/reference/uniqcombined.md#agg_function-uniqcombined)
- [uniqCombined64](../../sql-reference/aggregate-functions/reference/uniqcombined64.md#agg_function-uniqcombined64)
- [uniqHLL12](../../sql-reference/aggregate-functions/reference/uniqhll12.md#agg_function-uniqhll12)
- [uniqExact](../../sql-reference/aggregate-functions/reference/uniqexact.md#agg_function-uniqexact)
Default value: `uniqExact`.
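For example, to switch the underlying function for a session (the `visits` table and `UserID` column are hypothetical):

``` sql
SET count_distinct_implementation = 'uniqCombined';
-- COUNT(DISTINCT ...) below is executed with the uniqCombined function.
SELECT COUNT(DISTINCT UserID) FROM visits;
```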
@ -1109,24 +1114,24 @@ Possible values:
Default value: 0.
## optimize_skip_unused_shards {#optimize-skip-unused-shards}
## optimize\_skip\_unused\_shards {#optimize-skip-unused-shards}
Enables or disables skipping of unused shards for [SELECT](../../sql-reference/statements/select/index.md) queries that have a sharding key condition in `WHERE/PREWHERE` (assuming that the data is distributed by the sharding key; otherwise, the setting does nothing).
Possible values:
- 0 — Disabled.
- 1 — Enabled.
- 0 — Disabled.
- 1 — Enabled.
Default value: 0
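A minimal sketch (the `hits_distributed` table, assumed to be sharded by `UserID`, is hypothetical):

``` sql
SET optimize_skip_unused_shards = 1;
-- Only the shard that can contain UserID = 42 is queried.
SELECT count() FROM hits_distributed WHERE UserID = 42;
```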
## force_optimize_skip_unused_shards {#force-optimize-skip-unused-shards}
## force\_optimize\_skip\_unused\_shards {#force-optimize-skip-unused-shards}
Enables or disables query execution if [optimize_skip_unused_shards](#optimize-skip-unused-shards) is enabled and skipping of unused shards is not possible. If the skipping is not possible and the setting is enabled, an exception will be thrown.
Enables or disables query execution if [optimize\_skip\_unused\_shards](#optimize-skip-unused-shards) is enabled and skipping of unused shards is not possible. If the skipping is not possible and the setting is enabled, an exception will be thrown.
Possible values:
- 0 — Disabled. ClickHouse doesn't throw an exception.
- 0 — Disabled. ClickHouse doesnt throw an exception.
- 1 — Enabled. Query execution is disabled only if the table has a sharding key.
- 2 — Enabled. Query execution is disabled regardless of whether a sharding key is defined for the table.
@ -1249,7 +1254,7 @@ Default value: 1000000000 nanoseconds (once a second).
See also:
- System table [trace\_log](../../operations/system-tables.md#system_tables-trace_log)
- System table [trace\_log](../../operations/system-tables/trace_log.md#system_tables-trace_log)
## query\_profiler\_cpu\_time\_period\_ns {#query_profiler_cpu_time_period_ns}
@ -1272,7 +1277,7 @@ Default value: 1000000000 nanoseconds.
See also:
- System table [trace\_log](../../operations/system-tables.md#system_tables-trace_log)
- System table [trace\_log](../../operations/system-tables/trace_log.md#system_tables-trace_log)
## allow\_introspection\_functions {#settings-allow_introspection_functions}
@ -1287,8 +1292,8 @@ Default value: 0.
**See Also**
- [Sampling Query Profiler](../optimizing-performance/sampling-query-profiler.md)
- System table [trace\_log](../../operations/system-tables.md#system_tables-trace_log)
- [Sampling Query Profiler](../../operations/optimizing-performance/sampling-query-profiler.md)
- System table [trace\_log](../../operations/system-tables/trace_log.md#system_tables-trace_log)
## input\_format\_parallel\_parsing {#input-format-parallel-parsing}
@ -1336,7 +1341,7 @@ Type: URL
Default value: Empty
## background_pool_size {#background_pool_size}
## background\_pool\_size {#background_pool_size}
Sets the number of threads performing background operations in table engines (for example, merges in [MergeTree engine](../../engines/table-engines/mergetree-family/index.md) tables). This setting is applied from `default` profile at ClickHouse server start and cant be changed in a user session. By adjusting this setting, you manage CPU and disk load. Smaller pool size utilizes less CPU and disk resources, but background processes advance slower which might eventually impact query performance.
@ -1348,9 +1353,9 @@ Possible values:
Default value: 16.
## background_buffer_flush_schedule_pool_size {#background_buffer_flush_schedule_pool_size}
## background\_buffer\_flush\_schedule\_pool\_size {#background_buffer_flush_schedule_pool_size}
Sets the number of threads performing background flush in [Buffer](../../engines/table-engines/special/buffer.md)-engine tables. This setting is applied at ClickHouse server start and can't be changed in a user session.
Sets the number of threads performing background flush in [Buffer](../../engines/table-engines/special/buffer.md)-engine tables. This setting is applied at ClickHouse server start and cant be changed in a user session.
Possible values:
@ -1358,7 +1363,7 @@ Possible values:
Default value: 16.
## background_move_pool_size {#background_move_pool_size}
## background\_move\_pool\_size {#background_move_pool_size}
Sets the number of threads performing background moves of data parts for [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes)-engine tables. This setting is applied at ClickHouse server start and cant be changed in a user session.
@ -1368,9 +1373,9 @@ Possible values:
Default value: 8.
## background_schedule_pool_size {#background_schedule_pool_size}
## background\_schedule\_pool\_size {#background_schedule_pool_size}
Sets the number of threads performing background tasks for [replicated](../../engines/table-engines/mergetree-family/replication.md) tables, [Kafka](../../engines/table-engines/integrations/kafka.md) streaming, [DNS cache updates](../server-configuration-parameters/settings.md#server-settings-dns-cache-update-period). This setting is applied at ClickHouse server start and cant be changed in a user session.
Sets the number of threads performing background tasks for [replicated](../../engines/table-engines/mergetree-family/replication.md) tables, [Kafka](../../engines/table-engines/integrations/kafka.md) streaming, [DNS cache updates](../../operations/server-configuration-parameters/settings.md#server-settings-dns-cache-update-period). This setting is applied at ClickHouse server start and cant be changed in a user session.
Possible values:
@ -1378,7 +1383,7 @@ Possible values:
Default value: 16.
## background_distributed_schedule_pool_size {#background_distributed_schedule_pool_size}
## background\_distributed\_schedule\_pool\_size {#background_distributed_schedule_pool_size}
Sets the number of threads performing background tasks for [distributed](../../engines/table-engines/special/distributed.md) sends. This setting is applied at ClickHouse server start and cant be changed in a user session.
@ -1388,9 +1393,68 @@ Possible values:
Default value: 16.
## low_cardinality_max_dictionary_size {#low_cardinality_max_dictionary_size}
## transform_null_in {#transform_null_in}
Sets a maximum size in rows of a shared global dictionary for the [LowCardinality](../../sql-reference/data-types/lowcardinality.md) data type that can be written to a storage file system. This setting prevents issues with RAM in case of unlimited dictionary growth. All the data that can't be encoded due to maximum dictionary size limitation ClickHouse writes in an ordinary method.
Enables equality of [NULL](../../sql-reference/syntax.md#null-literal) values for [IN](../../sql-reference/operators/in.md) operator.
By default, `NULL` values can’t be compared because `NULL` means an undefined value. Thus, the comparison `expr = NULL` must always return `false`. With this setting, `NULL = NULL` returns `true` for the `IN` operator.
Possible values:
- 0 — Comparison of `NULL` values in `IN` operator returns `false`.
- 1 — Comparison of `NULL` values in `IN` operator returns `true`.
Default value: 0.
**Example**
Consider the `null_in` table:
```text
┌──idx─┬─────i─┐
│ 1 │ 1 │
│ 2 │ NULL │
│ 3 │ 3 │
└──────┴───────┘
```
Query:
```sql
SELECT idx, i FROM null_in WHERE i IN (1, NULL) SETTINGS transform_null_in = 0;
```
Result:
```text
┌──idx─┬────i─┐
│ 1 │ 1 │
└──────┴──────┘
```
Query:
```sql
SELECT idx, i FROM null_in WHERE i IN (1, NULL) SETTINGS transform_null_in = 1;
```
Result:
```text
┌──idx─┬─────i─┐
│ 1 │ 1 │
│ 2 │ NULL │
└──────┴───────┘
```
**See Also**
- [NULL Processing in IN Operators](../../sql-reference/operators/in.md#in-null-processing)
## low\_cardinality\_max\_dictionary\_size {#low_cardinality_max_dictionary_size}
Sets a maximum size in rows of a shared global dictionary for the [LowCardinality](../../sql-reference/data-types/lowcardinality.md) data type that can be written to a storage file system. This setting prevents issues with RAM in case of unlimited dictionary growth. ClickHouse writes all the data that can’t be encoded due to the maximum dictionary size limitation in an ordinary way.
Possible values:
@ -1398,7 +1462,7 @@ Possible values:
Default value: 8192.
## low_cardinality_use_single_dictionary_for_part {#low_cardinality_use_single_dictionary_for_part}
## low\_cardinality\_use\_single\_dictionary\_for\_part {#low_cardinality_use_single_dictionary_for_part}
Turns on or turns off using of single dictionary for the data part.
@ -1406,44 +1470,73 @@ By default, ClickHouse server monitors the size of dictionaries and if a diction
Possible values:
- 1 — Creating several dictionaries for the data part is prohibited.
- 0 — Creating several dictionaries for the data part is not prohibited.
- 1 — Creating several dictionaries for the data part is prohibited.
- 0 — Creating several dictionaries for the data part is not prohibited.
Default value: 0.
## low_cardinality_allow_in_native_format {#low_cardinality_allow_in_native_format}
## low\_cardinality\_allow\_in\_native\_format {#low_cardinality_allow_in_native_format}
Allows or restricts using the [LowCardinality](../../sql-reference/data-types/lowcardinality.md) data type with the [Native](../../interfaces/formats.md#native) format.
If usage of `LowCardinality` is restricted, ClickHouse server converts `LowCardinality`-columns to ordinary ones for `SELECT` queries, and convert ordinary columns to `LowCardinality`-columns for `INSERT` queries.
This setting is required mainly for third-party clients which don't support `LowCardinality` data type.
This setting is required mainly for third-party clients that don’t support the `LowCardinality` data type.
Possible values:
- 1 — Usage of `LowCardinality` is not restricted.
- 0 — Usage of `LowCardinality` is restricted.
- 1 — Usage of `LowCardinality` is not restricted.
- 0 — Usage of `LowCardinality` is restricted.
Default value: 1.
## allow_suspicious_low_cardinality_types {#allow_suspicious_low_cardinality_types}
## allow\_suspicious\_low\_cardinality\_types {#allow_suspicious_low_cardinality_types}
Allows or restricts using [LowCardinality](../../sql-reference/data-types/lowcardinality.md) with data types that have a fixed size of 8 bytes or less: numeric data types and `FixedString(8_bytes_or_less)`.
For small fixed values, using `LowCardinality` is usually inefficient, because ClickHouse stores a numeric index for each row. As a result:
- Disk space usage can rise.
- RAM consumption can be higher, depending on a dictionary size.
- Some functions can work slower due to extra coding/encoding operations.
- Disk space usage can rise.
- RAM consumption can be higher, depending on a dictionary size.
- Some functions can work slower due to extra coding/encoding operations.
Merge times in [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md)-engine tables can grow due to all the reasons described above.
Possible values:
- 1 — Usage of `LowCardinality` is not restricted.
- 0 — Usage of `LowCardinality` is restricted.
- 1 — Usage of `LowCardinality` is not restricted.
- 0 — Usage of `LowCardinality` is restricted.
Default value: 0.
## min_insert_block_size_rows_for_materialized_views {#min-insert-block-size-rows-for-materialized-views}
Sets the minimum number of rows in a block that can be inserted into a table by an `INSERT` query. Smaller-sized blocks are squashed into bigger ones. This setting is applied only to blocks inserted into a [materialized view](../../sql-reference/statements/create.md#create-view). By adjusting this setting, you control blocks squashing while pushing to a materialized view and avoid excessive memory usage.
Possible values:
- Any positive integer.
- 0 — Squashing disabled.
Default value: 1048576.
**See Also**
- [min_insert_block_size_rows](#min-insert-block-size-rows)
## min_insert_block_size_bytes_for_materialized_views {#min-insert-block-size-bytes-for-materialized-views}
Sets the minimum number of bytes in a block that can be inserted into a table by an `INSERT` query. Smaller-sized blocks are squashed into bigger ones. This setting is applied only to blocks inserted into a [materialized view](../../sql-reference/statements/create.md#create-view). By adjusting this setting, you control blocks squashing while pushing to a materialized view and avoid excessive memory usage.
Possible values:
- Any positive integer.
- 0 — Squashing disabled.
Default value: 268435456.
**See also**
- [min_insert_block_size_bytes](#min-insert-block-size-bytes)
[Original article](https://clickhouse.tech/docs/en/operations/settings/settings/) <!-- hide -->

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,3 @@
## system.asynchronous\_metric\_log {#system-tables-async-log}
Contains the historical values for `system.asynchronous_metrics` (see [system.asynchronous\_metrics](asynchronous_metrics.md#system_tables-asynchronous_metrics)).
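A minimal query to inspect the collected history:

``` sql
SELECT * FROM system.asynchronous_metric_log LIMIT 5;
```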

View File

@ -0,0 +1,36 @@
# system.asynchronous\_metrics {#system_tables-asynchronous_metrics}
Contains metrics that are calculated periodically in the background. For example, the amount of RAM in use.
Columns:
- `metric` ([String](../../sql-reference/data-types/string.md)) — Metric name.
- `value` ([Float64](../../sql-reference/data-types/float.md)) — Metric value.
**Example**
``` sql
SELECT * FROM system.asynchronous_metrics LIMIT 10
```
``` text
┌─metric──────────────────────────────────┬──────value─┐
│ jemalloc.background_thread.run_interval │ 0 │
│ jemalloc.background_thread.num_runs │ 0 │
│ jemalloc.background_thread.num_threads │ 0 │
│ jemalloc.retained │ 422551552 │
│ jemalloc.mapped │ 1682989056 │
│ jemalloc.resident │ 1656446976 │
│ jemalloc.metadata_thp │ 0 │
│ jemalloc.metadata │ 10226856 │
│ UncompressedCacheCells │ 0 │
│ MarkCacheFiles │ 0 │
└─────────────────────────────────────────┴────────────┘
```
**See Also**
- [Monitoring](../../operations/monitoring.md) — Base concepts of ClickHouse monitoring.
- [system.metrics](metrics.md#system_tables-metrics) — Contains instantly calculated metrics.
- [system.events](events.md#system_tables-events) — Contains a number of events that have occurred.
- [system.metric\_log](metric_log.md#system_tables-metric_log) — Contains a history of metrics values from tables `system.metrics` и `system.events`.

View File

@ -0,0 +1,24 @@
# system.clusters {#system-clusters}
Contains information about clusters available in the config file and the servers in them.
Columns:
- `cluster` (String) — The cluster name.
- `shard_num` (UInt32) — The shard number in the cluster, starting from 1.
- `shard_weight` (UInt32) — The relative weight of the shard when writing data.
- `replica_num` (UInt32) — The replica number in the shard, starting from 1.
- `host_name` (String) — The host name, as specified in the config.
- `host_address` (String) — The host IP address obtained from DNS.
- `port` (UInt16) — The port to use for connecting to the server.
- `user` (String) — The name of the user for connecting to the server.
- `errors_count` (UInt32) - number of times this host failed to reach replica.
- `estimated_recovery_time` (UInt32) - seconds left until replica error count is zeroed and it is considered to be back to normal.
Please note that `errors_count` is updated once per query to the cluster, but `estimated_recovery_time` is recalculated on demand. So there could be a case of non-zero `errors_count` and zero `estimated_recovery_time`; the next query will then reset `errors_count` to zero and try to use the replica as if it had no errors.
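For example, to review the replica layout and the current error counters:

``` sql
SELECT cluster, shard_num, replica_num, host_name, port, errors_count
FROM system.clusters;
```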
**See also**
- [Table engine Distributed](../../engines/table-engines/special/distributed.md)
- [distributed\_replica\_error\_cap setting](../../operations/settings/settings.md#settings-distributed_replica_error_cap)
- [distributed\_replica\_error\_half\_life setting](../../operations/settings/settings.md#settings-distributed_replica_error_half_life)

View File

@ -0,0 +1,22 @@
# system.columns {#system-columns}
Contains information about columns in all the tables.
You can use this table to get information similar to the [DESCRIBE TABLE](../../sql-reference/statements/misc.md#misc-describe-table) query, but for multiple tables at once.
The `system.columns` table contains the following columns (the column type is shown in brackets):
- `database` (String) — Database name.
- `table` (String) — Table name.
- `name` (String) — Column name.
- `type` (String) — Column type.
- `default_kind` (String) — Expression type (`DEFAULT`, `MATERIALIZED`, `ALIAS`) for the default value, or an empty string if it is not defined.
- `default_expression` (String) — Expression for the default value, or an empty string if it is not defined.
- `data_compressed_bytes` (UInt64) — The size of compressed data, in bytes.
- `data_uncompressed_bytes` (UInt64) — The size of decompressed data, in bytes.
- `marks_bytes` (UInt64) — The size of marks, in bytes.
- `comment` (String) — Comment on the column, or an empty string if it is not defined.
- `is_in_partition_key` (UInt8) — Flag that indicates whether the column is in the partition expression.
- `is_in_sorting_key` (UInt8) — Flag that indicates whether the column is in the sorting key expression.
- `is_in_primary_key` (UInt8) — Flag that indicates whether the column is in the primary key expression.
- `is_in_sampling_key` (UInt8) — Flag that indicates whether the column is in the sampling key expression.
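For example, to list the columns of this very table:

``` sql
SELECT name, type, default_kind
FROM system.columns
WHERE database = 'system' AND table = 'columns';
```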

View File

@ -0,0 +1,40 @@
# system.contributors {#system-contributors}
Contains information about contributors. The order is random at query execution time.
Columns:
- `name` (String) — Contributor (author) name from git log.
**Example**
``` sql
SELECT * FROM system.contributors LIMIT 10
```
``` text
┌─name─────────────┐
│ Olga Khvostikova │
│ Max Vetrov │
│ LiuYangkuan │
│ svladykin │
│ zamulla │
│ Šimon Podlipský │
│ BayoNet │
│ Ilya Khomutov │
│ Amy Krishnevsky │
│ Loud_Scream │
└──────────────────┘
```
To find yourself in the table, use this query:
``` sql
SELECT * FROM system.contributors WHERE name = 'Olga Khvostikova'
```
``` text
┌─name─────────────┐
│ Olga Khvostikova │
└──────────────────┘
```

View File

@ -0,0 +1,34 @@
# system.data\_type\_families {#system_tables-data_type_families}
Contains information about supported [data types](../../sql-reference/data-types/).
Columns:
- `name` ([String](../../sql-reference/data-types/string.md)) — Data type name.
- `case_insensitive` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Property that shows whether you can use a data type name in a query in case insensitive manner or not. For example, `Date` and `date` are both valid.
- `alias_to` ([String](../../sql-reference/data-types/string.md)) — Data type name for which `name` is an alias.
**Example**
``` sql
SELECT * FROM system.data_type_families WHERE alias_to = 'String'
```
``` text
┌─name───────┬─case_insensitive─┬─alias_to─┐
│ LONGBLOB │ 1 │ String │
│ LONGTEXT │ 1 │ String │
│ TINYTEXT │ 1 │ String │
│ TEXT │ 1 │ String │
│ VARCHAR │ 1 │ String │
│ MEDIUMBLOB │ 1 │ String │
│ BLOB │ 1 │ String │
│ TINYBLOB │ 1 │ String │
│ CHAR │ 1 │ String │
│ MEDIUMTEXT │ 1 │ String │
└────────────┴──────────────────┴──────────┘
```
**See Also**
- [Syntax](../../sql-reference/syntax.md) — Information about supported syntax.

View File

@ -0,0 +1,7 @@
# system.databases {#system-databases}
This table contains a single String column called `name`, which holds the name of a database.
Each database that the server knows about has a corresponding entry in the table.
This system table is used for implementing the `SHOW DATABASES` query.
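A minimal query, equivalent in spirit to `SHOW DATABASES`:

``` sql
SELECT name FROM system.databases;
```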

View File

@ -0,0 +1,9 @@
# system.detached\_parts {#system_tables-detached_parts}
Contains information about detached parts of [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) tables. The `reason` column specifies why the part was detached.
For user-detached parts, the reason is empty. Such parts can be attached with [ALTER TABLE ATTACH PARTITION\|PART](../../sql-reference/statements/alter.md#alter_attach-partition) command.
For the description of other columns, see [system.parts](parts.md#system_tables-parts).
If part name is invalid, values of some columns may be `NULL`. Such parts can be deleted with [ALTER TABLE DROP DETACHED PART](../../sql-reference/statements/alter.md#alter_drop-detached).
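A short sketch of inspecting detached parts and re-attaching one of them (the table name and part name are hypothetical):

``` sql
SELECT database, table, reason, name FROM system.detached_parts;
-- Re-attach a part that was detached manually (the reason is empty for such parts).
ALTER TABLE visits ATTACH PART '201901_2_2_0';
```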

View File

@ -0,0 +1,61 @@
# system.dictionaries {#system_tables-dictionaries}
Contains information about [external dictionaries](../../sql-reference/dictionaries/external-dictionaries/external-dicts.md).
Columns:
- `database` ([String](../../sql-reference/data-types/string.md)) — Name of the database containing the dictionary created by DDL query. Empty string for other dictionaries.
- `name` ([String](../../sql-reference/data-types/string.md)) — [Dictionary name](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict.md).
- `status` ([Enum8](../../sql-reference/data-types/enum.md)) — Dictionary status. Possible values:
- `NOT_LOADED` — Dictionary was not loaded because it was not used.
- `LOADED` — Dictionary loaded successfully.
- `FAILED` — Unable to load the dictionary as a result of an error.
- `LOADING` — Dictionary is loading now.
- `LOADED_AND_RELOADING` — Dictionary is loaded successfully, and is being reloaded right now (frequent reasons: [SYSTEM RELOAD DICTIONARY](../../sql-reference/statements/system.md#query_language-system-reload-dictionary) query, timeout, dictionary config has changed).
- `FAILED_AND_RELOADING` — Could not load the dictionary as a result of an error and is loading now.
- `origin` ([String](../../sql-reference/data-types/string.md)) — Path to the configuration file that describes the dictionary.
- `type` ([String](../../sql-reference/data-types/string.md)) — Type of a dictionary allocation. [Storing Dictionaries in Memory](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md).
- `key` — [Key type](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md#ext_dict_structure-key): Numeric Key ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) or Сomposite key ([String](../../sql-reference/data-types/string.md)) — form “(type 1, type 2, …, type n)”.
- `attribute.names` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — Array of [attribute names](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md#ext_dict_structure-attributes) provided by the dictionary.
- `attribute.types` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — Corresponding array of [attribute types](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md#ext_dict_structure-attributes) that are provided by the dictionary.
- `bytes_allocated` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Amount of RAM allocated for the dictionary.
- `query_count` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Number of queries since the dictionary was loaded or since the last successful reboot.
- `hit_rate` ([Float64](../../sql-reference/data-types/float.md)) — For cache dictionaries, the percentage of uses for which the value was in the cache.
- `element_count` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Number of items stored in the dictionary.
- `load_factor` ([Float64](../../sql-reference/data-types/float.md)) — Percentage filled in the dictionary (for a hashed dictionary, the percentage filled in the hash table).
- `source` ([String](../../sql-reference/data-types/string.md)) — Text describing the [data source](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md) for the dictionary.
- `lifetime_min` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Minimum [lifetime](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md) of the dictionary in memory, after which ClickHouse tries to reload the dictionary (if `invalidate_query` is set, then only if it has changed). Set in seconds.
- `lifetime_max` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Maximum [lifetime](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md) of the dictionary in memory, after which ClickHouse tries to reload the dictionary (if `invalidate_query` is set, then only if it has changed). Set in seconds.
- `loading_start_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Start time for loading the dictionary.
- `last_successful_update_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — End time for loading or updating the dictionary. Helps to monitor some troubles with external sources and investigate causes.
- `loading_duration` ([Float32](../../sql-reference/data-types/float.md)) — Duration of a dictionary loading.
- `last_exception` ([String](../../sql-reference/data-types/string.md)) — Text of the error that occurs when creating or reloading the dictionary if the dictionary couldnt be created.
**Example**
Configure the dictionary.
``` sql
CREATE DICTIONARY dictdb.dict
(
`key` Int64 DEFAULT -1,
`value_default` String DEFAULT 'world',
`value_expression` String DEFAULT 'xxx' EXPRESSION 'toString(127 * 172)'
)
PRIMARY KEY key
SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'dicttbl' DB 'dictdb'))
LIFETIME(MIN 0 MAX 1)
LAYOUT(FLAT())
```
Make sure that the dictionary is loaded.
``` sql
SELECT * FROM system.dictionaries
```
``` text
┌─database─┬─name─┬─status─┬─origin──────┬─type─┬─key────┬─attribute.names──────────────────────┬─attribute.types─────┬─bytes_allocated─┬─query_count─┬─hit_rate─┬─element_count─┬───────────load_factor─┬─source─────────────────────┬─lifetime_min─┬─lifetime_max─┬──loading_start_time─┌──last_successful_update_time─┬──────loading_duration─┬─last_exception─┐
│ dictdb │ dict │ LOADED │ dictdb.dict │ Flat │ UInt64 │ ['value_default','value_expression'] │ ['String','String'] │ 74032 │ 0 │ 1 │ 1 │ 0.0004887585532746823 │ ClickHouse: dictdb.dicttbl │ 0 │ 1 │ 2020-03-04 04:17:34 │ 2020-03-04 04:30:34 │ 0.002 │ │
└──────────┴──────┴────────┴─────────────┴──────┴────────┴──────────────────────────────────────┴─────────────────────┴─────────────────┴─────────────┴──────────┴───────────────┴───────────────────────┴────────────────────────────┴──────────────┴──────────────┴─────────────────────┴──────────────────────────────┘───────────────────────┴────────────────┘
```

View File

@ -0,0 +1,26 @@
# system.disks {#system_tables-disks}
Contains information about disks defined in the [server configuration](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes_configure).
Columns:
- `name` ([String](../../sql-reference/data-types/string.md)) — Name of a disk in the server configuration.
- `path` ([String](../../sql-reference/data-types/string.md)) — Path to the mount point in the file system.
- `free_space` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Free space on disk in bytes.
- `total_space` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Disk volume in bytes.
- `keep_free_space` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Amount of disk space that should stay free on disk in bytes. Defined in the `keep_free_space_bytes` parameter of disk configuration.
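For example, to check free space on each configured disk:

``` sql
SELECT name, path, formatReadableSize(free_space) AS free, formatReadableSize(total_space) AS total
FROM system.disks;
```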
## system.storage\_policies {#system_tables-storage_policies}
Contains information about storage policies and volumes defined in the [server configuration](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes_configure).
Columns:
- `policy_name` ([String](../../sql-reference/data-types/string.md)) — Name of the storage policy.
- `volume_name` ([String](../../sql-reference/data-types/string.md)) — Volume name defined in the storage policy.
- `volume_priority` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Volume order number in the configuration.
- `disks` ([Array(String)](../../sql-reference/data-types/array.md)) — Disk names, defined in the storage policy.
- `max_data_part_size` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Maximum size of a data part that can be stored on volume disks (0 — no limit).
- `move_factor` ([Float64](../../sql-reference/data-types/float.md)) — Ratio of free disk space. When the ratio exceeds the value of the configuration parameter, ClickHouse starts to move data to the next volume in order.
If the storage policy contains more than one volume, then information for each volume is stored in an individual row of the table.

View File

@ -0,0 +1,32 @@
# system.events {#system_tables-events}
Contains information about the number of events that have occurred in the system. For example, in the table, you can find how many `SELECT` queries were processed since the ClickHouse server started.
Columns:
- `event` ([String](../../sql-reference/data-types/string.md)) — Event name.
- `value` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Number of events occurred.
- `description` ([String](../../sql-reference/data-types/string.md)) — Event description.
**Example**
``` sql
SELECT * FROM system.events LIMIT 5
```
``` text
┌─event─────────────────────────────────┬─value─┬─description────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐
│ Query │ 12 │ Number of queries to be interpreted and potentially executed. Does not include queries that failed to parse or were rejected due to AST size limits, quota limits or limits on the number of simultaneously running queries. May include internal queries initiated by ClickHouse itself. Does not count subqueries. │
│ SelectQuery │ 8 │ Same as Query, but only for SELECT queries. │
│ FileOpen │ 73 │ Number of files opened. │
│ ReadBufferFromFileDescriptorRead │ 155 │ Number of reads (read/pread) from a file descriptor. Does not include sockets. │
│ ReadBufferFromFileDescriptorReadBytes │ 9931 │ Number of bytes read from file descriptors. If the file is compressed, this will show the compressed data size. │
└───────────────────────────────────────┴───────┴────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
```
**See Also**
- [system.asynchronous\_metrics](asynchronous_metrics.md#system_tables-asynchronous_metrics) — Contains periodically calculated metrics.
- [system.metrics](metrics.md#system_tables-metrics) — Contains instantly calculated metrics.
- [system.metric\_log](metric_log.md#system_tables-metric_log) — Contains a history of metrics values from tables `system.metrics` и `system.events`.
- [Monitoring](../../operations/monitoring.md) — Base concepts of ClickHouse monitoring.

View File

@ -0,0 +1,8 @@
# system.functions {#system-functions}
Contains information about normal and aggregate functions.
Columns:
- `name`(`String`) The name of the function.
- `is_aggregate`(`UInt8`) — Whether the function is aggregate.
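For example, to list the aggregate functions whose names start with `uniq`:

``` sql
SELECT name, is_aggregate
FROM system.functions
WHERE is_aggregate AND name LIKE 'uniq%';
```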

View File

@ -0,0 +1,15 @@
# system.graphite\_retentions {#system-graphite-retentions}
Contains information about parameters [graphite\_rollup](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-graphite) which are used in tables with [\*GraphiteMergeTree](../../engines/table-engines/mergetree-family/graphitemergetree.md) engines.
Columns:
- `config_name` (String) - `graphite_rollup` parameter name.
- `regexp` (String) - A pattern for the metric name.
- `function` (String) - The name of the aggregating function.
- `age` (UInt64) - The minimum age of the data in seconds.
- `precision` (UInt64) - How precisely to define the age of the data in seconds.
- `priority` (UInt16) - Pattern priority.
- `is_default` (UInt8) - Whether the pattern is the default.
- `Tables.database` (Array(String)) - Array of names of database tables that use the `config_name` parameter.
- `Tables.table` (Array(String)) - Array of table names that use the `config_name` parameter.
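For example, to review the configured rollup rules:

``` sql
SELECT config_name, regexp, function, age, precision, priority
FROM system.graphite_retentions;
```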

View File

@ -0,0 +1,49 @@
---
toc_priority: 52
toc_title: System Tables
---
# System Tables {#system-tables}
## Introduction {#system-tables-introduction}
System tables provide information about:
- Server states, processes, and environment.
- Servers internal processes.
System tables:
- Located in the `system` database.
- Available only for reading data.
- Cant be dropped or altered, but can be detached.
Most of system tables store their data in RAM. A ClickHouse server creates such system tables at the start.
Unlike other system tables, the system tables [metric\_log](metric_log.md#system_tables-metric_log), [query\_log](query_log.md#system_tables-query_log), [query\_thread\_log](query_thread_log.md#system_tables-query_thread_log), [trace\_log](trace_log.md#system_tables-trace_log) are served by [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) table engine and store their data in a storage filesystem. If you remove a table from a filesystem, the ClickHouse server creates the empty one again at the time of the next data writing. If system table schema changed in a new release, then ClickHouse renames the current table and creates a new one.
By default, table growth is unlimited. To control a size of a table, you can use [TTL](../../sql-reference/statements/alter.md#manipulations-with-table-ttl) settings for removing outdated log records. Also you can use the partitioning feature of `MergeTree`-engine tables.
## Sources of System Metrics {#system-tables-sources-of-system-metrics}
For collecting system metrics ClickHouse server uses:
- `CAP_NET_ADMIN` capability.
- [procfs](https://en.wikipedia.org/wiki/Procfs) (only in Linux).
**procfs**
If ClickHouse server doesnt have `CAP_NET_ADMIN` capability, it tries to fall back to `ProcfsMetricsProvider`. `ProcfsMetricsProvider` allows collecting per-query system metrics (for CPU and I/O).
If procfs is supported and enabled on the system, ClickHouse server collects these metrics:
- `OSCPUVirtualTimeMicroseconds`
- `OSCPUWaitMicroseconds`
- `OSIOWaitMicroseconds`
- `OSReadChars`
- `OSWriteChars`
- `OSReadBytes`
- `OSWriteBytes`
[Original article](https://clickhouse.tech/docs/en/operations/system-tables/) <!--hide-->

View File

@ -0,0 +1,11 @@
# system.merge\_tree\_settings {#system-merge_tree_settings}
Contains information about settings for `MergeTree` tables.
Columns:
- `name` (String) — Setting name.
- `value` (String) — Setting value.
- `description` (String) — Setting description.
- `type` (String) — Setting type (implementation specific string value).
- `changed` (UInt8) — Whether the setting was explicitly defined in the config or explicitly changed.
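For example, to see only the settings that were explicitly changed:

``` sql
SELECT name, value, description
FROM system.merge_tree_settings
WHERE changed;
```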

View File

@ -0,0 +1,19 @@
# system.merges {#system-merges}
Contains information about merges and part mutations currently in process for tables in the MergeTree family.
Columns:
- `database` (String) — The name of the database the table is in.
- `table` (String) — Table name.
- `elapsed` (Float64) — The time elapsed (in seconds) since the merge started.
- `progress` (Float64) — The percentage of completed work from 0 to 1.
- `num_parts` (UInt64) — The number of pieces to be merged.
- `result_part_name` (String) — The name of the part that will be formed as the result of merging.
- `is_mutation` (UInt8) - 1 if this process is a part mutation.
- `total_size_bytes_compressed` (UInt64) — The total size of the compressed data in the merged chunks.
- `total_size_marks` (UInt64) — The total number of marks in the merged parts.
- `bytes_read_uncompressed` (UInt64) — Number of bytes read, uncompressed.
- `rows_read` (UInt64) — Number of rows read.
- `bytes_written_uncompressed` (UInt64) — Number of bytes written, uncompressed.
- `rows_written` (UInt64) — Number of rows written.
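For example, to watch the progress of the currently running merges:

``` sql
SELECT database, table, round(elapsed, 1) AS elapsed_seconds, round(progress * 100, 1) AS progress_percent, result_part_name
FROM system.merges;
```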

View File

@ -0,0 +1,55 @@
# system.metric\_log {#system_tables-metric_log}
Contains history of metrics values from tables `system.metrics` and `system.events`, periodically flushed to disk.
To turn on metrics history collection on `system.metric_log`, create `/etc/clickhouse-server/config.d/metric_log.xml` with following content:
``` xml
<yandex>
<metric_log>
<database>system</database>
<table>metric_log</table>
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
<collect_interval_milliseconds>1000</collect_interval_milliseconds>
</metric_log>
</yandex>
```
**Example**
``` sql
SELECT * FROM system.metric_log LIMIT 1 FORMAT Vertical;
```
``` text
Row 1:
──────
event_date: 2020-02-18
event_time: 2020-02-18 07:15:33
milliseconds: 554
ProfileEvent_Query: 0
ProfileEvent_SelectQuery: 0
ProfileEvent_InsertQuery: 0
ProfileEvent_FileOpen: 0
ProfileEvent_Seek: 0
ProfileEvent_ReadBufferFromFileDescriptorRead: 1
ProfileEvent_ReadBufferFromFileDescriptorReadFailed: 0
ProfileEvent_ReadBufferFromFileDescriptorReadBytes: 0
ProfileEvent_WriteBufferFromFileDescriptorWrite: 1
ProfileEvent_WriteBufferFromFileDescriptorWriteFailed: 0
ProfileEvent_WriteBufferFromFileDescriptorWriteBytes: 56
...
CurrentMetric_Query: 0
CurrentMetric_Merge: 0
CurrentMetric_PartMutation: 0
CurrentMetric_ReplicatedFetch: 0
CurrentMetric_ReplicatedSend: 0
CurrentMetric_ReplicatedChecks: 0
...
```
**See also**
- [system.asynchronous\_metrics](asynchronous_metrics.md#system_tables-asynchronous_metrics) — Contains periodically calculated metrics.
- [system.events](events.md#system_tables-events) — Contains a number of events that occurred.
- [system.metrics](metrics.md#system_tables-metrics) — Contains instantly calculated metrics.
- [Monitoring](../../operations/monitoring.md) — Base concepts of ClickHouse monitoring.

View File

@ -0,0 +1,39 @@
# system.metrics {#system_tables-metrics}
Contains metrics which can be calculated instantly, or have a current value. For example, the number of simultaneously processed queries or the current replica delay. This table is always up to date.
Columns:
- `metric` ([String](../../sql-reference/data-types/string.md)) — Metric name.
- `value` ([Int64](../../sql-reference/data-types/int-uint.md)) — Metric value.
- `description` ([String](../../sql-reference/data-types/string.md)) — Metric description.
You can find the list of supported metrics in the [src/Common/CurrentMetrics.cpp](https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/CurrentMetrics.cpp) source file of ClickHouse.
**Example**
``` sql
SELECT * FROM system.metrics LIMIT 10
```
``` text
┌─metric─────────────────────┬─value─┬─description──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐
│ Query │ 1 │ Number of executing queries │
│ Merge │ 0 │ Number of executing background merges │
│ PartMutation │ 0 │ Number of mutations (ALTER DELETE/UPDATE) │
│ ReplicatedFetch │ 0 │ Number of data parts being fetched from replicas │
│ ReplicatedSend │ 0 │ Number of data parts being sent to replicas │
│ ReplicatedChecks │ 0 │ Number of data parts checking for consistency │
│ BackgroundPoolTask │ 0 │ Number of active tasks in BackgroundProcessingPool (merges, mutations, fetches, or replication queue bookkeeping) │
│ BackgroundSchedulePoolTask │ 0 │ Number of active tasks in BackgroundSchedulePool. This pool is used for periodic ReplicatedMergeTree tasks, like cleaning old data parts, altering data parts, replica re-initialization, etc. │
│ DiskSpaceReservedForMerge │ 0 │ Disk space reserved for currently running background merges. It is slightly more than the total size of currently merging parts. │
│ DistributedSend │ 0 │ Number of connections to remote servers sending data that was INSERTed into Distributed tables. Both synchronous and asynchronous mode. │
└────────────────────────────┴───────┴──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
```
**See Also**
- [system.asynchronous\_metrics](asynchronous_metrics.md#system_tables-asynchronous_metrics) — Contains periodically calculated metrics.
- [system.events](events.md#system_tables-events) — Contains a number of events that occurred.
- [system.metric\_log](metric_log.md#system_tables-metric_log) — Contains a history of metrics values from the tables `system.metrics` and `system.events`.
- [Monitoring](../../operations/monitoring.md) — Base concepts of ClickHouse monitoring.

View File

@ -0,0 +1,25 @@
# system.mutations {#system_tables-mutations}
The table contains information about [mutations](../../sql-reference/statements/alter.md#alter-mutations) of MergeTree tables and their progress. Each mutation command is represented by a single row. The table has the following columns:
**database**, **table** - The name of the database and table to which the mutation was applied.
**mutation\_id** - The ID of the mutation. For replicated tables these IDs correspond to znode names in the `<table_path_in_zookeeper>/mutations/` directory in ZooKeeper. For unreplicated tables the IDs correspond to file names in the data directory of the table.
**command** - The mutation command string (the part of the query after `ALTER TABLE [db.]table`).
**create\_time** - When this mutation command was submitted for execution.
**block\_numbers.partition\_id**, **block\_numbers.number** - A nested column. For mutations of replicated tables, it contains one record for each partition: the partition ID and the block number that was acquired by the mutation (in each partition, only parts that contain blocks with numbers less than the block number acquired by the mutation in that partition will be mutated). In non-replicated tables, block numbers in all partitions form a single sequence. This means that for mutations of non-replicated tables, the column will contain one record with a single block number acquired by the mutation.
**parts\_to\_do** - The number of data parts that need to be mutated for the mutation to finish.
**is\_done** - Is the mutation done? Note that even if `parts_to_do = 0` it is possible that a mutation of a replicated table is not done yet because of a long-running INSERT that will create a new data part that will need to be mutated.
If there were problems with mutating some parts, the following columns contain additional information:
**latest\_failed\_part** - The name of the most recent part that could not be mutated.
**latest\_fail\_time** - The time of the most recent part mutation failure.
**latest\_fail\_reason** - The exception message that caused the most recent part mutation failure.
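A hedged example of checking for unfinished mutations and their failure reasons (using the columns described above):

``` sql
SELECT database, table, mutation_id, command, parts_to_do, latest_fail_reason
FROM system.mutations
WHERE is_done = 0
```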

View File

@ -0,0 +1,7 @@
# system.numbers {#system-numbers}
This table contains a single UInt64 column named `number` that contains almost all the natural numbers starting from zero.
You can use this table for tests, or if you need to do a brute force search.
Reads from this table are not parallelized.
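For example, generating the first ten numbers (a `LIMIT` is required because the table is effectively unbounded):

``` sql
SELECT number FROM system.numbers LIMIT 10
```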

View File

@ -0,0 +1,5 @@
# system.numbers\_mt {#system-numbers-mt}
The same as [system.numbers](numbers.md) but reads are parallelized. The numbers can be returned in any order.
Used for tests.
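A small sketch of using the parallelized table, where the inner `LIMIT` bounds the otherwise unbounded read:

``` sql
SELECT sum(number)
FROM (SELECT number FROM system.numbers_mt LIMIT 1000000)
```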

View File

@ -0,0 +1,7 @@
# system.one {#system-one}
This table contains a single row with a single `dummy` UInt8 column containing the value 0.
This table is used if a `SELECT` query doesnt specify the `FROM` clause.
This is similar to the `DUAL` table found in other DBMSs.
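For example, the following two queries are equivalent:

``` sql
SELECT 1;
SELECT 1 FROM system.one;
```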

View File

@ -0,0 +1,32 @@
# system.part\_log {#system_tables-part-log}
The `system.part_log` table is created only if the [part\_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-part-log) server setting is specified.
This table contains information about events that occurred with [data parts](../../engines/table-engines/mergetree-family/custom-partitioning-key.md) in the [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) family tables, such as adding or merging data.
The `system.part_log` table contains the following columns:
- `event_type` (Enum) — Type of the event that occurred with the data part. Can have one of the following values:
- `NEW_PART` — Inserting of a new data part.
- `MERGE_PARTS` — Merging of data parts.
- `DOWNLOAD_PART` — Downloading a data part.
- `REMOVE_PART` — Removing or detaching a data part using [DETACH PARTITION](../../sql-reference/statements/alter.md#alter_detach-partition).
- `MUTATE_PART` — Mutating of a data part.
- `MOVE_PART` — Moving a data part from one disk to another.
- `event_date` (Date) — Event date.
- `event_time` (DateTime) — Event time.
- `duration_ms` (UInt64) — Duration.
- `database` (String) — Name of the database the data part is in.
- `table` (String) — Name of the table the data part is in.
- `part_name` (String) — Name of the data part.
- `partition_id` (String) — ID of the partition that the data part was inserted to. The column takes the `all` value if the partitioning is by `tuple()`.
- `rows` (UInt64) — The number of rows in the data part.
- `size_in_bytes` (UInt64) — Size of the data part in bytes.
- `merged_from` (Array(String)) — An array of names of the parts which the current part was made up from (after the merge).
- `bytes_uncompressed` (UInt64) — Size of uncompressed bytes.
- `read_rows` (UInt64) — The number of rows read during the merge.
- `read_bytes` (UInt64) — The number of bytes read during the merge.
- `error` (UInt16) — The code number of the error that occurred.
- `exception` (String) — Text message of the error that occurred.
The `system.part_log` table is created after the first insert of data into a `MergeTree` table.
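An illustrative query over this table (the database and table names are placeholders):

``` sql
SELECT event_time, event_type, part_name, rows, size_in_bytes, error
FROM system.part_log
WHERE database = 'default' AND table = 'hits'
ORDER BY event_time DESC
LIMIT 10
```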

View File

@ -0,0 +1,80 @@
# system.parts {#system_tables-parts}
Contains information about parts of [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) tables.
Each row describes one data part.
Columns:
- `partition` (String) The partition name. To learn what a partition is, see the description of the [ALTER](../../sql-reference/statements/alter.md#query_language_queries_alter) query.
Formats:
- `YYYYMM` for automatic partitioning by month.
- `any_string` when partitioning manually.
- `name` (`String`) Name of the data part.
- `active` (`UInt8`) Flag that indicates whether the data part is active. If a data part is active, its used in a table. Otherwise, its deleted. Inactive data parts remain after merging.
- `marks` (`UInt64`) The number of marks. To get the approximate number of rows in a data part, multiply `marks` by the index granularity (usually 8192) (this hint doesnt work for adaptive granularity).
- `rows` (`UInt64`) The number of rows.
- `bytes_on_disk` (`UInt64`) Total size of all the data part files in bytes.
- `data_compressed_bytes` (`UInt64`) Total size of compressed data in the data part. All the auxiliary files (for example, files with marks) are not included.
- `data_uncompressed_bytes` (`UInt64`) Total size of uncompressed data in the data part. All the auxiliary files (for example, files with marks) are not included.
- `marks_bytes` (`UInt64`) The size of the file with marks.
- `modification_time` (`DateTime`) The time the directory with the data part was modified. This usually corresponds to the time of data part creation.
- `remove_time` (`DateTime`) The time when the data part became inactive.
- `refcount` (`UInt32`) The number of places where the data part is used. A value greater than 2 indicates that the data part is used in queries or merges.
- `min_date` (`Date`) The minimum value of the date key in the data part.
- `max_date` (`Date`) The maximum value of the date key in the data part.
- `min_time` (`DateTime`) The minimum value of the date and time key in the data part.
- `max_time`(`DateTime`) The maximum value of the date and time key in the data part.
- `partition_id` (`String`) ID of the partition.
- `min_block_number` (`UInt64`) The minimum number of data parts that make up the current part after merging.
- `max_block_number` (`UInt64`) The maximum number of data parts that make up the current part after merging.
- `level` (`UInt32`) Depth of the merge tree. Zero means that the current part was created by insert rather than by merging other parts.
- `data_version` (`UInt64`) Number that is used to determine which mutations should be applied to the data part (mutations with a version higher than `data_version`).
- `primary_key_bytes_in_memory` (`UInt64`) The amount of memory (in bytes) used by primary key values.
- `primary_key_bytes_in_memory_allocated` (`UInt64`) The amount of memory (in bytes) reserved for primary key values.
- `is_frozen` (`UInt8`) Flag that shows that a partition data backup exists. 1, the backup exists. 0, the backup doesnt exist. For more details, see [FREEZE PARTITION](../../sql-reference/statements/alter.md#alter_freeze-partition)
- `database` (`String`) Name of the database.
- `table` (`String`) Name of the table.
- `engine` (`String`) Name of the table engine without parameters.
- `path` (`String`) Absolute path to the folder with data part files.
- `disk` (`String`) Name of a disk that stores the data part.
- `hash_of_all_files` (`String`) [sipHash128](../../sql-reference/functions/hash-functions.md#hash_functions-siphash128) of compressed files.
- `hash_of_uncompressed_files` (`String`) [sipHash128](../../sql-reference/functions/hash-functions.md#hash_functions-siphash128) of uncompressed files (files with marks, index file etc.).
- `uncompressed_hash_of_compressed_files` (`String`) [sipHash128](../../sql-reference/functions/hash-functions.md#hash_functions-siphash128) of data in the compressed files as if they were uncompressed.
- `bytes` (`UInt64`) Alias for `bytes_on_disk`.
- `marks_size` (`UInt64`) Alias for `marks_bytes`.
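For example, a sketch of summarizing active parts per table by compressed size on disk:

``` sql
SELECT
    database,
    table,
    count() AS parts,
    sum(rows) AS total_rows,
    formatReadableSize(sum(bytes_on_disk)) AS size_on_disk
FROM system.parts
WHERE active
GROUP BY database, table
ORDER BY sum(bytes_on_disk) DESC
LIMIT 10
```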

View File

@ -0,0 +1,15 @@
# system.processes {#system_tables-processes}
This system table is used for implementing the `SHOW PROCESSLIST` query.
Columns:
- `user` (String) The user who made the query. Keep in mind that for distributed processing, queries are sent to remote servers under the `default` user. The field contains the username for a specific query, not for a query that this query initiated.
- `address` (String) The IP address the request was made from. The same for distributed processing. To track where a distributed query was originally made from, look at `system.processes` on the query requestor server.
- `elapsed` (Float64) The time in seconds since request execution started.
- `rows_read` (UInt64) The number of rows read from the table. For distributed processing, on the requestor server, this is the total for all remote servers.
- `bytes_read` (UInt64) The number of uncompressed bytes read from the table. For distributed processing, on the requestor server, this is the total for all remote servers.
- `total_rows_approx` (UInt64) The approximation of the total number of rows that should be read. For distributed processing, on the requestor server, this is the total for all remote servers. It can be updated during request processing, when new sources to process become known.
- `memory_usage` (UInt64) Amount of RAM the request uses. It might not include some types of dedicated memory. See the [max\_memory\_usage](../../operations/settings/query-complexity.md#settings_max_memory_usage) setting.
- `query` (String) The query text. For `INSERT`, it doesnt include the data to insert.
- `query_id` (String) Query ID, if defined.
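A rough equivalent of `SHOW PROCESSLIST` that orders the currently running queries by elapsed time:

``` sql
SELECT query_id, user, elapsed, read_rows, formatReadableSize(memory_usage) AS memory, query
FROM system.processes
ORDER BY elapsed DESC
```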

View File

@ -0,0 +1,138 @@
# system.query\_log {#system_tables-query_log}
Contains information about executed queries, for example, start time, duration of processing, error messages.
!!! note "Note"
This table doesnt contain the ingested data for `INSERT` queries.
You can change settings of queries logging in the [query\_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query-log) section of the server configuration.
You can disable queries logging by setting [log\_queries = 0](../../operations/settings/settings.md#settings-log-queries). We don’t recommend turning off logging because information in this table is important for solving issues.
The flushing period of data is set in `flush_interval_milliseconds` parameter of the [query\_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query-log) server settings section. To force flushing, use the [SYSTEM FLUSH LOGS](../../sql-reference/statements/system.md#query_language-system-flush_logs) query.
ClickHouse doesnt delete data from the table automatically. See [Introduction](index.md#system-tables-introduction) for more details.
The `system.query_log` table registers two kinds of queries:
1. Initial queries that were run directly by the client.
2. Child queries that were initiated by other queries (for distributed query execution). For these types of queries, information about the parent queries is shown in the `initial_*` columns.
Each query creates one or two rows in the `query_log` table, depending on the status (see the `type` column) of the query:
1. If the query execution was successful, two rows with the `QueryStart` and `QueryFinish` types are created.
2. If an error occurred during query processing, two events with the `QueryStart` and `ExceptionWhileProcessing` types are created.
3. If an error occurred before launching the query, a single event with the `ExceptionBeforeStart` type is created.
Columns:
- `type` ([Enum8](../../sql-reference/data-types/enum.md)) — Type of an event that occurred when executing the query. Values:
- `'QueryStart' = 1` — Successful start of query execution.
- `'QueryFinish' = 2` — Successful end of query execution.
- `'ExceptionBeforeStart' = 3` — Exception before the start of query execution.
- `'ExceptionWhileProcessing' = 4` — Exception during the query execution.
- `event_date` ([Date](../../sql-reference/data-types/date.md)) — Query starting date.
- `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Query starting time.
- `query_start_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Start time of query execution.
- `query_duration_ms` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Duration of query execution in milliseconds.
- `read_rows` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Total number of rows read from all tables and table functions that participated in the query. It includes usual subqueries and subqueries for `IN` and `JOIN`. For distributed queries `read_rows` includes the total number of rows read at all replicas. Each replica sends its `read_rows` value, and the query initiator server summarizes all received and local values. Cache volumes don’t affect this value.
- `read_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Total number of bytes read from all tables and table functions that participated in the query. It includes usual subqueries and subqueries for `IN` and `JOIN`. For distributed queries `read_bytes` includes the total number of bytes read at all replicas. Each replica sends its `read_bytes` value, and the query initiator server summarizes all received and local values. Cache volumes don’t affect this value.
- `written_rows` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — For `INSERT` queries, the number of written rows. For other queries, the column value is 0.
- `written_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — For `INSERT` queries, the number of written bytes. For other queries, the column value is 0.
- `result_rows` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Number of rows in the result of a `SELECT` query, or the number of rows in an `INSERT` query.
- `result_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — RAM volume in bytes used to store a query result.
- `memory_usage` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Memory consumption by the query.
- `query` ([String](../../sql-reference/data-types/string.md)) — Query string.
- `exception` ([String](../../sql-reference/data-types/string.md)) — Exception message.
- `exception_code` ([Int32](../../sql-reference/data-types/int-uint.md)) — Code of an exception.
- `stack_trace` ([String](../../sql-reference/data-types/string.md)) — [Stack trace](https://en.wikipedia.org/wiki/Stack_trace). An empty string, if the query was completed successfully.
- `is_initial_query` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Query type. Possible values:
- 1 — Query was initiated by the client.
- 0 — Query was initiated by another query as part of distributed query execution.
- `user` ([String](../../sql-reference/data-types/string.md)) — Name of the user who initiated the current query.
- `query_id` ([String](../../sql-reference/data-types/string.md)) — ID of the query.
- `address` ([IPv6](../../sql-reference/data-types/domains/ipv6.md)) — IP address that was used to make the query.
- `port` ([UInt16](../../sql-reference/data-types/int-uint.md)) — The client port that was used to make the query.
- `initial_user` ([String](../../sql-reference/data-types/string.md)) — Name of the user who ran the initial query (for distributed query execution).
- `initial_query_id` ([String](../../sql-reference/data-types/string.md)) — ID of the initial query (for distributed query execution).
- `initial_address` ([IPv6](../../sql-reference/data-types/domains/ipv6.md)) — IP address that the parent query was launched from.
- `initial_port` ([UInt16](../../sql-reference/data-types/int-uint.md)) — The client port that was used to make the parent query.
- `interface` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Interface that the query was initiated from. Possible values:
- 1 — TCP.
- 2 — HTTP.
- `os_user` ([String](../../sql-reference/data-types/string.md)) — Operating system username of the user who runs [clickhouse-client](../../interfaces/cli.md).
- `client_hostname` ([String](../../sql-reference/data-types/string.md)) — Hostname of the client machine where the [clickhouse-client](../../interfaces/cli.md) or another TCP client is run.
- `client_name` ([String](../../sql-reference/data-types/string.md)) — The [clickhouse-client](../../interfaces/cli.md) or another TCP client name.
- `client_revision` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Revision of the [clickhouse-client](../../interfaces/cli.md) or another TCP client.
- `client_version_major` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Major version of the [clickhouse-client](../../interfaces/cli.md) or another TCP client.
- `client_version_minor` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Minor version of the [clickhouse-client](../../interfaces/cli.md) or another TCP client.
- `client_version_patch` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Patch component of the [clickhouse-client](../../interfaces/cli.md) or another TCP client version.
- `http_method` (UInt8) — HTTP method that initiated the query. Possible values:
- 0 — The query was launched from the TCP interface.
- 1 — `GET` method was used.
- 2 — `POST` method was used.
- `http_user_agent` ([String](../../sql-reference/data-types/string.md)) — The `UserAgent` header passed in the HTTP request.
- `quota_key` ([String](../../sql-reference/data-types/string.md)) — The “quota key” specified in the [quotas](../../operations/quotas.md) setting (see `keyed`).
- `revision` ([UInt32](../../sql-reference/data-types/int-uint.md)) — ClickHouse revision.
- `thread_numbers` ([Array(UInt32)](../../sql-reference/data-types/array.md)) — Numbers of the threads that are participating in query execution.
- `ProfileEvents.Names` ([Array(String)](../../sql-reference/data-types/array.md)) — Counters that measure different metrics. Their descriptions can be found in the [system.events](events.md#system_tables-events) table.
- `ProfileEvents.Values` ([Array(UInt64)](../../sql-reference/data-types/array.md)) — Values of metrics that are listed in the `ProfileEvents.Names` column.
- `Settings.Names` ([Array(String)](../../sql-reference/data-types/array.md)) — Names of settings that were changed when the client ran the query. To enable logging changes to settings, set the `log_query_settings` parameter to 1.
- `Settings.Values` ([Array(String)](../../sql-reference/data-types/array.md)) — Values of settings that are listed in the `Settings.Names` column.
**Example**
``` sql
SELECT * FROM system.query_log LIMIT 1 FORMAT Vertical;
```
``` text
Row 1:
──────
type: QueryStart
event_date: 2020-05-13
event_time: 2020-05-13 14:02:28
query_start_time: 2020-05-13 14:02:28
query_duration_ms: 0
read_rows: 0
read_bytes: 0
written_rows: 0
written_bytes: 0
result_rows: 0
result_bytes: 0
memory_usage: 0
query: SELECT 1
exception_code: 0
exception:
stack_trace:
is_initial_query: 1
user: default
query_id: 5e834082-6f6d-4e34-b47b-cd1934f4002a
address: ::ffff:127.0.0.1
port: 57720
initial_user: default
initial_query_id: 5e834082-6f6d-4e34-b47b-cd1934f4002a
initial_address: ::ffff:127.0.0.1
initial_port: 57720
interface: 1
os_user: bayonet
client_hostname: clickhouse.ru-central1.internal
client_name: ClickHouse client
client_revision: 54434
client_version_major: 20
client_version_minor: 4
client_version_patch: 1
http_method: 0
http_user_agent:
quota_key:
revision: 54434
thread_ids: []
ProfileEvents.Names: []
ProfileEvents.Values: []
Settings.Names: ['use_uncompressed_cache','load_balancing','log_queries','max_memory_usage']
Settings.Values: ['0','random','1','10000000000']
```
**See Also**
- [system.query\_thread\_log](query_thread_log.md#system_tables-query_thread_log) — This table contains information about each query execution thread.

View File

@ -0,0 +1,113 @@
# system.query\_thread\_log {#system_tables-query_thread_log}
Contains information about threads which execute queries, for example, thread name, thread start time, duration of query processing.
To start logging:
1. Configure parameters in the [query\_thread\_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query_thread_log) section.
2. Set [log\_query\_threads](../../operations/settings/settings.md#settings-log-query-threads) to 1.
The flushing period of data is set in `flush_interval_milliseconds` parameter of the [query\_thread\_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query_thread_log) server settings section. To force flushing, use the [SYSTEM FLUSH LOGS](../../sql-reference/statements/system.md#query_language-system-flush_logs) query.
ClickHouse doesnt delete data from the table automatically. See [Introduction](index.md#system-tables-introduction) for more details.
Columns:
- `event_date` ([Date](../../sql-reference/data-types/date.md)) — The date when the thread finished executing the query.
- `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — The date and time when the thread finished executing the query.
- `query_start_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Start time of query execution.
- `query_duration_ms` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Duration of query execution.
- `read_rows` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Number of read rows.
- `read_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Number of read bytes.
- `written_rows` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — For `INSERT` queries, the number of written rows. For other queries, the column value is 0.
- `written_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — For `INSERT` queries, the number of written bytes. For other queries, the column value is 0.
- `memory_usage` ([Int64](../../sql-reference/data-types/int-uint.md)) — The difference between the amount of allocated and freed memory in context of this thread.
- `peak_memory_usage` ([Int64](../../sql-reference/data-types/int-uint.md)) — The maximum difference between the amount of allocated and freed memory in context of this thread.
- `thread_name` ([String](../../sql-reference/data-types/string.md)) — Name of the thread.
- `thread_number` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Internal thread ID.
- `thread_id` ([Int32](../../sql-reference/data-types/int-uint.md)) — Thread ID.
- `master_thread_id` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — OS thread ID of the initial thread.
- `query` ([String](../../sql-reference/data-types/string.md)) — Query string.
- `is_initial_query` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Query type. Possible values:
- 1 — Query was initiated by the client.
- 0 — Query was initiated by another query for distributed query execution.
- `user` ([String](../../sql-reference/data-types/string.md)) — Name of the user who initiated the current query.
- `query_id` ([String](../../sql-reference/data-types/string.md)) — ID of the query.
- `address` ([IPv6](../../sql-reference/data-types/domains/ipv6.md)) — IP address that was used to make the query.
- `port` ([UInt16](../../sql-reference/data-types/int-uint.md#uint-ranges)) — The client port that was used to make the query.
- `initial_user` ([String](../../sql-reference/data-types/string.md)) — Name of the user who ran the initial query (for distributed query execution).
- `initial_query_id` ([String](../../sql-reference/data-types/string.md)) — ID of the initial query (for distributed query execution).
- `initial_address` ([IPv6](../../sql-reference/data-types/domains/ipv6.md)) — IP address that the parent query was launched from.
- `initial_port` ([UInt16](../../sql-reference/data-types/int-uint.md#uint-ranges)) — The client port that was used to make the parent query.
- `interface` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Interface that the query was initiated from. Possible values:
- 1 — TCP.
- 2 — HTTP.
- `os_user` ([String](../../sql-reference/data-types/string.md)) — Operating system username of the user who runs [clickhouse-client](../../interfaces/cli.md).
- `client_hostname` ([String](../../sql-reference/data-types/string.md)) — Hostname of the client machine where the [clickhouse-client](../../interfaces/cli.md) or another TCP client is run.
- `client_name` ([String](../../sql-reference/data-types/string.md)) — The [clickhouse-client](../../interfaces/cli.md) or another TCP client name.
- `client_revision` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Revision of the [clickhouse-client](../../interfaces/cli.md) or another TCP client.
- `client_version_major` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Major version of the [clickhouse-client](../../interfaces/cli.md) or another TCP client.
- `client_version_minor` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Minor version of the [clickhouse-client](../../interfaces/cli.md) or another TCP client.
- `client_version_patch` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Patch component of the [clickhouse-client](../../interfaces/cli.md) or another TCP client version.
- `http_method` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — HTTP method that initiated the query. Possible values:
- 0 — The query was launched from the TCP interface.
- 1 — `GET` method was used.
- 2 — `POST` method was used.
- `http_user_agent` ([String](../../sql-reference/data-types/string.md)) — The `UserAgent` header passed in the HTTP request.
- `quota_key` ([String](../../sql-reference/data-types/string.md)) — The “quota key” specified in the [quotas](../../operations/quotas.md) setting (see `keyed`).
- `revision` ([UInt32](../../sql-reference/data-types/int-uint.md)) — ClickHouse revision.
- `ProfileEvents.Names` ([Array(String)](../../sql-reference/data-types/array.md)) — Counters that measure different metrics for this thread. Their descriptions can be found in the [system.events](#system_tables-events) table.
- `ProfileEvents.Values` ([Array(UInt64)](../../sql-reference/data-types/array.md)) — Values of metrics for this thread that are listed in the `ProfileEvents.Names` column.
**Example**
``` sql
SELECT * FROM system.query_thread_log LIMIT 1 FORMAT Vertical
```
``` text
Row 1:
──────
event_date: 2020-05-13
event_time: 2020-05-13 14:02:28
query_start_time: 2020-05-13 14:02:28
query_duration_ms: 0
read_rows: 1
read_bytes: 1
written_rows: 0
written_bytes: 0
memory_usage: 0
peak_memory_usage: 0
thread_name: QueryPipelineEx
thread_id: 28952
master_thread_id: 28924
query: SELECT 1
is_initial_query: 1
user: default
query_id: 5e834082-6f6d-4e34-b47b-cd1934f4002a
address: ::ffff:127.0.0.1
port: 57720
initial_user: default
initial_query_id: 5e834082-6f6d-4e34-b47b-cd1934f4002a
initial_address: ::ffff:127.0.0.1
initial_port: 57720
interface: 1
os_user: bayonet
client_hostname: clickhouse.ru-central1.internal
client_name: ClickHouse client
client_revision: 54434
client_version_major: 20
client_version_minor: 4
client_version_patch: 1
http_method: 0
http_user_agent:
quota_key:
revision: 54434
ProfileEvents.Names: ['ContextLock','RealTimeMicroseconds','UserTimeMicroseconds','OSCPUWaitMicroseconds','OSCPUVirtualTimeMicroseconds']
ProfileEvents.Values: [1,97,81,5,81]
...
```
**See Also**
- [system.query\_log](query_log.md#system_tables-query_log) — Description of the `query_log` system table which contains common information about queries execution.

View File

@ -0,0 +1,121 @@
# system.replicas {#system_tables-replicas}
Contains information and status for replicated tables residing on the local server.
This table can be used for monitoring. The table contains a row for every Replicated\* table.
Example:
``` sql
SELECT *
FROM system.replicas
WHERE table = 'visits'
FORMAT Vertical
```
``` text
Row 1:
──────
database: merge
table: visits
engine: ReplicatedCollapsingMergeTree
is_leader: 1
can_become_leader: 1
is_readonly: 0
is_session_expired: 0
future_parts: 1
parts_to_check: 0
zookeeper_path: /clickhouse/tables/01-06/visits
replica_name: example01-06-1.yandex.ru
replica_path: /clickhouse/tables/01-06/visits/replicas/example01-06-1.yandex.ru
columns_version: 9
queue_size: 1
inserts_in_queue: 0
merges_in_queue: 1
part_mutations_in_queue: 0
queue_oldest_time: 2020-02-20 08:34:30
inserts_oldest_time: 0000-00-00 00:00:00
merges_oldest_time: 2020-02-20 08:34:30
part_mutations_oldest_time: 0000-00-00 00:00:00
oldest_part_to_get:
oldest_part_to_merge_to: 20200220_20284_20840_7
oldest_part_to_mutate_to:
log_max_index: 596273
log_pointer: 596274
last_queue_update: 2020-02-20 08:34:32
absolute_delay: 0
total_replicas: 2
active_replicas: 2
```
Columns:
- `database` (`String`) - Database name
- `table` (`String`) - Table name
- `engine` (`String`) - Table engine name
- `is_leader` (`UInt8`) - Whether the replica is the leader.
Only one replica at a time can be the leader. The leader is responsible for selecting background merges to perform.
Note that writes can be performed to any replica that is available and has a session in ZK, regardless of whether it is a leader.
- `can_become_leader` (`UInt8`) - Whether the replica can be elected as a leader.
- `is_readonly` (`UInt8`) - Whether the replica is in read-only mode.
This mode is turned on if the config doesnt have sections with ZooKeeper, if an unknown error occurred when reinitializing sessions in ZooKeeper, and during session reinitialization in ZooKeeper.
- `is_session_expired` (`UInt8`) - Whether the session with ZooKeeper has expired. Basically the same as `is_readonly`.
- `future_parts` (`UInt32`) - The number of data parts that will appear as the result of INSERTs or merges that havent been done yet.
- `parts_to_check` (`UInt32`) - The number of data parts in the queue for verification. A part is put in the verification queue if there is suspicion that it might be damaged.
- `zookeeper_path` (`String`) - Path to table data in ZooKeeper.
- `replica_name` (`String`) - Replica name in ZooKeeper. Different replicas of the same table have different names.
- `replica_path` (`String`) - Path to replica data in ZooKeeper. The same as concatenating `zookeeper_path/replicas/replica_name`.
- `columns_version` (`Int32`) - Version number of the table structure. Indicates how many times ALTER was performed. If replicas have different versions, it means some replicas havent made all of the ALTERs yet.
- `queue_size` (`UInt32`) - Size of the queue for operations waiting to be performed. Operations include inserting blocks of data, merges, and certain other actions. It usually coincides with `future_parts`.
- `inserts_in_queue` (`UInt32`) - Number of inserts of blocks of data that need to be made. Insertions are usually replicated fairly quickly. If this number is large, it means something is wrong.
- `merges_in_queue` (`UInt32`) - The number of merges waiting to be made. Sometimes merges are lengthy, so this value may be greater than zero for a long time.
- `part_mutations_in_queue` (`UInt32`) - The number of mutations waiting to be made.
- `queue_oldest_time` (`DateTime`) - If `queue_size` is greater than 0, shows when the oldest operation was added to the queue.
- `inserts_oldest_time` (`DateTime`) - See `queue_oldest_time`
- `merges_oldest_time` (`DateTime`) - See `queue_oldest_time`
- `part_mutations_oldest_time` (`DateTime`) - See `queue_oldest_time`
The next 4 columns have a non-zero value only when there is an active session with ZooKeeper.
- `log_max_index` (`UInt64`) - Maximum entry number in the log of general activity.
- `log_pointer` (`UInt64`) - Maximum entry number in the log of general activity that the replica copied to its execution queue, plus one. If `log_pointer` is much smaller than `log_max_index`, something is wrong.
- `last_queue_update` (`DateTime`) - When the queue was updated last time.
- `absolute_delay` (`UInt64`) - How much lag, in seconds, the current replica has.
- `total_replicas` (`UInt8`) - The total number of known replicas of this table.
- `active_replicas` (`UInt8`) - The number of replicas of this table that have a session in ZooKeeper (i.e., the number of functioning replicas).
If you request all the columns, the table may work a bit slowly, since several reads from ZooKeeper are made for each row.
If you dont request the last 4 columns (log\_max\_index, log\_pointer, total\_replicas, active\_replicas), the table works quickly.
For example, you can check that everything is working correctly like this:
``` sql
SELECT
database,
table,
is_leader,
is_readonly,
is_session_expired,
future_parts,
parts_to_check,
columns_version,
queue_size,
inserts_in_queue,
merges_in_queue,
log_max_index,
log_pointer,
total_replicas,
active_replicas
FROM system.replicas
WHERE
is_readonly
OR is_session_expired
OR future_parts > 20
OR parts_to_check > 10
OR queue_size > 20
OR inserts_in_queue > 10
OR log_max_index - log_pointer > 10
OR total_replicas < 2
OR active_replicas < total_replicas
```
If this query doesnt return anything, it means that everything is fine.

View File

@ -0,0 +1,50 @@
# system.settings {#system-tables-system-settings}
Contains information about session settings for the current user.
Columns:
- `name` ([String](../../sql-reference/data-types/string.md)) — Setting name.
- `value` ([String](../../sql-reference/data-types/string.md)) — Setting value.
- `changed` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Shows whether a setting is changed from its default value.
- `description` ([String](../../sql-reference/data-types/string.md)) — Short setting description.
- `min` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — Minimum value of the setting, if any is set via [constraints](../../operations/settings/constraints-on-settings.md#constraints-on-settings). If the setting has no minimum value, contains [NULL](../../sql-reference/syntax.md#null-literal).
- `max` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — Maximum value of the setting, if any is set via [constraints](../../operations/settings/constraints-on-settings.md#constraints-on-settings). If the setting has no maximum value, contains [NULL](../../sql-reference/syntax.md#null-literal).
- `readonly` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Shows whether the current user can change the setting:
- `0` — Current user can change the setting.
- `1` — Current user cant change the setting.
**Example**
The following example shows how to get information about settings whose names contain `min_i`.
``` sql
SELECT *
FROM system.settings
WHERE name LIKE '%min_i%'
```
``` text
┌─name────────────────────────────────────────┬─value─────┬─changed─┬─description───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┬─min──┬─max──┬─readonly─┐
│ min_insert_block_size_rows │ 1048576 │ 0 │ Squash blocks passed to INSERT query to specified size in rows, if blocks are not big enough. │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 0 │
│ min_insert_block_size_bytes │ 268435456 │ 0 │ Squash blocks passed to INSERT query to specified size in bytes, if blocks are not big enough. │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 0 │
│ read_backoff_min_interval_between_events_ms │ 1000 │ 0 │ Settings to reduce the number of threads in case of slow reads. Do not pay attention to the event, if the previous one has passed less than a certain amount of time. │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 0 │
└─────────────────────────────────────────────┴───────────┴─────────┴───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┴──────┴──────┴──────────┘
```
Using `WHERE changed` can be useful, for example, when you want to check:
- Whether settings in configuration files are loaded correctly and are in use.
- Settings that changed in the current session.
<!-- -->
``` sql
SELECT * FROM system.settings WHERE changed AND name='load_balancing'
```
**See also**
- [Settings](../../operations/settings/index.md#session-settings-intro)
- [Permissions for Queries](../../operations/settings/permissions-for-queries.md#settings_readonly)
- [Constraints on Settings](../../operations/settings/constraints-on-settings.md)

Some files were not shown because too many files have changed in this diff