Merge branch 'master' of github.com:yandex/ClickHouse into clang-tidy-4

Author: Alexey Milovidov, 2020-03-18 22:16:07 +03:00
Commit: 9cc463b1af
113 changed files with 598 additions and 578 deletions

@ -186,11 +186,13 @@ if (COMPILER_GCC OR COMPILER_CLANG)
endif ()
option(WITH_COVERAGE "Build with coverage." 0)
if(WITH_COVERAGE AND COMPILER_CLANG)
set(COMPILER_FLAGS "${COMPILER_FLAGS} -fprofile-instr-generate -fcoverage-mapping")
# If we want to disable coverage for specific translation units
set(WITHOUT_COVERAGE "-fno-profile-instr-generate -fno-coverage-mapping")
endif()
if(WITH_COVERAGE AND COMPILER_GCC)
set(COMPILER_FLAGS "${COMPILER_FLAGS} -fprofile-arcs -ftest-coverage")
set(COVERAGE_OPTION "-lgcov")

@ -6,6 +6,7 @@ set (SRCS
DateLUT.cpp
DateLUTImpl.cpp
demangle.cpp
getFQDNOrHostName.cpp
getMemoryAmount.cpp
getThreadId.cpp
JSON.cpp
@ -20,15 +21,9 @@ set (SRCS
)
if (ENABLE_REPLXX)
set (SRCS ${SRCS}
ReplxxLineReader.cpp
ReplxxLineReader.h
)
list (APPEND SRCS ReplxxLineReader.cpp)
elseif (ENABLE_READLINE)
set (SRCS ${SRCS}
ReadlineLineReader.cpp
ReadlineLineReader.h
)
list (APPEND SRCS ReadlineLineReader.cpp)
endif ()
if (USE_DEBUG_HELPERS)
@ -38,6 +33,12 @@ endif ()
add_library (common ${SRCS})
if (WITH_COVERAGE)
target_compile_definitions(common PUBLIC WITH_COVERAGE=1)
else ()
target_compile_definitions(common PUBLIC WITH_COVERAGE=0)
endif ()
target_include_directories(common PUBLIC .. ${CMAKE_CURRENT_BINARY_DIR}/..)
if(CCTZ_INCLUDE_DIR)
@ -56,8 +57,6 @@ if(CCTZ_LIBRARY)
target_link_libraries(common PRIVATE ${CCTZ_LIBRARY})
endif()
target_link_libraries(common PUBLIC replxx)
# allow explicitly fallback to readline
if (NOT ENABLE_REPLXX AND ENABLE_READLINE)
message (STATUS "Attempt to fallback to readline explicitly")
@ -82,11 +81,13 @@ endif ()
target_link_libraries (common
PUBLIC
${Poco_Net_LIBRARY}
${Poco_Util_LIBRARY}
${Poco_Foundation_LIBRARY}
${CITYHASH_LIBRARIES}
${Boost_SYSTEM_LIBRARY}
FastMemcpy
replxx
)
if (ENABLE_TESTS)

@ -1,4 +1,4 @@
#include <common/argsToConfig.h>
#include "argsToConfig.h"
#include <Poco/Util/Application.h>
#include <Poco/Util/LayeredConfiguration.h>

@ -1,4 +1,5 @@
#pragma once
#include <Poco/Util/Application.h>
namespace Poco::Util

@ -4,4 +4,3 @@
#cmakedefine01 USE_JEMALLOC
#cmakedefine01 UNBUNDLED
#cmakedefine01 WITH_COVERAGE

@ -1,16 +1,17 @@
#include <common/coverage.h>
#include <common/config_common.h>
#include "coverage.h"
#if WITH_COVERAGE
#include <unistd.h>
#include <mutex>
# include <mutex>
#if defined(__clang__)
# include <unistd.h>
# if defined(__clang__)
extern "C" void __llvm_profile_dump();
#elif defined(__GNUC__) || defined(__GNUG__)
# elif defined(__GNUC__) || defined(__GNUG__)
extern "C" void __gcov_exit();
#endif
# endif
#endif
@ -21,11 +22,11 @@ void dumpCoverageReportIfPossible()
static std::mutex mutex;
std::lock_guard lock(mutex);
#if defined(__clang__)
# if defined(__clang__)
__llvm_profile_dump();
#elif defined(__GNUC__) || defined(__GNUG__)
# elif defined(__GNUC__) || defined(__GNUG__)
__gcov_exit();
#endif
# endif
#endif
}

@ -1,5 +1,5 @@
#include <Poco/Net/DNS.h>
#include <Common/getFQDNOrHostName.h>
#include <common/getFQDNOrHostName.h>
namespace

base/common/ya.make (new file, 12 lines)
@ -0,0 +1,12 @@
LIBRARY()
PEERDIR(
contrib/libs/poco/Util
)
SRCS(
argsToConfig.cpp
coverage.cpp
)
END()

@ -2,7 +2,7 @@
#include <daemon/BaseDaemon.h>
#include <Poco/Util/LayeredConfiguration.h>
#include <Poco/Util/Application.h>
#include <Common/getFQDNOrHostName.h>
#include <common/getFQDNOrHostName.h>
#include <mutex>
#include <iomanip>

@ -1 +1,3 @@
RECURSE(
common
)

@ -19,7 +19,7 @@
#include <Common/Exception.h>
#include <Common/ZooKeeper/ZooKeeper.h>
#include <Common/ZooKeeper/KeeperException.h>
#include <Common/getFQDNOrHostName.h>
#include <common/getFQDNOrHostName.h>
#include <Common/isLocalAddress.h>
#include <Common/typeid_cast.h>
#include <Common/ClickHouseRevision.h>

@ -6,7 +6,7 @@
#include <thread>
#include <Common/getNumberOfPhysicalCPUCores.h>
#include <Common/getFQDNOrHostName.h>
#include <common/getFQDNOrHostName.h>
#include <common/getMemoryAmount.h>
#include <Common/StringUtils/StringUtils.h>

@ -12,7 +12,7 @@
#include <Core/ExternalTable.h>
#include <Common/StringUtils/StringUtils.h>
#include <Common/escapeForFileName.h>
#include <Common/getFQDNOrHostName.h>
#include <common/getFQDNOrHostName.h>
#include <Common/CurrentThread.h>
#include <Common/setThreadName.h>
#include <Common/config.h>

@ -1,7 +1,7 @@
#pragma once
#include <Common/config.h>
#include <Poco/Net/TCPServerConnection.h>
#include <Common/getFQDNOrHostName.h>
#include <common/getFQDNOrHostName.h>
#include <Common/CurrentMetrics.h>
#include <Core/MySQLProtocol.h>
#include "IServer.h"

@ -27,7 +27,7 @@
#include <Common/ZooKeeper/ZooKeeper.h>
#include <Common/ZooKeeper/ZooKeeperNodeCache.h>
#include "config_core.h"
#include <Common/getFQDNOrHostName.h>
#include <common/getFQDNOrHostName.h>
#include <Common/getMultipleKeysFromConfig.h>
#include <Common/getNumberOfPhysicalCPUCores.h>
#include <Common/getExecutablePath.h>

@ -2,7 +2,7 @@
#include <Poco/Net/TCPServerConnection.h>
#include <Common/getFQDNOrHostName.h>
#include <common/getFQDNOrHostName.h>
#include <Common/CurrentMetrics.h>
#include <Common/Stopwatch.h>
#include <Core/Protocol.h>

@ -4,7 +4,7 @@
#include <Poco/Net/DNS.h>
#include <Common/BitHelpers.h>
#include <Common/getFQDNOrHostName.h>
#include <common/getFQDNOrHostName.h>
#include <Common/isLocalAddress.h>
#include <Common/ProfileEvents.h>
#include <Core/Settings.h>

@ -1,7 +1,7 @@
#include <Functions/IFunctionImpl.h>
#include <Functions/FunctionFactory.h>
#include <DataTypes/DataTypeString.h>
#include <Common/getFQDNOrHostName.h>
#include <common/getFQDNOrHostName.h>
#include <Core/Field.h>

@ -4,7 +4,7 @@
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
#include <Core/Defines.h>
#include <Common/getFQDNOrHostName.h>
#include <common/getFQDNOrHostName.h>
#include <Common/ClickHouseRevision.h>
#include <Common/config_version.h>
#include <unistd.h>

@ -20,7 +20,7 @@
#include <Access/AccessRightsElement.h>
#include <Common/DNSResolver.h>
#include <Common/Macros.h>
#include <Common/getFQDNOrHostName.h>
#include <common/getFQDNOrHostName.h>
#include <Common/setThreadName.h>
#include <Common/Stopwatch.h>
#include <Common/randomSeed.h>

@ -19,47 +19,49 @@
We run this query and watch how fast it executes. After a few seconds, once the speed has stabilized, we interrupt execution.
The query execution speed is reported as the amount of source data processed (read from the table) per unit of time.
For example, in the numbers table the data we read are UInt64 numbers (8 bytes each). If we process a billion such numbers per second, the displayed speed is 8 GB/sec. -->
<query>SELECT count() FROM zeros(100000000) WHERE NOT ignore(rand())</query>
<query>SELECT count() FROM zeros_mt(1600000000) WHERE NOT ignore(rand())</query>
<query>SELECT count() FROM zeros( 100000000) WHERE NOT ignore(rand())</query>
<query>SELECT count() FROM zeros_mt(1600000000) WHERE NOT ignore(rand())</query>
<!-- 10. Non-cryptographic hash function for 64-bit -> 64-bit integers. -->
<query>SELECT count() FROM numbers(100000000) WHERE NOT ignore(intHash64(number))</query>
<query>SELECT count() FROM numbers( 100000000) WHERE NOT ignore(intHash64(number))</query>
<query>SELECT count() FROM numbers_mt(1600000000) WHERE NOT ignore(intHash64(number))</query>
<!-- 11. Non-cryptographic hash function for 64-bit -> 32-bit integers. -->
<query>SELECT count() FROM numbers(100000000) WHERE NOT ignore(intHash32(number))</query>
<query>SELECT count() FROM numbers( 100000000) WHERE NOT ignore(intHash32(number))</query>
<query>SELECT count() FROM numbers_mt(1600000000) WHERE NOT ignore(intHash32(number))</query>
<!-- 12. Converting an integer to a string in decimal form. -->
<query>SELECT count() FROM numbers(100000000) WHERE NOT ignore(toString(number))</query>
<query>SELECT count() FROM numbers_mt(1600000000) WHERE NOT ignore(toString(number))</query>
<query>SELECT count() FROM numbers( 10000000) WHERE NOT ignore(toString(number))</query>
<query>SELECT count() FROM numbers_mt(160000000) WHERE NOT ignore(toString(number))</query>
<!-- 13. Converting an integer to a string by copying a chunk of memory. -->
<query>SELECT count() FROM numbers(100000000) WHERE NOT ignore(reinterpretAsString(number))</query>
<query>SELECT count() FROM numbers( 100000000) WHERE NOT ignore(reinterpretAsString(number))</query>
<query>SELECT count() FROM numbers_mt(1600000000) WHERE NOT ignore(reinterpretAsString(number))</query>
<!-- 26. Integer division by a constant. The libdivide library is used. -->
<query>SELECT count() FROM numbers(100000000) WHERE NOT ignore(number / 7)</query>
<query>SELECT count() FROM numbers( 100000000) WHERE NOT ignore(number / 7)</query>
<query>SELECT count() FROM numbers_mt(1600000000) WHERE NOT ignore(number / 7)</query>
<!-- 27. Integer division by a constant. -->
<query>SELECT count() FROM numbers(100000000) WHERE NOT ignore(number % 7)</query>
<query>SELECT count() FROM numbers( 100000000) WHERE NOT ignore(number % 7)</query>
<query>SELECT count() FROM numbers_mt(1600000000) WHERE NOT ignore(number % 7)</query>
<!-- 28. Integer division by a constant. -->
<query>SELECT count() FROM numbers(100000000) WHERE NOT ignore(number % 34908756)</query>
<query>SELECT count() FROM numbers( 100000000) WHERE NOT ignore(number % 34908756)</query>
<query>SELECT count() FROM numbers_mt(1600000000) WHERE NOT ignore(number % 34908756)</query>
<!-- 29. Lookup table that fits in L2 cache. -->
<query>SELECT number % 1000 AS k, count() FROM numbers(100000000) GROUP BY k</query>
<query>SELECT number % 1000 AS k, count() FROM numbers_mt(1600000000) GROUP BY k</query>
<query>SELECT number % 1000 AS k, count() FROM numbers( 100000000) GROUP BY k FORMAT Null</query>
<query>SELECT number % 1000 AS k, count() FROM numbers_mt(1600000000) GROUP BY k FORMAT Null</query>
<!-- 30. Hash table that fits in L3 cache. -->
<query>SELECT number % 100000 AS k, count() FROM numbers(100000000) GROUP BY k</query>
<query>SELECT number % 100000 AS k, count() FROM numbers_mt(1600000000) GROUP BY k</query>
<query>SELECT number % 100000 AS k, count() FROM numbers( 10000000) GROUP BY k FORMAT Null</query>
<query>SELECT number % 100000 AS k, count() FROM numbers_mt(160000000) GROUP BY k FORMAT Null</query>
<!-- 31. Hash table that probably fits in L3 cache. -->
<query>SELECT number % 1000000 AS k, count() FROM numbers(100000000) GROUP BY k</query>
<query>SELECT number % 1000000 AS k, count() FROM numbers_mt(1600000000) GROUP BY k</query>
<query>SELECT number % 1000000 AS k, count() FROM numbers( 10000000) GROUP BY k FORMAT Null</query>
<query>SELECT number % 1000000 AS k, count() FROM numbers_mt(160000000) GROUP BY k FORMAT Null</query>
<!-- 32. Hash table that does not fit in L3 cache. -->
<query>SELECT number % 10000000 AS k, count() FROM numbers(100000000) GROUP BY k</query>
<query>SELECT number % 10000000 AS k, count() FROM numbers_mt(1600000000) GROUP BY k</query>
<query>SELECT number % 10000000 AS k, count() FROM numbers( 10000000) GROUP BY k FORMAT Null</query>
<query>SELECT number % 10000000 AS k, count() FROM numbers_mt(80000000) GROUP BY k FORMAT Null</query>
<!-- 33. Hash table that requires a lot of RAM. Interesting effects are possible. -->
<query>SELECT number % 500000000 AS k, count() FROM numbers(100000000) GROUP BY k</query>
<query>SELECT number % 500000000 AS k, count() FROM numbers_mt(1600000000) GROUP BY k</query>
<!-- For this HT size, a single-threaded query that makes sense would be too slow (tens of seconds).
<query>SELECT number % 100000000 AS k, count() FROM numbers( 100000000) GROUP BY k FORMAT Null</query>
-->
<query>SELECT number % toUInt32(1e8) AS k, count() FROM numbers_mt(toUInt32(1e8)) GROUP BY k FORMAT Null</query>
<!-- 35. Cache misses issued from many CPU cores. -->
<!-- <query>SELECT number % (intDiv(100000000, {THREADS})) AS k, count() FROM numbers_mt(1600000000) GROUP BY k</query> -->
<!-- 46. A query that requires a lot of useless copying. -->
<query>SELECT count() FROM zeros(10000000) WHERE NOT ignore(materialize('xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx') AS s, concat(s,s,s,s,s,s,s,s,s,s) AS t, concat(t,t,t,t,t,t,t,t,t,t) AS u) SETTINGS max_block_size = 1000</query>
<query>SELECT count() FROM zeros(1000000) WHERE NOT ignore(materialize('xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx') AS s, concat(s,s,s,s,s,s,s,s,s,s) AS t, concat(t,t,t,t,t,t,t,t,t,t) AS u) SETTINGS max_block_size = 1000</query>
</test>

@ -274,6 +274,7 @@ if unstable_queries:
error_tests += slow_average_tests
if error_tests:
status = 'failure'
message_array.append(str(error_tests) + ' errors')
if message_array:

@ -1,4 +1,4 @@
# Array(T) { #data_type-array}
# Array(T) {#data_type-array}
Array of `T`-type items.

@ -1,4 +1,4 @@
# DateTime { #data_type-datetime}
# DateTime {#data_type-datetime}
Allows storing an instant in time that can be expressed as a calendar date and a time of day.

@ -1,4 +1,4 @@
# DateTime64 { #data_type-datetime64}
# DateTime64 {#data_type-datetime64}
Allows storing an instant in time that can be expressed as a calendar date and a time of day, with defined sub-second precision.

@ -27,7 +27,7 @@ SELECT 1 - 0.9
- Floating-point calculations might result in numbers such as infinity (`Inf`) and "not-a-number" (`NaN`). This should be taken into account when processing the results of calculations.
- When parsing floating-point numbers from text, the result might not be the nearest machine-representable number.
## NaN and Inf { #data_type-float-nan-inf}
## NaN and Inf {#data_type-float-nan-inf}
In contrast to standard SQL, ClickHouse supports the following categories of floating-point numbers:

@ -1,4 +1,4 @@
# Data Types { #data_types}
# Data Types {#data_types}
ClickHouse can store various kinds of data in table cells.

@ -1,4 +1,4 @@
# AggregateFunction(name, types_of_arguments...) { #data_type-aggregatefunction}
# AggregateFunction(name, types_of_arguments...) {#data_type-aggregatefunction}
The intermediate state of an aggregate function. To get it, use aggregate functions with the `-State` suffix. To get aggregated data in the future, you must use the same aggregate functions with the `-Merge` suffix.

@ -1,4 +1,4 @@
# Nullable(TypeName) { #data_type-nullable}
# Nullable(TypeName) {#data_type-nullable}
Allows storing a special marker ([NULL](../query_language/syntax.md)) that denotes "missing value" alongside the normal values allowed by `TypeName`. For example, a `Nullable(Int8)` type column can store `Int8` type values, and the rows that don't have a value will store `NULL`.

@ -1,4 +1,4 @@
# Interval { #data-type-interval}
# Interval {#data-type-interval}
The family of data types representing time and date intervals. The resulting types of the [INTERVAL](../../query_language/operators.md#operator-interval) operator.
@ -32,7 +32,7 @@ SELECT toTypeName(INTERVAL 4 DAY)
└──────────────────────────────┘
```
## Usage Remarks { #data-type-interval-usage-remarks}
## Usage Remarks {#data-type-interval-usage-remarks}
You can use `Interval`-type values in arithmetical operations with [Date](../../data_types/date.md) and [DateTime](../../data_types/datetime.md)-type values. For example, you can add 4 days to the current time:

@ -1,6 +1,5 @@
# Set
Used for the right half of an [IN](../../query_language/select.md##select-in-operators) expression.
Used for the right half of an [IN](../../query_language/select.md#select-in-operators) expression.
[Original article](https://clickhouse.tech/docs/en/data_types/special_data_types/set/) <!--hide-->

@ -1,4 +1,4 @@
# UUID { #uuid-data-type}
# UUID {#uuid-data-type}
A universally unique identifier (UUID) is a 16-byte number used to identify records. For detailed information about the UUID, see [Wikipedia](https://en.wikipedia.org/wiki/Universally_unique_identifier).

@ -11,7 +11,7 @@ Distributed sorting is one of the main causes of reduced performance when runnin
Most MapReduce implementations allow you to execute arbitrary code on a cluster. But a declarative query language is better suited to OLAP in order to run experiments quickly. For example, Hadoop has Hive and Pig. Also consider Cloudera Impala or Shark (outdated) for Spark, as well as Spark SQL, Presto, and Apache Drill. Performance when running such tasks is highly sub-optimal compared to specialized systems, but relatively high latency makes it unrealistic to use these systems as the backend for a web interface.
## What If I Have a Problem with Encodings When Using Oracle Through ODBC? { #oracle-odbc-encodings}
## What If I Have a Problem with Encodings When Using Oracle Through ODBC? {#oracle-odbc-encodings}
If you use Oracle through the ODBC driver as a source of external dictionaries, you need to set the correct value for the `NLS_LANG` environment variable in `/etc/default/clickhouse`. For more information, see the [Oracle NLS_LANG FAQ](https://www.oracle.com/technetwork/products/globalization/nls-lang-099431.html).
@ -21,7 +21,7 @@ If you use Oracle through the ODBC driver as a source of external dictionaries,
NLS_LANG=RUSSIAN_RUSSIA.UTF8
```
## How Do I Export Data from ClickHouse to a File? { #how-to-export-to-file}
## How Do I Export Data from ClickHouse to a File? {#how-to-export-to-file}
### Using INTO OUTFILE Clause

@ -14,7 +14,7 @@ To run ClickHouse on processors that do not support SSE 4.2 or have AArch64 or P
## Available Installation Options
### From DEB Packages { #install-from-deb-packages}
### From DEB Packages {#install-from-deb-packages}
It is recommended to use official pre-compiled `deb` packages for Debian or Ubuntu.
@ -66,7 +66,7 @@ sudo yum install clickhouse-server clickhouse-client
You can also download and install packages manually from here: <https://repo.clickhouse.tech/rpm/stable/x86_64>.
### From tgz archives { #from-tgz-archives}
### From tgz archives {#from-tgz-archives}
It is recommended to use official pre-compiled `tgz` archives for all Linux distributions, where installation of `deb` or `rpm` packages is not possible.

@ -1,4 +1,4 @@
# Applying a Catboost Model in ClickHouse { #applying-catboost-model-in-clickhouse}
# Applying a Catboost Model in ClickHouse {#applying-catboost-model-in-clickhouse}
[CatBoost](https://catboost.ai) is a free and open-source gradient boosting library developed at [Yandex](https://yandex.com/company/) for machine learning.
@ -13,7 +13,7 @@ To apply a CatBoost model in ClickHouse:
For more information about training CatBoost models, see [Training and applying models](https://catboost.ai/docs/features/training.html#training).
## Prerequisites { #prerequisites}
## Prerequisites {#prerequisites}
If you don't have [Docker](https://docs.docker.com/install/) yet, install it.
@ -44,7 +44,7 @@ yandex/tutorial-catboost-clickhouse latest 622e4d17945b 22
$ docker run -it -p 8888:8888 yandex/tutorial-catboost-clickhouse
```
## 1. Create a Table { #create-table}
## 1. Create a Table {#create-table}
To create a ClickHouse table for the train sample:
@ -83,7 +83,7 @@ ENGINE = MergeTree ORDER BY date
:) exit
```
## 2. Insert the Data to the Table { #insert-data-to-table}
## 2. Insert the Data to the Table {#insert-data-to-table}
To insert the data:
@ -112,7 +112,7 @@ FROM amazon_train
+---------+
```
## 3. Integrate CatBoost into ClickHouse { #integrate-catboost-into-clickhouse}
## 3. Integrate CatBoost into ClickHouse {#integrate-catboost-into-clickhouse}
!!! note "Note"
**Optional step.** The Docker image contains everything you need to run CatBoost and ClickHouse.
@ -154,7 +154,7 @@ The fastest way to evaluate a CatBoost model is compile `libcatboostmodel.<so|dl
<models_config>/home/catboost/models/*_model.xml</models_config>
```
## 4. Run the Model Inference from SQL { #run-model-inference}
## 4. Run the Model Inference from SQL {#run-model-inference}
To test the model, run the ClickHouse client: `$ clickhouse client`.

@ -19,7 +19,7 @@ Different client and server versions are compatible with one another, but some f
ClickHouse client version is older than ClickHouse server. It may lack support for new features.
```
## Usage { #cli_usage}
## Usage {#cli_usage}
The client can be used in interactive and non-interactive (batch) mode.
To use batch mode, specify the 'query' parameter, or send data to 'stdin' (it verifies that 'stdin' is not a terminal), or both.
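For illustration, a minimal sketch of driving batch mode from a script; it assumes `clickhouse-client` is on the PATH, a local server with default settings, and a hypothetical `test_table`:

```python
import subprocess

# Run one query in batch mode via the documented --query parameter.
result = subprocess.run(
    ["clickhouse-client", "--query", "SELECT version()"],
    capture_output=True, text=True, check=True,
)
print(result.stdout.strip())

# Or feed the data through stdin, here rows for a hypothetical table.
rows = "1\tfoo\n2\tbar\n"
subprocess.run(
    ["clickhouse-client", "--query", "INSERT INTO test_table FORMAT TabSeparated"],
    input=rows, text=True, check=True,
)
```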
@ -71,7 +71,7 @@ You can cancel a long query by pressing Ctrl+C. However, you will still need to
The command-line client allows passing external data (external temporary tables) for querying. For more information, see the section "External data for query processing".
### Queries with Parameters { #cli-queries-with-parameters}
### Queries with Parameters {#cli-queries-with-parameters}
You can create a query with parameters and pass values to them from the client application. This allows you to avoid formatting a query with specific dynamic values on the client side. For example:
@ -79,7 +79,7 @@ You can create a query with parameters and pass values to them from client appli
$ clickhouse-client --param_parName="[1, 2]" -q "SELECT * FROM table WHERE a = {parName:Array(UInt16)}"
```
#### Query Syntax { #cli-queries-with-parameters-syntax}
#### Query Syntax {#cli-queries-with-parameters-syntax}
Format a query as usual, then place the values that you want to pass from the app parameters to the query in braces in the following format:
@ -96,7 +96,7 @@ Format a query as usual, then place the values that you want to pass from the ap
$ clickhouse-client --param_tuple_in_tuple="(10, ('dt', 10))" -q "SELECT * FROM table WHERE val = {tuple_in_tuple:Tuple(UInt8, Tuple(String, UInt8))}"
```
## Configuring { #interfaces_cli_configuration}
## Configuring {#interfaces_cli_configuration}
You can pass parameters to `clickhouse-client` (all parameters have a default value) using:

@ -1,4 +1,4 @@
# Formats for Input and Output Data { #formats}
# Formats for Input and Output Data {#formats}
ClickHouse can accept and return data in various formats. A format supported for input can be used to parse the data provided to `INSERT`s, to perform `SELECT`s from a file-backed table such as File, URL or HDFS, or to read an external dictionary. A format supported for output can be used to arrange the
results of a `SELECT`, and to perform `INSERT`s into a file-backed table.
@ -42,7 +42,7 @@ The supported formats are:
You can control some format processing parameters with the ClickHouse settings. For more information read the [Settings](../operations/settings/settings.md) section.
## TabSeparated { #tabseparated}
## TabSeparated {#tabseparated}
In TabSeparated format, data is written by row. Each row contains values separated by tabs. Each value is followed by a tab, except the last value in the row, which is followed by a line feed. Strictly Unix line feeds are assumed everywhere. The last row must also contain a line feed at the end. Values are written in text format, without enclosing quotation marks, and with special characters escaped.
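As a rough illustration of that escaping, a small sketch that produces one TabSeparated row (the escape set shown, backslash, tab and line feed, covers the common cases; anything beyond that is left out here):

```python
def tsv_escape(value: str) -> str:
    # Escape the characters that would otherwise break the column/row structure.
    return (value.replace("\\", "\\\\")
                 .replace("\t", "\\t")
                 .replace("\n", "\\n"))

columns = ["1", tsv_escape("hello\tworld"), tsv_escape("multi\nline")]
print("\t".join(columns))  # values separated by tabs, the row ends with a line feed
```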
@ -130,14 +130,14 @@ SELECT * FROM nestedt FORMAT TSV
1 [1] ['a']
```
## TabSeparatedRaw { #tabseparatedraw}
## TabSeparatedRaw {#tabseparatedraw}
Differs from `TabSeparated` format in that the rows are written without escaping.
This format is only appropriate for outputting a query result, but not for parsing (retrieving data to insert in a table).
This format is also available under the name `TSVRaw`.
## TabSeparatedWithNames { #tabseparatedwithnames}
## TabSeparatedWithNames {#tabseparatedwithnames}
Differs from the `TabSeparated` format in that the column names are written in the first row.
During parsing, the first row is completely ignored. You can't use column names to determine their position or to check their correctness.
@ -145,14 +145,14 @@ During parsing, the first row is completely ignored. You can't use column names
This format is also available under the name `TSVWithNames`.
## TabSeparatedWithNamesAndTypes { #tabseparatedwithnamesandtypes}
## TabSeparatedWithNamesAndTypes {#tabseparatedwithnamesandtypes}
Differs from the `TabSeparated` format in that the column names are written to the first row, while the column types are in the second row.
During parsing, the first and second rows are completely ignored.
This format is also available under the name `TSVWithNamesAndTypes`.
## Template { #format-template}
## Template {#format-template}
This format allows you to specify a custom format string with placeholders for values with a specified escaping rule.
@ -268,7 +268,7 @@ Page views: ${PageViews:CSV}, User id: ${UserID:CSV}, Useless field: ${:CSV}, Du
`PageViews`, `UserID`, `Duration` and `Sign` inside placeholders are names of columns in the table. Values after `Useless field` in rows and after `\nTotal rows: ` in suffix will be ignored.
All delimiters in the input data must be strictly equal to delimiters in specified format strings.
## TemplateIgnoreSpaces { #templateignorespaces}
## TemplateIgnoreSpaces {#templateignorespaces}
This format is suitable only for input.
Similar to `Template`, but skips whitespace characters between delimiters and values in the input stream. However, if format strings contain whitespace characters, these characters will be expected in the input stream. Also allows specifying empty placeholders (`${}` or `${:None}`) to split some delimiter into separate parts to ignore spaces between them. Such placeholders are used only for skipping whitespace characters.
@ -286,7 +286,7 @@ format_template_resultset = '/some/path/resultset.format', format_template_row =
{${}"SearchPhrase"${}:${}${phrase:JSON}${},${}"c"${}:${}${cnt:JSON}${}}
```
## TSKV { #tskv}
## TSKV {#tskv}
Similar to TabSeparated, but outputs a value in name=value format. Names are escaped the same way as in TabSeparated format, and the = symbol is also escaped.
@ -319,7 +319,7 @@ Both data output and parsing are supported in this format. For parsing, any orde
Parsing allows the presence of the additional field `tskv` without the equal sign or a value. This field is ignored.
## CSV { #csv}
## CSV {#csv}
Comma Separated Values format ([RFC](https://tools.ietf.org/html/rfc4180)).
@ -345,12 +345,12 @@ The CSV format supports the output of totals and extremes the same way as `TabSe
Also prints the header row, similar to `TabSeparatedWithNames`.
## CustomSeparated { #format-customseparated}
## CustomSeparated {#format-customseparated}
Similar to [Template](#format-template), but it prints or reads all columns and uses escaping rule from setting `format_custom_escaping_rule` and delimiters from settings `format_custom_field_delimiter`, `format_custom_row_before_delimiter`, `format_custom_row_after_delimiter`, `format_custom_row_between_delimiter`, `format_custom_result_before_delimiter` and `format_custom_result_after_delimiter`, not from format strings.
There is also `CustomSeparatedIgnoreSpaces` format, which is similar to `TemplateIgnoreSpaces`.
## JSON { #json}
## JSON {#json}
Outputs data in JSON format. Besides data tables, it also outputs column names and types, along with some additional information: the total number of output rows, and the number of rows that could have been output if there weren't a LIMIT. Example:
@ -439,7 +439,7 @@ ClickHouse supports [NULL](../query_language/syntax.md), which is displayed as `
See also the [JSONEachRow](#jsoneachrow) format.
## JSONCompact { #jsoncompact}
## JSONCompact {#jsoncompact}
Differs from JSON only in that data rows are output in arrays, not in objects.
@ -485,7 +485,7 @@ Example:
This format is only appropriate for outputting a query result, but not for parsing (retrieving data to insert in a table).
See also the `JSONEachRow` format.
## JSONEachRow { #jsoneachrow}
## JSONEachRow {#jsoneachrow}
When using this format, ClickHouse outputs rows as separate, newline-delimited JSON objects, but the data as a whole is not valid JSON.
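A minimal sketch of preparing such input and sending it through the HTTP interface as an `INSERT ... FORMAT JSONEachRow` (the host, port and table name are assumptions for illustration):

```python
import json
import urllib.parse
import urllib.request

rows = [{"id": 1, "msg": "hello"}, {"id": 2, "msg": "world"}]
# One JSON object per line; the payload as a whole is deliberately not valid JSON.
payload = "\n".join(json.dumps(row) for row in rows).encode("utf-8")

query = urllib.parse.quote("INSERT INTO test_table FORMAT JSONEachRow")
request = urllib.request.Request("http://localhost:8123/?query=" + query, data=payload)
urllib.request.urlopen(request).read()
```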
@ -555,7 +555,7 @@ Unlike the [JSON](#json) format, there is no substitution of invalid UTF-8 seque
!!! note "Note"
Any set of bytes can be output in the strings. Use the `JSONEachRow` format if you are sure that the data in the table can be formatted as JSON without losing any information.
### Usage of Nested Structures { #jsoneachrow-nested}
### Usage of Nested Structures {#jsoneachrow-nested}
If you have a table with [Nested](../data_types/nested_data_structures/nested.md) data type columns, you can insert JSON data with the same structure. Enable this feature with the [input_format_import_nested_json](../operations/settings/settings.md#settings-input_format_import_nested_json) setting.
@ -609,18 +609,18 @@ SELECT * FROM json_each_row_nested
└───────────────┴────────┘
```
## Native { #native}
## Native {#native}
The most efficient format. Data is written and read by blocks in binary format. For each block, the number of rows, number of columns, column names and types, and parts of columns in this block are recorded one after another. In other words, this format is "columnar": it doesn't convert columns to rows. This is the format used in the native interface for interaction between servers, for using the command-line client, and for C++ clients.
You can use this format to quickly generate dumps that can only be read by the ClickHouse DBMS. It doesn't make sense to work with this format yourself.
## Null { #null}
## Null {#null}
Nothing is output. However, the query is processed, and when using the command-line client, data is transmitted to the client. This is used for tests, including performance testing.
Obviously, this format is only appropriate for output, not for parsing.
## Pretty { #pretty}
## Pretty {#pretty}
Outputs data as Unicode-art tables, also using ANSI-escape sequences for setting colors in the terminal.
A full grid of the table is drawn, and each row occupies two lines in the terminal.
@ -684,16 +684,16 @@ Extremes:
└────────────┴─────────┘
```
## PrettyCompact { #prettycompact}
## PrettyCompact {#prettycompact}
Differs from [Pretty](#pretty) in that the grid is drawn between rows and the result is more compact.
This format is used by default in the command-line client in interactive mode.
## PrettyCompactMonoBlock { #prettycompactmonoblock}
## PrettyCompactMonoBlock {#prettycompactmonoblock}
Differs from [PrettyCompact](#prettycompact) in that up to 10,000 rows are buffered, then output as a single table, not by blocks.
## PrettyNoEscapes { #prettynoescapes}
## PrettyNoEscapes {#prettynoescapes}
Differs from Pretty in that ANSI-escape sequences aren't used. This is necessary for displaying this format in a browser, as well as for using the 'watch' command-line utility.
@ -713,11 +713,11 @@ The same as the previous setting.
The same as the previous setting.
## PrettySpace { #prettyspace}
## PrettySpace {#prettyspace}
Differs from [PrettyCompact](#prettycompact) in that whitespace (space characters) is used instead of the grid.
## RowBinary { #rowbinary}
## RowBinary {#rowbinary}
Formats and parses data by row in binary format. Rows and values are listed consecutively, without separators.
This format is less efficient than the Native format, since it is row-based.
@ -732,7 +732,7 @@ Array is represented as a varint length (unsigned [LEB128](https://en.wikipedia.
For [NULL](../query_language/syntax.md#null-literal) support, an additional byte containing 1 or 0 is added before each [Nullable](../data_types/nullable.md) value. If 1, then the value is `NULL` and this byte is interpreted as a separate value. If 0, the value after the byte is not `NULL`.
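To make the layout concrete, a sketch of how a client might encode a few values by these rules (fixed-width little-endian numbers, LEB128 lengths, and the Nullable prefix byte); it is an illustration of the byte layout, not a complete writer:

```python
import struct

def leb128(n: int) -> bytes:
    # Unsigned LEB128 (varint), used for lengths.
    out = bytearray()
    while True:
        byte = n & 0x7F
        n >>= 7
        out.append(byte | (0x80 if n else 0))
        if not n:
            return bytes(out)

def encode_string(s: str) -> bytes:
    data = s.encode("utf-8")
    return leb128(len(data)) + data

def encode_nullable_uint32(value) -> bytes:
    if value is None:
        return b"\x01"                             # 1: the value is NULL
    return b"\x00" + struct.pack("<I", value)      # 0: the actual value follows

def encode_array_uint64(values) -> bytes:
    return leb128(len(values)) + b"".join(struct.pack("<Q", v) for v in values)

row = encode_string("hello") + encode_nullable_uint32(None) + encode_array_uint64([1, 2, 3])
```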
## RowBinaryWithNamesAndTypes { #rowbinarywithnamesandtypes}
## RowBinaryWithNamesAndTypes {#rowbinarywithnamesandtypes}
Similar to [RowBinary](#rowbinary), but with added header:
@ -740,7 +740,7 @@ Similar to [RowBinary](#rowbinary), but with added header:
* N `String`s specifying column names
* N `String`s specifying column types
## Values { #data-format-values}
## Values {#data-format-values}
Prints every row in brackets. Rows are separated by commas. There is no comma after the last row. The values inside the brackets are also comma-separated. Numbers are output in decimal format without quotes. Arrays are output in square brackets. Strings, dates, and dates with times are output in quotes. Escaping rules and parsing are similar to the [TabSeparated](#tabseparated) format. During formatting, extra spaces aren't inserted, but during parsing, they are allowed and skipped (except for spaces inside array values, which are not allowed). [NULL](../query_language/syntax.md) is represented as `NULL`.
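As a sketch, formatting one row of data into this representation (the string escaping here covers quotes and backslashes only; treat anything beyond that as an assumption):

```python
def values_literal(value) -> str:
    # NULL and numbers are unquoted; strings are single-quoted with backslash escaping.
    if value is None:
        return "NULL"
    if isinstance(value, (int, float)):
        return str(value)
    escaped = str(value).replace("\\", "\\\\").replace("'", "\\'")
    return "'" + escaped + "'"

row = (1, "it's a test", None)
print("(" + ", ".join(values_literal(v) for v in row) + ")")
```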
@ -750,7 +750,7 @@ This is the format that is used in `INSERT INTO t VALUES ...`, but you can also
See also: [input_format_values_interpret_expressions](../operations/settings/settings.md#settings-input_format_values_interpret_expressions) and [input_format_values_deduce_templates_of_expressions](../operations/settings/settings.md#settings-input_format_values_deduce_templates_of_expressions) settings.
## Vertical { #vertical}
## Vertical {#vertical}
Prints each value on a separate line with the column name specified. This format is convenient for printing just one or a few rows, if each row consists of a large number of columns.
@ -783,11 +783,11 @@ test: string with 'quotes' and with some special
This format is only appropriate for outputting a query result, but not for parsing (retrieving data to insert in a table).
## VerticalRaw { #verticalraw}
## VerticalRaw {#verticalraw}
Similar to [Vertical](#vertical), but with escaping disabled. This format is only suitable for outputting query results, not for parsing (receiving data and inserting it in the table).
## XML { #xml}
## XML {#xml}
XML format is suitable only for output, not for parsing. Example:
@ -860,7 +860,7 @@ In string values, the characters `<` and `&` are escaped as `&lt;` and `&amp;`.
Arrays are output as `<array><elem>Hello</elem><elem>World</elem>...</array>`, and tuples as `<tuple><elem>Hello</elem><elem>World</elem>...</tuple>`.
## CapnProto { #capnproto}
## CapnProto {#capnproto}
Cap'n Proto is a binary message format similar to Protocol Buffers and Thrift, but not like JSON or MessagePack.
Deserialization is efficient and usually doesn't increase the system load.
See also [Format Schema](#formatschema).
## Protobuf { #protobuf}
## Protobuf {#protobuf}
Protobuf is a [Protocol Buffers](https://developers.google.com/protocol-buffers/) format.
@ -950,7 +950,7 @@ ClickHouse inputs and outputs protobuf messages in the `length-delimited` format
This means that before every message, its length should be written as a [varint](https://developers.google.com/protocol-buffers/docs/encoding#varints).
See also [how to read/write length-delimited protobuf messages in popular languages](https://cwiki.apache.org/confluence/display/GEODE/Delimiting+Protobuf+Messages).
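A small sketch of that framing; each already-serialized message (for example, the output of a protobuf library's `SerializeToString()`) is written with its length in front as a varint; the message bytes below are placeholders:

```python
def write_varint(value: int) -> bytes:
    # Protocol Buffers "base 128 varint" encoding of an unsigned integer.
    out = bytearray()
    while True:
        byte = value & 0x7F
        value >>= 7
        out.append(byte | (0x80 if value else 0))
        if not value:
            return bytes(out)

def length_delimited(messages) -> bytes:
    # Prefix every message with its length so many of them can share one stream.
    return b"".join(write_varint(len(m)) + m for m in messages)

serialized = [b"\x08\x01", b"\x08\x02"]   # placeholder serialized messages
payload = length_delimited(serialized)
```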
## Avro { #data-format-avro}
## Avro {#data-format-avro}
[Apache Avro](http://avro.apache.org/) is a row-oriented data serialization framework developed within Apache's Hadoop project.
@ -1014,7 +1014,7 @@ Column names must:
Output Avro file compression and sync interval can be configured with [output_format_avro_codec](../operations/settings/settings.md#settings-output_format_avro_codec) and [output_format_avro_sync_interval](../operations/settings/settings.md#settings-output_format_avro_sync_interval) respectively.
## AvroConfluent { #data-format-avro-confluent}
## AvroConfluent {#data-format-avro-confluent}
AvroConfluent supports decoding single-object Avro messages commonly used with [Kafka](https://kafka.apache.org/) and [Confluent Schema Registry](https://docs.confluent.io/current/schema-registry/index.html).
@ -1062,7 +1062,7 @@ SELECT * FROM topic1_stream;
The `format_avro_schema_registry_url` setting needs to be configured in `users.xml` to maintain its value after a restart.
## Parquet { #data-format-parquet}
## Parquet {#data-format-parquet}
[Apache Parquet](http://parquet.apache.org/) is a columnar storage format widespread in the Hadoop ecosystem. ClickHouse supports read and write operations for this format.
@ -1110,7 +1110,7 @@ $ clickhouse-client --query="SELECT * FROM {some_table} FORMAT Parquet" > {some_
To exchange data with Hadoop, you can use [HDFS table engine](../operations/table_engines/hdfs.md).
## ORC { #data-format-orc}
## ORC {#data-format-orc}
[Apache ORC](https://orc.apache.org/) is a columnar storage format widespread in the Hadoop ecosystem. You can only insert data in this format to ClickHouse.
@ -1151,7 +1151,7 @@ $ cat filename.orc | clickhouse-client --query="INSERT INTO some_table FORMAT OR
To exchange data with Hadoop, you can use [HDFS table engine](../operations/table_engines/hdfs.md).
## Format Schema { #formatschema}
## Format Schema {#formatschema}
The name of the file containing the format schema is set by the setting `format_schema`.
This setting is required when using one of the formats `Cap'n Proto` or `Protobuf`.
@ -1170,7 +1170,7 @@ in the server configuration.
[Original article](https://clickhouse.tech/docs/en/interfaces/formats/) <!--hide-->
## Skipping Errors { #skippingerrors}
## Skipping Errors {#skippingerrors}
Some formats such as `CSV`, `TabSeparated`, `TSKV`, `JSONEachRow`, `Template`, `CustomSeparated` and `Protobuf` can skip a broken row if a parsing error occurred and continue parsing from the beginning of the next row. See the [input_format_allow_errors_num](../operations/settings/settings.md#settings-input_format_allow_errors_num) and
[input_format_allow_errors_ratio](../operations/settings/settings.md#settings-input_format_allow_errors_ratio) settings.

@ -1,4 +1,4 @@
# HTTP Interface { #http_interface}
# HTTP Interface {#http_interface}
The HTTP interface lets you use ClickHouse on any platform from any programming language. We use it for working from Java and Perl, as well as shell scripts. In other departments, the HTTP interface is used from Perl, Python, and Go. The HTTP interface is more limited than the native interface, but it has better compatibility.
@ -261,7 +261,7 @@ $ curl -sS 'http://localhost:8123/?max_result_bytes=4000000&buffer_size=3000000&
Use buffering to avoid situations where a query processing error occurred after the response code and HTTP headers were sent to the client. In this situation, an error message is written at the end of the response body, and on the client side, the error can only be detected at the parsing stage.
### Queries with Parameters { #cli-queries-with-parameters}
### Queries with Parameters {#cli-queries-with-parameters}
You can create a query with parameters and pass values for them from the corresponding HTTP request parameters. For more information, see [Queries with Parameters for CLI](cli.md#cli-queries-with-parameters).
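A minimal sketch of the same thing from Python over HTTP, passing the value through a `param_<name>` request parameter; the default host and port are assumptions here:

```python
import urllib.parse
import urllib.request

params = {
    "query": "SELECT {id:UInt32} + 1",
    "param_id": "41",   # supplies the {id:UInt32} placeholder
}
url = "http://localhost:8123/?" + urllib.parse.urlencode(params)
print(urllib.request.urlopen(url).read().decode())
```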

@ -1,4 +1,4 @@
# Interfaces { #interfaces}
# Interfaces {#interfaces}
ClickHouse provides two network interfaces (both can be optionally wrapped in TLS for additional security):

@ -1,4 +1,4 @@
# MySQL interface { #mysql_interface}
# MySQL interface {#mysql_interface}
ClickHouse supports the MySQL wire protocol. It can be enabled by the [mysql_port](../operations/server_settings/settings.md#server_settings-mysql_port) setting in the configuration file:
```xml

@ -101,7 +101,7 @@ Features:
- Refactorings.
- Search and Navigation.
### Yandex DataLens { #yandex-datalens}
### Yandex DataLens {#yandex-datalens}
[Yandex DataLens](https://cloud.yandex.ru/services/datalens) is a data visualization and analytics service.

@ -1,4 +1,4 @@
# Configuration Files { #configuration_files}
# Configuration Files {#configuration_files}
ClickHouse supports multi-file configuration management. The main server configuration file is `/etc/clickhouse-server/config.xml`. Other files must be in the `/etc/clickhouse-server/config.d` directory.

@ -1,4 +1,4 @@
# Quotas { #quotas}
# Quotas {#quotas}
Quotas allow you to limit resource usage over a period of time, or simply track the use of resources.
Quotas are set up in the user config. This is usually 'users.xml'.

@ -1,4 +1,4 @@
# Server configuration parameters { #server_settings}
# Server configuration parameters {#server_settings}
This section contains descriptions of server settings that cannot be changed at the session or query level.

@ -16,7 +16,7 @@ Default value: 3600.
```
## compression { #server-settings-compression}
## compression {#server-settings-compression}
Data compression settings for [MergeTree](../table_engines/mergetree.md)-engine tables.
@ -90,7 +90,7 @@ Settings profiles are located in the file specified in the parameter `user_confi
```
## dictionaries_config { #server_settings-dictionaries_config}
## dictionaries_config {#server_settings-dictionaries_config}
The path to the config file for external dictionaries.
@ -108,7 +108,7 @@ See also "[External dictionaries](../../query_language/dicts/external_dicts.md)"
```
## dictionaries_lazy_load { #server_settings-dictionaries_lazy_load}
## dictionaries_lazy_load {#server_settings-dictionaries_lazy_load}
Lazy loading of dictionaries.
@ -125,7 +125,7 @@ The default is `true`.
```
## format_schema_path { #server_settings-format_schema_path}
## format_schema_path {#server_settings-format_schema_path}
The path to the directory with the schemas for the input data, such as schemas for the [CapnProto](../../interfaces/formats.md#capnproto) format.
@ -136,7 +136,7 @@ The path to the directory with the schemes for the input data, such as schemas f
<format_schema_path>format_schemas/</format_schema_path>
```
## graphite { #server_settings-graphite}
## graphite {#server_settings-graphite}
Sending data to [Graphite](https://github.com/graphite-project).
@ -171,7 +171,7 @@ You can configure multiple `<graphite>` clauses. For instance, you can use this
```
## graphite_rollup { #server_settings-graphite_rollup}
## graphite_rollup {#server_settings-graphite_rollup}
Settings for thinning data for Graphite.
@ -215,7 +215,7 @@ If `http_port` is specified, the openSSL configuration is ignored even if it is
```
## http_server_default_response { #server_settings-http_server_default_response}
## http_server_default_response {#server_settings-http_server_default_response}
The page that is shown by default when you access the ClickHouse HTTP(s) server.
Default value is "Ok." (with a line feed at the end)
@ -230,7 +230,7 @@ Opens `https://tabix.io/` when accessing ` http://localhost: http_port`.
</http_server_default_response>
```
## include_from { #server_settings-include_from}
## include_from {#server_settings-include_from}
The path to the file with substitutions.
@ -268,7 +268,7 @@ Useful for breaking away from a specific network interface.
<interserver_http_host>example.yandex.ru</interserver_http_host>
```
## interserver_http_credentials { #server-settings-interserver_http_credentials}
## interserver_http_credentials {#server-settings-interserver_http_credentials}
The username and password used to authenticate during [replication](../table_engines/replication.md) with the Replicated* engines. These credentials are used only for communication between replicas and are unrelated to credentials for ClickHouse clients. The server checks these credentials for connecting replicas and uses the same credentials when connecting to other replicas. So, these credentials should be set the same for all replicas in a cluster.
By default, authentication is not used.
@ -299,7 +299,7 @@ The number of seconds that ClickHouse waits for incoming requests before closing
```
## listen_host { #server_settings-listen_host}
## listen_host {#server_settings-listen_host}
Restriction on hosts that requests can come from. If you want the server to answer all of them, specify `::`.
@ -311,7 +311,7 @@ Examples:
```
## logger { #server_settings-logger}
## logger {#server_settings-logger}
Logging settings.
@ -374,7 +374,7 @@ For more information, see the section "[Creating replicated tables](../../operat
```
## mark_cache_size { #server-mark-cache-size}
## mark_cache_size {#server-mark-cache-size}
Approximate size (in bytes) of the cache of marks used by table engines of the [MergeTree](../table_engines/mergetree.md) family.
@ -443,7 +443,7 @@ The value 0 means that you can delete all tables without any restrictions.
```
## merge_tree { #server_settings-merge_tree}
## merge_tree {#server_settings-merge_tree}
Fine tuning for tables in the [MergeTree](../table_engines/mergetree.md).
@ -458,7 +458,7 @@ For more information, see the MergeTreeSettings.h header file.
```
## openSSL { #server_settings-openssl}
## openSSL {#server_settings-openssl}
SSL client/server configuration.
@ -518,7 +518,7 @@ Keys for server/client settings:
```
## part_log { #server_settings-part-log}
## part_log {#server_settings-part-log}
Logging events that are associated with [MergeTree](../table_engines/mergetree.md). For instance, adding or merging data. You can use the log to simulate merge algorithms and compare their characteristics. You can visualize the merge process.
@ -543,7 +543,7 @@ Use the following parameters to configure logging:
```
## path { #server_settings-path}
## path {#server_settings-path}
The path to the directory containing data.
@ -557,7 +557,7 @@ The path to the directory containing data.
```
## query_log { #server_settings-query-log}
## query_log {#server_settings-query-log}
Setting for logging queries received with the [log_queries=1](../settings/settings.md) setting.
@ -583,7 +583,7 @@ If the table doesn't exist, ClickHouse will create it. If the structure of the q
</query_log>
```
## query_thread_log { #server_settings-query-thread-log}
## query_thread_log {#server_settings-query-thread-log}
Setting for logging threads of queries received with the [log_query_threads=1](../settings/settings.md#settings-log-query-threads) setting.
@ -609,7 +609,7 @@ If the table doesn't exist, ClickHouse will create it. If the structure of the q
</query_thread_log>
```
## trace_log { #server_settings-trace_log}
## trace_log {#server_settings-trace_log}
Settings for the [trace_log](../system_tables.md#system_tables-trace_log) system table operation.
@ -662,7 +662,7 @@ The masking rules are applied on whole query (to prevent leaks of sensitive data
For distributed queries, each server has to be configured separately; otherwise, subqueries passed to other
nodes will be stored without masking.
## remote_servers { #server_settings_remote_servers}
## remote_servers {#server_settings_remote_servers}
Configuration of clusters used by the [Distributed](../../operations/table_engines/distributed.md) table engine and by the `cluster` table function.
@ -678,7 +678,7 @@ For the value of the `incl` attribute, see the section "[Configuration files](..
- [skip_unavailable_shards](../settings/settings.md#settings-skip_unavailable_shards)
## timezone { #server_settings-timezone}
## timezone {#server_settings-timezone}
The server's time zone.
@ -693,7 +693,7 @@ The time zone is necessary for conversions between String and DateTime formats w
```
## tcp_port { #server_settings-tcp_port}
## tcp_port {#server_settings-tcp_port}
Port for communicating with clients over the TCP protocol.
@ -703,7 +703,7 @@ Port for communicating with clients over the TCP protocol.
<tcp_port>9000</tcp_port>
```
## tcp_port_secure { #server_settings-tcp_port_secure}
## tcp_port_secure {#server_settings-tcp_port_secure}
TCP port for secure communication with clients. Use it with [OpenSSL](#server_settings-openssl) settings.
@ -717,7 +717,7 @@ Positive integer.
<tcp_port_secure>9440</tcp_port_secure>
```
## mysql_port { #server_settings-mysql_port}
## mysql_port {#server_settings-mysql_port}
Port for communicating with clients over MySQL protocol.
@ -731,7 +731,7 @@ Example
<mysql_port>9004</mysql_port>
```
## tmp_path { #server-settings-tmp_path}
## tmp_path {#server-settings-tmp_path}
Path to temporary data for processing large queries.
@ -745,7 +745,7 @@ Path to temporary data for processing large queries.
```
## tmp_policy { #server-settings-tmp_policy}
## tmp_policy {#server-settings-tmp_policy}
Policy from [`storage_configuration`](../table_engines/mergetree.md#table_engine-mergetree-multiple-volumes) to store temporary files.
If not set, [`tmp_path`](#server-settings-tmp_path) is used; otherwise it is ignored.
@ -756,7 +756,7 @@ If not set [`tmp_path`](#server-settings-tmp_path) is used, otherwise it is igno
- `max_data_part_size_bytes` is ignored
- you must have exactly one volume in that policy
## uncompressed_cache_size { #server-settings-uncompressed_cache_size}
## uncompressed_cache_size {#server-settings-uncompressed_cache_size}
Cache size (in bytes) for uncompressed data used by table engines from the [MergeTree](../table_engines/mergetree.md).
@ -770,7 +770,7 @@ The uncompressed cache is advantageous for very short queries in individual case
<uncompressed_cache_size>8589934592</uncompressed_cache_size>
```
## user_files_path { #server_settings-user_files_path}
## user_files_path {#server_settings-user_files_path}
The directory with user files. Used in the table function [file()](../../query_language/table_functions/file.md).
@ -797,7 +797,7 @@ Path to the file that contains:
```
## zookeeper { #server-settings_zookeeper}
## zookeeper {#server-settings_zookeeper}
Contains settings that allow ClickHouse to interact with a [ZooKeeper](http://zookeeper.apache.org/) cluster.
@ -848,7 +848,7 @@ This section contains the following parameters:
- [Replication](../../operations/table_engines/replication.md)
- [ZooKeeper Programmer's Guide](http://zookeeper.apache.org/doc/current/zookeeperProgrammers.html)
## use_minimalistic_part_header_in_zookeeper { #server-settings-use_minimalistic_part_header_in_zookeeper}
## use_minimalistic_part_header_in_zookeeper {#server-settings-use_minimalistic_part_header_in_zookeeper}
Storage method for data part headers in ZooKeeper.
@ -876,14 +876,14 @@ If `use_minimalistic_part_header_in_zookeeper = 1`, then [replicated](../table_e
**Default value:** 0.
## disable_internal_dns_cache { #server-settings-disable_internal_dns_cache}
## disable_internal_dns_cache {#server-settings-disable_internal_dns_cache}
Disables the internal DNS cache. Recommended for operating ClickHouse in systems
with frequently changing infrastructure such as Kubernetes.
**Default value:** 0.
## dns_cache_update_period { #server-settings-dns_cache_update_period}
## dns_cache_update_period {#server-settings-dns_cache_update_period}
The period of updating IP addresses stored in the ClickHouse internal DNS cache (in seconds).
The update is performed asynchronously, in a separate system thread.

@ -1,4 +1,4 @@
# Settings { #settings}
# Settings {#settings}
There are multiple ways to set all the settings described below.
Settings are configured in layers, so each subsequent layer redefines the previous settings.

@ -1,4 +1,4 @@
# Permissions for queries { #permissions_for_queries}
# Permissions for queries {#permissions_for_queries}
Queries in ClickHouse can be divided into several types:
@ -15,7 +15,7 @@ The following settings regulate user permissions by the type of query:
`KILL QUERY` can be performed with any settings.
## readonly { #settings_readonly}
## readonly {#settings_readonly}
Restricts permissions for read-data, write-data, and change-settings queries.
@ -36,7 +36,7 @@ from changing only specific settings, for details see [constraints on settings](
Default value: 0
## allow_ddl { #settings_allow_ddl}
## allow_ddl {#settings_allow_ddl}
Allows or denies [DDL](https://en.wikipedia.org/wiki/Data_definition_language) queries.

@ -16,7 +16,7 @@ It can take one of two values: `throw` or `break`. Restrictions on aggregation (
`any (only for group_by_overflow_mode)` Continue aggregation for the keys that got into the set, but don't add new keys to the set.
## max_memory_usage { #settings_max_memory_usage}
## max_memory_usage {#settings_max_memory_usage}
The maximum amount of RAM to use for running a query on a single server.
@ -64,7 +64,7 @@ Maximum number of bytes (uncompressed data) that can be read from a table when r
What to do when the volume of data read exceeds one of the limits: 'throw' or 'break'. By default, throw.
## max_rows_to_group_by { #settings-max_rows_to_group_by}
## max_rows_to_group_by {#settings-max_rows_to_group_by}
Maximum number of unique keys received from aggregation. This setting lets you limit memory consumption when aggregating.
@ -73,7 +73,7 @@ Maximum number of unique keys received from aggregation. This setting lets you l
What to do when the number of unique keys for aggregation exceeds the limit: 'throw', 'break', or 'any'. By default, throw.
Using the 'any' value lets you run an approximation of GROUP BY. The quality of this approximation depends on the statistical nature of the data.
## max_bytes_before_external_group_by { #settings-max_bytes_before_external_group_by}
## max_bytes_before_external_group_by {#settings-max_bytes_before_external_group_by}
Enables or disables execution of `GROUP BY` clauses in external memory. See [GROUP BY in external memory](../../query_language/select.md#select-group-by-in-external-memory).
@ -96,7 +96,7 @@ Maximum number of bytes before sorting.
What to do if the number of rows received before sorting exceeds one of the limits: 'throw' or 'break'. By default, throw.
## max_result_rows { #setting-max_result_rows}
## max_result_rows {#setting-max_result_rows}
Limit on the number of rows in the result. Also checked for subqueries, and on remote servers when running parts of a distributed query.
@ -223,7 +223,7 @@ Maximum number of bytes (uncompressed data) that can be passed to a remote serve
What to do when the amount of data exceeds one of the limits: 'throw' or 'break'. By default, throw.
## max_rows_in_join { #settings-max_rows_in_join}
## max_rows_in_join {#settings-max_rows_in_join}
Limits the number of rows in the hash table that is used when joining tables.
@ -240,7 +240,7 @@ Possible values:
Default value: 0.
## max_bytes_in_join { #settings-max_bytes_in_join}
## max_bytes_in_join {#settings-max_bytes_in_join}
Limits the size in bytes of the hash table used when joining tables.
@ -257,7 +257,7 @@ Possible values:
Default value: 0.
## join_overflow_mode { #settings-join_overflow_mode}
## join_overflow_mode {#settings-join_overflow_mode}
Defines what action ClickHouse performs when any of the following join limits is reached:

@ -45,7 +45,7 @@ If `enable_optimize_predicate_expression = 1`, then the execution time of these
If `enable_optimize_predicate_expression = 0`, then the execution time of the second query is much longer, because the `WHERE` clause applies to all the data after the subquery finishes.
## fallback_to_stale_replicas_for_distributed_queries { #settings-fallback_to_stale_replicas_for_distributed_queries}
## fallback_to_stale_replicas_for_distributed_queries {#settings-fallback_to_stale_replicas_for_distributed_queries}
Forces a query to an out-of-date replica if updated data is not available. See [Replication](../table_engines/replication.md).
@ -55,7 +55,7 @@ Used when performing `SELECT` from a distributed table that points to replicated
By default, 1 (enabled).
## force_index_by_date { #settings-force_index_by_date}
## force_index_by_date {#settings-force_index_by_date}
Disables query execution if the index can't be used by date.
@ -82,7 +82,7 @@ Enables or disables [fsync](http://pubs.opengroup.org/onlinepubs/9699919799/func
It makes sense to disable it if the server has millions of tiny tables that are constantly being created and destroyed.
## enable_http_compression { #settings-enable_http_compression}
## enable_http_compression {#settings-enable_http_compression}
Enables or disables data compression in the response to an HTTP request.
@ -95,7 +95,7 @@ Possible values:
Default value: 0.
## http_zlib_compression_level { #settings-http_zlib_compression_level}
## http_zlib_compression_level {#settings-http_zlib_compression_level}
Sets the level of data compression in the response to an HTTP request if [enable_http_compression = 1](#settings-enable_http_compression).
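As an illustration, a sketch of requesting a gzip-compressed response over HTTP with this pair of settings (it assumes the default host and port, and that compression is negotiated via the `Accept-Encoding` header):

```python
import gzip
import urllib.parse
import urllib.request

params = urllib.parse.urlencode({
    "query": "SELECT number FROM system.numbers LIMIT 10",
    "enable_http_compression": "1",
    "http_zlib_compression_level": "6",
})
request = urllib.request.Request(
    "http://localhost:8123/?" + params,
    headers={"Accept-Encoding": "gzip"},
)
body = urllib.request.urlopen(request).read()
print(gzip.decompress(body).decode())
```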
@ -104,7 +104,7 @@ Possible values: Numbers from 1 to 9.
Default value: 3.
## http_native_compression_disable_checksumming_on_decompress { #settings-http_native_compression_disable_checksumming_on_decompress}
## http_native_compression_disable_checksumming_on_decompress {#settings-http_native_compression_disable_checksumming_on_decompress}
Enables or disables checksum verification when decompressing the HTTP POST data from the client. Used only for ClickHouse native compression format (not used with `gzip` or `deflate`).
@ -117,7 +117,7 @@ Possible values:
Default value: 0.
## send_progress_in_http_headers { #settings-send_progress_in_http_headers}
## send_progress_in_http_headers {#settings-send_progress_in_http_headers}
Enables or disables `X-ClickHouse-Progress` HTTP response headers in `clickhouse-server` responses.
@ -130,7 +130,7 @@ Possible values:
Default value: 0.
## max_http_get_redirects { #setting-max_http_get_redirects}
## max_http_get_redirects {#setting-max_http_get_redirects}
Limits the maximum number of HTTP GET redirect hops for [URL](../table_engines/url.md)-engine tables. The setting applies to both types of tables: those created by the [CREATE TABLE](../../query_language/create/#create-table-query) query and by the [url](../../query_language/table_functions/url.md) table function.
@ -141,7 +141,7 @@ Possible values:
Default value: 0.
## input_format_allow_errors_num { #settings-input_format_allow_errors_num}
## input_format_allow_errors_num {#settings-input_format_allow_errors_num}
Sets the maximum number of acceptable errors when reading from text formats (CSV, TSV, etc.).
@ -153,7 +153,7 @@ If an error occurred while reading rows but the error counter is still less than
If both `input_format_allow_errors_num` and `input_format_allow_errors_ratio` are exceeded, ClickHouse throws an exception.
## input_format_allow_errors_ratio { #settings-input_format_allow_errors_ratio}
## input_format_allow_errors_ratio {#settings-input_format_allow_errors_ratio}
Sets the maximum percentage of errors allowed when reading from text formats (CSV, TSV, etc.).
The percentage of errors is set as a floating-point number between 0 and 1.
@ -167,7 +167,7 @@ If an error occurred while reading rows but the error counter is still less than
If both `input_format_allow_errors_num` and `input_format_allow_errors_ratio` are exceeded, ClickHouse throws an exception.
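For illustration, a minimal sketch of how these two settings are typically set together before a text-format load (the table `events` and the CSV source are hypothetical):

```sql
-- Tolerate up to 10 malformed rows, but no more than 1% of the rows read so far.
SET input_format_allow_errors_num = 10;
SET input_format_allow_errors_ratio = 0.01;
-- A subsequent text-format insert, e.g. INSERT INTO events FORMAT CSV ...,
-- will skip broken rows until either threshold is exceeded.
```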
## input_format_values_interpret_expressions { #settings-input_format_values_interpret_expressions}
## input_format_values_interpret_expressions {#settings-input_format_values_interpret_expressions}
Enables or disables the full SQL parser if the fast stream parser can't parse the data. This setting is used only for the [Values](../../interfaces/formats.md#data-format-values) format during data insertion. For more information about syntax parsing, see the [Syntax](../../query_language/syntax.md) section.
@ -217,7 +217,7 @@ INSERT INTO datetime_t SELECT now()
Ok.
```
## input_format_values_deduce_templates_of_expressions { #settings-input_format_values_deduce_templates_of_expressions}
## input_format_values_deduce_templates_of_expressions {#settings-input_format_values_deduce_templates_of_expressions}
Enables or disables template deduction for SQL expressions in the [Values](../../interfaces/formats.md#data-format-values) format. It allows parsing and interpreting expressions in `Values` much faster if expressions in consecutive rows have the same structure. ClickHouse tries to deduce the template of an expression, parse the following rows using this template and evaluate the expression on a batch of successfully parsed rows. For the following query:
@ -231,7 +231,7 @@ INSERT INTO test VALUES (lower('Hello')), (lower('world')), (lower('INSERT')), (
Enabled by default.
## input_format_values_accurate_types_of_literals { #settings-input_format_values_accurate_types_of_literals}
## input_format_values_accurate_types_of_literals {#settings-input_format_values_accurate_types_of_literals}
This setting is used only when `input_format_values_deduce_templates_of_expressions = 1`. It can happen that expressions for some column have the same structure but contain numeric literals of different types, e.g.
```sql
@ -244,7 +244,7 @@ When this setting is enabled, ClickHouse will check actual type of literal and w
When disabled, ClickHouse may use more general type for some literals (e.g. `Float64` or `Int64` instead of `UInt64` for `42`), but it may cause overflow and precision issues.
Enabled by default.
## input_format_defaults_for_omitted_fields { #session_settings-input_format_defaults_for_omitted_fields}
## input_format_defaults_for_omitted_fields {#session_settings-input_format_defaults_for_omitted_fields}
When performing `INSERT` queries, replace omitted input column values with default values of the respective columns. This option only applies to [JSONEachRow](../../interfaces/formats.md#jsoneachrow), [CSV](../../interfaces/formats.md#csv) and [TabSeparated](../../interfaces/formats.md#tabseparated) formats.
@ -258,18 +258,18 @@ Possible values:
Default value: 1.
## input_format_tsv_empty_as_default { #settings-input_format_tsv_empty_as_default}
## input_format_tsv_empty_as_default {#settings-input_format_tsv_empty_as_default}
When enabled, replaces empty input fields in TSV with default values. For complex default expressions, `input_format_defaults_for_omitted_fields` must be enabled too.
Disabled by default.
## input_format_null_as_default { #settings-input_format_null_as_default}
## input_format_null_as_default {#settings-input_format_null_as_default}
Enables or disables using default values if the input data contains `NULL`, but the data type of the corresponding column is not `Nullable(T)` (for text input formats).
## input_format_skip_unknown_fields { #settings-input_format_skip_unknown_fields}
## input_format_skip_unknown_fields {#settings-input_format_skip_unknown_fields}
Enables or disables skipping insertion of extra data.
@ -289,7 +289,7 @@ Possible values:
Default value: 0.
## input_format_import_nested_json { #settings-input_format_import_nested_json}
## input_format_import_nested_json {#settings-input_format_import_nested_json}
Enables or disables the insertion of JSON data with nested objects.
@ -308,7 +308,7 @@ See also:
- [Usage of Nested Structures](../../interfaces/formats.md#jsoneachrow-nested) with the `JSONEachRow` format.
## input_format_with_names_use_header { #settings-input_format_with_names_use_header}
## input_format_with_names_use_header {#settings-input_format_with_names_use_header}
Enables or disables checking the column order when inserting data.
@ -326,7 +326,7 @@ Possible values:
Default value: 1.
## date_time_input_format { #settings-date_time_input_format}
## date_time_input_format {#settings-date_time_input_format}
Allows choosing a parser for the text representation of date and time.
@ -349,7 +349,7 @@ See also:
- [DateTime data type.](../../data_types/datetime.md)
- [Functions for working with dates and times.](../../query_language/functions/date_time_functions.md)
## join_default_strictness { #settings-join_default_strictness}
## join_default_strictness {#settings-join_default_strictness}
Sets default strictness for [JOIN clauses](../../query_language/select.md#select-join).
@ -362,7 +362,7 @@ Possible values:
Default value: `ALL`.
## join_any_take_last_row { #settings-join_any_take_last_row}
## join_any_take_last_row {#settings-join_any_take_last_row}
Changes behavior of join operations with `ANY` strictness.
@ -382,7 +382,7 @@ See also:
- [Join table engine](../table_engines/join.md)
- [join_default_strictness](#settings-join_default_strictness)
## join_use_nulls { #join_use_nulls}
## join_use_nulls {#join_use_nulls}
Sets the type of [JOIN](../../query_language/select.md) behavior. When merging tables, empty cells may appear. ClickHouse fills them differently based on this setting.
@ -393,7 +393,7 @@ Possible values:
Default value: 0.
## max_block_size { #setting-max_block_size}
## max_block_size {#setting-max_block_size}
In ClickHouse, data is processed by blocks (sets of column parts). The internal processing cycles for a single block are efficient enough, but there are noticeable expenditures on each block. The `max_block_size` setting is a recommendation for what size of block (in number of rows) to load from tables. The block size shouldn't be too small, so that the per-block expenditures remain negligible, but not too large either, so that a query with LIMIT that completes after the first block is processed quickly. The goal is to avoid consuming too much memory when extracting a large number of columns in multiple threads, and to preserve at least some cache locality.
@ -407,7 +407,7 @@ Used for the same purpose as `max_block_size`, but it sets the recommended block
However, the block size cannot be more than `max_block_size` rows.
By default: 1,000,000. It only works when reading from MergeTree engines.
## merge_tree_min_rows_for_concurrent_read { #setting-merge_tree_min_rows_for_concurrent_read}
## merge_tree_min_rows_for_concurrent_read {#setting-merge_tree_min_rows_for_concurrent_read}
If the number of rows to be read from a file of a [MergeTree](../table_engines/mergetree.md) table exceeds `merge_tree_min_rows_for_concurrent_read` then ClickHouse tries to perform a concurrent reading from this file on several threads.
@ -417,7 +417,7 @@ Possible values:
Default value: 163840.
## merge_tree_min_bytes_for_concurrent_read { #setting-merge_tree_min_bytes_for_concurrent_read}
## merge_tree_min_bytes_for_concurrent_read {#setting-merge_tree_min_bytes_for_concurrent_read}
If the number of bytes to read from one file of a [MergeTree](../table_engines/mergetree.md)-engine table exceeds `merge_tree_min_bytes_for_concurrent_read`, then ClickHouse tries to concurrently read from this file in several threads.
@ -427,7 +427,7 @@ Possible value:
Default value: 251658240.
## merge_tree_min_rows_for_seek { #setting-merge_tree_min_rows_for_seek}
## merge_tree_min_rows_for_seek {#setting-merge_tree_min_rows_for_seek}
If the distance between two data blocks to be read in one file is less than `merge_tree_min_rows_for_seek` rows, then ClickHouse does not seek through the file, but reads the data sequentially.
@ -437,7 +437,7 @@ Possible values:
Default value: 0.
## merge_tree_min_bytes_for_seek { #setting-merge_tree_min_bytes_for_seek}
## merge_tree_min_bytes_for_seek {#setting-merge_tree_min_bytes_for_seek}
If the distance between two data blocks to be read in one file is less than `merge_tree_min_bytes_for_seek` bytes, then ClickHouse sequentially reads range of file that contains both blocks, thus avoiding extra seek.
@ -448,7 +448,7 @@ Possible values:
Default value: 0.
## merge_tree_coarse_index_granularity { #setting-merge_tree_coarse_index_granularity}
## merge_tree_coarse_index_granularity {#setting-merge_tree_coarse_index_granularity}
When searching data, ClickHouse checks the data marks in the index file. If ClickHouse finds that required keys are in some range, it divides this range into `merge_tree_coarse_index_granularity` subranges and searches the required keys there recursively.
@ -458,7 +458,7 @@ Possible values:
Default value: 8.
## merge_tree_max_rows_to_use_cache { #setting-merge_tree_max_rows_to_use_cache}
## merge_tree_max_rows_to_use_cache {#setting-merge_tree_max_rows_to_use_cache}
If ClickHouse should read more than `merge_tree_max_rows_to_use_cache` rows in one query, it doesn't use the cache of uncompressed blocks.
@ -470,7 +470,7 @@ Possible values:
Default value: 128 ✕ 8192.
## merge_tree_max_bytes_to_use_cache { #setting-merge_tree_max_bytes_to_use_cache}
## merge_tree_max_bytes_to_use_cache {#setting-merge_tree_max_bytes_to_use_cache}
If ClickHouse should read more than `merge_tree_max_bytes_to_use_cache` bytes in one query, it doesn't use the cache of uncompressed blocks.
@ -482,7 +482,7 @@ Possible value:
Default value: 2013265920.
## min_bytes_to_use_direct_io { #settings-min_bytes_to_use_direct_io}
## min_bytes_to_use_direct_io {#settings-min_bytes_to_use_direct_io}
The minimum data volume required for using direct I/O access to the storage disk.
@ -495,7 +495,7 @@ Possible values:
Default value: 0.
## log_queries { #settings-log-queries}
## log_queries {#settings-log-queries}
Setting up query logging.
@ -507,7 +507,7 @@ Example:
log_queries=1
```
## log_query_threads { #settings-log-query-threads}
## log_query_threads {#settings-log-query-threads}
Setting up query threads logging.
@ -519,7 +519,7 @@ Example:
log_query_threads=1
```
## max_insert_block_size { #settings-max_insert_block_size}
## max_insert_block_size {#settings-max_insert_block_size}
The size of blocks to form for insertion into a table.
This setting only applies in cases when the server forms the blocks.
@ -531,7 +531,7 @@ Default value: 1,048,576.
The default is slightly more than `max_block_size`. This is because certain table engines (`*MergeTree`) form a data part on the disk for each inserted block, which is a fairly large entity. Similarly, `*MergeTree` tables sort data during insertion, and a large enough block size allows sorting more data in RAM.
## max_replica_delay_for_distributed_queries { #settings-max_replica_delay_for_distributed_queries}
## max_replica_delay_for_distributed_queries {#settings-max_replica_delay_for_distributed_queries}
Disables lagging replicas for distributed queries. See [Replication](../../operations/table_engines/replication.md).
@ -541,7 +541,7 @@ Default value: 300.
Used when performing `SELECT` from a distributed table that points to replicated tables.
## max_threads { #settings-max_threads}
## max_threads {#settings-max_threads}
The maximum number of query processing threads, excluding threads for retrieving data from remote servers (see the 'max_distributed_connections' parameter).
@ -556,7 +556,7 @@ For queries that are completed quickly because of a LIMIT, you can set a lower '
The smaller the `max_threads` value, the less memory is consumed.
## max_insert_threads { #settings-max_insert_threads}
## max_insert_threads {#settings-max_insert_threads}
The maximum number of threads to execute the `INSERT SELECT` query.
@ -590,7 +590,7 @@ We are writing a URL column with the String type (average size of 60 bytes per v
There usually isn't any reason to change this setting.
## max_query_size { #settings-max_query_size}
## max_query_size {#settings-max_query_size}
The maximum part of a query that can be taken to RAM for parsing with the SQL parser.
The INSERT query also contains data for INSERT that is processed by a separate stream parser (that consumes O(1) RAM), which is not included in this restriction.
@ -654,7 +654,7 @@ Default value: 3.
Whether to count extreme values (the minimums and maximums in columns of a query result). Accepts 0 or 1. By default, 0 (disabled).
For more information, see the section "Extreme values".
## use_uncompressed_cache { #setting-use_uncompressed_cache}
## use_uncompressed_cache {#setting-use_uncompressed_cache}
Whether to use a cache of uncompressed blocks. Accepts 0 or 1. By default, 0 (disabled).
Using the uncompressed cache (only for tables in the MergeTree family) can significantly reduce latency and increase throughput when working with a large number of short queries. Enable this setting for users who send frequent short requests. Also pay attention to the [uncompressed_cache_size](../server_settings/settings.md#server-settings-uncompressed_cache_size) configuration parameter (only set in the config file), which defines the size of uncompressed cache blocks. By default, it is 8 GiB. The uncompressed cache is filled in as needed and the least-used data is automatically deleted.
@ -682,7 +682,7 @@ The default value is 7500.
The smaller the value, the more often data is flushed into the table. Setting the value too low leads to poor performance.
## load_balancing { #settings-load_balancing}
## load_balancing {#settings-load_balancing}
Specifies the algorithm of replica selection that is used for distributed query processing.
@ -693,7 +693,7 @@ ClickHouse supports the following algorithms of choosing replicas:
- [In order](#load_balancing-in_order)
- [First or random](#load_balancing-first_or_random)
### Random (by default) { #load_balancing-random}
### Random (by default) {#load_balancing-random}
```sql
load_balancing = random
@ -702,7 +702,7 @@ load_balancing = random
The number of errors is counted for each replica. The query is sent to the replica with the fewest errors, and if there are several of these, to any one of them.
Disadvantages: Server proximity is not accounted for; if the replicas have different data, you will also get different data.
### Nearest Hostname { #load_balancing-nearest_hostname}
### Nearest Hostname {#load_balancing-nearest_hostname}
```sql
load_balancing = nearest_hostname
@ -716,7 +716,7 @@ This method might seem primitive, but it doesn't require external data about net
Thus, if there are equivalent replicas, the closest one by name is preferred.
We can also assume that when sending a query to the same server, in the absence of failures, a distributed query will also go to the same servers. So even if different data is placed on the replicas, the query will return mostly the same results.
### In Order { #load_balancing-in_order}
### In Order {#load_balancing-in_order}
```sql
load_balancing = in_order
@ -726,7 +726,7 @@ Replicas with the same number of errors are accessed in the same order as they a
This method is appropriate when you know exactly which replica is preferable.
### First or Random { #load_balancing-first_or_random}
### First or Random {#load_balancing-first_or_random}
```sql
load_balancing = first_or_random
@ -736,7 +736,7 @@ This algorithm chooses the first replica in the set or a random replica if the f
The `first_or_random` algorithm solves the problem of the `in_order` algorithm. With `in_order`, if one replica goes down, the next one gets a double load while the remaining replicas handle the usual amount of traffic. When using the `first_or_random` algorithm, load is evenly distributed among replicas that are still available.
## prefer_localhost_replica { #settings-prefer_localhost_replica}
## prefer_localhost_replica {#settings-prefer_localhost_replica}
Enables/disables preferential use of the localhost replica when processing distributed queries.
@ -760,7 +760,7 @@ See the section "WITH TOTALS modifier".
The threshold for `totals_mode = 'auto'`.
See the section "WITH TOTALS modifier".
## max_parallel_replicas { #settings-max_parallel_replicas}
## max_parallel_replicas {#settings-max_parallel_replicas}
The maximum number of replicas for each shard when executing a query.
For consistency (to get different parts of the same data split), this option only works when the sampling key is set.
@ -782,27 +782,27 @@ If the value is 1 or more, compilation occurs asynchronously in a separate threa
Compiled code is required for each different combination of aggregate functions used in the query and the type of keys in the GROUP BY clause.
The results of compilation are saved in the build directory in the form of .so files. There is no restriction on the number of compilation results, since they don't use very much space. Old results will be used after server restarts, except in the case of a server upgrade, when the old results are deleted.
## output_format_json_quote_64bit_integers { #session_settings-output_format_json_quote_64bit_integers}
## output_format_json_quote_64bit_integers {#session_settings-output_format_json_quote_64bit_integers}
If the value is true, integers appear in quotes when using JSON\* Int64 and UInt64 formats (for compatibility with most JavaScript implementations); otherwise, integers are output without the quotes.
## format_csv_delimiter { #settings-format_csv_delimiter}
## format_csv_delimiter {#settings-format_csv_delimiter}
The character interpreted as a delimiter in the CSV data. By default, the delimiter is `,`.
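A minimal session-level sketch (no particular table assumed):

```sql
-- Switch the CSV field separator for the current session.
SET format_csv_delimiter = '|';
-- Subsequent CSV input and output (INSERT ... FORMAT CSV, SELECT ... FORMAT CSV)
-- use '|' instead of ',' as the delimiter.
```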
## input_format_csv_unquoted_null_literal_as_null { #settings-input_format_csv_unquoted_null_literal_as_null}
## input_format_csv_unquoted_null_literal_as_null {#settings-input_format_csv_unquoted_null_literal_as_null}
For the CSV input format, enables or disables parsing of an unquoted `NULL` as a literal (synonym for `\N`).
## output_format_csv_crlf_end_of_line { #settings-output_format_csv_crlf_end_of_line}
## output_format_csv_crlf_end_of_line {#settings-output_format_csv_crlf_end_of_line}
Use DOS/Windows style line separator (CRLF) in CSV instead of Unix style (LF).
## output_format_tsv_crlf_end_of_line { #settings-output_format_tsv_crlf_end_of_line}
## output_format_tsv_crlf_end_of_line {#settings-output_format_tsv_crlf_end_of_line}
Use DOS/Windows style line separator (CRLF) in TSV instead of Unix style (LF).
## insert_quorum { #settings-insert_quorum}
## insert_quorum {#settings-insert_quorum}
Enables quorum writes.
@ -829,7 +829,7 @@ See also:
- [insert_quorum_timeout](#settings-insert_quorum_timeout)
- [select_sequential_consistency](#settings-select_sequential_consistency)
## insert_quorum_timeout { #settings-insert_quorum_timeout}
## insert_quorum_timeout {#settings-insert_quorum_timeout}
Quorum write timeout in seconds. If the timeout has passed and no write has taken place yet, ClickHouse will generate an exception and the client must repeat the query to write the same block to the same or any other replica.
@ -841,7 +841,7 @@ See also:
- [select_sequential_consistency](#settings-select_sequential_consistency)
## select_sequential_consistency { #settings-select_sequential_consistency}
## select_sequential_consistency {#settings-select_sequential_consistency}
Enables or disables sequential consistency for `SELECT` queries:
@ -861,7 +861,7 @@ See also:
- [insert_quorum](#settings-insert_quorum)
- [insert_quorum_timeout](#settings-insert_quorum_timeout)
## insert_deduplicate { #settings-insert_deduplicate}
## insert_deduplicate {#settings-insert_deduplicate}
Enables or disables block deduplication of `INSERT` (for Replicated* tables).
@ -874,7 +874,7 @@ Default value: 1.
By default, blocks inserted into replicated tables by the `INSERT` statement are deduplicated (see [Data Replication](../table_engines/replication.md)).
## deduplicate_blocks_in_dependent_materialized_views { #settings-deduplicate_blocks_in_dependent_materialized_views}
## deduplicate_blocks_in_dependent_materialized_views {#settings-deduplicate_blocks_in_dependent_materialized_views}
Enables or disables the deduplication check for materialized views that receive data from Replicated* tables.
@ -892,7 +892,7 @@ If an INSERTed block is skipped due to deduplication in the source table, there
At the same time, this behavior "breaks" `INSERT` idempotency. If an `INSERT` into the main table was successful and an `INSERT` into a materialized view failed (e.g. because of a communication failure with Zookeeper), a client will get an error and can retry the operation. However, the materialized view won't receive the second insert because it will be discarded by deduplication in the main (source) table. The setting `deduplicate_blocks_in_dependent_materialized_views` allows changing this behavior. On retry, a materialized view will receive the repeated insert, will perform the deduplication check by itself, ignoring the check result for the source table, and will insert the rows lost because of the first failure.
## max_network_bytes { #settings-max_network_bytes}
## max_network_bytes {#settings-max_network_bytes}
Limits the data volume (in bytes) that is received or transmitted over the network when executing a query. This setting applies to every individual query.
Possible values:
@ -902,7 +902,7 @@ Possible values:
Default value: 0.
## max_network_bandwidth { #settings-max_network_bandwidth}
## max_network_bandwidth {#settings-max_network_bandwidth}
Limits the speed of the data exchange over the network in bytes per second. This setting applies to every query.
@ -913,7 +913,7 @@ Possible values:
Default value: 0.
## max_network_bandwidth_for_user { #settings-max_network_bandwidth_for_user}
## max_network_bandwidth_for_user {#settings-max_network_bandwidth_for_user}
Limits the speed of the data exchange over the network in bytes per second. This setting applies to all concurrently running queries performed by a single user.
@ -924,7 +924,7 @@ Possible values:
Default value: 0.
## max_network_bandwidth_for_all_users { #settings-max_network_bandwidth_for_all_users}
## max_network_bandwidth_for_all_users {#settings-max_network_bandwidth_for_all_users}
Limits the speed at which data is exchanged over the network, in bytes per second. This setting applies to all concurrently running queries on the server.
@ -935,7 +935,7 @@ Possible values:
Default value: 0.
## count_distinct_implementation { #settings-count_distinct_implementation}
## count_distinct_implementation {#settings-count_distinct_implementation}
Specifies which of the `uniq*` functions should be used to perform the [COUNT(DISTINCT ...)](../../query_language/agg_functions/reference.md#agg_function-count) construction.
@ -949,7 +949,7 @@ Possible values:
Default value: `uniqExact`.
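A minimal sketch of the effect (the table `hits` and the column `UserID` are hypothetical):

```sql
SET count_distinct_implementation = 'uniqCombined';
-- The COUNT(DISTINCT ...) below is now executed as uniqCombined(UserID).
SELECT COUNT(DISTINCT UserID) FROM hits;
```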
## skip_unavailable_shards { #settings-skip_unavailable_shards}
## skip_unavailable_shards {#settings-skip_unavailable_shards}
Enables or disables silent skipping of unavailable shards.
@ -979,13 +979,13 @@ Possible values:
Default value: 0.
## optimize_skip_unused_shards { #settings-optimize_skip_unused_shards}
## optimize_skip_unused_shards {#settings-optimize_skip_unused_shards}
Enables or disables skipping of unused shards for SELECT queries that have a sharding key condition in PREWHERE/WHERE (assuming that the data is distributed by the sharding key, otherwise the setting does nothing).
Default value: 0
## force_optimize_skip_unused_shards { #settings-force_optimize_skip_unused_shards}
## force_optimize_skip_unused_shards {#settings-force_optimize_skip_unused_shards}
Enables or disables query execution if [`optimize_skip_unused_shards`](#settings-optimize_skip_unused_shards) is enabled and skipping of unused shards is not possible. If skipping is not possible and the setting is enabled, an exception will be thrown.
@ -997,7 +997,7 @@ Possible values:
Default value: 0
## optimize_throw_if_noop { #setting-optimize_throw_if_noop}
## optimize_throw_if_noop {#setting-optimize_throw_if_noop}
Enables or disables throwing an exception if an [OPTIMIZE](../../query_language/misc.md#misc_operations-optimize) query didn't perform a merge.
@ -1011,7 +1011,7 @@ Possible values:
Default value: 0.
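A short sketch, assuming a hypothetical MergeTree table `my_table`:

```sql
SET optimize_throw_if_noop = 1;
-- If ClickHouse cannot schedule a merge for this table, the query returns an
-- exception instead of completing silently.
OPTIMIZE TABLE my_table;
```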
## distributed_replica_error_half_life { #settings-distributed_replica_error_half_life}
## distributed_replica_error_half_life {#settings-distributed_replica_error_half_life}
- Type: seconds
- Default value: 60 seconds
@ -1023,7 +1023,7 @@ See also:
- [Table engine Distributed](../../operations/table_engines/distributed.md)
- [distributed_replica_error_cap](#settings-distributed_replica_error_cap)
## distributed_replica_error_cap { #settings-distributed_replica_error_cap}
## distributed_replica_error_cap {#settings-distributed_replica_error_cap}
- Type: unsigned int
- Default value: 1000
@ -1035,7 +1035,7 @@ See also:
- [Table engine Distributed](../../operations/table_engines/distributed.md)
- [distributed_replica_error_half_life](#settings-distributed_replica_error_half_life)
## distributed_directory_monitor_sleep_time_ms { #distributed_directory_monitor_sleep_time_ms}
## distributed_directory_monitor_sleep_time_ms {#distributed_directory_monitor_sleep_time_ms}
Base interval for the [Distributed](../table_engines/distributed.md) table engine to send data. The actual interval grows exponentially in the event of errors.
@ -1045,7 +1045,7 @@ Possible values:
Default value: 100 milliseconds.
## distributed_directory_monitor_max_sleep_time_ms { #distributed_directory_monitor_max_sleep_time_ms}
## distributed_directory_monitor_max_sleep_time_ms {#distributed_directory_monitor_max_sleep_time_ms}
Maximum interval for the [Distributed](../table_engines/distributed.md) table engine to send data. Limits exponential growth of the interval set in the [distributed_directory_monitor_sleep_time_ms](#distributed_directory_monitor_sleep_time_ms) setting.
@ -1055,7 +1055,7 @@ Possible values:
Default value: 30000 milliseconds (30 seconds).
## distributed_directory_monitor_batch_inserts { #distributed_directory_monitor_batch_inserts}
## distributed_directory_monitor_batch_inserts {#distributed_directory_monitor_batch_inserts}
Enables/disables sending of inserted data in batches.
@ -1068,7 +1068,7 @@ Possible values:
Default value: 0.
## os_thread_priority { #setting-os_thread_priority}
## os_thread_priority {#setting-os_thread_priority}
Sets the priority ([nice](https://en.wikipedia.org/wiki/Nice_(Unix))) for threads that execute queries. The OS scheduler considers this priority when choosing the next thread to run on each available CPU core.
@ -1083,7 +1083,7 @@ Lower values mean higher priority. Threads with low `nice` priority values are e
Default value: 0.
## query_profiler_real_time_period_ns { #query_profiler_real_time_period_ns}
## query_profiler_real_time_period_ns {#query_profiler_real_time_period_ns}
Sets the period for a real clock timer of the [query profiler](../../operations/performance/sampling_query_profiler.md). Real clock timer counts wall-clock time.
@ -1106,7 +1106,7 @@ See also:
- System table [trace_log](../system_tables.md#system_tables-trace_log)
## query_profiler_cpu_time_period_ns { #query_profiler_cpu_time_period_ns}
## query_profiler_cpu_time_period_ns {#query_profiler_cpu_time_period_ns}
Sets the period for a CPU clock timer of the [query profiler](../../operations/performance/sampling_query_profiler.md). This timer counts only CPU time.
@ -1129,7 +1129,7 @@ See also:
- System table [trace_log](../system_tables.md#system_tables-trace_log)
## allow_introspection_functions { #settings-allow_introspection_functions}
## allow_introspection_functions {#settings-allow_introspection_functions}
Enables or disables [introspection functions](../../query_language/functions/introspection.md) for query profiling.
@ -1159,7 +1159,7 @@ Enable order-preserving parallel parsing of data formats. Supported only for TSV
The minimum chunk size in bytes, which each thread will parse in parallel.
## output_format_avro_codec { #settings-output_format_avro_codec}
## output_format_avro_codec {#settings-output_format_avro_codec}
Sets the compression codec used for the output Avro file.
@ -1173,7 +1173,7 @@ Possible values:
Default value: `snappy` (if available) or `deflate`.
## output_format_avro_sync_interval { #settings-output_format_avro_sync_interval}
## output_format_avro_sync_interval {#settings-output_format_avro_sync_interval}
Sets the minimum data size (in bytes) between synchronization markers for the output Avro file.
@ -1183,7 +1183,7 @@ Possible values: 32 (32 bytes) - 1073741824 (1 GiB)
Default value: 32768 (32 KiB)
## format_avro_schema_registry_url { #settings-format_avro_schema_registry_url}
## format_avro_schema_registry_url {#settings-format_avro_schema_registry_url}
Sets the Confluent Schema Registry URL to use with the [AvroConfluent](../../interfaces/formats.md#data-format-avro-confluent) format.

View File

@ -6,7 +6,7 @@ System tables don't have files with data on the disk or files with metadata. The
System tables are read-only.
They are located in the 'system' database.
## system.asynchronous_metrics { #system_tables-asynchronous_metrics}
## system.asynchronous_metrics {#system_tables-asynchronous_metrics}
Contains metrics that are calculated periodically in the background. For example, the amount of RAM in use.
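For example, a quick look at the current values:

```sql
SELECT metric, value
FROM system.asynchronous_metrics
LIMIT 10;
```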
@ -92,7 +92,7 @@ The `system.columns` table contains the following columns (the column type is sh
- `is_in_primary_key` (UInt8) — Flag that indicates whether the column is in the primary key expression.
- `is_in_sampling_key` (UInt8) — Flag that indicates whether the column is in the sampling key expression.
## system.contributors { #system_contributors}
## system.contributors {#system_contributors}
Contains information about contributors. All contributors are listed in random order. The order is random at query execution time.
@ -138,7 +138,7 @@ This table contains a single String column called 'name' the name of a datab
Each database that the server knows about has a corresponding entry in the table.
This system table is used for implementing the `SHOW DATABASES` query.
## system.detached_parts { #system_tables-detached_parts}
## system.detached_parts {#system_tables-detached_parts}
Contains information about detached parts of [MergeTree](table_engines/mergetree.md) tables. The `reason` column specifies why the part was detached. For user-detached parts, the reason is empty. Such parts can be attached with the [ALTER TABLE ATTACH PARTITION|PART](../query_language/alter.md#alter_attach-partition) command. For the description of other columns, see [system.parts](#system_tables-parts). If the part name is invalid, values of some columns may be `NULL`. Such parts can be deleted with [ALTER TABLE DROP DETACHED PART](../query_language/alter.md#alter_drop-detached).
@ -164,7 +164,7 @@ Columns:
Note that the amount of memory used by the dictionary is not proportional to the number of items stored in it. So for flat and cached dictionaries, all the memory cells are pre-assigned, regardless of how full the dictionary actually is.
## system.events { #system_tables-events}
## system.events {#system_tables-events}
Contains information about the number of events that have occurred in the system. For example, in the table, you can find how many `SELECT` queries were processed since the ClickHouse server started.
@ -243,7 +243,7 @@ Columns:
- `bytes_written_uncompressed` (UInt64) — Number of bytes written, uncompressed.
- `rows_written` (UInt64) — Number of rows written.
## system.metrics { #system_tables-metrics}
## system.metrics {#system_tables-metrics}
Contains metrics which can be calculated instantly, or have a current value. For example, the number of simultaneously processed queries or the current replica delay. This table is always up to date.
@ -283,7 +283,7 @@ SELECT * FROM system.metrics LIMIT 10
- [system.metric_log](#system_tables-metric_log) — Contains a history of metrics values from tables `system.metrics` and `system.events`.
- [Monitoring](monitoring.md) — Base concepts of ClickHouse monitoring.
## system.metric_log { #system_tables-metric_log}
## system.metric_log {#system_tables-metric_log}
Contains the history of metrics values from the tables `system.metrics` and `system.events`, periodically flushed to disk.
To turn on metrics history collection in `system.metric_log`, create `/etc/clickhouse-server/config.d/metric_log.xml` with the following content:
@ -356,7 +356,7 @@ This table contains a single row with a single 'dummy' UInt8 column containing t
This table is used if a SELECT query doesn't specify the FROM clause.
This is similar to the DUAL table found in other DBMSs.
## system.parts { #system_tables-parts}
## system.parts {#system_tables-parts}
Contains information about parts of [MergeTree](table_engines/mergetree.md) tables.
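For example, a sketch that inspects the active parts of a hypothetical table `my_table`:

```sql
SELECT partition, name, rows, bytes_on_disk
FROM system.parts
WHERE database = 'default' AND table = 'my_table' AND active;
```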
@ -406,7 +406,7 @@ Columns:
- `marks_size` (`UInt64`) Alias for `marks_bytes`.
## system.part_log { #system_tables-part-log}
## system.part_log {#system_tables-part-log}
The `system.part_log` table is created only if the [part_log](server_settings/settings.md#server_settings-part-log) server setting is specified.
@ -439,7 +439,7 @@ The `system.part_log` table contains the following columns:
The `system.part_log` table is created after the first insertion of data into a `MergeTree` table.
## system.processes { #system_tables-processes}
## system.processes {#system_tables-processes}
This system table is used for implementing the `SHOW PROCESSLIST` query.
@ -455,7 +455,7 @@ Columns:
- `query` (String) The query text. For `INSERT`, it doesn't include the data to insert.
- `query_id` (String) Query ID, if defined.
## system.text_log { #system_tables-text_log}
## system.text_log {#system_tables-text_log}
Contains logging entries. The logging level that goes to this table can be limited with the `text_log.level` server setting.
@ -483,7 +483,7 @@ Columns:
- `source_line` (`UInt64`) - Source line from which the logging was done.
## system.query_log { #system_tables-query_log}
## system.query_log {#system_tables-query_log}
Contains information about execution of queries. For each query, you can see processing start time, duration of processing, error messages and other information.
@ -569,7 +569,7 @@ When the table is deleted manually, it will be automatically created on the fly.
You can specify an arbitrary partitioning key for the `system.query_log` table in the [query_log](server_settings/settings.md#server_settings-query-log) server setting (see the `partition_by` parameter).
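For example, a sketch that lists the five most recently finished queries:

```sql
SELECT event_time, query_duration_ms, query
FROM system.query_log
WHERE type = 'QueryFinish'
ORDER BY event_time DESC
LIMIT 5;
```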
## system.query_thread_log { #system_tables-query-thread-log}
## system.query_thread_log {#system_tables-query-thread-log}
The table contains information about each query execution thread.
@ -634,7 +634,7 @@ When the table is deleted manually, it will be automatically created on the fly.
You can specify an arbitrary partitioning key for the `system.query_thread_log` table in the [query_thread_log](server_settings/settings.md#server_settings-query-thread-log) server setting (see the `partition_by` parameter).
## system.trace_log { #system_tables-trace_log}
## system.trace_log {#system_tables-trace_log}
Contains stack traces collected by the sampling query profiler.
@ -677,7 +677,7 @@ query_id: acc4d61f-5bd1-4a3e-bc91-2180be37c915
trace: [94222141367858,94222152240175,94222152325351,94222152329944,94222152330796,94222151449980,94222144088167,94222151682763,94222144088167,94222151682763,94222144088167,94222144058283,94222144059248,94222091840750,94222091842302,94222091831228,94222189631488,140509950166747,140509942945935]
```
## system.replicas { #system_tables-replicas}
## system.replicas {#system_tables-replicas}
Contains information and status for replicated tables residing on the local server.
This table can be used for monitoring. The table contains a row for every Replicated\* table.
@ -960,7 +960,7 @@ pzxid: 987021252247
path: /clickhouse/tables/01-08/visits/replicas
```
## system.mutations { #system_tables-mutations}
## system.mutations {#system_tables-mutations}
The table contains information about [mutations](../query_language/alter.md#alter-mutations) of MergeTree tables and their progress. Each mutation command is represented by a single row. The table has the following columns:
@ -987,7 +987,7 @@ If there were problems with mutating some parts, the following columns contain a
**latest_fail_reason** - The exception message that caused the most recent part mutation failure.
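For example, unfinished mutations of a hypothetical table `my_table` can be inspected with a query like this sketch:

```sql
SELECT mutation_id, command, parts_to_do, latest_fail_reason
FROM system.mutations
WHERE database = 'default' AND table = 'my_table' AND is_done = 0;
```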
## system.disks { #system_tables-disks}
## system.disks {#system_tables-disks}
Contains information about disks defined in the [server configuration](table_engines/mergetree.md#table_engine-mergetree-multiple-volumes_configure).
@ -1000,7 +1000,7 @@ Columns:
- `keep_free_space` ([UInt64](../data_types/int_uint.md)) — Amount of disk space that should stay free on disk in bytes. Defined in the `keep_free_space_bytes` parameter of disk configuration.
## system.storage_policies { #system_tables-storage_policies}
## system.storage_policies {#system_tables-storage_policies}
Contains information about storage policies and volumes defined in the [server configuration](table_engines/mergetree.md#table_engine-mergetree-multiple-volumes_configure).

View File

@ -1,4 +1,4 @@
# CollapsingMergeTree { #table_engine-collapsingmergetree}
# CollapsingMergeTree {#table_engine-collapsingmergetree}
The engine inherits from [MergeTree](mergetree.md) and adds the logic of row collapsing to the data parts merge algorithm.
@ -55,7 +55,7 @@ All of the parameters excepting `sign` have the same meaning as in `MergeTree`.
</details>
## Collapsing { #table_engine-collapsingmergetree-collapsing}
## Collapsing {#table_engine-collapsingmergetree-collapsing}
### Data
@ -103,7 +103,7 @@ Why we need 2 rows for each change read in the [Algorithm](#table_engine-collaps
2. Long growing arrays in columns reduce the efficiency of the engine due to the load for writing. The more straightforward the data, the higher the efficiency.
3. The `SELECT` results depend strongly on the consistency of the object change history. Be accurate when preparing data for insertion. You can get unpredictable results with inconsistent data, for example, negative values for non-negative metrics such as session depth.
### Algorithm { #table_engine-collapsingmergetree-collapsing-algorithm}
### Algorithm {#table_engine-collapsingmergetree-collapsing-algorithm}
When ClickHouse merges data parts, each group of consecutive rows with the same sorting key (`ORDER BY`) is reduced to not more than two rows, one with `Sign = 1` ("state" row) and another with `Sign = -1` ("cancel" row). In other words, entries collapse.
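A minimal sketch of collapsing (the table and values are illustrative):

```sql
CREATE TABLE visits
(
    UserID UInt64,
    PageViews UInt8,
    Duration UInt8,
    Sign Int8
)
ENGINE = CollapsingMergeTree(Sign)
ORDER BY UserID;

INSERT INTO visits VALUES (4324182021466249494, 5, 146, 1);   -- "state" row
INSERT INTO visits VALUES (4324182021466249494, 5, 146, -1);  -- "cancel" row

-- FINAL forces collapsing at query time; the pair above cancels out.
SELECT * FROM visits FINAL;
```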

View File

@ -1,4 +1,4 @@
# File { #table_engines-file}
# File {#table_engines-file}
The File table engine keeps the data in a file in one of the supported [file formats](../../interfaces/formats.md#formats) (TabSeparated, Native, etc.).
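A minimal usage sketch (the table name is illustrative):

```sql
CREATE TABLE file_log (ts DateTime, message String) ENGINE = File(TabSeparated);

INSERT INTO file_log VALUES (now(), 'hello');
SELECT * FROM file_log;
```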

View File

@ -1,4 +1,4 @@
# GenerateRandom { #table_engines-generate}
# GenerateRandom {#table_engines-generate}
The GenerateRandom table engine produces random data for the given table schema.

View File

@ -1,4 +1,4 @@
# GraphiteMergeTree { #graphitemergetree}
# GraphiteMergeTree {#graphitemergetree}
This engine is designed for thinning and aggregating/averaging (rollup) [Graphite](http://graphite.readthedocs.io/en/latest/index.html) data. It may be helpful to developers who want to use ClickHouse as a data store for Graphite.
@ -6,7 +6,7 @@ You can use any ClickHouse table engine to store the Graphite data if you don't
The engine inherits properties from [MergeTree](mergetree.md).
## Creating a Table { #creating-table}
## Creating a Table {#creating-table}
```sql
CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
@ -66,7 +66,7 @@ All of the parameters excepting `config_section` have the same meaning as in `Me
- `config_section` — Name of the section in the configuration file where the rollup rules are set.
</details>
## Rollup configuration { #rollup-configuration}
## Rollup configuration {#rollup-configuration}
The settings for rollup are defined by the [graphite_rollup](../server_settings/settings.md#server_settings-graphite_rollup) parameter in the server configuration. The name of the parameter can be anything. You can create several configurations and use them for different tables.
@ -77,14 +77,14 @@ required-columns
patterns
```
### Required Columns { #required-columns}
### Required Columns {#required-columns}
- `path_column_name` — The name of the column storing the metric name (Graphite sensor). Default value: `Path`.
- `time_column_name` — The name of the column storing the time of measuring the metric. Default value: `Time`.
- `value_column_name` — The name of the column storing the value of the metric at the time set in `time_column_name`. Default value: `Value`.
- `version_column_name` — The name of the column storing the version of the metric. Default value: `Timestamp`.
### Patterns { #patterns}
### Patterns {#patterns}
Structure of the `patterns` section:
@ -126,7 +126,7 @@ Fields for `pattern` and `default` sections:
- `function` The name of the aggregating function to apply to data whose age falls within the range `[age, age + precision]`.
### Configuration Example { #configuration-example}
### Configuration Example {#configuration-example}
```xml
<graphite_rollup>

View File

@ -1,4 +1,4 @@
# HDFS { #table_engines-hdfs}
# HDFS {#table_engines-hdfs}
This engine provides integration with the [Apache Hadoop](https://en.wikipedia.org/wiki/Apache_Hadoop) ecosystem by allowing you to manage data on [HDFS](https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-hdfs/HdfsDesign.html) via ClickHouse. This engine is similar to the [File](file.md) and [URL](url.md) engines, but provides Hadoop-specific features.
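A minimal sketch, assuming a hypothetical HDFS namenode at `hdfs://namenode:9000`:

```sql
CREATE TABLE hdfs_events (ts DateTime, message String)
ENGINE = HDFS('hdfs://namenode:9000/clickhouse/events.tsv', 'TSV');

SELECT count() FROM hdfs_events;
```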

View File

@ -1,4 +1,4 @@
# Table engines { #table_engines}
# Table engines {#table_engines}
The table engine (type of table) determines:
@ -64,7 +64,7 @@ Engines in the family:
- [Memory](memory.md)
- [Buffer](buffer.md)
## Virtual columns { #table_engines-virtual_columns}
## Virtual columns {#table_engines-virtual_columns}
A virtual column is an integral table engine attribute that is defined in the engine source code.

View File

@ -1,4 +1,4 @@
# JDBC { #table_engine-jdbc}
# JDBC {#table_engine-jdbc}
Allows ClickHouse to connect to external databases via [JDBC](https://en.wikipedia.org/wiki/Java_Database_Connectivity).

View File

@ -2,7 +2,7 @@
A prepared data structure for use in [JOIN](../../query_language/select.md#select-join) operations.
## Creating a Table { #creating-a-table}
## Creating a Table {#creating-a-table}
```sql
CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
@ -77,7 +77,7 @@ You cannot perform a `SELECT` query directly from the table. Instead, use one of
- Place the table to the right side in a `JOIN` clause.
- Call the [joinGet](../../query_language/functions/other_functions.md#joinget) function, which lets you extract data from the table the same way as from a dictionary.
### Limitations and Settings { #join-limitations-and-settings}
### Limitations and Settings {#join-limitations-and-settings}
When creating a table, the following settings are applied:

View File

@ -9,7 +9,7 @@ Kafka lets you:
- Process streams as they become available.
## Creating a Table { #table_engine-kafka-creating-a-table}
## Creating a Table {#table_engine-kafka-creating-a-table}
```sql
CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]

View File

@ -1,4 +1,4 @@
# MergeTree { #table_engines-mergetree}
# MergeTree {#table_engines-mergetree}
The `MergeTree` engine and other engines of this family (`*MergeTree`) are the most robust ClickHouse table engines.
@ -26,7 +26,7 @@ Main features:
The [Merge](merge.md) engine does not belong to the `*MergeTree` family.
## Creating a Table { #table_engine-mergetree-creating-a-table}
## Creating a Table {#table_engine-mergetree-creating-a-table}
```sql
CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
@ -50,7 +50,7 @@ For a description of parameters, see the [CREATE query description](../../query_
!!!note "Note"
`INDEX` is an experimental feature, see [Data Skipping Indexes](#table_engine-mergetree-data_skipping-indexes).
### Query Clauses { #mergetree-query-clauses}
### Query Clauses {#mergetree-query-clauses}
- `ENGINE` — Name and parameters of the engine. `ENGINE = MergeTree()`. The `MergeTree` engine does not have parameters.
@ -134,7 +134,7 @@ MergeTree(EventDate, intHash32(UserID), (CounterID, EventDate, intHash32(UserID)
The `MergeTree` engine is configured in the same way as in the example above for the main engine configuration method.
</details>
## Data Storage { #mergetree-data-storage}
## Data Storage {#mergetree-data-storage}
A table consists of data parts sorted by primary key.
@ -146,7 +146,7 @@ Each data part is logically divided into granules. A granule is the smallest ind
The granule size is restricted by the `index_granularity` and `index_granularity_bytes` settings of the table engine. The number of rows in a granule lies in the `[1, index_granularity]` range, depending on the size of the rows. The size of a granule can exceed `index_granularity_bytes` if the size of a single row is greater than the value of the setting. In this case, the size of the granule equals the size of the row.
## Primary Keys and Indexes in Queries { #primary-keys-and-indexes-in-queries}
## Primary Keys and Indexes in Queries {#primary-keys-and-indexes-in-queries}
Take the `(CounterID, Date)` primary key as an example. In this case, the sorting and index can be illustrated as follows:
@ -248,7 +248,7 @@ ClickHouse cannot use an index if the values of the primary key in the query par
ClickHouse uses this logic not only for days of the month sequences, but for any primary key that represents a partially-monotonic sequence.
### Data Skipping Indexes (Experimental) { #table_engine-mergetree-data_skipping-indexes}
### Data Skipping Indexes (Experimental) {#table_engine-mergetree-data_skipping-indexes}
The index declaration is in the columns section of the `CREATE` query.
```sql
@ -368,7 +368,7 @@ For concurrent table access, we use multi-versioning. In other words, when a tab
Reading from a table is automatically parallelized.
## TTL for Columns and Tables { #table_engine-mergetree-ttl}
## TTL for Columns and Tables {#table_engine-mergetree-ttl}
Determines the lifetime of values.
@ -390,7 +390,7 @@ TTL date_time + INTERVAL 1 MONTH
TTL date_time + INTERVAL 15 HOUR
```
### Column TTL { #mergetree-column-ttl}
### Column TTL {#mergetree-column-ttl}
When the values in the column expire, ClickHouse replaces them with the default values for the column data type. If all the column values in the data part expire, ClickHouse deletes this column from the data part in a filesystem.
@ -429,7 +429,7 @@ ALTER TABLE example_table
c String TTL d + INTERVAL 1 MONTH;
```
### Table TTL { #mergetree-table-ttl}
### Table TTL {#mergetree-table-ttl}
A table can have an expression for removal of expired rows, and multiple expressions for automatic movement of parts between [disks or volumes](#table_engine-mergetree-multiple-volumes). When rows in the table expire, ClickHouse deletes all corresponding rows. For the parts moving feature, all rows of a part must satisfy the movement expression criteria.
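A minimal sketch of a table-level TTL (the table and column names are illustrative):

```sql
CREATE TABLE ttl_example
(
    d DateTime,
    value UInt64
)
ENGINE = MergeTree()
ORDER BY d
TTL d + INTERVAL 1 MONTH;
-- Rows older than one month are removed during background merges.
```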
@ -479,7 +479,7 @@ If you perform the `SELECT` query between merges, you may get expired data. To a
[Original article](https://clickhouse.tech/docs/en/operations/table_engines/mergetree/) <!--hide-->
## Using Multiple Block Devices for Data Storage { #table_engine-mergetree-multiple-volumes}
## Using Multiple Block Devices for Data Storage {#table_engine-mergetree-multiple-volumes}
### Introduction
@ -496,7 +496,7 @@ Data part is the minimum movable unit for `MergeTree`-engine tables. The data be
The names given to the described entities can be found in the system tables, [system.storage_policies](../system_tables.md#system_tables-storage_policies) and [system.disks](../system_tables.md#system_tables-disks). To apply one of the configured storage policies for a table, use the `storage_policy` setting of `MergeTree`-engine family tables.
### Configuration { #table_engine-mergetree-multiple-volumes_configure}
### Configuration {#table_engine-mergetree-multiple-volumes_configure}
Disks, volumes and storage policies should be declared inside the `<storage_configuration>` tag either in the main file `config.xml` or in a distinct file in the `config.d` directory.

View File

@ -1,4 +1,4 @@
# ODBC { #table_engine-odbc}
# ODBC {#table_engine-odbc}
Allows ClickHouse to connect to external databases via [ODBC](https://en.wikipedia.org/wiki/Open_Database_Connectivity).

View File

@ -1,4 +1,4 @@
# Data Replication { #table_engines-replication}
# Data Replication {#table_engines-replication}
Replication is only supported for tables in the MergeTree family:
@ -73,7 +73,7 @@ You can have any number of replicas of the same data. Yandex.Metrica uses double
The system monitors data synchronicity on replicas and is able to recover after a failure. Failover is automatic (for small differences in data) or semi-automatic (when data differs too much, which may indicate a configuration error).
## Creating Replicated Tables { #creating-replicated-tables}
## Creating Replicated Tables {#creating-replicated-tables}
The `Replicated` prefix is added to the table engine name. For example: `ReplicatedMergeTree`.

View File

@ -4,7 +4,7 @@ This engine belongs to the family of log engines. See the common properties of l
Use this engine in scenarios when you need to write many tables with a small amount of data (less than 1 million rows).
## Creating a Table { #table_engines-stripelog-creating-a-table}
## Creating a Table {#table_engines-stripelog-creating-a-table}
```sql
CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
@ -17,7 +17,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
See the detailed description of the [CREATE TABLE](../../query_language/create.md#create-table-query) query.
## Writing the Data { #table_engines-stripelog-writing-the-data}
## Writing the Data {#table_engines-stripelog-writing-the-data}
The `StripeLog` engine stores all the columns in one file. For each `INSERT` query, ClickHouse appends the data block to the end of a table file, writing columns one by one.
@ -28,11 +28,11 @@ For each table ClickHouse writes the files:
The `StripeLog` engine does not support the `ALTER UPDATE` and `ALTER DELETE` operations.
## Reading the Data { #table_engines-stripelog-reading-the-data}
## Reading the Data {#table_engines-stripelog-reading-the-data}
The file with marks allows ClickHouse to parallelize the reading of data. This means that a `SELECT` query returns rows in an unpredictable order. Use the `ORDER BY` clause to sort rows.
## Example of Use { #table_engines-stripelog-example-of-use}
## Example of Use {#table_engines-stripelog-example-of-use}
Creating a table:

View File

@ -86,7 +86,7 @@ SELECT key, sum(value) FROM summtt GROUP BY key
```
## Data Processing { #data-processing}
## Data Processing {#data-processing}
When data is inserted into a table, it is saved as-is. ClickHouse periodically merges the inserted parts of data, and this is when rows with the same primary key are summed and replaced with one row for each resulting part of data.

View File

@ -1,4 +1,4 @@
# URL(URL, Format) { #table_engines-url}
# URL(URL, Format) {#table_engines-url}
Manages data on a remote HTTP/HTTPS server. This engine is similar to the [File](file.md) engine.

View File

@ -71,7 +71,7 @@ All of the parameters except `sign` and `version` have the same meaning as in `M
</details>
## Collapsing { #table_engines_versionedcollapsingmergetree}
## Collapsing {#table_engines_versionedcollapsingmergetree}
### Data
@ -119,7 +119,7 @@ To find out why we need two rows for each change, see [Algorithm](#table_engines
2. Long growing arrays in columns reduce the efficiency of the engine due to the load for writing. The more straightforward the data, the better the efficiency.
3. `SELECT` results depend strongly on the consistency of the history of object changes. Be accurate when preparing data for inserting. You can get unpredictable results with inconsistent data, such as negative values for non-negative metrics like session depth.
### Algorithm { #table_engines-versionedcollapsingmergetree-algorithm}
### Algorithm {#table_engines-versionedcollapsingmergetree-algorithm}
When ClickHouse merges data parts, it deletes each pair of rows that have the same primary key and version and different `Sign`. The order of rows does not matter.

View File

@ -1,4 +1,4 @@
# View { #table_engines-view}
# View {#table_engines-view}
Used for implementing views (for more information, see the `CREATE VIEW` query). It does not store data, but only stores the specified `SELECT` query. When reading from a table, it runs this query (and deletes all unnecessary columns from the query).

View File

@ -5,7 +5,7 @@
- [Query processing](#troubleshooting-does-not-process-queries)
- [Efficiency of query processing](#troubleshooting-too-slow)
## Installation { #troubleshooting-installation-errors}
## Installation {#troubleshooting-installation-errors}
### You Cannot Get Deb Packages from ClickHouse Repository With apt-get
@ -13,7 +13,7 @@
- If you cannot access the repository for any reason, download packages as described in the [Getting started](../getting_started/index.md) article and install them manually using the `sudo dpkg -i <packages>` command. You will also need the `tzdata` package.
## Connecting to the Server { #troubleshooting-accepts-no-connections}
## Connecting to the Server {#troubleshooting-accepts-no-connections}
Possible issues:
@ -120,7 +120,7 @@ Check:
You might be using the wrong user name or password.
## Query Processing { #troubleshooting-does-not-process-queries}
## Query Processing {#troubleshooting-does-not-process-queries}
If ClickHouse is not able to process the query, it sends an error description to the client. In the `clickhouse-client` you get a description of the error in the console. If you are using the HTTP interface, ClickHouse sends the error description in the response body. For example:
@ -133,7 +133,7 @@ If you start `clickhouse-client` with the `stack-trace` parameter, ClickHouse re
You might see a message about a broken connection. In this case, you can repeat the query. If the connection breaks every time you perform the query, check the server logs for errors.
## Efficiency of Query Processing { #troubleshooting-too-slow}
## Efficiency of Query Processing {#troubleshooting-too-slow}
If you see that ClickHouse is working too slowly, you need to profile the load on the server resources and network for your queries.

View File

@ -25,7 +25,7 @@ Then pass this file to a standard input of `clickhouse-benchmark`.
clickhouse-benchmark [keys] < queries_file
```
## Keys { #clickhouse-benchmark-keys}
## Keys {#clickhouse-benchmark-keys}
- `-c N`, `--concurrency=N` — Number of queries that `clickhouse-benchmark` sends simultaneously. Default value: 1.
- `-d N`, `--delay=N` — Interval in seconds between intermediate reports (set 0 to disable reports). Default value: 1.
@ -47,7 +47,7 @@ clickhouse-benchmark [keys] < queries_file
If you want to apply some [settings](../../operations/settings/index.md) for queries, pass them as a key `--<session setting name>= SETTING_VALUE`. For example, `--max_memory_usage=1048576`.
## Output { #clickhouse-benchmark-output}
## Output {#clickhouse-benchmark-output}
By default, `clickhouse-benchmark` reports for each `--delay` interval.
@ -90,13 +90,13 @@ In the report you can find:
- Percentiles of queries execution time.
## Comparison mode { #clickhouse-benchmark-comparison-mode}
## Comparison mode {#clickhouse-benchmark-comparison-mode}
`clickhouse-benchmark` can compare performances for two running ClickHouse servers.
To use the comparison mode, specify the endpoints of both servers with two pairs of `--host` and `--port` keys. Keys are matched by position in the argument list: the first `--host` is matched with the first `--port`, and so on. `clickhouse-benchmark` establishes connections to both servers and then sends queries. Each query is addressed to a randomly selected server. The results are shown for each server separately.
## Example { #clickhouse-benchmark-example}
## Example {#clickhouse-benchmark-example}
```bash
$ echo "SELECT * FROM system.numbers LIMIT 10000000 OFFSET 10000000" | clickhouse-benchmark -i 10

View File

@ -1,8 +1,8 @@
# Aggregate function combinators { #aggregate_functions_combinators}
# Aggregate function combinators {#aggregate_functions_combinators}
The name of an aggregate function can have a suffix appended to it. This changes the way the aggregate function works.
## -If { #agg-functions-combinator-if}
## -If {#agg-functions-combinator-if}
The suffix -If can be appended to the name of any aggregate function. In this case, the aggregate function accepts an extra argument: a condition (UInt8 type). The aggregate function processes only the rows that trigger the condition. If the condition was not triggered even once, it returns a default value (usually zeros or empty strings).
@ -10,7 +10,7 @@ Examples: `sumIf(column, cond)`, `countIf(cond)`, `avgIf(x, cond)`, `quantilesTi
With conditional aggregate functions, you can calculate aggregates for several conditions at once, without using subqueries and `JOIN`s. For example, in Yandex.Metrica, conditional aggregate functions are used to implement the segment comparison functionality.
## -Array { #agg-functions-combinator-array}
## -Array {#agg-functions-combinator-array}
The -Array suffix can be appended to any aggregate function. In this case, the aggregate function takes arguments of the 'Array(T)' type (arrays) instead of 'T' type arguments. If the aggregate function accepts multiple arguments, they must be arrays of equal length. When processing arrays, the aggregate function works like the original aggregate function across all array elements.
@ -20,7 +20,7 @@ Example 2: `uniqArray(arr)` Counts the number of unique elements in all 'arr
-If and -Array can be combined. However, 'Array' must come first, then 'If'. Examples: `uniqArrayIf(arr, cond)`, `quantilesTimingArrayIf(level1, level2)(arr, cond)`. Due to this order, the 'cond' argument won't be an array.
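For illustration, a minimal sketch that combines both combinators on inline data (the subquery and its `flag` column are invented for the example):

```sql
SELECT
    sumArray(vals) AS total_of_all_elements,       -- 13: sums every element of every array
    uniqArrayIf(vals, flag = 1) AS uniq_when_flag  -- 3: only rows where flag = 1 are processed
FROM
(
    SELECT [1, 2, 3] AS vals, 1 AS flag
    UNION ALL
    SELECT [3, 4], 0
)
```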
## -State { #agg-functions-combinator-state}
## -State {#agg-functions-combinator-state}
If you apply this combinator, the aggregate function doesn't return the resulting value (such as the number of unique values for the [uniq](reference.md#agg_function-uniq) function), but an intermediate state of the aggregation (for `uniq`, this is the hash table for calculating the number of unique values). This is an `AggregateFunction(...)` that can be used for further processing or stored in a table to finish aggregating later.
@ -32,19 +32,19 @@ To work with these states, use:
- [-Merge](#aggregate_functions_combinators_merge) combinator.
- [-MergeState](#aggregate_functions_combinators_mergestate) combinator.
## -Merge { #aggregate_functions_combinators_merge}
## -Merge {#aggregate_functions_combinators_merge}
If you apply this combinator, the aggregate function takes the intermediate aggregation state as an argument, combines the states to finish aggregation, and returns the resulting value.
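A minimal sketch of the `-State`/`-Merge` workflow on generated data; in a real setup the intermediate states would usually be stored in an `AggregateFunction(...)` column of a table:

```sql
SELECT uniqMerge(state) AS total_uniq
FROM
(
    SELECT uniqState(number) AS state FROM numbers(10)   -- numbers 0..9
    UNION ALL
    SELECT uniqState(number) FROM numbers(5, 10)         -- numbers 5..14
)
-- total_uniq is 15: the two intermediate states are merged before finishing the aggregation
```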
## -MergeState { #aggregate_functions_combinators_mergestate}
## -MergeState {#aggregate_functions_combinators_mergestate}
Merges the intermediate aggregation states in the same way as the -Merge combinator. However, it doesn't return the resulting value, but an intermediate aggregation state, similar to the -State combinator.
## -ForEach { #agg-functions-combinator-foreach}
## -ForEach {#agg-functions-combinator-foreach}
Converts an aggregate function for tables into an aggregate function for arrays that aggregates the corresponding array items and returns an array of results. For example, `sumForEach` for the arrays `[1, 2]`, `[3, 4, 5]` and `[6, 7]` returns the result `[10, 13, 5]` after adding together the corresponding array items.
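The stated result can be reproduced on inline data, for example:

```sql
SELECT sumForEach(arr) AS per_position_sums   -- [10, 13, 5]
FROM
(
    SELECT [1, 2] AS arr
    UNION ALL SELECT [3, 4, 5]
    UNION ALL SELECT [6, 7]
)
```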
## -OrDefault { #agg-functions-combinator-ordefault}
## -OrDefault {#agg-functions-combinator-ordefault}
Fills the default value of the aggregate function's return type if there is nothing to aggregate.
@ -57,7 +57,7 @@ SELECT avg(number), avgOrDefault(number) FROM numbers(0)
└─────────────┴──────────────────────┘
```
## -OrNull { #agg-functions-combinator-ornull}
## -OrNull {#agg-functions-combinator-ornull}
Fills `null` if there is nothing to aggregate. The return column will be nullable.
@ -85,7 +85,7 @@ FROM
└────────────────────────────────┘
```
## -Resample { #agg_functions-combinator-resample}
## -Resample {#agg_functions-combinator-resample}
Lets you divide data into groups, and then separately aggregates the data in those groups. Groups are created by splitting the values from one column into intervals.

View File

@ -1,4 +1,4 @@
# Aggregate functions { #aggregate_functions}
# Aggregate functions {#aggregate_functions}
Aggregate functions work in the [normal](http://www.sql-tutorial.com/sql-aggregate-functions-sql-tutorial) way as expected by database experts.

View File

@ -1,4 +1,4 @@
# Parametric aggregate functions { #aggregate_functions_parametric}
# Parametric aggregate functions {#aggregate_functions_parametric}
Some aggregate functions can accept not only argument columns (used for compression), but a set of parameters constants for initialization. The syntax is two pairs of brackets instead of one. The first is for parameters, and the second is for arguments.
@ -71,7 +71,7 @@ FROM
In this case, you should remember that you don't know the histogram bin borders.
## sequenceMatch(pattern)(timestamp, cond1, cond2, ...) { #function-sequencematch}
## sequenceMatch(pattern)(timestamp, cond1, cond2, ...) {#function-sequencematch}
Checks whether the sequence contains an event chain that matches the pattern.
@ -161,7 +161,7 @@ SELECT sequenceMatch('(?1)(?2)')(time, number = 1, number = 2, number = 4) FROM
- [sequenceCount](#function-sequencecount)
## sequenceCount(pattern)(time, cond1, cond2, ...) { #function-sequencecount}
## sequenceCount(pattern)(time, cond1, cond2, ...) {#function-sequencecount}
Counts the number of event chains that matched the pattern. The function searches event chains that don't overlap. It starts to search for the next chain after the current chain is matched.
@ -219,7 +219,7 @@ SELECT sequenceCount('(?1).*(?2)')(time, number = 1, number = 2) FROM t
- [sequenceMatch](#function-sequencematch)
## windowFunnel { #windowfunnel}
## windowFunnel {#windowfunnel}
Searches for event chains in a sliding time window and calculates the maximum number of events that occurred in the chain.
@ -309,7 +309,7 @@ Result:
└───────┴───┘
```
## retention { #retention}
## retention {#retention}
The function takes as arguments a set of conditions (from 1 to 32 arguments) of type `UInt8` that indicate whether a certain condition was met for the event.
Any condition can be specified as an argument (as in [WHERE](../../query_language/select.md#select-where)).
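As a rough sketch only (the `events` table with `user_id` and `event_date` columns is hypothetical), next-day retention could be computed like this:

```sql
SELECT
    sum(r[1]) AS day0_users,         -- users who satisfied the first condition
    sum(r[2]) AS returned_next_day   -- of those, users who also satisfied the second one
FROM
(
    SELECT
        user_id,
        retention(event_date = '2020-01-01', event_date = '2020-01-02') AS r
    FROM events
    GROUP BY user_id
)
```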

View File

@ -1,6 +1,6 @@
# Function Reference
## count { #agg_function-count}
## count {#agg_function-count}
Counts the number of rows or non-NULL values.
@ -65,7 +65,7 @@ SELECT count(DISTINCT num) FROM t
This example shows that `count(DISTINCT num)` is performed by the `uniqExact` function according to the `count_distinct_implementation` setting value.
## any(x) { #agg_function-any}
## any(x) {#agg_function-any}
Selects the first encountered value.
The query can be executed in any order and even in a different order each time, so the result of this function is indeterminate.
@ -277,15 +277,15 @@ num
3
```
## min(x) { #agg_function-min}
## min(x) {#agg_function-min}
Calculates the minimum.
## max(x) { #agg_function-max}
## max(x) {#agg_function-max}
Calculates the maximum.
## argMin(arg, val) { #agg_function-argMin}
## argMin(arg, val) {#agg_function-argMin}
Calculates the 'arg' value for a minimal 'val' value. If there are several different values of 'arg' for minimal values of 'val', the first of these values encountered is output.
@ -306,12 +306,12 @@ SELECT argMin(user, salary) FROM salary
└──────────────────────┘
```
## argMax(arg, val) { #agg_function-argMax}
## argMax(arg, val) {#agg_function-argMax}
Calculates the 'arg' value for a maximum 'val' value. If there are several different values of 'arg' for maximum values of 'val', the first of these values encountered is output.
## sum(x) { #agg_function-sum}
## sum(x) {#agg_function-sum}
Calculates the sum.
Only works for numbers.
@ -323,7 +323,7 @@ Computes the sum of the numbers, using the same data type for the result as for
Only works for numbers.
## sumMap(key, value) { #agg_functions-summap}
## sumMap(key, value) {#agg_functions-summap}
Totals the 'value' array according to the keys specified in the 'key' array.
The number of elements in 'key' and 'value' must be the same for each row that is totaled.
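A small self-contained example of this behaviour:

```sql
SELECT sumMap(k, v) AS totals
FROM
(
    SELECT [1, 2] AS k, [10, 20] AS v
    UNION ALL
    SELECT [2, 3], [5, 5]
)
-- totals is ([1, 2, 3], [10, 25, 5]): values are summed per key and keys are returned sorted
```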
@ -451,7 +451,7 @@ The kurtosis of the given distribution. Type — [Float64](../../data_types/floa
SELECT kurtSamp(value) FROM series_with_value_column
```
## timeSeriesGroupSum(uid, timestamp, value) { #agg_function-timeseriesgroupsum}
## timeSeriesGroupSum(uid, timestamp, value) {#agg_function-timeseriesgroupsum}
`timeSeriesGroupSum` can aggregate time series whose sample timestamps are not aligned.
It uses linear interpolation between two neighboring sample timestamps and then sums the time series together.
@ -498,7 +498,7 @@ And the result will be:
[(2,0.2),(3,0.9),(7,2.1),(8,2.4),(12,3.6),(17,5.1),(18,5.4),(24,7.2),(25,2.5)]
```
## timeSeriesGroupRateSum(uid, ts, val) { #agg_function-timeseriesgroupratesum}
## timeSeriesGroupRateSum(uid, ts, val) {#agg_function-timeseriesgroupratesum}
Similarly to `timeSeriesGroupSum`, `timeSeriesGroupRateSum` calculates the rate of each time series and then sums the rates together.
Also, the timestamps should be in ascending order before using this function.
@ -507,13 +507,13 @@ Use this function, the result above case will be:
[(2,0),(3,0.1),(7,0.3),(8,0.3),(12,0.3),(17,0.3),(18,0.3),(24,0.3),(25,0.1)]
```
## avg(x) { #agg_function-avg}
## avg(x) {#agg_function-avg}
Calculates the average.
Only works for numbers.
The result is always Float64.
## uniq { #agg_function-uniq}
## uniq {#agg_function-uniq}
Calculates the approximate number of different values of the argument.
@ -549,7 +549,7 @@ We recommend using this function in almost all scenarios.
- [uniqHLL12](#agg_function-uniqhll12)
- [uniqExact](#agg_function-uniqexact)
## uniqCombined { #agg_function-uniqcombined}
## uniqCombined {#agg_function-uniqcombined}
Calculates the approximate number of different argument values.
@ -596,11 +596,11 @@ Compared to the [uniq](#agg_function-uniq) function, the `uniqCombined`:
- [uniqHLL12](#agg_function-uniqhll12)
- [uniqExact](#agg_function-uniqexact)
## uniqCombined64 { #agg_function-uniqcombined64}
## uniqCombined64 {#agg_function-uniqcombined64}
Same as [uniqCombined](#agg_function-uniqcombined), but uses 64-bit hash for all data types.
## uniqHLL12 { #agg_function-uniqhll12}
## uniqHLL12 {#agg_function-uniqhll12}
Calculates the approximate number of different argument values, using the [HyperLogLog](https://en.wikipedia.org/wiki/HyperLogLog) algorithm.
@ -636,7 +636,7 @@ We don't recommend using this function. In most cases, use the [uniq](#agg_funct
- [uniqExact](#agg_function-uniqexact)
## uniqExact { #agg_function-uniqexact}
## uniqExact {#agg_function-uniqexact}
Calculates the exact number of different argument values.
@ -659,7 +659,7 @@ The function takes a variable number of parameters. Parameters can be `Tuple`, `
- [uniqHLL12](#agg_function-uniqhll12)
## groupArray(x), groupArray(max_size)(x) { #agg_function-grouparray}
## groupArray(x), groupArray(max_size)(x) {#agg_function-grouparray}
Creates an array of argument values.
Values can be added to the array in any (indeterminate) order.
@ -684,7 +684,7 @@ Optional parameters:
- The default value for substituting in empty positions.
- The length of the resulting array. This allows you to receive arrays of the same size for all the aggregate keys. When using this parameter, the default value must be specified.
## groupArrayMovingSum { #agg_function-grouparraymovingsum}
## groupArrayMovingSum {#agg_function-grouparraymovingsum}
Calculates the moving sum of input values.
@ -753,7 +753,7 @@ FROM t
└────────────┴─────────────────────────────────┴────────────────────────┘
```
## groupArrayMovingAvg { #agg_function-grouparraymovingavg}
## groupArrayMovingAvg {#agg_function-grouparraymovingavg}
Calculates the moving average of input values.
@ -831,7 +831,7 @@ Creates an array from different argument values. Memory consumption is the same
The second version (with the `max_size` parameter) limits the size of the resulting array to `max_size` elements.
For example, `groupUniqArray(1)(x)` is equivalent to `[any(x)]`.
## quantile { #quantile}
## quantile {#quantile}
Computes an approximate [quantile](https://en.wikipedia.org/wiki/Quantile) of a numeric data sequence.
@ -896,7 +896,7 @@ Result:
- [quantiles](#quantiles)
## quantileDeterministic { #quantiledeterministic}
## quantileDeterministic {#quantiledeterministic}
Computes an approximate [quantile](https://en.wikipedia.org/wiki/Quantile) of a numeric data sequence.
@ -962,7 +962,7 @@ Result:
- [quantiles](#quantiles)
## quantileExact { #quantileexact}
## quantileExact {#quantileexact}
Exactly computes the [quantile](https://en.wikipedia.org/wiki/Quantile) of a numeric data sequence.
@ -1014,7 +1014,7 @@ Result:
- [median](#median)
- [quantiles](#quantiles)
## quantileExactWeighted { #quantileexactweighted}
## quantileExactWeighted {#quantileexactweighted}
Exactly computes the [quantile](https://en.wikipedia.org/wiki/Quantile) of a numeric data sequence, taking into account the weight of each element.
@ -1078,7 +1078,7 @@ Result:
- [median](#median)
- [quantiles](#quantiles)
## quantileTiming { #quantiletiming}
## quantileTiming {#quantiletiming}
Computes the [quantile](https://en.wikipedia.org/wiki/Quantile) of a numeric data sequence with the determined precision.
@ -1160,7 +1160,7 @@ Result:
- [median](#median)
- [quantiles](#quantiles)
## quantileTimingWeighted { #quantiletimingweighted}
## quantileTimingWeighted {#quantiletimingweighted}
Computes the [quantile](https://en.wikipedia.org/wiki/Quantile) of a numeric data sequence with the determined precision, according to the weight of each sequence member.
@ -1243,7 +1243,7 @@ Result:
- [quantiles](#quantiles)
## quantileTDigest { #quantiletdigest}
## quantileTDigest {#quantiletdigest}
Computes an approximate [quantile](https://en.wikipedia.org/wiki/Quantile) of a numeric data sequence using the [t-digest](https://github.com/tdunning/t-digest/blob/master/docs/t-digest-paper/histo.pdf) algorithm.
@ -1298,7 +1298,7 @@ Result:
- [median](#median)
- [quantiles](#quantiles)
## quantileTDigestWeighted { #quantiletdigestweighted}
## quantileTDigestWeighted {#quantiletdigestweighted}
Computes an approximate [quantile](https://en.wikipedia.org/wiki/Quantile) of a numeric data sequence using the [t-digest](https://github.com/tdunning/t-digest/blob/master/docs/t-digest-paper/histo.pdf) algorithm. The function takes into account the weight of each sequence member. The maximum error is 1%. Memory consumption is `log(n)`, where `n` is a number of values.
@ -1354,7 +1354,7 @@ Result:
- [quantiles](#quantiles)
## median { #median}
## median {#median}
The `median*` functions are the aliases for the corresponding `quantile*` functions. They calculate median of a numeric data sample.
@ -1397,7 +1397,7 @@ Result:
```
## quantiles(level1, level2, ...)(x) { #quantiles}
## quantiles(level1, level2, ...)(x) {#quantiles}
All the quantile functions also have corresponding quantiles functions: `quantiles`, `quantilesDeterministic`, `quantilesTiming`, `quantilesTimingWeighted`, `quantilesExact`, `quantilesExactWeighted`, `quantilesTDigest`. These functions calculate all the quantiles of the listed levels in one pass, and return an array of the resulting values.
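For example, on generated data:

```sql
SELECT quantiles(0.5, 0.9, 0.99)(number) AS q
FROM numbers(1001)
-- q is approximately [500, 900, 990]
```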
@ -1462,7 +1462,7 @@ FROM ontime
└─────────────────────┘
```
## topKWeighted { #topkweighted}
## topKWeighted {#topkweighted}
Similar to `topK`, but takes one additional argument of integer type: `weight`. Every value is counted `weight` times in the frequency calculation.
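A sketch on inline data (the page names and view counts are invented):

```sql
SELECT topKWeighted(2)(page, views) AS top_pages   -- likely ['home', 'docs']
FROM
(
    SELECT 'home' AS page, 100 AS views
    UNION ALL SELECT 'docs', 50
    UNION ALL SELECT 'blog', 10
    UNION ALL SELECT 'about', 1
)
```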
@ -1562,12 +1562,12 @@ SELECT arrayReduce('simpleLinearRegression', [0, 1, 2, 3], [3, 4, 5, 6])
└───────────────────────────────────────────────────────────────────┘
```
## stochasticLinearRegression { #agg_functions-stochasticlinearregression}
## stochasticLinearRegression {#agg_functions-stochasticlinearregression}
This function implements stochastic linear regression. It supports custom parameters for learning rate, L2 regularization coefficient, mini-batch size, and has several methods for updating weights ([Adam](https://en.wikipedia.org/wiki/Stochastic_gradient_descent#Adam) (used by default), [simple SGD](https://en.wikipedia.org/wiki/Stochastic_gradient_descent), [Momentum](https://en.wikipedia.org/wiki/Stochastic_gradient_descent#Momentum), [Nesterov](https://mipt.ru/upload/medialibrary/d7e/41-91.pdf)).
### Parameters { #agg_functions-stochasticlinearregression-parameters}
### Parameters {#agg_functions-stochasticlinearregression-parameters}
There are 4 customizable parameters. They are passed to the function sequentially, but there is no need to pass all four: default values will be used. However, a good model requires some parameter tuning.
@ -1581,7 +1581,7 @@ stochasticLinearRegression(1.0, 1.0, 10, 'SGD')
4. `method for updating weights`: `Adam` (by default), `SGD`, `Momentum`, or `Nesterov`. `Momentum` and `Nesterov` require a little more computation and memory, but they tend to improve the convergence speed and stability of stochastic gradient methods.
### Usage { #agg_functions-stochasticlinearregression-usage}
### Usage {#agg_functions-stochasticlinearregression-usage}
`stochasticLinearRegression` is used in two steps: fitting the model and predicting on new data. In order to fit the model and save its state for later use, we use the `-State` combinator, which saves the state (the model weights, etc.).
To predict, we use the [evalMLMethod](../functions/machine_learning_functions.md#machine_learning_methods-evalmlmethod) function, which takes a state as an argument as well as features to predict on.
@ -1622,7 +1622,7 @@ The query will return a column of predicted values. Note that first argument of
`test_data` is a table like `train_data` but may not contain the target value.
### Notes { #agg_functions-stochasticlinearregression-notes}
### Notes {#agg_functions-stochasticlinearregression-notes}
1. To merge two models, the user may create the following query:
```sql
@ -1642,12 +1642,12 @@ The query will return a column of predicted values. Note that first argument of
- [Difference between linear and logistic regressions](https://stackoverflow.com/questions/12146914/what-is-the-difference-between-linear-regression-and-logistic-regression)
## stochasticLogisticRegression { #agg_functions-stochasticlogisticregression}
## stochasticLogisticRegression {#agg_functions-stochasticlogisticregression}
This function implements stochastic logistic regression. It can be used for binary classification problems, supports the same custom parameters as `stochasticLinearRegression`, and works the same way.
### Parameters { #agg_functions-stochasticlogisticregression-parameters}
### Parameters {#agg_functions-stochasticlogisticregression-parameters}
Parameters are exactly the same as in stochasticLinearRegression:
`learning rate`, `l2 regularization coefficient`, `mini-batch size`, `method for updating weights`.

View File

@ -1,4 +1,4 @@
## ALTER { #query_language_queries_alter}
## ALTER {#query_language_queries_alter}
The `ALTER` query is only supported for `*MergeTree` tables, as well as `Merge` and `Distributed`. The query has several variations.
@ -23,7 +23,7 @@ The following actions are supported:
These actions are described in detail below.
#### ADD COLUMN { #alter_add-column}
#### ADD COLUMN {#alter_add-column}
```sql
ADD COLUMN [IF NOT EXISTS] name [type] [default_expr] [codec] [AFTER name_after]
@ -43,7 +43,7 @@ Example:
ALTER TABLE visits ADD COLUMN browser String AFTER user_id
```
#### DROP COLUMN { #alter_drop-column}
#### DROP COLUMN {#alter_drop-column}
```sql
DROP COLUMN [IF EXISTS] name
@ -59,7 +59,7 @@ Example:
ALTER TABLE visits DROP COLUMN browser
```
#### CLEAR COLUMN { #alter_clear-column}
#### CLEAR COLUMN {#alter_clear-column}
```sql
CLEAR COLUMN [IF EXISTS] name IN PARTITION partition_name
@ -75,7 +75,7 @@ Example:
ALTER TABLE visits CLEAR COLUMN browser IN PARTITION tuple()
```
#### COMMENT COLUMN { #alter_comment-column}
#### COMMENT COLUMN {#alter_comment-column}
```sql
COMMENT COLUMN [IF EXISTS] name 'comment'
@ -93,7 +93,7 @@ Example:
ALTER TABLE visits COMMENT COLUMN browser 'The table shows the browser used for accessing the site.'
```
#### MODIFY COLUMN { #alter_modify-column}
#### MODIFY COLUMN {#alter_modify-column}
```sql
MODIFY COLUMN [IF EXISTS] name [type] [default_expr] [TTL]
@ -191,7 +191,7 @@ Constraint check *will not be executed* on existing data if it was added.
All changes to replicated tables are broadcast to ZooKeeper and so will be applied on other replicas.
### Manipulations With Partitions and Parts { #alter_manipulations-with-partitions}
### Manipulations With Partitions and Parts {#alter_manipulations-with-partitions}
The following operations with [partitions](../operations/table_engines/custom_partitioning_key.md) are available:
@ -207,7 +207,7 @@ The following operations with [partitions](../operations/table_engines/custom_pa
- [FREEZE PARTITION](#alter_freeze-partition) Creates a backup of a partition.
- [FETCH PARTITION](#alter_fetch-partition) Downloads a partition from another server.
- [MOVE PARTITION|PART](#alter_move-partition) Move partition/data part to another disk or volume.
#### DETACH PARTITION { #alter_detach-partition}
#### DETACH PARTITION {#alter_detach-partition}
```sql
ALTER TABLE table_name DETACH PARTITION partition_expr
@ -227,7 +227,7 @@ After the query is executed, you can do whatever you want with the data in the `
This query is replicated: it moves the data to the `detached` directory on all replicas. Note that you can execute this query only on a leader replica. To find out if a replica is a leader, perform a `SELECT` query to the [system.replicas](../operations/system_tables.md#system_tables-replicas) table. Alternatively, it is easier to make a `DETACH` query on all replicas: all the replicas throw an exception, except the leader replica.
#### DROP PARTITION { #alter_drop-partition}
#### DROP PARTITION {#alter_drop-partition}
```sql
ALTER TABLE table_name DROP PARTITION partition_expr
@ -239,7 +239,7 @@ Read about setting the partition expression in a section [How to specify the par
The query is replicated: it deletes data on all replicas.
#### DROP DETACHED PARTITION|PART { #alter_drop-detached}
#### DROP DETACHED PARTITION|PART {#alter_drop-detached}
```sql
ALTER TABLE table_name DROP DETACHED PARTITION|PART partition_expr
@ -249,7 +249,7 @@ Removes the specified part or all parts of the specified partition from `detache
Read more about setting the partition expression in a section [How to specify the partition expression](#alter-how-to-specify-part-expr).
#### ATTACH PARTITION|PART { #alter_attach-partition}
#### ATTACH PARTITION|PART {#alter_attach-partition}
``` sql
ALTER TABLE table_name ATTACH PARTITION|PART partition_expr
@ -268,7 +268,7 @@ This query is replicated. The replica-initiator checks whether there is data in
So you can put data to the `detached` directory on one replica, and use the `ALTER ... ATTACH` query to add it to the table on all replicas.
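A sketch of that workflow, reusing the `visits` table from the examples above with a hypothetical partition `201901`:

```sql
-- Move the partition to the detached directory on all replicas
ALTER TABLE visits DETACH PARTITION 201901;

-- Later, after placing the data into detached/ on one replica, attach it back everywhere
ALTER TABLE visits ATTACH PARTITION 201901;
```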
#### ATTACH PARTITION FROM { #alter_attach-partition-from}
#### ATTACH PARTITION FROM {#alter_attach-partition-from}
```sql
ALTER TABLE table2 ATTACH PARTITION partition_expr FROM table1
@ -281,7 +281,7 @@ For the query to run successfully, the following conditions must be met:
- Both tables must have the same structure.
- Both tables must have the same partition key.
#### REPLACE PARTITION { #alter_replace-partition}
#### REPLACE PARTITION {#alter_replace-partition}
```sql
ALTER TABLE table2 REPLACE PARTITION partition_expr FROM table1
@ -294,7 +294,7 @@ For the query to run successfully, the following conditions must be met:
- Both tables must have the same structure.
- Both tables must have the same partition key.
#### MOVE PARTITION TO TABLE { #alter_move_to_table-partition}
#### MOVE PARTITION TO TABLE {#alter_move_to_table-partition}
``` sql
ALTER TABLE table_source MOVE PARTITION partition_expr TO TABLE table_dest
@ -311,7 +311,7 @@ For the query to run successfully, the following conditions must be met:
#### CLEAR COLUMN IN PARTITION { #alter_clear-column-partition}
#### CLEAR COLUMN IN PARTITION {#alter_clear-column-partition}
```sql
ALTER TABLE table_name CLEAR COLUMN column_name IN PARTITION partition_expr
@ -325,7 +325,7 @@ Example:
ALTER TABLE visits CLEAR COLUMN hour in PARTITION 201902
```
#### FREEZE PARTITION { #alter_freeze-partition}
#### FREEZE PARTITION {#alter_freeze-partition}
```sql
ALTER TABLE table_name FREEZE [PARTITION partition_expr]
@ -364,7 +364,7 @@ Restoring from a backup doesn't require stopping the server.
For more information about backups and restoring data, see the [Data Backup](../operations/backup.md) section.
#### CLEAR INDEX IN PARTITION { #alter_clear-index-partition}
#### CLEAR INDEX IN PARTITION {#alter_clear-index-partition}
```sql
ALTER TABLE table_name CLEAR INDEX index_name IN PARTITION partition_expr
@ -372,7 +372,7 @@ ALTER TABLE table_name CLEAR INDEX index_name IN PARTITION partition_expr
The query works similarly to `CLEAR COLUMN`, but it resets an index instead of column data.
#### FETCH PARTITION { #alter_fetch-partition}
#### FETCH PARTITION {#alter_fetch-partition}
```sql
ALTER TABLE table_name FETCH PARTITION partition_expr FROM 'path-in-zookeeper'
@ -400,7 +400,7 @@ Before downloading, the system checks if the partition exists and the table stru
Although the query is called `ALTER TABLE`, it does not change the table structure and does not immediately change the data available in the table.
#### MOVE PARTITION|PART { #alter_move-partition}
#### MOVE PARTITION|PART {#alter_move-partition}
Moves partitions or data parts to another volume or disk for `MergeTree`-engine tables. See [Using Multiple Block Devices for Data Storage](../operations/table_engines/mergetree.md#table_engine-mergetree-multiple-volumes).
@ -421,7 +421,7 @@ ALTER TABLE hits MOVE PART '20190301_14343_16206_438' TO VOLUME 'slow'
ALTER TABLE hits MOVE PARTITION '2019-09-01' TO DISK 'fast_ssd'
```
#### How To Set Partition Expression { #alter-how-to-specify-part-expr}
#### How To Set Partition Expression {#alter-how-to-specify-part-expr}
You can specify the partition expression in `ALTER ... PARTITION` queries in different ways:
@ -458,7 +458,7 @@ For non-replicatable tables, all `ALTER` queries are performed synchronously. Fo
For `ALTER ... ATTACH|DETACH|DROP` queries, you can use the `replication_alter_partitions_sync` setting to set up waiting.
Possible values: `0` do not wait; `1` only wait for own execution (default); `2` wait for all.
### Mutations { #alter-mutations}
### Mutations {#alter-mutations}
Mutations are an ALTER query variant that allows changing or deleting rows in a table. In contrast to standard `UPDATE` and `DELETE` queries that are intended for point data changes, mutations are intended for heavy operations that change a lot of rows in a table. Supported for the `MergeTree` family of table engines including the engines with replication support.
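A sketch of the two mutation forms, reusing the `visits` table from the examples above (the `UserID` filter column is assumed for illustration):

```sql
-- Delete matching rows; runs asynchronously as a mutation
ALTER TABLE visits DELETE WHERE UserID = 0;

-- Rewrite a column value in matching rows
ALTER TABLE visits UPDATE browser = 'unknown' WHERE browser = '';
```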

View File

@ -1,6 +1,6 @@
# CREATE Queries
## CREATE DATABASE { #query_language-create-database}
## CREATE DATABASE {#query_language-create-database}
Creates database.
@ -29,7 +29,7 @@ CREATE DATABASE [IF NOT EXISTS] db_name [ON CLUSTER cluster] [ENGINE = engine(..
By default, ClickHouse uses its own [database engine](../database_engines/index.md).
## CREATE TABLE { #create-table-query}
## CREATE TABLE {#create-table-query}
The `CREATE TABLE` query can have several forms.
@ -70,7 +70,7 @@ In all cases, if `IF NOT EXISTS` is specified, the query won't return an error i
There can be other clauses after the `ENGINE` clause in the query. See detailed documentation on how to create tables in the descriptions of [table engines](../operations/table_engines/index.md#table_engines).
### Default Values { #create-default-values}
### Default Values {#create-default-values}
The column description can specify an expression for a default value, in one of the following ways: `DEFAULT expr`, `MATERIALIZED expr`, `ALIAS expr`.
Example: `URLDomain String DEFAULT domain(URL)`.
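A minimal sketch showing all three kinds of default expressions in one table (the table and most column names are invented for the example):

```sql
CREATE TABLE defaults_example
(
    URL String,
    URLDomain String DEFAULT domain(URL),           -- stored; computed when omitted in INSERT
    EventTime DateTime,
    EventDate Date MATERIALIZED toDate(EventTime),  -- always computed, cannot be inserted
    EventHour UInt8 ALIAS toHour(EventTime)         -- computed on SELECT, not stored
)
ENGINE = MergeTree()
ORDER BY EventTime
```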
@ -105,7 +105,7 @@ If you add a new column to a table but later change its default expression, the
It is not possible to set default values for elements in nested data structures.
### Constraints { #constraints}
### Constraints {#constraints}
Along with column descriptions, constraints can be defined:
@ -127,7 +127,7 @@ Adding large amount of constraints can negatively affect performance of big `INS
Defines storage time for values. Can be specified only for MergeTree-family tables. For the detailed description, see [TTL for columns and tables](../operations/table_engines/mergetree.md#table_engine-mergetree-ttl).
### Column Compression Codecs { #codecs}
### Column Compression Codecs {#codecs}
By default, ClickHouse applies the `lz4` compression method. For `MergeTree`-engine family you can change the default compression method in the [compression](../operations/server_settings/settings.md#server-settings-compression) section of a server configuration. You can also define the compression method for each individual column in the `CREATE TABLE` query.
@ -158,7 +158,7 @@ Compression is supported for the following table engines:
ClickHouse supports common purpose codecs and specialized codecs.
#### Specialized Codecs { #create-query-specialized-codecs}
#### Specialized Codecs {#create-query-specialized-codecs}
These codecs are designed to make compression more effective by using specific features of data. Some of these codecs don't compress data themselves. Instead, they prepare the data for a common purpose codec, which compresses it better than without this preparation.
@ -180,7 +180,7 @@ CREATE TABLE codec_example
ENGINE = MergeTree()
```
#### Common purpose codecs { #create-query-common-purpose-codecs}
#### Common purpose codecs {#create-query-common-purpose-codecs}
Codecs:
@ -275,7 +275,7 @@ Views look the same as normal tables. For example, they are listed in the result
There isn't a separate query for deleting views. To delete a view, use `DROP TABLE`.
## CREATE DICTIONARY { #create-dictionary-query}
## CREATE DICTIONARY {#create-dictionary-query}
```sql
CREATE DICTIONARY [IF NOT EXISTS] [db.]dictionary_name

View File

@ -1,4 +1,4 @@
# External Dictionaries { #dicts-external_dicts}
# External Dictionaries {#dicts-external_dicts}
You can add your own dictionaries from various data sources. The data source for a dictionary can be a local text or executable file, an HTTP(s) resource, or another DBMS. For more information, see "[Sources for external dictionaries](external_dicts_dict_sources.md)".
@ -37,7 +37,7 @@ You can [configure](external_dicts_dict.md) any number of dictionaries in the sa
!!! attention "Attention"
You can convert values for a small dictionary by describing it in a `SELECT` query (see the [transform](../functions/other_functions.md) function). This functionality is not related to external dictionaries.
## See also { #ext-dicts-see-also}
## See also {#ext-dicts-see-also}
- [Configuring an External Dictionary](external_dicts_dict.md)
- [Storing Dictionaries in Memory](external_dicts_dict_layout.md)

View File

@ -1,4 +1,4 @@
# Configuring an External Dictionary { #dicts-external_dicts_dict}
# Configuring an External Dictionary {#dicts-external_dicts_dict}
If a dictionary is configured using an XML file, then the dictionary configuration has the following structure:

View File

@ -1,4 +1,4 @@
# Storing Dictionaries in Memory { #dicts-external_dicts_dict_layout}
# Storing Dictionaries in Memory {#dicts-external_dicts_dict_layout}
There are a variety of ways to store dictionaries in memory.
@ -79,7 +79,7 @@ or
LAYOUT(FLAT())
```
### hashed { #dicts-external_dicts_dict_layout-hashed}
### hashed {#dicts-external_dicts_dict_layout-hashed}
The dictionary is completely stored in memory in the form of a hash table. The dictionary can contain any number of elements with any identifiers. In practice, the number of keys can reach tens of millions of items.
@ -99,7 +99,7 @@ or
LAYOUT(HASHED())
```
### sparse_hashed { #dicts-external_dicts_dict_layout-sparse_hashed}
### sparse_hashed {#dicts-external_dicts_dict_layout-sparse_hashed}
Similar to `hashed`, but uses less memory at the cost of more CPU usage.

View File

@ -1,4 +1,4 @@
# Sources of External Dictionaries { #dicts-external_dicts_dict_sources}
# Sources of External Dictionaries {#dicts-external_dicts_dict_sources}
An external dictionary can be connected from many different sources.
@ -43,7 +43,7 @@ Types of sources (`source_type`):
- [Redis](#dicts-external_dicts_dict_sources-redis)
## Local File { #dicts-external_dicts_dict_sources-local_file}
## Local File {#dicts-external_dicts_dict_sources-local_file}
Example of settings:
@ -68,7 +68,7 @@ Setting fields:
- `format` The file format. All the formats described in "[Formats](../../interfaces/formats.md#formats)" are supported.
## Executable File { #dicts-external_dicts_dict_sources-executable}
## Executable File {#dicts-external_dicts_dict_sources-executable}
Working with executable files depends on [how the dictionary is stored in memory](external_dicts_dict_layout.md). If the dictionary is stored using `cache` and `complex_key_cache`, ClickHouse requests the necessary keys by sending a request to the executable file's STDIN. Otherwise, ClickHouse starts the executable file and treats its output as dictionary data.
@ -95,7 +95,7 @@ Setting fields:
- `format` The file format. All the formats described in "[Formats](../../interfaces/formats.md#formats)" are supported.
## HTTP(s) { #dicts-external_dicts_dict_sources-http}
## HTTP(s) {#dicts-external_dicts_dict_sources-http}
Working with an HTTP(s) server depends on [how the dictionary is stored in memory](external_dicts_dict_layout.md). If the dictionary is stored using `cache` and `complex_key_cache`, ClickHouse requests the necessary keys by sending a request via the `POST` method.
@ -146,7 +146,7 @@ Setting fields:
- `value` Value set for a specific identifier name.
## ODBC { #dicts-external_dicts_dict_sources-odbc}
## ODBC {#dicts-external_dicts_dict_sources-odbc}
You can use this method to connect any database that has an ODBC driver.
@ -390,7 +390,7 @@ LIFETIME(MIN 300 MAX 360)
## DBMS
### MySQL { #dicts-external_dicts_dict_sources-mysql}
### MySQL {#dicts-external_dicts_dict_sources-mysql}
Example of settings:
@ -482,7 +482,7 @@ SOURCE(MYSQL(
```
### ClickHouse { #dicts-external_dicts_dict_sources-clickhouse}
### ClickHouse {#dicts-external_dicts_dict_sources-clickhouse}
Example of settings:
@ -526,7 +526,7 @@ Setting fields:
- `invalidate_query` Query for checking the dictionary status. Optional parameter. Read more in the section [Updating dictionaries](external_dicts_dict_lifetime.md).
### MongoDB { #dicts-external_dicts_dict_sources-mongodb}
### MongoDB {#dicts-external_dicts_dict_sources-mongodb}
Example of settings:
@ -566,7 +566,7 @@ Setting fields:
- `collection` Name of the collection.
### Redis { #dicts-external_dicts_dict_sources-redis}
### Redis {#dicts-external_dicts_dict_sources-redis}
Example of settings:

View File

@ -43,7 +43,7 @@ Attributes are described in the query body:
- `AttrName AttrType` — [Data column](external_dicts_dict_structure.md#ext_dict_structure-attributes). There can be multiple attributes.
## Key { #ext_dict_structure-key}
## Key {#ext_dict_structure-key}
ClickHouse supports the following types of keys:
@ -56,7 +56,7 @@ An xml structure can contain either `<id>` or `<key>`. DDL-query must contain si
You must not describe key as an attribute.
### Numeric Key { #ext_dict-numeric-key}
### Numeric Key {#ext_dict-numeric-key}
Type: `UInt64`.
@ -126,7 +126,7 @@ PRIMARY KEY field1, field2
For a query to the `dictGet*` function, a tuple is passed as the key. Example: `dictGetString('dict_name', 'attr_name', tuple('string for field1', num_for_field2))`.
## Attributes { #ext_dict_structure-attributes}
## Attributes {#ext_dict_structure-attributes}
Configuration example:

View File

@ -1,4 +1,4 @@
# Internal dictionaries { #internal_dicts}
# Internal dictionaries {#internal_dicts}
ClickHouse contains a built-in feature for working with a geobase.

View File

@ -63,7 +63,7 @@ Differs from 'modulo' in that it returns zero when the divisor is zero.
Calculates a number with the reverse sign. The result is always signed.
## abs(a) { #arithm_func-abs}
## abs(a) {#arithm_func-abs}
Calculates the absolute value of the number (a). That is, if a < 0, it returns -a. For unsigned types it doesn't do anything. For signed integer types, it returns an unsigned number.

View File

@ -1,18 +1,18 @@
# Functions for working with arrays
## empty { #function-empty}
## empty {#function-empty}
Returns 1 for an empty array, or 0 for a non-empty array.
The result type is UInt8.
The function also works for strings.
## notEmpty { #function-notempty}
## notEmpty {#function-notempty}
Returns 0 for an empty array, or 1 for a non-empty array.
The result type is UInt8.
The function also works for strings.
## length { #array_functions-length}
## length {#array_functions-length}
Returns the number of items in the array.
The result type is UInt64.
@ -202,7 +202,7 @@ SELECT countEqual([1, 2, NULL, NULL], NULL)
└──────────────────────────────────────┘
```
## arrayEnumerate(arr) { #array_functions-arrayenumerate}
## arrayEnumerate(arr) {#array_functions-arrayenumerate}
Returns the array \[1, 2, 3, ..., length (arr) \]
@ -455,7 +455,7 @@ SELECT arraySlice([1, 2, NULL, 4, 5], 2, 3) AS res
Array elements set to `NULL` are handled as normal values.
## arraySort(\[func,\] arr, ...) { #array_functions-sort}
## arraySort(\[func,\] arr, ...) {#array_functions-sort}
Sorts the elements of the `arr` array in ascending order. If the `func` function is specified, sorting order is determined by the result of the `func` function applied to the elements of the array. If `func` accepts multiple arguments, the `arraySort` function is passed several arrays that the arguments of `func` will correspond to. Detailed examples are shown at the end of `arraySort` description.
@ -548,7 +548,7 @@ SELECT arraySort((x, y) -> -y, [0, 1, 2], [1, 2, 3]) as res;
!!! note
To improve sorting efficiency, the [Schwartzian transform](https://en.wikipedia.org/wiki/Schwartzian_transform) is used.
## arrayReverseSort([func,] arr, ...) { #array_functions-reverse-sort}
## arrayReverseSort([func,] arr, ...) {#array_functions-reverse-sort}
Sorts the elements of the `arr` array in descending order. If the `func` function is specified, `arr` is sorted according to the result of the `func` function applied to the elements of the array, and then the sorted array is reversed. If `func` accepts multiple arguments, the `arrayReverseSort` function is passed several arrays that the arguments of `func` will correspond to. Detailed examples are shown at the end of `arrayReverseSort` description.
@ -648,11 +648,11 @@ If multiple arguments are passed, it counts the number of different tuples of el
If you want to get a list of unique items in an array, you can use arrayReduce('groupUniqArray', arr).
## arrayJoin(arr) { #array_functions-join}
## arrayJoin(arr) {#array_functions-join}
A special function. See the section ["ArrayJoin function"](array_join.md#functions_arrayjoin).
## arrayDifference { #arraydifference}
## arrayDifference {#arraydifference}
Calculates the difference between adjacent array elements. Returns an array where the first element is 0, the second is the difference `a[1] - a[0]`, etc. The type of elements in the resulting array is determined by the type inference rules for subtraction (e.g. `UInt8` - `UInt8` = `Int16`).
@ -704,7 +704,7 @@ Result:
└────────────────────────────────────────────┘
```
## arrayDistinct { #arraydistinct}
## arrayDistinct {#arraydistinct}
Takes an array, returns an array containing the distinct elements only.
@ -738,7 +738,7 @@ Result:
└────────────────────────────────┘
```
## arrayEnumerateDense(arr) { #array_functions-arrayenumeratedense}
## arrayEnumerateDense(arr) {#array_functions-arrayenumeratedense}
Returns an array of the same size as the source array, indicating where each element first appears in the source array.
@ -754,7 +754,7 @@ SELECT arrayEnumerateDense([10, 20, 10, 30])
└───────────────────────────────────────┘
```
## arrayIntersect(arr) { #array_functions-arrayintersect}
## arrayIntersect(arr) {#array_functions-arrayintersect}
Takes multiple arrays, returns an array with elements that are present in all source arrays. Elements order in the resulting array is the same as in the first array.
@ -772,7 +772,7 @@ SELECT
└──────────────┴───────────┘
```
## arrayReduce(agg_func, arr1, ...) { #array_functions-arrayreduce}
## arrayReduce(agg_func, arr1, ...) {#array_functions-arrayreduce}
Applies an aggregate function to array elements and returns its result. The name of the aggregation function is passed as a string in single quotes `'max'`, `'sum'`. When using parametric aggregate functions, the parameter is indicated after the function name in parentheses `'uniqUpTo(6)'`.
@ -814,7 +814,7 @@ SELECT arrayReduce('uniqUpTo(3)', [1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
└─────────────────────────────────────────────────────────────┘
```
## arrayReverse(arr) { #array_functions-arrayreverse}
## arrayReverse(arr) {#array_functions-arrayreverse}
Returns an array of the same size as the original array containing the elements in reverse order.
@ -830,11 +830,11 @@ SELECT arrayReverse([1, 2, 3])
└─────────────────────────┘
```
## reverse(arr) { #array_functions-reverse}
## reverse(arr) {#array_functions-reverse}
Synonym for ["arrayReverse"](#array_functions-arrayreverse)
## arrayFlatten { #arrayflatten}
## arrayFlatten {#arrayflatten}
Converts an array of arrays to a flat array.
@ -869,7 +869,7 @@ SELECT flatten([[[1]], [[2], [3]]])
└─────────────────────────────────────────────┘
```
## arrayCompact { #arraycompact}
## arrayCompact {#arraycompact}
Removes consecutive duplicate elements from an array. The order of result values is determined by the order in the source array.
@ -905,7 +905,7 @@ Result:
└────────────────────────────────────────────┘
```
## arrayZip { #arrayzip}
## arrayZip {#arrayzip}
Combines multiple Array-type columns into one Array(Tuple(...)) column.

View File

@ -1,4 +1,4 @@
# arrayJoin function { #functions_arrayjoin}
# arrayJoin function {#functions_arrayjoin}
This is a very unusual function.

View File

@ -20,7 +20,7 @@ The result type is an integer with bits equal to the maximum bits of its argumen
## bitRotateRight(a, b)
## bitTest { #bittest}
## bitTest {#bittest}
Takes any integer and converts it into [binary form](https://en.wikipedia.org/wiki/Binary_number), then returns the value of the bit at the specified position. Positions are counted from 0, from right to left.
@ -75,7 +75,7 @@ Result:
└────────────────┘
```
## bitTestAll { #bittestall}
## bitTestAll {#bittestall}
Returns the result of the [logical conjunction](https://en.wikipedia.org/wiki/Logical_conjunction) (AND operator) of all bits at the given positions. Positions are counted from 0, from right to left.
@ -140,7 +140,7 @@ Result:
└───────────────────────────────┘
```
## bitTestAny { #bittestany}
## bitTestAny {#bittestany}
Returns the result of the [logical disjunction](https://en.wikipedia.org/wiki/Logical_disjunction) (OR operator) of all bits at the given positions. Positions are counted from 0, from right to left.

View File

@ -9,7 +9,7 @@ RoaringBitmap is wrapped into a data structure while actual storage of Bitmap ob
For more information on RoaringBitmap, see: [CRoaring](https://github.com/RoaringBitmap/CRoaring).
## bitmapBuild { #bitmap_functions-bitmapbuild}
## bitmapBuild {#bitmap_functions-bitmapbuild}
Builds a bitmap from an unsigned integer array.
@ -56,7 +56,7 @@ SELECT bitmapToArray(bitmapBuild([1, 2, 3, 4, 5])) AS res
└─────────────┘
```
## bitmapSubsetInRange { #bitmap_functions-bitmapsubsetinrange}
## bitmapSubsetInRange {#bitmap_functions-bitmapsubsetinrange}
Returns a subset in the specified range (does not include `range_end`).
@ -82,7 +82,7 @@ SELECT bitmapToArray(bitmapSubsetInRange(bitmapBuild([0,1,2,3,4,5,6,7,8,9,10,11,
└───────────────────┘
```
## bitmapSubsetLimit { #bitmapsubsetlimit}
## bitmapSubsetLimit {#bitmapsubsetlimit}
Creates a subset of a bitmap with n elements taken between `range_start` and `cardinality_limit`.
@ -120,7 +120,7 @@ Result:
└───────────────────────────┘
```
## bitmapContains { #bitmap_functions-bitmapcontains}
## bitmapContains {#bitmap_functions-bitmapcontains}
Checks whether the bitmap contains an element.

View File

@ -17,17 +17,17 @@ Strings are compared by bytes. A shorter string is smaller than all strings that
Note. Up until version 1.1.54134, signed and unsigned numbers were compared the same way as in C++. In other words, you could get an incorrect result in cases like SELECT 9223372036854775807 > -1. This behavior changed in version 1.1.54134 and is now mathematically correct.
## equals, a = b and a == b operator { #function-equals}
## equals, a = b and a == b operator {#function-equals}
## notEquals, a != b and a `<>` b operator { #function-notequals}
## notEquals, a != b and a `<>` b operator {#function-notequals}
## less, `< operator` { #function-less}
## less, `< operator` {#function-less}
## greater, `> operator` { #function-greater}
## greater, `> operator` {#function-greater}
## lessOrEquals, `<= operator` { #function-lessorequals}
## lessOrEquals, `<= operator` {#function-lessorequals}
## greaterOrEquals, `>= operator` { #function-greaterorequals}
## greaterOrEquals, `>= operator` {#function-greaterorequals}
[Original article](https://clickhouse.tech/docs/en/query_language/functions/comparison_functions/) <!--hide-->

View File

@ -1,6 +1,6 @@
# Conditional functions
## if { #if}
## if {#if}
Controls conditional branching. Unlike most systems, ClickHouse always evaluates both expressions `then` and `else`.
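For example:

```sql
SELECT number, if(number % 2 = 0, 'even', 'odd') AS parity
FROM numbers(4)
```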
@ -88,7 +88,7 @@ WHERE isNotNull(left) AND isNotNull(right)
```
Note: `NULL` values are not used in this example, check [NULL values in conditionals](#null-values-in-conditionals) section.
## Ternary Operator { #ternary-operator}
## Ternary Operator {#ternary-operator}
It works the same as the `if` function.

View File

@ -62,7 +62,7 @@ Converts a date with time to a UInt8 number containing the number of the minute
Converts a date with time to a UInt8 number containing the number of the second in the minute (0-59).
Leap seconds are not accounted for.
## toUnixTimestamp { #to_unix_timestamp}
## toUnixTimestamp {#to_unix_timestamp}
For a DateTime argument: converts the value to its internal numeric representation (Unix timestamp).
For a String argument: parses the datetime from the string according to the timezone (optional second argument; the server timezone is used by default) and returns the corresponding Unix timestamp.
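For example:

```sql
SELECT
    toUnixTimestamp(toDateTime('2020-03-18 00:00:00', 'UTC')) AS from_datetime,
    toUnixTimestamp('2020-03-18 00:00:00', 'UTC') AS from_string
-- both return 1584489600
```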
@ -342,7 +342,7 @@ SELECT
└──────────────────────────┴───────────────────────────────┘
```
## dateDiff { #datediff}
## dateDiff {#datediff}
Returns the difference between two Date or DateTime values.
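For example:

```sql
SELECT dateDiff('day', toDate('2020-01-01'), toDate('2020-03-01')) AS days
-- days is 60 (31 days in January plus 29 days in February 2020)
```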
@ -401,7 +401,7 @@ For a time interval starting at 'StartTime' and continuing for 'Duration' second
For example, `timeSlots(toDateTime('2012-01-01 12:20:00'), 600) = [toDateTime('2012-01-01 12:00:00'), toDateTime('2012-01-01 12:30:00')]`.
This is necessary for searching for pageviews in the corresponding session.
## formatDateTime(Time, Format\[, Timezone\]) { #formatdatetime}
## formatDateTime(Time, Format\[, Timezone\]) {#formatdatetime}
Formats a Time according to the given Format string. N.B.: Format is a constant expression, e.g. you cannot have multiple formats for a single result column.
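For example:

```sql
SELECT formatDateTime(toDateTime('2020-03-18 22:16:07'), '%Y-%m-%d %H:%M:%S') AS formatted
-- formatted is '2020-03-18 22:16:07'
```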

View File

@ -1,6 +1,6 @@
# Encoding functions
## char { #char}
## char {#char}
Returns a string whose length equals the number of passed arguments and where each byte has the value of the corresponding argument. Accepts multiple arguments of numeric types. If the value of an argument is out of the range of the UInt8 data type, it is converted to UInt8 with possible rounding and overflow.
@ -60,7 +60,7 @@ Result:
└───────┘
```
## hex { #hex}
## hex {#hex}
Returns a string containing the argument's hexadecimal representation.
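For example:

```sql
SELECT hex(255) AS h1, hex('abc') AS h2
-- h1 is 'FF', h2 is '616263'
```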

View File

@ -1,4 +1,4 @@
# Functions for Working with External Dictionaries { #ext_dict_functions}
# Functions for Working with External Dictionaries {#ext_dict_functions}
For information on connecting and configuring external dictionaries, see [External dictionaries](../dicts/external_dicts.md).
@ -111,7 +111,7 @@ dictHas('dict_name', id_expr)
Type: `UInt8`.
## dictGetHierarchy { #dictgethierarchy}
## dictGetHierarchy {#dictgethierarchy}
Creates an array, containing all the parents of a key in the [hierarchical dictionary](../dicts/external_dicts_dict_hierarchical.md).
@ -154,7 +154,7 @@ dictIsIn('dict_name', child_id_expr, ancestor_id_expr)
Type: `UInt8`.
## Other functions { #ext_dict_functions-other}
## Other functions {#ext_dict_functions-other}
ClickHouse supports specialized functions that convert dictionary attribute values to a specific data type regardless of the dictionary configuration.

View File

@ -152,7 +152,7 @@ SELECT geohashDecode('ezs42') AS res
└─────────────────────────────────┘
```
## geoToH3 { #geotoh3}
## geoToH3 {#geotoh3}
Returns [H3](https://uber.github.io/h3/#/documentation/overview/introduction) point index `(lon, lat)` with specified resolution.

View File

@ -2,7 +2,7 @@
Hash functions can be used for the deterministic pseudo-random shuffling of elements.
## halfMD5 { #hash_functions-halfmd5}
## halfMD5 {#hash_functions-halfmd5}
[Interprets](../../query_language/functions/type_conversion_functions.md#type_conversion_functions-reinterpretAsString) all the input parameters as strings and calculates the [MD5](https://en.wikipedia.org/wiki/MD5) hash value for each of them. Then combines hashes, takes the first 8 bytes of the hash of the resulting string, and interprets them as `UInt64` in big-endian byte order.
@ -32,13 +32,13 @@ SELECT halfMD5(array('e','x','a'), 'mple', 10, toDateTime('2019-06-15 23:00:00')
└────────────────────┴────────┘
```
## MD5 { #hash_functions-md5}
## MD5 {#hash_functions-md5}
Calculates the MD5 from a string and returns the resulting set of bytes as FixedString(16).
If you don't need MD5 in particular, but you need a decent cryptographic 128-bit hash, use the 'sipHash128' function instead.
If you want to get the same result as output by the md5sum utility, use lower(hex(MD5(s))).
## sipHash64 { #hash_functions-siphash64}
## sipHash64 {#hash_functions-siphash64}
Produces a 64-bit [SipHash](https://131002.net/siphash/) hash value.
@ -74,7 +74,7 @@ SELECT sipHash64(array('e','x','a'), 'mple', 10, toDateTime('2019-06-15 23:00:00
└──────────────────────┴────────┘
```
## sipHash128 { #hash_functions-siphash128}
## sipHash128 {#hash_functions-siphash128}
Calculates SipHash from a string.
Accepts a String-type argument. Returns FixedString(16).
@ -175,7 +175,7 @@ SELECT farmHash64(array('e','x','a'), 'mple', 10, toDateTime('2019-06-15 23:00:0
└──────────────────────┴────────┘
```
## javaHash { #hash_functions-javahash}
## javaHash {#hash_functions-javahash}
Calculates [JavaHash](http://hg.openjdk.java.net/jdk8u/jdk8u/jdk/file/478a4add975b/src/share/classes/java/lang/String.java#l1452) from a string. This hash function is neither fast nor of good quality. The only reason to use it is when this algorithm is already used in another system and you have to calculate exactly the same result.
@ -205,7 +205,7 @@ Result:
└───────────────────────────┘
```
## javaHashUTF16LE { #javahashutf16le}
## javaHashUTF16LE {#javahashutf16le}
Calculates [JavaHash](http://hg.openjdk.java.net/jdk8u/jdk8u/jdk/file/478a4add975b/src/share/classes/java/lang/String.java#l1452) from a string, assuming it contains bytes representing a string in UTF-16LE encoding.
@ -241,7 +241,7 @@ Result:
└──────────────────────────────────────────────────────────────┘
```
## hiveHash { #hash_functions-hivehash}
## hiveHash {#hash_functions-hivehash}
Calculates `HiveHash` from a string.
@ -391,7 +391,7 @@ SELECT murmurHash3_128('example_string') AS MurmurHash3, toTypeName(MurmurHash3)
└──────────────────┴─────────────────┘
```
## xxHash32, xxHash64 { #hash_functions-xxhash32}
## xxHash32, xxHash64 {#hash_functions-xxhash32}
Calculates `xxHash` from a string. It comes in two flavors, 32-bit and 64-bit.

View File

@ -23,7 +23,7 @@ A lambda function can't be omitted for the following functions:
- [arrayFirst](#higher_order_functions-array-first)
- [arrayFirstIndex](#higher_order_functions-array-first-index)
### arrayMap(func, arr1, ...) { #higher_order_functions-array-map}
### arrayMap(func, arr1, ...) {#higher_order_functions-array-map}
Returns an array obtained by applying the `func` function to each element in the `arr` array.
@ -50,7 +50,7 @@ SELECT arrayMap((x, y) -> (x, y), [1, 2, 3], [4, 5, 6]) AS res
Note that the first argument (lambda function) can't be omitted in the `arrayMap` function.
### arrayFilter(func, arr1, ...) { #higher_order_functions-array-filter}
### arrayFilter(func, arr1, ...) {#higher_order_functions-array-filter}
Returns an array containing only the elements in `arr1` for which `func` returns something other than 0.
@ -83,7 +83,7 @@ SELECT
Note that the first argument (lambda function) can't be omitted in the `arrayFilter` function.
### arrayFill(func, arr1, ...) { #higher_order_functions-array-fill}
### arrayFill(func, arr1, ...) {#higher_order_functions-array-fill}
Scan through `arr1` from the first element to the last element and replace `arr1[i]` by `arr1[i - 1]` if `func` returns 0. The first element of `arr1` will not be replaced.
@ -101,7 +101,7 @@ SELECT arrayFill(x -> not isNull(x), [1, null, 3, 11, 12, null, null, 5, 6, 14,
Note that the first argument (lambda function) can't be omitted in the `arrayFill` function.
### arrayReverseFill(func, arr1, ...) { #higher_order_functions-array-reverse-fill}
### arrayReverseFill(func, arr1, ...) {#higher_order_functions-array-reverse-fill}
Scan through `arr1` from the last element to the first element and replace `arr1[i]` by `arr1[i + 1]` if `func` returns 0. The last element of `arr1` will not be replaced.
@ -119,7 +119,7 @@ SELECT arrayReverseFill(x -> not isNull(x), [1, null, 3, 11, 12, null, null, 5,
Note that the first argument (lambda function) can't be omitted in the `arrayReverseFill` function.
### arraySplit(func, arr1, ...) { #higher_order_functions-array-split}
### arraySplit(func, arr1, ...) {#higher_order_functions-array-split}
Split `arr1` into multiple arrays. When `func` returns something other than 0, the array will be split on the left hand side of the element. The array will not be split before the first element.
@ -137,7 +137,7 @@ SELECT arraySplit((x, y) -> y, [1, 2, 3, 4, 5], [1, 0, 0, 1, 0]) AS res
Note that the first argument (lambda function) can't be omitted in the `arraySplit` function.
### arrayReverseSplit(func, arr1, ...) { #higher_order_functions-array-reverse-split}
### arrayReverseSplit(func, arr1, ...) {#higher_order_functions-array-reverse-split}
Splits `arr1` into multiple arrays. When `func` returns something other than 0, the array is split on the right-hand side of the element. The array is not split after the last element.
@ -155,7 +155,7 @@ SELECT arrayReverseSplit((x, y) -> y, [1, 2, 3, 4, 5], [1, 0, 0, 1, 0]) AS res
Note that the first argument (lambda function) can't be omitted in the `arrayReverseSplit` function.
### arrayCount(\[func,\] arr1, ...) { #higher_order_functions-array-count}
### arrayCount(\[func,\] arr1, ...) {#higher_order_functions-array-count}
Returns the number of elements in the arr array for which func returns something other than 0. If 'func' is not specified, it returns the number of non-zero elements in the array.
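For example, counting the even elements of a literal array:

```sql
SELECT arrayCount(x -> x % 2 = 0, [1, 2, 3, 4, 5, 6]) AS even_count;
-- even_count is 3: the elements 2, 4 and 6 match the predicate
```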
@ -167,17 +167,17 @@ Returns 1 if there is at least one element in 'arr' for which 'func' returns som
Returns 1 if 'func' returns something other than 0 for all the elements in 'arr'. Otherwise, it returns 0.
### arraySum(\[func,\] arr1, ...) { #higher_order_functions-array-sum}
### arraySum(\[func,\] arr1, ...) {#higher_order_functions-array-sum}
Returns the sum of the 'func' values. If the function is omitted, it just returns the sum of the array elements.
### arrayFirst(func, arr1, ...) { #higher_order_functions-array-first}
### arrayFirst(func, arr1, ...) {#higher_order_functions-array-first}
Returns the first element in the 'arr1' array for which 'func' returns something other than 0.
Note that the first argument (lambda function) can't be omitted in the `arrayFirst` function.
### arrayFirstIndex(func, arr1, ...) { #higher_order_functions-array-first-index}
### arrayFirstIndex(func, arr1, ...) {#higher_order_functions-array-first-index}
Returns the index of the first element in the 'arr1' array for which 'func' returns something other than 0.
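To make the last three functions concrete, a short query over a literal array might look like this:

```sql
SELECT
    arraySum(x -> x * x, [1, 2, 3]) AS sum_of_squares,     -- 1 + 4 + 9 = 14
    arrayFirst(x -> x > 1, [1, 2, 3]) AS first_match,      -- 2
    arrayFirstIndex(x -> x > 1, [1, 2, 3]) AS first_index; -- 2 (indexes are 1-based)
```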

View File

@ -1,6 +1,6 @@
# Functions for implementing the IN operator
## in, notIn, globalIn, globalNotIn { #in-functions}
## in, notIn, globalIn, globalNotIn {#in-functions}
See the section [IN operators](../select.md#select-in-operators).

View File

@ -14,7 +14,7 @@ For proper operation of introspection functions:
ClickHouse saves profiler reports to the [trace_log](../../operations/system_tables.md#system_tables-trace_log) system table. Make sure the table and profiler are configured properly.
## addressToLine { #addresstoline}
## addressToLine {#addresstoline}
Converts a virtual memory address inside the ClickHouse server process to the filename and the line number in the ClickHouse source code.
@ -104,7 +104,7 @@ trace_source_code_lines: /lib/x86_64-linux-gnu/libpthread-2.27.so
/build/glibc-OTsEL5/glibc-2.27/misc/../sysdeps/unix/sysv/linux/x86_64/clone.S:97
```
## addressToSymbol { #addresstosymbol}
## addressToSymbol {#addresstosymbol}
Converts a virtual memory address inside the ClickHouse server process to the symbol from the ClickHouse object files.
@ -201,7 +201,7 @@ start_thread
clone
```
## demangle { #demangle}
## demangle {#demangle}
Converts a symbol that you can get using the [addressToSymbol](#addresstosymbol) function to the C++ function name.

View File

@ -1,6 +1,6 @@
# Machine learning functions
## evalMLMethod (prediction) { #machine_learning_methods-evalmlmethod}
## evalMLMethod (prediction) {#machine_learning_methods-evalmlmethod}
Prediction with fitted regression models uses the `evalMLMethod` function. See the link in `linearRegression`.

View File

@ -4,7 +4,7 @@
Returns a string with the name of the host that this function was performed on. For distributed processing, this is the name of the remote server host, if the function is performed on a remote server.
## FQDN { #fqdn}
## FQDN {#fqdn}
Returns the fully qualified domain name.
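Both functions can be queried directly; the returned names depend on the server the query runs on:

```sql
SELECT hostName() AS host, FQDN() AS fqdn;
```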
@ -109,7 +109,7 @@ Returns a string containing the type name of the passed argument.
If `NULL` is passed to the function as input, then it returns the `Nullable(Nothing)` type, which corresponds to an internal `NULL` representation in ClickHouse.
## blockSize() { #function-blocksize}
## blockSize() {#function-blocksize}
Gets the size of the block.
In ClickHouse, queries are always run on blocks (sets of column parts). This function allows getting the size of the block that you called it for.
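A quick sketch; the reported size depends on how the source happens to split rows into blocks:

```sql
SELECT blockSize() AS block_size
FROM numbers(5);
```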
@ -137,7 +137,7 @@ Sleeps 'seconds' seconds on each row. You can specify an integer or a floating-p
Returns the name of the current database.
You can use this function in table engine parameters in a CREATE TABLE query where you need to specify the database.
## currentUser() { #other_function-currentuser}
## currentUser() {#other_function-currentuser}
Returns the login of the current user. For a distributed query, the login of the user who initiated the query is returned.
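For example (on a stock installation this typically returns `default`):

```sql
SELECT currentUser() AS login;
```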
@ -178,7 +178,7 @@ Accepts Float32 and Float64 and returns UInt8 equal to 1 if the argument is not
Accepts Float32 and Float64 and returns UInt8 equal to 1 if the argument is infinite, otherwise 0. Note that 0 is returned for a NaN.
## ifNotFinite { #ifnotfinite}
## ifNotFinite {#ifnotfinite}
Checks whether a floating-point value is finite.
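Assuming the two-argument form `ifNotFinite(x, y)`, which returns `x` when it is finite and `y` otherwise, a short sketch:

```sql
SELECT
    1 / 0 AS infinite,                  -- floating-point division yields inf
    ifNotFinite(1 / 0, 42) AS replaced; -- inf is not finite, so 42 is returned
```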
@ -225,7 +225,7 @@ Accepts constant strings: database name, table name, and column name. Returns a
The function throws an exception if the table does not exist.
For elements in a nested data structure, the function checks for the existence of a column. For the nested data structure itself, the function returns 0.
## bar { #function-bar}
## bar {#function-bar}
Allows building a Unicode-art diagram.
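Assuming the `bar(value, min, max, width)` form, a small sketch that draws one bar per row:

```sql
SELECT
    number,
    bar(number, 0, 9, 20) AS chart
FROM numbers(10);
```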
@ -408,7 +408,7 @@ Returns the timezone of the server.
Returns the sequence number of the data block where the row is located.
## rowNumberInBlock { #function-rownumberinblock}
## rowNumberInBlock {#function-rownumberinblock}
Returns the ordinal number of the row in the data block. Different data blocks are always recalculated.
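A quick sketch; with a source this small all rows usually end up in a single block:

```sql
SELECT number, rowNumberInBlock() AS row_in_block
FROM numbers(3);
```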
@ -416,7 +416,7 @@ Returns the ordinal number of the row in the data block. Different data blocks a
Returns the ordinal number of the row in the data block. This function only considers the affected data blocks.
## neighbor { #neighbor}
## neighbor {#neighbor}
A window function that provides access to a row at a specified offset before or after the current row of a given column.
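For instance, looking one row ahead within a block (the last row has no next row, so the default value for the column type is returned):

```sql
SELECT
    number,
    neighbor(number, 1) AS next_value
FROM numbers(5);
```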
@ -527,7 +527,7 @@ Result:
└────────────┴───────┴───────────┴────────────────┘
```
## runningDifference(x) { #other_functions-runningdifference}
## runningDifference(x) {#other_functions-runningdifference}
Calculates the difference between successive row values in the data block.
Returns 0 for the first row and the difference from the previous row for each subsequent row.
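A short sketch over a monotonically increasing column:

```sql
SELECT
    number,
    runningDifference(number) AS diff
FROM numbers(5);
-- diff is 0 for the first row and 1 for every subsequent row
```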
@ -772,7 +772,7 @@ SELECT defaultValueOfArgumentType( CAST(1 AS Nullable(Int8) ) )
```
## replicate { #other_functions-replicate}
## replicate {#other_functions-replicate}
Creates an array with a single value.
@ -809,7 +809,7 @@ Result:
└───────────────────────────────┘
```
## filesystemAvailable { #filesystemavailable}
## filesystemAvailable {#filesystemavailable}
Returns the amount of remaining space on the filesystem where the database files are located. It is always smaller than the total free space ([filesystemFree](#filesystemfree)) because some space is reserved for the OS.
@ -841,7 +841,7 @@ Result:
└─────────────────┴────────┘
```
## filesystemFree { #filesystemfree}
## filesystemFree {#filesystemfree}
Returns the total amount of free space on the filesystem where the database files are located. See also `filesystemAvailable`.
@ -873,7 +873,7 @@ Result:
└────────────┴────────┘
```
## filesystemCapacity { #filesystemcapacity}
## filesystemCapacity {#filesystemcapacity}
Returns the capacity of the filesystem in bytes. For evaluation, the [path](../../operations/server_settings/settings.md#server_settings-path) to the data directory must be configured.
@ -905,17 +905,17 @@ Result:
└───────────┴────────┘
```
## finalizeAggregation { #function-finalizeaggregation}
## finalizeAggregation {#function-finalizeaggregation}
Takes a state of an aggregate function and returns the result of the aggregation (the finalized state).
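As a sketch, assuming a state can be built and finalized within the same query:

```sql
SELECT finalizeAggregation(sumState(number)) AS total
FROM numbers(10);
-- total is 45, the same result as sum(number)
```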
## runningAccumulate { #function-runningaccumulate}
## runningAccumulate {#function-runningaccumulate}
Takes the states of an aggregate function and returns a column of values that are the result of accumulating these states over a set of block rows, from the first row to the current one.
For example, it takes the state of an aggregate function (for example, `runningAccumulate(uniqState(UserID))`) and, for each row of the block, returns the result of the aggregate function over the merged states of all previous rows and the current row.
So the result of the function depends on how the data is partitioned into blocks and on the order of the data within a block.
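Under these rules, a sketch along the following lines accumulates per-row sum states (assuming the whole result fits in one block, so the order is deterministic):

```sql
SELECT
    k,
    runningAccumulate(sum_state) AS running_total
FROM
(
    SELECT
        number AS k,
        sumState(number) AS sum_state
    FROM numbers(5)
    GROUP BY number
    ORDER BY number
);
-- running_total is 0, 1, 3, 6, 10
```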
## joinGet { #joinget}
## joinGet {#joinget}
The function lets you extract data from the table the same way as from a [dictionary](../../query_language/dicts/index.md).
@ -978,7 +978,7 @@ Result:
└──────────────────────────────────────────────────┘
```
## modelEvaluate(model_name, ...) { #function-modelevaluate}
## modelEvaluate(model_name, ...) {#function-modelevaluate}
Evaluates an external model.
Accepts a model name and model arguments. Returns Float64.
@ -995,7 +995,7 @@ SELECT throwIf(number = 3, 'Too many') FROM numbers(10);
Code: 395. DB::Exception: Received from localhost:9000. DB::Exception: Too many.
```
## identity { #identity}
## identity {#identity}
Returns the same value that was used as its argument. Used for debugging and testing, it allows cancelling the use of an index and getting the query performance of a full scan. When a query is analyzed for possible use of an index, the analyzer doesn't look inside `identity` functions.
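A minimal sketch; the value passes through unchanged:

```sql
SELECT identity(42) AS value; -- returns 42
```

In a real query, wrapping an indexed column (a hypothetical `key` column here) as `identity(key) = 42` keeps the comparison out of index analysis and forces a full scan.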
@ -1021,7 +1021,7 @@ Result:
└──────────────┘
```
## randomPrintableASCII { #randomascii}
## randomPrintableASCII {#randomascii}
Generates a string with a random set of [ASCII](https://en.wikipedia.org/wiki/ASCII#Printable_characters) printable characters.
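For example, generating one 10-character random string per row:

```sql
SELECT number, randomPrintableASCII(10) AS str
FROM numbers(3);
```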

View File

@ -20,7 +20,7 @@ Returns the smallest round number that is greater than or equal to `x`. In every
Returns the round number whose absolute value is the largest one not exceeding the absolute value of `x`. In every other way, it is the same as the 'floor' function (see above).
## round(x\[, N\]) { #rounding_functions-round}
## round(x\[, N\]) {#rounding_functions-round}
Rounds a value to a specified number of decimal places.
@ -82,7 +82,7 @@ round(3.65, 1) = 3.6
- [roundBankers](#roundbankers)
## roundBankers { #roundbankers}
## roundBankers {#roundbankers}
Rounds a number to a specified decimal position.
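A short sketch of the round-half-to-even behaviour on ties:

```sql
SELECT
    roundBankers(0.5) AS a, -- 0
    roundBankers(1.5) AS b, -- 2
    roundBankers(2.5) AS c; -- 2
```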

Some files were not shown because too many files have changed in this diff.