Merge branch 'master' into fix_materialize_ttl_recalculate_only_test

This commit is contained in:
Jordi Villar 2023-03-18 09:18:50 +00:00 committed by GitHub
commit 3be6f42bfe
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
322 changed files with 5115 additions and 4831 deletions

View File

@ -162,56 +162,28 @@ Checks: '*,
WarningsAsErrors: '*'
# TODO: use dictionary syntax for CheckOptions when minimum clang-tidy level rose to 15
# some-check.SomeOption: 'some value'
# instead of
# - key: some-check.SomeOption
# value: 'some value'
CheckOptions:
- key: readability-identifier-naming.ClassCase
value: CamelCase
- key: readability-identifier-naming.EnumCase
value: CamelCase
- key: readability-identifier-naming.LocalVariableCase
value: lower_case
- key: readability-identifier-naming.StaticConstantCase
value: aNy_CasE
- key: readability-identifier-naming.MemberCase
value: lower_case
- key: readability-identifier-naming.PrivateMemberPrefix
value: ''
- key: readability-identifier-naming.ProtectedMemberPrefix
value: ''
- key: readability-identifier-naming.PublicMemberCase
value: lower_case
- key: readability-identifier-naming.MethodCase
value: camelBack
- key: readability-identifier-naming.PrivateMethodPrefix
value: ''
- key: readability-identifier-naming.ProtectedMethodPrefix
value: ''
- key: readability-identifier-naming.ParameterPackCase
value: lower_case
- key: readability-identifier-naming.StructCase
value: CamelCase
- key: readability-identifier-naming.TemplateTemplateParameterCase
value: CamelCase
- key: readability-identifier-naming.TemplateUsingCase
value: lower_case
- key: readability-identifier-naming.TypeTemplateParameterCase
value: CamelCase
- key: readability-identifier-naming.TypedefCase
value: CamelCase
- key: readability-identifier-naming.UnionCase
value: CamelCase
- key: readability-identifier-naming.UsingCase
value: CamelCase
- key: modernize-loop-convert.UseCxx20ReverseRanges
value: false
- key: performance-move-const-arg.CheckTriviallyCopyableMove
value: false
# Workaround clang-tidy bug: https://github.com/llvm/llvm-project/issues/46097
- key: readability-identifier-naming.TypeTemplateParameterIgnoredRegexp
value: expr-type
- key: cppcoreguidelines-avoid-do-while.IgnoreMacros
value: true
readability-identifier-naming.ClassCase: CamelCase
readability-identifier-naming.EnumCase: CamelCase
readability-identifier-naming.LocalVariableCase: lower_case
readability-identifier-naming.StaticConstantCase: aNy_CasE
readability-identifier-naming.MemberCase: lower_case
readability-identifier-naming.PrivateMemberPrefix: ''
readability-identifier-naming.ProtectedMemberPrefix: ''
readability-identifier-naming.PublicMemberCase: lower_case
readability-identifier-naming.MethodCase: camelBack
readability-identifier-naming.PrivateMethodPrefix: ''
readability-identifier-naming.ProtectedMethodPrefix: ''
readability-identifier-naming.ParameterPackCase: lower_case
readability-identifier-naming.StructCase: CamelCase
readability-identifier-naming.TemplateTemplateParameterCase: CamelCase
readability-identifier-naming.TemplateUsingCase: lower_case
readability-identifier-naming.TypeTemplateParameterCase: CamelCase
readability-identifier-naming.TypedefCase: CamelCase
readability-identifier-naming.UnionCase: CamelCase
readability-identifier-naming.UsingCase: CamelCase
modernize-loop-convert.UseCxx20ReverseRanges: false
performance-move-const-arg.CheckTriviallyCopyableMove: false
# Workaround clang-tidy bug: https://github.com/llvm/llvm-project/issues/46097
readability-identifier-naming.TypeTemplateParameterIgnoredRegexp: expr-type
cppcoreguidelines-avoid-do-while.IgnoreMacros: true

View File

@ -121,6 +121,7 @@ if (ENABLE_COLORED_BUILD AND CMAKE_GENERATOR STREQUAL "Ninja")
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fdiagnostics-color=always")
# ... such manually setting of flags can be removed once CMake supports a variable to
# activate colors in *all* build systems: https://gitlab.kitware.com/cmake/cmake/-/issues/15502
# --> available since CMake 3.24: https://stackoverflow.com/a/73349744
endif ()
include (cmake/check_flags.cmake)
@ -134,24 +135,15 @@ if (COMPILER_CLANG)
set(COMPILER_FLAGS "${COMPILER_FLAGS} -gdwarf-aranges")
endif ()
if (HAS_USE_CTOR_HOMING)
# For more info see https://blog.llvm.org/posts/2021-04-05-constructor-homing-for-debug-info/
if (CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG" OR CMAKE_BUILD_TYPE_UC STREQUAL "RELWITHDEBINFO")
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Xclang -fuse-ctor-homing")
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Xclang -fuse-ctor-homing")
endif()
# See https://blog.llvm.org/posts/2021-04-05-constructor-homing-for-debug-info/
if (CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG" OR CMAKE_BUILD_TYPE_UC STREQUAL "RELWITHDEBINFO")
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Xclang -fuse-ctor-homing")
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Xclang -fuse-ctor-homing")
endif()
no_warning(enum-constexpr-conversion) # breaks Protobuf in clang-16
endif ()
# If compiler has support for -Wreserved-identifier. It is difficult to detect by clang version,
# because there are two different branches of clang: clang and AppleClang.
# (AppleClang is not supported by ClickHouse, but some developers have misfortune to use it).
if (HAS_RESERVED_IDENTIFIER)
add_compile_definitions (HAS_RESERVED_IDENTIFIER)
endif ()
option(ENABLE_TESTS "Provide unit_test_dbms target with Google.Test unit tests" ON)
option(ENABLE_EXAMPLES "Build all example programs in 'examples' subdirectories" OFF)
option(ENABLE_BENCHMARKS "Build all benchmark programs in 'benchmarks' subdirectories" OFF)
@ -184,26 +176,12 @@ if (OS_DARWIN)
set (ENABLE_CURL_BUILD OFF)
endif ()
# Ignored if `lld` is used
option(ADD_GDB_INDEX_FOR_GOLD "Add .gdb-index to resulting binaries for gold linker.")
if (NOT CMAKE_BUILD_TYPE_UC STREQUAL "RELEASE")
# Can be lld or ld-lld or lld-13 or /path/to/lld.
if (LINKER_NAME MATCHES "lld" AND OS_LINUX)
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--gdb-index")
set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,--gdb-index")
message (STATUS "Adding .gdb-index via --gdb-index linker option.")
# we use another tool for gdb-index, because gold linker removes section .debug_aranges, which used inside clickhouse stacktraces
# http://sourceware-org.1504.n7.nabble.com/gold-No-debug-aranges-section-when-linking-with-gdb-index-td540965.html#a556932
elseif (LINKER_NAME MATCHES "gold$" AND ADD_GDB_INDEX_FOR_GOLD)
find_program (GDB_ADD_INDEX_EXE NAMES "gdb-add-index" DOC "Path to gdb-add-index executable")
if (NOT GDB_ADD_INDEX_EXE)
set (USE_GDB_ADD_INDEX 0)
message (WARNING "Cannot add gdb index to binaries, because gold linker is used, but gdb-add-index executable not found.")
else()
set (USE_GDB_ADD_INDEX 1)
message (STATUS "gdb-add-index found: ${GDB_ADD_INDEX_EXE}")
endif()
endif ()
endif()
@ -302,15 +280,16 @@ if (ENABLE_BUILD_PROFILING)
endif ()
set (CMAKE_CXX_STANDARD 23)
set (CMAKE_CXX_EXTENSIONS ON) # Same as gnu++2a (ON) vs c++2a (OFF): https://cmake.org/cmake/help/latest/prop_tgt/CXX_EXTENSIONS.html
set (CMAKE_CXX_EXTENSIONS OFF)
set (CMAKE_CXX_STANDARD_REQUIRED ON)
set (CMAKE_C_STANDARD 11)
set (CMAKE_C_EXTENSIONS ON)
set (CMAKE_C_EXTENSIONS ON) # required by most contribs written in C
set (CMAKE_C_STANDARD_REQUIRED ON)
if (COMPILER_GCC OR COMPILER_CLANG)
# Enable C++14 sized global deallocation functions. It should be enabled by setting -std=c++14 but I'm not sure.
# See https://reviews.llvm.org/D112921
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsized-deallocation")
endif ()
@ -329,11 +308,7 @@ if (ARCH_AMD64)
set(BRANCHES_WITHIN_32B_BOUNDARIES "-Wa,${BRANCHES_WITHIN_32B_BOUNDARIES}")
endif()
include(CheckCXXCompilerFlag)
check_cxx_compiler_flag("${BRANCHES_WITHIN_32B_BOUNDARIES}" HAS_BRANCHES_WITHIN_32B_BOUNDARIES)
if (HAS_BRANCHES_WITHIN_32B_BOUNDARIES)
set(COMPILER_FLAGS "${COMPILER_FLAGS} ${BRANCHES_WITHIN_32B_BOUNDARIES}")
endif()
set(COMPILER_FLAGS "${COMPILER_FLAGS} ${BRANCHES_WITHIN_32B_BOUNDARIES}")
endif()
if (COMPILER_GCC)
@ -445,6 +420,7 @@ option(WERROR "Enable -Werror compiler option" ON)
if (WERROR)
# Don't pollute CMAKE_CXX_FLAGS with -Werror as it will break some CMake checks.
# Instead, adopt modern cmake usage requirement.
# TODO: Set CMAKE_COMPILE_WARNING_AS_ERROR (cmake 3.24)
target_compile_options(global-group INTERFACE "-Werror")
endif ()

View File

@ -1,6 +1,4 @@
#ifdef HAS_RESERVED_IDENTIFIER
#pragma clang diagnostic ignored "-Wreserved-identifier"
#endif
/// This code was based on the code by Fedor Korotkiy https://www.linkedin.com/in/fedor-korotkiy-659a1838/

View File

@ -5,10 +5,8 @@ constexpr size_t KiB = 1024;
constexpr size_t MiB = 1024 * KiB;
constexpr size_t GiB = 1024 * MiB;
#ifdef HAS_RESERVED_IDENTIFIER
# pragma clang diagnostic push
# pragma clang diagnostic ignored "-Wreserved-identifier"
#endif
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wreserved-identifier"
// NOLINTBEGIN(google-runtime-int)
constexpr size_t operator"" _KiB(unsigned long long val) { return val * KiB; }
@ -16,6 +14,4 @@ constexpr size_t operator"" _MiB(unsigned long long val) { return val * MiB; }
constexpr size_t operator"" _GiB(unsigned long long val) { return val * GiB; }
// NOLINTEND(google-runtime-int)
#ifdef HAS_RESERVED_IDENTIFIER
# pragma clang diagnostic pop
#endif
#pragma clang diagnostic pop

View File

@ -27,9 +27,7 @@
#define _PATH_TTY "/dev/tty"
#endif
#ifdef HAS_RESERVED_IDENTIFIER
#pragma clang diagnostic ignored "-Wreserved-identifier"
#endif
#include <termios.h>
#include <signal.h>

View File

@ -1,7 +1,5 @@
include (CheckCXXCompilerFlag)
include (CheckCCompilerFlag)
check_cxx_compiler_flag("-Wreserved-identifier" HAS_RESERVED_IDENTIFIER)
check_cxx_compiler_flag("-Wsuggest-destructor-override" HAS_SUGGEST_DESTRUCTOR_OVERRIDE)
check_cxx_compiler_flag("-Wsuggest-override" HAS_SUGGEST_OVERRIDE)
check_cxx_compiler_flag("-Xclang -fuse-ctor-homing" HAS_USE_CTOR_HOMING)
# Set/unset variable based on existence of compiler flags. Example:
# check_cxx_compiler_flag("-Wreserved-identifier" HAS_RESERVED_IDENTIFIER)

View File

@ -50,15 +50,18 @@ endif ()
string (REGEX MATCHALL "[0-9]+" COMPILER_VERSION_LIST ${CMAKE_CXX_COMPILER_VERSION})
list (GET COMPILER_VERSION_LIST 0 COMPILER_VERSION_MAJOR)
# Example values: `lld-10`, `gold`.
# Example values: `lld-10`
option (LINKER_NAME "Linker name or full path")
if (LINKER_NAME MATCHES "gold")
message (FATAL_ERROR "Linking with gold is unsupported. Please use lld.")
endif ()
# s390x doesn't support lld
if (NOT ARCH_S390X)
if (NOT LINKER_NAME)
if (COMPILER_GCC)
find_program (LLD_PATH NAMES "ld.lld")
find_program (GOLD_PATH NAMES "ld.gold")
elseif (COMPILER_CLANG)
# llvm lld is a generic driver.
# Invoke ld.lld (Unix), ld64.lld (macOS), lld-link (Windows), wasm-ld (WebAssembly) instead
@ -67,13 +70,11 @@ if (NOT ARCH_S390X)
elseif (OS_DARWIN)
find_program (LLD_PATH NAMES "ld64.lld-${COMPILER_VERSION_MAJOR}" "ld64.lld")
endif ()
find_program (GOLD_PATH NAMES "ld.gold" "gold")
endif ()
endif()
endif()
if ((OS_LINUX OR OS_DARWIN) AND NOT LINKER_NAME)
# prefer lld linker over gold or ld on linux and macos
if (LLD_PATH)
if (COMPILER_GCC)
# GCC driver requires one of supported linker names like "lld".
@ -83,17 +84,6 @@ if ((OS_LINUX OR OS_DARWIN) AND NOT LINKER_NAME)
set (LINKER_NAME ${LLD_PATH})
endif ()
endif ()
if (NOT LINKER_NAME)
if (GOLD_PATH)
message (FATAL_ERROR "Linking with gold is unsupported. Please use lld.")
if (COMPILER_GCC)
set (LINKER_NAME "gold")
else ()
set (LINKER_NAME ${GOLD_PATH})
endif ()
endif ()
endif ()
endif ()
# TODO: allow different linker on != OS_LINUX

View File

@ -71,7 +71,7 @@ SELECT 1
| `global` | Same as `shard`. Prefer `shard` ||
| `zookeeper` | Test requires Zookeeper or ClickHouse Keeper to run | Test uses `ReplicatedMergeTree` |
| `replica` | Same as `zookeeper`. Prefer `zookeeper` ||
| `no-fasttest`| Test is not run under [Fast test](continuous-integration#fast-test) | Test uses `MySQL` table engine which is disabled in Fast test|
| `no-fasttest`| Test is not run under [Fast test](continuous-integration.md#fast-test) | Test uses `MySQL` table engine which is disabled in Fast test|
| `no-[asan, tsan, msan, ubsan]` | Disables tests in build with [sanitizers](#sanitizers) | Test is run under QEMU which doesn't work with sanitizers |
| `no-replicated-database` |||
| `no-ordinary-database` |||

View File

@ -4,5 +4,4 @@ collapsible: true
collapsed: true
link:
type: generated-index
title: Database & Table Engines
slug: /en/engines

View File

@ -180,4 +180,4 @@ Default value: `300`.
## See Also {#see-also}
- [The mysql table function](../../../sql-reference/table-functions/mysql.md)
- [Using MySQL as a dictionary source](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md#dicts-external_dicts_dict_sources-mysql)
- [Using MySQL as a dictionary source](../../../sql-reference/dictionaries/index.md#dictionary-sources)

View File

@ -126,5 +126,5 @@ SELECT * FROM odbc_t
## See Also {#see-also}
- [ODBC dictionaries](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md#dicts-external_dicts_dict_sources-odbc)
- [ODBC dictionaries](../../../sql-reference/dictionaries/index.md#dictionary-sources)
- [ODBC table function](../../../sql-reference/table-functions/odbc.md)

View File

@ -174,7 +174,7 @@ CREATE TABLE pg_table_schema_with_dots (a UInt32)
**See Also**
- [The `postgresql` table function](../../../sql-reference/table-functions/postgresql.md)
- [Using PostgreSQL as a dictionary source](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md#dicts-external_dicts_dict_sources-postgresql)
- [Using PostgreSQL as a dictionary source](../../../sql-reference/dictionaries/index.md#dictionary-sources)
## Related content
- Blog: [ClickHouse and PostgreSQL - a match made in data heaven - part 1](https://clickhouse.com/blog/migrating-data-between-clickhouse-postgres)

View File

@ -150,6 +150,7 @@ The following settings can be specified in configuration file for given endpoint
- `use_environment_credentials` — If set to `true`, S3 client will try to obtain credentials from environment variables and [Amazon EC2](https://en.wikipedia.org/wiki/Amazon_Elastic_Compute_Cloud) metadata for given endpoint. Optional, default value is `false`.
- `region` — Specifies S3 region name. Optional.
- `use_insecure_imds_request` — If set to `true`, S3 client will use insecure IMDS request while obtaining credentials from Amazon EC2 metadata. Optional, default value is `false`.
- `expiration_window_seconds` — Grace period for checking if expiration-based credentials have expired. Optional, default value is `120`.
- `header` — Adds specified HTTP header to a request to given endpoint. Optional, can be specified multiple times.
- `server_side_encryption_customer_key_base64` — If specified, required headers for accessing S3 objects with SSE-C encryption will be set. Optional.
- `max_single_read_retries` — The maximum number of attempts during single read. Default value is `4`. Optional.
@ -166,6 +167,7 @@ The following settings can be specified in configuration file for given endpoint
<!-- <region>us-west-1</region> -->
<!-- <use_environment_credentials>false</use_environment_credentials> -->
<!-- <use_insecure_imds_request>false</use_insecure_imds_request> -->
<!-- <expiration_window_seconds>120</expiration_window_seconds> -->
<!-- <header>Authorization: Bearer SOME-TOKEN</header> -->
<!-- <server_side_encryption_customer_key_base64>BASE64-ENCODED-KEY</server_side_encryption_customer_key_base64> -->
<!-- <max_single_read_retries>4</max_single_read_retries> -->

View File

@ -901,7 +901,7 @@ User can assign new big parts to different disks of a [JBOD](https://en.wikipedi
## Using S3 for Data Storage {#table_engine-mergetree-s3}
:::note
Google Cloud Storage (GCS) is also supported using the type `s3`. See [GCS backed MergeTree](/docs/en/integrations/data-ingestion/s3/gcs-merge-tree.md).
Google Cloud Storage (GCS) is also supported using the type `s3`. See [GCS backed MergeTree](/docs/en/integrations/gcs).
:::
`MergeTree` family table engines can store data to [S3](https://aws.amazon.com/s3/) using a disk with type `s3`.
@ -960,6 +960,7 @@ Optional parameters:
- `support_batch_delete` — This controls the check to see if batch deletes are supported. Set this to `false` when using Google Cloud Storage (GCS) as GCS does not support batch deletes and preventing the checks will prevent error messages in the logs.
- `use_environment_credentials` — Reads AWS credentials from the Environment variables AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY and AWS_SESSION_TOKEN if they exist. Default value is `false`.
- `use_insecure_imds_request` — If set to `true`, S3 client will use insecure IMDS request while obtaining credentials from Amazon EC2 metadata. Default value is `false`.
- `expiration_window_seconds` — Grace period for checking if expiration-based credentials have expired. Optional, default value is `120`.
- `proxy` — Proxy configuration for S3 endpoint. Each `uri` element inside `proxy` block should contain a proxy URL.
- `connect_timeout_ms` — Socket connect timeout in milliseconds. Default value is `10 seconds`.
- `request_timeout_ms` — Request timeout in milliseconds. Default value is `5 seconds`.

View File

@ -39,7 +39,7 @@ Compressed data for `INSERT` and `ALTER` queries is replicated (for more informa
- The `DROP TABLE` query deletes the replica located on the server where the query is run.
- The `RENAME` query renames the table on one of the replicas. In other words, replicated tables can have different names on different replicas.
ClickHouse uses [ClickHouse Keeper](/docs/en/guides/sre/keeper/clickhouse-keeper.md) for storing replicas meta information. It is possible to use ZooKeeper version 3.4.5 or newer, but ClickHouse Keeper is recommended.
ClickHouse uses [ClickHouse Keeper](/docs/en/guides/sre/keeper/index.md) for storing replicas meta information. It is possible to use ZooKeeper version 3.4.5 or newer, but ClickHouse Keeper is recommended.
To use replication, set parameters in the [zookeeper](/docs/en/operations/server-configuration-parameters/settings.md/#server-settings_zookeeper) server configuration section.
@ -144,7 +144,7 @@ ENGINE = ReplicatedReplacingMergeTree
The `Replicated` prefix is added to the table engine name. For example: `ReplicatedMergeTree`.
:::tip
Adding `Replicated` is optional in ClickHouse Cloud, as all of the tables are replicated.
Adding `Replicated` is optional in ClickHouse Cloud, as all of the tables are replicated.
:::
### Replicated\*MergeTree parameters

View File

@ -6,7 +6,7 @@ sidebar_label: Dictionary
# Dictionary Table Engine
The `Dictionary` engine displays the [dictionary](../../../sql-reference/dictionaries/external-dictionaries/external-dicts.md) data as a ClickHouse table.
The `Dictionary` engine displays the [dictionary](../../../sql-reference/dictionaries/index.md) data as a ClickHouse table.
## Example {#example}

View File

@ -184,7 +184,7 @@ The parameters `host`, `port`, and optionally `user`, `password`, `secure`, `com
- `host` The address of the remote server. You can use either the domain or the IPv4 or IPv6 address. If you specify the domain, the server makes a DNS request when it starts, and the result is stored as long as the server is running. If the DNS request fails, the server does not start. If you change the DNS record, restart the server.
- `port` The TCP port for messenger activity (`tcp_port` in the config, usually set to 9000). Not to be confused with `http_port`.
- `user` Name of the user for connecting to a remote server. Default value is the `default` user. This user must have access to connect to the specified server. Access is configured in the `users.xml` file. For more information, see the section [Access rights](../../../operations/access-rights.md).
- `user` Name of the user for connecting to a remote server. Default value is the `default` user. This user must have access to connect to the specified server. Access is configured in the `users.xml` file. For more information, see the section [Access rights](../../../guides/sre/user-management/index.md).
- `password` The password for connecting to a remote server (not masked). Default value: empty string.
- `secure` - Whether to use a secure SSL/TLS connection. Usually also requires specifying the port (the default secure port is `9440`). The server should listen on `<tcp_port_secure>9440</tcp_port_secure>` and be configured with correct certificates.
- `compression` - Use data compression. Default value: `true`.

View File

@ -1,9 +1,10 @@
---
slug: /en/getting-started/example-datasets/cell-towers
sidebar_label: Cell Towers
sidebar_label: Geo Data
sidebar_position: 3
title: "Cell Towers"
title: "Geo Data using the Cell Tower Dataset"
---
import ConnectionDetails from '@site/docs/en/_snippets/_gather_your_details_http.mdx';
import Tabs from '@theme/Tabs';
@ -163,7 +164,7 @@ SELECT mcc, count() FROM cell_towers GROUP BY mcc ORDER BY count() DESC LIMIT 10
Based on the above query and the [MCC list](https://en.wikipedia.org/wiki/Mobile_country_code), the countries with the most cell towers are: the USA, Germany, and Russia.
You may want to create a [Dictionary](../../sql-reference/dictionaries/external-dictionaries/external-dicts.md) in ClickHouse to decode these values.
You may want to create a [Dictionary](../../sql-reference/dictionaries/index.md) in ClickHouse to decode these values.
## Use case: Incorporate geo data {#use-case}

View File

@ -3,14 +3,56 @@ slug: /en/getting-started/example-datasets/criteo
sidebar_label: Terabyte Click Logs from Criteo
---
# Terabyte of Click Logs from Criteo
# Terabyte of Click Logs from Criteo
Download the data from http://labs.criteo.com/downloads/download-terabyte-click-logs/
Create a table to import the log to:
``` sql
CREATE TABLE criteo_log (date Date, clicked UInt8, int1 Int32, int2 Int32, int3 Int32, int4 Int32, int5 Int32, int6 Int32, int7 Int32, int8 Int32, int9 Int32, int10 Int32, int11 Int32, int12 Int32, int13 Int32, cat1 String, cat2 String, cat3 String, cat4 String, cat5 String, cat6 String, cat7 String, cat8 String, cat9 String, cat10 String, cat11 String, cat12 String, cat13 String, cat14 String, cat15 String, cat16 String, cat17 String, cat18 String, cat19 String, cat20 String, cat21 String, cat22 String, cat23 String, cat24 String, cat25 String, cat26 String) ENGINE = Log
CREATE TABLE criteo_log (
date Date,
clicked UInt8,
int1 Int32,
int2 Int32,
int3 Int32,
int4 Int32,
int5 Int32,
int6 Int32,
int7 Int32,
int8 Int32,
int9 Int32,
int10 Int32,
int11 Int32,
int12 Int32,
int13 Int32,
cat1 String,
cat2 String,
cat3 String,
cat4 String,
cat5 String,
cat6 String,
cat7 String,
cat8 String,
cat9 String,
cat10 String,
cat11 String,
cat12 String,
cat13 String,
cat14 String,
cat15 String,
cat16 String,
cat17 String,
cat18 String,
cat19 String,
cat20 String,
cat21 String,
cat22 String,
cat23 String,
cat24 String,
cat25 String,
cat26 String
) ENGINE = Log;
```
Download the data:
@ -73,7 +115,52 @@ ORDER BY (date, icat1)
Transform data from the raw log and put it in the second table:
``` sql
INSERT INTO criteo SELECT date, clicked, int1, int2, int3, int4, int5, int6, int7, int8, int9, int10, int11, int12, int13, reinterpretAsUInt32(unhex(cat1)) AS icat1, reinterpretAsUInt32(unhex(cat2)) AS icat2, reinterpretAsUInt32(unhex(cat3)) AS icat3, reinterpretAsUInt32(unhex(cat4)) AS icat4, reinterpretAsUInt32(unhex(cat5)) AS icat5, reinterpretAsUInt32(unhex(cat6)) AS icat6, reinterpretAsUInt32(unhex(cat7)) AS icat7, reinterpretAsUInt32(unhex(cat8)) AS icat8, reinterpretAsUInt32(unhex(cat9)) AS icat9, reinterpretAsUInt32(unhex(cat10)) AS icat10, reinterpretAsUInt32(unhex(cat11)) AS icat11, reinterpretAsUInt32(unhex(cat12)) AS icat12, reinterpretAsUInt32(unhex(cat13)) AS icat13, reinterpretAsUInt32(unhex(cat14)) AS icat14, reinterpretAsUInt32(unhex(cat15)) AS icat15, reinterpretAsUInt32(unhex(cat16)) AS icat16, reinterpretAsUInt32(unhex(cat17)) AS icat17, reinterpretAsUInt32(unhex(cat18)) AS icat18, reinterpretAsUInt32(unhex(cat19)) AS icat19, reinterpretAsUInt32(unhex(cat20)) AS icat20, reinterpretAsUInt32(unhex(cat21)) AS icat21, reinterpretAsUInt32(unhex(cat22)) AS icat22, reinterpretAsUInt32(unhex(cat23)) AS icat23, reinterpretAsUInt32(unhex(cat24)) AS icat24, reinterpretAsUInt32(unhex(cat25)) AS icat25, reinterpretAsUInt32(unhex(cat26)) AS icat26 FROM criteo_log;
INSERT INTO
criteo
SELECT
date,
clicked,
int1,
int2,
int3,
int4,
int5,
int6,
int7,
int8,
int9,
int10,
int11,
int12,
int13,
reinterpretAsUInt32(unhex(cat1)) AS icat1,
reinterpretAsUInt32(unhex(cat2)) AS icat2,
reinterpretAsUInt32(unhex(cat3)) AS icat3,
reinterpretAsUInt32(unhex(cat4)) AS icat4,
reinterpretAsUInt32(unhex(cat5)) AS icat5,
reinterpretAsUInt32(unhex(cat6)) AS icat6,
reinterpretAsUInt32(unhex(cat7)) AS icat7,
reinterpretAsUInt32(unhex(cat8)) AS icat8,
reinterpretAsUInt32(unhex(cat9)) AS icat9,
reinterpretAsUInt32(unhex(cat10)) AS icat10,
reinterpretAsUInt32(unhex(cat11)) AS icat11,
reinterpretAsUInt32(unhex(cat12)) AS icat12,
reinterpretAsUInt32(unhex(cat13)) AS icat13,
reinterpretAsUInt32(unhex(cat14)) AS icat14,
reinterpretAsUInt32(unhex(cat15)) AS icat15,
reinterpretAsUInt32(unhex(cat16)) AS icat16,
reinterpretAsUInt32(unhex(cat17)) AS icat17,
reinterpretAsUInt32(unhex(cat18)) AS icat18,
reinterpretAsUInt32(unhex(cat19)) AS icat19,
reinterpretAsUInt32(unhex(cat20)) AS icat20,
reinterpretAsUInt32(unhex(cat21)) AS icat21,
reinterpretAsUInt32(unhex(cat22)) AS icat22,
reinterpretAsUInt32(unhex(cat23)) AS icat23,
reinterpretAsUInt32(unhex(cat24)) AS icat24,
reinterpretAsUInt32(unhex(cat25)) AS icat25,
reinterpretAsUInt32(unhex(cat26)) AS icat26
FROM
criteo_log;
DROP TABLE criteo_log;
```

View File

@ -1,12 +1,13 @@
---
slug: /en/getting-started/example-datasets/github
sidebar_label: GitHub Repo Analysis
sidebar_label: GitHub Repo
sidebar_position: 1
description: Analyze the ClickHouse GitHub repo or any repository of your choosing
---
# ClickHouse GitHub data
# Writing Queries in ClickHouse using GitHub Data
This dataset contains all of the commits and changes for the ClickHouse repository. It can be generated using the native `git-import` tool distributed with ClickHouse.
This dataset contains all of the commits and changes for the ClickHouse repository. It can be generated using the native `git-import` tool distributed with ClickHouse.
The generated data provides a `tsv` file for each of the following tables:
@ -323,7 +324,7 @@ Note a more complex variant of this query exists where we find the [line-by-line
## Find the current active files
This is important for later analysis when we only want to consider the current files in the repository. We estimate this set as the files which haven't been renamed or deleted (and then re-added/re-named).
This is important for later analysis when we only want to consider the current files in the repository. We estimate this set as the files which haven't been renamed or deleted (and then re-added/re-named).
**Note there appears to have been a broken commit history in relation to files under the `dbms`, `libs`, `tests/testflows/` directories during their renames. We also thus exclude these.**
@ -417,7 +418,7 @@ git ls-files | grep -v -E 'generated\.cpp|^(contrib|docs?|website|libs/(libcityh
The difference here is caused by a few factors:
- A rename can occur alongside other modifications to the file. These are listed as separate events in file_changes but with the same time. The `argMax` function has no way of distinguishing these - it picks the first value. The natural ordering of the inserts (the only means of knowing the correct order) is not maintained across the union so modified events can be selected. For example, below the `src/Functions/geometryFromColumn.h` file has several modifications before being renamed to `src/Functions/geometryConverters.h`. Our current solution may pick a Modify event as the latest change causing `src/Functions/geometryFromColumn.h` to be retained.
- A rename can occur alongside other modifications to the file. These are listed as separate events in file_changes but with the same time. The `argMax` function has no way of distinguishing these - it picks the first value. The natural ordering of the inserts (the only means of knowing the correct order) is not maintained across the union so modified events can be selected. For example, below the `src/Functions/geometryFromColumn.h` file has several modifications before being renamed to `src/Functions/geometryConverters.h`. Our current solution may pick a Modify event as the latest change causing `src/Functions/geometryFromColumn.h` to be retained.
[play](https://play.clickhouse.com/play?user=play#U0VMRUNUCiAgICAgIGNoYW5nZV90eXBlLAogICAgICBwYXRoLAogICAgICBvbGRfcGF0aCwKICAgICAgdGltZSwKICAgICAgY29tbWl0X2hhc2gKICBGUk9NIGdpdF9jbGlja2hvdXNlLmZpbGVfY2hhbmdlcwogIFdIRVJFIChwYXRoID0gJ3NyYy9GdW5jdGlvbnMvZ2VvbWV0cnlGcm9tQ29sdW1uLmgnKSBPUiAob2xkX3BhdGggPSAnc3JjL0Z1bmN0aW9ucy9nZW9tZXRyeUZyb21Db2x1bW4uaCcpCg==)
@ -1386,7 +1387,7 @@ LIMIT 1 BY day_of_week
7 rows in set. Elapsed: 0.004 sec. Processed 21.82 thousand rows, 140.02 KB (4.88 million rows/s., 31.29 MB/s.)
```
This is still a little simple and doesn't reflect people's work.
This is still a little simple and doesn't reflect people's work.
A better metric might be who is the top contributor each day as a fraction of the total work performed in the last year. Note that we treat deleting and adding code equally.
@ -1952,7 +1953,7 @@ SELECT
Most contributors write more code than tests, as you'd expect.
What about who adds the most comments when contributing code?
What about who adds the most comments when contributing code?
[play](https://play.clickhouse.com/play?user=play#U0VMRUNUCiAgICBhdXRob3IsCiAgICBhdmcocmF0aW9fY29tbWVudHMpIEFTIGF2Z19yYXRpb19jb21tZW50cywKICAgIHN1bShjb2RlKSBBUyBjb2RlCkZST00KKAogICAgU0VMRUNUCiAgICAgICAgYXV0aG9yLAogICAgICAgIGNvbW1pdF9oYXNoLAogICAgICAgIGNvdW50SWYobGluZV90eXBlID0gJ0NvbW1lbnQnKSBBUyBjb21tZW50cywKICAgICAgICBjb3VudElmKGxpbmVfdHlwZSA9ICdDb2RlJykgQVMgY29kZSwKICAgICAgICBpZihjb21tZW50cyA+IDAsIGNvbW1lbnRzIC8gKGNvbW1lbnRzICsgY29kZSksIDApIEFTIHJhdGlvX2NvbW1lbnRzCiAgICBGUk9NIGdpdF9jbGlja2hvdXNlLmxpbmVfY2hhbmdlcwogICAgR1JPVVAgQlkKICAgICAgICBhdXRob3IsCiAgICAgICAgY29tbWl0X2hhc2gKKQpHUk9VUCBCWSBhdXRob3IKT1JERVIgQlkgY29kZSBERVNDCkxJTUlUIDEwCg==)
@ -2393,7 +2394,7 @@ WHERE (path = 'src/Storages/StorageReplicatedMergeTree.cpp') AND (change_type =
This makes viewing the full history of a file challenging since we don't have a single value connecting all line or file changes.
To address this, we can use User Defined Functions (UDFs). These cannot, currently, be recursive, so to identify the history of a file we must define a series of UDFs which call each other explicitly.
To address this, we can use User Defined Functions (UDFs). These cannot, currently, be recursive, so to identify the history of a file we must define a series of UDFs which call each other explicitly.
This means we can only track renames to a maximum depth - the below example is 5 deep. It is unlikely a file will be renamed more times than this, so for now, this is sufficient.

View File

@ -84,7 +84,7 @@ clickhouse-client --query "SELECT COUNT(*) FROM datasets.visits_v1"
1680609
```
## An example JOIN
## An example JOIN
The hits and visits dataset is used in the ClickHouse test
routines; this is one of the queries from the test suite. The rest
@ -131,10 +131,10 @@ FORMAT PrettyCompact"
## Next Steps
[A Practical Introduction to Sparse Primary Indexes in ClickHouse](../../guides/improving-query-performance/sparse-primary-indexes/sparse-primary-indexes-intro.md) uses the hits dataset to discuss the differences in ClickHouse indexing compared to traditional relational databases, how ClickHouse builds and uses a sparse primary index, and indexing best practices.
[A Practical Introduction to Sparse Primary Indexes in ClickHouse](/docs/en/guides/best-practices/sparse-primary-indexes.md) uses the hits dataset to discuss the differences in ClickHouse indexing compared to traditional relational databases, how ClickHouse builds and uses a sparse primary index, and indexing best practices.
Additional examples of queries to these tables can be found among the ClickHouse [stateful tests](https://github.com/ClickHouse/ClickHouse/blob/d7129855757f38ceec3e4ecc6dafacdabe9b178f/tests/queries/1_stateful/00172_parallel_join.sql).
:::note
The test suite uses a database name `test`, and the tables are named `hits` and `visits`. You can rename your database and tables, or edit the SQL from the test file.
The test suite uses a database name `test`, and the tables are named `hits` and `visits`. You can rename your database and tables, or edit the SQL from the test file.
:::

View File

@ -16,7 +16,7 @@ While working through this guide you will:
The dataset used in this guide comes from the NYC Open Data team, and contains data about "all valid felony, misdemeanor, and violation crimes reported to the New York City Police Department (NYPD)". At the time of writing, the data file is 166MB, but it is updated regularly.
**Source**: [data.cityofnewyork.us](https://data.cityofnewyork.us/Public-Safety/NYPD-Complaint-Data-Current-Year-To-Date-/5uac-w243)
**Source**: [data.cityofnewyork.us](https://data.cityofnewyork.us/Public-Safety/NYPD-Complaint-Data-Current-Year-To-Date-/5uac-w243)
**Terms of use**: https://www1.nyc.gov/home/terms-of-use.page
## Prerequisites
@ -35,7 +35,7 @@ The examples in this guide assume that you have saved the TSV file to `${HOME}/N
## Familiarize yourself with the TSV file
Before starting to work with the ClickHouse database familiarize yourself with the data.
Before starting to work with the ClickHouse database familiarize yourself with the data.
### Look at the fields in the source TSV file
@ -47,15 +47,15 @@ clickhouse-local --query \
Sample response
```response
CMPLNT_NUM Nullable(Float64)
ADDR_PCT_CD Nullable(Float64)
BORO_NM Nullable(String)
CMPLNT_FR_DT Nullable(String)
CMPLNT_FR_TM Nullable(String)
CMPLNT_NUM Nullable(Float64)
ADDR_PCT_CD Nullable(Float64)
BORO_NM Nullable(String)
CMPLNT_FR_DT Nullable(String)
CMPLNT_FR_TM Nullable(String)
```
:::tip
Most of the time the above command will let you know which fields in the input data are numeric, and which are strings, and which are tuples. This is not always the case. Because ClickHouse is routinely used with datasets containing billions of records, there is a default number (100) of rows examined to [infer the schema](../../guides/developer/working-with-json/json-semi-structured.md/#relying-on-schema-inference) in order to avoid parsing billions of rows to infer the schema. The response below may not match what you see, as the dataset is updated several times each year. Looking at the Data Dictionary you can see that CMPLNT_NUM is specified as text, and not numeric. By overriding the default of 100 rows for inference with the setting `SETTINGS input_format_max_rows_to_read_for_schema_inference=2000`
Most of the time the above command will let you know which fields in the input data are numeric, and which are strings, and which are tuples. This is not always the case. Because ClickHouse is routinely used with datasets containing billions of records, there is a default number (100) of rows examined to [infer the schema](/docs/en/integrations/data-ingestion/data-formats/json.md#relying-on-schema-inference) in order to avoid parsing billions of rows to infer the schema. The response below may not match what you see, as the dataset is updated several times each year. Looking at the Data Dictionary you can see that CMPLNT_NUM is specified as text, and not numeric. By overriding the default of 100 rows for inference with the setting `SETTINGS input_format_max_rows_to_read_for_schema_inference=2000`
you can get a better idea of the content.
Note: as of version 22.5 the default is now 25,000 rows for inferring the schema, so only change the setting if you are on an older version or if you need more than 25,000 rows to be sampled.
@ -65,46 +65,46 @@ Run this command at your command prompt. You will be using `clickhouse-local` t
```sh
clickhouse-local --input_format_max_rows_to_read_for_schema_inference=2000 \
--query \
"describe file('${HOME}/NYPD_Complaint_Data_Current__Year_To_Date_.tsv', 'TSVWithNames')"
"describe file('${HOME}/NYPD_Complaint_Data_Current__Year_To_Date_.tsv', 'TSVWithNames')"
```
Result:
```response
CMPLNT_NUM Nullable(String)
ADDR_PCT_CD Nullable(Float64)
BORO_NM Nullable(String)
CMPLNT_FR_DT Nullable(String)
CMPLNT_FR_TM Nullable(String)
CMPLNT_TO_DT Nullable(String)
CMPLNT_TO_TM Nullable(String)
CRM_ATPT_CPTD_CD Nullable(String)
HADEVELOPT Nullable(String)
HOUSING_PSA Nullable(Float64)
JURISDICTION_CODE Nullable(Float64)
JURIS_DESC Nullable(String)
KY_CD Nullable(Float64)
LAW_CAT_CD Nullable(String)
LOC_OF_OCCUR_DESC Nullable(String)
OFNS_DESC Nullable(String)
PARKS_NM Nullable(String)
PATROL_BORO Nullable(String)
PD_CD Nullable(Float64)
PD_DESC Nullable(String)
PREM_TYP_DESC Nullable(String)
RPT_DT Nullable(String)
STATION_NAME Nullable(String)
SUSP_AGE_GROUP Nullable(String)
SUSP_RACE Nullable(String)
SUSP_SEX Nullable(String)
TRANSIT_DISTRICT Nullable(Float64)
VIC_AGE_GROUP Nullable(String)
VIC_RACE Nullable(String)
VIC_SEX Nullable(String)
X_COORD_CD Nullable(Float64)
Y_COORD_CD Nullable(Float64)
Latitude Nullable(Float64)
Longitude Nullable(Float64)
Lat_Lon Tuple(Nullable(Float64), Nullable(Float64))
CMPLNT_NUM Nullable(String)
ADDR_PCT_CD Nullable(Float64)
BORO_NM Nullable(String)
CMPLNT_FR_DT Nullable(String)
CMPLNT_FR_TM Nullable(String)
CMPLNT_TO_DT Nullable(String)
CMPLNT_TO_TM Nullable(String)
CRM_ATPT_CPTD_CD Nullable(String)
HADEVELOPT Nullable(String)
HOUSING_PSA Nullable(Float64)
JURISDICTION_CODE Nullable(Float64)
JURIS_DESC Nullable(String)
KY_CD Nullable(Float64)
LAW_CAT_CD Nullable(String)
LOC_OF_OCCUR_DESC Nullable(String)
OFNS_DESC Nullable(String)
PARKS_NM Nullable(String)
PATROL_BORO Nullable(String)
PD_CD Nullable(Float64)
PD_DESC Nullable(String)
PREM_TYP_DESC Nullable(String)
RPT_DT Nullable(String)
STATION_NAME Nullable(String)
SUSP_AGE_GROUP Nullable(String)
SUSP_RACE Nullable(String)
SUSP_SEX Nullable(String)
TRANSIT_DISTRICT Nullable(Float64)
VIC_AGE_GROUP Nullable(String)
VIC_RACE Nullable(String)
VIC_SEX Nullable(String)
X_COORD_CD Nullable(Float64)
Y_COORD_CD Nullable(Float64)
Latitude Nullable(Float64)
Longitude Nullable(Float64)
Lat_Lon Tuple(Nullable(Float64), Nullable(Float64))
New Georeferenced Column Nullable(String)
```
@ -362,7 +362,7 @@ The dates shown as `1925` above are from errors in the data. There are several
The decisions made above on the data types used for the columns are reflected in the table schema
below. We also need to decide on the `ORDER BY` and `PRIMARY KEY` used for the table. At least one
of `ORDER BY` or `PRIMARY KEY` must be specified. Here are some guidelines on deciding on the
of `ORDER BY` or `PRIMARY KEY` must be specified. Here are some guidelines on deciding on the
columns to include in `ORDER BY`, and more information is in the *Next Steps* section at the end
of this document.
@ -420,7 +420,7 @@ ORDER BY ( borough, offense_description, date_reported )
Putting together the changes to data types and the `ORDER BY` tuple gives this table structure:
```sql
CREATE TABLE NYPD_Complaint (
CREATE TABLE NYPD_Complaint (
complaint_number String,
precinct UInt8,
borough LowCardinality(String),
@ -429,7 +429,7 @@ CREATE TABLE NYPD_Complaint (
was_crime_completed String,
housing_authority String,
housing_level_code UInt32,
jurisdiction_code UInt8,
jurisdiction_code UInt8,
jurisdiction LowCardinality(String),
offense_code UInt8,
offense_level LowCardinality(String),
@ -478,7 +478,7 @@ Query id: 6a5b10bf-9333-4090-b36e-c7f08b1d9e01
Row 1:
──────
partition_key:
partition_key:
sorting_key: borough, offense_description, date_reported
primary_key: borough, offense_description, date_reported
table: NYPD_Complaint
@ -495,7 +495,7 @@ We will use `clickhouse-local` tool for data preprocessing and `clickhouse-clien
:::tip
`table='input'` appears in the arguments to clickhouse-local below. clickhouse-local takes the provided input (`cat ${HOME}/NYPD_Complaint_Data_Current__Year_To_Date_.tsv`) and inserts the input into a table. By default the table is named `table`. In this guide the name of the table is set to `input` to make the data flow clearer. The final argument to clickhouse-local is a query that selects from the table (`FROM input`) which is then piped to `clickhouse-client` to populate the table `NYPD_Complaint`.
:::
```sql
cat ${HOME}/NYPD_Complaint_Data_Current__Year_To_Date_.tsv \
| clickhouse-local --table='input' --input-format='TSVWithNames' \
@ -512,12 +512,12 @@ cat ${HOME}/NYPD_Complaint_Data_Current__Year_To_Date_.tsv \
CRM_ATPT_CPTD_CD AS was_crime_completed,
HADEVELOPT AS housing_authority_development,
HOUSING_PSA AS housing_level_code,
JURISDICTION_CODE AS jurisdiction_code,
JURISDICTION_CODE AS jurisdiction_code,
JURIS_DESC AS jurisdiction,
KY_CD AS offense_code,
LAW_CAT_CD AS offense_level,
LOC_OF_OCCUR_DESC AS location_descriptor,
OFNS_DESC AS offense_description,
OFNS_DESC AS offense_description,
PARKS_NM AS park_name,
PATROL_BORO AS patrol_borough,
PD_CD,
@ -529,7 +529,7 @@ cat ${HOME}/NYPD_Complaint_Data_Current__Year_To_Date_.tsv \
SUSP_RACE AS suspect_race,
SUSP_SEX AS suspect_sex,
TRANSIT_DISTRICT AS transit_district,
VIC_AGE_GROUP AS victim_age_group,
VIC_AGE_GROUP AS victim_age_group,
VIC_RACE AS victim_race,
VIC_SEX AS victim_sex,
X_COORD_CD AS NY_x_coordinate,
@ -538,7 +538,7 @@ cat ${HOME}/NYPD_Complaint_Data_Current__Year_To_Date_.tsv \
Longitude
FROM input" \
| clickhouse-client --query='INSERT INTO NYPD_Complaint FORMAT TSV'
```
```
## Validate the Data {#validate-data}
@ -560,7 +560,7 @@ Result:
│ 208993 │
└─────────┘
1 row in set. Elapsed: 0.001 sec.
1 row in set. Elapsed: 0.001 sec.
```
The size of the dataset in ClickHouse is just 12% of the original TSV file, compare the size of the original TSV file with the size of the table:
@ -651,4 +651,4 @@ Query id: 8cdcdfd4-908f-4be0-99e3-265722a2ab8d
## Next Steps
[A Practical Introduction to Sparse Primary Indexes in ClickHouse](../../guides/improving-query-performance/sparse-primary-indexes/sparse-primary-indexes-intro.md) discusses the differences in ClickHouse indexing compared to traditional relational databases, how ClickHouse builds and uses a sparse primary index, and indexing best practices.
[A Practical Introduction to Sparse Primary Indexes in ClickHouse](/docs/en/guides/best-practices/sparse-primary-indexes.md) discusses the differences in ClickHouse indexing compared to traditional relational databases, how ClickHouse builds and uses a sparse primary index, and indexing best practices.

View File

@ -80,7 +80,7 @@ Result:
### Top Components by the Number of Recipes:
In this example we learn how to use [arrayJoin](../../sql-reference/functions/array-join/) function to expand an array into a set of rows.
In this example we learn how to use the [arrayJoin](../../sql-reference/functions/array-join.md) function to expand an array into a set of rows.
Query:
@ -185,7 +185,7 @@ Result:
10 rows in set. Elapsed: 0.215 sec. Processed 2.23 million rows, 1.48 GB (10.35 million rows/s., 6.86 GB/s.)
```
In this example, we involve [has](../../sql-reference/functions/array-functions/#hasarr-elem) function to filter by array elements and sort by the number of directions.
In this example, we use the [has](../../sql-reference/functions/array-functions.md#hasarr-elem) function to filter by array elements and sort by the number of directions.
There is a wedding cake that requires the whole 126 steps to produce! Show those directions:

View File

@ -1,17 +1,17 @@
---
slug: /en/getting-started/example-datasets/uk-price-paid
sidebar_label: UK Property Price Paid
sidebar_label: UK Property Prices
sidebar_position: 1
title: "UK Property Price Paid"
---
The dataset contains data about prices paid for real-estate property in England and Wales. The data is available since year 1995.
The size of the dataset in uncompressed form is about 4 GiB and it will take about 278 MiB in ClickHouse.
# The UK property prices dataset
Source: https://www.gov.uk/government/statistical-data-sets/price-paid-data-downloads
Description of the fields: https://www.gov.uk/guidance/about-the-price-paid-data
Projections are a great way to improve the performance of queries that you run frequently. We will demonstrate the power of projections
using the UK property dataset, which contains data about prices paid for real-estate property in England and Wales. The data is available since 1995, and the size of the dataset in uncompressed form is about 4 GiB (which will only take about 278 MiB in ClickHouse).
Contains HM Land Registry data © Crown copyright and database right 2021. This data is licensed under the Open Government Licence v3.0.
- Source: https://www.gov.uk/government/statistical-data-sets/price-paid-data-downloads
- Description of the fields: https://www.gov.uk/guidance/about-the-price-paid-data
- Contains HM Land Registry data © Crown copyright and database right 2021. This data is licensed under the Open Government Licence v3.0.
## Create the Table {#create-table}

View File

@ -14,75 +14,35 @@ import CodeBlock from '@theme/CodeBlock';
You have three options for getting up and running with ClickHouse:
- **[ClickHouse Cloud](https://clickhouse.com/cloud/):** The official ClickHouse as a service - built, maintained, and supported by the creators of ClickHouse
- **[Self-managed ClickHouse](#self-managed-install):** ClickHouse can run on any Linux, FreeBSD, or macOS with x86-64, ARM, or PowerPC64LE CPU architecture
- **[Docker Image](https://hub.docker.com/r/clickhouse/clickhouse-server/):** Read the guide with the official image in Docker Hub
- **[Quick Install](#quick-install):** an easy-to-download binary for testing and developing with ClickHouse
- **[Production Deployments](#available-installation-options):** ClickHouse can run on any Linux, FreeBSD, or macOS with x86-64, ARM, or PowerPC64LE CPU architecture
- **[Docker Image](https://hub.docker.com/r/clickhouse/clickhouse-server/):** use the official Docker image in Docker Hub
## ClickHouse Cloud
The quickest and easiest way to get up and running with ClickHouse is to create a new service in [ClickHouse Cloud](https://clickhouse.cloud/).
## Self-Managed Install
## Quick Install
:::tip
For production installs of a specific release version see the [installation options](#available-installation-options) down below.
:::
<Tabs>
<TabItem value="linux" label="Linux" default>
On Linux and macOS:
1. The simplest way to download ClickHouse locally is to run the following command. If your operating system is supported, an appropriate ClickHouse binary will be downloaded and made runnable:
1. If you are just getting started and want to see what ClickHouse can do, the simplest way to download ClickHouse locally is to run the following command. It downloads a single binary for your operating system that can be used to run the ClickHouse server, clickhouse-client, clickhouse-local,
ClickHouse Keeper, and other tools:
```bash
curl https://clickhouse.com/ | sh
```
1. Run the `install` command, which defines a collection of useful symlinks along with the files and folders used by ClickHouse - all of which you can see in the output of the install script:
```bash
sudo ./clickhouse install
```
1. At the end of the install script, you are prompted for a password for the `default` user. Feel free to enter a password, or you can optionally leave it blank:
```response
Creating log directory /var/log/clickhouse-server.
Creating data directory /var/lib/clickhouse.
Creating pid directory /var/run/clickhouse-server.
chown -R clickhouse:clickhouse '/var/log/clickhouse-server'
chown -R clickhouse:clickhouse '/var/run/clickhouse-server'
chown clickhouse:clickhouse '/var/lib/clickhouse'
Enter password for default user:
```
You should see the following output:
```response
ClickHouse has been successfully installed.
Start clickhouse-server with:
sudo clickhouse start
Start clickhouse-client with:
clickhouse-client
```
1. Run the following command to start the ClickHouse server:
```bash
sudo clickhouse start
./clickhouse server
```
</TabItem>
<TabItem value="macos" label="macOS">
1. The simplest way to download ClickHouse locally is to run the following command. If your operating system is supported, an appropriate ClickHouse binary will be downloaded and made runnable:
```bash
curl https://clickhouse.com/ | sh
```
1. Run the ClickHouse server:
```bash
./clickhouse server
```
The first time you run this script, the necessary files and folders are created in the current directory, then the server starts.
1. Open a new terminal and use the **clickhouse-client** to connect to your service:
@ -101,15 +61,14 @@ For production installs of a specific release version see the [installation opti
You are ready to start sending DDL and SQL commands to ClickHouse!
</TabItem>
</Tabs>
:::tip
The [Quick Start](/docs/en/quick-start.mdx/#step-1-get-clickhouse) walks through the steps to download and run ClickHouse, connect to it, and insert data.
The [Quick Start](/docs/en/quick-start.mdx) walks through the steps for creating tables and inserting data.
:::
## Available Installation Options {#available-installation-options}
## Production Deployments {#available-installation-options}
For production deployments of ClickHouse, choose from one of the following install options.
### From DEB Packages {#install-from-deb-packages}
@ -174,7 +133,7 @@ clickhouse-client # or "clickhouse-client --password" if you set up a password.
</details>
You can replace `stable` with `lts` to use different [release kinds](/docs/en/faq/operations/production.md) based on your needs.
You can replace `stable` with `lts` to use different [release kinds](/knowledgebase/production) based on your needs.
You can also download and install packages manually from [here](https://packages.clickhouse.com/deb/pool/main/c/).
@ -272,7 +231,7 @@ clickhouse-client # or "clickhouse-client --password" if you set up a password.
</details>
You can replace `stable` with `lts` to use different [release kinds](/docs/en/faq/operations/production.md) based on your needs.
You can replace `stable` with `lts` to use different [release kinds](/knowledgebase/production) based on your needs.
Then run these commands to install packages:

View File

@ -1,5 +1,5 @@
---
sidebar_label: Playground
sidebar_label: ClickHouse Playground
sidebar_position: 2
keywords: [clickhouse, playground, getting, started, docs]
description: The ClickHouse Playground allows people to experiment with ClickHouse by running queries instantly, without setting up their server or cluster.
@ -11,7 +11,7 @@ slug: /en/getting-started/playground
[ClickHouse Playground](https://play.clickhouse.com/play?user=play) allows people to experiment with ClickHouse by running queries instantly, without setting up their server or cluster.
Several example datasets are available in Playground.
You can make queries to Playground using any HTTP client, for example [curl](https://curl.haxx.se) or [wget](https://www.gnu.org/software/wget/), or set up a connection using [JDBC](../interfaces/jdbc.md) or [ODBC](../interfaces/odbc.md) drivers. More information about software products that support ClickHouse is available [here](../interfaces).
You can make queries to Playground using any HTTP client, for example [curl](https://curl.haxx.se) or [wget](https://www.gnu.org/software/wget/), or set up a connection using [JDBC](../interfaces/jdbc.md) or [ODBC](../interfaces/odbc.md) drivers. More information about software products that support ClickHouse is available [here](../integrations/index.mdx).
## Credentials {#credentials}

View File

@ -1,7 +1,7 @@
---
slug: /en/interfaces/formats
sidebar_position: 21
sidebar_label: Input and Output Formats
sidebar_label: View all formats...
title: Formats for Input and Output Data
---
@ -684,7 +684,7 @@ Example:
## JSONColumns {#jsoncolumns}
:::tip
The output of the JSONColumns* formats provides the ClickHouse field name and then the content of each row of the table for that field;
The output of the JSONColumns* formats provides the ClickHouse field name and then the content of each row of the table for that field;
visually, the data is rotated 90 degrees to the left.
:::

View File

@ -8,7 +8,7 @@ sidebar_label: PostgreSQL Interface
ClickHouse supports the PostgreSQL wire protocol, which allows you to use Postgres clients to connect to ClickHouse. In a sense, ClickHouse can pretend to be a PostgreSQL instance - allowing you to connect a PostgreSQL client application to ClickHouse that is not already directly supported by ClickHouse (for example, Amazon Redshift).
To enable the PostgreSQL wire protocol, add the [postgresql_port](../operations/server-configuration-parameters/settings#server_configuration_parameters-postgresql_port) setting to your server's configuration file. For example, you could define the port in a new XML file in your `config.d` folder:
To enable the PostgreSQL wire protocol, add the [postgresql_port](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-postgresql_port) setting to your server's configuration file. For example, you could define the port in a new XML file in your `config.d` folder:
```xml
<clickhouse>

View File

@ -2,7 +2,3 @@ position: 70
label: 'Operations'
collapsible: true
collapsed: true
link:
type: generated-index
title: Operations
slug: /en/operations

View File

@ -1,152 +0,0 @@
---
slug: /en/operations/access-rights
sidebar_position: 48
sidebar_label: Access Control and Account Management
title: Access Control and Account Management
---
ClickHouse supports access control management based on [RBAC](https://en.wikipedia.org/wiki/Role-based_access_control) approach.
ClickHouse access entities:
- [User account](#user-account-management)
- [Role](#role-management)
- [Row Policy](#row-policy-management)
- [Settings Profile](#settings-profiles-management)
- [Quota](#quotas-management)
You can configure access entities using:
- SQL-driven workflow.
You need to [enable](#enabling-access-control) this functionality.
- Server [configuration files](../operations/configuration-files.md) `users.xml` and `config.xml`.
We recommend using SQL-driven workflow. Both of the configuration methods work simultaneously, so if you use the server configuration files for managing accounts and access rights, you can smoothly switch to SQL-driven workflow.
:::warning
You can't manage the same access entity by both configuration methods simultaneously.
:::
To see all users, roles, profiles, etc. and all their grants use [SHOW ACCESS](../sql-reference/statements/show.md#show-access-statement) statement.
## Usage {#access-control-usage}
By default, the ClickHouse server provides the `default` user account which is not allowed using SQL-driven access control and account management but has all the rights and permissions. The `default` user account is used in any cases when the username is not defined, for example, at login from client or in distributed queries. In distributed query processing a default user account is used, if the configuration of the server or cluster does not specify the [user and password](../engines/table-engines/special/distributed.md) properties.
If you just started using ClickHouse, consider the following scenario:
1. [Enable](#enabling-access-control) SQL-driven access control and account management for the `default` user.
2. Log in to the `default` user account and create all the required users. Don't forget to create an administrator account (`GRANT ALL ON *.* TO admin_user_account WITH GRANT OPTION`).
3. [Restrict permissions](../operations/settings/permissions-for-queries.md#permissions_for_queries) for the `default` user and disable SQL-driven access control and account management for it.
### Properties of Current Solution {#access-control-properties}
- You can grant permissions for databases and tables even if they do not exist.
- If a table was deleted, all the privileges that correspond to this table are not revoked. This means that even if you create a new table with the same name later, all the privileges remain valid. To revoke privileges corresponding to the deleted table, you need to execute, for example, the `REVOKE ALL PRIVILEGES ON db.table FROM ALL` query.
- There are no lifetime settings for privileges.
## User Account {#user-account-management}
A user account is an access entity that allows you to authorize someone in ClickHouse. A user account contains:
- Identification information.
- [Privileges](../sql-reference/statements/grant.md#grant-privileges) that define a scope of queries the user can execute.
- Hosts allowed to connect to the ClickHouse server.
- Assigned and default roles.
- Settings with their constraints applied by default at user login.
- Assigned settings profiles.
Privileges can be granted to a user account by the [GRANT](../sql-reference/statements/grant.md) query or by assigning [roles](#role-management). To revoke privileges from a user, ClickHouse provides the [REVOKE](../sql-reference/statements/revoke.md) query. To list privileges for a user, use the [SHOW GRANTS](../sql-reference/statements/show.md#show-grants-statement) statement.
Management queries:
- [CREATE USER](../sql-reference/statements/create/user.md)
- [ALTER USER](../sql-reference/statements/alter/user.md#alter-user-statement)
- [DROP USER](../sql-reference/statements/drop.md)
- [SHOW CREATE USER](../sql-reference/statements/show.md#show-create-user-statement)
- [SHOW USERS](../sql-reference/statements/show.md#show-users-statement)
### Settings Applying {#access-control-settings-applying}
Settings can be configured differently: for a user account, in its granted roles and in settings profiles. At user login, if a setting is configured for different access entities, the value and constraints of this setting are applied as follows (from higher to lower priority):
1. User account settings.
2. The settings of default roles of the user account. If a setting is configured in some roles, then order of the setting application is undefined.
3. The settings from settings profiles assigned to a user or to its default roles. If a setting is configured in some profiles, then order of setting application is undefined.
4. Settings applied to all the server by default or from the [default profile](../operations/server-configuration-parameters/settings.md#default-profile).
## Role {#role-management}
Role is a container for access entities that can be granted to a user account.
Role contains:
- [Privileges](../sql-reference/statements/grant.md#grant-privileges)
- Settings and constraints
- List of assigned roles
Management queries:
- [CREATE ROLE](../sql-reference/statements/create/role.md)
- [ALTER ROLE](../sql-reference/statements/alter/role.md#alter-role-statement)
- [DROP ROLE](../sql-reference/statements/drop.md)
- [SET ROLE](../sql-reference/statements/set-role.md)
- [SET DEFAULT ROLE](../sql-reference/statements/set-role.md#set-default-role-statement)
- [SHOW CREATE ROLE](../sql-reference/statements/show.md#show-create-role-statement)
- [SHOW ROLES](../sql-reference/statements/show.md#show-roles-statement)
Privileges can be granted to a role by the [GRANT](../sql-reference/statements/grant.md) query. To revoke privileges from a role ClickHouse provides the [REVOKE](../sql-reference/statements/revoke.md) query.
## Row Policy {#row-policy-management}
Row policy is a filter that defines which of the rows are available to a user or a role. Row policy contains filters for one particular table, as well as a list of roles and/or users which should use this row policy.
:::warning
Row policies make sense only for users with read-only access. If a user can modify a table or copy partitions between tables, it defeats the restrictions of row policies.
:::
Management queries:
- [CREATE ROW POLICY](../sql-reference/statements/create/row-policy.md)
- [ALTER ROW POLICY](../sql-reference/statements/alter/row-policy.md#alter-row-policy-statement)
- [DROP ROW POLICY](../sql-reference/statements/drop.md#drop-row-policy-statement)
- [SHOW CREATE ROW POLICY](../sql-reference/statements/show.md#show-create-row-policy-statement)
- [SHOW POLICIES](../sql-reference/statements/show.md#show-policies-statement)
## Settings Profile {#settings-profiles-management}
Settings profile is a collection of [settings](../operations/settings/index.md). Settings profile contains settings and constraints, as well as a list of roles and/or users to which this profile is applied.
Management queries:
- [CREATE SETTINGS PROFILE](../sql-reference/statements/create/settings-profile.md#create-settings-profile-statement)
- [ALTER SETTINGS PROFILE](../sql-reference/statements/alter/settings-profile.md#alter-settings-profile-statement)
- [DROP SETTINGS PROFILE](../sql-reference/statements/drop.md#drop-settings-profile-statement)
- [SHOW CREATE SETTINGS PROFILE](../sql-reference/statements/show.md#show-create-settings-profile-statement)
- [SHOW PROFILES](../sql-reference/statements/show.md#show-profiles-statement)
## Quota {#quotas-management}
Quota limits resource usage. See [Quotas](../operations/quotas.md).
Quota contains a set of limits for some durations, as well as a list of roles and/or users which should use this quota.
Management queries:
- [CREATE QUOTA](../sql-reference/statements/create/quota.md)
- [ALTER QUOTA](../sql-reference/statements/alter/quota.md#alter-quota-statement)
- [DROP QUOTA](../sql-reference/statements/drop.md#drop-quota-statement)
- [SHOW CREATE QUOTA](../sql-reference/statements/show.md#show-create-quota-statement)
- [SHOW QUOTA](../sql-reference/statements/show.md#show-quota-statement)
- [SHOW QUOTAS](../sql-reference/statements/show.md#show-quotas-statement)
## Enabling SQL-driven Access Control and Account Management {#enabling-access-control}
- Setup a directory for configurations storage.
ClickHouse stores access entity configurations in the folder set in the [access_control_path](../operations/server-configuration-parameters/settings.md#access_control_path) server configuration parameter.
- Enable SQL-driven access control and account management for at least one user account.
By default, SQL-driven access control and account management is disabled for all users. You need to configure at least one user in the `users.xml` configuration file and set the value of the [access_management](../operations/settings/settings-users.md#access_management-user-setting) setting to 1.

View File

@ -1,5 +1,6 @@
---
slug: /en/operations/backup
description: In order to effectively mitigate possible human errors, you should carefully prepare a strategy for backing up and restoring your data.
---
# Backup and Restore
@ -213,7 +214,7 @@ To write backups to an S3 bucket you need three pieces of information:
for example `Abc+123`
:::note
Creating an S3 bucket is covered in [Use S3 Object Storage as a ClickHouse disk](/docs/en/integrations/data-ingestion/s3/configuring-s3-for-clickhouse-use.md), just come back to this doc after saving the policy, there is no need to configure ClickHouse to use the S3 bucket.
Creating an S3 bucket is covered in [Use S3 Object Storage as a ClickHouse disk](/docs/en/integrations/data-ingestion/s3/index.md#configuring-s3-for-clickhouse-use), just come back to this doc after saving the policy, there is no need to configure ClickHouse to use the S3 bucket.
:::
The destination for a backup will be specified like this:

View File

@ -3,6 +3,7 @@ slug: /en/operations/caches
sidebar_position: 65
sidebar_label: Caches
title: "Cache Types"
description: When performing queries, ClickHouse uses different caches.
---
When performing queries, ClickHouse uses different caches.

View File

@ -1,378 +0,0 @@
---
slug: /en/operations/clickhouse-keeper
sidebar_position: 66
sidebar_label: ClickHouse Keeper
---
# ClickHouse Keeper
import SelfManaged from '@site/docs/en/_snippets/_self_managed_only_automated.md';
<SelfManaged />
ClickHouse Keeper provides the coordination system for data [replication](../engines/table-engines/mergetree-family/replication.md) and [distributed DDL](../sql-reference/distributed-ddl.md) queries execution. ClickHouse Keeper is compatible with ZooKeeper.
## Implementation details {#implementation-details}
ZooKeeper is one of the first well-known open-source coordination systems. It's implemented in Java, and has quite a simple and powerful data model. ZooKeeper's coordination algorithm, ZooKeeper Atomic Broadcast (ZAB), doesn't provide linearizability guarantees for reads, because each ZooKeeper node serves reads locally. Unlike ZooKeeper, ClickHouse Keeper is written in C++ and uses the [RAFT algorithm](https://raft.github.io/) [implementation](https://github.com/eBay/NuRaft). This algorithm allows linearizability for reads and writes, and has several open-source implementations in different languages.
By default, ClickHouse Keeper provides the same guarantees as ZooKeeper (linearizable writes, non-linearizable reads). It has a compatible client-server protocol, so any standard ZooKeeper client can be used to interact with ClickHouse Keeper. Snapshots and logs have an incompatible format with ZooKeeper, but the `clickhouse-keeper-converter` tool enables the conversion of ZooKeeper data to ClickHouse Keeper snapshots. The interserver protocol in ClickHouse Keeper is also incompatible with ZooKeeper so a mixed ZooKeeper / ClickHouse Keeper cluster is impossible.
ClickHouse Keeper supports Access Control Lists (ACLs) the same way as [ZooKeeper](https://zookeeper.apache.org/doc/r3.1.2/zookeeperProgrammers.html#sc_ZooKeeperAccessControl) does. ClickHouse Keeper supports the same set of permissions and has the identical built-in schemes: `world`, `auth` and `digest`. The digest authentication scheme uses the pair `username:password`, the password is encoded in Base64.
:::note
External integrations are not supported.
:::
## Configuration {#configuration}
ClickHouse Keeper can be used as a standalone replacement for ZooKeeper or as an internal part of the ClickHouse server. In both cases the configuration is almost the same `.xml` file. The main ClickHouse Keeper configuration tag is `<keeper_server>`. Keeper configuration has the following parameters:
- `tcp_port` — Port for a client to connect (default for ZooKeeper is `2181`).
- `tcp_port_secure` — Secure port for an SSL connection between client and keeper-server.
- `server_id` — Unique server id, each participant of the ClickHouse Keeper cluster must have a unique number (1, 2, 3, and so on).
- `log_storage_path` — Path to coordination logs, just like ZooKeeper it is best to store logs on non-busy nodes.
- `snapshot_storage_path` — Path to coordination snapshots.
Other common parameters are inherited from the ClickHouse server config (`listen_host`, `logger`, and so on).
Internal coordination settings are located in the `<keeper_server>.<coordination_settings>` section:
- `operation_timeout_ms` — Timeout for a single client operation (ms) (default: 10000).
- `min_session_timeout_ms` — Min timeout for client session (ms) (default: 10000).
- `session_timeout_ms` — Max timeout for client session (ms) (default: 100000).
- `dead_session_check_period_ms` — How often ClickHouse Keeper checks for dead sessions and removes them (ms) (default: 500).
- `heart_beat_interval_ms` — How often a ClickHouse Keeper leader will send heartbeats to followers (ms) (default: 500).
- `election_timeout_lower_bound_ms` — If the follower does not receive a heartbeat from the leader in this interval, then it can initiate leader election (default: 1000). Must be less than or equal to `election_timeout_upper_bound_ms`. Ideally they shouldn't be equal.
- `election_timeout_upper_bound_ms` — If the follower does not receive a heartbeat from the leader in this interval, then it must initiate leader election (default: 2000).
- `rotate_log_storage_interval` — How many log records to store in a single file (default: 100000).
- `reserved_log_items` — How many coordination log records to store before compaction (default: 100000).
- `snapshot_distance` — How often ClickHouse Keeper will create new snapshots (in the number of records in logs) (default: 100000).
- `snapshots_to_keep` — How many snapshots to keep (default: 3).
- `stale_log_gap` — Threshold when leader considers follower as stale and sends the snapshot to it instead of logs (default: 10000).
- `fresh_log_gap` — When node became fresh (default: 200).
- `max_requests_batch_size` — Max size of batch in requests count before it will be sent to RAFT (default: 100).
- `force_sync` — Call `fsync` on each write to coordination log (default: true).
- `quorum_reads` — Execute read requests as writes through whole RAFT consensus with similar speed (default: false).
- `raft_logs_level` — Text logging level about coordination (trace, debug, and so on) (default: system default).
- `auto_forwarding` — Allow to forward write requests from followers to the leader (default: true).
- `shutdown_timeout` — Wait to finish internal connections and shutdown (ms) (default: 5000).
- `startup_timeout` — If the server doesn't connect to other quorum participants in the specified timeout it will terminate (ms) (default: 30000).
- `four_letter_word_white_list` — White list of 4lw commands (default: `conf,cons,crst,envi,ruok,srst,srvr,stat,wchs,dirs,mntr,isro,rcvr,apiv,csnp,lgif,rqld`).
Quorum configuration is located in the `<keeper_server>.<raft_configuration>` section and contains the server descriptions.
The only parameter for the whole quorum is `secure`, which enables encrypted connection for communication between quorum participants. The parameter can be set `true` if SSL connection is required for internal communication between nodes, or left unspecified otherwise.
The main parameters for each `<server>` are:
- `id` — Server identifier in a quorum.
- `hostname` — Hostname where this server is placed.
- `port` — Port where this server listens for connections.
:::note
In the case of a change in the topology of your ClickHouse Keeper cluster (e.g., replacing a server), please make sure to keep the mapping of `server_id` to `hostname` consistent and avoid shuffling or reusing an existing `server_id` for different servers (e.g., it can happen if you rely on automation scripts to deploy ClickHouse Keeper).
:::
Examples of configuration for quorum with three nodes can be found in [integration tests](https://github.com/ClickHouse/ClickHouse/tree/master/tests/integration) with `test_keeper_` prefix. Example configuration for server #1:
```xml
<keeper_server>
<tcp_port>2181</tcp_port>
<server_id>1</server_id>
<log_storage_path>/var/lib/clickhouse/coordination/log</log_storage_path>
<snapshot_storage_path>/var/lib/clickhouse/coordination/snapshots</snapshot_storage_path>
<coordination_settings>
<operation_timeout_ms>10000</operation_timeout_ms>
<session_timeout_ms>30000</session_timeout_ms>
<raft_logs_level>trace</raft_logs_level>
</coordination_settings>
<raft_configuration>
<server>
<id>1</id>
<hostname>zoo1</hostname>
<port>9444</port>
</server>
<server>
<id>2</id>
<hostname>zoo2</hostname>
<port>9444</port>
</server>
<server>
<id>3</id>
<hostname>zoo3</hostname>
<port>9444</port>
</server>
</raft_configuration>
</keeper_server>
```
## How to run {#how-to-run}
ClickHouse Keeper is bundled into the ClickHouse server package, just add configuration of `<keeper_server>` and start ClickHouse server as always. If you want to run standalone ClickHouse Keeper you can start it in a similar way with:
```bash
clickhouse-keeper --config /etc/your_path_to_config/config.xml
```
If you don't have the symlink (`clickhouse-keeper`) you can create it or specify `keeper` as an argument to `clickhouse`:
```bash
clickhouse keeper --config /etc/your_path_to_config/config.xml
```
## Four Letter Word Commands {#four-letter-word-commands}
ClickHouse Keeper also provides 4lw commands which are almost the same as in ZooKeeper. Each command is composed of four letters such as `mntr`, `stat` etc. There are some more interesting commands: `stat` gives some general information about the server and connected clients, while `srvr` and `cons` give extended details on server and connections respectively.
The 4lw commands have a white list configuration `four_letter_word_white_list`, which has the default value `conf,cons,crst,envi,ruok,srst,srvr,stat,wchs,dirs,mntr,isro,rcvr,apiv,csnp,lgif,rqld`.
You can issue the commands to ClickHouse Keeper via telnet or nc, at the client port.
```
echo mntr | nc localhost 9181
```
Below are the detailed 4lw commands:
- `ruok`: Tests if server is running in a non-error state. The server will respond with `imok` if it is running. Otherwise it will not respond at all. A response of `imok` does not necessarily indicate that the server has joined the quorum, just that the server process is active and bound to the specified client port. Use "stat" for details on state wrt quorum and client connection information.
```
imok
```
- `mntr`: Outputs a list of variables that could be used for monitoring the health of the cluster.
```
zk_version v21.11.1.1-prestable-7a4a0b0edef0ad6e0aa662cd3b90c3f4acf796e7
zk_avg_latency 0
zk_max_latency 0
zk_min_latency 0
zk_packets_received 68
zk_packets_sent 68
zk_num_alive_connections 1
zk_outstanding_requests 0
zk_server_state leader
zk_znode_count 4
zk_watch_count 1
zk_ephemerals_count 0
zk_approximate_data_size 723
zk_open_file_descriptor_count 310
zk_max_file_descriptor_count 10240
zk_followers 0
zk_synced_followers 0
```
- `srvr`: Lists full details for the server.
```
ClickHouse Keeper version: v21.11.1.1-prestable-7a4a0b0edef0ad6e0aa662cd3b90c3f4acf796e7
Latency min/avg/max: 0/0/0
Received: 2
Sent : 2
Connections: 1
Outstanding: 0
Zxid: 34
Mode: leader
Node count: 4
```
- `stat`: Lists brief details for the server and connected clients.
```
ClickHouse Keeper version: v21.11.1.1-prestable-7a4a0b0edef0ad6e0aa662cd3b90c3f4acf796e7
Clients:
192.168.1.1:52852(recved=0,sent=0)
192.168.1.1:52042(recved=24,sent=48)
Latency min/avg/max: 0/0/0
Received: 4
Sent : 4
Connections: 1
Outstanding: 0
Zxid: 36
Mode: leader
Node count: 4
```
- `srst`: Reset server statistics. The command will affect the result of `srvr`, `mntr` and `stat`.
```
Server stats reset.
```
- `conf`: Print details about serving configuration.
```
server_id=1
tcp_port=2181
four_letter_word_white_list=*
log_storage_path=./coordination/logs
snapshot_storage_path=./coordination/snapshots
max_requests_batch_size=100
session_timeout_ms=30000
operation_timeout_ms=10000
dead_session_check_period_ms=500
heart_beat_interval_ms=500
election_timeout_lower_bound_ms=1000
election_timeout_upper_bound_ms=2000
reserved_log_items=1000000000000000
snapshot_distance=10000
auto_forwarding=true
shutdown_timeout=5000
startup_timeout=240000
raft_logs_level=information
snapshots_to_keep=3
rotate_log_storage_interval=100000
stale_log_gap=10000
fresh_log_gap=200
max_requests_batch_size=100
quorum_reads=false
force_sync=false
compress_logs=true
compress_snapshots_with_zstd_format=true
configuration_change_tries_count=20
```
- `cons`: List full connection/session details for all clients connected to this server. Includes information on numbers of packets received/sent, session id, operation latencies, last operation performed, etc...
```
192.168.1.1:52163(recved=0,sent=0,sid=0xffffffffffffffff,lop=NA,est=1636454787393,to=30000,lzxid=0xffffffffffffffff,lresp=0,llat=0,minlat=0,avglat=0,maxlat=0)
192.168.1.1:52042(recved=9,sent=18,sid=0x0000000000000001,lop=List,est=1636454739887,to=30000,lcxid=0x0000000000000005,lzxid=0x0000000000000005,lresp=1636454739892,llat=0,minlat=0,avglat=0,maxlat=0)
```
- `crst`: Reset connection/session statistics for all connections.
```
Connection stats reset.
```
- `envi`: Print details about serving environment
```
Environment:
clickhouse.keeper.version=v21.11.1.1-prestable-7a4a0b0edef0ad6e0aa662cd3b90c3f4acf796e7
host.name=ZBMAC-C02D4054M.local
os.name=Darwin
os.arch=x86_64
os.version=19.6.0
cpu.count=12
user.name=root
user.home=/Users/JackyWoo/
user.dir=/Users/JackyWoo/project/jd/clickhouse/cmake-build-debug/programs/
user.tmp=/var/folders/b4/smbq5mfj7578f2jzwn602tt40000gn/T/
```
- `dirs`: Shows the total size of snapshot and log files in bytes
```
snapshot_dir_size: 0
log_dir_size: 3875
```
- `isro`: Tests if server is running in read-only mode. The server will respond with "ro" if in read-only mode or "rw" if not in read-only mode.
```
rw
```
- `wchs`: Lists brief information on watches for the server.
```
1 connections watching 1 paths
Total watches:1
```
- `wchc`: Lists detailed information on watches for the server, by session. This outputs a list of sessions (connections) with associated watches (paths). Note, depending on the number of watches this operation may be expensive (i.e., impact server performance), use it carefully.
```
0x0000000000000001
/clickhouse/task_queue/ddl
```
- `wchp`: Lists detailed information on watches for the server, by path. This outputs a list of paths (znodes) with associated sessions. Note, depending on the number of watches this operation may be expensive (i.e., impact server performance), use it carefully.
```
/clickhouse/task_queue/ddl
0x0000000000000001
```
- `dump`: Lists the outstanding sessions and ephemeral nodes. This only works on the leader.
```
Sessions dump (2):
0x0000000000000001
0x0000000000000002
Sessions with Ephemerals (1):
0x0000000000000001
/clickhouse/task_queue/ddl
```
- `csnp`: Schedule a snapshot creation task. Return the last committed log index of the scheduled snapshot if success or `Failed to schedule snapshot creation task.` if failed. Note that `lgif` command can help you determine whether the snapshot is done.
```
100
```
- `lgif`: Keeper log information. `first_log_idx` : my first log index in log store; `first_log_term` : my first log term; `last_log_idx` : my last log index in log store; `last_log_term` : my last log term; `last_committed_log_idx` : my last committed log index in state machine; `leader_committed_log_idx` : leader's committed log index from my perspective; `target_committed_log_idx` : target log index should be committed to; `last_snapshot_idx` : the largest committed log index in last snapshot.
```
first_log_idx 1
first_log_term 1
last_log_idx 101
last_log_term 1
last_committed_log_idx 100
leader_committed_log_idx 101
target_committed_log_idx 101
last_snapshot_idx 50
```
- `rqld`: Request to become new leader. Return `Sent leadership request to leader.` if request sent or `Failed to send leadership request to leader.` if request not sent. Note that if the node is already the leader, the outcome is the same as if the request was sent.
```
Sent leadership request to leader.
```
## Migration from ZooKeeper {#migration-from-zookeeper}
Seamless migration from ZooKeeper to ClickHouse Keeper is impossible: you have to stop your ZooKeeper cluster, convert data and start ClickHouse Keeper. The `clickhouse-keeper-converter` tool allows converting ZooKeeper logs and snapshots to a ClickHouse Keeper snapshot. It works only with ZooKeeper > 3.4. Steps for migration:
1. Stop all ZooKeeper nodes.
2. Optional, but recommended: find ZooKeeper leader node, start and stop it again. It will force ZooKeeper to create a consistent snapshot.
3. Run `clickhouse-keeper-converter` on a leader, for example:
```bash
clickhouse-keeper-converter --zookeeper-logs-dir /var/lib/zookeeper/version-2 --zookeeper-snapshots-dir /var/lib/zookeeper/version-2 --output-dir /path/to/clickhouse/keeper/snapshots
```
4. Copy snapshot to ClickHouse server nodes with a configured `keeper` or start ClickHouse Keeper instead of ZooKeeper. The snapshot must persist on all nodes, otherwise, empty nodes can be faster and one of them can become a leader.
## Recovering after losing quorum
Because ClickHouse Keeper uses Raft it can tolerate a certain number of node crashes depending on the cluster size. \
E.g. for a 3-node cluster, it will continue working correctly if only 1 node crashes.
Cluster configuration can be dynamically configured but there are some limitations. Reconfiguration relies on Raft also
so to add/remove a node from the cluster you need to have a quorum. If you lose too many nodes in your cluster at the same time without any chance
of starting them again, Raft will stop working and not allow you to reconfigure your cluster using the conventional way.
Nevertheless, ClickHouse Keeper has a recovery mode which allows you to forcefully reconfigure your cluster with only 1 node.
This should be done only as your last resort if you cannot start your nodes again, or start a new instance on the same endpoint.
Important things to note before continuing:
- Make sure that the failed nodes cannot connect to the cluster again.
- Do not start any of the new nodes until it's specified in the steps.
After making sure that the above things are true, you need to do the following:
1. Pick a single Keeper node to be your new leader. Be aware that the data of that node will be used for the entire cluster, so we recommend using a node with the most up-to-date state.
2. Before doing anything else, make a backup of the `log_storage_path` and `snapshot_storage_path` folders of the picked node.
3. Reconfigure the cluster on all of the nodes you want to use.
4. Send the four letter command `rcvr` to the node you picked which will move the node to the recovery mode OR stop Keeper instance on the picked node and start it again with the `--force-recovery` argument.
5. One by one, start Keeper instances on the new nodes making sure that `mntr` returns `follower` for the `zk_server_state` before starting the next one.
6. While in the recovery mode, the leader node will return an error message for the `mntr` command until it achieves quorum with the new nodes, and will refuse any requests from the client and the followers.
7. After quorum is achieved, the leader node will return to the normal mode of operation, accepting all the requests using Raft - verify with `mntr` which should return `leader` for the `zk_server_state`.

View File

@ -113,7 +113,7 @@ Note, that now, once user `my_user` uses `kerberos`, Kerberos must be enabled in
### Enabling Kerberos using SQL {#enabling-kerberos-using-sql}
When [SQL-driven Access Control and Account Management](../access-rights.md#access-control) is enabled in ClickHouse, users identified by Kerberos can also be created using SQL statements.
When [SQL-driven Access Control and Account Management](/docs/en/guides/sre/user-management/index.md#access-control) is enabled in ClickHouse, users identified by Kerberos can also be created using SQL statements.
```sql
CREATE USER my_user IDENTIFIED WITH kerberos REALM 'EXAMPLE.COM'

View File

@ -112,7 +112,7 @@ At each login attempt, ClickHouse tries to "bind" to the specified DN defined by
Note, that user `my_user` refers to `my_ldap_server`. This LDAP server must be configured in the main `config.xml` file as described previously.
When SQL-driven [Access Control and Account Management](../access-rights.md#access-control) is enabled, users that are authenticated by LDAP servers can also be created using the [CREATE USER](../../sql-reference/statements/create/user.md#create-user-statement) statement.
When SQL-driven [Access Control and Account Management](/docs/en/guides/sre/user-management/index.md#access-control) is enabled, users that are authenticated by LDAP servers can also be created using the [CREATE USER](/docs/en/sql-reference/statements/create/user.md#create-user-statement) statement.
Query:
@ -124,7 +124,7 @@ CREATE USER my_user IDENTIFIED WITH ldap SERVER 'my_ldap_server';
In addition to the locally defined users, a remote LDAP server can be used as a source of user definitions. To achieve this, specify previously defined LDAP server name (see [LDAP Server Definition](#ldap-server-definition)) in the `ldap` section inside the `users_directories` section of the `config.xml` file.
At each login attempt, ClickHouse tries to find the user definition locally and authenticate it as usual. If the user is not defined, ClickHouse will assume the definition exists in the external LDAP directory and will try to "bind" to the specified DN at the LDAP server using the provided credentials. If successful, the user will be considered existing and authenticated. The user will be assigned roles from the list specified in the `roles` section. Additionally, LDAP "search" can be performed and results can be transformed and treated as role names and then be assigned to the user if the `role_mapping` section is also configured. All this implies that the SQL-driven [Access Control and Account Management](../access-rights.md#access-control) is enabled and roles are created using the [CREATE ROLE](../../sql-reference/statements/create/role.md#create-role-statement) statement.
At each login attempt, ClickHouse tries to find the user definition locally and authenticate it as usual. If the user is not defined, ClickHouse will assume the definition exists in the external LDAP directory and will try to "bind" to the specified DN at the LDAP server using the provided credentials. If successful, the user will be considered existing and authenticated. The user will be assigned roles from the list specified in the `roles` section. Additionally, LDAP "search" can be performed and results can be transformed and treated as role names and then be assigned to the user if the `role_mapping` section is also configured. All this implies that the SQL-driven [Access Control and Account Management](/docs/en/guides/sre/user-management/index.md#access-control) is enabled and roles are created using the [CREATE ROLE](/docs/en/sql-reference/statements/create/role.md#create-role-statement) statement.
**Example**
@ -173,7 +173,7 @@ Note that `my_ldap_server` referred in the `ldap` section inside the `user_direc
- `roles` — Section with a list of locally defined roles that will be assigned to each user retrieved from the LDAP server.
- If no roles are specified here or assigned during role mapping (below), user will not be able to perform any actions after authentication.
- `role_mapping` — Section with LDAP search parameters and mapping rules.
- When a user authenticates, while still bound to LDAP, an LDAP search is performed using `search_filter` and the name of the logged-in user. For each entry found during that search, the value of the specified attribute is extracted. For each attribute value that has the specified prefix, the prefix is removed, and the rest of the value becomes the name of a local role defined in ClickHouse, which is expected to be created beforehand by the [CREATE ROLE](../../sql-reference/statements/create/role.md#create-role-statement) statement.
- When a user authenticates, while still bound to LDAP, an LDAP search is performed using `search_filter` and the name of the logged-in user. For each entry found during that search, the value of the specified attribute is extracted. For each attribute value that has the specified prefix, the prefix is removed, and the rest of the value becomes the name of a local role defined in ClickHouse, which is expected to be created beforehand by the [CREATE ROLE](/docs/en/sql-reference/statements/create/role.md#create-role-statement) statement.
- There can be multiple `role_mapping` sections defined inside the same `ldap` section. All of them will be applied.
- `base_dn` — Template used to construct the base DN for the LDAP search.
- The resulting DN will be constructed by replacing all `{user_name}`, `{bind_dn}`, and `{user_dn}` substrings of the template with the actual user name, bind DN, and user DN during each LDAP search.

View File

@ -2,6 +2,7 @@
slug: /en/operations/monitoring
sidebar_position: 45
sidebar_label: Monitoring
description: You can monitor the utilization of hardware resources and also ClickHouse server metrics.
---
# Monitoring

View File

@ -2,6 +2,7 @@
slug: /en/operations/server-configuration-parameters/settings
sidebar_position: 57
sidebar_label: Server Settings
description: This section contains descriptions of server settings that cannot be changed at the session or query level.
---
# Server Settings
@ -275,7 +276,7 @@ Path:
- Specify the absolute path or the path relative to the server config file.
- The path can contain wildcards \* and ?.
See also “[Dictionaries](../../sql-reference/dictionaries/external-dictionaries/external-dicts.md)”.
See also “[Dictionaries](../../sql-reference/dictionaries/index.md)”.
**Example**
@ -1025,7 +1026,7 @@ If the number of **idle** threads in the Backups IO Thread pool exceeds `max_bac
Possible values:
- Positive integer.
- Zero.
- Zero.
Default value: `0`.
@ -1917,7 +1918,7 @@ Default value: `/var/lib/clickhouse/access/`.
**See also**
- [Access Control and Account Management](../../operations/access-rights.md#access-control)
- [Access Control and Account Management](../../guides/sre/user-management/index.md#access-control)
## user_directories {#user_directories}

View File

@ -9,7 +9,7 @@ sidebar_label: Settings Profiles
A settings profile is a collection of settings grouped under the same name.
:::note
ClickHouse also supports [SQL-driven workflow](../../operations/access-rights.md#access-control) for managing settings profiles. We recommend using it.
ClickHouse also supports [SQL-driven workflow](../../guides/sre/user-management/index.md#access-control) for managing settings profiles. We recommend using it.
:::
The profile can have any name. You can specify the same profile for different users. The most important thing you can write in the settings profile is `readonly=1`, which ensures read-only access.

View File

@ -9,7 +9,7 @@ sidebar_label: User Settings
The `users` section of the `users.xml` configuration file contains user settings.
:::note
ClickHouse also supports [SQL-driven workflow](../../operations/access-rights.md#access-control) for managing users. We recommend using it.
ClickHouse also supports [SQL-driven workflow](../../guides/sre/user-management/index.md#access-control) for managing users. We recommend using it.
:::
Structure of the `users` section:
@ -77,7 +77,7 @@ Password can be specified in plaintext or in SHA256 (hex format).
### access_management {#access_management-user-setting}
This setting enables or disables using of SQL-driven [access control and account management](../../operations/access-rights.md#access-control) for the user.
This setting enables or disables the use of SQL-driven [access control and account management](../../guides/sre/user-management/index.md#access-control) for the user.
Possible values:

View File

@ -2999,7 +2999,7 @@ It can be useful when merges are CPU bounded not IO bounded (performing heavy da
## max_final_threads {#max-final-threads}
Sets the maximum number of parallel threads for the `SELECT` query data read phase with the [FINAL](../../sql-reference/statements/select/from.md/#select-from-final) modifier.
Sets the maximum number of parallel threads for the `SELECT` query data read phase with the [FINAL](../../sql-reference/statements/select/from.md#select-from-final) modifier.
Possible values:
@ -3094,9 +3094,9 @@ Possible values:
Default value: `0`.
## s3_truncate_on_insert
## s3_truncate_on_insert
Enables or disables truncate before inserts in s3 engine tables. If disabled, an exception will be thrown on insert attempts if an S3 object already exists.
Enables or disables truncate before inserts in s3 engine tables. If disabled, an exception will be thrown on insert attempts if an S3 object already exists.
Possible values:
- 0 — `INSERT` query appends new data to the end of the file.
@ -3104,9 +3104,9 @@ Possible values:
Default value: `0`.
## hdfs_truncate_on_insert
## hdfs_truncate_on_insert
Enables or disables truncation before an insert in hdfs engine tables. If disabled, an exception will be thrown on an attempt to insert if a file in HDFS already exists.
Enables or disables truncation before an insert in hdfs engine tables. If disabled, an exception will be thrown on an attempt to insert if a file in HDFS already exists.
Possible values:
- 0 — `INSERT` query appends new data to the end of the file.
@ -3114,11 +3114,11 @@ Possible values:
Default value: `0`.
## engine_file_allow_create_multiple_files
## engine_file_allow_create_multiple_files
Enables or disables creating a new file on each insert in file engine tables if the format has the suffix (`JSON`, `ORC`, `Parquet`, etc.). If enabled, on each insert a new file will be created with a name following this pattern:
`data.Parquet` -> `data.1.Parquet` -> `data.2.Parquet`, etc.
`data.Parquet` -> `data.1.Parquet` -> `data.2.Parquet`, etc.
Possible values:
- 0 — `INSERT` query appends new data to the end of the file.
@ -3126,11 +3126,11 @@ Possible values:
Default value: `0`.
## s3_create_new_file_on_insert
## s3_create_new_file_on_insert
Enables or disables creating a new file on each insert in s3 engine tables. If enabled, on each insert a new S3 object will be created with the key, similar to this pattern:
initial: `data.Parquet.gz` -> `data.1.Parquet.gz` -> `data.2.Parquet.gz`, etc.
initial: `data.Parquet.gz` -> `data.1.Parquet.gz` -> `data.2.Parquet.gz`, etc.
Possible values:
- 0 — `INSERT` query appends new data to the end of the file.
@ -3142,7 +3142,7 @@ Default value: `0`.
Enables or disables creating a new file on each insert in HDFS engine tables. If enabled, on each insert a new HDFS file will be created with the name, similar to this pattern:
initial: `data.Parquet.gz` -> `data.1.Parquet.gz` -> `data.2.Parquet.gz`, etc.
initial: `data.Parquet.gz` -> `data.1.Parquet.gz` -> `data.2.Parquet.gz`, etc.
Possible values:
- 0 — `INSERT` query appends new data to the end of the file.
@ -3753,7 +3753,7 @@ Default value: `1`.
## optimize_move_to_prewhere_if_final {#optimize_move_to_prewhere_if_final}
Enables or disables automatic [PREWHERE](../../sql-reference/statements/select/prewhere.md) optimization in [SELECT](../../sql-reference/statements/select/index.md) queries with [FINAL](../../sql-reference/statements/select/from.md/#select-from-final) modifier.
Enables or disables automatic [PREWHERE](../../sql-reference/statements/select/prewhere.md) optimization in [SELECT](../../sql-reference/statements/select/index.md) queries with [FINAL](../../sql-reference/statements/select/from.md#select-from-final) modifier.
Works only for [*MergeTree](../../engines/table-engines/mergetree-family/index.md) tables.
@ -3770,7 +3770,7 @@ Default value: `0`.
## optimize_using_constraints
Use [constraints](../../sql-reference/statements/create/table#constraints) for query optimization. The default is `false`.
Use [constraints](../../sql-reference/statements/create/table.md#constraints) for query optimization. The default is `false`.
Possible values:
@ -3778,7 +3778,7 @@ Possible values:
## optimize_append_index
Use [constraints](../../sql-reference/statements/create/table#constraints) in order to append index condition. The default is `false`.
Use [constraints](../../sql-reference/statements/create/table.md#constraints) in order to append index condition. The default is `false`.
Possible values:
@ -3786,7 +3786,7 @@ Possible values:
## optimize_substitute_columns
Use [constraints](../../sql-reference/statements/create/table#constraints) for column substitution. The default is `false`.
Use [constraints](../../sql-reference/statements/create/table.md#constraints) for column substitution. The default is `false`.
Possible values:
@ -3984,7 +3984,7 @@ Use this setting only for backward compatibility if your use cases depend on old
## final {#final}
Automatically applies [FINAL](../../sql-reference/statements/select/from/#final-modifier) modifier to all tables in a query, to tables where [FINAL](../../sql-reference/statements/select/from/#final-modifier) is applicable, including joined tables and tables in sub-queries, and
Automatically applies [FINAL](../../sql-reference/statements/select/from.md#final-modifier) modifier to all tables in a query, to tables where [FINAL](../../sql-reference/statements/select/from.md#final-modifier) is applicable, including joined tables and tables in sub-queries, and
distributed tables.
Possible values:
@ -4030,7 +4030,7 @@ SELECT * FROM test;
## asterisk_include_materialized_columns {#asterisk_include_materialized_columns}
Include [MATERIALIZED](../../sql-reference/statements/create/table/#materialized) columns for wildcard query (`SELECT *`).
Include [MATERIALIZED](../../sql-reference/statements/create/table.md#materialized) columns for wildcard query (`SELECT *`).
Possible values:
@ -4041,7 +4041,7 @@ Default value: `0`.
## asterisk_include_alias_columns {#asterisk_include_alias_columns}
Include [ALIAS](../../sql-reference/statements/create/table/#alias) columns for wildcard query (`SELECT *`).
Include [ALIAS](../../sql-reference/statements/create/table.md#alias) columns for wildcard query (`SELECT *`).
Possible values:

View File

@ -3,12 +3,12 @@ slug: /en/operations/system-tables/dictionaries
---
# dictionaries
Contains information about [dictionaries](../../sql-reference/dictionaries/external-dictionaries/external-dicts.md).
Contains information about [dictionaries](../../sql-reference/dictionaries/index.md).
Columns:
- `database` ([String](../../sql-reference/data-types/string.md)) — Name of the database containing the dictionary created by DDL query. Empty string for other dictionaries.
- `name` ([String](../../sql-reference/data-types/string.md)) — [Dictionary name](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict.md).
- `name` ([String](../../sql-reference/data-types/string.md)) — [Dictionary name](../../sql-reference/dictionaries/index.md).
- `uuid` ([UUID](../../sql-reference/data-types/uuid.md)) — Dictionary UUID.
- `status` ([Enum8](../../sql-reference/data-types/enum.md)) — Dictionary status. Possible values:
- `NOT_LOADED` — Dictionary was not loaded because it was not used.
@ -18,20 +18,20 @@ Columns:
- `LOADED_AND_RELOADING` — Dictionary is loaded successfully, and is being reloaded right now (frequent reasons: [SYSTEM RELOAD DICTIONARY](../../sql-reference/statements/system.md#query_language-system-reload-dictionary) query, timeout, dictionary config has changed).
- `FAILED_AND_RELOADING` — Could not load the dictionary as a result of an error and is loading now.
- `origin` ([String](../../sql-reference/data-types/string.md)) — Path to the configuration file that describes the dictionary.
- `type` ([String](../../sql-reference/data-types/string.md)) — Type of a dictionary allocation. [Storing Dictionaries in Memory](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md).
- `key.names` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — Array of [key names](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md#ext_dict_structure-key) provided by the dictionary.
- `key.types` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — Corresponding array of [key types](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md#ext_dict_structure-key) provided by the dictionary.
- `attribute.names` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — Array of [attribute names](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md#ext_dict_structure-attributes) provided by the dictionary.
- `attribute.types` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — Corresponding array of [attribute types](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md#ext_dict_structure-attributes) provided by the dictionary.
- `type` ([String](../../sql-reference/data-types/string.md)) — Type of a dictionary allocation. [Storing Dictionaries in Memory](../../sql-reference/dictionaries/index.md#storig-dictionaries-in-memory).
- `key.names` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — Array of [key names](../../sql-reference/dictionaries/index.md#dictionary-key-and-fields) provided by the dictionary.
- `key.types` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — Corresponding array of [key types](../../sql-reference/dictionaries/index.md#dictionary-key-and-fields) provided by the dictionary.
- `attribute.names` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — Array of [attribute names](../../sql-reference/dictionaries/index.md#dictionary-key-and-fields) provided by the dictionary.
- `attribute.types` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — Corresponding array of [attribute types](../../sql-reference/dictionaries/index.md#dictionary-key-and-fields) provided by the dictionary.
- `bytes_allocated` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Amount of RAM allocated for the dictionary.
- `query_count` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Number of queries since the dictionary was loaded or since the last successful reboot.
- `hit_rate` ([Float64](../../sql-reference/data-types/float.md)) — For cache dictionaries, the percentage of uses for which the value was in the cache.
- `found_rate` ([Float64](../../sql-reference/data-types/float.md)) — The percentage of uses for which the value was found.
- `element_count` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Number of items stored in the dictionary.
- `load_factor` ([Float64](../../sql-reference/data-types/float.md)) — Percentage filled in the dictionary (for a hashed dictionary, the percentage filled in the hash table).
- `source` ([String](../../sql-reference/data-types/string.md)) — Text describing the [data source](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md) for the dictionary.
- `lifetime_min` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Minimum [lifetime](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md) of the dictionary in memory, after which ClickHouse tries to reload the dictionary (if `invalidate_query` is set, then only if it has changed). Set in seconds.
- `lifetime_max` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Maximum [lifetime](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md) of the dictionary in memory, after which ClickHouse tries to reload the dictionary (if `invalidate_query` is set, then only if it has changed). Set in seconds.
- `source` ([String](../../sql-reference/data-types/string.md)) — Text describing the [data source](../../sql-reference/dictionaries/index.md#dictionary-sources) for the dictionary.
- `lifetime_min` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Minimum [lifetime](../../sql-reference/dictionaries/index.md#dictionary-updates) of the dictionary in memory, after which ClickHouse tries to reload the dictionary (if `invalidate_query` is set, then only if it has changed). Set in seconds.
- `lifetime_max` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Maximum [lifetime](../../sql-reference/dictionaries/index.md#dictionary-updates) of the dictionary in memory, after which ClickHouse tries to reload the dictionary (if `invalidate_query` is set, then only if it has changed). Set in seconds.
- `loading_start_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Start time for loading the dictionary.
- `last_successful_update_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — End time for loading or updating the dictionary. Helps to monitor some troubles with dictionary sources and investigate the causes.
- `loading_duration` ([Float32](../../sql-reference/data-types/float.md)) — Duration of a dictionary loading.

View File

@ -20,7 +20,7 @@ Columns:
- `apply_to_all` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Logical value. It shows which users the quota is applied to. Values:
- `0` — The quota applies to users specified in the `apply_to_list`.
- `1` — The quota applies to all users except those listed in `apply_to_except`.
- `apply_to_list` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — List of user names/[roles](../../operations/access-rights.md#role-management) that the quota should be applied to.
- `apply_to_list` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — List of user names/[roles](../../guides/sre/user-management/index.md#role-management) that the quota should be applied to.
- `apply_to_except` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — List of user names/roles that the quota should not apply to.
## See Also {#see-also}

View File

@ -3,7 +3,7 @@ slug: /en/operations/system-tables/roles
---
# roles
Contains information about configured [roles](../../operations/access-rights.md#role-management).
Contains information about configured [roles](../../guides/sre/user-management/index.md#role-management).
Columns:

View File

@ -3,7 +3,7 @@ slug: /en/operations/system-tables/users
---
# users
Contains a list of [user accounts](../../operations/access-rights.md#user-account-management) configured at the server.
Contains a list of [user accounts](../../guides/sre/user-management/index.md#user-account-management) configured at the server.
Columns:
- `name` ([String](../../sql-reference/data-types/string.md)) — User name.

View File

@ -126,7 +126,7 @@ Otherwise you may get `Illegal instruction` crashes when hypervisor is run on ol
## ClickHouse Keeper and ZooKeeper {#zookeeper}
ClickHouse Keeper is recommended to replace ZooKeeper for ClickHouse clusters. See the documentation for [ClickHouse Keeper](clickhouse-keeper.md)
ClickHouse Keeper is recommended to replace ZooKeeper for ClickHouse clusters. See the documentation for [ClickHouse Keeper](../guides/sre/keeper/index.md).
If you would like to continue using ZooKeeper then it is best to use a fresh version of ZooKeeper 3.4.9 or later. The version in stable Linux distributions may be outdated.
@ -134,7 +134,7 @@ You should never use manually written scripts to transfer data between different
If you want to divide an existing ZooKeeper cluster into two, the correct way is to increase the number of its replicas and then reconfigure it as two independent clusters.
You can run ClickHouse Keeper on the same server as ClickHouse in test environments, or in environments with low ingestion rate.
You can run ClickHouse Keeper on the same server as ClickHouse in test environments, or in environments with low ingestion rate.
For production environments we suggest using separate servers for ClickHouse and ZooKeeper/Keeper, or placing ClickHouse files and Keeper files on separate disks, because ZooKeeper/Keeper are very sensitive to disk latency and ClickHouse may utilize all available system resources.
You can have ZooKeeper observers in an ensemble but ClickHouse servers should not interact with observers.

View File

@ -27,7 +27,7 @@ $ clickhouse-format --query "select number from numbers(10) where number%2 order
Result:
```text
```sql
SELECT number
FROM numbers(10)
WHERE number % 2
@ -54,7 +54,7 @@ $ clickhouse-format -n <<< "SELECT * FROM (SELECT 1 AS x UNION ALL SELECT 1 UNIO
Result:
```text
```sql
SELECT *
FROM
(
@ -75,7 +75,7 @@ $ clickhouse-format --seed Hello --obfuscate <<< "SELECT cost_first_screen BETWE
Result:
```text
```sql
SELECT treasury_mammoth_hazelnut BETWEEN nutmeg AND span, CASE WHEN chive >= 116 THEN switching ELSE ANYTHING END;
```
@ -87,7 +87,7 @@ $ clickhouse-format --seed World --obfuscate <<< "SELECT cost_first_screen BETWE
Result:
```text
```sql
SELECT horse_tape_summer BETWEEN folklore AND moccasins, CASE WHEN intestine >= 116 THEN nonconformist ELSE FORESTRY END;
```
@ -99,7 +99,7 @@ $ clickhouse-format --backslash <<< "SELECT * FROM (SELECT 1 AS x UNION ALL SELE
Result:
```text
```sql
SELECT * \
FROM \
( \

View File

@ -4,9 +4,9 @@ sidebar_position: 60
sidebar_label: clickhouse-local
---
# clickhouse-local
# clickhouse-local
The `clickhouse-local` program enables you to perform fast processing on local files, without having to deploy and configure the ClickHouse server. It accepts data that represent tables and queries them using [ClickHouse SQL dialect](../../sql-reference/). `clickhouse-local` uses the same core as ClickHouse server, so it supports most of the features and the same set of formats and table engines.
The `clickhouse-local` program enables you to perform fast processing on local files, without having to deploy and configure the ClickHouse server. It accepts data that represent tables and queries them using [ClickHouse SQL dialect](../../sql-reference/index.md). `clickhouse-local` uses the same core as ClickHouse server, so it supports most of the features and the same set of formats and table engines.
By default `clickhouse-local` has access to data on the same host, and it does not depend on the server's configuration. It also supports loading server configuration using `--config-file` argument. For temporary data, a unique temporary data directory is created by default.

View File

@ -1,11 +1,11 @@
---
slug: /en/operations/utilities/
sidebar_position: 56
sidebar_label: Overview
sidebar_label: Utilities
pagination_next: 'en/operations/utilities/clickhouse-copier'
---
# ClickHouse Utilities
# List of tools and utilities
- [clickhouse-local](../../operations/utilities/clickhouse-local.md) — Allows running SQL queries on data without starting the ClickHouse server, similar to how `awk` does this.
- [clickhouse-copier](../../operations/utilities/clickhouse-copier.md) — Copies (and reshards) data from one cluster to another cluster.

View File

@ -1,7 +1,7 @@
position: 15
position: 1
label: 'SQL Reference'
collapsible: true
collapsed: true
link:
type: doc
id: en/sql-reference/index
type: generated-index
slug: /en/sql-reference

View File

@ -5,7 +5,7 @@ sidebar_position: 350
# contingency
The `contingency` function calculates the [contingency coefficient](https://en.wikipedia.org/wiki/Contingency_table#Cram%C3%A9r's_V_and_the_contingency_coefficient_C), a value that measures the association between two columns in a table. The computation is similar to [the `cramersV` function](./cramersv) but with a different denominator in the square root.
The `contingency` function calculates the [contingency coefficient](https://en.wikipedia.org/wiki/Contingency_table#Cram%C3%A9r's_V_and_the_contingency_coefficient_C), a value that measures the association between two columns in a table. The computation is similar to [the `cramersV` function](./cramersv.md) but with a different denominator in the square root.
**Syntax**

View File

@ -6,7 +6,7 @@ sidebar_position: 352
# cramersVBiasCorrected
Cramér's V is a measure of association between two columns in a table. The result of the [`cramersV` function](./cramersv) ranges from 0 (corresponding to no association between the variables) to 1 and can reach 1 only when each value is completely determined by the other. The function can be heavily biased, so this version of Cramér's V uses the [bias correction](https://en.wikipedia.org/wiki/Cram%C3%A9r%27s_V#Bias_correction).
Cramér's V is a measure of association between two columns in a table. The result of the [`cramersV` function](./cramersv.md) ranges from 0 (corresponding to no association between the variables) to 1 and can reach 1 only when each value is completely determined by the other. The function can be heavily biased, so this version of Cramér's V uses the [bias correction](https://en.wikipedia.org/wiki/Cram%C3%A9r%27s_V#Bias_correction).

View File

@ -19,7 +19,7 @@ Each `value` corresponds to the determinate `timeunit`. The half-life `x` is the
**Arguments**
- `value` — Value. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md).
- `timeunit` — Timeunit. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md). Timeunit is not timestamp (seconds), it's -- an index of the time interval. Can be calculated using [intDiv](../../functions/arithmetic-functions/#intdiva-b).
- `timeunit` — Timeunit. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md). Timeunit is not a timestamp (seconds); it is an index of the time interval. Can be calculated using [intDiv](../../functions/arithmetic-functions.md#intdiva-b).
**Parameters**

View File

@ -7,7 +7,7 @@ sidebar_label: JSON
# JSON
:::warning
This feature is experimental and is not production ready. If you need to work with JSON documents, consider using [this guide](/docs/en/guides/developer/working-with-json/json-load-data.md) instead.
This feature is experimental and is not production ready. If you need to work with JSON documents, consider using [this guide](/docs/en/integrations/data-ingestion/data-formats/json.md) instead.
:::
Stores JavaScript Object Notation (JSON) documents in a single column.

View File

@ -1,8 +0,0 @@
position: 37
label: 'Dictionaries'
collapsible: true
collapsed: true
link:
type: generated-index
title: Dictionaries
slug: /en/sql-reference/dictionaries/external-dictionaries

View File

@ -1,67 +0,0 @@
---
slug: /en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-hierarchical
sidebar_position: 45
sidebar_label: Hierarchical dictionaries
---
# Hierarchical Dictionaries
ClickHouse supports hierarchical dictionaries with a [numeric key](../../dictionaries/external-dictionaries/external-dicts-dict-structure.md#numeric-key).
Look at the following hierarchical structure:
``` text
0 (Common parent)
├── 1 (Russia)
│ │
│ └── 2 (Moscow)
│ │
│ └── 3 (Center)
└── 4 (Great Britain)
└── 5 (London)
```
This hierarchy can be expressed as the following dictionary table.
| region_id | parent_region | region_name |
|------------|----------------|---------------|
| 1 | 0 | Russia |
| 2 | 1 | Moscow |
| 3 | 2 | Center |
| 4 | 0 | Great Britain |
| 5 | 4 | London |
This table contains a column `parent_region` that contains the key of the nearest parent for the element.
ClickHouse supports the [hierarchical](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md#hierarchical-dict-attr) property for [external dictionary](../../../sql-reference/dictionaries/external-dictionaries/) attributes. This property allows you to configure a hierarchical dictionary similar to the one described above.
The [dictGetHierarchy](../../../sql-reference/functions/ext-dict-functions.md#dictgethierarchy) function allows you to get the parent chain of an element.
For our example, the structure of dictionary can be the following:
``` xml
<dictionary>
<structure>
<id>
<name>region_id</name>
</id>
<attribute>
<name>parent_region</name>
<type>UInt64</type>
<null_value>0</null_value>
<hierarchical>true</hierarchical>
</attribute>
<attribute>
<name>region_name</name>
<type>String</type>
<null_value></null_value>
</attribute>
</structure>
</dictionary>
```

View File

@ -1,751 +0,0 @@
---
slug: /en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout
sidebar_position: 41
sidebar_label: Storing Dictionaries in Memory
---
import CloudDetails from '@site/docs/en/sql-reference/dictionaries/external-dictionaries/_snippet_dictionary_in_cloud.md';
# Storing Dictionaries in Memory
There are a variety of ways to store dictionaries in memory.
We recommend [flat](#flat), [hashed](#dicts-external_dicts_dict_layout-hashed) and [complex_key_hashed](#complex-key-hashed), which provide optimal processing speed.
Caching is not recommended because of potentially poor performance and difficulties in selecting optimal parameters. Read more in the section [cache](#cache).
There are several ways to improve dictionary performance:
- Call the function for working with the dictionary after `GROUP BY`.
- Mark attributes to extract as injective. An attribute is called injective if different attribute values correspond to different keys. So when `GROUP BY` uses a function that fetches an attribute value by the key, this function is automatically taken out of `GROUP BY`.
ClickHouse generates an exception for errors with dictionaries. Examples of errors:
- The dictionary being accessed could not be loaded.
- Error querying a `cached` dictionary.
You can view the list of dictionaries and their statuses in the [system.dictionaries](../../../operations/system-tables/dictionaries.md) table.
<CloudDetails />
The configuration looks like this:
``` xml
<clickhouse>
<dictionary>
...
<layout>
<layout_type>
<!-- layout settings -->
</layout_type>
</layout>
...
</dictionary>
</clickhouse>
```
Corresponding [DDL-query](../../../sql-reference/statements/create/dictionary.md):
``` sql
CREATE DICTIONARY (...)
...
LAYOUT(LAYOUT_TYPE(param value)) -- layout settings
...
```
Dictionaries without word `complex-key*` in a layout have a key with [UInt64](../../../sql-reference/data-types/int-uint.md) type, `complex-key*` dictionaries have a composite key (complex, with arbitrary types).
[UInt64](../../../sql-reference/data-types/int-uint.md) keys in XML dictionaries are defined with `<id>` tag.
Configuration example (column key_column has UInt64 type):
```xml
...
<structure>
<id>
<name>key_column</name>
</id>
...
```
Composite `complex` keys in XML dictionaries are defined with the `<key>` tag.
Configuration example of a composite key (key has one element with [String](../../../sql-reference/data-types/string.md) type):
```xml
...
<structure>
<key>
<attribute>
<name>country_code</name>
<type>String</type>
</attribute>
</key>
...
```
## Ways to Store Dictionaries in Memory
- [flat](#flat)
- [hashed](#dicts-external_dicts_dict_layout-hashed)
- [sparse_hashed](#dicts-external_dicts_dict_layout-sparse_hashed)
- [complex_key_hashed](#complex-key-hashed)
- [complex_key_sparse_hashed](#complex-key-sparse-hashed)
- [hashed_array](#dicts-external_dicts_dict_layout-hashed-array)
- [complex_key_hashed_array](#complex-key-hashed-array)
- [range_hashed](#range-hashed)
- [complex_key_range_hashed](#complex-key-range-hashed)
- [cache](#cache)
- [complex_key_cache](#complex-key-cache)
- [ssd_cache](#ssd-cache)
- [complex_key_ssd_cache](#complex-key-ssd-cache)
- [direct](#direct)
- [complex_key_direct](#complex-key-direct)
- [ip_trie](#ip-trie)
### flat
The dictionary is completely stored in memory in the form of flat arrays. How much memory does the dictionary use? The amount is proportional to the size of the largest key (in space used).
The dictionary key has the [UInt64](../../../sql-reference/data-types/int-uint.md) type and the value is limited to `max_array_size` (by default — 500,000). If a larger key is discovered when creating the dictionary, ClickHouse throws an exception and does not create the dictionary. Dictionary flat arrays initial size is controlled by `initial_array_size` setting (by default — 1024).
All types of sources are supported. When updating, data (from a file or from a table) is read in its entirety.
This method provides the best performance among all available methods of storing the dictionary.
Configuration example:
``` xml
<layout>
<flat>
<initial_array_size>50000</initial_array_size>
<max_array_size>5000000</max_array_size>
</flat>
</layout>
```
or
``` sql
LAYOUT(FLAT(INITIAL_ARRAY_SIZE 50000 MAX_ARRAY_SIZE 5000000))
```
### hashed
The dictionary is completely stored in memory in the form of a hash table. The dictionary can contain any number of elements with any identifiers. In practice, the number of keys can reach tens of millions of items.
The dictionary key has the [UInt64](../../../sql-reference/data-types/int-uint.md) type.
All types of sources are supported. When updating, data (from a file or from a table) is read in its entirety.
Configuration example:
``` xml
<layout>
<hashed />
</layout>
```
or
``` sql
LAYOUT(HASHED())
```
If `shards` is greater than 1 (the default is `1`), the dictionary will load data in parallel, which is useful if you have a huge number of elements in one dictionary.
Configuration example:
``` xml
<layout>
<hashed>
<shards>10</shards>
<!-- Size of the backlog for blocks in parallel queue.
Since the bottleneck in parallel loading is rehash, and so to avoid
stalling because of thread is doing rehash, you need to have some
backlog.
10000 is good balance between memory and speed.
Even for 10e10 elements and can handle all the load without starvation. -->
<shard_load_queue_backlog>10000</shard_load_queue_backlog>
</hashed>
</layout>
```
or
``` sql
LAYOUT(HASHED(SHARDS 10 [SHARD_LOAD_QUEUE_BACKLOG 10000]))
```
### sparse_hashed
Similar to `hashed`, but uses less memory in favor of more CPU usage.
The dictionary key has the [UInt64](../../../sql-reference/data-types/int-uint.md) type.
Configuration example:
``` xml
<layout>
<sparse_hashed />
</layout>
```
or
``` sql
LAYOUT(SPARSE_HASHED())
```
It is also possible to use `shards` for this type of dictionary, and again it is more important for `sparse_hashed` than for `hashed`, since `sparse_hashed` is slower.
### complex_key_hashed
This type of storage is for use with composite [keys](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md). Similar to `hashed`.
Configuration example:
``` xml
<layout>
<complex_key_hashed>
<shards>1</shards>
<!-- <shard_load_queue_backlog>10000</shard_load_queue_backlog> -->
</complex_key_hashed>
</layout>
```
or
``` sql
LAYOUT(COMPLEX_KEY_HASHED([SHARDS 1] [SHARD_LOAD_QUEUE_BACKLOG 10000]))
```
### complex_key_sparse_hashed
This type of storage is for use with composite [keys](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md). Similar to [sparse_hashed](#dicts-external_dicts_dict_layout-sparse_hashed).
Configuration example:
``` xml
<layout>
<complex_key_sparse_hashed>
<shards>1</shards>
</complex_key_sparse_hashed>
</layout>
```
or
``` sql
LAYOUT(COMPLEX_KEY_SPARSE_HASHED([SHARDS 1] [SHARD_LOAD_QUEUE_BACKLOG 10000]))
```
### hashed_array
The dictionary is completely stored in memory. Each attribute is stored in an array. The key attribute is stored in the form of a hashed table where value is an index in the attributes array. The dictionary can contain any number of elements with any identifiers. In practice, the number of keys can reach tens of millions of items.
The dictionary key has the [UInt64](../../../sql-reference/data-types/int-uint.md) type.
All types of sources are supported. When updating, data (from a file or from a table) is read in its entirety.
Configuration example:
``` xml
<layout>
<hashed_array>
</hashed_array>
</layout>
```
or
``` sql
LAYOUT(HASHED_ARRAY())
```
### complex_key_hashed_array
This type of storage is for use with composite [keys](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md). Similar to [hashed_array](#dicts-external_dicts_dict_layout-hashed-array).
Configuration example:
``` xml
<layout>
<complex_key_hashed_array />
</layout>
```
or
``` sql
LAYOUT(COMPLEX_KEY_HASHED_ARRAY())
```
### range_hashed
The dictionary is stored in memory in the form of a hash table with an ordered array of ranges and their corresponding values.
The dictionary key has the [UInt64](../../../sql-reference/data-types/int-uint.md) type.
This storage method works the same way as hashed and allows using date/time (arbitrary numeric type) ranges in addition to the key.
Example: The table contains discounts for each advertiser in the format:
``` text
┌─advertiser_id─┬─discount_start_date─┬─discount_end_date─┬─amount─┐
│ 123 │ 2015-01-16 │ 2015-01-31 │ 0.25 │
│ 123 │ 2015-01-01 │ 2015-01-15 │ 0.15 │
│ 456 │ 2015-01-01 │ 2015-01-15 │ 0.05 │
└───────────────┴─────────────────────┴───────────────────┴────────┘
```
To use a sample for date ranges, define the `range_min` and `range_max` elements in the [structure](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md). These elements must contain elements `name` and `type` (if `type` is not specified, the default type will be used - Date). `type` can be any numeric type (Date / DateTime / UInt64 / Int32 / others).
:::warning
Values of `range_min` and `range_max` should fit in `Int64` type.
:::
Example:
``` xml
<layout>
<range_hashed>
<!-- Strategy for overlapping ranges (min/max). Default: min (return a matching range with the min(range_min -> range_max) value) -->
<range_lookup_strategy>min</range_lookup_strategy>
</range_hashed>
</layout>
<structure>
<id>
<name>advertiser_id</name>
</id>
<range_min>
<name>discount_start_date</name>
<type>Date</type>
</range_min>
<range_max>
<name>discount_end_date</name>
<type>Date</type>
</range_max>
...
```
or
``` sql
CREATE DICTIONARY discounts_dict (
advertiser_id UInt64,
discount_start_date Date,
discount_end_date Date,
amount Float64
)
PRIMARY KEY id
SOURCE(CLICKHOUSE(TABLE 'discounts'))
LIFETIME(MIN 1 MAX 1000)
LAYOUT(RANGE_HASHED(range_lookup_strategy 'max'))
RANGE(MIN discount_start_date MAX discount_end_date)
```
To work with these dictionaries, you need to pass an additional argument to the `dictGet` function, for which a range is selected:
``` sql
dictGet('dict_name', 'attr_name', id, date)
```
Query example:
``` sql
SELECT dictGet('discounts_dict', 'amount', 1, '2022-10-20'::Date);
```
This function returns the value for the specified `id`s and the date range that includes the passed date.
Details of the algorithm:
- If the `id` is not found or a range is not found for the `id`, it returns the default value of the attribute's type.
- If there are overlapping ranges and `range_lookup_strategy=min`, it returns a matching range with minimal `range_min`; if several ranges are found, it returns a range with minimal `range_max`; if again several ranges are found (several ranges had the same `range_min` and `range_max`), it returns a random range of them.
- If there are overlapping ranges and `range_lookup_strategy=max`, it returns a matching range with maximal `range_min`; if several ranges are found, it returns a range with maximal `range_max`; if again several ranges are found (several ranges had the same `range_min` and `range_max`), it returns a random range of them.
- If the `range_max` is `NULL`, the range is open. `NULL` is treated as maximal possible value. For the `range_min` `1970-01-01` or `0` (-MAX_INT) can be used as the open value.
Configuration example:
``` xml
<clickhouse>
<dictionary>
...
<layout>
<range_hashed />
</layout>
<structure>
<id>
<name>Abcdef</name>
</id>
<range_min>
<name>StartTimeStamp</name>
<type>UInt64</type>
</range_min>
<range_max>
<name>EndTimeStamp</name>
<type>UInt64</type>
</range_max>
<attribute>
<name>XXXType</name>
<type>String</type>
<null_value />
</attribute>
</structure>
</dictionary>
</clickhouse>
```
or
``` sql
CREATE DICTIONARY somedict(
Abcdef UInt64,
StartTimeStamp UInt64,
EndTimeStamp UInt64,
XXXType String DEFAULT ''
)
PRIMARY KEY Abcdef
RANGE(MIN StartTimeStamp MAX EndTimeStamp)
```
Configuration example with overlapping ranges and open ranges:
```sql
CREATE TABLE discounts
(
advertiser_id UInt64,
discount_start_date Date,
discount_end_date Nullable(Date),
amount Float64
)
ENGINE = Memory;
INSERT INTO discounts VALUES (1, '2015-01-01', Null, 0.1);
INSERT INTO discounts VALUES (1, '2015-01-15', Null, 0.2);
INSERT INTO discounts VALUES (2, '2015-01-01', '2015-01-15', 0.3);
INSERT INTO discounts VALUES (2, '2015-01-04', '2015-01-10', 0.4);
INSERT INTO discounts VALUES (3, '1970-01-01', '2015-01-15', 0.5);
INSERT INTO discounts VALUES (3, '1970-01-01', '2015-01-10', 0.6);
SELECT * FROM discounts ORDER BY advertiser_id, discount_start_date;
┌─advertiser_id─┬─discount_start_date─┬─discount_end_date─┬─amount─┐
│ 1 │ 2015-01-01 │ ᴺᵁᴸᴸ │ 0.1 │
│ 1 │ 2015-01-15 │ ᴺᵁᴸᴸ │ 0.2 │
│ 2 │ 2015-01-01 │ 2015-01-15 │ 0.3 │
│ 2 │ 2015-01-04 │ 2015-01-10 │ 0.4 │
│ 3 │ 1970-01-01 │ 2015-01-15 │ 0.5 │
│ 3 │ 1970-01-01 │ 2015-01-10 │ 0.6 │
└───────────────┴─────────────────────┴───────────────────┴────────┘
-- RANGE_LOOKUP_STRATEGY 'max'
CREATE DICTIONARY discounts_dict
(
advertiser_id UInt64,
discount_start_date Date,
discount_end_date Nullable(Date),
amount Float64
)
PRIMARY KEY advertiser_id
SOURCE(CLICKHOUSE(TABLE discounts))
LIFETIME(MIN 600 MAX 900)
LAYOUT(RANGE_HASHED(RANGE_LOOKUP_STRATEGY 'max'))
RANGE(MIN discount_start_date MAX discount_end_date);
select dictGet('discounts_dict', 'amount', 1, toDate('2015-01-14')) res;
┌─res─┐
│ 0.1 │ -- the only one range is matching: 2015-01-01 - Null
└─────┘
select dictGet('discounts_dict', 'amount', 1, toDate('2015-01-16')) res;
┌─res─┐
│ 0.2 │ -- two ranges are matching, range_min 2015-01-15 (0.2) is bigger than 2015-01-01 (0.1)
└─────┘
select dictGet('discounts_dict', 'amount', 2, toDate('2015-01-06')) res;
┌─res─┐
│ 0.4 │ -- two ranges are matching, range_min 2015-01-04 (0.4) is bigger than 2015-01-01 (0.3)
└─────┘
select dictGet('discounts_dict', 'amount', 3, toDate('2015-01-01')) res;
┌─res─┐
│ 0.5 │ -- two ranges are matching, range_min are equal, 2015-01-15 (0.5) is bigger than 2015-01-10 (0.6)
└─────┘
DROP DICTIONARY discounts_dict;
-- RANGE_LOOKUP_STRATEGY 'min'
CREATE DICTIONARY discounts_dict
(
advertiser_id UInt64,
discount_start_date Date,
discount_end_date Nullable(Date),
amount Float64
)
PRIMARY KEY advertiser_id
SOURCE(CLICKHOUSE(TABLE discounts))
LIFETIME(MIN 600 MAX 900)
LAYOUT(RANGE_HASHED(RANGE_LOOKUP_STRATEGY 'min'))
RANGE(MIN discount_start_date MAX discount_end_date);
select dictGet('discounts_dict', 'amount', 1, toDate('2015-01-14')) res;
┌─res─┐
│ 0.1 │ -- the only one range is matching: 2015-01-01 - Null
└─────┘
select dictGet('discounts_dict', 'amount', 1, toDate('2015-01-16')) res;
┌─res─┐
│ 0.1 │ -- two ranges are matching, range_min 2015-01-01 (0.1) is less than 2015-01-15 (0.2)
└─────┘
select dictGet('discounts_dict', 'amount', 2, toDate('2015-01-06')) res;
┌─res─┐
│ 0.3 │ -- two ranges are matching, range_min 2015-01-01 (0.3) is less than 2015-01-04 (0.4)
└─────┘
select dictGet('discounts_dict', 'amount', 3, toDate('2015-01-01')) res;
┌─res─┐
│ 0.6 │ -- two ranges are matching, range_min are equal, 2015-01-10 (0.6) is less than 2015-01-15 (0.5)
└─────┘
```
### complex_key_range_hashed
The dictionary is stored in memory in the form of a hash table with an ordered array of ranges and their corresponding values (see [range_hashed](#range-hashed)). This type of storage is for use with composite [keys](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md).
Configuration example:
``` sql
CREATE DICTIONARY range_dictionary
(
CountryID UInt64,
CountryKey String,
StartDate Date,
EndDate Date,
Tax Float64 DEFAULT 0.2
)
PRIMARY KEY CountryID, CountryKey
SOURCE(CLICKHOUSE(TABLE 'date_table'))
LIFETIME(MIN 1 MAX 1000)
LAYOUT(COMPLEX_KEY_RANGE_HASHED())
RANGE(MIN StartDate MAX EndDate);
```
### cache
The dictionary is stored in a cache that has a fixed number of cells. These cells contain frequently used elements.
The dictionary key has the [UInt64](../../../sql-reference/data-types/int-uint.md) type.
When searching for a dictionary, the cache is searched first. For each block of data, all keys that are not found in the cache or are outdated are requested from the source using `SELECT attrs... FROM db.table WHERE id IN (k1, k2, ...)`. The received data is then written to the cache.
If keys are not found in dictionary, then update cache task is created and added into update queue. Update queue properties can be controlled with settings `max_update_queue_size`, `update_queue_push_timeout_milliseconds`, `query_wait_timeout_milliseconds`, `max_threads_for_updates`.
For cache dictionaries, the expiration [lifetime](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md) of data in the cache can be set. If more time than `lifetime` has passed since loading the data in a cell, the cell's value is not used and the key becomes expired. The key is re-requested the next time it needs to be used. This behaviour can be configured with the setting `allow_read_expired_keys`.
This is the least effective of all the ways to store dictionaries. The speed of the cache depends strongly on correct settings and the usage scenario. A cache type dictionary performs well only when the hit rates are high enough (recommended 99% and higher). You can view the average hit rate in the [system.dictionaries](../../../operations/system-tables/dictionaries.md) table.
If the setting `allow_read_expired_keys` is set to 1 (the default is 0), the dictionary can support asynchronous updates. If a client requests keys and all of them are in the cache, but some of them are expired, the dictionary returns the expired keys to the client and requests them asynchronously from the source.
To improve cache performance, use a subquery with `LIMIT`, and call the function with the dictionary externally.
All types of sources are supported.
Example of settings:
``` xml
<layout>
<cache>
<!-- The size of the cache, in number of cells. Rounded up to a power of two. -->
<size_in_cells>1000000000</size_in_cells>
<!-- Allows to read expired keys. -->
<allow_read_expired_keys>0</allow_read_expired_keys>
<!-- Max size of update queue. -->
<max_update_queue_size>100000</max_update_queue_size>
<!-- Max timeout in milliseconds for push update task into queue. -->
<update_queue_push_timeout_milliseconds>10</update_queue_push_timeout_milliseconds>
<!-- Max wait timeout in milliseconds for update task to complete. -->
<query_wait_timeout_milliseconds>60000</query_wait_timeout_milliseconds>
<!-- Max threads for cache dictionary update. -->
<max_threads_for_updates>4</max_threads_for_updates>
</cache>
</layout>
```
or
``` sql
LAYOUT(CACHE(SIZE_IN_CELLS 1000000000))
```
Set a large enough cache size. You need to experiment to select the number of cells:
1. Set some value.
2. Run queries until the cache is completely full.
3. Assess memory consumption using the `system.dictionaries` table.
4. Increase or decrease the number of cells until the required memory consumption is reached.
:::warning
Do not use ClickHouse as a source, because it is slow to process queries with random reads.
:::
### complex_key_cache
This type of storage is for use with composite [keys](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md). Similar to `cache`.
### ssd_cache
Similar to `cache`, but stores data on SSD and index in RAM. All cache dictionary settings related to update queue can also be applied to SSD cache dictionaries.
The dictionary key has the [UInt64](../../../sql-reference/data-types/int-uint.md) type.
``` xml
<layout>
<ssd_cache>
<!-- Size of elementary read block in bytes. Recommended to be equal to SSD's page size. -->
<block_size>4096</block_size>
<!-- Max cache file size in bytes. -->
<file_size>16777216</file_size>
<!-- Size of RAM buffer in bytes for reading elements from SSD. -->
<read_buffer_size>131072</read_buffer_size>
<!-- Size of RAM buffer in bytes for aggregating elements before flushing to SSD. -->
<write_buffer_size>1048576</write_buffer_size>
<!-- Path where cache file will be stored. -->
<path>/var/lib/clickhouse/user_files/test_dict</path>
</ssd_cache>
</layout>
```
or
``` sql
LAYOUT(SSD_CACHE(BLOCK_SIZE 4096 FILE_SIZE 16777216 READ_BUFFER_SIZE 1048576
PATH '/var/lib/clickhouse/user_files/test_dict'))
```
### complex_key_ssd_cache
This type of storage is for use with composite [keys](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md). Similar to `ssd_cache`.
### direct
The dictionary is not stored in memory and directly goes to the source during the processing of a request.
The dictionary key has the [UInt64](../../../sql-reference/data-types/int-uint.md) type.
All types of [sources](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md), except local files, are supported.
Configuration example:
``` xml
<layout>
<direct />
</layout>
```
or
``` sql
LAYOUT(DIRECT())
```
### complex_key_direct
This type of storage is for use with composite [keys](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md). Similar to `direct`.
### ip_trie
This type of storage is for mapping network prefixes (IP addresses) to metadata such as ASN.
**Example**
Suppose we have a table in ClickHouse that contains our IP prefixes and mappings:
```sql
CREATE TABLE my_ip_addresses (
prefix String,
asn UInt32,
cca2 String
)
ENGINE = MergeTree
PRIMARY KEY prefix;
```
```sql
INSERT INTO my_ip_addresses VALUES
('202.79.32.0/20', 17501, 'NP'),
('2620:0:870::/48', 3856, 'US'),
('2a02:6b8:1::/48', 13238, 'RU'),
('2001:db8::/32', 65536, 'ZZ')
;
```
Let's define an `ip_trie` dictionary for this table. The `ip_trie` layout requires a composite key:
``` xml
<structure>
<key>
<attribute>
<name>prefix</name>
<type>String</type>
</attribute>
</key>
<attribute>
<name>asn</name>
<type>UInt32</type>
<null_value />
</attribute>
<attribute>
<name>cca2</name>
<type>String</type>
<null_value>??</null_value>
</attribute>
...
</structure>
<layout>
<ip_trie>
<!-- Key attribute `prefix` can be retrieved via dictGetString. -->
<!-- This option increases memory usage. -->
<access_to_key_from_attributes>true</access_to_key_from_attributes>
</ip_trie>
</layout>
```
or
``` sql
CREATE DICTIONARY my_ip_trie_dictionary (
prefix String,
asn UInt32,
cca2 String DEFAULT '??'
)
PRIMARY KEY prefix
SOURCE(CLICKHOUSE(TABLE 'my_ip_addresses'))
LAYOUT(IP_TRIE)
LIFETIME(3600);
```
The key must have only one `String` type attribute that contains an allowed IP prefix. Other types are not supported yet.
For queries, you must use the same functions (`dictGetT` with a tuple) as for dictionaries with composite keys. The syntax is:
``` sql
dictGetT('dict_name', 'attr_name', tuple(ip))
```
The function takes either `UInt32` for IPv4, or `FixedString(16)` for IPv6. For example:
``` sql
select dictGet('my_ip_trie_dictionary', 'asn', tuple(IPv6StringToNum('2001:db8::1')))
```
Other types are not supported yet. The function returns the attribute for the prefix that corresponds to this IP address. If there are overlapping prefixes, the most specific one is returned.
Data must completely fit into RAM.
## Related Content
- [Using dictionaries to accelerate queries](https://clickhouse.com/blog/faster-queries-dictionaries-clickhouse)

View File

@ -1,142 +0,0 @@
---
slug: /en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime
sidebar_position: 42
sidebar_label: Dictionary Updates
---
import CloudDetails from '@site/docs/en/sql-reference/dictionaries/external-dictionaries/_snippet_dictionary_in_cloud.md';
# Dictionary Updates
ClickHouse periodically updates the dictionaries. The update interval for fully downloaded dictionaries and the invalidation interval for cached dictionaries are defined in the `lifetime` tag in seconds.
Dictionary updates (other than loading for first use) do not block queries. During updates, the old version of a dictionary is used. If an error occurs during an update, the error is written to the server log, and queries continue using the old version of dictionaries.
Example of settings:
<CloudDetails />
``` xml
<dictionary>
...
<lifetime>300</lifetime>
...
</dictionary>
```
or
``` sql
CREATE DICTIONARY (...)
...
LIFETIME(300)
...
```
Setting `<lifetime>0</lifetime>` (`LIFETIME(0)`) prevents dictionaries from updating.
You can set a time interval for updates, and ClickHouse will choose a uniformly random time within this range. This is necessary in order to distribute the load on the dictionary source when updating on a large number of servers.
Example of settings:
``` xml
<dictionary>
...
<lifetime>
<min>300</min>
<max>360</max>
</lifetime>
...
</dictionary>
```
or
``` sql
LIFETIME(MIN 300 MAX 360)
```
If `<min>0</min>` and `<max>0</max>`, ClickHouse does not reload the dictionary by timeout.
In this case, ClickHouse can reload the dictionary earlier if the dictionary configuration file was changed or the `SYSTEM RELOAD DICTIONARY` command was executed.
When updating the dictionaries, the ClickHouse server applies different logic depending on the type of [source](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md):
- For a text file, it checks the time of modification. If the time differs from the previously recorded time, the dictionary is updated.
- For MySQL source, the time of modification is checked using a `SHOW TABLE STATUS` query (in case of MySQL 8 you need to disable meta-information caching in MySQL by `set global information_schema_stats_expiry=0`).
- Dictionaries from other sources are updated every time by default.
For other sources (ODBC, PostgreSQL, ClickHouse, etc), you can set up a query that will update the dictionaries only if they really changed, rather than each time. To do this, follow these steps:
- The dictionary table must have a field that always changes when the source data is updated.
- The settings of the source must specify a query that retrieves the changing field. The ClickHouse server interprets the query result as a row, and if this row has changed relative to its previous state, the dictionary is updated. Specify the query in the `<invalidate_query>` field in the settings for the [source](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md).
Example of settings:
``` xml
<dictionary>
...
<odbc>
...
<invalidate_query>SELECT update_time FROM dictionary_source where id = 1</invalidate_query>
</odbc>
...
</dictionary>
```
or
``` sql
...
SOURCE(ODBC(... invalidate_query 'SELECT update_time FROM dictionary_source where id = 1'))
...
```
For `Cache`, `ComplexKeyCache`, `SSDCache`, and `SSDComplexKeyCache` dictionaries both synchronous and asynchronous updates are supported.
It is also possible for `Flat`, `Hashed`, `ComplexKeyHashed` dictionaries to only request data that was changed after the previous update. If `update_field` is specified as part of the dictionary source configuration, the value of the previous update time in seconds will be added to the data request. Depending on the source type (Executable, HTTP, MySQL, PostgreSQL, ClickHouse, or ODBC), different logic will be applied to `update_field` before requesting data from an external source.
- If the source is HTTP then `update_field` will be added as a query parameter with the last update time as the parameter value.
- If the source is Executable then `update_field` will be added as an executable script argument with the last update time as the argument value.
- If the source is ClickHouse, MySQL, PostgreSQL, ODBC there will be an additional part of `WHERE`, where `update_field` is compared as greater or equal with the last update time.
- By default, this `WHERE`-condition is checked at the highest level of the SQL query. Alternatively, the condition can be checked in any other `WHERE`-clause within the query using the `{condition}`-keyword. Example:
```sql
...
SOURCE(CLICKHOUSE(...
update_field 'added_time'
QUERY '
SELECT my_arr.1 AS x, my_arr.2 AS y, creation_time
FROM (
SELECT arrayZip(x_arr, y_arr) AS my_arr, creation_time
FROM dictionary_source
WHERE {condition}
)'
))
...
```
If the `update_field` option is set, the additional option `update_lag` can be set. The value of the `update_lag` option is subtracted from the previous update time before requesting updated data.
Example of settings:
``` xml
<dictionary>
...
<clickhouse>
...
<update_field>added_time</update_field>
<update_lag>15</update_lag>
</clickhouse>
...
</dictionary>
```
or
``` sql
...
SOURCE(CLICKHOUSE(... update_field 'added_time' update_lag 15))
...
```
## Related Content
- [Using dictionaries to accelerate queries](https://clickhouse.com/blog/faster-queries-dictionaries-clickhouse)

View File

@ -1,140 +0,0 @@
---
slug: /en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-polygon
sidebar_position: 46
sidebar_label: Polygon Dictionaries With Grids
title: "Polygon dictionaries"
---
import CloudDetails from '@site/docs/en/sql-reference/dictionaries/external-dictionaries/_snippet_dictionary_in_cloud.md';
Polygon dictionaries allow you to efficiently search for the polygon containing specified points.
For example: defining a city area by geographical coordinates.
Example of a polygon dictionary configuration:
<CloudDetails />
``` xml
<dictionary>
<structure>
<key>
<attribute>
<name>key</name>
<type>Array(Array(Array(Array(Float64))))</type>
</attribute>
</key>
<attribute>
<name>name</name>
<type>String</type>
<null_value></null_value>
</attribute>
<attribute>
<name>value</name>
<type>UInt64</type>
<null_value>0</null_value>
</attribute>
</structure>
<layout>
<polygon>
<store_polygon_key_column>1</store_polygon_key_column>
</polygon>
</layout>
...
</dictionary>
```
The corresponding [DDL-query](../../../sql-reference/statements/create/dictionary.md#create-dictionary-query):
``` sql
CREATE DICTIONARY polygon_dict_name (
key Array(Array(Array(Array(Float64)))),
name String,
value UInt64
)
PRIMARY KEY key
LAYOUT(POLYGON(STORE_POLYGON_KEY_COLUMN 1))
...
```
When configuring the polygon dictionary, the key must have one of two types:
- A simple polygon. It is an array of points.
- MultiPolygon. It is an array of polygons. Each polygon is a two-dimensional array of points. The first element of this array is the outer boundary of the polygon, and subsequent elements specify areas to be excluded from it.
Points can be specified as an array or a tuple of their coordinates. In the current implementation, only two-dimensional points are supported.
The user can [upload their own data](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md) in all formats supported by ClickHouse.
There are 3 types of [in-memory storage](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md) available:
- `POLYGON_SIMPLE`. This is a naive implementation, where a linear pass through all polygons is made for each query, and membership is checked for each one without using additional indexes.
- `POLYGON_INDEX_EACH`. A separate index is built for each polygon, which allows you to quickly check whether it belongs in most cases (optimized for geographical regions).
Also, a grid is superimposed on the area under consideration, which significantly narrows the number of polygons under consideration.
The grid is created by recursively dividing the cell into 16 equal parts and is configured with two parameters.
The division stops when the recursion depth reaches `MAX_DEPTH` or when the cell crosses no more than `MIN_INTERSECTIONS` polygons.
To respond to the query, there is a corresponding cell, and the index for the polygons stored in it is accessed alternately.
- `POLYGON_INDEX_CELL`. This placement also creates the grid described above. The same options are available. For each leaf cell of the grid, an index is built on all pieces of polygons that fall into it, which allows you to quickly respond to a request.
- `POLYGON`. Synonym to `POLYGON_INDEX_CELL`.
Dictionary queries are carried out using standard [functions](../../../sql-reference/functions/ext-dict-functions.md) for working with dictionaries.
An important difference is that here the keys will be the points for which you want to find the polygon containing them.
**Example**
Example of working with the dictionary defined above:
``` sql
CREATE TABLE points (
x Float64,
y Float64
)
...
SELECT tuple(x, y) AS key, dictGet(dict_name, 'name', key), dictGet(dict_name, 'value', key) FROM points ORDER BY x, y;
```
As a result of executing the last command for each point in the 'points' table, a minimum area polygon containing this point will be found, and the requested attributes will be output.
**Example**
You can read columns from polygon dictionaries via SELECT query, just turn on the `store_polygon_key_column = 1` in the dictionary configuration or corresponding DDL-query.
Query:
``` sql
CREATE TABLE polygons_test_table
(
key Array(Array(Array(Tuple(Float64, Float64)))),
name String
) ENGINE = TinyLog;
INSERT INTO polygons_test_table VALUES ([[[(3, 1), (0, 1), (0, -1), (3, -1)]]], 'Value');
CREATE DICTIONARY polygons_test_dictionary
(
key Array(Array(Array(Tuple(Float64, Float64)))),
name String
)
PRIMARY KEY key
SOURCE(CLICKHOUSE(TABLE 'polygons_test_table'))
LAYOUT(POLYGON(STORE_POLYGON_KEY_COLUMN 1))
LIFETIME(0);
SELECT * FROM polygons_test_dictionary;
```
Result:
``` text
┌─key─────────────────────────────┬─name──┐
│ [[[(3,1),(0,1),(0,-1),(3,-1)]]] │ Value │
└─────────────────────────────────┴───────┘
```
## Related Content
- [Exploring massive, real-world data sets: 100+ Years of Weather Records in ClickHouse](https://clickhouse.com/blog/real-world-data-noaa-climate-data)

View File

@ -1,847 +0,0 @@
---
slug: /en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources
sidebar_position: 43
sidebar_label: Dictionary Sources
---
import CloudDetails from '@site/docs/en/sql-reference/dictionaries/external-dictionaries/_snippet_dictionary_in_cloud.md';
# Dictionary Sources
<CloudDetails />
A dictionary can be connected to ClickHouse from many different sources.
If the dictionary is configured using an xml-file, the configuration looks like this:
``` xml
<clickhouse>
<dictionary>
...
<source>
<source_type>
<!-- Source configuration -->
</source_type>
</source>
...
</dictionary>
...
</clickhouse>
```
In case of [DDL-query](../../../sql-reference/statements/create/dictionary.md), the configuration described above will look like:
``` sql
CREATE DICTIONARY dict_name (...)
...
SOURCE(SOURCE_TYPE(param1 val1 ... paramN valN)) -- Source configuration
...
```
The source is configured in the `source` section.
For source types [Local file](#dicts-external_dicts_dict_sources-local_file), [Executable file](#dicts-external_dicts_dict_sources-executable), [HTTP(s)](#dicts-external_dicts_dict_sources-http), [ClickHouse](#dicts-external_dicts_dict_sources-clickhouse)
optional settings are available:
``` xml
<source>
<file>
<path>/opt/dictionaries/os.tsv</path>
<format>TabSeparated</format>
</file>
<settings>
<format_csv_allow_single_quotes>0</format_csv_allow_single_quotes>
</settings>
</source>
```
or
``` sql
SOURCE(FILE(path './user_files/os.tsv' format 'TabSeparated'))
SETTINGS(format_csv_allow_single_quotes = 0)
```
Types of sources (`source_type`):
- [Local file](#dicts-external_dicts_dict_sources-local_file)
- [Executable File](#dicts-external_dicts_dict_sources-executable)
- [Executable Pool](#dicts-external_dicts_dict_sources-executable_pool)
- [HTTP(s)](#dicts-external_dicts_dict_sources-http)
- DBMS
- [ODBC](#odbc)
- [MySQL](#mysql)
- [ClickHouse](#clickhouse)
- [MongoDB](#mongodb)
- [Redis](#redis)
- [Cassandra](#cassandra)
- [PostgreSQL](#postgresql)
## Local File
Example of settings:
``` xml
<source>
<file>
<path>/opt/dictionaries/os.tsv</path>
<format>TabSeparated</format>
</file>
</source>
```
or
``` sql
SOURCE(FILE(path './user_files/os.tsv' format 'TabSeparated'))
```
Setting fields:
- `path` The absolute path to the file.
- `format` The file format. All the formats described in [Formats](../../../interfaces/formats.md#formats) are supported.
When a dictionary with source `FILE` is created via DDL command (`CREATE DICTIONARY ...`), the source file needs to be located in the `user_files` directory to prevent DB users from accessing arbitrary files on the ClickHouse node.
**See Also**
- [Dictionary function](../../../sql-reference/table-functions/dictionary.md#dictionary-function)
## Executable File
Working with executable files depends on [how the dictionary is stored in memory](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md). If the dictionary is stored using `cache` and `complex_key_cache`, ClickHouse requests the necessary keys by sending a request to the executable file's STDIN. Otherwise, ClickHouse starts the executable file and treats its output as dictionary data.
Example of settings:
``` xml
<source>
<executable>
<command>cat /opt/dictionaries/os.tsv</command>
<format>TabSeparated</format>
<implicit_key>false</implicit_key>
</executable>
</source>
```
Setting fields:
- `command` — The absolute path to the executable file, or the file name (if the command's directory is in the `PATH`).
- `format` — The file format. All the formats described in [Formats](../../../interfaces/formats.md#formats) are supported.
- `command_termination_timeout` — The executable script should contain a main read-write loop. After the dictionary is destroyed, the pipe is closed, and the executable file will have `command_termination_timeout` seconds to shutdown before ClickHouse will send a SIGTERM signal to the child process. `command_termination_timeout` is specified in seconds. Default value is 10. Optional parameter.
- `command_read_timeout` - Timeout for reading data from command stdout in milliseconds. Default value 10000. Optional parameter.
- `command_write_timeout` - Timeout for writing data to command stdin in milliseconds. Default value 10000. Optional parameter.
- `implicit_key` — The executable source file can return only values, and the correspondence to the requested keys is determined implicitly — by the order of rows in the result. Default value is false.
- `execute_direct` - If `execute_direct` = `1`, then `command` will be searched inside user_scripts folder specified by [user_scripts_path](../../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_scripts_path). Additional script arguments can be specified using a whitespace separator. Example: `script_name arg1 arg2`. If `execute_direct` = `0`, `command` is passed as argument for `bin/sh -c`. Default value is `0`. Optional parameter.
- `send_chunk_header` - controls whether to send row count before sending a chunk of data to process. Optional. Default value is `false`.
That dictionary source can be configured only via XML configuration. Creating dictionaries with executable source via DDL is disabled; otherwise, the DB user would be able to execute arbitrary binaries on the ClickHouse node.
## Executable Pool
Executable pool allows loading data from pool of processes. This source does not work with dictionary layouts that need to load all data from source. Executable pool works if the dictionary [is stored](external-dicts-dict-layout.md#ways-to-store-dictionaries-in-memory) using `cache`, `complex_key_cache`, `ssd_cache`, `complex_key_ssd_cache`, `direct`, or `complex_key_direct` layouts.
Executable pool will spawn a pool of processes with the specified command and keep them running until they exit. The program should read data from STDIN while it is available and output the result to STDOUT. It can wait for the next block of data on STDIN. ClickHouse will not close STDIN after processing a block of data, but will pipe another chunk of data when needed. The executable script should be ready for this way of data processing — it should poll STDIN and flush data to STDOUT early.
Example of settings:
``` xml
<source>
<executable_pool>
<command>while read key; do printf "$key\tData for key $key\n"; done</command>
<format>TabSeparated</format>
<pool_size>10</pool_size>
<max_command_execution_time>10</max_command_execution_time>
<implicit_key>false</implicit_key>
</executable_pool>
</source>
```
Setting fields:
- `command` — The absolute path to the executable file, or the file name (if the program directory is written to `PATH`).
- `format` — The file format. All the formats described in “[Formats](../../../interfaces/formats.md#formats)” are supported.
- `pool_size` — Size of pool. If 0 is specified as `pool_size` then there is no pool size restrictions. Default value is `16`.
- `command_termination_timeout` — executable script should contain main read-write loop. After dictionary is destroyed, pipe is closed, and executable file will have `command_termination_timeout` seconds to shutdown, before ClickHouse will send SIGTERM signal to child process. Specified in seconds. Default value is 10. Optional parameter.
- `max_command_execution_time` — Maximum executable script command execution time for processing block of data. Specified in seconds. Default value is 10. Optional parameter.
- `command_read_timeout` - timeout for reading data from command stdout in milliseconds. Default value 10000. Optional parameter.
- `command_write_timeout` - timeout for writing data to command stdin in milliseconds. Default value 10000. Optional parameter.
- `implicit_key` — The executable source file can return only values, and the correspondence to the requested keys is determined implicitly — by the order of rows in the result. Default value is false. Optional parameter.
- `execute_direct` - If `execute_direct` = `1`, then `command` will be searched inside user_scripts folder specified by [user_scripts_path](../../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_scripts_path). Additional script arguments can be specified using whitespace separator. Example: `script_name arg1 arg2`. If `execute_direct` = `0`, `command` is passed as argument for `bin/sh -c`. Default value is `1`. Optional parameter.
- `send_chunk_header` - controls whether to send row count before sending a chunk of data to process. Optional. Default value is `false`.
That dictionary source can be configured only via XML configuration. Creating dictionaries with executable source via DDL is disabled, otherwise, the DB user would be able to execute arbitrary binary on ClickHouse node.
## Http(s)
Working with an HTTP(s) server depends on [how the dictionary is stored in memory](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md). If the dictionary is stored using `cache` and `complex_key_cache`, ClickHouse requests the necessary keys by sending a request via the `POST` method.
Example of settings:
``` xml
<source>
<http>
<url>http://[::1]/os.tsv</url>
<format>TabSeparated</format>
<credentials>
<user>user</user>
<password>password</password>
</credentials>
<headers>
<header>
<name>API-KEY</name>
<value>key</value>
</header>
</headers>
</http>
</source>
```
or
``` sql
SOURCE(HTTP(
url 'http://[::1]/os.tsv'
format 'TabSeparated'
credentials(user 'user' password 'password')
headers(header(name 'API-KEY' value 'key'))
))
```
In order for ClickHouse to access an HTTPS resource, you must [configure openSSL](../../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-openssl) in the server configuration.
Setting fields:
- `url` — The source URL.
- `format` — The file format. All the formats described in “[Formats](../../../interfaces/formats.md#formats)” are supported.
- `credentials` — Basic HTTP authentication. Optional parameter.
- `user` — Username required for the authentication.
- `password` — Password required for the authentication.
- `headers` — All custom HTTP header entries used for the HTTP request. Optional parameter.
- `header` — Single HTTP header entry.
- `name` — Identifier name used for the header sent in the request.
- `value` — Value set for a specific identifier name.
When creating a dictionary using the DDL command (`CREATE DICTIONARY ...`), remote hosts for HTTP dictionaries are checked against the contents of the `remote_url_allow_hosts` section from the config to prevent database users from accessing arbitrary HTTP servers.
### Known Vulnerability of the ODBC Dictionary Functionality
:::note
When connecting to the database through the ODBC driver connection parameter `Servername` can be substituted. In this case values of `USERNAME` and `PASSWORD` from `odbc.ini` are sent to the remote server and can be compromised.
:::
**Example of insecure use**
Let's configure unixODBC for PostgreSQL. Content of `/etc/odbc.ini`:
``` text
[gregtest]
Driver = /usr/lib/psqlodbca.so
Servername = localhost
PORT = 5432
DATABASE = test_db
#OPTION = 3
USERNAME = test
PASSWORD = test
```
If you then make a query such as
``` sql
SELECT * FROM odbc('DSN=gregtest;Servername=some-server.com', 'test_db');
```
ODBC driver will send values of `USERNAME` and `PASSWORD` from `odbc.ini` to `some-server.com`.
### Example of Connecting PostgreSQL
Ubuntu OS.
Installing unixODBC and the ODBC driver for PostgreSQL:
``` bash
$ sudo apt-get install -y unixodbc odbcinst odbc-postgresql
```
Configuring `/etc/odbc.ini` (or `~/.odbc.ini` if you signed in under a user that runs ClickHouse):
``` text
[DEFAULT]
Driver = myconnection
[myconnection]
Description = PostgreSQL connection to my_db
Driver = PostgreSQL Unicode
Database = my_db
Servername = 127.0.0.1
UserName = username
Password = password
Port = 5432
Protocol = 9.3
ReadOnly = No
RowVersioning = No
ShowSystemTables = No
ConnSettings =
```
The dictionary configuration in ClickHouse:
``` xml
<clickhouse>
<dictionary>
<name>table_name</name>
<source>
<odbc>
<!-- You can specify the following parameters in connection_string: -->
<!-- DSN=myconnection;UID=username;PWD=password;HOST=127.0.0.1;PORT=5432;DATABASE=my_db -->
<connection_string>DSN=myconnection</connection_string>
<table>postgresql_table</table>
</odbc>
</source>
<lifetime>
<min>300</min>
<max>360</max>
</lifetime>
<layout>
<hashed/>
</layout>
<structure>
<id>
<name>id</name>
</id>
<attribute>
<name>some_column</name>
<type>UInt64</type>
<null_value>0</null_value>
</attribute>
</structure>
</dictionary>
</clickhouse>
```
or
``` sql
CREATE DICTIONARY table_name (
id UInt64,
some_column UInt64 DEFAULT 0
)
PRIMARY KEY id
SOURCE(ODBC(connection_string 'DSN=myconnection' table 'postgresql_table'))
LAYOUT(HASHED())
LIFETIME(MIN 300 MAX 360)
```
You may need to edit `odbc.ini` to specify the full path to the library with the driver `DRIVER=/usr/local/lib/psqlodbcw.so`.
### Example of Connecting MS SQL Server
Ubuntu OS.
Installing the ODBC driver for connecting to MS SQL:
``` bash
$ sudo apt-get install tdsodbc freetds-bin sqsh
```
Configuring the driver:
```bash
$ cat /etc/freetds/freetds.conf
...
[MSSQL]
host = 192.168.56.101
port = 1433
tds version = 7.0
client charset = UTF-8
# test TDS connection
$ sqsh -S MSSQL -D database -U user -P password
$ cat /etc/odbcinst.ini
[FreeTDS]
Description = FreeTDS
Driver = /usr/lib/x86_64-linux-gnu/odbc/libtdsodbc.so
Setup = /usr/lib/x86_64-linux-gnu/odbc/libtdsS.so
FileUsage = 1
UsageCount = 5
$ cat /etc/odbc.ini
# $ cat ~/.odbc.ini # if you signed in under a user that runs ClickHouse
[MSSQL]
Description = FreeTDS
Driver = FreeTDS
Servername = MSSQL
Database = test
UID = test
PWD = test
Port = 1433
# (optional) test ODBC connection (to use isql-tool install the [unixodbc](https://packages.debian.org/sid/unixodbc)-package)
$ isql -v MSSQL "user" "password"
```
Remarks:
- to determine the earliest TDS version that is supported by a particular SQL Server version, refer to the product documentation or look at [MS-TDS Product Behavior](https://docs.microsoft.com/en-us/openspecs/windows_protocols/ms-tds/135d0ebe-5c4c-4a94-99bf-1811eccb9f4a)
Configuring the dictionary in ClickHouse:
``` xml
<clickhouse>
<dictionary>
<name>test</name>
<source>
<odbc>
<table>dict</table>
<connection_string>DSN=MSSQL;UID=test;PWD=test</connection_string>
</odbc>
</source>
<lifetime>
<min>300</min>
<max>360</max>
</lifetime>
<layout>
<flat />
</layout>
<structure>
<id>
<name>k</name>
</id>
<attribute>
<name>s</name>
<type>String</type>
<null_value></null_value>
</attribute>
</structure>
</dictionary>
</clickhouse>
```
or
``` sql
CREATE DICTIONARY test (
k UInt64,
s String DEFAULT ''
)
PRIMARY KEY k
SOURCE(ODBC(table 'dict' connection_string 'DSN=MSSQL;UID=test;PWD=test'))
LAYOUT(FLAT())
LIFETIME(MIN 300 MAX 360)
```
## DBMS
### ODBC
You can use this method to connect any database that has an ODBC driver.
Example of settings:
``` xml
<source>
<odbc>
<db>DatabaseName</db>
<table>SchemaName.TableName</table>
<connection_string>DSN=some_parameters</connection_string>
<invalidate_query>SQL_QUERY</invalidate_query>
<query>SELECT id, value_1, value_2 FROM SchemaName.TableName</query>
</odbc>
</source>
```
or
``` sql
SOURCE(ODBC(
db 'DatabaseName'
table 'SchemaName.TableName'
connection_string 'DSN=some_parameters'
invalidate_query 'SQL_QUERY'
query 'SELECT id, value_1, value_2 FROM db_name.table_name'
))
```
Setting fields:
- `db` Name of the database. Omit it if the database name is set in the `<connection_string>` parameters.
- `table` Name of the table, including the schema if one exists.
- `connection_string` Connection string.
- `invalidate_query` Query for checking the dictionary status. Optional parameter. Read more in the section [Updating dictionaries](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md).
- `query` The custom query. Optional parameter.
:::note
The `table` and `query` fields cannot be used together. And either one of the `table` or `query` fields must be declared.
:::
ClickHouse receives quoting symbols from the ODBC driver and quotes all settings in queries to the driver, so it's necessary to set the table name according to the table name case in the database.
If you have problems with encodings when using Oracle, see the corresponding [FAQ](../../../faq/integration/oracle-odbc.md) item.
### MySQL
Example of settings:
``` xml
<source>
<mysql>
<port>3306</port>
<user>clickhouse</user>
<password>qwerty</password>
<replica>
<host>example01-1</host>
<priority>1</priority>
</replica>
<replica>
<host>example01-2</host>
<priority>1</priority>
</replica>
<db>db_name</db>
<table>table_name</table>
<where>id=10</where>
<invalidate_query>SQL_QUERY</invalidate_query>
<fail_on_connection_loss>true</fail_on_connection_loss>
<query>SELECT id, value_1, value_2 FROM db_name.table_name</query>
</mysql>
</source>
```
or
``` sql
SOURCE(MYSQL(
port 3306
user 'clickhouse'
password 'qwerty'
replica(host 'example01-1' priority 1)
replica(host 'example01-2' priority 1)
db 'db_name'
table 'table_name'
where 'id=10'
invalidate_query 'SQL_QUERY'
fail_on_connection_loss 'true'
query 'SELECT id, value_1, value_2 FROM db_name.table_name'
))
```
Setting fields:
- `port` The port on the MySQL server. You can specify it for all replicas, or for each one individually (inside `<replica>`).
- `user` Name of the MySQL user. You can specify it for all replicas, or for each one individually (inside `<replica>`).
- `password` Password of the MySQL user. You can specify it for all replicas, or for each one individually (inside `<replica>`).
- `replica` Section of replica configurations. There can be multiple sections.
- `replica/host` The MySQL host.
- `replica/priority` The replica priority. When attempting to connect, ClickHouse traverses the replicas in order of priority. The lower the number, the higher the priority.
- `db` Name of the database.
- `table` Name of the table.
- `where` The selection criteria. The syntax for conditions is the same as for `WHERE` clause in MySQL, for example, `id > 10 AND id < 20`. Optional parameter.
- `invalidate_query` Query for checking the dictionary status. Optional parameter. Read more in the section [Updating dictionaries](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md).
- `fail_on_connection_loss` The configuration parameter that controls behavior of the server on connection loss. If `true`, an exception is thrown immediately if the connection between client and server was lost. If `false`, the ClickHouse server retries to execute the query three times before throwing an exception. Note that retrying leads to increased response times. Default value: `false`.
- `query` The custom query. Optional parameter.
:::note
The `table` or `where` fields cannot be used together with the `query` field. And either one of the `table` or `query` fields must be declared.
:::
:::note
There is no explicit parameter `secure`. When establishing an SSL-connection security is mandatory.
:::
MySQL can be connected to on a local host via sockets. To do this, set `host` and `socket`.
Example of settings:
``` xml
<source>
<mysql>
<host>localhost</host>
<socket>/path/to/socket/file.sock</socket>
<user>clickhouse</user>
<password>qwerty</password>
<db>db_name</db>
<table>table_name</table>
<where>id=10</where>
<invalidate_query>SQL_QUERY</invalidate_query>
<fail_on_connection_loss>true</fail_on_connection_loss>
<query>SELECT id, value_1, value_2 FROM db_name.table_name</query>
</mysql>
</source>
```
or
``` sql
SOURCE(MYSQL(
host 'localhost'
socket '/path/to/socket/file.sock'
user 'clickhouse'
password 'qwerty'
db 'db_name'
table 'table_name'
where 'id=10'
invalidate_query 'SQL_QUERY'
fail_on_connection_loss 'true'
query 'SELECT id, value_1, value_2 FROM db_name.table_name'
))
```
### ClickHouse
Example of settings:
``` xml
<source>
<clickhouse>
<host>example01-01-1</host>
<port>9000</port>
<user>default</user>
<password></password>
<db>default</db>
<table>ids</table>
<where>id=10</where>
<secure>1</secure>
<query>SELECT id, value_1, value_2 FROM default.ids</query>
</clickhouse>
</source>
```
or
``` sql
SOURCE(CLICKHOUSE(
host 'example01-01-1'
port 9000
user 'default'
password ''
db 'default'
table 'ids'
where 'id=10'
secure 1
query 'SELECT id, value_1, value_2 FROM default.ids'
));
```
Setting fields:
- `host` The ClickHouse host. If it is a local host, the query is processed without any network activity. To improve fault tolerance, you can create a [Distributed](../../../engines/table-engines/special/distributed.md) table and enter it in subsequent configurations.
- `port` The port on the ClickHouse server.
- `user` Name of the ClickHouse user.
- `password` Password of the ClickHouse user.
- `db` Name of the database.
- `table` Name of the table.
- `where` The selection criteria. May be omitted.
- `invalidate_query` Query for checking the dictionary status. Optional parameter. Read more in the section [Updating dictionaries](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md).
- `secure` - Use ssl for connection.
- `query` The custom query. Optional parameter.
:::note
The `table` or `where` fields cannot be used together with the `query` field. And either one of the `table` or `query` fields must be declared.
:::
### MongoDB
Example of settings:
``` xml
<source>
<mongodb>
<host>localhost</host>
<port>27017</port>
<user></user>
<password></password>
<db>test</db>
<collection>dictionary_source</collection>
</mongodb>
</source>
```
or
``` sql
SOURCE(MONGODB(
host 'localhost'
port 27017
user ''
password ''
db 'test'
collection 'dictionary_source'
))
```
Setting fields:
- `host` The MongoDB host.
- `port` The port on the MongoDB server.
- `user` Name of the MongoDB user.
- `password` Password of the MongoDB user.
- `db` Name of the database.
- `collection` Name of the collection.
### Redis
Example of settings:
``` xml
<source>
<redis>
<host>localhost</host>
<port>6379</port>
<storage_type>simple</storage_type>
<db_index>0</db_index>
</redis>
</source>
```
or
``` sql
SOURCE(REDIS(
host 'localhost'
port 6379
storage_type 'simple'
db_index 0
))
```
Setting fields:
- `host` The Redis host.
- `port` The port on the Redis server.
- `storage_type` The structure of internal Redis storage used for working with keys. `simple` is for simple sources and for hashed single-key sources, `hash_map` is for hashed sources with two keys. Ranged sources and cache sources with a complex key are unsupported. May be omitted, default value is `simple`.
- `db_index` The specific numeric index of Redis logical database. May be omitted, default value is 0.
### Cassandra
Example of settings:
``` xml
<source>
<cassandra>
<host>localhost</host>
<port>9042</port>
<user>username</user>
<password>qwerty123</password>
<keyspace>database_name</keyspace>
<column_family>table_name</column_family>
<allow_filtering>1</allow_filtering>
<partition_key_prefix>1</partition_key_prefix>
<consistency>One</consistency>
<where>"SomeColumn" = 42</where>
<max_threads>8</max_threads>
<query>SELECT id, value_1, value_2 FROM database_name.table_name</query>
</cassandra>
</source>
```
Setting fields:
- `host` The Cassandra host or comma-separated list of hosts.
- `port` The port on the Cassandra servers. If not specified, default port 9042 is used.
- `user` Name of the Cassandra user.
- `password` Password of the Cassandra user.
- `keyspace` Name of the keyspace (database).
- `column_family` Name of the column family (table).
- `allow_filtering` Flag to allow or not potentially expensive conditions on clustering key columns. Default value is 1.
- `partition_key_prefix` Number of partition key columns in the primary key of the Cassandra table. Required for composite key dictionaries. The order of key columns in the dictionary definition must be the same as in Cassandra. Default value is 1 (the first key column is a partition key and the other key columns are clustering keys).
- `consistency` Consistency level. Possible values: `One`, `Two`, `Three`, `All`, `EachQuorum`, `Quorum`, `LocalQuorum`, `LocalOne`, `Serial`, `LocalSerial`. Default value is `One`.
- `where` Optional selection criteria.
- `max_threads` The maximum number of threads to use for loading data from multiple partitions in composite key dictionaries.
- `query` The custom query. Optional parameter.
:::note
The `column_family` or `where` fields cannot be used together with the `query` field. And either one of the `column_family` or `query` fields must be declared.
:::
### PostgreSQL
Example of settings:
``` xml
<source>
<postgresql>
<port>5432</port>
<user>clickhouse</user>
<password>qwerty</password>
<db>db_name</db>
<table>table_name</table>
<where>id=10</where>
<invalidate_query>SQL_QUERY</invalidate_query>
<query>SELECT id, value_1, value_2 FROM db_name.table_name</query>
</postgresql>
</source>
```
or
``` sql
SOURCE(POSTGRESQL(
port 5432
host 'postgresql-hostname'
user 'postgres_user'
password 'postgres_password'
db 'db_name'
table 'table_name'
replica(host 'example01-1' port 5432 priority 1)
replica(host 'example01-2' port 5432 priority 2)
where 'id=10'
invalidate_query 'SQL_QUERY'
query 'SELECT id, value_1, value_2 FROM db_name.table_name'
))
```
Setting fields:
- `host` The host on the PostgreSQL server. You can specify it for all replicas, or for each one individually (inside `<replica>`).
- `port` The port on the PostgreSQL server. You can specify it for all replicas, or for each one individually (inside `<replica>`).
- `user` Name of the PostgreSQL user. You can specify it for all replicas, or for each one individually (inside `<replica>`).
- `password` Password of the PostgreSQL user. You can specify it for all replicas, or for each one individually (inside `<replica>`).
- `replica` Section of replica configurations. There can be multiple sections:
- `replica/host` The PostgreSQL host.
- `replica/port` The PostgreSQL port.
- `replica/priority` The replica priority. When attempting to connect, ClickHouse traverses the replicas in order of priority. The lower the number, the higher the priority.
- `db` Name of the database.
- `table` Name of the table.
- `where` The selection criteria. The syntax for conditions is the same as for `WHERE` clause in PostgreSQL. For example, `id > 10 AND id < 20`. Optional parameter.
- `invalidate_query` Query for checking the dictionary status. Optional parameter. Read more in the section [Updating dictionaries](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md).
- `query` The custom query. Optional parameter.
:::note
The `table` or `where` fields cannot be used together with the `query` field. And either one of the `table` or `query` fields must be declared.
:::
## Null
A special source that can be used to create dummy (empty) dictionaries. Such dictionaries can be useful for tests or in setups with separated data and query nodes, on nodes with Distributed tables.
``` sql
CREATE DICTIONARY null_dict (
id UInt64,
val UInt8,
default_val UInt8 DEFAULT 123,
nullable_val Nullable(UInt8)
)
PRIMARY KEY id
SOURCE(NULL())
LAYOUT(FLAT())
LIFETIME(0);
```
## Related Content
- [Using dictionaries to accelerate queries](https://clickhouse.com/blog/faster-queries-dictionaries-clickhouse)

View File

@ -1,181 +0,0 @@
---
slug: /en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure
sidebar_position: 44
sidebar_label: Dictionary Key and Fields
---
import CloudDetails from '@site/docs/en/sql-reference/dictionaries/external-dictionaries/_snippet_dictionary_in_cloud.md';
# Dictionary Key and Fields
<CloudDetails />
The `structure` clause describes the dictionary key and fields available for queries.
XML description:
``` xml
<dictionary>
<structure>
<id>
<name>Id</name>
</id>
<attribute>
<!-- Attribute parameters -->
</attribute>
...
</structure>
</dictionary>
```
Attributes are described in the elements:
- `<id>` — [Key column](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md#ext_dict_structure-key).
- `<attribute>` — [Data column](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md#ext_dict_structure-attributes). There can be multiple attributes.
DDL query:
``` sql
CREATE DICTIONARY dict_name (
Id UInt64,
-- attributes
)
PRIMARY KEY Id
...
```
Attributes are described in the query body:
- `PRIMARY KEY` — [Key column](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md#ext_dict_structure-key)
- `AttrName AttrType` — [Data column](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md#ext_dict_structure-attributes). There can be multiple attributes.
## Key
ClickHouse supports the following types of keys:
- Numeric key. `UInt64`. Defined in the `<id>` tag or using `PRIMARY KEY` keyword.
- Composite key. Set of values of different types. Defined in the tag `<key>` or `PRIMARY KEY` keyword.
An XML structure can contain either `<id>` or `<key>`. A DDL query must contain a single `PRIMARY KEY`.
:::warning
You must not describe key as an attribute.
:::
### Numeric Key
Type: `UInt64`.
Configuration example:
``` xml
<id>
<name>Id</name>
</id>
```
Configuration fields:
- `name` The name of the column with keys.
For DDL-query:
``` sql
CREATE DICTIONARY (
Id UInt64,
...
)
PRIMARY KEY Id
...
```
- `PRIMARY KEY` The name of the column with keys.
### Composite Key
The key can be a `tuple` of fields of any type. The [layout](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md) in this case must be `complex_key_hashed` or `complex_key_cache`.
:::tip
A composite key can consist of a single element. This makes it possible to use a string as the key, for instance.
:::
The key structure is set in the element `<key>`. Key fields are specified in the same format as the dictionary [attributes](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md). Example:
``` xml
<structure>
<key>
<attribute>
<name>field1</name>
<type>String</type>
</attribute>
<attribute>
<name>field2</name>
<type>UInt32</type>
</attribute>
...
</key>
...
```
or
``` sql
CREATE DICTIONARY (
field1 String,
field2 UInt32
...
)
PRIMARY KEY field1, field2
...
```
For a query to the `dictGet*` function, a tuple is passed as the key. Example: `dictGetString('dict_name', 'attr_name', tuple('string for field1', num_for_field2))`.
## Attributes
Configuration example:
``` xml
<structure>
...
<attribute>
<name>Name</name>
<type>ClickHouseDataType</type>
<null_value></null_value>
<expression>rand64()</expression>
<hierarchical>true</hierarchical>
<injective>true</injective>
<is_object_id>true</is_object_id>
</attribute>
</structure>
```
or
``` sql
CREATE DICTIONARY somename (
Name ClickHouseDataType DEFAULT '' EXPRESSION rand64() HIERARCHICAL INJECTIVE IS_OBJECT_ID
)
```
Configuration fields:
| Tag | Description | Required |
|------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------|
| `name` | Column name. | Yes |
| `type` | ClickHouse data type: [UInt8](../../../sql-reference/data-types/int-uint.md), [UInt16](../../../sql-reference/data-types/int-uint.md), [UInt32](../../../sql-reference/data-types/int-uint.md), [UInt64](../../../sql-reference/data-types/int-uint.md), [Int8](../../../sql-reference/data-types/int-uint.md), [Int16](../../../sql-reference/data-types/int-uint.md), [Int32](../../../sql-reference/data-types/int-uint.md), [Int64](../../../sql-reference/data-types/int-uint.md), [Float32](../../../sql-reference/data-types/float.md), [Float64](../../../sql-reference/data-types/float.md), [UUID](../../../sql-reference/data-types/uuid.md), [Decimal32](../../../sql-reference/data-types/decimal.md), [Decimal64](../../../sql-reference/data-types/decimal.md), [Decimal128](../../../sql-reference/data-types/decimal.md), [Decimal256](../../../sql-reference/data-types/decimal.md), [Date](../../../sql-reference/data-types/date.md), [Date32](../../../sql-reference/data-types/date32.md), [DateTime](../../../sql-reference/data-types/datetime.md), [DateTime64](../../../sql-reference/data-types/datetime64.md), [String](../../../sql-reference/data-types/string.md), [Array](../../../sql-reference/data-types/array.md).<br/>ClickHouse tries to cast value from dictionary to the specified data type. 
For example, for MySQL, the field might be `TEXT`, `VARCHAR`, or `BLOB` in the MySQL source table, but it can be uploaded as `String` in ClickHouse.<br/>[Nullable](../../../sql-reference/data-types/nullable.md) is currently supported for [Flat](external-dicts-dict-layout.md#flat), [Hashed](external-dicts-dict-layout.md#dicts-external_dicts_dict_layout-hashed), [ComplexKeyHashed](external-dicts-dict-layout.md#complex-key-hashed), [Direct](external-dicts-dict-layout.md#direct), [ComplexKeyDirect](external-dicts-dict-layout.md#complex-key-direct), [RangeHashed](external-dicts-dict-layout.md#range-hashed), [Polygon](external-dicts-dict-polygon.md), [Cache](external-dicts-dict-layout.md#cache), [ComplexKeyCache](external-dicts-dict-layout.md#complex-key-cache), [SSDCache](external-dicts-dict-layout.md#ssd-cache), [SSDComplexKeyCache](external-dicts-dict-layout.md#complex-key-ssd-cache) dictionaries. In [IPTrie](external-dicts-dict-layout.md#ip-trie) dictionaries `Nullable` types are not supported. | Yes |
| `null_value` | Default value for a non-existing element.<br/>In the example, it is an empty string. [NULL](../../syntax.md#null-literal) value can be used only for the `Nullable` types (see the previous line with types description). | Yes |
| `expression` | [Expression](../../../sql-reference/syntax.md#syntax-expressions) that ClickHouse executes on the value.<br/>The expression can be a column name in the remote SQL database. Thus, you can use it to create an alias for the remote column.<br/><br/>Default value: no expression. | No |
| <a name="hierarchical-dict-attr"></a> `hierarchical` | If `true`, the attribute contains the value of a parent key for the current key. See [Hierarchical Dictionaries](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-hierarchical.md).<br/><br/>Default value: `false`. | No |
| `injective` | Flag that shows whether the `id -> attribute` image is [injective](https://en.wikipedia.org/wiki/Injective_function).<br/>If `true`, ClickHouse can automatically place after the `GROUP BY` clause the requests to dictionaries with injection. Usually it significantly reduces the amount of such requests.<br/><br/>Default value: `false`. | No |
| `is_object_id` | Flag that shows whether the query is executed for a MongoDB document by `ObjectID`.<br/><br/>Default value: `false`. | No |
**See Also**
- [Functions for working with dictionaries](../../../sql-reference/functions/ext-dict-functions.md).
## Related Content
- [Using dictionaries to accelerate queries](https://clickhouse.com/blog/faster-queries-dictionaries-clickhouse)

View File

@ -1,57 +0,0 @@
---
slug: /en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict
sidebar_position: 40
sidebar_label: Configuring a Dictionary
---
import CloudDetails from '@site/docs/en/sql-reference/dictionaries/external-dictionaries/_snippet_dictionary_in_cloud.md';
# Configuring a Dictionary
<CloudDetails />
If a dictionary is configured using an XML file, then the dictionary configuration has the following structure:
``` xml
<dictionary>
<name>dict_name</name>
<structure>
<!-- Complex key configuration -->
</structure>
<source>
<!-- Source configuration -->
</source>
<layout>
<!-- Memory layout configuration -->
</layout>
<lifetime>
<!-- Lifetime of dictionary in memory -->
</lifetime>
</dictionary>
```
Corresponding [DDL-query](../../../sql-reference/statements/create/dictionary.md) has the following structure:
``` sql
CREATE DICTIONARY dict_name
(
... -- attributes
)
PRIMARY KEY ... -- complex or single key configuration
SOURCE(...) -- Source configuration
LAYOUT(...) -- Memory layout configuration
LIFETIME(...) -- Lifetime of dictionary in memory
```
- `name` The identifier that can be used to access the dictionary. Use the characters `[a-zA-Z0-9_\-]`.
- [source](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md) — Source of the dictionary.
- [layout](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md) — Dictionary layout in memory.
- [structure](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md) — Structure of the dictionary. A key and attributes that can be retrieved by this key.
- [lifetime](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md) — Frequency of dictionary updates.
## Related Content
- [Using dictionaries to accelerate queries](https://clickhouse.com/blog/faster-queries-dictionaries-clickhouse)

View File

@ -1,84 +0,0 @@
---
slug: /en/sql-reference/dictionaries/external-dictionaries/external-dicts
sidebar_position: 39
sidebar_label: General Description
---
import CloudDetails from '@site/docs/en/sql-reference/dictionaries/external-dictionaries/_snippet_dictionary_in_cloud.md';
# Dictionaries
:::tip Tutorial
If you are getting started with Dictionaries in ClickHouse we have a tutorial that covers that topic. Take a look [here](/docs/en/tutorial.md).
:::
You can add your own dictionaries from various data sources. The source for a dictionary can be a ClickHouse table, a local text or executable file, an HTTP(s) resource, or another DBMS. For more information, see “[Dictionary Sources](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md)”.
ClickHouse:
- Fully or partially stores dictionaries in RAM.
- Periodically updates dictionaries and dynamically loads missing values. In other words, dictionaries can be loaded dynamically.
- Allows creating dictionaries with xml files or [DDL queries](../../../sql-reference/statements/create/dictionary.md).
The configuration of dictionaries can be located in one or more xml-files. The path to the configuration is specified in the [dictionaries_config](../../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-dictionaries_config) parameter.
Dictionaries can be loaded at server startup or at first use, depending on the [dictionaries_lazy_load](../../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-dictionaries_lazy_load) setting.
The [dictionaries](../../../operations/system-tables/dictionaries.md#system_tables-dictionaries) system table contains information about dictionaries configured at server. For each dictionary you can find there:
- Status of the dictionary.
- Configuration parameters.
- Metrics like amount of RAM allocated for the dictionary or a number of queries since the dictionary was successfully loaded.
<CloudDetails />
## Creating a dictionary with a DDL query
Dictionaries can be created with [DDL queries](../../../sql-reference/statements/create/dictionary.md), and this is the recommended method because with DDL created dictionaries:
- No additional records are added to server configuration files
- The dictionaries can be worked with as first-class entities, like tables or views
- Data can be read directly, using familiar SELECT rather than dictionary table functions
- The dictionaries can be easily renamed
## Creating a dictionary with a configuration file
:::note
Creating a dictionary with a configuration file is not applicable to ClickHouse Cloud. Please use DDL (see above), and create your dictionary as user `default`.
:::
The dictionary configuration file has the following format:
``` xml
<clickhouse>
<comment>An optional element with any content. Ignored by the ClickHouse server.</comment>
<!--Optional element. File name with substitutions-->
<include_from>/etc/metrika.xml</include_from>
<dictionary>
<!-- Dictionary configuration. -->
<!-- There can be any number of <dictionary> sections in the configuration file. -->
</dictionary>
</clickhouse>
```
You can [configure](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict.md) any number of dictionaries in the same file.
:::note
You can convert values for a small dictionary by describing it in a `SELECT` query (see the [transform](../../../sql-reference/functions/other-functions.md) function). This functionality is not related to dictionaries.
:::
## See Also
- [Configuring a Dictionary](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict.md)
- [Storing Dictionaries in Memory](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md)
- [Dictionary Updates](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md)
- [Dictionary Sources](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md)
- [Dictionary Key and Fields](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md)
- [Functions for Working with Dictionaries](../../../sql-reference/functions/ext-dict-functions.md)
## Related Content
- [Using dictionaries to accelerate queries](https://clickhouse.com/blog/faster-queries-dictionaries-clickhouse)

View File

@ -1,76 +0,0 @@
---
slug: /en/sql-reference/dictionaries/external-dictionaries/regexp-tree
sidebar_position: 47
sidebar_label: RegExp Tree Dictionary
title: "RegExp Tree Dictionary"
---
import CloudDetails from '@site/docs/en/sql-reference/dictionaries/external-dictionaries/_snippet_dictionary_in_cloud.md';
Regexp Tree dictionary stores multiple trees of regular expressions with attributions. Users can retrieve strings in the dictionary. If a string matches the root of the regexp tree, we will collect the corresponding attributes of the matched root and continue to walk the children. If any of the children matches the string, we will collect attributes and rewrite the old ones if conflicts occur, then continue the traverse until we reach leaf nodes.
Example of the ddl query for creating Regexp Tree dictionary:
<CloudDetails />
```sql
create dictionary regexp_dict
(
regexp String,
name String,
version String
)
PRIMARY KEY(regexp)
SOURCE(YAMLRegExpTree(PATH '/var/lib/clickhouse/user_files/regexp_tree.yaml'))
LAYOUT(regexp_tree)
...
```
We only allow `YAMLRegExpTree` to work with the `regexp_tree` dictionary layout. If you want to use other sources, please set the variable `regexp_dict_allow_other_sources` to true.
**Source**
We introduce a type of source called `YAMLRegExpTree` representing the structure of a Regexp Tree dictionary. An example of a valid YAML config looks like this:
```yaml
- regexp: 'Linux/(\d+[\.\d]*).+tlinux'
name: 'TencentOS'
version: '\1'
- regexp: '\d+/tclwebkit(?:\d+[\.\d]*)'
name: 'Andriod'
versions:
- regexp: '33/tclwebkit'
version: '13'
- regexp: '3[12]/tclwebkit'
version: '12'
- regexp: '30/tclwebkit'
version: '11'
- regexp: '29/tclwebkit'
version: '10'
```
The key `regexp` represents the regular expression of a tree node. The name of the key is the same as the dictionary key. `name` and `version` are user-defined attributes in the dictionary. `versions` (which can be any name that does not appear among the attributes or the key) indicates the child nodes of this tree.
**Back Reference**
The value of an attribute can contain a back reference, which refers to a capture group of the matched regular expression. The reference number ranges from 1 to 9 and is written as `$1` or `\1`.
During the query execution, the back reference in the value will be replaced by the matched capture group.
**Query**
Because of the special nature of the Regexp Tree dictionary, only the functions `dictGet`, `dictGetOrDefault` and `dictGetOrNull` are allowed to work with it.
Example:
```sql
SELECT dictGet('regexp_dict', ('name', 'version'), '31/tclwebkit1024');
```
Result:
```
┌─dictGet('regexp_dict', ('name', 'version'), '31/tclwebkit1024')─┐
│ ('Andriod','12') │
└─────────────────────────────────────────────────────────────────┘
```

File diff suppressed because it is too large Load Diff

View File

@ -1,55 +0,0 @@
---
slug: /en/sql-reference/dictionaries/internal-dicts
sidebar_position: 39
sidebar_label: Embedded Dictionaries
---
import SelfManaged from '@site/docs/en/_snippets/_self_managed_only_no_roadmap.md';
# Embedded Dictionaries
<SelfManaged />
ClickHouse contains a built-in feature for working with a geobase.
This allows you to:
- Use a region's ID to get its name in the desired language.
- Use a region's ID to get the ID of a city, area, federal district, country, or continent.
- Check whether a region is part of another region.
- Get a chain of parent regions.
All the functions support “translocality,” the ability to simultaneously use different perspectives on region ownership. For more information, see the section “Functions for working with web analytics dictionaries”.
The internal dictionaries are disabled in the default package.
To enable them, uncomment the parameters `path_to_regions_hierarchy_file` and `path_to_regions_names_files` in the server configuration file.
The geobase is loaded from text files.
Place the `regions_hierarchy*.txt` files into the `path_to_regions_hierarchy_file` directory. This configuration parameter must contain the path to the `regions_hierarchy.txt` file (the default regional hierarchy), and the other files (`regions_hierarchy_ua.txt`) must be located in the same directory.
Put the `regions_names_*.txt` files in the `path_to_regions_names_files` directory.
You can also create these files yourself. The file format is as follows:
`regions_hierarchy*.txt`: TabSeparated (no header), columns:
- region ID (`UInt32`)
- parent region ID (`UInt32`)
- region type (`UInt8`): 1 - continent, 3 - country, 4 - federal district, 5 - region, 6 - city; other types do not have values
- population (`UInt32`) — optional column
`regions_names_*.txt`: TabSeparated (no header), columns:
- region ID (`UInt32`)
- region name (`String`) — Can't contain tabs or line feeds, even escaped ones.
A flat array is used for storing in RAM. For this reason, IDs shouldn't be more than a million.
Dictionaries can be updated without restarting the server. However, the set of available dictionaries is not updated.
For updates, the file modification times are checked. If a file has changed, the dictionary is updated.
The interval to check for changes is configured in the `builtin_dictionaries_reload_interval` parameter.
Dictionary updates (other than loading at first use) do not block queries. During updates, queries use the old versions of dictionaries. If an error occurs during an update, the error is written to the server log, and queries continue using the old version of dictionaries.
We recommend periodically updating the dictionaries with the geobase. During an update, generate new files and write them to a separate location. When everything is ready, rename them to the files used by the server.
There are also functions for working with OS identifiers and search engines, but they shouldn't be used.

View File

@ -283,7 +283,7 @@ Result:
```
:::note
The return type of `toStartOf*`, `toLastDayOfMonth`, `toMonday`, `timeSlot` functions described below is determined by the configuration parameter [enable_extended_results_for_datetime_functions](../../operations/settings/settings#enable-extended-results-for-datetime-functions) which is `0` by default.
The return type of `toStartOf*`, `toLastDayOfMonth`, `toMonday`, `timeSlot` functions described below is determined by the configuration parameter [enable_extended_results_for_datetime_functions](../../operations/settings/settings.md#enable-extended-results-for-datetime-functions) which is `0` by default.
Behavior for
* `enable_extended_results_for_datetime_functions = 0`: Functions `toStartOfYear`, `toStartOfISOYear`, `toStartOfQuarter`, `toStartOfMonth`, `toStartOfWeek`, `toLastDayOfMonth`, `toMonday` return `Date` or `DateTime`. Functions `toStartOfDay`, `toStartOfHour`, `toStartOfFifteenMinutes`, `toStartOfTenMinutes`, `toStartOfFiveMinutes`, `toStartOfMinute`, `timeSlot` return `DateTime`. Though these functions can take values of the extended types `Date32` and `DateTime64` as an argument, passing them a time outside the normal range (year 1970 to 2149 for `Date` / 2106 for `DateTime`) will produce wrong results.
@ -1135,7 +1135,7 @@ SELECT
```
```response
┌─toYYYYMM(now(), 'US/Eastern')─┐
│ 202303 │
│ 202303 │
└───────────────────────────────┘
```
@ -1335,7 +1335,7 @@ Similar to formatDateTime, except that it formats datetime in Joda style instead
**Replacement fields**
Using replacement fields, you can define a pattern for the resulting string.
Using replacement fields, you can define a pattern for the resulting string.
| Placeholder | Description | Presentation | Examples |

View File

@ -6,11 +6,11 @@ sidebar_label: Dictionaries
# Functions for Working with Dictionaries
:::note
:::note
For dictionaries created with [DDL queries](../../sql-reference/statements/create/dictionary.md), the `dict_name` parameter must be fully specified, like `<database>.<dict_name>`. Otherwise, the current database is used.
:::
For information on connecting and configuring dictionaries, see [Dictionaries](../../sql-reference/dictionaries/external-dictionaries/external-dicts.md).
For information on connecting and configuring dictionaries, see [Dictionaries](../../sql-reference/dictionaries/index.md).
## dictGet, dictGetOrDefault, dictGetOrNull
@ -31,7 +31,7 @@ dictGetOrNull('dict_name', attr_name, id_expr)
**Returned value**
- If ClickHouse parses the attribute successfully in the [attributes data type](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md#ext_dict_structure-attributes), functions return the value of the dictionary attribute that corresponds to `id_expr`.
- If ClickHouse parses the attribute successfully in the [attributes data type](../../sql-reference/dictionaries/index.md#dictionary-key-and-fields), functions return the value of the dictionary attribute that corresponds to `id_expr`.
- If there is no key corresponding to `id_expr` in the dictionary, then:
@ -226,7 +226,7 @@ Result:
**See Also**
- [Dictionaries](../../sql-reference/dictionaries/external-dictionaries/external-dicts.md)
- [Dictionaries](../../sql-reference/dictionaries/index.md)
## dictHas
@ -250,7 +250,7 @@ Type: `UInt8`.
## dictGetHierarchy
Creates an array, containing all the parents of a key in the [hierarchical dictionary](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-hierarchical.md).
Creates an array, containing all the parents of a key in the [hierarchical dictionary](../../sql-reference/dictionaries/index.md#hierarchical-dictionaries).
**Syntax**
@ -436,7 +436,7 @@ dictGet[Type]OrDefault('dict_name', 'attr_name', id_expr, default_value_expr)
**Returned value**
- If ClickHouse parses the attribute successfully in the [attributes data type](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md#ext_dict_structure-attributes), functions return the value of the dictionary attribute that corresponds to `id_expr`.
- If ClickHouse parses the attribute successfully in the [attributes data type](../../sql-reference/dictionaries/index.md#dictionary-key-and-fields), functions return the value of the dictionary attribute that corresponds to `id_expr`.
- If there is no requested `id_expr` in the dictionary then:

View File

@ -792,7 +792,7 @@ neighbor(column, offset[, default_value])
The result of the function depends on the affected data blocks and the order of data in the block.
:::warning
:::warning
It can reach the neighbor rows only inside the currently processed data block.
:::
@ -902,7 +902,7 @@ Result:
Calculates the difference between successive row values in the data block.
Returns 0 for the first row and the difference from the previous row for each subsequent row.
:::warning
:::warning
It can reach the previous row only inside the currently processed data block.
:::
@ -986,7 +986,7 @@ Each event has a start time and an end time. The start time is included in the e
The function calculates the total number of active (concurrent) events for each event start time.
:::warning
:::warning
Events must be ordered by the start time in ascending order. If this requirement is violated the function raises an exception. Every data block is processed separately. If events from different data blocks overlap then they can not be processed correctly.
:::
@ -1674,7 +1674,7 @@ Result:
Accumulates states of an aggregate function for each row of a data block.
:::warning
:::warning
The state is reset for each new data block.
:::
@ -2177,7 +2177,7 @@ Number of digits.
Type: [UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges).
:::note
:::note
For `Decimal` values, the function takes their scale into account: it calculates the result over the underlying integer type, which is `(value * scale)`. For example: `countDigits(42) = 2`, `countDigits(42.000) = 5`, `countDigits(0.04200) = 4`. I.e. you may check decimal overflow for `Decimal64` with `countDigits(x) > 18`. It's a slow variant of [isDecimalOverflow](#is-decimal-overflow).
:::
@ -2260,7 +2260,7 @@ Result:
## currentProfiles
Returns a list of the current [settings profiles](../../operations/access-rights.md#settings-profiles-management) for the current user.
Returns a list of the current [settings profiles](../../guides/sre/user-management/index.md#settings-profiles-management) for the current user.
The command [SET PROFILE](../../sql-reference/statements/set.md#query-set) could be used to change the current setting profile. If the command `SET PROFILE` was not used the function returns the profiles specified at the current user's definition (see [CREATE USER](../../sql-reference/statements/create/user.md#create-user-statement)).
@ -2272,7 +2272,7 @@ currentProfiles()
**Returned value**
- List of the current user settings profiles.
- List of the current user settings profiles.
Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)).
@ -2288,7 +2288,7 @@ enabledProfiles()
**Returned value**
- List of the enabled settings profiles.
- List of the enabled settings profiles.
Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)).
@ -2304,7 +2304,7 @@ defaultProfiles()
**Returned value**
- List of the default settings profiles.
- List of the default settings profiles.
Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)).
@ -2320,7 +2320,7 @@ currentRoles()
**Returned value**
- List of the current roles for the current user.
- List of the current roles for the current user.
Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)).
@ -2336,13 +2336,13 @@ enabledRoles()
**Returned value**
- List of the enabled roles for the current user.
- List of the enabled roles for the current user.
Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)).
## defaultRoles
Returns the names of the roles which are enabled by default for the current user when he logins. Initially these are all roles granted to the current user (see [GRANT](../../sql-reference/statements/grant/#grant-select)), but that can be changed with the [SET DEFAULT ROLE](../../sql-reference/statements/set-role.md#set-default-role-statement) statement.
Returns the names of the roles which are enabled by default for the current user when they log in. Initially these are all roles granted to the current user (see [GRANT](../../sql-reference/statements/grant.md#grant-select)), but that can be changed with the [SET DEFAULT ROLE](../../sql-reference/statements/set-role.md#set-default-role-statement) statement.
**Syntax**
@ -2352,7 +2352,7 @@ defaultRoles()
**Returned value**
- List of the default roles for the current user.
- List of the default roles for the current user.
Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)).
@ -2499,7 +2499,7 @@ In the following example a configuration with two shards is used. The query is e
Query:
``` sql
CREATE TABLE shard_num_example (dummy UInt8)
CREATE TABLE shard_num_example (dummy UInt8)
ENGINE=Distributed(test_cluster_two_shards_localhost, system, one, dummy);
SELECT dummy, shardNum(), shardCount() FROM shard_num_example;
```

View File

@ -22,15 +22,15 @@ tuple(x, y, …)
## tupleElement
A function that allows getting a column from a tuple.
N is the column index, starting from 1. N must be a constant. N must be a strict postive integer no greater than the size of the tuple.
There is no cost to execute the function.
The function implements the operator `x.N`.
If the second argument is a number `n`, it is the column index, starting from 1. If the second argument is a string `s`, it represents the name of the element. In addition, an optional third argument can be provided: when the index is out of bounds or no element with that name exists, the default value is returned instead of throwing an exception. The second and third arguments, if provided, must always be constants. There is no cost to execute the function.
The function implements the operators `x.n` and `x.s`.
**Syntax**
``` sql
tupleElement(tuple, n)
tupleElement(tuple, n/s [, default_value])
```
## untuple

View File

@ -0,0 +1,22 @@
---
keywords: [clickhouse, docs, sql reference, sql statements, sql, syntax]
title: SQL Reference
---
import { TwoColumnList } from '/src/components/two_column_list'
import { ClickableSquare } from '/src/components/clickable_square'
import { HorizontalDivide } from '/src/components/horizontal_divide'
import { ViewAllLink } from '/src/components/view_all_link'
import { VideoContainer } from '/src/components/video_container'
import LinksDeployment from './sql-reference-links.json'
# ClickHouse SQL Reference
ClickHouse supports a declarative query language based on SQL that is identical to the ANSI SQL standard in many cases.
Supported queries include GROUP BY, ORDER BY, subqueries in FROM, JOIN clause, IN operator, window functions and scalar subqueries.
<HorizontalDivide />
<TwoColumnList items={LinksDeployment} />

View File

@ -0,0 +1,12 @@
[
{
"title": "Statements",
"description": "A list of available SQL statements in ClickHouse",
"url": "/docs/en/sql-reference/statements/"
},
{
"title": "Database and Table Engines",
"description": "Engines determine where and how your data is stored",
"url": "/docs/en/engines/table-engines"
}
]

View File

@ -16,7 +16,7 @@ ALTER TABLE [db].name [ON CLUSTER cluster] MODIFY COMMENT 'Comment'
**Examples**
Creating a table with comment (for more information, see the [COMMENT] clause(../../../sql-reference/statements/create/table.md#comment-table)):
Creating a table with comment (for more information, see the [COMMENT](../../../sql-reference/statements/create/table.md#comment-table) clause):
``` sql
CREATE TABLE table_with_comment

View File

@ -17,7 +17,7 @@ Projections will create internally a new hidden table, this means that more IO a
For example, if the projection has defined a different primary key, all the data from the original table will be duplicated.
:::
You can see more technical details about how projections work internally on this [page](/docs/en/guides/improving-query-performance/sparse-primary-indexes/sparse-primary-indexes-multiple.md/#option-3-projections).
You can see more technical details about how projections work internally on this [page](/docs/en/guides/best-practices/sparse-primary-indexes.md/#option-3-projections).
## Example filtering without using primary keys
@ -37,7 +37,7 @@ Using `ALTER TABLE`, we could add the Projection to an existing table:
```
ALTER TABLE visits_order ADD PROJECTION user_name_projection (
SELECT
*
*
ORDER BY user_name
)
@ -161,6 +161,6 @@ The commands `ADD`, `DROP` and `CLEAR` are lightweight in a sense that they only
Also, they are replicated, syncing projections metadata via ClickHouse Keeper or ZooKeeper.
:::note
:::note
Projection manipulation is supported only for tables with [`*MergeTree`](/docs/en/engines/table-engines/mergetree-family/mergetree.md) engine (including [replicated](/docs/en/engines/table-engines/mergetree-family/replication.md) variants).
:::

View File

@ -5,7 +5,7 @@ sidebar_label: DICTIONARY
title: "CREATE DICTIONARY"
---
Creates a new [dictionary](../../../sql-reference/dictionaries/external-dictionaries/external-dicts.md) with given [structure](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md), [source](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md), [layout](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md) and [lifetime](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md).
Creates a new [dictionary](../../../sql-reference/dictionaries/index.md) with given [structure](../../../sql-reference/dictionaries/index.md#dictionary-key-and-fields), [source](../../../sql-reference/dictionaries/index.md#dictionary-sources), [layout](../../../sql-reference/dictionaries/index.md#storig-dictionaries-in-memory) and [lifetime](../../../sql-reference/dictionaries/index.md#dictionary-updates).
## Syntax
@ -29,7 +29,7 @@ The dictionary structure consists of attributes. Dictionary attributes are speci
`ON CLUSTER` clause allows creating dictionary on a cluster, see [Distributed DDL](../../../sql-reference/distributed-ddl.md).
Depending on dictionary [layout](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md) one or more attributes can be specified as dictionary keys.
Depending on dictionary [layout](../../../sql-reference/dictionaries/index.md#storig-dictionaries-in-memory) one or more attributes can be specified as dictionary keys.
## SOURCE
@ -125,9 +125,9 @@ LAYOUT(HASHED())
### Create a dictionary from another database
Please see the details in [Dictionary sources](/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md/#dbms).
Please see the details in [Dictionary sources](/docs/en/sql-reference/dictionaries/index.md#dictionary-sources).
**See Also**
- For more information, see the [Dictionaries](../../../sql-reference/dictionaries/external-dictionaries/external-dicts.md) section.
- [system.dictionaries](../../../operations/system-tables/dictionaries.md) — This table contains information about [Dictionaries](../../../sql-reference/dictionaries/external-dictionaries/external-dicts.md).
- For more information, see the [Dictionaries](../../../sql-reference/dictionaries/index.md) section.
- [system.dictionaries](../../../operations/system-tables/dictionaries.md) — This table contains information about [Dictionaries](../../../sql-reference/dictionaries/index.md).

View File

@ -5,7 +5,7 @@ sidebar_label: QUOTA
title: "CREATE QUOTA"
---
Creates a [quota](../../../operations/access-rights.md#quotas-management) that can be assigned to a user or a role.
Creates a [quota](../../../guides/sre/user-management/index.md#quotas-management) that can be assigned to a user or a role.
Syntax:

View File

@ -5,7 +5,7 @@ sidebar_label: ROLE
title: "CREATE ROLE"
---
Creates new [roles](../../../operations/access-rights.md#role-management). Role is a set of [privileges](../../../sql-reference/statements/grant.md#grant-privileges). A [user](../../../sql-reference/statements/create/user.md) assigned a role gets all the privileges of this role.
Creates new [roles](../../../guides/sre/user-management/index.md#role-management). Role is a set of [privileges](../../../sql-reference/statements/grant.md#grant-privileges). A [user](../../../sql-reference/statements/create/user.md) assigned a role gets all the privileges of this role.
Syntax:
@ -22,7 +22,7 @@ User can have default roles which apply at user login. To set default roles, use
To revoke a role, use the [REVOKE](../../../sql-reference/statements/revoke.md) statement.
To delete role, use the [DROP ROLE](../../../sql-reference/statements/drop#drop-role-statement) statement. The deleted role is being automatically revoked from all the users and roles to which it was assigned.
To delete role, use the [DROP ROLE](../../../sql-reference/statements/drop.md#drop-role-statement) statement. The deleted role is being automatically revoked from all the users and roles to which it was assigned.
## Examples

View File

@ -5,9 +5,9 @@ sidebar_label: ROW POLICY
title: "CREATE ROW POLICY"
---
Creates a [row policy](../../../operations/access-rights.md#row-policy-management), i.e. a filter used to determine which rows a user can read from a table.
Creates a [row policy](../../../guides/sre/user-management/index.md#row-policy-management), i.e. a filter used to determine which rows a user can read from a table.
:::warning
:::warning
Row policies make sense only for users with readonly access. If a user can modify a table or copy partitions between tables, it defeats the restrictions of row policies.
:::
@ -31,7 +31,7 @@ In the section `TO` you can provide a list of users and roles this policy should
Keyword `ALL` means all the ClickHouse users, including the current user. Keyword `ALL EXCEPT` allows excluding some users from the list of all users, for example, `CREATE ROW POLICY ... TO ALL EXCEPT accountant, john@localhost`
:::note
:::note
If there are no row policies defined for a table, then any user can `SELECT` all the rows from the table. Defining one or more row policies for the table makes access to the table depend on the row policies, regardless of whether those row policies are defined for the current user or not. For example, the following policy
`CREATE ROW POLICY pol1 ON mydb.table1 USING b=1 TO mira, peter`

View File

@ -5,7 +5,7 @@ sidebar_label: SETTINGS PROFILE
title: "CREATE SETTINGS PROFILE"
---
Creates [settings profiles](../../../operations/access-rights.md#settings-profiles-management) that can be assigned to a user or a role.
Creates [settings profiles](../../../guides/sre/user-management/index.md#settings-profiles-management) that can be assigned to a user or a role.
Syntax:
@ -27,7 +27,7 @@ CREATE USER robin IDENTIFIED BY 'password';
Create the `max_memory_usage_profile` settings profile with value and constraints for the `max_memory_usage` setting and assign it to user `robin`:
``` sql
CREATE
SETTINGS PROFILE max_memory_usage_profile SETTINGS max_memory_usage = 100000001 MIN 90000000 MAX 110000000
CREATE
SETTINGS PROFILE max_memory_usage_profile SETTINGS max_memory_usage = 100000001 MIN 90000000 MAX 110000000
TO robin
```

View File

@ -393,15 +393,15 @@ These codecs are designed to make compression more effective by using specific f
#### DoubleDelta
`DoubleDelta` — Calculates delta of deltas and writes it in compact binary form. Optimal compression rates are achieved for monotonic sequences with a constant stride, such as time series data. Can be used with any fixed-width type. Implements the algorithm used in Gorilla TSDB, extending it to support 64-bit types. Uses 1 extra bit for 32-byte deltas: 5-bit prefixes instead of 4-bit prefixes. For additional information, see Compressing Time Stamps in [Gorilla: A Fast, Scalable, In-Memory Time Series Database](http://www.vldb.org/pvldb/vol8/p1816-teller.pdf).
`DoubleDelta(bytes_size)` — Calculates delta of deltas and writes it in compact binary form. Possible `bytes_size` values: 1, 2, 4, 8, the default value is `sizeof(type)` if equal to 1, 2, 4, or 8. In all other cases, it's 1. Optimal compression rates are achieved for monotonic sequences with a constant stride, such as time series data. Can be used with any fixed-width type. Implements the algorithm used in Gorilla TSDB, extending it to support 64-bit types. Uses 1 extra bit for 32-bit deltas: 5-bit prefixes instead of 4-bit prefixes. For additional information, see Compressing Time Stamps in [Gorilla: A Fast, Scalable, In-Memory Time Series Database](http://www.vldb.org/pvldb/vol8/p1816-teller.pdf).
#### Gorilla
`Gorilla` — Calculates XOR between current and previous floating point value and writes it in compact binary form. The smaller the difference between consecutive values is, i.e. the slower the values of the series changes, the better the compression rate. Implements the algorithm used in Gorilla TSDB, extending it to support 64-bit types. For additional information, see section 4.1 in [Gorilla: A Fast, Scalable, In-Memory Time Series Database](https://doi.org/10.14778/2824032.2824078).
`Gorilla(bytes_size)` — Calculates XOR between current and previous floating point value and writes it in compact binary form. The smaller the difference between consecutive values is, i.e. the slower the values of the series change, the better the compression rate. Implements the algorithm used in Gorilla TSDB, extending it to support 64-bit types. Possible `bytes_size` values: 1, 2, 4, 8, the default value is `sizeof(type)` if equal to 1, 2, 4, or 8. In all other cases, it's 1. For additional information, see section 4.1 in [Gorilla: A Fast, Scalable, In-Memory Time Series Database](https://doi.org/10.14778/2824032.2824078).
#### FPC
`FPC` - Repeatedly predicts the next floating point value in the sequence using the better of two predictors, then XORs the actual with the predicted value, and leading-zero compresses the result. Similar to Gorilla, this is efficient when storing a series of floating point values that change slowly. For 64-bit values (double), FPC is faster than Gorilla, for 32-bit values your mileage may vary. For a detailed description of the algorithm see [High Throughput Compression of Double-Precision Floating-Point Data](https://userweb.cs.txstate.edu/~burtscher/papers/dcc07a.pdf).
`FPC(level, float_size)` - Repeatedly predicts the next floating point value in the sequence using the better of two predictors, then XORs the actual with the predicted value, and leading-zero compresses the result. Similar to Gorilla, this is efficient when storing a series of floating point values that change slowly. For 64-bit values (double), FPC is faster than Gorilla, for 32-bit values your mileage may vary. Possible `level` values: 1-28, the default value is 12. Possible `float_size` values: 4, 8, the default value is `sizeof(type)` if type is Float. In all other cases, it's 4. For a detailed description of the algorithm see [High Throughput Compression of Double-Precision Floating-Point Data](https://userweb.cs.txstate.edu/~burtscher/papers/dcc07a.pdf).
#### T64
@ -473,7 +473,7 @@ ENGINE = MergeTree ORDER BY x;
ClickHouse supports temporary tables which have the following characteristics:
- Temporary tables disappear when the session ends, including if the connection is lost.
- A temporary table uses the Memory engine only.
- A temporary table uses the Memory table engine when engine is not specified and it may use any table engine except Replicated and `KeeperMap` engines.
- The DB can't be specified for a temporary table. It is created outside of databases.
- Impossible to create a temporary table with distributed DDL query on all cluster servers (by using `ON CLUSTER`): this table exists only in the current session.
- If a temporary table has the same name as another one and a query specifies the table name without specifying the DB, the temporary table will be used.
@ -487,7 +487,7 @@ CREATE TEMPORARY TABLE [IF NOT EXISTS] table_name
name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1],
name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2],
...
)
) [ENGINE = engine]
```
In most cases, temporary tables are not created manually, but when using external data for a query, or for distributed `(GLOBAL) IN`. For more information, see the appropriate sections

View File

@ -5,7 +5,7 @@ sidebar_label: USER
title: "CREATE USER"
---
Creates [user accounts](../../../operations/access-rights.md#user-account-management).
Creates [user accounts](../../../guides/sre/user-management/index.md#user-account-management).
Syntax:

View File

@ -30,12 +30,6 @@ SET allow_experimental_lightweight_delete = true;
:::
An [alternative way to delete rows](./alter/delete.md) in ClickHouse is `ALTER TABLE ... DELETE`, which might be more efficient if you do bulk deletes only occasionally and don't need the operation to be applied instantly. In most use cases the new lightweight `DELETE FROM` behavior will be considerably faster.
:::warning
Even though deletes are becoming more lightweight in ClickHouse, they should still not be used as aggressively as on an OLTP system. Lightweight deletes are currently efficient for wide parts, but for compact parts, they can be a heavyweight operation, and it may be better to use `ALTER TABLE` for some scenarios.
:::
:::note
`DELETE FROM` requires the `ALTER DELETE` privilege:
```sql
@ -51,7 +45,7 @@ The idea behind Lightweight Delete is that when a `DELETE FROM table ...` query
The mask is implemented as a hidden `_row_exists` system column that stores True for all visible rows and False for deleted ones. This column is only present in a part if some rows in this part were deleted. In other words, the column is not persisted when it has all values equal to True.
## SELECT query
When the column is present `SELECT ... FROM table WHERE condition` query internally is extended by an additional predicate on `_row_exists` and becomes similar to
When the column is present `SELECT ... FROM table WHERE condition` query internally is extended by an additional predicate on `_row_exists` and becomes similar to
```sql
SELECT ... FROM table PREWHERE _row_exists WHERE condition
```

View File

@ -22,7 +22,7 @@ System log tables can be also attached back (e.g. `query_log`, `text_log`, etc).
Note that you cannot permanently detach a table that is already detached (temporarily). But you can attach it back and then detach it permanently again.
Also you can not [DROP](../../sql-reference/statements/drop#drop-table) the detached table, or [CREATE TABLE](../../sql-reference/statements/create/table.md) with the same name as detached permanently, or replace it with the other table with [RENAME TABLE](../../sql-reference/statements/rename.md) query.
Also you can not [DROP](../../sql-reference/statements/drop.md#drop-table) the detached table, or [CREATE TABLE](../../sql-reference/statements/create/table.md) with the same name as detached permanently, or replace it with the other table with [RENAME TABLE](../../sql-reference/statements/rename.md) query.
The `SYNC` modifier executes the action without delay.

View File

@ -105,7 +105,8 @@ Hierarchy of privileges:
- [CREATE](#grant-create)
- `CREATE DATABASE`
- `CREATE TABLE`
- `CREATE TEMPORARY TABLE`
- `CREATE ARBITRARY TEMPORARY TABLE`
- `CREATE TEMPORARY TABLE`
- `CREATE VIEW`
- `CREATE DICTIONARY`
- `CREATE FUNCTION`
@ -313,7 +314,8 @@ Allows executing [CREATE](../../sql-reference/statements/create/index.md) and [A
- `CREATE`. Level: `GROUP`
- `CREATE DATABASE`. Level: `DATABASE`
- `CREATE TABLE`. Level: `TABLE`
- `CREATE TEMPORARY TABLE`. Level: `GLOBAL`
- `CREATE ARBITRARY TEMPORARY TABLE`. Level: `GLOBAL`
- `CREATE TEMPORARY TABLE`. Level: `GLOBAL`
- `CREATE VIEW`. Level: `VIEW`
- `CREATE DICTIONARY`. Level: `DICTIONARY`

View File

@ -4,7 +4,7 @@ sidebar_position: 33
sidebar_label: INSERT INTO
---
# INSERT INTO Statement
# INSERT INTO Statement
Inserts data into a table.
@ -89,7 +89,7 @@ INSERT INTO t FORMAT TabSeparated
22 Qwerty
```
You can insert data separately from the query by using the command-line client or the HTTP interface. For more information, see the section “[Interfaces](../../interfaces)”.
You can insert data separately from the query by using the [command-line client](/docs/en/integrations/sql-clients/clickhouse-client-local) or the [HTTP interface](/docs/en/interfaces/http/).
:::note
If you want to specify `SETTINGS` for `INSERT` query then you have to do it _before_ `FORMAT` clause since everything after `FORMAT format_name` is treated as data. For example:
@ -129,7 +129,7 @@ To insert a default value instead of `NULL` into a column with not nullable data
INSERT INTO [db.]table [(c1, c2, c3)] FROM INFILE file_name [COMPRESSION type] FORMAT format_name
```
Use the syntax above to insert data from a file, or files, stored on the **client** side. `file_name` and `type` are string literals. Input file [format](../../interfaces/formats.md) must be set in the `FORMAT` clause.
Use the syntax above to insert data from a file, or files, stored on the **client** side. `file_name` and `type` are string literals. Input file [format](../../interfaces/formats.md) must be set in the `FORMAT` clause.
Compressed files are supported. The compression type is detected by the extension of the file name. Or it can be explicitly specified in a `COMPRESSION` clause. Supported types are: `'none'`, `'gzip'`, `'deflate'`, `'br'`, `'xz'`, `'zstd'`, `'lz4'`, `'bz2'`.
@ -191,7 +191,7 @@ INSERT INTO [TABLE] FUNCTION table_func ...
``` sql
CREATE TABLE simple_table (id UInt32, text String) ENGINE=MergeTree() ORDER BY id;
INSERT INTO TABLE FUNCTION remote('localhost', default.simple_table)
INSERT INTO TABLE FUNCTION remote('localhost', default.simple_table)
VALUES (100, 'inserted via remote()');
SELECT * FROM simple_table;
```

View File

@ -146,7 +146,7 @@ ARRAY JOIN arr AS a, arrayEnumerate(arr) AS num, arrayMap(x -> x + 1, arr) AS ma
└───────┴─────────┴───┴─────┴────────┘
```
The example below uses the [arrayEnumerate](../../../sql-reference/functions/array-functions#array_functions-arrayenumerate) function:
The example below uses the [arrayEnumerate](../../../sql-reference/functions/array-functions.md#array_functions-arrayenumerate) function:
``` sql
SELECT s, arr, a, num, arrayEnumerate(arr)
@ -166,8 +166,8 @@ ARRAY JOIN arr AS a, arrayEnumerate(arr) AS num;
Multiple arrays with different sizes can be joined by using: `SETTINGS enable_unaligned_array_join = 1`. Example:
```sql
SELECT s, arr, a, b
FROM arrays_test ARRAY JOIN arr as a, [['a','b'],['c']] as b
SELECT s, arr, a, b
FROM arrays_test ARRAY JOIN arr as a, [['a','b'],['c']] as b
SETTINGS enable_unaligned_array_join = 1;
```
@ -278,7 +278,7 @@ ARRAY JOIN nest AS n;
└───────┴─────┴─────┴─────────┴────────────┘
```
Example of using the [arrayEnumerate](../../../sql-reference/functions/array-functions#array_functions-arrayenumerate) function:
Example of using the [arrayEnumerate](../../../sql-reference/functions/array-functions.md#array_functions-arrayenumerate) function:
``` sql
SELECT s, `n.x`, `n.y`, `nest.x`, `nest.y`, num

View File

@ -8,12 +8,12 @@ sidebar_label: GROUP BY
`GROUP BY` clause switches the `SELECT` query into an aggregation mode, which works as follows:
- `GROUP BY` clause contains a list of expressions (or a single expression, which is considered to be the list of length one). This list acts as a “grouping key”, while each individual expression will be referred to as a “key expression”.
- All the expressions in the [SELECT](../../../sql-reference/statements/select/index.md), [HAVING](../../../sql-reference/statements/select/having), and [ORDER BY](../../../sql-reference/statements/select/order-by.md) clauses **must** be calculated based on key expressions **or** on [aggregate functions](../../../sql-reference/aggregate-functions/index.md) over non-key expressions (including plain columns). In other words, each column selected from the table must be used either in a key expression or inside an aggregate function, but not both.
- All the expressions in the [SELECT](../../../sql-reference/statements/select/index.md), [HAVING](../../../sql-reference/statements/select/having.md), and [ORDER BY](../../../sql-reference/statements/select/order-by.md) clauses **must** be calculated based on key expressions **or** on [aggregate functions](../../../sql-reference/aggregate-functions/index.md) over non-key expressions (including plain columns). In other words, each column selected from the table must be used either in a key expression or inside an aggregate function, but not both.
- Result of aggregating `SELECT` query will contain as many rows as there were unique values of “grouping key” in source table. Usually, this significantly reduces the row count, often by orders of magnitude, but not necessarily: row count stays the same if all “grouping key” values were distinct.
When you want to group data in the table by column numbers instead of column names, enable the setting [enable_positional_arguments](../../../operations/settings/settings.md#enable-positional-arguments).
:::note
:::note
There's an additional way to run aggregation over a table. If a query contains table columns only inside aggregate functions, the `GROUP BY` clause can be omitted, and aggregation by an empty set of keys is assumed. Such queries always return exactly one row.
:::
@ -57,8 +57,8 @@ The subtotals are calculated in the reverse order: at first subtotals are calcul
In the subtotals rows the values of already "grouped" key expressions are set to `0` or empty line.
:::note
Mind that [HAVING](../../../sql-reference/statements/select/having) clause can affect the subtotals results.
:::note
Mind that [HAVING](../../../sql-reference/statements/select/having.md) clause can affect the subtotals results.
:::
**Example**
@ -125,8 +125,8 @@ SELECT year, month, day, count(*) FROM t GROUP BY year, month, day WITH ROLLUP;
In the subtotals rows the values of all "grouped" key expressions are set to `0` or empty line.
:::note
Mind that [HAVING](../../../sql-reference/statements/select/having) clause can affect the subtotals results.
:::note
Mind that [HAVING](../../../sql-reference/statements/select/having.md) clause can affect the subtotals results.
:::
**Example**
@ -226,11 +226,11 @@ This extra row is only produced in `JSON*`, `TabSeparated*`, and `Pretty*` forma
- In `Template` format, the row is output according to specified template.
- In the other formats it is not available.
:::note
totals is output in the results of `SELECT` queries, and is not output in `INSERT INTO ... SELECT`.
:::note
totals is output in the results of `SELECT` queries, and is not output in `INSERT INTO ... SELECT`.
:::
`WITH TOTALS` can be run in different ways when [HAVING](../../../sql-reference/statements/select/having) is present. The behavior depends on the `totals_mode` setting.
`WITH TOTALS` can be run in different ways when [HAVING](../../../sql-reference/statements/select/having.md) is present. The behavior depends on the `totals_mode` setting.
### Configuring Totals Processing

View File

@ -4,7 +4,7 @@ sidebar_position: 32
sidebar_label: SELECT
---
# SELECT Query
# SELECT Query
`SELECT` queries perform data retrieval. By default, the requested data is returned to the client, while in conjunction with [INSERT INTO](../../../sql-reference/statements/insert-into.md) it can be forwarded to a different table.
@ -44,7 +44,7 @@ Specifics of each optional clause are covered in separate sections, which are li
- [WHERE clause](../../../sql-reference/statements/select/where.md)
- [GROUP BY clause](../../../sql-reference/statements/select/group-by.md)
- [LIMIT BY clause](../../../sql-reference/statements/select/limit-by.md)
- [HAVING clause](../../../sql-reference/statements/select/having)
- [HAVING clause](../../../sql-reference/statements/select/having.md)
- [LIMIT clause](../../../sql-reference/statements/select/limit.md)
- [OFFSET clause](../../../sql-reference/statements/select/offset.md)
- [UNION clause](../../../sql-reference/statements/select/union.md)

View File

@ -1,6 +1,6 @@
---
slug: /en/sql-reference/statements/select/join
sidebar_label: JOIN
sidebar_label: Joining Tables
---
# JOIN Clause
@ -282,7 +282,7 @@ Each time a query is run with the same `JOIN`, the subquery is run again because
In some cases, it is more efficient to use [IN](../../../sql-reference/operators/in.md) instead of `JOIN`.
If you need a `JOIN` for joining with dimension tables (these are relatively small tables that contain dimension properties, such as names for advertising campaigns), a `JOIN` might not be very convenient due to the fact that the right table is re-accessed for every query. For such cases, there is a “dictionaries” feature that you should use instead of `JOIN`. For more information, see the [Dictionaries](../../../sql-reference/dictionaries/external-dictionaries/external-dicts.md) section.
If you need a `JOIN` for joining with dimension tables (these are relatively small tables that contain dimension properties, such as names for advertising campaigns), a `JOIN` might not be very convenient due to the fact that the right table is re-accessed for every query. For such cases, there is a “dictionaries” feature that you should use instead of `JOIN`. For more information, see the [Dictionaries](../../../sql-reference/dictionaries/index.md) section.
### Memory Limitations

View File

@ -198,7 +198,7 @@ Result:
## SHOW DICTIONARIES
Displays a list of [Dictionaries](../../sql-reference/dictionaries/external-dictionaries/external-dicts.md).
Displays a list of [Dictionaries](../../sql-reference/dictionaries/index.md).
``` sql
SHOW DICTIONARIES [FROM <db>] [LIKE '<pattern>'] [LIMIT <N>] [INTO OUTFILE <filename>] [FORMAT <format>]
@ -293,7 +293,7 @@ SHOW CREATE [SETTINGS] PROFILE name1 [, name2 ...]
## SHOW USERS
Returns a list of [user account](../../operations/access-rights.md#user-account-management) names. To view user accounts parameters, see the system table [system.users](../../operations/system-tables/users.md#system_tables-users).
Returns a list of [user account](../../guides/sre/user-management/index.md#user-account-management) names. To view user accounts parameters, see the system table [system.users](../../operations/system-tables/users.md#system_tables-users).
### Syntax
@ -303,7 +303,7 @@ SHOW USERS
## SHOW ROLES
Returns a list of [roles](../../operations/access-rights.md#role-management). To view another parameters, see system tables [system.roles](../../operations/system-tables/roles.md#system_tables-roles) and [system.role_grants](../../operations/system-tables/role-grants.md#system_tables-role_grants).
Returns a list of [roles](../../guides/sre/user-management/index.md#role-management). To view other parameters, see system tables [system.roles](../../operations/system-tables/roles.md#system_tables-roles) and [system.role_grants](../../operations/system-tables/role-grants.md#system_tables-role_grants).
### Syntax
@ -312,7 +312,7 @@ SHOW [CURRENT|ENABLED] ROLES
```
## SHOW PROFILES
Returns a list of [setting profiles](../../operations/access-rights.md#settings-profiles-management). To view user accounts parameters, see the system table [settings_profiles](../../operations/system-tables/settings_profiles.md#system_tables-settings_profiles).
Returns a list of [setting profiles](../../guides/sre/user-management/index.md#settings-profiles-management). To view settings profile parameters, see the system table [settings_profiles](../../operations/system-tables/settings_profiles.md#system_tables-settings_profiles).
### Syntax
@ -322,7 +322,7 @@ SHOW [SETTINGS] PROFILES
## SHOW POLICIES
Returns a list of [row policies](../../operations/access-rights.md#row-policy-management) for the specified table. To view user accounts parameters, see the system table [system.row_policies](../../operations/system-tables/row_policies.md#system_tables-row_policies).
Returns a list of [row policies](../../guides/sre/user-management/index.md#row-policy-management) for the specified table. To view row policy parameters, see the system table [system.row_policies](../../operations/system-tables/row_policies.md#system_tables-row_policies).
### Syntax
@ -332,7 +332,7 @@ SHOW [ROW] POLICIES [ON [db.]table]
## SHOW QUOTAS
Returns a list of [quotas](../../operations/access-rights.md#quotas-management). To view quotas parameters, see the system table [system.quotas](../../operations/system-tables/quotas.md#system_tables-quotas).
Returns a list of [quotas](../../guides/sre/user-management/index.md#quotas-management). To view quotas parameters, see the system table [system.quotas](../../operations/system-tables/quotas.md#system_tables-quotas).
### Syntax
@ -351,7 +351,7 @@ SHOW [CURRENT] QUOTA
```
## SHOW ACCESS
Shows all [users](../../operations/access-rights.md#user-account-management), [roles](../../operations/access-rights.md#role-management), [profiles](../../operations/access-rights.md#settings-profiles-management), etc. and all their [grants](../../sql-reference/statements/grant.md#grant-privileges).
Shows all [users](../../guides/sre/user-management/index.md#user-account-management), [roles](../../guides/sre/user-management/index.md#role-management), [profiles](../../guides/sre/user-management/index.md#settings-profiles-management), etc. and all their [grants](../../sql-reference/statements/grant.md#grant-privileges).
### Syntax

View File

@ -8,7 +8,7 @@ sidebar_label: SYSTEM
## RELOAD EMBEDDED DICTIONARIES
Reload all [Internal dictionaries](../../sql-reference/dictionaries/internal-dicts.md).
Reload all [Internal dictionaries](../../sql-reference/dictionaries/index.md).
By default, internal dictionaries are disabled.
Always returns `Ok.` regardless of the result of the internal dictionary update.
@ -369,7 +369,7 @@ SYSTEM DROP FILESYSTEM CACHE
It's too heavy and has potential for misuse.
:::
Will do sync syscall.
Will do sync syscall.
```sql
SYSTEM SYNC FILE CACHE

View File

@ -5,7 +5,7 @@ sidebar_label: dictionary function
title: dictionary
---
Displays the [dictionary](../../sql-reference/dictionaries/external-dictionaries/external-dicts.md) data as a ClickHouse table. Works the same way as [Dictionary](../../engines/table-engines/special/dictionary.md) engine.
Displays the [dictionary](../../sql-reference/dictionaries/index.md) data as a ClickHouse table. Works the same way as [Dictionary](../../engines/table-engines/special/dictionary.md) engine.
**Syntax**

View File

@ -85,7 +85,7 @@ The response looks like:
## Passing Query Results to a Script
Be sure to check out the example in the `Executable` table engine on [how to pass query results to a script](../../engines/table-engines/special/executable#passing-query-results-to-a-script). Here is how you execute the same script in that example using the `executable` table function:
Be sure to check out the example in the `Executable` table engine on [how to pass query results to a script](../../engines/table-engines/special/executable.md#passing-query-results-to-a-script). Here is how you execute the same script in that example using the `executable` table function:
```sql
SELECT * FROM executable(

View File

@ -70,5 +70,5 @@ SELECT * FROM mongodb(
**See Also**
- [The `MongoDB` table engine](../../engines/table-engines/integrations/mongodb.md)
- [Using MongoDB as a dictionary source](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources/#mongodb)
- [The `MongoDB` table engine](/docs/en/engines/table-engines/integrations/mongodb.md)
- [Using MongoDB as a dictionary source](/docs/en/sql-reference/dictionaries/index.md#mongodb)

View File

@ -56,7 +56,7 @@ SELECT name FROM mysql(`mysql1:3306|mysql2:3306|mysql3:3306`, 'mysql_database',
A table object with the same columns as the original MySQL table.
:::note
:::note
In the `INSERT` query to distinguish table function `mysql(...)` from table name with column names list, you must use keywords `FUNCTION` or `TABLE FUNCTION`. See examples below.
:::
@ -110,4 +110,4 @@ SELECT * FROM mysql('localhost:3306', 'test', 'test', 'bayonet', '123');
**See Also**
- [The MySQL table engine](../../engines/table-engines/integrations/mysql.md)
- [Using MySQL as a dictionary source](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md#dicts-external_dicts_dict_sources-mysql)
- [Using MySQL as a dictionary source](../../sql-reference/dictionaries/index.md#mysql)

View File

@ -101,5 +101,5 @@ SELECT * FROM odbc('DSN=mysqlconn', 'test', 'test')
## See Also
- [ODBC dictionaries](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md#dicts-external_dicts_dict_sources-odbc)
- [ODBC dictionaries](../../sql-reference/dictionaries/index.md#odbc)
- [ODBC table engine](../../engines/table-engines/integrations/odbc.md).

View File

@ -27,7 +27,7 @@ postgresql('host:port', 'database', 'table', 'user', 'password'[, `schema`])
A table object with the same columns as the original PostgreSQL table.
:::note
:::note
In the `INSERT` query to distinguish table function `postgresql(...)` from table name with column names list you must use keywords `FUNCTION` or `TABLE FUNCTION`. See examples below.
:::
@ -43,7 +43,7 @@ All joins, aggregations, sorting, `IN [ array ]` conditions and the `LIMIT` samp
PostgreSQL Array types are converted into ClickHouse arrays.
:::note
:::note
Be careful, in PostgreSQL an array data type column like Integer[] may contain arrays of different dimensions in different rows, but in ClickHouse it is only allowed to have multidimensional arrays of the same dimension in all rows.
:::
@ -130,7 +130,7 @@ CREATE TABLE pg_table_schema_with_dots (a UInt32)
**See Also**
- [The PostgreSQL table engine](../../engines/table-engines/integrations/postgresql.md)
- [Using PostgreSQL as a dictionary source](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md#dicts-external_dicts_dict_sources-postgresql)
- [Using PostgreSQL as a dictionary source](../../sql-reference/dictionaries/index.md#postgresql)
## Related content
- Blog: [ClickHouse and PostgreSQL - a match made in data heaven - part 1](https://clickhouse.com/blog/migrating-data-between-clickhouse-postgres)

View File

@ -97,7 +97,7 @@ CREATE DATABASE mysql ENGINE = MaterializedMySQL('localhost:3306', 'db', 'user',
### DDL-запросы {#ddl-queries}
DDL-запросы в MySQL конвертируются в соответствующие DDL-запросы в ClickHouse ([ALTER](../../sql-reference/statements/alter/index.md), [CREATE](../../sql-reference/statements/create/index.md), [DROP](../../sql-reference/statements/drop), [RENAME](../../sql-reference/statements/rename.md)). Если ClickHouse не может конвертировать какой-либо DDL-запрос, он его игнорирует.
DDL-запросы в MySQL конвертируются в соответствующие DDL-запросы в ClickHouse ([ALTER](../../sql-reference/statements/alter/index.md), [CREATE](../../sql-reference/statements/create/index.md), [DROP](../../sql-reference/statements/drop.md), [RENAME](../../sql-reference/statements/rename.md)). Если ClickHouse не может конвертировать какой-либо DDL-запрос, он его игнорирует.
### Репликация данных {#data-replication}

View File

@ -89,7 +89,7 @@ ORDER BY expr
- `min_merge_bytes_to_use_direct_io` — минимальный объём данных при слиянии, необходимый для прямого (небуферизованного) чтения/записи (direct I/O) на диск. При слиянии частей данных ClickHouse вычисляет общий объём хранения всех данных, подлежащих слиянию. Если общий объём хранения всех данных для чтения превышает `min_bytes_to_use_direct_io` байт, тогда ClickHouse использует флаг `O_DIRECT` при чтении данных с диска. Если `min_merge_bytes_to_use_direct_io = 0`, тогда прямой ввод-вывод отключен. Значение по умолчанию: `10 * 1024 * 1024 * 1024` байтов.
- `merge_with_ttl_timeout` — минимальное время в секундах перед повторным слиянием для удаления данных с истекшим TTL. По умолчанию: `14400` секунд (4 часа).
- `merge_with_recompression_ttl_timeout` — минимальное время в секундах перед повторным слиянием для повторного сжатия данных с истекшим TTL. По умолчанию: `14400` секунд (4 часа).
- `try_fetch_recompressed_part_timeout` — время ожидания (в секундах) перед началом слияния с повторным сжатием. В течение этого времени ClickHouse пытается извлечь сжатую часть из реплики, которая назначила это слияние. Значение по умолчанию: `7200` секунд (2 часа).
- `try_fetch_recompressed_part_timeout` — время ожидания (в секундах) перед началом слияния с повторным сжатием. В течение этого времени ClickHouse пытается извлечь сжатую часть из реплики, которая назначила это слияние. Значение по умолчанию: `7200` секунд (2 часа).
- `write_final_mark` — включает или отключает запись последней засечки индекса в конце куска данных, указывающей за последний байт. По умолчанию — 1. Не отключайте её.
- `merge_max_block_size` — максимальное количество строк в блоке для операций слияния. Значение по умолчанию: 8192.
- `storage_policy` — политика хранения данных. Смотрите [Хранение данных таблицы на нескольких блочных устройствах](#table_engine-mergetree-multiple-volumes).
@ -337,7 +337,7 @@ SELECT count() FROM table WHERE u64 * i32 == 10 AND u64 * length(s) >= 1234
Поддерживаемые типы данных: `Int*`, `UInt*`, `Float*`, `Enum`, `Date`, `DateTime`, `String`, `FixedString`.
Фильтром могут пользоваться функции: [equals](../../../sql-reference/functions/comparison-functions.md), [notEquals](../../../sql-reference/functions/comparison-functions.md), [in](../../../sql-reference/functions/in-functions), [notIn](../../../sql-reference/functions/in-functions), [has](../../../sql-reference/functions/array-functions#hasarr-elem), [hasAny](../../../sql-reference/functions/array-functions#hasany), [hasAll](../../../sql-reference/functions/array-functions#hasall).
Фильтром могут пользоваться функции: [equals](../../../sql-reference/functions/comparison-functions.md), [notEquals](../../../sql-reference/functions/comparison-functions.md), [in](../../../sql-reference/functions/in-functions.md), [notIn](../../../sql-reference/functions/in-functions.md), [has](../../../sql-reference/functions/array-functions.md#hasarr-elem), [hasAny](../../../sql-reference/functions/array-functions.md#hasany), [hasAll](../../../sql-reference/functions/array-functions.md#hasall).
**Примеры**
@ -361,14 +361,14 @@ INDEX b (u64 * length(str), i32 + f64 * 100, date, str) TYPE set(100) GRANULARIT
| [startsWith](../../../sql-reference/functions/string-functions.md#startswith) | ✔ | ✔ | ✔ | ✔ | ✗ |
| [endsWith](../../../sql-reference/functions/string-functions.md#endswith) | ✗ | ✗ | ✔ | ✔ | ✗ |
| [multiSearchAny](../../../sql-reference/functions/string-search-functions.md#function-multisearchany) | ✗ | ✗ | ✔ | ✗ | ✗ |
| [in](../../../sql-reference/functions/in-functions#in-functions) | ✔ | ✔ | ✔ | ✔ | ✔ |
| [notIn](../../../sql-reference/functions/in-functions#in-functions) | ✔ | ✔ | ✔ | ✔ | ✔ |
| [in](../../../sql-reference/functions/in-functions.md#in-functions) | ✔ | ✔ | ✔ | ✔ | ✔ |
| [notIn](../../../sql-reference/functions/in-functions.md#in-functions) | ✔ | ✔ | ✔ | ✔ | ✔ |
| [less (\<)](../../../sql-reference/functions/comparison-functions.md#function-less) | ✔ | ✔ | ✗ | ✗ | ✗ |
| [greater (\>)](../../../sql-reference/functions/comparison-functions.md#function-greater) | ✔ | ✔ | ✗ | ✗ | ✗ |
| [lessOrEquals (\<=)](../../../sql-reference/functions/comparison-functions.md#function-lessorequals) | ✔ | ✔ | ✗ | ✗ | ✗ |
| [greaterOrEquals (\>=)](../../../sql-reference/functions/comparison-functions.md#function-greaterorequals) | ✔ | ✔ | ✗ | ✗ | ✗ |
| [empty](../../../sql-reference/functions/array-functions#function-empty) | ✔ | ✔ | ✗ | ✗ | ✗ |
| [notEmpty](../../../sql-reference/functions/array-functions#function-notempty) | ✔ | ✔ | ✗ | ✗ | ✗ |
| [empty](../../../sql-reference/functions/array-functions.md#function-empty) | ✔ | ✔ | ✗ | ✗ | ✗ |
| [notEmpty](../../../sql-reference/functions/array-functions.md#function-notempty) | ✔ | ✔ | ✗ | ✗ | ✗ |
| hasToken | ✗ | ✗ | ✗ | ✔ | ✗ |
Функции с постоянным аргументом, который меньше, чем размер ngram, не могут использовать индекс `ngrambf_v1` для оптимизации запроса.
@ -396,7 +396,7 @@ INDEX b (u64 * length(str), i32 + f64 * 100, date, str) TYPE set(100) GRANULARIT
Проекции не поддерживаются для запросов `SELECT` с модификатором [FINAL](../../../sql-reference/statements/select/from.md#select-from-final).
### Запрос проекции {#projection-query}
Запрос проекции — это то, что определяет проекцию. Такой запрос неявно выбирает данные из родительской таблицы.
Запрос проекции — это то, что определяет проекцию. Такой запрос неявно выбирает данные из родительской таблицы.
**Синтаксис**
```sql
@ -406,9 +406,9 @@ SELECT <column list expr> [GROUP BY] <group keys expr> [ORDER BY] <expr>
Проекции можно изменить или удалить с помощью запроса [ALTER](../../../sql-reference/statements/alter/projection.md).
### Хранение проекции {#projection-storage}
Проекции хранятся в каталоге куска данных. Это похоже на хранение индексов, но используется подкаталог, в котором хранится анонимный кусок таблицы `MergeTree`. Таблица создается запросом определения проекции.
Если присутствует секция `GROUP BY`, то используется движок [AggregatingMergeTree](aggregatingmergetree.md), а все агрегатные функции преобразуются в `AggregateFunction`.
Если присутствует секция `ORDER BY`, таблица `MergeTree` использует ее в качестве выражения для первичного ключа.
Проекции хранятся в каталоге куска данных. Это похоже на хранение индексов, но используется подкаталог, в котором хранится анонимный кусок таблицы `MergeTree`. Таблица создается запросом определения проекции.
Если присутствует секция `GROUP BY`, то используется движок [AggregatingMergeTree](aggregatingmergetree.md), а все агрегатные функции преобразуются в `AggregateFunction`.
Если присутствует секция `ORDER BY`, таблица `MergeTree` использует ее в качестве выражения для первичного ключа.
Во время процесса слияния кусок данных проекции объединяется с помощью процедуры слияния хранилища. Контрольная сумма куска данных родительской таблицы включает кусок данных проекции. Другие процедуры аналогичны индексам пропуска данных.
### Анализ запросов {#projection-query-analysis}
@ -499,7 +499,7 @@ TTL expr
За каждым `TTL` выражением может следовать тип действия, которое выполняется после достижения времени, соответствующего результату `TTL` выражения:
- `DELETE` - удалить данные (действие по умолчанию);
- `RECOMPRESS codec_name` - повторно сжать данные с помощью кодека `codec_name`;
- `RECOMPRESS codec_name` - повторно сжать данные с помощью кодека `codec_name`;
- `TO DISK 'aaa'` - переместить данные на диск `aaa`;
- `TO VOLUME 'bbb'` - переместить данные на том `bbb`;
- `GROUP BY` - агрегировать данные.
@ -679,7 +679,7 @@ TTL d + INTERVAL 1 MONTH GROUP BY k1, k2 SET x = max(x), y = min(y);
- `policy_name_N` — название политики. Названия политик должны быть уникальны.
- `volume_name_N` — название тома. Названия томов должны быть уникальны.
- `disk` — диск, находящийся внутри тома.
- `max_data_part_size_bytes` — максимальный размер куска данных, который может находиться на любом из дисков этого тома. Если в результате слияния размер куска ожидается больше, чем max_data_part_size_bytes, то этот кусок будет записан в следующий том. В основном эта функция позволяет хранить новые / мелкие куски на горячем (SSD) томе и перемещать их на холодный (HDD) том, когда они достигают большого размера. Не используйте этот параметр, если политика имеет только один том.
- `max_data_part_size_bytes` — максимальный размер куска данных, который может находиться на любом из дисков этого тома. Если в результате слияния размер куска ожидается больше, чем max_data_part_size_bytes, то этот кусок будет записан в следующий том. В основном эта функция позволяет хранить новые / мелкие куски на горячем (SSD) томе и перемещать их на холодный (HDD) том, когда они достигают большого размера. Не используйте этот параметр, если политика имеет только один том.
- `move_factor` — доля доступного свободного места на томе, если места становится меньше, то данные начнут перемещение на следующий том, если он есть (по умолчанию 0.1). Для перемещения куски сортируются по размеру от большего к меньшему (по убыванию) и выбираются куски, совокупный размер которых достаточен для соблюдения условия `move_factor`, если совокупный размер всех партов недостаточен, будут перемещены все парты.
- `prefer_not_to_merge` — Отключает слияние кусков данных, хранящихся на данном томе. Если данная настройка включена, то слияние данных, хранящихся на данном томе, не допускается. Это позволяет контролировать работу ClickHouse с медленными дисками.

View File

@ -66,4 +66,4 @@ CREATE TABLE merge.hits_buffer AS merge.hits ENGINE = Buffer(merge, hits, 16, 10
Таблицы типа Buffer используются в тех случаях, когда от большого количества серверов поступает слишком много INSERT-ов в единицу времени, и нет возможности заранее самостоятельно буферизовать данные перед вставкой, в результате чего, INSERT-ы не успевают выполняться.
Заметим, что даже для таблиц типа Buffer не имеет смысла вставлять данные по одной строке, так как таким образом будет достигнута скорость всего лишь в несколько тысяч строк в секунду, тогда как при вставке более крупными блоками, достижимо более миллиона строк в секунду (смотрите раздел [«Производительность»](../../../introduction/performance/).
Заметим, что даже для таблиц типа Buffer не имеет смысла вставлять данные по одной строке, так как таким образом будет достигнута скорость всего лишь в несколько тысяч строк в секунду, тогда как при вставке более крупными блоками, достижимо более миллиона строк в секунду (смотрите раздел [«Производительность»](../../../introduction/performance.md)).

Some files were not shown because too many files have changed in this diff Show More