Merge branch 'master' into mvcc_prototype

Alexander Tokmakov 2022-01-10 19:53:50 +03:00
commit bf2b6c2c37
685 changed files with 21764 additions and 4523 deletions

.github/CODEOWNERS vendored (4 changes)
View File

@ -1,3 +1 @@
docs/* @ClickHouse/docs
docs/zh/* @ClickHouse/docs-zh
website/* @ClickHouse/docs

View File

@ -21,9 +21,10 @@ The following versions of ClickHouse server are currently being supported with s
| 21.6 | :x: |
| 21.7 | :x: |
| 21.8 | ✅ |
| 21.9 | |
| 21.9 | :x: |
| 21.10 | ✅ |
| 21.11 | ✅ |
| 21.12 | ✅ |
## Reporting a Vulnerability

View File

@ -827,7 +827,7 @@ public:
CompilerUInt128 a = (CompilerUInt128(numerator.items[1]) << 64) + numerator.items[0];
CompilerUInt128 b = (CompilerUInt128(denominator.items[1]) << 64) + denominator.items[0];
CompilerUInt128 c = a / b;
CompilerUInt128 c = a / b; // NOLINT
integer<Bits, Signed> res;
res.items[0] = c;
@ -1020,8 +1020,15 @@ constexpr integer<Bits, Signed>::integer(std::initializer_list<T> il) noexcept
{
    auto it = il.begin();
    for (size_t i = 0; i < _impl::item_count; ++i)
    {
        if (it < il.end())
        {
            items[i] = *it;
            ++it;
        }
        else
            items[i] = 0;
    }
}
}

View File

@ -1,30 +1,29 @@
option (ENABLE_AZURE_BLOB_STORAGE "Enable Azure blob storage" ${ENABLE_LIBRARIES})
option(USE_INTERNAL_AZURE_BLOB_STORAGE_LIBRARY
"Set to FALSE to use system Azure SDK instead of bundled (OFF currently not implemented)"
ON)
if (ENABLE_AZURE_BLOB_STORAGE)
option(USE_INTERNAL_AZURE_BLOB_STORAGE_LIBRARY
"Set to FALSE to use system Azure SDK instead of bundled (OFF currently not implemented)"
ON)
set(USE_AZURE_BLOB_STORAGE 1)
set(AZURE_BLOB_STORAGE_LIBRARY azure_sdk)
if ((NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/azure/sdk"
OR NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/azure/cmake-modules")
AND USE_INTERNAL_AZURE_BLOB_STORAGE_LIBRARY)
message (WARNING "submodule contrib/azure is missing. to fix try run: \n git submodule update --init")
set(USE_INTERNAL_AZURE_BLOB_STORAGE_LIBRARY OFF)
set(USE_AZURE_BLOB_STORAGE 0)
endif ()
if (NOT USE_INTERNAL_SSL_LIBRARY AND USE_INTERNAL_AZURE_BLOB_STORAGE_LIBRARY)
message (FATAL_ERROR "Currently Blob Storage support can be built only with internal SSL library")
endif()
if (NOT USE_INTERNAL_CURL AND USE_INTERNAL_AZURE_BLOB_STORAGE_LIBRARY)
message (FATAL_ERROR "Currently Blob Storage support can be built only with internal curl library")
endif()
endif()
if ((NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/azure/sdk"
OR NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/azure/cmake-modules")
AND USE_INTERNAL_AZURE_BLOB_STORAGE_LIBRARY)
message (WARNING "submodule contrib/azure is missing. to fix try run: \n git submodule update --init")
set(USE_INTERNAL_AZURE_BLOB_STORAGE_LIBRARY OFF)
set(USE_AZURE_BLOB_STORAGE 0)
endif ()
if (NOT USE_INTERNAL_SSL_LIBRARY AND USE_INTERNAL_AZURE_BLOB_STORAGE_LIBRARY)
message (FATAL_ERROR "Currently Blob Storage support can be built only with internal SSL library")
endif()
if (NOT USE_INTERNAL_CURL AND USE_INTERNAL_AZURE_BLOB_STORAGE_LIBRARY)
message (FATAL_ERROR "Currently Blob Storage support can be built only with internal curl library")
endif()
if (USE_AZURE_BLOB_STORAGE)
message (STATUS "Using Azure Blob Storage - ${USE_AZURE_BLOB_STORAGE}")
endif()
message (STATUS "Using Azure Blob Storage - ${USE_AZURE_BLOB_STORAGE}")

View File

@ -31,6 +31,7 @@ if (CCACHE_FOUND AND NOT COMPILER_MATCHES_CCACHE)
if (CCACHE_VERSION VERSION_GREATER "3.2.0" OR NOT CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
message(STATUS "Using ${CCACHE_FOUND} ${CCACHE_VERSION}")
set(LAUNCHER ${CCACHE_FOUND})
# debian (debhelpers) set SOURCE_DATE_EPOCH environment variable, that is
# filled from the debian/changelog or current time.
@ -39,13 +40,8 @@ if (CCACHE_FOUND AND NOT COMPILER_MATCHES_CCACHE)
# of the manifest, which do not allow to use previous cache,
# - 4.2+ ccache ignores SOURCE_DATE_EPOCH for every file w/o __DATE__/__TIME__
#
# So for:
# - 4.2+ does not require any sloppiness
# - 4.0+ will ignore SOURCE_DATE_EPOCH environment variable.
if (CCACHE_VERSION VERSION_GREATER_EQUAL "4.2")
message(STATUS "ccache is 4.2+ no quirks for SOURCE_DATE_EPOCH required")
set(LAUNCHER ${CCACHE_FOUND})
elseif (CCACHE_VERSION VERSION_GREATER_EQUAL "4.0")
# Exclude SOURCE_DATE_EPOCH env for ccache versions between [4.0, 4.2).
if (CCACHE_VERSION VERSION_GREATER_EQUAL "4.0" AND CCACHE_VERSION VERSION_LESS "4.2")
message(STATUS "Ignore SOURCE_DATE_EPOCH for ccache")
set(LAUNCHER env -u SOURCE_DATE_EPOCH ${CCACHE_FOUND})
endif()

contrib/NuRaft vendored (2 changes)

@ -1 +1 @@
Subproject commit ff100a8713146e1ca4b4158dd6cc4eef9af47fc3
Subproject commit c2043aa250e53ad5cf75e596e319d587af4dcb3c

View File

@ -1,46 +0,0 @@
FROM ubuntu:20.04
# ARG for quick switch to a given ubuntu mirror
ARG apt_archive="http://archive.ubuntu.com"
RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list
ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=13
RUN apt-get update \
&& apt-get install ca-certificates lsb-release wget gnupg apt-transport-https \
--yes --no-install-recommends --verbose-versions \
&& export LLVM_PUBKEY_HASH="bda960a8da687a275a2078d43c111d66b1c6a893a3275271beedf266c1ff4a0cdecb429c7a5cccf9f486ea7aa43fd27f" \
&& wget -nv -O /tmp/llvm-snapshot.gpg.key https://apt.llvm.org/llvm-snapshot.gpg.key \
&& echo "${LLVM_PUBKEY_HASH} /tmp/llvm-snapshot.gpg.key" | sha384sum -c \
&& apt-key add /tmp/llvm-snapshot.gpg.key \
&& export CODENAME="$(lsb_release --codename --short | tr 'A-Z' 'a-z')" \
&& echo "deb [trusted=yes] http://apt.llvm.org/${CODENAME}/ llvm-toolchain-${CODENAME}-${LLVM_VERSION} main" >> \
/etc/apt/sources.list
RUN apt-get update \
&& apt-get install \
bash \
ccache \
cmake \
curl \
expect \
g++ \
gcc \
ninja-build \
perl \
pkg-config \
python3 \
python3-lxml \
python3-requests \
python3-termcolor \
tzdata \
llvm-${LLVM_VERSION} \
clang-${LLVM_VERSION} \
clang-tidy-${LLVM_VERSION} \
lld-${LLVM_VERSION} \
lldb-${LLVM_VERSION} \
--yes --no-install-recommends
COPY build.sh /
CMD ["/bin/bash", "/build.sh"]

View File

@ -1,12 +0,0 @@
build: image
mkdir -p $(HOME)/.ccache
docker run --network=host --rm --workdir /server --volume $(realpath ../..):/server --cap-add=SYS_PTRACE --mount=type=bind,source=$(HOME)/.ccache,destination=/ccache -e CCACHE_DIR=/ccache -it yandex/clickhouse-builder
pull:
docker pull yandex/clickhouse-builder
image:
docker build --network=host -t yandex/clickhouse-builder .
image_push:
docker push yandex/clickhouse-builder

View File

@ -1,33 +0,0 @@
Allows building ClickHouse in Docker.
This is useful if you have an old OS distribution and you don't want to build fresh gcc or clang from source.
Usage:
Prepare image:
```
make image
```
Run build:
```
make build
```
Before running, ensure that your user has access to Docker:
To check that you have access to Docker, run `docker ps`.
If not, you must add this user to the `docker` group: `sudo usermod -aG docker $USER` and re-login.
(You must close all your sessions. For example, restart your computer.)
Build results are available in `build_docker` directory at top level of your working copy.
It builds only binaries, not packages.
For example, run server:
```
cd $(git rev-parse --show-toplevel)/src/Server
$(git rev-parse --show-toplevel)/docker/builder/programs/clickhouse server --config-file $(git rev-parse --show-toplevel)/programs/server/config.xml
```
Run client:
```
$(git rev-parse --show-toplevel)/docker/builder/programs/clickhouse client
```

View File

@ -1,15 +0,0 @@
#!/usr/bin/env bash
set -e
#ccache -s # uncomment to display CCache statistics
mkdir -p /server/build_docker
cd /server/build_docker
cmake -G Ninja /server "-DCMAKE_C_COMPILER=$(command -v clang-13)" "-DCMAKE_CXX_COMPILER=$(command -v clang++-13)"
# Set the number of build jobs to the half of number of virtual CPU cores (rounded up).
# By default, ninja uses all virtual CPU cores, which leads to very high memory consumption without much improvement in build time.
# Note that modern x86_64 CPUs use two-way hyper-threading (as of 2018).
# Without this option my laptop with 16 GiB RAM failed to execute build due to full system freeze.
NUM_JOBS=$(( ($(nproc || grep -c ^processor /proc/cpuinfo) + 1) / 2 ))
ninja -j $NUM_JOBS && env TEST_OPT="--skip long compile $TEST_OPT" ctest -V -j $NUM_JOBS

View File

@ -103,6 +103,10 @@
"name": "clickhouse/mysql-golang-client",
"dependent": []
},
"docker/test/integration/dotnet_client": {
"name": "clickhouse/dotnet-client",
"dependent": []
},
"docker/test/integration/mysql_java_client": {
"name": "clickhouse/mysql-java-client",
"dependent": []

View File

@ -52,9 +52,21 @@ function clone
}
function wget_with_retry
{
    for _ in 1 2 3 4; do
        if wget -nv -nd -c "$1";then
            return 0
        else
            sleep 0.5
        fi
    done
    return 1
}
function download
{
    wget -nv -nd -c "$BINARY_URL_TO_DOWNLOAD"
    wget_with_retry "$BINARY_URL_TO_DOWNLOAD"
    chmod +x clickhouse
    ln -s ./clickhouse ./clickhouse-server

View File

@ -0,0 +1,2 @@
bin/
obj/

View File

@ -0,0 +1,10 @@
# docker build .
# docker run -it --rm --network=host 14f23e59669c dotnet run --host localhost --port 8123 --user default --database default
FROM mcr.microsoft.com/dotnet/sdk:3.1
WORKDIR /client
COPY *.cs *.csproj /client/
ARG VERSION=4.1.0
RUN dotnet add package ClickHouse.Client -v ${VERSION}

View File

@ -0,0 +1,90 @@
using System;
using System.Threading.Tasks;
using ClickHouse.Client.ADO;
using ClickHouse.Client.Utility;
namespace clickhouse.test
{
    class Program
    {
        static async Task Main(string[] args)
        {
            try
            {
                using var connection = new ClickHouseConnection(GetConnectionString(args));
                await connection.ExecuteStatementAsync("CREATE DATABASE IF NOT EXISTS test");
                await connection.ExecuteStatementAsync("TRUNCATE TABLE IF EXISTS test.dotnet_test");
                await connection.ExecuteStatementAsync("CREATE TABLE IF NOT EXISTS test.dotnet_test (`age` Int32, `name` String) Engine = Memory");

                using var command = connection.CreateCommand();
                command.AddParameter("name", "Linus Torvalds");
                command.AddParameter("age", 51);
                command.CommandText = "INSERT INTO test.dotnet_test VALUES({age:Int32}, {name:String})";
                await command.ExecuteNonQueryAsync();

                using var result1 = await connection.ExecuteReaderAsync("SELECT * FROM test.dotnet_test");
                while (result1.Read())
                {
                    var values = new object[result1.FieldCount];
                    result1.GetValues(values);
                    foreach (var row in values)
                    {
                        Console.WriteLine(row);
                    }
                }

                using var result2 = await connection.ExecuteReaderAsync(selectSql);
                while (result2.Read())
                {
                    var values = new object[result2.FieldCount];
                    result2.GetValues(values);
                    foreach (var row in values)
                    {
                        Console.WriteLine(row);
                    }
                }
            }
            catch (Exception e)
            {
                Console.Error.WriteLine(e);
                Environment.ExitCode = 1;
            }
        }

        private static string GetConnectionString(string[] args)
        {
            var builder = new ClickHouseConnectionStringBuilder();
            int i = 0;
            while (i < args.Length)
            {
                switch (args[i])
                {
                    case "--host":
                        builder.Host = args[++i];
                        break;
                    case "--port":
                        builder.Port = UInt16.Parse(args[++i]);
                        break;
                    case "--user":
                        builder.Username = args[++i];
                        break;
                    case "--password":
                        builder.Password = args[++i];
                        break;
                    case "--database":
                        builder.Database = args[++i];
                        break;
                    default:
                        i++;
                        break;
                }
            }
            return builder.ToString();
        }

        private static string selectSql = @"SELECT NULL, toInt8(-8), toUInt8(8), toInt16(-16), toUInt16(16), toInt16(-32), toUInt16(32), toInt64(-64), toUInt64(64), toFloat32(32e6), toFloat32(-32e6), toFloat64(64e6), toFloat64(-64e6), 'TestString', toFixedString('ASD',3), toFixedString('ASD',5), toUUID('00000000-0000-0000-0000-000000000000'), toUUID('61f0c404-5cb3-11e7-907b-a6006ad3dba0'), toIPv4('1.2.3.4'), toIPv4('255.255.255.255'), CAST('a', 'Enum(\'a\' = 1, \'b\' = 2)'), CAST('a', 'Enum8(\'a\' = -1, \'b\' = 127)'), CAST('a', 'Enum16(\'a\' = -32768, \'b\' = 32767)'), array(1, 2, 3), array('a', 'b', 'c'), array(1, 2, NULL), toInt32OrNull('123'), toInt32OrNull(NULL), CAST(NULL AS Nullable(DateTime)), CAST(NULL AS LowCardinality(Nullable(String))), toLowCardinality('lowcardinality'), tuple(1, 'a', 8), tuple(123, tuple(5, 'a', 7)), toDateOrNull('1999-11-12'), toDateTime('1988-08-28 11:22:33'), toDateTime64('2043-03-01 18:34:04.4444444', 9), toDecimal32(123.45, 3), toDecimal32(-123.45, 3), toDecimal64(1.2345, 7), toDecimal64(-1.2345, 7), toDecimal128(12.34, 9), toDecimal128(-12.34, 9), toIPv6('2001:0db8:85a3:0000:0000:8a2e:0370:7334')";
    }
}

View File

@ -0,0 +1,13 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>netcoreapp3.1</TargetFramework>
</PropertyGroup>
<ItemGroup>
<PackageReference Include="clickhouse.client" Version="4.1.0" />
<PackageReference Include="dapper" Version="2.0.30" />
</ItemGroup>
</Project>

View File

@ -0,0 +1,6 @@
version: '2.3'
services:
dotnet1:
image: clickhouse/dotnet-client:${DOCKER_DOTNET_CLIENT_TAG:-latest}
# to keep container running
command: sleep infinity

View File

@ -39,6 +39,7 @@ export CLICKHOUSE_ODBC_BRIDGE_BINARY_PATH=/clickhouse-odbc-bridge
export CLICKHOUSE_LIBRARY_BRIDGE_BINARY_PATH=/clickhouse-library-bridge
export DOCKER_MYSQL_GOLANG_CLIENT_TAG=${DOCKER_MYSQL_GOLANG_CLIENT_TAG:=latest}
export DOCKER_DOTNET_CLIENT_TAG=${DOCKER_DOTNET_CLIENT_TAG:=latest}
export DOCKER_MYSQL_JAVA_CLIENT_TAG=${DOCKER_MYSQL_JAVA_CLIENT_TAG:=latest}
export DOCKER_MYSQL_JS_CLIENT_TAG=${DOCKER_MYSQL_JS_CLIENT_TAG:=latest}
export DOCKER_MYSQL_PHP_CLIENT_TAG=${DOCKER_MYSQL_PHP_CLIENT_TAG:=latest}

View File

@ -1,3 +1,3 @@
wget 'https://builds.clickhouse.com/master/freebsd/clickhouse'
fetch 'https://builds.clickhouse.com/master/freebsd/clickhouse'
chmod a+x ./clickhouse
sudo ./clickhouse install
su -m root -c './clickhouse install'

View File

@ -7,7 +7,7 @@ toc_title: MaterializedPostgreSQL
Creates a ClickHouse database with tables from a PostgreSQL database. First, a database with the `MaterializedPostgreSQL` engine creates a snapshot of the PostgreSQL database and loads the required tables. The required tables can include any subset of tables from any subset of schemas in the specified database. Along with the snapshot, the database engine acquires the LSN, and once the initial dump of the tables is performed, it starts pulling updates from the WAL. After the database is created, tables newly added to the PostgreSQL database are not automatically added to replication. They have to be added manually with the `ATTACH TABLE db.table` query.
Replication is implemented with the PostgreSQL Logical Replication Protocol, which does not allow replicating DDL but makes it possible to detect whether replication-breaking changes happened (column type changes, adding/removing columns). Such changes are detected, and the corresponding tables stop receiving updates. Such tables can be automatically reloaded in the background if the required setting is turned on. The safest way for now is to use `ATTACH`/`DETACH` queries to reload a table completely. If DDL does not break replication (for example, renaming a column), the table will still receive updates (insertion is done by position).
Replication is implemented with the PostgreSQL Logical Replication Protocol, which does not allow replicating DDL but makes it possible to detect whether replication-breaking changes happened (column type changes, adding/removing columns). Such changes are detected, and the corresponding tables stop receiving updates. Such tables can be automatically reloaded in the background if the required setting is turned on (can be used starting from 22.1). The safest way for now is to use `ATTACH`/`DETACH` queries to reload a table completely. If DDL does not break replication (for example, renaming a column), the table will still receive updates (insertion is done by position).
## Creating a Database {#creating-a-database}
@ -46,7 +46,7 @@ After `MaterializedPostgreSQL` database is created, it does not automatically de
ATTACH TABLE postgres_database.new_table;
```
Warning: before version 21.13, adding a table to replication left behind an unremoved temporary replication slot (named `{db_name}_ch_replication_slot_tmp`). If attaching tables in a ClickHouse version before 21.13, make sure to delete it manually (`SELECT pg_drop_replication_slot('{db_name}_ch_replication_slot_tmp')`). Otherwise disk usage will grow. The issue is fixed in 21.13.
Warning: before version 22.1, adding a table to replication left behind an unremoved temporary replication slot (named `{db_name}_ch_replication_slot_tmp`). If attaching tables in a ClickHouse version before 22.1, make sure to delete it manually (`SELECT pg_drop_replication_slot('{db_name}_ch_replication_slot_tmp')`). Otherwise disk usage will grow. The issue is fixed in 22.1.
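For example, a cleanup sketch run on the PostgreSQL side (the database name `db1` is hypothetical):

``` sql
-- Run against PostgreSQL, not ClickHouse; "db1" is a placeholder database name.
SELECT pg_drop_replication_slot('db1_ch_replication_slot_tmp');
```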
## Dynamically removing tables from replication {#dynamically-removing-table-from-replication}
@ -77,7 +77,7 @@ Tables are accessed via schema name and table name at the same time:
``` sql
CREATE DATABASE database1
ENGINE = MaterializedPostgreSQL('postgres1:5432', 'postgres_database', 'postgres_user', 'postgres_password')
SETTINGS materialized_postgresql_tables_list = 'schema1.table1,schema2.table2,schema1.table3';
SETTINGS materialized_postgresql_tables_list = 'schema1.table1,schema2.table2,schema1.table3',
materialized_postgresql_tables_list_with_schema = 1;
SELECT * FROM database1.`schema1.table1`;
@ -156,6 +156,8 @@ Default value: empty list. (Default schema is used)
4. materialized_postgresql_allow_automatic_update {#materialized-postgresql-allow-automatic-update}
Do not use this setting before version 22.1.
Allows reloading a table in the background when schema changes are detected. DDL queries on the PostgreSQL side are not replicated via the ClickHouse [MaterializedPostgreSQL](../../engines/database-engines/materialized-postgresql.md) engine, because this is not allowed by the PostgreSQL logical replication protocol, but the fact of DDL changes is detected transactionally. In this case, the default behaviour is to stop replicating those tables once DDL is detected. However, if this setting is enabled, then, instead of stopping the replication of those tables, they will be reloaded in the background via a database snapshot without data loss, and replication will continue for them.
Possible values:

View File

@ -189,7 +189,7 @@ Similar to GraphiteMergeTree, the HDFS engine supports extended configuration us
|libhdfs3\_conf | "" |
### Limitations {#limitations}
* `hadoop_security_kerberos_ticket_cache_path` and `libhdfs3_conf` can be global only, not user specific
## Kerberos support {#kerberos-support}

View File

@ -339,7 +339,7 @@ SELECT count() FROM table WHERE u64 * i32 == 10 AND u64 * length(s) >= 1234
For the `Map` data type, the client can specify whether the index should be created for keys or values using the [mapKeys](../../../sql-reference/functions/tuple-map-functions.md#mapkeys) or [mapValues](../../../sql-reference/functions/tuple-map-functions.md#mapvalues) function.
The following functions can use the filter: [equals](../../../sql-reference/functions/comparison-functions.md), [notEquals](../../../sql-reference/functions/comparison-functions.md), [in](../../../sql-reference/functions/in-functions.md), [notIn](../../../sql-reference/functions/in-functions.md), [has](../../../sql-reference/functions/array-functions.md#hasarr-elem).
The following functions can use the filter: [equals](../../../sql-reference/functions/comparison-functions.md), [notEquals](../../../sql-reference/functions/comparison-functions.md), [in](../../../sql-reference/functions/in-functions.md), [notIn](../../../sql-reference/functions/in-functions.md), [has](../../../sql-reference/functions/array-functions.md#hasarr-elem), [hasAny](../../../sql-reference/functions/array-functions.md#hasany), [hasAll](../../../sql-reference/functions/array-functions.md#hasall).
Example of index creation for the `Map` data type:
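A minimal sketch (the table and index names are illustrative; `bloom_filter` is one index type that works with `mapKeys`/`mapValues`):

``` sql
CREATE TABLE map_index_example
(
    m Map(String, UInt64),
    INDEX keys_idx mapKeys(m) TYPE bloom_filter GRANULARITY 1,
    INDEX values_idx mapValues(m) TYPE bloom_filter GRANULARITY 1
)
ENGINE = MergeTree
ORDER BY tuple();
```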

View File

@ -11,6 +11,7 @@ Questions:
- [Which ClickHouse version to use in production?](../../faq/operations/production.md)
- [Is it possible to delete old records from a ClickHouse table?](../../faq/operations/delete-old-data.md)
- [Does ClickHouse support multi-region replication?](../../faq/operations/multi-region-replication.md)
!!! info "Don't see what you were looking for?"
Check out [other F.A.Q. categories](../../faq/index.md) or browse around main documentation articles found in the left sidebar.

View File

@ -0,0 +1,13 @@
---
title: Does ClickHouse support multi-region replication?
toc_hidden: true
toc_priority: 30
---
# Does ClickHouse support multi-region replication? {#does-clickhouse-support-multi-region-replication}
The short answer is "yes". However, we recommend keeping latency between all regions/datacenters in the two-digit range; otherwise, write performance will suffer as writes go through the distributed consensus protocol. For example, replication between US coasts will likely work fine, but between the US and Europe it won't.
Configuration-wise there's no difference compared to single-region replication; simply use hosts that are located in different locations for the replicas.
For more information, see [full article on data replication](../../engines/table-engines/mergetree-family/replication.md).

View File

@ -9,6 +9,8 @@ The HTTP interface lets you use ClickHouse on any platform from any programming
By default, `clickhouse-server` listens for HTTP on port 8123 (this can be changed in the config).
Sometimes the `curl` command is not available on the user's operating system. On Ubuntu or Debian, run `sudo apt install curl`. Please refer to this [documentation](https://curl.se/download.html) to install it before running the examples.
If you make a `GET /` request without parameters, it returns a 200 response code and the string defined in [http_server_default_response](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-http_server_default_response); the default value is “Ok.” (with a line feed at the end)
``` bash
@ -186,7 +188,7 @@ $ echo "SELECT 1" | gzip -c | \
```
``` bash
# Receiving compressed data from the server
# Receiving compressed data archive from the server
$ curl -vsS "http://localhost:8123/?enable_http_compression=1" \
-H 'Accept-Encoding: gzip' --output result.gz -d 'SELECT number FROM system.numbers LIMIT 3'
$ zcat result.gz
@ -195,6 +197,15 @@ $ zcat result.gz
2
```
```bash
# Receiving compressed data from the server and using gunzip to decompress it
$ curl -sS "http://localhost:8123/?enable_http_compression=1" \
-H 'Accept-Encoding: gzip' -d 'SELECT number FROM system.numbers LIMIT 3' | gunzip -
0
1
2
```
## Default Database {#default-database}
You can use the database URL parameter or the X-ClickHouse-Database header to specify the default database.
@ -424,10 +435,10 @@ Next are the configuration methods for different `type`.
The `query` value is a predefined query of `predefined_query_handler`, which ClickHouse executes when an HTTP request is matched, returning the result of the query. This configuration is required.
The following example defines the values of [max_threads](../operations/settings/settings.md#settings-max_threads) and `max_alter_threads` settings, then queries the system table to check whether these settings were set successfully.
The following example defines the values of [max_threads](../operations/settings/settings.md#settings-max_threads) and `max_final_threads` settings, then queries the system table to check whether these settings were set successfully.
!!! note "Warning"
To keep the default `handlers` such as `query`, `play`, `ping`, use the `<defaults/>` rule.
Example:
@ -451,9 +462,9 @@ Example:
```
``` bash
$ curl -H 'XXX:TEST_HEADER_VALUE' -H 'PARAMS_XXX:max_threads' 'http://localhost:8123/query_param_with_url/1/max_threads/max_alter_threads?max_threads=1&max_alter_threads=2'
$ curl -H 'XXX:TEST_HEADER_VALUE' -H 'PARAMS_XXX:max_threads' 'http://localhost:8123/query_param_with_url/1/max_threads/max_final_threads?max_threads=1&max_final_threads=2'
1
max_alter_threads 2
max_final_threads 2
```
!!! note "caution"
@ -465,7 +476,7 @@ In `dynamic_query_handler`, the query is written in the form of param of the HTT
ClickHouse extracts and executes the value corresponding to the `query_param_name` value in the URL of the HTTP request. The default value of `query_param_name` is `/query`. It is an optional configuration. If there is no definition in the configuration file, the parameter is not passed in.
To experiment with this functionality, the example defines the values of [max_threads](../operations/settings/settings.md#settings-max_threads) and `max_alter_threads` and queries whether the settings were set successfully.
To experiment with this functionality, the example defines the values of [max_threads](../operations/settings/settings.md#settings-max_threads) and `max_final_threads` and queries whether the settings were set successfully.
Example:
@ -484,9 +495,9 @@ Example:
```
``` bash
$ curl -H 'XXX:TEST_HEADER_VALUE_DYNAMIC' 'http://localhost:8123/own?max_threads=1&max_alter_threads=2&param_name_1=max_threads&param_name_2=max_alter_threads&query_param=SELECT%20name,value%20FROM%20system.settings%20where%20name%20=%20%7Bname_1:String%7D%20OR%20name%20=%20%7Bname_2:String%7D'
$ curl -H 'XXX:TEST_HEADER_VALUE_DYNAMIC' 'http://localhost:8123/own?max_threads=1&max_final_threads=2&param_name_1=max_threads&param_name_2=max_final_threads&query_param=SELECT%20name,value%20FROM%20system.settings%20where%20name%20=%20%7Bname_1:String%7D%20OR%20name%20=%20%7Bname_2:String%7D'
max_threads 1
max_alter_threads 2
max_final_threads 2
```
### static {#static}

View File

@ -36,7 +36,7 @@ mysql>
```
For compatibility with all MySQL clients, it is recommended to specify the user password with [double SHA1](../operations/settings/settings-users.md#password_double_sha1_hex) in the configuration file.
If the user password is specified using [SHA256](../operations/settings/settings-users.md#password_sha256_hex), some clients won't be able to authenticate (mysqljs and old versions of the command-line tool mysql).
If the user password is specified using [SHA256](../operations/settings/settings-users.md#password_sha256_hex), some clients won't be able to authenticate (mysqljs and old versions of the command-line tools MySQL and MariaDB).
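As an illustration, such a user can also be created via SQL (a sketch; the user name and password are placeholders):

``` sql
CREATE USER mysql_compat_user IDENTIFIED WITH double_sha1_password BY 'secret';
```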
Restrictions:

View File

@ -60,8 +60,10 @@ toc_title: Adopters
| <a href="https://www.exness.com/" class="favicon">Exness</a> | Trading | Metrics, Logging | — | — | [Talk in Russian, May 2019](https://youtu.be/_rpU-TvSfZ8?t=3215) |
| <a href="https://www.eventbunker.io/" class="favicon">EventBunker.io</a> | Serverless Data Processing | — | — | — | [Tweet, April 2021](https://twitter.com/Halil_D_/status/1379839133472985091) |
| <a href="https://fastnetmon.com/" class="favicon">FastNetMon</a> | DDoS Protection | Main Product | | — | [Official website](https://fastnetmon.com/docs-fnm-advanced/fastnetmon-advanced-traffic-persistency/) |
| <a href="https://www.firebolt.io/" class="favicon">Firebolt</a> | Analytics | Main product | - | - | [YouTube Tech Talk](https://www.youtube.com/watch?v=9rW9uEJ15tU) |
| <a href="https://www.flipkart.com/" class="favicon">Flipkart</a> | e-Commerce | — | — | — | [Talk in English, July 2020](https://youtu.be/GMiXCMFDMow?t=239) |
| <a href="https://fun.co/rp" class="favicon">FunCorp</a> | Games | | — | 14 bn records/day as of Jan 2021 | [Article](https://www.altinity.com/blog/migrating-from-redshift-to-clickhouse) |
| <a href="https://futurragroup.com/" class="favicon">Futurra Group</a> | Analytics | — | — | — | [Article in Russian, December 2021](https://dou.ua/forums/topic/35587/) |
| <a href="https://geniee.co.jp" class="favicon">Geniee</a> | Ad network | Main product | — | — | [Blog post in Japanese, July 2017](https://tech.geniee.co.jp/entry/2017/07/20/160100) |
| <a href="https://www.genotek.ru/" class="favicon">Genotek</a> | Bioinformatics | Main product | — | — | [Video, August 2020](https://youtu.be/v3KyZbz9lEE) |
| <a href="https://gigapipe.com/" class="favicon">Gigapipe</a> | Managed ClickHouse | Main product | — | — | [Official website](https://gigapipe.com/) |
@ -70,6 +72,7 @@ toc_title: Adopters
| <a href="https://www.grouparoo.com" class="favicon">Grouparoo</a> | Data Warehouse Integrations | Main product | — | — | [Official Website, November 2021](https://www.grouparoo.com/integrations) |
| <a href="https://www.huya.com/" class="favicon">HUYA</a> | Video Streaming | Analytics | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/7.%20ClickHouse万亿数据分析实践%20李本旺(sundy-li)%20虎牙.pdf) |
| <a href="https://www.hydrolix.io/" class="favicon">Hydrolix</a> | Cloud data platform | Main product | — | — | [Documentation](https://docs.hydrolix.io/guide/query) |
| <a href="https://hystax.com" class="favicon">Hystax</a> | Cloud Operations | Observability Analytics | - | - | [Blog](https://hystax.com/clickhouse-for-real-time-cost-saving-analytics-how-to-stop-hammering-screws-and-use-an-electric-screwdriver/) |
| <a href="https://www.the-ica.com/" class="favicon">ICA</a> | FinTech | Risk Management | — | — | [Blog Post in English, Sep 2020](https://altinity.com/blog/clickhouse-vs-redshift-performance-for-fintech-risk-management?utm_campaign=ClickHouse%20vs%20RedShift&utm_content=143520807&utm_medium=social&utm_source=twitter&hss_channel=tw-3894792263) |
| <a href="https://www.idealista.com" class="favicon">Idealista</a> | Real Estate | Analytics | — | — | [Blog Post in English, April 2019](https://clickhouse.com/blog/en/clickhouse-meetup-in-madrid-on-april-2-2019) |
| <a href="https://infobaleen.com" class="favicon">Infobaleen</a> | AI markting tool | Analytics | — | — | [Official site](https://infobaleen.com) |
@ -81,14 +84,18 @@ toc_title: Adopters
| <a href="https://ippon.tech" class="favicon">Ippon Technologies</a> | Technology Consulting | — | — | — | [Talk in English, July 2020](https://youtu.be/GMiXCMFDMow?t=205) |
| <a href="https://www.ivi.ru/" class="favicon">Ivi</a> | Online Cinema | Analytics, Monitoring | — | — | [Article in Russian, Jan 2018](https://habr.com/en/company/ivi/blog/347408/) |
| <a href="https://jinshuju.net" class="favicon">Jinshuju 金数据</a> | BI Analytics | Main product | — | — | [Slides in Chinese, October 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup24/3.%20金数据数据架构调整方案Public.pdf) |
| <a href="https://jitsu.com" class="favicon">Jitsu</a> | Cloud Software | Data Pipeline | — | — | [Documentation](https://jitsu.com/docs/destinations-configuration/clickhouse-destination), [Hacker News](https://news.ycombinator.com/item?id=29106082) |
| <a href="https://jitsu.com" class="favicon">Jitsu</a> | Cloud Software | Data Pipeline | — | — | [Documentation](https://jitsu.com/docs/destinations-configuration/clickhouse-destination), [Hacker News post](https://news.ycombinator.com/item?id=29106082) |
| <a href="https://juicefs.com/" class="favicon">JuiceFS</a> | Storage | Shopping Cart | - | - | [Blog](https://juicefs.com/blog/en/posts/shopee-clickhouse-with-juicefs/) |
| <a href="https://www.kakaocorp.com/" class="favicon">kakaocorp</a> | Internet company | — | — | — | [if(kakao)2020](https://tv.kakao.com/channel/3693125/cliplink/414129353), [if(kakao)2021](https://if.kakao.com/session/24) |
| <a href="https://www.kodiakdata.com/" class="favicon">Kodiak Data</a> | Clouds | Main product | — | — | [Slides in Engish, April 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup13/kodiak_data.pdf) |
| <a href="https://kontur.ru" class="favicon">Kontur</a> | Software Development | Metrics | — | — | [Talk in Russian, November 2018](https://www.youtube.com/watch?v=U4u4Bd0FtrY) |
| <a href="https://www.kuaishou.com/" class="favicon">Kuaishou</a> | Video | — | — | — | [ClickHouse Meetup, October 2018](https://clickhouse.com/blog/en/2018/clickhouse-community-meetup-in-beijing-on-october-28-2018/) |
| <a href="https://www.kgk-global.com/en/" class="favicon">KGK Global</a> | Vehicle monitoring | — | — | — | [Press release, June 2021](https://zoom.cnews.ru/news/item/530921) |
| <a href="https://www.lancom-systems.com/" class="favicon">LANCOM Systems</a> | Network Solutions | Traffic analysis | - | - | [ClickHouse Operator for Kubernetes](https://www.lancom-systems.com/), [Hacker News post] (https://news.ycombinator.com/item?id=29413660) |
| <a href="https://www.lbl.gov" class="favicon">Lawrence Berkeley National Laboratory</a> | Research | Traffic analysis | 5 servers | 55 TiB | [Slides in English, April 2019](https://www.smitasin.com/presentations/2019-04-17_DOE-NSM.pdf) |
| <a href="https://www.lever.co/" class="favicon">Lever</a> | Talent Management | Recruiting | - | - | [Hacker News post](https://news.ycombinator.com/item?id=29558544) |
| <a href="https://lifestreet.com/" class="favicon">LifeStreet</a> | Ad network | Main product | 75 servers (3 replicas) | 5.27 PiB | [Blog post in Russian, February 2017](https://habr.com/en/post/322620/) |
| <a href="https://lookforsale.ru/" class="favicon">Lookforsale</a> | E-Commerce | — | — | — | [Job Posting, December 2021](https://telegram.me/javascript_jobs/587318) |
| <a href="https://mcs.mail.ru/" class="favicon">Mail.ru Cloud Solutions</a> | Cloud services | Main product | — | — | [Article in Russian](https://mcs.mail.ru/help/db-create/clickhouse#) |
| <a href="https://maxilect.com/" class="favicon">MAXILECT</a> | Ad Tech, Blockchain, ML, AI | — | — | — | [Job advertisement, 2021](https://www.linkedin.com/feed/update/urn:li:activity:6780842017229430784/) |
| <a href="https://tech.mymarilyn.ru" class="favicon">Marilyn</a> | Advertising | Statistics | — | — | [Talk in Russian, June 2017](https://www.youtube.com/watch?v=iXlIgx2khwc) |
@ -106,6 +113,7 @@ toc_title: Adopters
| <a href="https://ok.ru" class="favicon">Ok.ru</a> | Social Network | — | 72 servers | 810 TB compressed, 50bn rows/day, 1.5 TB/day | [SmartData conference, October 2021](https://assets.ctfassets.net/oxjq45e8ilak/4JPHkbJenLgZhBGGyyonFP/57472ec6987003ec4078d0941740703b/____________________ClickHouse_______________________.pdf) |
| <a href="https://omnicomm.ru/" class="favicon">Omnicomm</a> | Transportation Monitoring | — | — | — | [Facebook post, October 2021](https://www.facebook.com/OmnicommTeam/posts/2824479777774500) |
| <a href="https://www.oneapm.com/" class="favicon">OneAPM</a> | Monitoring and Data Analysis | Main product | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/8.%20clickhouse在OneAPM的应用%20杜龙.pdf) |
| <a href="https://opensee.io/" class="favicon">Opensee</a> | Financial Analytics | Main product | - | - | [Blog](https://opensee.io/news/from-moscow-to-wall-street-the-remarkable-journey-of-clickhouse/) |
| <a href="https://www.opentargets.org/" class="favicon">Open Targets</a> | Genome Research | Genome Search | — | — | [Tweet, October 2021](https://twitter.com/OpenTargets/status/1452570865342758913?s=20), [Blog](https://blog.opentargets.org/graphql/) |
| <a href="https://corp.ozon.com/" class="favicon">OZON</a> | E-commerce | — | — | — | [Official website](https://job.ozon.ru/vacancy/razrabotchik-clickhouse-ekspluatatsiya-40991870/) |
| <a href="https://panelbear.com/" class="favicon">Panelbear | Analytics | Monitoring and Analytics | — | — | [Tech Stack, November 2020](https://panelbear.com/blog/tech-stack/) |
@ -118,6 +126,7 @@ toc_title: Adopters
| <a href="https://prana-system.com/en/" class="favicon">PRANA</a> | Industrial predictive analytics | Main product | — | — | [News (russian), Feb 2021](https://habr.com/en/news/t/541392/) |
| <a href="https://www.qingcloud.com/" class="favicon">QINGCLOUD</a> | Cloud services | Main product | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/4.%20Cloud%20%2B%20TSDB%20for%20ClickHouse%20张健%20QingCloud.pdf) |
| <a href="https://qrator.net" class="favicon">Qrator</a> | DDoS protection | Main product | — | — | [Blog Post, March 2019](https://blog.qrator.net/en/clickhouse-ddos-mitigation_37/) |
| <a href="https://rvision.pro/en/" class="favicon">R-Vision</a> | Information Security | — | — | — | [Article in Russian, December 2021](https://www.anti-malware.ru/reviews/R-Vision-SENSE-15) |
| <a href="https://www.rbinternational.com/" class="favicon">Raiffeisenbank</a> | Banking | Analytics | — | — | [Lecture in Russian, December 2020](https://cs.hse.ru/announcements/421965599.html) |
| <a href="https://rambler.ru" class="favicon">Rambler</a> | Internet services | Analytics | — | — | [Talk in Russian, April 2018](https://medium.com/@ramblertop/разработка-api-clickhouse-для-рамблер-топ-100-f4c7e56f3141) |
| <a href="https://replicahq.com" class="favicon">Replica</a> | Urban Planning | Analytics | — | — | [Job advertisement](https://boards.greenhouse.io/replica/jobs/5547732002?gh_jid=5547732002) |
@ -153,6 +162,7 @@ toc_title: Adopters
| <a href="https://www.tinybird.co/" class="favicon">Tinybird</a> | Real-time Data Products | Data processing | — | — | [Official website](https://www.tinybird.co/) |
| <a href="https://trafficstars.com/" class="favicon">Traffic Stars</a> | AD network | — | 300 servers in Europe/US | 1.8 PiB, 700 000 insert rps (as of 2021) | [Slides in Russian, May 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup15/lightning/ninja.pdf) |
| <a href="https://www.uber.com" class="favicon">Uber</a> | Taxi | Logging | — | — | [Slides, February 2020](https://presentations.clickhouse.com/meetup40/uber.pdf) |
| <a href="https://usetech.com/" class="favicon">UseTech</a> | Software Development | — | — | — | [Job Posting, December 2021](https://vk.com/wall136266658_2418) |
| <a href="https://hello.utmstat.com/" class="favicon">UTMSTAT</a> | Analytics | Main product | — | — | [Blog post, June 2020](https://vc.ru/tribuna/133956-striming-dannyh-iz-servisa-skvoznoy-analitiki-v-clickhouse) |
| <a href="https://vercel.com/" class="favicon">Vercel</a> | Traffic and Performance Analytics | — | — | — | Direct reference, October 2021 |
| <a href="https://vk.com" class="favicon">VKontakte</a> | Social Network | Statistics, Logging | — | — | [Slides in Russian, August 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup17/3_vk.pdf) |
@ -168,7 +178,8 @@ toc_title: Adopters
| <a href="https://cloud.yandex.ru/services/managed-clickhouse" class="favicon">Yandex Cloud</a> | Public Cloud | Main product | — | — | [Talk in Russian, December 2019](https://www.youtube.com/watch?v=pgnak9e_E0o) |
| <a href="https://cloud.yandex.ru/services/datalens" class="favicon">Yandex DataLens</a> | Business Intelligence | Main product | — | — | [Slides in Russian, December 2019](https://presentations.clickhouse.com/meetup38/datalens.pdf) |
| <a href="https://market.yandex.ru/" class="favicon">Yandex Market</a> | e-Commerce | Metrics, Logging | — | — | [Talk in Russian, January 2019](https://youtu.be/_l1qP0DyBcA?t=478) |
| <a href="https://metrica.yandex.com" class="favicon">Yandex Metrica</a> | Web analytics | Main product | 630 servers in one cluster, 360 servers in another cluster, 1862 servers in one department | 133 PiB / 8.31 PiB / 120 trillion records | [Slides, February 2020](https://presentations.clickhouse.com/meetup40/introduction/#13) |
| <a href="https://metrica.yandex.com" class="favicon">Yandex Metrica</a> | Web analytics | Macin product | 630 servers in one cluster, 360 servers in another cluster, 1862 servers in one department | 133 PiB / 8.31 PiB / 120 trillion records | [Slides, February 2020](https://presentations.clickhouse.com/meetup40/introduction/#13) |
| <a href="https://www.yellowfinbi.com" class="favicon"><COMPANYNAME></a> | Analytics | Main product | - | - | [Integration](https://www.yellowfinbi.com/campaign/yellowfin-9-whats-new#el-30219e0e) |
| <a href="https://www.yotascale.com/" class="favicon">Yotascale</a> | Cloud | Data pipeline | — | 2 bn records/day | [LinkedIn (Accomplishments)](https://www.linkedin.com/in/adilsaleem/) |
| <a href="https://www.your-analytics.org/" class="favicon">Your Analytics</a> | Product Analytics | Main Product | — | - | [Tweet, November 2021](https://twitter.com/mikenikles/status/1459737241165565953) |
| <a href="https://zagravagames.com/en/" class="favicon">Zagrava Trading</a> | — | — | — | — | [Job offer, May 2021](https://twitter.com/datastackjobs/status/1394707267082063874) |
@ -178,9 +189,5 @@ toc_title: Adopters
| <a href="https://promo.croc.ru/digitalworker" class="favicon">Цифровой Рабочий</a> | Industrial IoT, Analytics | — | — | — | [Blog post in Russian, March 2021](https://habr.com/en/company/croc/blog/548018/) |
| <a href="https://shop.okraina.ru/" class="favicon">ООО «МПЗ Богородский»</a> | Agriculture | — | — | — | [Article in Russian, November 2020](https://cloud.yandex.ru/cases/okraina) |
| <a href="https://domclick.ru/" class="favicon">ДомКлик</a> | Real Estate | — | — | — | [Article in Russian, October 2021](https://habr.com/ru/company/domclick/blog/585936/) |
| <a href="https://futurragroup.com/" class="favicon">Futurra Group</a> | Analytics | — | — | — | [Article in Russian, December 2021](https://dou.ua/forums/topic/35587/) |
| <a href="https://usetech.com/" class="favicon">UseTech</a> | Software Development | — | — | — | [Job Posting, December 2021](https://vk.com/wall136266658_2418) |
| <a href="https://lookforsale.ru/" class="favicon">Lookforsale</a> | E-Commerce | — | — | — | [Job Posting, December 2021](https://telegram.me/javascript_jobs/587318) |
| <a href="https://rvision.pro/en/" class="favicon">R-Vision</a> | Information Security | — | — | — | [Article in Russian, December 2021](https://www.anti-malware.ru/reviews/R-Vision-SENSE-15) |
[Original article](https://clickhouse.com/docs/en/introduction/adopters/) <!--hide-->

View File

@ -3,14 +3,14 @@ toc_priority: 66
toc_title: ClickHouse Keeper
---
# [pre-production] ClickHouse Keeper
# [pre-production] ClickHouse Keeper {#clickHouse-keeper}
ClickHouse server uses the [ZooKeeper](https://zookeeper.apache.org/) coordination system for data [replication](../engines/table-engines/mergetree-family/replication.md) and [distributed DDL](../sql-reference/distributed-ddl.md) query execution. ClickHouse Keeper is an alternative coordination system compatible with ZooKeeper.
!!! warning "Warning"
This feature is currently in the pre-production stage. We test it in our CI and on small internal installations.
## Implementation details
## Implementation details {#implementation-details}
ZooKeeper is one of the first well-known open-source coordination systems. It is implemented in Java and has quite a simple and powerful data model. ZooKeeper's coordination algorithm, ZAB (ZooKeeper Atomic Broadcast), doesn't provide linearizability guarantees for reads, because each ZooKeeper node serves reads locally. Unlike ZooKeeper, ClickHouse Keeper is written in C++ and uses the [RAFT algorithm](https://raft.github.io/) [implementation](https://github.com/eBay/NuRaft). This algorithm allows linearizability for reads and writes and has several open-source implementations in different languages.
@ -21,7 +21,7 @@ ClickHouse Keeper supports Access Control List (ACL) the same way as [ZooKeeper]
!!! info "Note"
External integrations are not supported.
## Configuration
## Configuration {#configuration}
ClickHouse Keeper can be used as a standalone replacement for ZooKeeper or as an internal part of the ClickHouse server; in both cases the configuration is almost the same `.xml` file. The main ClickHouse Keeper configuration tag is `<keeper_server>`. Keeper configuration has the following parameters:
@ -102,7 +102,7 @@ Examples of configuration for quorum with three nodes can be found in [integrati
</keeper_server>
```
## How to run
## How to run {#how-to-run}
ClickHouse Keeper is bundled into the ClickHouse server package: just add the configuration of `<keeper_server>` and start the ClickHouse server as always. If you want to run standalone ClickHouse Keeper, you can start it in a similar way with:
@ -110,13 +110,14 @@ ClickHouse Keeper is bundled into the ClickHouse server package, just add config
clickhouse-keeper --config /etc/your_path_to_config/config.xml --daemon
```
## Four Letter Word Commands
## Four Letter Word Commands {#four-letter-word-commands}
ClickHouse Keeper also provides 4lw commands which are almost the same as in ZooKeeper. Each command is composed of four letters, such as `mntr`, `stat`, etc. There are some more interesting commands: `stat` gives some general information about the server and connected clients, while `srvr` and `cons` give extended details on the server and connections respectively.
The 4lw commands have a white-list configuration `four_letter_word_white_list`, which has the default value "conf,cons,crst,envi,ruok,srst,srvr,stat,wchc,wchs,dirs,mntr,isro".
You can issue the commands to ClickHouse Keeper via telnet or nc, at the client port.
```
echo mntr | nc localhost 9181
```
@ -296,7 +297,7 @@ Sessions with Ephemerals (1):
/clickhouse/task_queue/ddl
```
## [experimental] Migration from ZooKeeper
## [experimental] Migration from ZooKeeper {#migration-from-zookeeper}
Seamless migration from ZooKeeper to ClickHouse Keeper is impossible: you have to stop your ZooKeeper cluster, convert the data, and start ClickHouse Keeper. The `clickhouse-keeper-converter` tool allows converting ZooKeeper logs and snapshots to a ClickHouse Keeper snapshot. It works only with ZooKeeper > 3.4. Steps for migration:

View File

@ -672,7 +672,8 @@ On hosts with low RAM and swap, you possibly need setting `max_server_memory_usa
## max_concurrent_queries {#max-concurrent-queries}
The maximum number of simultaneously processed queries related to MergeTree tables. Queries may be limited by other settings: [max_concurrent_queries_for_user](#max-concurrent-queries-for-user), [max_concurrent_queries_for_all_users](#max-concurrent-queries-for-all-users), [min_marks_to_honor_max_concurrent_queries](#min-marks-to-honor-max-concurrent-queries).
The maximum number of simultaneously processed queries related to MergeTree tables.
Queries may be limited by other settings: [max_concurrent_insert_queries](#max-concurrent-insert-queries), [max_concurrent_select_queries](#max-concurrent-select-queries), [max_concurrent_queries_for_user](#max-concurrent-queries-for-user), [max_concurrent_queries_for_all_users](#max-concurrent-queries-for-all-users), [min_marks_to_honor_max_concurrent_queries](#min-marks-to-honor-max-concurrent-queries).
!!! info "Note"
These settings can be modified at runtime and will take effect immediately. Queries that are already running will remain unchanged.
@ -680,7 +681,9 @@ The maximum number of simultaneously processed queries related to MergeTree tabl
Possible values:
- Positive integer.
- 0 — Disabled.
- 0 — No limit.
Default value: `100`.
**Example**
@ -688,6 +691,46 @@ Possible values:
<max_concurrent_queries>100</max_concurrent_queries>
```
## max_concurrent_insert_queries {#max-concurrent-insert-queries}
The maximum number of simultaneously processed `INSERT` queries.
!!! info "Note"
These settings can be modified at runtime and will take effect immediately. Queries that are already running will remain unchanged.
Possible values:
- Positive integer.
- 0 — No limit.
Default value: `0`.
**Example**
``` xml
<max_concurrent_insert_queries>100</max_concurrent_insert_queries>
```
## max_concurrent_select_queries {#max-concurrent-select-queries}
The maximum number of simultaneously processed `SELECT` queries.
!!! info "Note"
These settings can be modified at runtime and will take effect immediately. Queries that are already running will remain unchanged.
Possible values:
- Positive integer.
- 0 — No limit.
Default value: `0`.
**Example**
``` xml
<max_concurrent_select_queries>100</max_concurrent_select_queries>
```
## max_concurrent_queries_for_user {#max-concurrent-queries-for-user}
The maximum number of simultaneously processed queries related to MergeTree tables per user.
@ -695,7 +738,9 @@ The maximum number of simultaneously processed queries related to MergeTree tabl
Possible values:
- Positive integer.
- 0 — Disabled.
- 0 — No limit.
Default value: `0`.
**Example**
@ -711,7 +756,12 @@ Example: `max_concurrent_queries_for_all_users` can be set to 99 for all users a
Modifying the setting for one query or user does not affect other queries.
Default value: `0` that means no limit.
Possible values:
- Positive integer.
- 0 — No limit.
Default value: `0`.
**Example**
@ -1238,6 +1288,20 @@ Example
<mysql_port>9004</mysql_port>
```
## postgresql_port {#server_configuration_parameters-postgresql_port}
Port for communicating with clients over PostgreSQL protocol.
**Possible values**
Positive integer.
Example
``` xml
<postgresql_port>9005</postgresql_port>
```
## tmp_path {#tmp-path}
Path to temporary data for processing large queries.

View File

@ -27,6 +27,10 @@ An example of changing the settings for a specific table with the `ALTER TABLE .
``` sql
ALTER TABLE foo
MODIFY SETTING max_suspicious_broken_parts = 100;
-- reset to default (use value from system.merge_tree_settings)
ALTER TABLE foo
RESET SETTING max_suspicious_broken_parts;
```
## parts_to_throw_insert {#parts-to-throw-insert}

View File

@ -3154,6 +3154,12 @@ Possible values:
Default value: `0`.
!!! warning "Warning"
Nullable primary key usually indicates bad design. It is forbidden in almost all mainstream DBMSs. The feature is mainly for [AggregatingMergeTree](../../engines/table-engines/mergetree-family/aggregatingmergetree.md) and is not heavily tested. Use with care.
!!! warning "Warning"
Do not enable this feature in version `<= 21.8`. It's not properly implemented and may lead to a server crash.
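For illustration, a minimal sketch, assuming the setting described here is `allow_nullable_key` (the section heading is not shown in this excerpt) and using a made-up table name:

``` sql
-- Assumption: this section documents allow_nullable_key.
SET allow_nullable_key = 1;

CREATE TABLE nullable_key_example
(
    k Nullable(Int32),
    v String
)
ENGINE = MergeTree
ORDER BY k;
```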
## aggregate_functions_null_for_empty {#aggregate_functions_null_for_empty}
Enables or disables rewriting all aggregate functions in a query, adding the [-OrNull](../../sql-reference/aggregate-functions/combinators.md#agg-functions-combinator-ornull) suffix to them. Enable it for SQL standard compatibility.
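A short sketch of the effect (the query is illustrative):

``` sql
SET aggregate_functions_null_for_empty = 1;

-- sum() is rewritten to sumOrNull(), so an empty result set yields NULL instead of 0.
SELECT sum(number) FROM numbers(10) WHERE number > 100;
```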
@ -4149,3 +4155,20 @@ Default value: `''`.
Sets the character that is interpreted as a suffix after the result set for [CustomSeparated](../../interfaces/formats.md#format-customseparated) data format.
Default value: `''`.
## shutdown_wait_unfinished_queries
Enables or disables waiting for unfinished queries when the server shuts down.
Possible values:
- 0 — Disabled.
- 1 — Enabled. The wait time equals the `shutdown_wait_unfinished` config value.
Default value: 0.
## shutdown_wait_unfinished
The waiting time in seconds for currently handled connections when the server shuts down.
Default Value: 5.

View File

@ -6,7 +6,7 @@ You can use this table to get information similar to the [DESCRIBE TABLE](../../
Columns from [temporary tables](../../sql-reference/statements/create/table.md#temporary-tables) are visible in `system.columns` only in the sessions where they were created. They are shown with an empty `database` field.
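A quick illustrative query (the temporary table name is made up):

``` sql
CREATE TEMPORARY TABLE tmp_example (n UInt8);

-- The rows for tmp_example are visible only in this session, with an empty database field.
SELECT database, table, name, type FROM system.columns WHERE table = 'tmp_example';
```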
Columns:
The `system.columns` table contains the following columns (the column type is shown in brackets):
- `database` ([String](../../sql-reference/data-types/string.md)) — Database name.
- `table` ([String](../../sql-reference/data-types/string.md)) — Table name.
@ -86,21 +86,4 @@ numeric_scale: ᴺᵁᴸᴸ
datetime_precision: ᴺᵁᴸᴸ
```
The `system.columns` table contains the following columns (the column type is shown in brackets):
- `database` (String) — Database name.
- `table` (String) — Table name.
- `name` (String) — Column name.
- `type` (String) — Column type.
- `default_kind` (String) — Expression type (`DEFAULT`, `MATERIALIZED`, `ALIAS`) for the default value, or an empty string if it is not defined.
- `default_expression` (String) — Expression for the default value, or an empty string if it is not defined.
- `data_compressed_bytes` (UInt64) — The size of compressed data, in bytes.
- `data_uncompressed_bytes` (UInt64) — The size of decompressed data, in bytes.
- `marks_bytes` (UInt64) — The size of marks, in bytes.
- `comment` (String) — Comment on the column, or an empty string if it is not defined.
- `is_in_partition_key` (UInt8) — Flag that indicates whether the column is in the partition expression.
- `is_in_sorting_key` (UInt8) — Flag that indicates whether the column is in the sorting key expression.
- `is_in_primary_key` (UInt8) — Flag that indicates whether the column is in the primary key expression.
- `is_in_sampling_key` (UInt8) — Flag that indicates whether the column is in the sampling key expression.
[Original article](https://clickhouse.com/docs/en/operations/system-tables/columns) <!--hide-->

View File

@ -35,7 +35,7 @@ SELECT * FROM system.metrics LIMIT 10
- [system.asynchronous_metrics](../../operations/system-tables/asynchronous_metrics.md#system_tables-asynchronous_metrics) — Contains periodically calculated metrics.
- [system.events](../../operations/system-tables/events.md#system_tables-events) — Contains a number of events that occurred.
- [system.metric_log](../../operations/system-tables/metric_log.md#system_tables-metric_log) — Contains a history of metrics values from tables `system.metrics` и `system.events`.
- [system.metric_log](../../operations/system-tables/metric_log.md#system_tables-metric_log) — Contains a history of metrics values from tables `system.metrics` and `system.events`.
- [Monitoring](../../operations/monitoring.md) — Base concepts of ClickHouse monitoring.
[Original article](https://clickhouse.com/docs/en/operations/system-tables/metrics) <!--hide-->

View File

@ -174,7 +174,7 @@ Parameters:
</table_hits>
<!-- Next table to copy. It is not copied until previous table is copying. -->
</table_visits>
<table_visits>
...
</table_visits>
...

View File

@ -122,7 +122,12 @@ Setting fields:
- `command` — The absolute path to the executable file, or the file name (if the program directory is written to `PATH`).
- `format` — The file format. All the formats described in [Formats](../../../interfaces/formats.md#formats) are supported.
- `command_termination_timeout` — The executable script should contain a main read-write loop. After the dictionary is destroyed, the pipe is closed, and the executable file has `command_termination_timeout` seconds to shut down before ClickHouse sends a SIGTERM signal to the child process. Specified in seconds. Default value is 10. Optional parameter.
- `command_read_timeout` - timeout for reading data from command stdout in milliseconds. Default value 10000. Optional parameter.
- `command_write_timeout` - timeout for writing data to command stdin in milliseconds. Default value 10000. Optional parameter.
- `implicit_key` — The executable source file can return only values, and the correspondence to the requested keys is determined implicitly — by the order of rows in the result. Default value is false.
- `execute_direct` - If `execute_direct` = `1`, then `command` will be searched inside user_scripts folder. Additional script arguments can be specified using whitespace separator. Example: `script_name arg1 arg2`. If `execute_direct` = `0`, `command` is passed as argument for `bin/sh -c`. Default value is `0`. Optional parameter.
- `send_chunk_header` - controls whether to send row count before sending a chunk of data to process. Optional. Default value is `false`.
That dictionary source can be configured only via XML configuration. Creating dictionaries with an executable source via DDL is disabled; otherwise, the DB user would be able to execute an arbitrary binary on the ClickHouse node.
@ -150,10 +155,14 @@ Setting fields:
- `command` — The absolute path to the executable file, or the file name (if the program directory is written to `PATH`).
- `format` — The file format. All the formats described in “[Formats](../../../interfaces/formats.md#formats)” are supported.
- `pool_size` — Size of the pool. If 0 is specified as `pool_size`, then there are no pool size restrictions.
- `command_termination_timeout` — The executable pool script should contain a main read-write loop. After the dictionary is destroyed, the pipe is closed, and the executable file has `command_termination_timeout` seconds to shut down before ClickHouse sends a SIGTERM signal to the child process. Specified in seconds. Default value is 10. Optional parameter.
- `pool_size` — Size of the pool. If 0 is specified as `pool_size`, then there are no pool size restrictions. Default value is `16`.
- `command_termination_timeout` — The executable script should contain a main read-write loop. After the dictionary is destroyed, the pipe is closed, and the executable file has `command_termination_timeout` seconds to shut down before ClickHouse sends a SIGTERM signal to the child process. Specified in seconds. Default value is 10. Optional parameter.
- `max_command_execution_time` — Maximum executable script command execution time for processing block of data. Specified in seconds. Default value is 10. Optional parameter.
- `command_read_timeout` - timeout for reading data from command stdout in milliseconds. Default value 10000. Optional parameter.
- `command_write_timeout` - timeout for writing data to command stdin in milliseconds. Default value 10000. Optional parameter.
- `implicit_key` — The executable source file can return only values, and the correspondence to the requested keys is determined implicitly — by the order of rows in the result. Default value is false. Optional parameter.
- `execute_direct` - If `execute_direct` = `1`, then `command` will be searched inside user_scripts folder. Additional script arguments can be specified using whitespace separator. Example: `script_name arg1 arg2`. If `execute_direct` = `0`, `command` is passed as argument for `bin/sh -c`. Default value is `1`. Optional parameter.
- `send_chunk_header` - controls whether to send row count before sending a chunk of data to process. Optional. Default value is `false`.
That dictionary source can be configured only via XML configuration. Creating dictionaries with executable source via DDL is disabled, otherwise, the DB user would be able to execute arbitrary binary on ClickHouse node.
View File
@ -1392,12 +1392,24 @@ Returns the first element in the `arr1` array for which `func` returns something
Note that `arrayFirst` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You must pass a lambda function to it as the first argument, and it can't be omitted.
## arrayLast(func, arr1, …) {#array-last}
Returns the last element in the `arr1` array for which `func` returns something other than 0.
Note that `arrayLast` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You must pass a lambda function to it as the first argument, and it can't be omitted.
## arrayFirstIndex(func, arr1, …) {#array-first-index}
Returns the index of the first element in the `arr1` array for which `func` returns something other than 0.
Note that `arrayFirstIndex` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You must pass a lambda function to it as the first argument, and it can't be omitted.
## arrayLastIndex(func, arr1, …) {#array-last-index}
Returns the index of the last element in the `arr1` array for which `func` returns something other than 0.
Note that `arrayLastIndex` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You must pass a lambda function to it as the first argument, and it can't be omitted. A combined example for all four functions is sketched below.
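A quick combined example of these four functions on a literal array (a sketch; the results in the comments follow directly from the definitions above):
``` sql
SELECT
    arrayFirst(x -> x > 2, [1, 2, 3, 4]) AS first_match,       -- 3
    arrayLast(x -> x > 2, [1, 2, 3, 4]) AS last_match,         -- 4
    arrayFirstIndex(x -> x > 2, [1, 2, 3, 4]) AS first_index,  -- 3 (indexes are 1-based)
    arrayLastIndex(x -> x > 2, [1, 2, 3, 4]) AS last_index;    -- 4
```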
## arrayMin {#array-min}
Returns the minimum of elements in the source array.
View File
@ -57,7 +57,7 @@ Alias: `toTimezone`.
**Arguments**
- `value` — Time or date and time. [DateTime64](../../sql-reference/data-types/datetime64.md).
- `timezone` — Timezone for the returned value. [String](../../sql-reference/data-types/string.md).
- `timezone` — Timezone for the returned value. [String](../../sql-reference/data-types/string.md). This argument must be a constant, because `toTimezone` changes the timezone of a column (the timezone is an attribute of the `DateTime*` types).
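As an illustration (a minimal sketch, assuming the `Asia/Yekaterinburg` timezone is available on the server), the timezone is passed as a string literal:
``` sql
SELECT toTimezone(toDateTime('2021-01-01 00:00:00', 'UTC'), 'Asia/Yekaterinburg') AS t;
-- The second argument must stay constant: passing a per-row value from a column
-- would be rejected, because the timezone is part of the resulting column type.
```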
**Returned value**
View File
@ -217,8 +217,8 @@ Result:
``` text
(0,'2019-05-20') 0 \N \N (NULL,NULL)
(1,'2019-05-20') 1 First First ('First','First')
(2,'2019-05-20') 0 \N \N (NULL,NULL)
(3,'2019-05-20') 0 \N \N (NULL,NULL)
(2,'2019-05-20') 1 Second \N ('Second',NULL)
(3,'2019-05-20') 1 Third Third ('Third','Third')
(4,'2019-05-20') 0 \N \N (NULL,NULL)
```
View File
@ -73,26 +73,74 @@ User defined function configurations are searched relative to the path specified
A function configuration contains the following settings:
- `name` - a function name.
- `command` - a command or a script to execute.
- `command` - script name to execute, or a command if `execute_direct` is false.
- `argument` - argument description with the `type` of an argument. Each argument is described in a separate setting.
- `format` - a [format](../../interfaces/formats.md) in which arguments are passed to the command.
- `return_type` - the type of the returned value.
- `type` - an executable type. If `type` is set to `executable` then single command is started. If it is set to `executable_pool` then a pool of commands is created.
- `max_command_execution_time` - maximum execution time in seconds for processing a block of data. This setting is valid for `executable_pool` commands only. Optional. Default value is `10`.
- `command_termination_timeout` - time in seconds during which a command should finish after its pipe is closed. After that time `SIGTERM` is sent to the process executing the command. This setting is valid for `executable_pool` commands only. Optional. Default value is `10`.
- `command_termination_timeout` - time in seconds during which a command should finish after its pipe is closed. After that time, `SIGTERM` is sent to the process executing the command. Optional. Default value is `10`.
- `command_read_timeout` - timeout for reading data from the command's stdout, in milliseconds. Default value is 10000. Optional parameter.
- `command_write_timeout` - timeout for writing data to the command's stdin, in milliseconds. Default value is 10000. Optional parameter.
- `pool_size` - the size of a command pool. Optional. Default value is `16`.
- `lifetime` - the reload interval of a function in seconds. If it is set to `0` then the function is not reloaded.
- `send_chunk_header` - controls whether to send the row count before sending a chunk of data to the process. Optional. Default value is `false`.
- `execute_direct` - If `execute_direct` = `1`, then `command` will be searched for inside the `user_scripts` folder. Additional script arguments can be specified using a whitespace separator. Example: `script_name arg1 arg2`. If `execute_direct` = `0`, `command` is passed as an argument for `bin/sh -c`. Default value is `1`. Optional parameter.
- `lifetime` - the reload interval of a function in seconds. If it is set to `0` then the function is not reloaded. Default value is `0`. Optional parameter.
The command must read arguments from `STDIN` and must output the result to `STDOUT`. The command must process arguments iteratively: after processing a chunk of arguments, it must wait for the next chunk.
**Example**
Creating `test_function` using XML configuration:
```
Creating `test_function_python` using XML configuration.
File `test_function.xml`:
```xml
<functions>
<function>
<type>executable</type>
<name>test_function</name>
<name>test_function_python</name>
<return_type>String</return_type>
<argument>
<type>UInt64</type>
</argument>
<format>TabSeparated</format>
<command>test_function.py</command>
</function>
</functions>
```
Script file `test_function.py` inside the `user_scripts` folder:
```python
#!/usr/bin/python3
import sys
if __name__ == '__main__':
for line in sys.stdin:
print("Value " + line, end='')
sys.stdout.flush()
```
Query:
``` sql
SELECT test_function_python(toUInt64(2));
```
Result:
``` text
┌─test_function_python(2)─┐
│ Value 2 │
└─────────────────────────┘
```
Creating `test_function_sum`, manually specifying `execute_direct` as `0`, using XML configuration.
File `test_function.xml`:
```xml
<functions>
<function>
<type>executable</type>
<name>test_function_sum</name>
<return_type>UInt64</return_type>
<argument>
<type>UInt64</type>
@ -102,7 +150,7 @@ Creating `test_function` using XML configuration:
</argument>
<format>TabSeparated</format>
<command>cd /; clickhouse-local --input-format TabSeparated --output-format TabSeparated --structure 'x UInt64, y UInt64' --query "SELECT x + y FROM table"</command>
<lifetime>0</lifetime>
<execute_direct>0</execute_direct>
</function>
</functions>
```
@ -110,15 +158,15 @@ Creating `test_function` using XML configuration:
Query:
``` sql
SELECT test_function(toUInt64(2), toUInt64(2));
SELECT test_function_sum(2, 2);
```
Result:
``` text
┌─test_function(toUInt64(2), toUInt64(2))─┐
4 │
└─────────────────────────────────────────
┌─test_function_sum(2, 2)─┐
│ 4 │
└─────────────────────────┘
```
View File
@ -351,8 +351,6 @@ Checks whether the string matches the `pattern` regular expression. A `re2` regu
Returns 0 if it does not match, or 1 if it matches.
Note that the backslash symbol (`\`) is used for escaping in the regular expression. The same symbol is used for escaping in string literals. So in order to escape the symbol in a regular expression, you must write two backslashes (\\) in a string literal.
The regular expression works with the string as if it is a set of bytes. The regular expression cant contain null bytes.
For patterns to search for substrings in a string, it is better to use LIKE or position, since they work much faster.
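A minimal sketch of the escaping rule above: each `\\` in the string literal reaches the regular expression engine as a single backslash.
``` sql
SELECT match('Hello 1.0', 'Hello \\d\\.\\d') AS m;  -- m = 1: the pattern seen by re2 is 'Hello \d\.\d'
```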
View File
@ -344,9 +344,9 @@ SELECT toDecimal32OrNull(toString(-1.111), 5) AS val, toTypeName(val);
Result:
``` text
┌──────val─┬─toTypeName(toDecimal32OrNull(toString(-1.111), 5))─┐
│ -1.11100 │ Nullable(Decimal(9, 5)) │
└──────────┴────────────────────────────────────────────────────┘
┌────val─┬─toTypeName(toDecimal32OrNull(toString(-1.111), 5))─┐
│ -1.111 │ Nullable(Decimal(9, 5)) │
└────────┴────────────────────────────────────────────────────┘
```
Query:
@ -451,9 +451,9 @@ SELECT toDecimal32OrZero(toString(-1.111), 5) AS val, toTypeName(val);
Result:
``` text
┌──────val─┬─toTypeName(toDecimal32OrZero(toString(-1.111), 5))─┐
│ -1.11100 │ Decimal(9, 5) │
└──────────┴────────────────────────────────────────────────────┘
┌────val─┬─toTypeName(toDecimal32OrZero(toString(-1.111), 5))─┐
│ -1.111 │ Decimal(9, 5) │
└────────┴────────────────────────────────────────────────────┘
```
Query:
View File
@ -360,6 +360,21 @@ SELECT decodeURLComponent('http://127.0.0.1:8123/?query=SELECT%201%3B') AS Decod
└────────────────────────────────────────┘
```
### decodeURLFormComponent(URL) {#decodeurlformcomponenturl}
Returns the decoded URL. Follows RFC 1866: a plain plus sign (`+`) is decoded as a space (` `).
Example:
``` sql
SELECT decodeURLFormComponent('http://127.0.0.1:8123/?query=SELECT%201+2%2B3') AS DecodedURL;
```
``` text
┌─DecodedURL────────────────────────────────┐
│ http://127.0.0.1:8123/?query=SELECT 1 2+3 │
└───────────────────────────────────────────┘
```
### netloc {#netloc}
Extracts network locality (`username:password@host:port`) from a URL.
View File
@ -9,11 +9,12 @@ The following operations with [projections](../../../engines/table-engines/merge
- `ALTER TABLE [db].name ADD PROJECTION name ( SELECT <COLUMN LIST EXPR> [GROUP BY] [ORDER BY] )` - Adds projection description to the table's metadata.
- `ALTER TABLE [db].name DROP PROJECTION name` - Removes projection description from tables metadata and deletes projection files from disk.
- `ALTER TABLE [db].name DROP PROJECTION name` - Removes projection description from the table's metadata and deletes projection files from disk. Implemented as a [mutation](../../../sql-reference/statements/alter/index.md#mutations).
- `ALTER TABLE [db.]table MATERIALIZE PROJECTION name IN PARTITION partition_name` - The query rebuilds the projection `name` in the partition `partition_name`. Implemented as a [mutation](../../../sql-reference/statements/alter/index.md#mutations).
- `ALTER TABLE [db.]table CLEAR PROJECTION name IN PARTITION partition_name` - Deletes projection files from disk without removing description.
- `ALTER TABLE [db.]table CLEAR PROJECTION name IN PARTITION partition_name` - Deletes projection files from disk without removing the description. Implemented as a [mutation](../../../sql-reference/statements/alter/index.md#mutations).
The commands `ADD`, `DROP` and `CLEAR` are lightweight in the sense that they only change metadata or remove files; a combined sketch of the lifecycle follows below.
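A hedged sketch of the full lifecycle, using a hypothetical table `visits` with a column `CounterID` and a partition `201905`:
``` sql
ALTER TABLE visits ADD PROJECTION p_agg (SELECT CounterID, count() GROUP BY CounterID);

-- Build the projection for existing data in one partition (runs as a mutation):
ALTER TABLE visits MATERIALIZE PROJECTION p_agg IN PARTITION 201905;

-- Delete the projection files in that partition, keeping the description:
ALTER TABLE visits CLEAR PROJECTION p_agg IN PARTITION 201905;

-- Remove the description and delete the files (also a mutation):
ALTER TABLE visits DROP PROJECTION p_agg;
```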
View File
@ -10,7 +10,7 @@ Creates a new [external dictionary](../../../sql-reference/dictionaries/external
**Syntax**
``` sql
CREATE DICTIONARY [OR REPLACE][IF NOT EXISTS] [db.]dictionary_name [ON CLUSTER cluster]
CREATE [OR REPLACE] DICTIONARY [IF NOT EXISTS] [db.]dictionary_name [ON CLUSTER cluster]
(
key1 type1 [DEFAULT|EXPRESSION expr1] [IS_OBJECT_ID],
key2 type2 [DEFAULT|EXPRESSION expr2],
View File
@ -10,7 +10,12 @@ Shows the execution plan of a statement.
Syntax:
```sql
EXPLAIN [AST | SYNTAX | PLAN | PIPELINE] [setting = value, ...] SELECT ... [FORMAT ...]
EXPLAIN [AST | SYNTAX | PLAN | PIPELINE | TABLE OVERRIDE] [setting = value, ...]
[
SELECT ... |
tableFunction(...) [COLUMNS (...)] [ORDER BY ...] [PARTITION BY ...] [PRIMARY KEY] [SAMPLE BY ...] [TTL ...]
]
[FORMAT ...]
```
Example:
@ -412,4 +417,37 @@ Result:
└──────────┴───────┴───────┴──────┴───────┘
```
### EXPLAIN TABLE OVERRIDE {#explain-table-override}
Shows the result of a table override on a table schema accessed through a table function.
It also performs some validation, throwing an exception if the override would cause some kind of failure.
**Example**
Assume you have a remote MySQL table like this:
```sql
CREATE TABLE db.tbl (
id INT PRIMARY KEY,
created DATETIME DEFAULT now()
)
```
```sql
EXPLAIN TABLE OVERRIDE mysql('127.0.0.1:3306', 'db', 'tbl', 'root', 'clickhouse')
PARTITION BY toYYYYMM(assumeNotNull(created))
```
Result:
```text
┌─explain─────────────────────────────────────────────────┐
│ PARTITION BY uses columns: `created` Nullable(DateTime) │
└─────────────────────────────────────────────────────────┘
```
!!! note "Note"
    The validation is not complete, so a successful query does not guarantee that the override would
not cause issues.
[Original article](https://clickhouse.com/docs/en/sql-reference/statements/explain/) <!--hide-->
View File
@ -21,7 +21,7 @@ GRANT [ON CLUSTER cluster_name] privilege[(column_name [,...])] [,...] ON {db.ta
- `user` — ClickHouse user account.
The `WITH GRANT OPTION` clause grants `user` or `role` permission to execute the `GRANT` query. Users can grant privileges only of the same or smaller scope than they have.
The `WITH REPLACE OPTION` clause replace old privileges by new privileges for the `user` or `role`, if not specified it is append privileges.
The `WITH REPLACE OPTION` clause replaces old privileges with new privileges for the `user` or `role`; if it is not specified, it appends privileges.
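For example (a sketch with a hypothetical user `john`), after the second statement below `john` holds only `SELECT ON db.*`, because the earlier `INSERT` privilege is replaced rather than extended:
``` sql
GRANT INSERT ON db.* TO john;
GRANT SELECT ON db.* TO john WITH REPLACE OPTION;
```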
## Assigning Role Syntax {#assign-role-syntax}
@ -33,7 +33,7 @@ GRANT [ON CLUSTER cluster_name] role [,...] TO {user | another_role | CURRENT_US
- `user` — ClickHouse user account.
The `WITH ADMIN OPTION` clause grants [ADMIN OPTION](#admin-option-privilege) privilege to `user` or `role`.
The `WITH REPLACE OPTION` clause replace old roles by new role for the `user` or `role`, if not specified it is append roles.
The `WITH REPLACE OPTION` clause replaces old roles with new roles for the `user` or `role`; if it is not specified, it appends roles.
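Similarly for roles (a sketch with hypothetical roles `accountant` and `auditor` and user `mira`):
``` sql
GRANT accountant TO mira;
GRANT auditor TO mira WITH REPLACE OPTION; -- mira now has only the auditor role
```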
## Usage {#grant-usage}
View File
@ -0,0 +1 @@
../../../en/faq/operations/multi-region-replication.md
View File
@ -397,7 +397,7 @@ $ curl -v 'http://localhost:8123/predefined_query'
`<query>` 値は以下の定義済みクエリです `<predefined_query_handler>` これは、Http要求が一致し、クエリの結果が返されたときにClickHouseによって実行されます。 これは必須構成です。
次の例では、次の値を定義します `max_threads``max_alter_threads` 設定、そしてクエリのテーブルから設定設定します。
次の例では、`max_threads` と `max_final_threads` の設定値を定義し、これらの設定が正常に適用されたかどうかをシステムテーブルに照会します。
例:
@ -420,9 +420,9 @@ $ curl -v 'http://localhost:8123/predefined_query'
```
``` bash
$ curl -H 'XXX:TEST_HEADER_VALUE' -H 'PARAMS_XXX:max_threads' 'http://localhost:8123/query_param_with_url/1/max_threads/max_alter_threads?max_threads=1&max_alter_threads=2'
$ curl -H 'XXX:TEST_HEADER_VALUE' -H 'PARAMS_XXX:max_threads' 'http://localhost:8123/query_param_with_url/1/max_threads/max_final_threads?max_threads=1&max_final_threads=2'
1
max_alter_threads 2
max_final_threads 2
```
!!! note "注意"
@ -434,7 +434,7 @@ max_alter_threads 2
クリックハウスは、 `<query_param_name>` HTTP要求のurlの値。 のデフォルト値 `<query_param_name>``/query` . これはオプションの構成です。 設定ファイルに定義がない場合、paramは渡されません。
この機能を試すために、この例ではmax_threadsとmax_alter_threadsの値を定義し、設定が正常に設定されたかどうかを照会します。
この機能を試すために、この例ではmax_threadsとmax_final_threadsの値を定義し、設定が正常に設定されたかどうかを照会します。
例:
@ -452,9 +452,9 @@ max_alter_threads 2
```
``` bash
$ curl -H 'XXX:TEST_HEADER_VALUE_DYNAMIC' 'http://localhost:8123/own?max_threads=1&max_alter_threads=2&param_name_1=max_threads&param_name_2=max_alter_threads&query_param=SELECT%20name,value%20FROM%20system.settings%20where%20name%20=%20%7Bname_1:String%7D%20OR%20name%20=%20%7Bname_2:String%7D'
$ curl -H 'XXX:TEST_HEADER_VALUE_DYNAMIC' 'http://localhost:8123/own?max_threads=1&max_final_threads=2&param_name_1=max_threads&param_name_2=max_final_threads&query_param=SELECT%20name,value%20FROM%20system.settings%20where%20name%20=%20%7Bname_1:String%7D%20OR%20name%20=%20%7Bname_2:String%7D'
max_threads 1
max_alter_threads 2
max_final_threads 2
```
## 静的 {#static}
View File
@ -163,7 +163,7 @@ $ clickhouse-copier copier --daemon --config zookeeper.xml --task-path /task/pat
</table_hits>
<!-- Next table to copy. It is not copied until previous table is copying. -->
</table_visits>
<table_visits>
...
</table_visits>
...
View File
@ -170,9 +170,9 @@ SELECT toDecimal32OrNull(toString(-1.111), 5) AS val, toTypeName(val)
```
``` text
┌──────val─┬─toTypeName(toDecimal32OrNull(toString(-1.111), 5))─┐
│ -1.11100 │ Nullable(Decimal(9, 5)) │
└──────────┴────────────────────────────────────────────────────┘
┌────val─┬─toTypeName(toDecimal32OrNull(toString(-1.111), 5))─┐
│ -1.111 │ Nullable(Decimal(9, 5)) │
└────────┴────────────────────────────────────────────────────┘
```
``` sql
@ -214,9 +214,9 @@ SELECT toDecimal32OrZero(toString(-1.111), 5) AS val, toTypeName(val)
```
``` text
┌──────val─┬─toTypeName(toDecimal32OrZero(toString(-1.111), 5))─┐
│ -1.11100 │ Decimal(9, 5) │
└──────────┴────────────────────────────────────────────────────┘
┌────val─┬─toTypeName(toDecimal32OrZero(toString(-1.111), 5))─┐
│ -1.111 │ Decimal(9, 5) │
└────────┴────────────────────────────────────────────────────┘
```
``` sql
View File
@ -40,10 +40,10 @@ ClickHouse не работает и не собирается на 32-битны
Выполните в терминале:
git clone git@github.com:ClickHouse/ClickHouse.git
git clone git@github.com:your_github_username/ClickHouse.git --recursive
cd ClickHouse
Замените первое вхождение слова `ClickHouse` в команде для git на имя вашего аккаунта на GitHub.
Замените слово `your_github_username` в команде для git на имя вашего аккаунта на GitHub.
Эта команда создаст директорию ClickHouse, содержащую рабочую копию проекта.
View File
@ -5,7 +5,7 @@ toc_title: HDFS
# HDFS {#table_engines-hdfs}
Управляет данными в HDFS. Данный движок похож на движки [File](../special/file.md#table_engines-file) и [URL](../special/url.md#table_engines-url).
Этот движок обеспечивает интеграцию с экосистемой [Apache Hadoop](https://ru.wikipedia.org/wiki/Hadoop), позволяя управлять данными в HDFS посредством ClickHouse. Данный движок похож на движки [File](../special/file.md#table_engines-file) и [URL](../special/url.md#table_engines-url), но предоставляет возможности, характерные для Hadoop.
## Использование движка {#usage}
@ -13,9 +13,11 @@ toc_title: HDFS
ENGINE = HDFS(URI, format)
```
В параметр `URI` нужно передавать полный URI файла в HDFS.
**Параметры движка**
В параметр `URI` нужно передавать полный URI файла в HDFS. Часть URI с путем файла может содержать шаблоны. В этом случае таблица может использоваться только для чтения.
Параметр `format` должен быть таким, который ClickHouse может использовать и в запросах `INSERT`, и в запросах `SELECT`. Полный список поддерживаемых форматов смотрите в разделе [Форматы](../../../interfaces/formats.md#formats).
Часть URI с путем файла может содержать шаблоны. В этом случае таблица может использоваться только для чтения.
**Пример:**
@ -67,12 +69,12 @@ SELECT * FROM hdfs_engine_table LIMIT 2
1. Предположим, у нас есть несколько файлов со следующими URI в HDFS:
- 'hdfs://hdfs1:9000/some_dir/some_file_1'
- 'hdfs://hdfs1:9000/some_dir/some_file_2'
- 'hdfs://hdfs1:9000/some_dir/some_file_3'
- 'hdfs://hdfs1:9000/another_dir/some_file_1'
- 'hdfs://hdfs1:9000/another_dir/some_file_2'
- 'hdfs://hdfs1:9000/another_dir/some_file_3'
- 'hdfs://hdfs1:9000/some_dir/some_file_1'
- 'hdfs://hdfs1:9000/some_dir/some_file_2'
- 'hdfs://hdfs1:9000/some_dir/some_file_3'
- 'hdfs://hdfs1:9000/another_dir/some_file_1'
- 'hdfs://hdfs1:9000/another_dir/some_file_2'
- 'hdfs://hdfs1:9000/another_dir/some_file_3'
1. Есть несколько возможностей создать таблицу, состоящую из этих шести файлов:
@ -128,6 +130,7 @@ CREATE TABLE big_table (name String, value UInt32) ENGINE = HDFS('hdfs://hdfs1:9
| **параметр** | **по умолчанию** |
| - | - |
| rpc\_client\_connect\_tcpnodelay | true |
| dfs\_client\_read\_shortcircuit | true |
| output\_replace-datanode-on-failure | true |
@ -177,22 +180,23 @@ CREATE TABLE big_table (name String, value UInt32) ENGINE = HDFS('hdfs://hdfs1:9
#### Расширенные параметры для ClickHouse {#clickhouse-extras}
| **параметр** | **по умолчанию** |
| - | - |
|hadoop\_kerberos\_keytab | "" |
|hadoop\_kerberos\_principal | "" |
|hadoop\_kerberos\_kinit\_command | kinit |
### Ограничения {#limitations}
* hadoop\_security\_kerberos\_ticket\_cache\_path могут быть определены только на глобальном уровне
* `hadoop_security_kerberos_ticket_cache_path` и `libhdfs3_conf` могут быть определены только на глобальном, а не на пользовательском уровне
## Поддержка Kerberos {#kerberos-support}
Если hadoop\_security\_authentication параметр имеет значение 'kerberos', ClickHouse аутентифицируется с помощью Kerberos.
[Расширенные параметры](#clickhouse-extras) и hadoop\_security\_kerberos\_ticket\_cache\_path помогают сделать это.
Если параметр `hadoop_security_authentication` имеет значение `kerberos`, ClickHouse аутентифицируется с помощью Kerberos.
[Расширенные параметры](#clickhouse-extras) и `hadoop_security_kerberos_ticket_cache_path` помогают сделать это.
Обратите внимание, что из-за ограничений libhdfs3 поддерживается только устаревший метод аутентификации,
коммуникация с узлами данных не защищена SASL (HADOOP\_SECURE\_DN\_USER надежный показатель такого
подхода к безопасности). Используйте tests/integration/test\_storage\_kerberized\_hdfs/hdfs_configs/bootstrap.sh для примера настроек.
коммуникация с узлами данных не защищена SASL (`HADOOP_SECURE_DN_USER` надежный показатель такого
подхода к безопасности). Используйте `tests/integration/test_storage_kerberized_hdfs/hdfs_configs/bootstrap.sh` для примера настроек.
Если hadoop\_kerberos\_keytab, hadoop\_kerberos\_principal или hadoop\_kerberos\_kinit\_command указаны в настройках, kinit будет вызван. hadoop\_kerberos\_keytab и hadoop\_kerberos\_principal обязательны в этом случае. Необходимо также будет установить kinit и файлы конфигурации krb5.
Если `hadoop_kerberos_keytab`, `hadoop_kerberos_principal` или `hadoop_kerberos_kinit_command` указаны в настройках, `kinit` будет вызван. `hadoop_kerberos_keytab` и `hadoop_kerberos_principal` обязательны в этом случае. Необходимо также будет установить `kinit` и файлы конфигурации krb5.
## Виртуальные столбцы {#virtual-columns}
View File
@ -191,5 +191,5 @@ ClickHouse может поддерживать учетные данные Kerbe
**Смотрите также**
- [Виртуальные столбцы](index.md#table_engines-virtual_columns)
- [background_schedule_pool_size](../../../operations/settings/settings.md#background_schedule_pool_size)
- [background_message_broker_schedule_pool_size](../../../operations/settings/settings.md#background_message_broker_schedule_pool_size)
View File
@ -0,0 +1 @@
../../../en/faq/operations/multi-region-replication.md
View File
@ -422,7 +422,7 @@ $ curl -v 'http://localhost:8123/predefined_query'
Значение `query` — это предопределенный запрос `predefined_query_handler`, который выполняется ClickHouse при совпадении HTTP-запроса и возврате результата запроса. Это обязательная настройка.
В следующем примере определяются настройки [max_threads](../operations/settings/settings.md#settings-max_threads) и `max_alter_threads`, а затем запрашивается системная таблица, чтобы проверить, были ли эти параметры успешно установлены.
В следующем примере определяются настройки [max_threads](../operations/settings/settings.md#settings-max_threads) и `max_final_threads`, а затем запрашивается системная таблица, чтобы проверить, были ли эти параметры успешно установлены.
!!! note "Предупреждение"
Чтобы сохранить стандартные `handlers` такие как `query`, `play`, `ping`, используйте правило `<defaults/>`.
@ -449,9 +449,9 @@ $ curl -v 'http://localhost:8123/predefined_query'
```
``` bash
$ curl -H 'XXX:TEST_HEADER_VALUE' -H 'PARAMS_XXX:max_threads' 'http://localhost:8123/query_param_with_url/1/max_threads/max_alter_threads?max_threads=1&max_alter_threads=2'
$ curl -H 'XXX:TEST_HEADER_VALUE' -H 'PARAMS_XXX:max_threads' 'http://localhost:8123/query_param_with_url/1/max_threads/max_final_threads?max_threads=1&max_final_threads=2'
1
max_alter_threads 2
max_final_threads 2
```
!!! note "Предупреждение"
@ -463,7 +463,7 @@ max_alter_threads 2
ClickHouse извлекает и выполняет значение, соответствующее значению `query_param_name` URL-адресе HTTP-запроса. Значение по умолчанию `query_param_name` — это `/query` . Это необязательная настройка. Если в файле конфигурации нет определения, параметр не передается.
Чтобы поэкспериментировать с этой функциональностью, в примере определяются значения [max_threads](../operations/settings/settings.md#settings-max_threads) и `max_alter_threads` и запрашивается, успешно ли были установлены настройки.
Чтобы поэкспериментировать с этой функциональностью, в примере определяются значения [max_threads](../operations/settings/settings.md#settings-max_threads) и `max_final_threads` и запрашивается, успешно ли были установлены настройки.
Пример:
@ -482,9 +482,9 @@ ClickHouse извлекает и выполняет значение, соотв
```
``` bash
$ curl -H 'XXX:TEST_HEADER_VALUE_DYNAMIC' 'http://localhost:8123/own?max_threads=1&max_alter_threads=2&param_name_1=max_threads&param_name_2=max_alter_threads&query_param=SELECT%20name,value%20FROM%20system.settings%20where%20name%20=%20%7Bname_1:String%7D%20OR%20name%20=%20%7Bname_2:String%7D'
$ curl -H 'XXX:TEST_HEADER_VALUE_DYNAMIC' 'http://localhost:8123/own?max_threads=1&max_final_threads=2&param_name_1=max_threads&param_name_2=max_final_threads&query_param=SELECT%20name,value%20FROM%20system.settings%20where%20name%20=%20%7Bname_1:String%7D%20OR%20name%20=%20%7Bname_2:String%7D'
max_threads 1
max_alter_threads 2
max_final_threads 2
```
### static {#static}
View File
@ -3,14 +3,14 @@ toc_priority: 66
toc_title: ClickHouse Keeper
---
# [пре-продакшн] ClickHouse Keeper
# [пре-продакшн] ClickHouse Keeper {#clickHouse-keeper}
Сервер ClickHouse использует сервис координации [ZooKeeper](https://zookeeper.apache.org/) для [репликации](../engines/table-engines/mergetree-family/replication.md) данных и выполнения [распределенных DDL запросов](../sql-reference/distributed-ddl.md). ClickHouse Keeper — это альтернативный сервис координации, совместимый с ZooKeeper.
!!! warning "Предупреждение"
ClickHouse Keeper находится в стадии пре-продакшн и тестируется в CI ClickHouse и на нескольких внутренних инсталляциях.
## Детали реализации
## Детали реализации {#implementation-details}
ZooKeeper — один из первых широко известных сервисов координации с открытым исходным кодом. Он реализован на языке программирования Java, имеет достаточно простую и мощную модель данных. Алгоритм координации Zookeeper называется ZAB (ZooKeeper Atomic Broadcast). Он не гарантирует линеаризуемость операций чтения, поскольку каждый узел ZooKeeper обслуживает чтения локально. В отличие от ZooKeeper, ClickHouse Keeper реализован на C++ и использует алгоритм [RAFT](https://raft.github.io/), [реализация](https://github.com/eBay/NuRaft). Этот алгоритм позволяет достичь линеаризуемости чтения и записи, имеет несколько реализаций с открытым исходным кодом на разных языках.
@ -21,7 +21,7 @@ ZooKeeper — один из первых широко известных сер
!!! info "Примечание"
Внешние интеграции не поддерживаются.
## Конфигурация
## Конфигурация {#configuration}
ClickHouse Keeper может использоваться как равноценная замена ZooKeeper или как внутренняя часть сервера ClickHouse, но в обоих случаях конфигурация представлена файлом `.xml`. Главный тег конфигурации ClickHouse Keeper — это `<keeper_server>`. Параметры конфигурации:
@ -54,6 +54,7 @@ ClickHouse Keeper может использоваться как равноце
- `auto_forwarding` — разрешить пересылку запросов на запись от последователей лидеру (по умолчанию: true).
- `shutdown_timeout` — время ожидания завершения внутренних подключений и выключения, в миллисекундах (по умолчанию: 5000).
- `startup_timeout` — время отключения сервера, если он не подключается к другим участникам кворума, в миллисекундах (по умолчанию: 30000).
- `four_letter_word_white_list` — список разрешенных 4-х буквенных команд (по умолчанию: "conf,cons,crst,envi,ruok,srst,srvr,stat,wchc,wchs,dirs,mntr,isro").
Конфигурация кворума находится в `<keeper_server>.<raft_configuration>` и содержит описание серверов.
@ -101,7 +102,7 @@ ClickHouse Keeper может использоваться как равноце
</keeper_server>
```
## Как запустить
## Как запустить {#how-to-run}
ClickHouse Keeper входит в пакет `clickhouse-server`, просто добавьте кофигурацию `<keeper_server>` и запустите сервер ClickHouse как обычно. Если вы хотите запустить ClickHouse Keeper автономно, сделайте это аналогичным способом:
@ -109,7 +110,195 @@ ClickHouse Keeper входит в пакет `clickhouse-server`, просто
clickhouse-keeper --config /etc/your_path_to_config/config.xml --daemon
```
## [экспериментально] Переход с ZooKeeper
## 4-х буквенные команды {#four-letter-word-commands}
ClickHouse Keeper также поддерживает 4-х буквенные команды, почти такие же, как у Zookeeper. Каждая команда состоит из 4-х символов, например, `mntr`, `stat` и т. д. Несколько интересных команд: `stat` предоставляет общую информацию о сервере и подключенных клиентах, а `srvr` и `cons` предоставляют расширенные сведения о сервере и подключениях соответственно.
У 4-х буквенных команд есть параметр для настройки разрешенного списка `four_letter_word_white_list`, который имеет значение по умолчанию "conf,cons,crst,envi,ruok,srst,srvr,stat,wchc,wchs,dirs,mntr,isro".
Вы можете отправлять команды в ClickHouse Keeper через telnet или nc на порт для клиента.
```
echo mntr | nc localhost 9181
```
Ниже приведен подробный список 4-х буквенных команд:
- `ruok`: Проверяет, что сервер запущен без ошибок. В этом случае сервер ответит `imok`. В противном случае он не ответит. Ответ `imok` не обязательно означает, что сервер присоединился к кворуму, а указывает, что процесс сервера активен и привязан к указанному клиентскому порту. Используйте команду `stat` для получения подробной информации о состоянии кворума и клиентском подключении.
```
imok
```
- `mntr`: Выводит список переменных, которые используются для мониторинга работоспособности кластера.
```
zk_version v21.11.1.1-prestable-7a4a0b0edef0ad6e0aa662cd3b90c3f4acf796e7
zk_avg_latency 0
zk_max_latency 0
zk_min_latency 0
zk_packets_received 68
zk_packets_sent 68
zk_num_alive_connections 1
zk_outstanding_requests 0
zk_server_state leader
zk_znode_count 4
zk_watch_count 1
zk_ephemerals_count 0
zk_approximate_data_size 723
zk_open_file_descriptor_count 310
zk_max_file_descriptor_count 10240
zk_followers 0
zk_synced_followers 0
```
- `srvr`: Выводит информацию о сервере: его версию, роль участника кворума и т.п.
```
ClickHouse Keeper version: v21.11.1.1-prestable-7a4a0b0edef0ad6e0aa662cd3b90c3f4acf796e7
Latency min/avg/max: 0/0/0
Received: 2
Sent : 2
Connections: 1
Outstanding: 0
Zxid: 34
Mode: leader
Node count: 4
```
- `stat`: Выводит краткие сведения о сервере и подключенных клиентах.
```
ClickHouse Keeper version: v21.11.1.1-prestable-7a4a0b0edef0ad6e0aa662cd3b90c3f4acf796e7
Clients:
192.168.1.1:52852(recved=0,sent=0)
192.168.1.1:52042(recved=24,sent=48)
Latency min/avg/max: 0/0/0
Received: 4
Sent : 4
Connections: 1
Outstanding: 0
Zxid: 36
Mode: leader
Node count: 4
```
- `srst`: Сбрасывает статистику сервера. Команда влияет на результат вывода `srvr`, `mntr` и `stat`.
```
Server stats reset.
```
- `conf`: Выводит подробную информацию о серверной конфигурации.
```
server_id=1
tcp_port=2181
four_letter_word_white_list=*
log_storage_path=./coordination/logs
snapshot_storage_path=./coordination/snapshots
max_requests_batch_size=100
session_timeout_ms=30000
operation_timeout_ms=10000
dead_session_check_period_ms=500
heart_beat_interval_ms=500
election_timeout_lower_bound_ms=1000
election_timeout_upper_bound_ms=2000
reserved_log_items=1000000000000000
snapshot_distance=10000
auto_forwarding=true
shutdown_timeout=5000
startup_timeout=240000
raft_logs_level=information
snapshots_to_keep=3
rotate_log_storage_interval=100000
stale_log_gap=10000
fresh_log_gap=200
max_requests_batch_size=100
quorum_reads=false
force_sync=false
compress_logs=true
compress_snapshots_with_zstd_format=true
configuration_change_tries_count=20
```
- `cons`: Выводит полную информацию о подключениях/сессиях для всех клиентов, подключенных к этому серверу. Включает информацию о количестве принятых/отправленных пакетов, идентификаторе сессии, задержках операций, последней выполненной операции и т. д.
```
192.168.1.1:52163(recved=0,sent=0,sid=0xffffffffffffffff,lop=NA,est=1636454787393,to=30000,lzxid=0xffffffffffffffff,lresp=0,llat=0,minlat=0,avglat=0,maxlat=0)
192.168.1.1:52042(recved=9,sent=18,sid=0x0000000000000001,lop=List,est=1636454739887,to=30000,lcxid=0x0000000000000005,lzxid=0x0000000000000005,lresp=1636454739892,llat=0,minlat=0,avglat=0,maxlat=0)
```
- `crst`: Сбрасывает статистику подключений/сессий для всех подключений.
```
Connection stats reset.
```
- `envi`: Выводит подробную информацию о серверном окружении.
```
Environment:
clickhouse.keeper.version=v21.11.1.1-prestable-7a4a0b0edef0ad6e0aa662cd3b90c3f4acf796e7
host.name=ZBMAC-C02D4054M.local
os.name=Darwin
os.arch=x86_64
os.version=19.6.0
cpu.count=12
user.name=root
user.home=/Users/JackyWoo/
user.dir=/Users/JackyWoo/project/jd/clickhouse/cmake-build-debug/programs/
user.tmp=/var/folders/b4/smbq5mfj7578f2jzwn602tt40000gn/T/
```
- `dirs`: Показывает общий размер файлов снэпшотов и журналов в байтах.
```
snapshot_dir_size: 0
log_dir_size: 3875
```
- `isro`: Проверяет, что сервер работает в режиме только для чтения. Сервер ответит `ro`, если он находится в режиме только для чтения, или `rw`, если нет.
```
rw
```
- `wchs`: Показывает краткую информацию о количестве отслеживаемых путей (watches) на сервере.
```
1 connections watching 1 paths
Total watches:1
```
- `wchc`: Показывает подробную информацию об отслеживаемых путях (watches) на сервере в разбивке по сессиям. При этом выводится список сессий (подключений) с соответствующими отслеживаемыми путями. Обратите внимание, что в зависимости от количества отслеживаемых путей эта операция может быть дорогостоящей (т. е. повлиять на производительность сервера), используйте ее осторожно.
```
0x0000000000000001
/clickhouse/task_queue/ddl
```
- `wchp`: Показывает подробную информацию об отслеживаемых путях (watches) на сервере в разбивке по пути. При этом выводится список путей (узлов) с соответствующими сессиями. Обратите внимание, что в зависимости от количества отслеживаемых путей (watches) эта операция может быть дорогостоящей (т. е. повлиять на производительность сервера), используйте ее осторожно.
```
/clickhouse/task_queue/ddl
0x0000000000000001
```
- `dump`: Выводит список незавершенных сеансов и эфемерных узлов. Команда работает только на лидере.
```
Sessions dump (2):
0x0000000000000001
0x0000000000000002
Sessions with Ephemerals (1):
0x0000000000000001
/clickhouse/task_queue/ddl
```
## [экспериментально] Переход с ZooKeeper {#migration-from-zookeeper}
Плавный переход с ZooKeeper на ClickHouse Keeper невозможен, необходимо остановить кластер ZooKeeper, преобразовать данные и запустить ClickHouse Keeper. Утилита `clickhouse-keeper-converter` конвертирует журналы и снэпшоты ZooKeeper в снэпшот ClickHouse Keeper. Работа утилиты проверена только для версий ZooKeeper выше 3.4. Для миграции необходимо выполнить следующие шаги:
View File
@ -673,7 +673,7 @@ ClickHouse поддерживает динамическое изменение
## max_concurrent_queries {#max-concurrent-queries}
Определяет максимальное количество одновременно обрабатываемых запросов, связанных с таблицей семейства `MergeTree`. Запросы также могут быть ограничены настройками: [max_concurrent_queries_for_user](#max-concurrent-queries-for-user), [max_concurrent_queries_for_all_users](#max-concurrent-queries-for-all-users), [min_marks_to_honor_max_concurrent_queries](#min-marks-to-honor-max-concurrent-queries).
Определяет максимальное количество одновременно обрабатываемых запросов, связанных с таблицей семейства `MergeTree`. Запросы также могут быть ограничены настройками: [max_concurrent_insert_queries](#max-concurrent-insert-queries), [max_concurrent_select_queries](#max-concurrent-select-queries), [max_concurrent_queries_for_user](#max-concurrent-queries-for-user), [max_concurrent_queries_for_all_users](#max-concurrent-queries-for-all-users), [min_marks_to_honor_max_concurrent_queries](#min-marks-to-honor-max-concurrent-queries).
!!! info "Примечание"
Параметры этих настроек могут быть изменены во время выполнения запросов и вступят в силу немедленно. Запросы, которые уже запущены, выполнятся без изменений.
@ -681,7 +681,9 @@ ClickHouse поддерживает динамическое изменение
Возможные значения:
- Положительное целое число.
- 0 — выключена.
- 0 — нет лимита.
Значение по умолчанию: `100`.
**Пример**
@ -689,6 +691,46 @@ ClickHouse поддерживает динамическое изменение
<max_concurrent_queries>100</max_concurrent_queries>
```
## max_concurrent_insert_queries {#max-concurrent-insert-queries}
Определяет максимальное количество одновременных `INSERT` запросов.
!!! info "Примечание"
Параметры этих настроек могут быть изменены во время выполнения запросов и вступят в силу немедленно. Запросы, которые уже запущены, выполнятся без изменений.
Возможные значения:
- Положительное целое число.
- 0 — нет лимита.
Значение по умолчанию: `0`.
**Пример**
``` xml
<max_concurrent_insert_queries>100</max_concurrent_insert_queries>
```
## max_concurrent_select_queries {#max-concurrent-select-queries}
Определяет максимальное количество одновременных `SELECT` запросов.
!!! info "Примечание"
Параметры этих настроек могут быть изменены во время выполнения запросов и вступят в силу немедленно. Запросы, которые уже запущены, выполнятся без изменений.
Возможные значения:
- Положительное целое число.
- 0 — нет лимита.
Значение по умолчанию: `0`.
**Пример**
``` xml
<max_concurrent_select_queries>100</max_concurrent_select_queries>
```
## max_concurrent_queries_for_user {#max-concurrent-queries-for-user}
Определяет максимальное количество одновременно обрабатываемых запросов, связанных с таблицей семейства `MergeTree`, для пользователя.
@ -696,7 +738,9 @@ ClickHouse поддерживает динамическое изменение
Возможные значения:
- Положительное целое число.
- 0 — выключена.
- 0 — нет лимита.
Значение по умолчанию: `0`.
**Пример**
@ -712,7 +756,12 @@ ClickHouse поддерживает динамическое изменение
Изменение настройки для одного запроса или пользователя не влияет на другие запросы.
Значение по умолчанию: `0` — отсутствие ограничений.
Возможные значения:
- Положительное целое число.
- 0 — нет лимита.
Значение по умолчанию: `0`.
**Пример**
View File
@ -1641,18 +1641,19 @@ SELECT * FROM table_with_enum_column_for_csv_insert;
`INSERT` завершается успешно только в том случае, когда ClickHouse смог без ошибки записать данные в `insert_quorum` реплик за время `insert_quorum_timeout`. Если по любой причине количество реплик с успешной записью не достигнет `insert_quorum`, то запись считается не состоявшейся и ClickHouse удалит вставленный блок из всех реплик, куда уже успел записать данные.
Все реплики в кворуме консистентны, т.е. содержат данные всех более ранних запросов `INSERT`. Последовательность `INSERT` линеаризуется.
Когда `insert_quorum_parallel` выключена, все реплики кворума консистентны, то есть содержат данные всех предыдущих запросов `INSERT` (последовательность `INSERT` линеаризуется). При чтении с диска данных, записанных с помощью `insert_quorum` и при выключенной `insert_quorum_parallel`, можно включить последовательную консистентность для запросов `SELECT` с помощью [select_sequential_consistency](#settings-select_sequential_consistency).
При чтении данных, записанных с `insert_quorum` можно использовать настройку [select_sequential_consistency](#settings-select_sequential_consistency).
ClickHouse генерирует исключение
ClickHouse генерирует исключение:
- Если количество доступных реплик на момент запроса меньше `insert_quorum`.
- При попытке записать данные в момент, когда предыдущий блок ещё не вставлен в `insert_quorum` реплик. Эта ситуация может возникнуть, если пользователь вызвал `INSERT` прежде, чем завершился предыдущий с `insert_quorum`.
- При выключенной `insert_quorum_parallel` и при попытке записать данные в момент, когда предыдущий блок еще не вставлен в `insert_quorum` реплик (несколько параллельных `INSERT`-запросов). Эта ситуация может возникнуть при попытке пользователя выполнить очередной запрос `INSERT` к той же таблице, прежде чем завершится предыдущий с `insert_quorum`.
См. также:
- [insert_quorum_timeout](#settings-insert_quorum_timeout)
- [insert_quorum_parallel](#settings-insert_quorum_parallel)
- [select_sequential_consistency](#settings-select_sequential_consistency)
## insert_quorum_timeout {#settings-insert_quorum_timeout}
@ -1664,11 +1665,29 @@ ClickHouse генерирует исключение
См. также:
- [insert_quorum](#settings-insert_quorum)
- [insert_quorum_parallel](#settings-insert_quorum_parallel)
- [select_sequential_consistency](#settings-select_sequential_consistency)
## insert_quorum_parallel {#settings-insert_quorum_parallel}
Включает и выключает параллелизм для кворумных вставок (`INSERT`-запросы). Когда опция включена, можно выполнять несколько кворумных `INSERT`-запросов одновременно, при этом запросы не дожидаются окончания друг друга. Когда опция выключена, одновременные записи с кворумом в одну и ту же таблицу будут отклонены (будет выполнена только одна из них).
Возможные значения:
- 0 — Выключена.
- 1 — Включена.
Значение по умолчанию: 1.
См. также:
- [insert_quorum](#settings-insert_quorum)
- [insert_quorum_timeout](#settings-insert_quorum_timeout)
- [select_sequential_consistency](#settings-select_sequential_consistency)
## select_sequential_consistency {#settings-select_sequential_consistency}
Включает или выключает последовательную консистентность для запросов `SELECT`.
Включает или выключает последовательную консистентность для запросов `SELECT`. Необходимо, чтобы `insert_quorum_parallel` была выключена (по умолчанию включена), а опция `insert_quorum` включена.
Возможные значения:
@ -1681,10 +1700,13 @@ ClickHouse генерирует исключение
Когда последовательная консистентность включена, то ClickHouse позволит клиенту выполнить запрос `SELECT` только к тем репликам, которые содержат данные всех предыдущих запросов `INSERT`, выполненных с `insert_quorum`. Если клиент обратится к неполной реплике, то ClickHouse сгенерирует исключение. В запросе SELECT не будут участвовать данные, которые ещё не были записаны на кворум реплик.
Если `insert_quorum_parallel` включена (по умолчанию это так), тогда `select_sequential_consistency` не будет работать. Причина в том, что параллельные запросы `INSERT` можно записать в разные наборы реплик кворума, поэтому нет гарантии того, что в отдельно взятую реплику будут сделаны все записи.
См. также:
- [insert_quorum](#settings-insert_quorum)
- [insert_quorum_timeout](#settings-insert_quorum_timeout)
- [insert_quorum_parallel](#settings-insert_quorum_parallel)
## insert_deduplicate {#settings-insert-deduplicate}
View File
@ -171,7 +171,7 @@ $ clickhouse-copier --daemon --config zookeeper.xml --task-path /task/path --bas
</table_hits>
<!-- Next table to copy. It is not copied until previous table is copying. -->
</table_visits>
<table_visits>
...
</table_visits>
...
View File
@ -57,7 +57,7 @@ toTimezone(value, timezone)
**Аргументы**
- `value` — время или дата с временем. [DateTime64](../../sql-reference/data-types/datetime64.md).
- `timezone` — часовой пояс для возвращаемого значения. [String](../../sql-reference/data-types/string.md).
- `timezone` — часовой пояс для возвращаемого значения. [String](../../sql-reference/data-types/string.md). Этот аргумент является константой, потому что `toTimezone` изменяет часовой пояс столбца (часовой пояс является атрибутом типов `DateTime*`).
**Возвращаемое значение**
View File
@ -3,10 +3,10 @@ toc_priority: 67
toc_title: NLP
---
# [экспериментально] Функции для работы с ествественным языком {#nlp-functions}
# [экспериментально] Функции для работы с естественным языком {#nlp-functions}
!!! warning "Предупреждение"
Сейчас использование функций для работы с ествественным языком является экспериментальной возможностью. Чтобы использовать данные функции, включите настройку `allow_experimental_nlp_functions = 1`.
Сейчас использование функций для работы с естественным языком является экспериментальной возможностью. Чтобы использовать данные функции, включите настройку `allow_experimental_nlp_functions = 1`.
## stem {#stem}
@ -84,7 +84,7 @@ SELECT lemmatize('en', 'wolves');
Находит синонимы к заданному слову. Представлены два типа расширений словарей: `plain` и `wordnet`.
Для работы расширения типа `plain` необходимо указать путь до простого текстового файла, где каждая строка соотвествует одному набору синонимов. Слова в данной строке должны быть разделены с помощью пробела или знака табуляции.
Для работы расширения типа `plain` необходимо указать путь до простого текстового файла, где каждая строка соответствует одному набору синонимов. Слова в данной строке должны быть разделены с помощью пробела или знака табуляции.
Для работы расширения типа `wordnet` необходимо указать путь до WordNet тезауруса. Тезаурус должен содержать WordNet sense index.
View File
@ -343,9 +343,9 @@ SELECT toDecimal32OrNull(toString(-1.111), 5) AS val, toTypeName(val);
Результат:
``` text
┌──────val─┬─toTypeName(toDecimal32OrNull(toString(-1.111), 5))─┐
│ -1.11100 │ Nullable(Decimal(9, 5)) │
└──────────┴────────────────────────────────────────────────────┘
┌────val─┬─toTypeName(toDecimal32OrNull(toString(-1.111), 5))─┐
│ -1.111 │ Nullable(Decimal(9, 5)) │
└────────┴────────────────────────────────────────────────────┘
```
Запрос:
@ -449,9 +449,9 @@ SELECT toDecimal32OrZero(toString(-1.111), 5) AS val, toTypeName(val);
Результат:
``` text
┌──────val─┬─toTypeName(toDecimal32OrZero(toString(-1.111), 5))─┐
│ -1.11100 │ Decimal(9, 5) │
└──────────┴────────────────────────────────────────────────────┘
┌────val─┬─toTypeName(toDecimal32OrZero(toString(-1.111), 5))─┐
│ -1.111 │ Decimal(9, 5) │
└────────┴────────────────────────────────────────────────────┘
```
Запрос:
View File
@ -21,7 +21,7 @@ GRANT [ON CLUSTER cluster_name] privilege[(column_name [,...])] [,...] ON {db.ta
- `user` — Пользователь ClickHouse.
`WITH GRANT OPTION` разрешает пользователю или роли выполнять запрос `GRANT`. Пользователь может выдавать только те привилегии, которые есть у него, той же или меньшей области действий.
`WITH REPLACE OPTION` заменяет все старые привилегии новыми привилегиями для `user` или `role`, Если не указано, добавьте новые привилегии для старых.
`WITH REPLACE OPTION` заменяет все старые привилегии новыми привилегиями для `user` или `role`, если не указано, добавляет новые привилегии.
## Синтаксис назначения ролей {#assign-role-syntax}
@ -34,7 +34,7 @@ GRANT [ON CLUSTER cluster_name] role [,...] TO {user | another_role | CURRENT_US
- `user` — Пользователь ClickHouse.
`WITH ADMIN OPTION` присваивает привилегию [ADMIN OPTION](#admin-option-privilege) пользователю или роли.
`WITH REPLACE OPTION` заменяет все старые роли новыми ролями для пользователя `user` или `role`, Если не указано, добавьте новые роли в старые.
`WITH REPLACE OPTION` заменяет все старые роли новыми ролями для пользователя `user` или `role`, если не указано, добавляет новые роли.
## Использование {#grant-usage}
View File
@ -95,7 +95,7 @@ def build_for_lang(lang, args):
site_dir=site_dir,
strict=True,
theme=theme_cfg,
copyright='©2016–2021 ClickHouse, Inc.',
copyright='©2016–2022 ClickHouse, Inc.',
use_directory_urls=True,
repo_name='ClickHouse/ClickHouse',
repo_url='https://github.com/ClickHouse/ClickHouse/',
View File
@ -0,0 +1 @@
../../../en/faq/operations/multi-region-replication.md
View File
@ -407,7 +407,7 @@ $ curl -v 'http://localhost:8123/predefined_query'
`query` 是一个预定义的`predefined_query_handler`查询它由ClickHouse在匹配HTTP请求并返回查询结果时执行。这是一个必须的配置。
以下是定义的[max_threads](../operations/settings/settings.md#settings-max_threads)和`max_alter_threads`设置, 然后查询系统表以检查这些设置是否设置成功。
以下是定义的[max_threads](../operations/settings/settings.md#settings-max_threads)和`max_final_threads`设置, 然后查询系统表以检查这些设置是否设置成功。
示例:
@ -430,9 +430,9 @@ $ curl -v 'http://localhost:8123/predefined_query'
```
``` bash
$ curl -H 'XXX:TEST_HEADER_VALUE' -H 'PARAMS_XXX:max_threads' 'http://localhost:8123/query_param_with_url/1/max_threads/max_alter_threads?max_threads=1&max_alter_threads=2'
$ curl -H 'XXX:TEST_HEADER_VALUE' -H 'PARAMS_XXX:max_threads' 'http://localhost:8123/query_param_with_url/1/max_threads/max_final_threads?max_threads=1&max_final_threads=2'
1
max_alter_threads 2
max_final_threads 2
```
!!! note "警告"
@ -444,7 +444,7 @@ max_alter_threads 2
ClickHouse提取并执行与HTTP请求URL中的`query_param_name`值对应的值。`query_param_name`的默认值是`/query`。这是一个可选的配置。如果配置文件中没有定义,则不会传入参数。
为了试验这个功能,示例定义了[max_threads](../operations/settings/settings.md#settings-max_threads)和`max_alter_threads``queries`设置是否成功的值。
为了试验这个功能,示例定义了[max_threads](../operations/settings/settings.md#settings-max_threads)和`max_final_threads`的值,然后查询这些设置是否设置成功。
示例:
@ -462,9 +462,9 @@ ClickHouse提取并执行与HTTP请求URL中的`query_param_name`值对应的值
```
``` bash
$ curl -H 'XXX:TEST_HEADER_VALUE_DYNAMIC' 'http://localhost:8123/own?max_threads=1&max_alter_threads=2&param_name_1=max_threads&param_name_2=max_alter_threads&query_param=SELECT%20name,value%20FROM%20system.settings%20where%20name%20=%20%7Bname_1:String%7D%20OR%20name%20=%20%7Bname_2:String%7D'
$ curl -H 'XXX:TEST_HEADER_VALUE_DYNAMIC' 'http://localhost:8123/own?max_threads=1&max_final_threads=2&param_name_1=max_threads&param_name_2=max_final_threads&query_param=SELECT%20name,value%20FROM%20system.settings%20where%20name%20=%20%7Bname_1:String%7D%20OR%20name%20=%20%7Bname_2:String%7D'
max_threads 1
max_alter_threads 2
max_final_threads 2
```
### static {#static}
View File
@ -1,29 +1,89 @@
---
machine_translated: true
machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3
---
# system.columns {#system-columns}
# 系统。列 {#system-columns}
此系统表包含所有表中列的信息。
包含有关所有表中列的信息。
你可以使用这个表来获得类似于 [DESCRIBE TABLE](../../sql-reference/statements/misc.md#misc-describe-table) 查询的信息,但是可以同时获得多个表的信息。
您可以使用此表获取类似于以下内容的信息 [DESCRIBE TABLE](../../sql-reference/statements/misc.md#misc-describe-table) 查询,但对于多个表一次
[临时表](../../sql-reference/statements/create/table.md#temporary-tables)中的列只在创建它们的会话中的 `system.columns` 中才可见,并且它们的 `database` 字段显示为空
`system.columns` 表包含以下列(列类型显示在括号中):
`system.columns` 表包含以下列 (括号中显示的是列类型):
- `database` (String) — Database name.
- `table` (String) — Table name.
- `name` (String) — Column name.
- `type` (String) — Column type.
- `default_kind` (String) — Expression type (`DEFAULT`, `MATERIALIZED`, `ALIAS`)为默认值,如果没有定义,则为空字符串。
- `default_expression` (String) — Expression for the default value, or an empty string if it is not defined.
- `data_compressed_bytes` (UInt64) — The size of compressed data, in bytes.
- `data_uncompressed_bytes` (UInt64) — The size of decompressed data, in bytes.
- `marks_bytes` (UInt64) — The size of marks, in bytes.
- `comment` (String) — Comment on the column, or an empty string if it is not defined.
- `is_in_partition_key` (UInt8) — Flag that indicates whether the column is in the partition expression.
- `is_in_sorting_key` (UInt8) — Flag that indicates whether the column is in the sorting key expression.
- `is_in_primary_key` (UInt8) — Flag that indicates whether the column is in the primary key expression.
- `is_in_sampling_key` (UInt8) — Flag that indicates whether the column is in the sampling key expression.
- `database` ([String](../../sql-reference/data-types/string.md)) — 数据库名称。
- `table` ([String](../../sql-reference/data-types/string.md)) — 表名。
- `name` ([String](../../sql-reference/data-types/string.md)) — 列名。
- `type` ([String](../../sql-reference/data-types/string.md)) — 列类型。
- `position` ([UInt64](../../sql-reference/data-types/int-uint.md)) — 列在表中的顺序位置从1开始。
- `default_kind` ([String](../../sql-reference/data-types/string.md)) — 默认值的表达式类型(`DEFAULT`, `MATERIALIZED`, `ALIAS`) ,如果没有定义,则为空字符串。
- `default_expression` ([String](../../sql-reference/data-types/string.md)) — 默认值的表达式,如果未定义则为空字符串。
- `data_compressed_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — 压缩数据的大小,以字节为单位。
- `data_uncompressed_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — 解压后的数据的大小,以字节为单位。
- `marks_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — 标记的大小,以字节为单位。
- `comment` ([String](../../sql-reference/data-types/string.md)) — 列注释,如果没有定义,则为空字符串。
- `is_in_partition_key` ([UInt8](../../sql-reference/data-types/int-uint.md)) — 列是否在分区表达式中的标志。
- `is_in_sorting_key` ([UInt8](../../sql-reference/data-types/int-uint.md)) — 列是否在排序键表达式中的标志。
- `is_in_primary_key` ([UInt8](../../sql-reference/data-types/int-uint.md)) — 列是否在主键表达式中的标志。
- `is_in_sampling_key` ([UInt8](../../sql-reference/data-types/int-uint.md)) — 列是否在采样键表达式中的标志。
- `compression_codec` ([String](../../sql-reference/data-types/string.md)) — 压缩编码的名称。
- `character_octet_length` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — 二进制数据、字符数据或文本数据和图像的最大长度(以字节为单位)。在 ClickHouse 中只对 `FixedString` 数据类型有意义。否则,将返回 `NULL` 值。
- `numeric_precision` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — 近似数字型数据、精确数字型数据、整数型数据或货币数据的精度。在 ClickHouse 中,对于整数类型是比特率(bitness),对于 `Decimal` 类型是十进制精度。否则,将返回 `NULL` 值。
- `numeric_precision_radix` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — 数字系统的基数是近似数字型数据、精确数字型数据、整数型数据或货币数据的精度。在 ClickHouse 中对于整数类型是2对于 `Decimal` 类型是10。否则将返回 `NULL` 值。
- `numeric_scale` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — 近似数字型数据、精确数字型数据、整数型数据或货币数据的比例。在 ClickHouse 中只对 `Decimal` 类型有意义。否则,将返回 `NULL` 值。
- `datetime_precision` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — `DateTime64` 数据类型的小数精度。对于其他数据类型,将返回 `NULL` 值。
**示例**
```sql
SELECT * FROM system.columns LIMIT 2 FORMAT Vertical;
```
```text
Row 1:
──────
database: INFORMATION_SCHEMA
table: COLUMNS
name: table_catalog
type: String
position: 1
default_kind:
default_expression:
data_compressed_bytes: 0
data_uncompressed_bytes: 0
marks_bytes: 0
comment:
is_in_partition_key: 0
is_in_sorting_key: 0
is_in_primary_key: 0
is_in_sampling_key: 0
compression_codec:
character_octet_length: ᴺᵁᴸᴸ
numeric_precision: ᴺᵁᴸᴸ
numeric_precision_radix: ᴺᵁᴸᴸ
numeric_scale: ᴺᵁᴸᴸ
datetime_precision: ᴺᵁᴸᴸ
Row 2:
──────
database: INFORMATION_SCHEMA
table: COLUMNS
name: table_schema
type: String
position: 2
default_kind:
default_expression:
data_compressed_bytes: 0
data_uncompressed_bytes: 0
marks_bytes: 0
comment:
is_in_partition_key: 0
is_in_sorting_key: 0
is_in_primary_key: 0
is_in_sampling_key: 0
compression_codec:
character_octet_length: ᴺᵁᴸᴸ
numeric_precision: ᴺᵁᴸᴸ
numeric_precision_radix: ᴺᵁᴸᴸ
numeric_scale: ᴺᵁᴸᴸ
datetime_precision: ᴺᵁᴸᴸ
```
[原文](https://clickhouse.com/docs/zh/operations/system-tables/columns) <!--hide-->
View File
@ -1,15 +1,10 @@
---
machine_translated: true
machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3
---
# system.contributors {#system-contributors}
# 系统。贡献者 {#system-contributors}
包含有关贡献者的信息。 该顺序在查询执行时是随机的。
此系统表包含有关贡献者的信息。排列顺序是在查询执行时随机生成的。
列:
- `name` (String) — Contributor (author) name from git log.
- `name` (String) — git 日志中的贡献者 (作者) 名字。
**示例**
@ -32,7 +27,7 @@ SELECT * FROM system.contributors LIMIT 10
└──────────────────┘
```
要在表中找出自己,请使用查询:
要在表中找到你自己,请这样查询:
``` sql
SELECT * FROM system.contributors WHERE name = 'Olga Khvostikova'
@ -43,3 +38,5 @@ SELECT * FROM system.contributors WHERE name = 'Olga Khvostikova'
│ Olga Khvostikova │
└──────────────────┘
```
[原文](https://clickhouse.com/docs/zh/operations/system-tables/contributors) <!--hide-->
View File
@ -1,14 +1,11 @@
# system.detached_parts {#system_tables-detached_parts}

Contains information about detached parts of [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) tables. The `reason` column specifies why a part was detached.

For user-detached parts, the reason is empty. Such parts can be attached with the [ALTER TABLE ATTACH PARTITION\|PART](../../sql-reference/statements/alter/partition.md#alter_attach-partition) command.

For the description of other columns, see [system.parts](../../operations/system-tables/parts.md#system_tables-parts).

If a part name is invalid, values of some columns may be `NULL`. Such parts can be deleted with [ALTER TABLE DROP DETACHED PART](../../sql-reference/statements/alter/partition.md#alter_drop-detached).
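For instance, a minimal sketch (database, table, and partition names are placeholders) that inspects detached parts and re-attaches one of them:

``` sql
SELECT database, table, partition_id, name, reason
FROM system.detached_parts;

ALTER TABLE my_db.my_table ATTACH PARTITION 202201;
```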
[Original article](https://clickhouse.com/docs/zh/operations/system-tables/detached_parts) <!--hide-->

View File

@ -1,19 +1,14 @@
# system.metrics {#system_tables-metrics}

This system table contains metrics that can be calculated instantly or that have a current value. For example, the number of simultaneously processed queries or the current replica delay. This table is always up to date.

Columns:

- `metric` ([String](../../sql-reference/data-types/string.md)) — Metric name.
- `value` ([Int64](../../sql-reference/data-types/int-uint.md)) — Metric value.
- `description` ([String](../../sql-reference/data-types/string.md)) — Metric description.

For the list of supported metrics, see the [src/Common/CurrentMetrics.cpp](https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/CurrentMetrics.cpp) source file of ClickHouse.

**Example**
@ -38,7 +33,7 @@ SELECT * FROM system.metrics LIMIT 10
**See Also**

- [system.asynchronous_metrics](../../operations/system-tables/asynchronous_metrics.md#system_tables-asynchronous_metrics) — Contains periodically calculated metrics.
- [system.events](../../operations/system-tables/events.md#system_tables-events) — Contains a number of events that occurred.
- [system.metric_log](../../operations/system-tables/metric_log.md#system_tables-metric_log) — Contains the history of metric values from the `system.metrics` and `system.events` tables.
- [Monitoring](../../operations/monitoring.md) — Basic concepts of ClickHouse monitoring.

View File

@ -1,12 +1,32 @@
# system.numbers {#system-numbers}

This table contains a single UInt64 column named `number`, which holds almost all natural numbers starting from zero.

You can use this table for tests, or for brute-force searches.

Reads from this table are not parallelized.

**Example**
```sql
:) SELECT * FROM system.numbers LIMIT 10;
```
```text
┌─number─┐
│ 0 │
│ 1 │
│ 2 │
│ 3 │
│ 4 │
│ 5 │
│ 6 │
│ 7 │
│ 8 │
│ 9 │
└────────┘
10 rows in set. Elapsed: 0.001 sec.
```
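As a sketch of the brute-force-search use mentioned above (the predicate is an arbitrary illustration), a query can scan the numbers until the first match, at which point `LIMIT` stops the read:

``` sql
SELECT number
FROM system.numbers
WHERE toString(number) LIKE '%777%'
LIMIT 1;
```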
[Original article](https://clickhouse.com/docs/zh/operations/system-tables/numbers) <!--hide-->

View File

@ -1,85 +1,167 @@
# system.parts {#system_tables-parts}

Contains information about parts of [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) tables.

Each row describes one data part.

Columns:
- `partition` ([String](../../sql-reference/data-types/string.md)) — The partition name. To learn what a partition is, see the description of the [ALTER](../../sql-reference/statements/alter/index.md#query_language_queries_alter) query.

    Formats:

    - `YYYYMM` for automatic partitioning by month.
    - `any_string` when partitioning manually; the string can be in any other format.

- `name` ([String](../../sql-reference/data-types/string.md)) — Name of the data part.
- `part_type` ([String](../../sql-reference/data-types/string.md)) — The data part storing format.

    Possible values:

    - `Wide` — Each column is stored in a separate file on the filesystem.
    - `Compact` — All columns are stored in one file on the filesystem.

    The storing format is controlled by the `min_bytes_for_wide_part` and `min_rows_for_wide_part` settings of the [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) table.

- `active` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Flag that indicates whether the data part is active. If a data part is active, it is used by the table; otherwise it is deleted. Inactive data parts remain after merging.
- `marks` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The number of marks. To get the approximate number of rows in a data part, multiply `marks` by the index granularity (usually 8192). Not applicable to adaptive granularity.
- `rows` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The number of rows.
- `bytes_on_disk` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Total size of all the data part files in bytes.
- `data_compressed_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Total size of compressed data in the data part. All auxiliary files (for example, files with marks) are not included.
- `data_uncompressed_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Total size of uncompressed data in the data part. All auxiliary files (for example, files with marks) are not included.
- `marks_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The size of the file with marks.
- `secondary_indices_compressed_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Total size of compressed data for secondary indices in the data part. All auxiliary files (for example, files with marks) are not included.
- `secondary_indices_uncompressed_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Total size of uncompressed data for secondary indices in the data part. All auxiliary files (for example, files with marks) are not included.
- `secondary_indices_marks_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The size of the file with marks for secondary indices.
- `modification_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — The time the directory with the data part was modified. This usually corresponds to the time of data part creation.
- `remove_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — The time when the data part became inactive.
- `refcount` ([UInt32](../../sql-reference/data-types/int-uint.md)) — The number of places where the data part is used. A value greater than 2 indicates that the data part is used in queries or merges.
- `min_date` ([Date](../../sql-reference/data-types/date.md)) — The minimum value of the date key in the data part.
- `max_date` ([Date](../../sql-reference/data-types/date.md)) — The maximum value of the date key in the data part.
- `min_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — The minimum value of the date and time key in the data part.
- `max_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — The maximum value of the date and time key in the data part.
- `partition_id` ([String](../../sql-reference/data-types/string.md)) — ID of the partition.
- `min_block_number` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The minimum number of the data parts that make up the current part after merging.
- `max_block_number` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The maximum number of the data parts that make up the current part after merging.
- `level` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Depth of the merge tree. Zero means that the current part was created by an insert rather than by merging other parts.
- `data_version` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Number that is used to determine which mutations should be applied to the data part (mutations with a version higher than `data_version`).
- `primary_key_bytes_in_memory` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The amount of memory (in bytes) used by primary key values.
- `primary_key_bytes_in_memory_allocated` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The amount of memory (in bytes) reserved for primary key values.
- `is_frozen` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Flag that shows whether a partition data backup exists: 1 means the backup exists, 0 means it does not. For more details, see [FREEZE PARTITION](../../sql-reference/statements/alter/partition.md#alter_freeze-partition).
- `database` ([String](../../sql-reference/data-types/string.md)) — Name of the database.
- `table` ([String](../../sql-reference/data-types/string.md)) — Name of the table.
- `engine` ([String](../../sql-reference/data-types/string.md)) — Name of the table engine without parameters.
- `path` ([String](../../sql-reference/data-types/string.md)) — Absolute path to the folder with data part files.
- `disk` ([String](../../sql-reference/data-types/string.md)) — Name of the disk that stores the data part.
- `hash_of_all_files` ([String](../../sql-reference/data-types/string.md)) — [sipHash128](../../sql-reference/functions/hash-functions.md#hash_functions-siphash128) of compressed files.
- `hash_of_uncompressed_files` ([String](../../sql-reference/data-types/string.md)) — [sipHash128](../../sql-reference/functions/hash-functions.md#hash_functions-siphash128) of uncompressed files (files with marks, the index file, etc.).
- `uncompressed_hash_of_compressed_files` ([String](../../sql-reference/data-types/string.md)) — [sipHash128](../../sql-reference/functions/hash-functions.md#hash_functions-siphash128) of the data in the compressed files as if they were uncompressed.
- `delete_ttl_info_min` ([DateTime](../../sql-reference/data-types/datetime.md)) — The minimum value of the date and time key for [TTL DELETE rules](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-ttl).
- `delete_ttl_info_max` ([DateTime](../../sql-reference/data-types/datetime.md)) — The maximum value of the date and time key for [TTL DELETE rules](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-ttl).
- `move_ttl_info.expression` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — Array of expressions. Each expression defines a [TTL MOVE rule](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-ttl).

    !!! note "Warning"
        The `move_ttl_info.expression` array is kept mostly for backward compatibility; now the simplest way to check a `TTL MOVE` rule is to use the `move_ttl_info.min` and `move_ttl_info.max` fields.

- `move_ttl_info.min` ([Array](../../sql-reference/data-types/array.md)([DateTime](../../sql-reference/data-types/datetime.md))) — Array of date and time values. Each element describes the minimum key value for a [TTL MOVE rule](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-ttl).
- `move_ttl_info.max` ([Array](../../sql-reference/data-types/array.md)([DateTime](../../sql-reference/data-types/datetime.md))) — Array of date and time values. Each element describes the maximum key value for a [TTL MOVE rule](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-ttl).
- `bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Alias for `bytes_on_disk`.
- `marks_size` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Alias for `marks_bytes`.
**Example**
``` sql
SELECT * FROM system.parts LIMIT 1 FORMAT Vertical;
```
``` text
Row 1:
──────
partition: tuple()
name: all_1_4_1_6
part_type: Wide
active: 1
marks: 2
rows: 6
bytes_on_disk: 310
data_compressed_bytes: 157
data_uncompressed_bytes: 91
secondary_indices_compressed_bytes: 58
secondary_indices_uncompressed_bytes: 6
secondary_indices_marks_bytes: 48
marks_bytes: 144
modification_time: 2020-06-18 13:01:49
remove_time: 1970-01-01 00:00:00
refcount: 1
min_date: 1970-01-01
max_date: 1970-01-01
min_time: 1970-01-01 00:00:00
max_time: 1970-01-01 00:00:00
partition_id: all
min_block_number: 1
max_block_number: 4
level: 1
data_version: 6
primary_key_bytes_in_memory: 8
primary_key_bytes_in_memory_allocated: 64
is_frozen: 0
database: default
table: months
engine: MergeTree
disk_name: default
path: /var/lib/clickhouse/data/default/months/all_1_4_1_6/
hash_of_all_files: 2d0657a16d9430824d35e327fcbd87bf
hash_of_uncompressed_files: 84950cc30ba867c77a408ae21332ba29
uncompressed_hash_of_compressed_files: 1ad78f1c6843bbfb99a2c931abe7df7d
delete_ttl_info_min: 1970-01-01 00:00:00
delete_ttl_info_max: 1970-01-01 00:00:00
move_ttl_info.expression: []
move_ttl_info.min: []
move_ttl_info.max: []
```
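As a practical sketch building on the columns above, the following query (filters and the `LIMIT` are up to the reader) shows the largest tables by the on-disk size of their active parts:

``` sql
SELECT
    database,
    table,
    count() AS parts,
    formatReadableSize(sum(bytes_on_disk)) AS size_on_disk
FROM system.parts
WHERE active
GROUP BY database, table
ORDER BY sum(bytes_on_disk) DESC
LIMIT 10;
```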
**See Also**

- [MergeTree family](../../engines/table-engines/mergetree-family/mergetree.md)
- [TTL for Columns and Tables](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-ttl)
[Original article](https://clickhouse.com/docs/zh/operations/system-tables/parts) <!--hide-->

View File

@ -1,27 +1,22 @@
# system.settings {#system-tables-system-settings}

Contains information about session settings for the current user.

Columns:
- `name` ([String](../../sql-reference/data-types/string.md)) — Setting name.
- `value` ([String](../../sql-reference/data-types/string.md)) — Setting value.
- `changed` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Shows whether the setting was changed from its default value.
- `description` ([String](../../sql-reference/data-types/string.md)) — Short description of the setting.
- `min` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — The minimum value of the setting, if one is set via [constraints](../../operations/settings/constraints-on-settings.md#constraints-on-settings). If the setting has no minimum value, contains [NULL](../../sql-reference/syntax.md#null-literal).
- `max` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — The maximum value of the setting, if one is set via [constraints](../../operations/settings/constraints-on-settings.md#constraints-on-settings). If the setting has no maximum value, contains [NULL](../../sql-reference/syntax.md#null-literal).
- `readonly` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Shows whether the current user can change the setting:
    - `0` — The current user can change the setting.
    - `1` — The current user cannot change the setting.
**Example**

The following example shows how to get information about settings whose names contain `min_i`.
``` sql
SELECT *
@ -37,10 +32,10 @@ WHERE name LIKE '%min_i%'
└─────────────────────────────────────────────┴───────────┴─────────┴───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┴──────┴──────┴──────────┘
```
Using `WHERE changed` can be useful, for example, when you want to check:

- Whether settings from configuration files were loaded correctly and are in use.
- Settings that were changed in the current session.
<!-- -->
@ -52,4 +47,6 @@ SELECT * FROM system.settings WHERE changed AND name='load_balancing'
- [Settings](../../operations/settings/index.md#session-settings-intro)
- [Permissions for Queries](../../operations/settings/permissions-for-queries.md#settings_readonly)
- [Constraints on Settings](../../operations/settings/constraints-on-settings.md)
[Original article](https://clickhouse.com/docs/zh/operations/system-tables/settings) <!--hide-->

View File

@ -158,7 +158,7 @@ clickhouse-copier --daemon --config zookeeper.xml --task-path /task/path --base-
</table_hits>
<!-- Next table to copy. It is not copied until previous table is copying. -->
<table_visits>
...
</table_visits>
...

View File

@ -12,4 +12,4 @@ toc_priority: 107
Calculates the Pearson correlation coefficient: `Σ((x - x̅)(y - y̅)) / sqrt(Σ((x - x̅)^2) * Σ((y - y̅)^2))`.

!!! note "Note"
    This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `corrStable` function. It works more slowly but provides a lower computational error.

View File

@ -12,4 +12,4 @@ covarPop(x, y)
Calculates the value of `Σ((x - x̅)(y - y̅)) / n`.

!!! note "Note"
    This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `covarPopStable` function. It works more slowly but provides a lower computational error.

View File

@ -14,4 +14,4 @@ covarSamp(x, y)
Returns Float64. When `n <= 1`, returns +∞.

!!! note "Note"
    This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `covarSampStable` function. It works more slowly but provides a lower computational error.

View File

@ -46,7 +46,7 @@ quantileTiming(level)(expr)
Type: `Float32`.

!!! note "Note"
    If no values are passed to the function (when using `quantileTimingIf`), [NaN](../../../sql-reference/data-types/float.md#data_type-float-nan-inf) is returned. The purpose of this is to distinguish these cases from cases that result in zero. See the [ORDER BY clause](../../../sql-reference/statements/select/order-by.md#select-order-by) for notes on sorting `NaN` values.

**Example**

View File

@ -48,7 +48,7 @@ quantileTimingWeighted(level)(expr, weight)
Type: `Float32`.

!!! note "Note"
    If no values are passed to the function (when using `quantileTimingIf`), [NaN](../../../sql-reference/data-types/float.md#data_type-float-nan-inf) is returned. The purpose of this is to distinguish these cases from cases that result in zero. See the [ORDER BY clause](../../../sql-reference/statements/select/order-by.md#select-order-by) for notes on sorting `NaN` values.

**Example**

View File

@ -4,7 +4,7 @@ toc_priority: 30
# stddevPop {#stddevpop}
The result is equal to the square root of [varPop](../../../sql-reference/aggregate-functions/reference/varpop.md).

!!! note "Note"
    This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `stddevPopStable` function. It works more slowly but provides a lower computational error.

View File

@ -7,4 +7,4 @@ toc_priority: 31
The result is equal to the square root of [varSamp](../../../sql-reference/aggregate-functions/reference/varsamp.md).

!!! note "Note"
    This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `stddevSampStable` function. It works more slowly but provides a lower computational error.

View File

@ -9,4 +9,4 @@ toc_priority: 32
In other words, it computes the dispersion of a set of values. Returns `Float64`.

!!! note "Note"
    This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `varPopStable` function. It works more slowly but provides a lower computational error.

View File

@ -11,5 +11,5 @@ toc_priority: 33
Returns `Float64`. When `n <= 1`, returns `+∞`.

!!! note "Note"
    This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `varSampStable` function. It works more slowly but provides a lower computational error.

View File

@ -167,9 +167,9 @@ SELECT toDecimal32OrNull(toString(-1.111), 5) AS val, toTypeName(val)
```
``` text
┌────val─┬─toTypeName(toDecimal32OrNull(toString(-1.111), 5))─┐
│ -1.111 │ Nullable(Decimal(9, 5))                             │
└────────┴────────────────────────────────────────────────────┘
```
``` sql
@ -210,9 +210,9 @@ SELECT toDecimal32OrZero(toString(-1.111), 5) AS val, toTypeName(val)
```
``` text
┌────val─┬─toTypeName(toDecimal32OrZero(toString(-1.111), 5))─┐
│ -1.111 │ Decimal(9, 5)                                       │
└────────┴────────────────────────────────────────────────────┘
```
``` sql

View File

@ -1 +0,0 @@
../../../../en/sql-reference/statements/create/function.md

View File

@ -0,0 +1,60 @@
---
toc_priority: 38
toc_title: FUNCTION
---
# CREATE FUNCTION {#create-function}
Creates a user defined function from a lambda expression. The expression must consist of function parameters, constants, operators, or other function calls.

**Syntax**
```sql
CREATE FUNCTION name AS (parameter0, ...) -> expression
```
A function can have an arbitrary number of parameters.

There are a few restrictions:

- The name of a function must be unique among user defined functions and system functions.
- Recursive functions are not allowed.
- All variables used by a function must be specified in its parameter list.

If any of these restrictions is violated, an exception is raised.

**Example**

Query:
```sql
CREATE FUNCTION linear_equation AS (x, k, b) -> k*x + b;
SELECT number, linear_equation(number, 2, 1) FROM numbers(3);
```
Result:
``` text
┌─number─┬─plus(multiply(2, number), 1)─┐
│ 0 │ 1 │
│ 1 │ 3 │
│ 2 │ 5 │
└────────┴──────────────────────────────┘
```
In the following query, a [conditional function](../../../sql-reference/functions/conditional-functions.md) is called inside the user defined function:
```sql
CREATE FUNCTION parity_str AS (n) -> if(n % 2, 'odd', 'even');
SELECT number, parity_str(number) FROM numbers(3);
```
Result:
``` text
┌─number─┬─if(modulo(number, 2), 'odd', 'even')─┐
│ 0 │ even │
│ 1 │ odd │
│ 2 │ even │
└────────┴──────────────────────────────────────┘
```

View File

@ -342,6 +342,9 @@ private:
}
}
/// Now we don't block the Ctrl+C signal and a second signal will terminate the program without waiting.
interrupt_listener.unblock();
pool.wait();
total_watch.stop();
@ -586,7 +589,6 @@ public:
#ifndef __clang__
#pragma GCC optimize("-fno-var-tracking-assignments")
#endif
#pragma GCC diagnostic ignored "-Wmissing-declarations"
int mainEntryClickHouseBenchmark(int argc, char ** argv)
{

View File

@ -25,7 +25,6 @@
#include <Common/formatReadable.h>
#include <Common/TerminalSize.h>
#include <Common/Config/configReadClient.h>
#include "Common/MemoryTracker.h"
#include <Core/QueryProcessingStage.h>
#include <Client/TestHint.h>
@ -56,11 +55,6 @@
#pragma GCC optimize("-fno-var-tracking-assignments")
#endif
namespace CurrentMetrics
{
extern const Metric MemoryTracking;
}
namespace fs = std::filesystem;
@ -410,16 +404,6 @@ try
std::cout << std::fixed << std::setprecision(3);
std::cerr << std::fixed << std::setprecision(3);
/// Limit on total memory usage
size_t max_client_memory_usage = config().getInt64("max_memory_usage_in_client", 0 /*default value*/);
if (max_client_memory_usage != 0)
{
total_memory_tracker.setHardLimit(max_client_memory_usage);
total_memory_tracker.setDescription("(total)");
total_memory_tracker.setMetric(CurrentMetrics::MemoryTracking);
}
registerFormats();
registerFunctions();
registerAggregateFunctions();
@ -1014,7 +998,6 @@ void Client::addOptions(OptionsDescription & options_description)
("opentelemetry-tracestate", po::value<std::string>(), "OpenTelemetry tracestate header as described by W3C Trace Context recommendation")
("no-warnings", "disable warnings when client connects to server")
("max_memory_usage_in_client", po::value<int>(), "sets memory limit in client")
;
/// Commandline options related to external tables.

View File

@ -153,10 +153,12 @@ static void createGroup(const String & group_name)
if (!group_name.empty())
{
#if defined(OS_DARWIN)
// TODO: implement.
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unable to create a group in macOS");
#elif defined(OS_FREEBSD)
std::string command = fmt::format("pw groupadd {}", group_name);
fmt::print(" {}\n", command);
executeScript(command);
#else
std::string command = fmt::format("groupadd -r {}", group_name);
fmt::print(" {}\n", command);
@ -170,10 +172,14 @@ static void createUser(const String & user_name, [[maybe_unused]] const String &
if (!user_name.empty())
{
#if defined(OS_DARWIN)
// TODO: implement.
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unable to create a user in macOS");
#elif defined(OS_FREEBSD)
std::string command = group_name.empty()
? fmt::format("pw useradd -s /bin/false -d /nonexistent -n {}", user_name)
: fmt::format("pw useradd -s /bin/false -d /nonexistent -g {} -n {}", group_name, user_name);
fmt::print(" {}\n", command);
executeScript(command);
#else
std::string command = group_name.empty()
? fmt::format("useradd -r --shell /bin/false --home-dir /nonexistent --user-group {}", user_name)
@ -185,6 +191,20 @@ static void createUser(const String & user_name, [[maybe_unused]] const String &
}
static std::string formatWithSudo(std::string command, bool needed = true)
{
if (!needed)
return command;
#if defined(OS_FREEBSD)
/// FreeBSD does not have 'sudo' installed.
return fmt::format("su -m root -c '{}'", command);
#else
return fmt::format("sudo {}", command);
#endif
}
int mainEntryClickHouseInstall(int argc, char ** argv)
{
try
@ -207,10 +227,7 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
if (options.count("help"))
{
std::cout << "Usage: "
<< (getuid() == 0 ? "" : "sudo ")
<< argv[0]
<< " install [options]\n";
std::cout << "Usage: " << formatWithSudo(std::string(argv[0]) + " install [options]", getuid() != 0) << '\n';
std::cout << desc << '\n';
return 1;
}
@ -233,6 +250,9 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
path.pop_back();
fs::path binary_self_path(path);
#elif defined(OS_FREEBSD)
/// https://stackoverflow.com/questions/1023306/finding-current-executables-path-without-proc-self-exe
fs::path binary_self_path = argc >= 1 ? argv[0] : "/proc/curproc/file";
#else
fs::path binary_self_path = "/proc/self/exe";
#endif
@ -314,7 +334,7 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
catch (const Exception & e)
{
if (e.code() == ErrorCodes::CANNOT_OPEN_FILE && geteuid() != 0)
std::cerr << "Install must be run as root: sudo ./clickhouse install\n";
std::cerr << "Install must be run as root: " << formatWithSudo("./clickhouse install") << '\n';
throw;
}
@ -824,9 +844,10 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
fmt::print(
"\nClickHouse has been successfully installed.\n"
"\nRestart clickhouse-server with:\n"
" sudo clickhouse restart\n"
" {}\n"
"\nStart clickhouse-client with:\n"
" clickhouse-client{}\n\n",
formatWithSudo("clickhouse restart"),
maybe_password);
}
else
@ -834,9 +855,10 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
fmt::print(
"\nClickHouse has been successfully installed.\n"
"\nStart clickhouse-server with:\n"
" sudo clickhouse start\n"
" {}\n"
"\nStart clickhouse-client with:\n"
" clickhouse-client{}\n\n",
formatWithSudo("clickhouse start"),
maybe_password);
}
}
@ -845,7 +867,7 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
std::cerr << getCurrentExceptionMessage(false) << '\n';
if (getuid() != 0)
std::cerr << "\nRun with sudo.\n";
std::cerr << "\nRun with " << formatWithSudo("...") << "\n";
return getCurrentExceptionCode();
}
@ -901,6 +923,9 @@ namespace
if (!user.empty())
{
#if defined(OS_FREEBSD)
command = fmt::format("su -m '{}' -c '{}'", user, command);
#else
bool may_need_sudo = geteuid() != 0;
if (may_need_sudo)
{
@ -910,7 +935,10 @@ namespace
command = fmt::format("sudo -u '{}' {}", user, command);
}
else
{
command = fmt::format("su -s /bin/sh '{}' -c '{}'", user, command);
}
#endif
}
fmt::print("Will run {}\n", command);
@ -1114,10 +1142,7 @@ int mainEntryClickHouseStart(int argc, char ** argv)
if (options.count("help"))
{
std::cout << "Usage: "
<< (getuid() == 0 ? "" : "sudo ")
<< argv[0]
<< " start\n";
std::cout << "Usage: " << formatWithSudo(std::string(argv[0]) + " start", getuid() != 0) << '\n';
return 1;
}
@ -1155,10 +1180,7 @@ int mainEntryClickHouseStop(int argc, char ** argv)
if (options.count("help"))
{
std::cout << "Usage: "
<< (getuid() == 0 ? "" : "sudo ")
<< argv[0]
<< " stop\n";
std::cout << "Usage: " << formatWithSudo(std::string(argv[0]) + " stop", getuid() != 0) << '\n';
return 1;
}
@ -1191,10 +1213,7 @@ int mainEntryClickHouseStatus(int argc, char ** argv)
if (options.count("help"))
{
std::cout << "Usage: "
<< (getuid() == 0 ? "" : "sudo ")
<< argv[0]
<< " status\n";
std::cout << "Usage: " << formatWithSudo(std::string(argv[0]) + " status", getuid() != 0) << '\n';
return 1;
}
@ -1233,10 +1252,7 @@ int mainEntryClickHouseRestart(int argc, char ** argv)
if (options.count("help"))
{
std::cout << "Usage: "
<< (getuid() == 0 ? "" : "sudo ")
<< argv[0]
<< " restart\n";
std::cout << "Usage: " << formatWithSudo(std::string(argv[0]) + " restart", getuid() != 0) << '\n';
return 1;
}

View File

@ -313,11 +313,11 @@ void LocalServer::cleanup()
std::string LocalServer::getInitialCreateTableQuery()
{
if (!config().has("table-structure"))
if (!config().has("table-structure") && !config().has("table-file"))
return {};
auto table_name = backQuoteIfNeed(config().getString("table-name", "table"));
auto table_structure = config().getString("table-structure");
auto table_structure = config().getString("table-structure", "auto");
auto data_format = backQuoteIfNeed(config().getString("table-data-format", "TSV"));
String table_file;
@ -332,7 +332,12 @@ std::string LocalServer::getInitialCreateTableQuery()
table_file = quoteString(config().getString("table-file"));
}
return fmt::format("CREATE TABLE {} ({}) ENGINE = File({}, {});",
if (table_structure == "auto")
table_structure = "";
else
table_structure = "(" + table_structure + ")";
return fmt::format("CREATE TABLE {} {} ENGINE = File({}, {});",
table_name, table_structure, data_format, table_file);
}
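For illustration, a sketch of the statements this format string produces (the file path and column structure are hypothetical):

``` sql
-- With an explicit --table-structure 'a UInt64, b String':
CREATE TABLE `table` (a UInt64, b String) ENGINE = File(TSV, '/tmp/data.tsv');

-- With the default 'auto' structure, the column list is omitted
-- and the structure is inferred from the data file:
CREATE TABLE `table` ENGINE = File(TSV, '/tmp/data.tsv');
```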
@ -422,7 +427,7 @@ try
#else
is_interactive = stdin_is_a_tty
&& (config().hasOption("interactive")
|| (!config().has("query") && !config().has("table-structure") && queries_files.empty()));
|| (!config().has("query") && !config().has("table-structure") && queries_files.empty() && !config().has("table-file")));
#endif
if (!is_interactive)
{

View File

@ -91,6 +91,25 @@ T execute(nanodbc::ConnectionHolderPtr connection_holder, std::function<T(nanodb
connection_holder->updateConnection();
return query_func(connection_holder->get());
}
/// psqlodbc driver error handling is incomplete and under some scenarios
/// it doesn't propagate correct errors to the caller.
/// As a quick workaround we run a quick "ping" query over the connection
/// on generic errors.
/// If "ping" fails, recycle the connection and try the query once more.
if (e.state().starts_with("HY00"))
{
try
{
just_execute(connection_holder->get(), "SELECT 1");
}
catch (...)
{
connection_holder->updateConnection();
return query_func(connection_holder->get());
}
}
throw;
}
}

View File

@ -924,6 +924,12 @@ if (ThreadFuzzer::instance().isEffective())
if (config->has("max_concurrent_queries"))
global_context->getProcessList().setMaxSize(config->getInt("max_concurrent_queries", 0));
if (config->has("max_concurrent_insert_queries"))
global_context->getProcessList().setMaxInsertQueriesAmount(config->getInt("max_concurrent_insert_queries", 0));
if (config->has("max_concurrent_select_queries"))
global_context->getProcessList().setMaxSelectQueriesAmount(config->getInt("max_concurrent_select_queries", 0));
if (config->has("keeper_server"))
global_context->updateKeeperConfiguration(*config);

View File

@ -160,6 +160,7 @@ enum class AccessType
M(SYSTEM_FLUSH_DISTRIBUTED, "FLUSH DISTRIBUTED", TABLE, SYSTEM_FLUSH) \
M(SYSTEM_FLUSH_LOGS, "FLUSH LOGS", GLOBAL, SYSTEM_FLUSH) \
M(SYSTEM_FLUSH, "", GROUP, SYSTEM) \
M(SYSTEM_THREAD_FUZZER, "SYSTEM START THREAD FUZZER, SYSTEM STOP THREAD FUZZER, START THREAD FUZZER, STOP THREAD FUZZER", GLOBAL, SYSTEM) \
M(SYSTEM, "", GROUP, ALL) /* allows to execute SYSTEM {SHUTDOWN|RELOAD CONFIG|...} */ \
\
M(dictGet, "dictHas, dictGetHierarchy, dictIsIn", DICTIONARY, ALL) /* allows to execute functions dictGet(), dictHas(), dictGetHierarchy(), dictIsIn() */\
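A hedged SQL sketch of exercising the new access type, based on the aliases listed in the macro above (the grantee name is a placeholder):

``` sql
GRANT SYSTEM THREAD FUZZER ON *.* TO test_user;

SYSTEM STOP THREAD FUZZER;
SYSTEM START THREAD FUZZER;
```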

View File

@ -0,0 +1,44 @@
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/CrossTab.h>
#include <AggregateFunctions/FactoryHelpers.h>
#include <memory>
#include <cmath>
namespace DB
{
namespace
{
struct ContingencyData : CrossTabData
{
static const char * getName()
{
return "contingency";
}
Float64 getResult() const
{
if (count < 2)
return std::numeric_limits<Float64>::quiet_NaN();
Float64 phi = getPhiSquared();
return sqrt(phi / (phi + count));
}
};
}
void registerAggregateFunctionContingency(AggregateFunctionFactory & factory)
{
factory.registerFunction(ContingencyData::getName(),
[](const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings *)
{
assertBinary(name, argument_types);
assertNoParameters(name, parameters);
return std::make_shared<AggregateFunctionCrossTab<ContingencyData>>(argument_types);
});
}
}
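As a usage sketch of the function registered above (its SQL name comes from `getName()`, and `assertBinary` requires exactly two column arguments), it returns the contingency coefficient `sqrt(φ² / (φ² + n))` for the value pairs:

``` sql
SELECT contingency(number % 3, number % 6) FROM numbers(1000);
```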

View File

@ -0,0 +1,44 @@
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/CrossTab.h>
#include <AggregateFunctions/FactoryHelpers.h>
#include <memory>
#include <cmath>
namespace DB
{
namespace
{
struct CramersVData : CrossTabData
{
static const char * getName()
{
return "cramersV";
}
Float64 getResult() const
{
if (count < 2)
return std::numeric_limits<Float64>::quiet_NaN();
UInt64 q = std::min(count_a.size(), count_b.size());
return sqrt(getPhiSquared() / (q - 1));
}
};
}
void registerAggregateFunctionCramersV(AggregateFunctionFactory & factory)
{
factory.registerFunction(CramersVData::getName(),
[](const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings *)
{
assertBinary(name, argument_types);
assertNoParameters(name, parameters);
return std::make_shared<AggregateFunctionCrossTab<CramersVData>>(argument_types);
});
}
}
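A similar usage sketch for Cramér's V, which, per `getResult()` above, normalizes φ² by `q - 1`, where `q` is the smaller of the two distinct-value counts:

``` sql
SELECT cramersV(number % 3, number % 5) FROM numbers(1000);
```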

View File

@ -0,0 +1,54 @@
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/CrossTab.h>
#include <AggregateFunctions/FactoryHelpers.h>
#include <memory>
#include <cmath>
namespace DB
{
namespace
{
struct CramersVBiasCorrectedData : CrossTabData
{
static const char * getName()
{
return "cramersVBiasCorrected";
}
Float64 getResult() const
{
if (count < 2)
return std::numeric_limits<Float64>::quiet_NaN();
Float64 phi = getPhiSquared();
Float64 a_size_adjusted = count_a.size() - 1;
Float64 b_size_adjusted = count_b.size() - 1;
Float64 count_adjusted = count - 1;
Float64 res = std::max(0.0, phi - a_size_adjusted * b_size_adjusted / count_adjusted);
Float64 correction_a = count_a.size() - a_size_adjusted * a_size_adjusted / count_adjusted;
Float64 correction_b = count_b.size() - b_size_adjusted * b_size_adjusted / count_adjusted;
res /= std::min(correction_a, correction_b) - 1;
return sqrt(res);
}
};
}
void registerAggregateFunctionCramersVBiasCorrected(AggregateFunctionFactory & factory)
{
factory.registerFunction(CramersVBiasCorrectedData::getName(),
[](const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings *)
{
assertBinary(name, argument_types);
assertNoParameters(name, parameters);
return std::make_shared<AggregateFunctionCrossTab<CramersVBiasCorrectedData>>(argument_types);
});
}
}
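And the bias-corrected variant, invoked the same way (a sketch; on small samples its result is typically somewhat lower than plain `cramersV`):

``` sql
SELECT cramersVBiasCorrected(number % 3, number % 5) FROM numbers(1000);
```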

View File

@ -6,6 +6,7 @@
#include <Common/ArenaAllocator.h>
#include <Common/assert_cast.h>
#include <base/arithmeticOverflow.h>
#include <DataTypes/DataTypeDateTime.h>
#include <DataTypes/DataTypesNumber.h>
@ -15,6 +16,7 @@
#include <unordered_set>
namespace DB
{
@ -23,12 +25,11 @@ namespace ErrorCodes
extern const int TOO_LARGE_ARRAY_SIZE;
}
/** Calculate total length of intervals without intersections. Each interval is the pair of numbers [begin, end];
  * Returns UInt64 for integral types (UInt/Int*, Date/DateTime) and returns Float64 for Float*.
  *
  * Implementation simply stores intervals sorted by beginning and sums their lengths at the end.
  */
template <typename T>
struct AggregateFunctionIntervalLengthSumData
{
@ -43,10 +44,14 @@ struct AggregateFunctionIntervalLengthSumData
void add(T begin, T end)
{
/// Reversed intervals are counted by absolute value of their length.
if (unlikely(end < begin))
std::swap(begin, end);
else if (unlikely(begin == end))
return;
if (sorted && !segments.empty())
{
sorted = segments.back().first <= begin;
}
segments.emplace_back(begin, end);
}
@ -130,6 +135,11 @@ template <typename T, typename Data>
class AggregateFunctionIntervalLengthSum final : public IAggregateFunctionDataHelper<Data, AggregateFunctionIntervalLengthSum<T, Data>>
{
private:
static auto NO_SANITIZE_UNDEFINED length(typename Data::Segment segment)
{
return segment.second - segment.first;
}
template <typename TResult>
TResult getIntervalLengthSum(Data & data) const
{
@ -140,21 +150,24 @@ private:
TResult res = 0;
        typename Data::Segment curr_segment = data.segments[0];

        for (size_t i = 1, size = data.segments.size(); i < size; ++i)
        {
            const typename Data::Segment & next_segment = data.segments[i];

            /// Check if current interval intersects with next one then add length, otherwise advance interval end.
            if (curr_segment.second < next_segment.first)
            {
                res += length(curr_segment);
                curr_segment = next_segment;
            }
            else if (next_segment.second > curr_segment.second)
            {
                curr_segment.second = next_segment.second;
            }
        }

        res += length(curr_segment);
return res;
}
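As a usage sketch, assuming the aggregate is exposed under the SQL name `intervalLengthSum` (the data is inlined via the `values` table function for illustration); the overlapping intervals [1, 3] and [2, 5] merge into [1, 5], so the result is 4 + 2 = 6:

``` sql
SELECT intervalLengthSum(begin, end)
FROM values('begin UInt64, end UInt64', (1, 3), (2, 5), (10, 12));
```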

View File

@ -0,0 +1,20 @@
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/AggregateFunctionNothing.h>
#include <AggregateFunctions/FactoryHelpers.h>
namespace DB
{
struct Settings;
void registerAggregateFunctionNothing(AggregateFunctionFactory & factory)
{
factory.registerFunction("nothing", [](const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings *)
{
assertNoParameters(name, parameters);
return std::make_shared<AggregateFunctionNothing>(argument_types, parameters);
});
}
}
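A speculative SQL-level sketch of the `nothing` aggregate registered above; per the implementation it ignores all input and returns a default value of the argument type (or `Nullable(Nothing)`, i.e. `NULL`, for the zero-argument form):

``` sql
SELECT nothing(number) FROM numbers(3); -- default UInt64 value, i.e. 0
SELECT nothing();                       -- NULL
```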

View File

@ -4,6 +4,8 @@
#include <DataTypes/DataTypeNothing.h>
#include <Columns/IColumn.h>
#include <AggregateFunctions/IAggregateFunction.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
namespace DB
@ -26,7 +28,7 @@ public:
DataTypePtr getReturnType() const override
{
return argument_types.front();
return argument_types.empty() ? std::make_shared<DataTypeNullable>(std::make_shared<DataTypeNothing>()) : argument_types.front();
}
bool allocatesMemoryInArena() const override { return false; }
@ -62,12 +64,16 @@ public:
{
}
void serialize(ConstAggregateDataPtr __restrict, WriteBuffer &, std::optional<size_t>) const override
void serialize(ConstAggregateDataPtr __restrict, WriteBuffer & buf, std::optional<size_t>) const override
{
writeChar('\0', buf);
}
void deserialize(AggregateDataPtr, ReadBuffer &, std::optional<size_t>, Arena *) const override
void deserialize(AggregateDataPtr, ReadBuffer & buf, std::optional<size_t>, Arena *) const override
{
[[maybe_unused]] char symbol;
readChar(symbol, buf);
assert(symbol == '\0');
}
void insertResultInto(AggregateDataPtr, IColumn & to, Arena *) const override

View File

@ -0,0 +1,61 @@
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/CrossTab.h>
#include <AggregateFunctions/FactoryHelpers.h>
#include <memory>
#include <cmath>
namespace DB
{
namespace
{
struct TheilsUData : CrossTabData
{
static const char * getName()
{
return "theilsU";
}
Float64 getResult() const
{
if (count < 2)
return std::numeric_limits<Float64>::quiet_NaN();
Float64 h_a = 0.0;
for (const auto & [key, value] : count_a)
{
Float64 value_float = value;
h_a += (value_float / count) * log(value_float / count);
}
Float64 dep = 0.0;
for (const auto & [key, value] : count_ab)
{
Float64 value_ab = value;
Float64 value_b = count_b.at(key.items[1]);
dep += (value_ab / count) * log(value_ab / value_b);
}
dep -= h_a;
dep /= h_a;
return dep;
}
};
}
void registerAggregateFunctionTheilsU(AggregateFunctionFactory & factory)
{
factory.registerFunction(TheilsUData::getName(),
[](const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings *)
{
assertBinary(name, argument_types);
assertNoParameters(name, parameters);
return std::make_shared<AggregateFunctionCrossTab<TheilsUData>>(argument_types);
});
}
}
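A usage sketch for Theil's U, again a two-column aggregate per `assertBinary`; unlike `cramersV`, it is not symmetric in its two arguments:

``` sql
SELECT theilsU(number % 3, number % 6) FROM numbers(1000);
```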

Some files were not shown because too many files have changed in this diff.