mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-09-20 08:40:50 +00:00
Merge branch 'master' into non-experimental-qpl-deflate
This commit is contained in:
commit
bc7df2baca
@ -74,6 +74,7 @@ ConstructorInitializerIndentWidth: 4
|
||||
ContinuationIndentWidth: 4
|
||||
DerivePointerAlignment: false
|
||||
DisableFormat: false
|
||||
IndentRequiresClause: false
|
||||
IndentWidth: 4
|
||||
IndentWrappedFunctionNames: false
|
||||
MacroBlockBegin: ''
|
||||
|
1
.github/workflows/woboq.yml
vendored
1
.github/workflows/woboq.yml
vendored
@ -12,6 +12,7 @@ jobs:
|
||||
# don't use dockerhub push because this image updates so rarely
|
||||
WoboqCodebrowser:
|
||||
runs-on: [self-hosted, style-checker]
|
||||
timeout-minutes: 420 # the task is pretty heavy, so there's an additional hour
|
||||
steps:
|
||||
- name: Set envs
|
||||
run: |
|
||||
|
2
.gitmodules
vendored
2
.gitmodules
vendored
@ -19,7 +19,7 @@
|
||||
url = https://github.com/google/googletest
|
||||
[submodule "contrib/capnproto"]
|
||||
path = contrib/capnproto
|
||||
url = https://github.com/capnproto/capnproto
|
||||
url = https://github.com/ClickHouse/capnproto
|
||||
[submodule "contrib/double-conversion"]
|
||||
path = contrib/double-conversion
|
||||
url = https://github.com/google/double-conversion
|
||||
|
@ -6,8 +6,10 @@ rules:
|
||||
level: warning
|
||||
indent-sequences: consistent
|
||||
line-length:
|
||||
# there are some bash -c "", so this is OK
|
||||
max: 300
|
||||
# there are:
|
||||
# - bash -c "", so this is OK
|
||||
# - yaml in tests
|
||||
max: 1000
|
||||
level: warning
|
||||
comments:
|
||||
min-spaces-from-content: 1
|
||||
|
@ -11,3 +11,8 @@ constexpr double interpolateExponential(double min, double max, double ratio)
|
||||
assert(min > 0 && ratio >= 0 && ratio <= 1);
|
||||
return min * std::pow(max / min, ratio);
|
||||
}
|
||||
|
||||
constexpr double interpolateLinear(double min, double max, double ratio)
|
||||
{
|
||||
return std::lerp(min, max, ratio);
|
||||
}
|
||||
|
2
contrib/capnproto
vendored
2
contrib/capnproto
vendored
@ -1 +1 @@
|
||||
Subproject commit dc8b50b999777bcb23c89bb5907c785c3f654441
|
||||
Subproject commit 976209a6d18074804f60d18ef99b6a809d27dadf
|
@ -4,7 +4,7 @@ if (SANITIZE OR NOT (
|
||||
))
|
||||
if (ENABLE_JEMALLOC)
|
||||
message (${RECONFIGURE_MESSAGE_LEVEL}
|
||||
"jemalloc is disabled implicitly: it doesn't work with sanitizers and can only be used with x86_64, aarch64, or ppc64le Linux or FreeBSD builds and RelWithDebInfo macOS builds.")
|
||||
"jemalloc is disabled implicitly: it doesn't work with sanitizers and can only be used with x86_64, aarch64, or ppc64le Linux or FreeBSD builds and RelWithDebInfo macOS builds. Use -DENABLE_JEMALLOC=0")
|
||||
endif ()
|
||||
set (ENABLE_JEMALLOC OFF)
|
||||
else ()
|
||||
|
@ -32,7 +32,7 @@ RUN arch=${TARGETARCH:-amd64} \
|
||||
esac
|
||||
|
||||
ARG REPOSITORY="https://s3.amazonaws.com/clickhouse-builds/22.4/31c367d3cd3aefd316778601ff6565119fe36682/package_release"
|
||||
ARG VERSION="23.5.2.7"
|
||||
ARG VERSION="23.5.3.24"
|
||||
ARG PACKAGES="clickhouse-keeper"
|
||||
|
||||
# user/group precreated explicitly with fixed uid/gid on purpose.
|
||||
|
@ -33,7 +33,7 @@ RUN arch=${TARGETARCH:-amd64} \
|
||||
# lts / testing / prestable / etc
|
||||
ARG REPO_CHANNEL="stable"
|
||||
ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
|
||||
ARG VERSION="23.5.2.7"
|
||||
ARG VERSION="23.5.3.24"
|
||||
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
|
||||
|
||||
# user/group precreated explicitly with fixed uid/gid on purpose.
|
||||
|
@ -22,7 +22,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list
|
||||
|
||||
ARG REPO_CHANNEL="stable"
|
||||
ARG REPOSITORY="deb https://packages.clickhouse.com/deb ${REPO_CHANNEL} main"
|
||||
ARG VERSION="23.5.2.7"
|
||||
ARG VERSION="23.5.3.24"
|
||||
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
|
||||
|
||||
# set non-empty deb_location_url url to create a docker image
|
||||
|
@ -20,6 +20,7 @@ For more information and documentation see https://clickhouse.com/.
|
||||
|
||||
- The amd64 image requires support for [SSE3 instructions](https://en.wikipedia.org/wiki/SSE3). Virtually all x86 CPUs after 2005 support SSE3.
|
||||
- The arm64 image requires support for the [ARMv8.2-A architecture](https://en.wikipedia.org/wiki/AArch64#ARMv8.2-A). Most ARM CPUs after 2017 support ARMv8.2-A. A notable exception is Raspberry Pi 4 from 2019 whose CPU only supports ARMv8.0-A.
|
||||
- Since the Clickhouse 23.3 Ubuntu image started using `ubuntu:22.04` as its base image, it requires docker version >= `20.10.10`, or use `docker run -- privileged` instead. Alternatively, try the Clickhouse Alpine image.
|
||||
|
||||
## How to use this image
|
||||
|
||||
|
@ -12,10 +12,10 @@ RUN apt-get update --yes && \
|
||||
# We need to get the repository's HEAD each time despite, so we invalidate layers' cache
|
||||
ARG CACHE_INVALIDATOR=0
|
||||
RUN mkdir /sqlancer && \
|
||||
wget -q -O- https://github.com/sqlancer/sqlancer/archive/master.tar.gz | \
|
||||
wget -q -O- https://github.com/sqlancer/sqlancer/archive/main.tar.gz | \
|
||||
tar zx -C /sqlancer && \
|
||||
cd /sqlancer/sqlancer-master && \
|
||||
mvn package -DskipTests && \
|
||||
cd /sqlancer/sqlancer-main && \
|
||||
mvn --no-transfer-progress package -DskipTests && \
|
||||
rm -r /root/.m2
|
||||
|
||||
COPY run.sh /
|
||||
|
@ -16,7 +16,6 @@ def process_result(result_folder):
|
||||
"TLPGroupBy",
|
||||
"TLPHaving",
|
||||
"TLPWhere",
|
||||
"TLPWhereGroupBy",
|
||||
"NoREC",
|
||||
]
|
||||
failed_tests = []
|
||||
|
@ -33,7 +33,7 @@ cd /workspace
|
||||
|
||||
for _ in $(seq 1 60); do if [[ $(wget -q 'localhost:8123' -O-) == 'Ok.' ]]; then break ; else sleep 1; fi ; done
|
||||
|
||||
cd /sqlancer/sqlancer-master
|
||||
cd /sqlancer/sqlancer-main
|
||||
|
||||
TIMEOUT=300
|
||||
NUM_QUERIES=1000
|
||||
|
@ -15,6 +15,9 @@ dpkg -i package_folder/clickhouse-client_*.deb
|
||||
|
||||
ln -s /usr/share/clickhouse-test/clickhouse-test /usr/bin/clickhouse-test
|
||||
|
||||
# shellcheck disable=SC1091
|
||||
source /usr/share/clickhouse-test/ci/attach_gdb.lib || true # FIXME: to not break old builds, clean on 2023-09-01
|
||||
|
||||
# install test configs
|
||||
/usr/share/clickhouse-test/config/install.sh
|
||||
|
||||
@ -85,6 +88,8 @@ fi
|
||||
|
||||
sleep 5
|
||||
|
||||
attach_gdb_to_clickhouse || true # FIXME: to not break old builds, clean on 2023-09-01
|
||||
|
||||
function run_tests()
|
||||
{
|
||||
set -x
|
||||
|
@ -61,6 +61,7 @@ configure
|
||||
|
||||
# it contains some new settings, but we can safely remove it
|
||||
rm /etc/clickhouse-server/config.d/merge_tree.xml
|
||||
rm /etc/clickhouse-server/users.d/nonconst_timezone.xml
|
||||
|
||||
start
|
||||
stop
|
||||
@ -88,6 +89,7 @@ configure
|
||||
|
||||
# it contains some new settings, but we can safely remove it
|
||||
rm /etc/clickhouse-server/config.d/merge_tree.xml
|
||||
rm /etc/clickhouse-server/users.d/nonconst_timezone.xml
|
||||
|
||||
start
|
||||
|
||||
|
19
docs/changelogs/v22.8.19.10-lts.md
Normal file
19
docs/changelogs/v22.8.19.10-lts.md
Normal file
@ -0,0 +1,19 @@
|
||||
---
|
||||
sidebar_position: 1
|
||||
sidebar_label: 2023
|
||||
---
|
||||
|
||||
# 2023 Changelog
|
||||
|
||||
### ClickHouse release v22.8.19.10-lts (989bc2fe8b0) FIXME as compared to v22.8.18.31-lts (4de7a95a544)
|
||||
|
||||
#### Bug Fix (user-visible misbehavior in an official stable release)
|
||||
|
||||
* Fix subtly broken copy-on-write of ColumnLowCardinality dictionary [#51064](https://github.com/ClickHouse/ClickHouse/pull/51064) ([Michael Kolupaev](https://github.com/al13n321)).
|
||||
* Generate safe IVs [#51086](https://github.com/ClickHouse/ClickHouse/pull/51086) ([Salvatore Mesoraca](https://github.com/aiven-sal)).
|
||||
|
||||
#### NOT FOR CHANGELOG / INSIGNIFICANT
|
||||
|
||||
* Fix a versions' tweak for tagged commits, improve version_helper [#51035](https://github.com/ClickHouse/ClickHouse/pull/51035) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Sqlancer has changed master to main [#51060](https://github.com/ClickHouse/ClickHouse/pull/51060) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
|
22
docs/changelogs/v23.3.4.17-lts.md
Normal file
22
docs/changelogs/v23.3.4.17-lts.md
Normal file
@ -0,0 +1,22 @@
|
||||
---
|
||||
sidebar_position: 1
|
||||
sidebar_label: 2023
|
||||
---
|
||||
|
||||
# 2023 Changelog
|
||||
|
||||
### ClickHouse release v23.3.4.17-lts (2c99b73ff40) FIXME as compared to v23.3.3.52-lts (cb963c474db)
|
||||
|
||||
#### Bug Fix (user-visible misbehavior in an official stable release)
|
||||
|
||||
* Fix crash when Pool::Entry::disconnect() is called [#50334](https://github.com/ClickHouse/ClickHouse/pull/50334) ([Val Doroshchuk](https://github.com/valbok)).
|
||||
* Avoid storing logs in Keeper containing unknown operation [#50751](https://github.com/ClickHouse/ClickHouse/pull/50751) ([Antonio Andelic](https://github.com/antonio2368)).
|
||||
* Fix subtly broken copy-on-write of ColumnLowCardinality dictionary [#51064](https://github.com/ClickHouse/ClickHouse/pull/51064) ([Michael Kolupaev](https://github.com/al13n321)).
|
||||
* Generate safe IVs [#51086](https://github.com/ClickHouse/ClickHouse/pull/51086) ([Salvatore Mesoraca](https://github.com/aiven-sal)).
|
||||
|
||||
#### NOT FOR CHANGELOG / INSIGNIFICANT
|
||||
|
||||
* Don't mark a part as broken on `Poco::TimeoutException` [#50811](https://github.com/ClickHouse/ClickHouse/pull/50811) ([Alexander Tokmakov](https://github.com/tavplubix)).
|
||||
* Fix a versions' tweak for tagged commits, improve version_helper [#51035](https://github.com/ClickHouse/ClickHouse/pull/51035) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Sqlancer has changed master to main [#51060](https://github.com/ClickHouse/ClickHouse/pull/51060) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
|
22
docs/changelogs/v23.4.4.16-stable.md
Normal file
22
docs/changelogs/v23.4.4.16-stable.md
Normal file
@ -0,0 +1,22 @@
|
||||
---
|
||||
sidebar_position: 1
|
||||
sidebar_label: 2023
|
||||
---
|
||||
|
||||
# 2023 Changelog
|
||||
|
||||
### ClickHouse release v23.4.4.16-stable (747ba4fc6a0) FIXME as compared to v23.4.3.48-stable (d9199f8d3cc)
|
||||
|
||||
#### Bug Fix (user-visible misbehavior in an official stable release)
|
||||
|
||||
* Fix crash when Pool::Entry::disconnect() is called [#50334](https://github.com/ClickHouse/ClickHouse/pull/50334) ([Val Doroshchuk](https://github.com/valbok)).
|
||||
* Fix iceberg V2 optional metadata parsing [#50974](https://github.com/ClickHouse/ClickHouse/pull/50974) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
* Fix subtly broken copy-on-write of ColumnLowCardinality dictionary [#51064](https://github.com/ClickHouse/ClickHouse/pull/51064) ([Michael Kolupaev](https://github.com/al13n321)).
|
||||
* Generate safe IVs [#51086](https://github.com/ClickHouse/ClickHouse/pull/51086) ([Salvatore Mesoraca](https://github.com/aiven-sal)).
|
||||
|
||||
#### NOT FOR CHANGELOG / INSIGNIFICANT
|
||||
|
||||
* Don't mark a part as broken on `Poco::TimeoutException` [#50811](https://github.com/ClickHouse/ClickHouse/pull/50811) ([Alexander Tokmakov](https://github.com/tavplubix)).
|
||||
* Fix a versions' tweak for tagged commits, improve version_helper [#51035](https://github.com/ClickHouse/ClickHouse/pull/51035) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Sqlancer has changed master to main [#51060](https://github.com/ClickHouse/ClickHouse/pull/51060) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
|
26
docs/changelogs/v23.5.3.24-stable.md
Normal file
26
docs/changelogs/v23.5.3.24-stable.md
Normal file
@ -0,0 +1,26 @@
|
||||
---
|
||||
sidebar_position: 1
|
||||
sidebar_label: 2023
|
||||
---
|
||||
|
||||
# 2023 Changelog
|
||||
|
||||
### ClickHouse release v23.5.3.24-stable (76f54616d3b) FIXME as compared to v23.5.2.7-stable (5751aa1ab9f)
|
||||
|
||||
#### Bug Fix (user-visible misbehavior in an official stable release)
|
||||
|
||||
* Fix Log family table return wrong rows count after truncate [#50585](https://github.com/ClickHouse/ClickHouse/pull/50585) ([flynn](https://github.com/ucasfl)).
|
||||
* Fix bug in `uniqExact` parallel merging [#50590](https://github.com/ClickHouse/ClickHouse/pull/50590) ([Nikita Taranov](https://github.com/nickitat)).
|
||||
* Revert recent grace hash join changes [#50699](https://github.com/ClickHouse/ClickHouse/pull/50699) ([vdimir](https://github.com/vdimir)).
|
||||
* Avoid storing logs in Keeper containing unknown operation [#50751](https://github.com/ClickHouse/ClickHouse/pull/50751) ([Antonio Andelic](https://github.com/antonio2368)).
|
||||
* Add compat setting for non-const timezones [#50834](https://github.com/ClickHouse/ClickHouse/pull/50834) ([Robert Schulze](https://github.com/rschu1ze)).
|
||||
* Fix iceberg V2 optional metadata parsing [#50974](https://github.com/ClickHouse/ClickHouse/pull/50974) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
* Fix subtly broken copy-on-write of ColumnLowCardinality dictionary [#51064](https://github.com/ClickHouse/ClickHouse/pull/51064) ([Michael Kolupaev](https://github.com/al13n321)).
|
||||
* Generate safe IVs [#51086](https://github.com/ClickHouse/ClickHouse/pull/51086) ([Salvatore Mesoraca](https://github.com/aiven-sal)).
|
||||
|
||||
#### NOT FOR CHANGELOG / INSIGNIFICANT
|
||||
|
||||
* Don't mark a part as broken on `Poco::TimeoutException` [#50811](https://github.com/ClickHouse/ClickHouse/pull/50811) ([Alexander Tokmakov](https://github.com/tavplubix)).
|
||||
* Fix a versions' tweak for tagged commits, improve version_helper [#51035](https://github.com/ClickHouse/ClickHouse/pull/51035) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Sqlancer has changed master to main [#51060](https://github.com/ClickHouse/ClickHouse/pull/51060) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
|
@ -53,6 +53,7 @@ Engines in the family:
|
||||
- [JDBC](../../engines/table-engines/integrations/jdbc.md)
|
||||
- [MySQL](../../engines/table-engines/integrations/mysql.md)
|
||||
- [MongoDB](../../engines/table-engines/integrations/mongodb.md)
|
||||
- [Redis](../../engines/table-engines/integrations/redis.md)
|
||||
- [HDFS](../../engines/table-engines/integrations/hdfs.md)
|
||||
- [S3](../../engines/table-engines/integrations/s3.md)
|
||||
- [Kafka](../../engines/table-engines/integrations/kafka.md)
|
||||
|
@ -48,4 +48,4 @@ SELECT * FROM test_table;
|
||||
|
||||
## See also
|
||||
|
||||
[Azure Blob Storage Table Function](/docs/en/sql-reference/table-functions/azureBlobStorage.md)
|
||||
[Azure Blob Storage Table Function](/docs/en/sql-reference/table-functions/azureBlobStorage)
|
||||
|
@ -233,6 +233,12 @@ libhdfs3 support HDFS namenode HA.
|
||||
- `_path` — Path to the file.
|
||||
- `_file` — Name of the file.
|
||||
|
||||
## Storage Settings {#storage-settings}
|
||||
|
||||
- [hdfs_truncate_on_insert](/docs/en/operations/settings/settings.md#hdfs-truncate-on-insert) - allows to truncate file before insert into it. Disabled by default.
|
||||
- [hdfs_create_multiple_files](/docs/en/operations/settings/settings.md#hdfs_allow_create_multiple_files) - allows to create a new file on each insert if format has suffix. Disabled by default.
|
||||
- [hdfs_skip_empty_files](/docs/en/operations/settings/settings.md#hdfs_skip_empty_files) - allows to skip empty files while reading. Disabled by default.
|
||||
|
||||
**See Also**
|
||||
|
||||
- [Virtual columns](../../../engines/table-engines/index.md#table_engines-virtual_columns)
|
||||
|
@ -7,4 +7,3 @@ sidebar_label: Integrations
|
||||
# Table Engines for Integrations
|
||||
|
||||
ClickHouse provides various means for integrating with external systems, including table engines. Like with all other table engines, the configuration is done using `CREATE TABLE` or `ALTER TABLE` queries. Then from a user perspective, the configured integration looks like a normal table, but queries to it are proxied to the external system. This transparent querying is one of the key advantages of this approach over alternative integration methods, like dictionaries or table functions, which require the use of custom query methods on each use.
|
||||
|
||||
|
@ -35,6 +35,10 @@ The table structure can differ from the original MySQL table structure:
|
||||
- Column types may differ from those in the original MySQL table. ClickHouse tries to [cast](../../../engines/database-engines/mysql.md#data_types-support) values to the ClickHouse data types.
|
||||
- The [external_table_functions_use_nulls](../../../operations/settings/settings.md#external-table-functions-use-nulls) setting defines how to handle Nullable columns. Default value: 1. If 0, the table function does not make Nullable columns and inserts default values instead of nulls. This is also applicable for NULL values inside arrays.
|
||||
|
||||
:::note
|
||||
The MySQL Table Engine is currently not available on the ClickHouse builds for MacOS ([issue](https://github.com/ClickHouse/ClickHouse/issues/21191))
|
||||
:::
|
||||
|
||||
**Engine Parameters**
|
||||
|
||||
- `host:port` — MySQL server address.
|
||||
|
@ -136,7 +136,7 @@ postgresql> SELECT * FROM test;
|
||||
|
||||
### Creating Table in ClickHouse, and connecting to PostgreSQL table created above
|
||||
|
||||
This example uses the [PostgreSQL table engine](/docs/en/engines/table-engines/integrations/postgresql.md) to connect the ClickHouse table to the PostgreSQL table:
|
||||
This example uses the [PostgreSQL table engine](/docs/en/engines/table-engines/integrations/postgresql.md) to connect the ClickHouse table to the PostgreSQL table and use both SELECT and INSERT statements to the PostgreSQL database:
|
||||
|
||||
``` sql
|
||||
CREATE TABLE default.postgresql_table
|
||||
@ -150,10 +150,21 @@ ENGINE = PostgreSQL('localhost:5432', 'public', 'test', 'postges_user', 'postgre
|
||||
|
||||
### Inserting initial data from PostgreSQL table into ClickHouse table, using a SELECT query
|
||||
|
||||
The [postgresql table function](/docs/en/sql-reference/table-functions/postgresql.md) copies the data from PostgreSQL to ClickHouse, which is often used for improving the query performance of the data by querying or performing analytics in ClickHouse rather than in PostgreSQL, or can also be used for migrating data from PostgreSQL to ClickHouse:
|
||||
The [postgresql table function](/docs/en/sql-reference/table-functions/postgresql.md) copies the data from PostgreSQL to ClickHouse, which is often used for improving the query performance of the data by querying or performing analytics in ClickHouse rather than in PostgreSQL, or can also be used for migrating data from PostgreSQL to ClickHouse. Since we will be copying the data from PostgreSQL to ClickHouse, we will use a MergeTree table engine in ClickHouse and call it postgresql_copy:
|
||||
|
||||
``` sql
|
||||
INSERT INTO default.postgresql_table
|
||||
CREATE TABLE default.postgresql_copy
|
||||
(
|
||||
`float_nullable` Nullable(Float32),
|
||||
`str` String,
|
||||
`int_id` Int32
|
||||
)
|
||||
ENGINE = MergeTree
|
||||
ORDER BY (int_id);
|
||||
```
|
||||
|
||||
``` sql
|
||||
INSERT INTO default.postgresql_copy
|
||||
SELECT * FROM postgresql('localhost:5432', 'public', 'test', 'postges_user', 'postgres_password');
|
||||
```
|
||||
|
||||
@ -164,13 +175,13 @@ If then performing ongoing synchronization between the PostgreSQL table and Clic
|
||||
This would require keeping track of the max ID or timestamp previously added, such as the following:
|
||||
|
||||
``` sql
|
||||
SELECT max(`int_id`) AS maxIntID FROM default.postgresql_table;
|
||||
SELECT max(`int_id`) AS maxIntID FROM default.postgresql_copy;
|
||||
```
|
||||
|
||||
Then inserting values from PostgreSQL table greater than the max
|
||||
|
||||
``` sql
|
||||
INSERT INTO default.postgresql_table
|
||||
INSERT INTO default.postgresql_copy
|
||||
SELECT * FROM postgresql('localhost:5432', 'public', 'test', 'postges_user', 'postgres_password');
|
||||
WHERE int_id > maxIntID;
|
||||
```
|
||||
@ -178,7 +189,7 @@ WHERE int_id > maxIntID;
|
||||
### Selecting data from the resulting ClickHouse table
|
||||
|
||||
``` sql
|
||||
SELECT * FROM postgresql_table WHERE str IN ('test');
|
||||
SELECT * FROM postgresql_copy WHERE str IN ('test');
|
||||
```
|
||||
|
||||
``` text
|
||||
|
119
docs/en/engines/table-engines/integrations/redis.md
Normal file
119
docs/en/engines/table-engines/integrations/redis.md
Normal file
@ -0,0 +1,119 @@
|
||||
---
|
||||
slug: /en/engines/table-engines/integrations/redis
|
||||
sidebar_position: 43
|
||||
sidebar_label: Redis
|
||||
---
|
||||
|
||||
# Redis
|
||||
|
||||
This engine allows integrating ClickHouse with [Redis](https://redis.io/). For Redis takes kv model, we strongly recommend you only query it in a point way, such as `where k=xx` or `where k in (xx, xx)`.
|
||||
|
||||
## Creating a Table {#creating-a-table}
|
||||
|
||||
``` sql
|
||||
CREATE TABLE [IF NOT EXISTS] [db.]table_name
|
||||
(
|
||||
name1 [type1],
|
||||
name2 [type2],
|
||||
...
|
||||
) ENGINE = Redis(host:port[, db_index[, password[, pool_size]]]) PRIMARY KEY(primary_key_name);
|
||||
```
|
||||
|
||||
**Engine Parameters**
|
||||
|
||||
- `host:port` — Redis server address, you can ignore port and default Redis port 6379 will be used.
|
||||
|
||||
- `db_index` — Redis db index range from 0 to 15, default is 0.
|
||||
|
||||
- `password` — User password, default is blank string.
|
||||
|
||||
- `pool_size` — Redis max connection pool size, default is 16.
|
||||
|
||||
- `primary_key_name` - any column name in the column list.
|
||||
|
||||
- `primary` must be specified, it supports only one column in the primary key. The primary key will be serialized in binary as a Redis key.
|
||||
|
||||
- columns other than the primary key will be serialized in binary as Redis value in corresponding order.
|
||||
|
||||
- queries with key equals or in filtering will be optimized to multi keys lookup from Redis. If queries without filtering key full table scan will happen which is a heavy operation.
|
||||
|
||||
## Usage Example {#usage-example}
|
||||
|
||||
Create a table in ClickHouse which allows to read data from Redis:
|
||||
|
||||
``` sql
|
||||
CREATE TABLE redis_table
|
||||
(
|
||||
`k` String,
|
||||
`m` String,
|
||||
`n` UInt32
|
||||
)
|
||||
ENGINE = Redis('redis1:6379') PRIMARY KEY(k);
|
||||
```
|
||||
|
||||
Insert:
|
||||
|
||||
```sql
|
||||
INSERT INTO redis_table Values('1', 1, '1', 1.0), ('2', 2, '2', 2.0);
|
||||
```
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT COUNT(*) FROM redis_table;
|
||||
```
|
||||
|
||||
``` text
|
||||
┌─count()─┐
|
||||
│ 2 │
|
||||
└─────────┘
|
||||
```
|
||||
|
||||
``` sql
|
||||
SELECT * FROM redis_table WHERE key='1';
|
||||
```
|
||||
|
||||
```text
|
||||
┌─key─┬─v1─┬─v2─┬─v3─┐
|
||||
│ 1 │ 1 │ 1 │ 1 │
|
||||
└─────┴────┴────┴────┘
|
||||
```
|
||||
|
||||
``` sql
|
||||
SELECT * FROM redis_table WHERE v1=2;
|
||||
```
|
||||
|
||||
```text
|
||||
┌─key─┬─v1─┬─v2─┬─v3─┐
|
||||
│ 2 │ 2 │ 2 │ 2 │
|
||||
└─────┴────┴────┴────┘
|
||||
```
|
||||
|
||||
Update:
|
||||
|
||||
Note that the primary key cannot be updated.
|
||||
|
||||
```sql
|
||||
ALTER TABLE redis_table UPDATE v1=2 WHERE key='1';
|
||||
```
|
||||
|
||||
Delete:
|
||||
|
||||
```sql
|
||||
ALTER TABLE redis_table DELETE WHERE key='1';
|
||||
```
|
||||
|
||||
Truncate:
|
||||
|
||||
Flush Redis db asynchronously. Also `Truncate` support SYNC mode.
|
||||
|
||||
```sql
|
||||
TRUNCATE TABLE redis_table SYNC;
|
||||
```
|
||||
|
||||
|
||||
## Limitations {#limitations}
|
||||
|
||||
Redis engine also supports scanning queries, such as `where k > xx`, but it has some limitations:
|
||||
1. Scanning query may produce some duplicated keys in a very rare case when it is rehashing. See details in [Redis Scan](https://github.com/redis/redis/blob/e4d183afd33e0b2e6e8d1c79a832f678a04a7886/src/dict.c#L1186-L1269)
|
||||
2. During the scanning, keys could be created and deleted, so the resulting dataset can not represent a valid point in time.
|
@ -127,6 +127,12 @@ CREATE TABLE table_with_asterisk (name String, value UInt32)
|
||||
ENGINE = S3('https://clickhouse-public-datasets.s3.amazonaws.com/my-bucket/{some,another}_folder/*', 'CSV');
|
||||
```
|
||||
|
||||
## Storage Settings {#storage-settings}
|
||||
|
||||
- [s3_truncate_on_insert](/docs/en/operations/settings/settings.md#s3-truncate-on-insert) - allows to truncate file before insert into it. Disabled by default.
|
||||
- [s3_create_multiple_files](/docs/en/operations/settings/settings.md#s3_allow_create_multiple_files) - allows to create a new file on each insert if format has suffix. Disabled by default.
|
||||
- [s3_skip_empty_files](/docs/en/operations/settings/settings.md#s3_skip_empty_files) - allows to skip empty files while reading. Disabled by default.
|
||||
|
||||
## S3-related Settings {#settings}
|
||||
|
||||
The following settings can be set before query execution or placed into configuration file.
|
||||
|
@ -1,104 +1,142 @@
|
||||
# Approximate Nearest Neighbor Search Indexes [experimental] {#table_engines-ANNIndex}
|
||||
|
||||
Nearest neighborhood search refers to the problem of finding the point(s) with the smallest distance to a given point in an n-dimensional
|
||||
space. Since exact search is in practice usually typically too slow, the task is often solved with approximate algorithms. A popular use
|
||||
case of of neighbor search is finding similar pictures (texts) for a given picture (text). Pictures (texts) can be decomposed into
|
||||
[embeddings](https://cloud.google.com/architecture/overview-extracting-and-serving-feature-embeddings-for-machine-learning), and instead of
|
||||
comparing pictures (texts) pixel-by-pixel (character-by-character), only the embeddings are compared.
|
||||
Nearest neighborhood search is the problem of finding the M closest points for a given point in an N-dimensional vector space. The most
|
||||
straightforward approach to solve this problem is a brute force search where the distance between all points in the vector space and the
|
||||
reference point is computed. This method guarantees perfect accuracy but it is usually too slow for practical applications. Thus, nearest
|
||||
neighborhood search problems are often solved with [approximative algorithms](https://github.com/erikbern/ann-benchmarks). Approximative
|
||||
nearest neighborhood search techniques, in conjunction with [embedding
|
||||
methods](https://cloud.google.com/architecture/overview-extracting-and-serving-feature-embeddings-for-machine-learning) allow to search huge
|
||||
amounts of media (pictures, songs, articles, etc.) in milliseconds.
|
||||
|
||||
In terms of SQL, the problem can be expressed as follows:
|
||||
Blogs:
|
||||
- [Vector Search with ClickHouse - Part 1](https://clickhouse.com/blog/vector-search-clickhouse-p1)
|
||||
- [Vector Search with ClickHouse - Part 2](https://clickhouse.com/blog/vector-search-clickhouse-p2)
|
||||
|
||||
|
||||
In terms of SQL, the nearest neighborhood problem can be expressed as follows:
|
||||
|
||||
``` sql
|
||||
SELECT *
|
||||
FROM table
|
||||
WHERE L2Distance(column, Point) < MaxDistance
|
||||
ORDER BY Distance(vectors, Point)
|
||||
LIMIT N
|
||||
```
|
||||
|
||||
`vectors` contains N-dimensional values of type [Array](../../../sql-reference/data-types/array.md) or
|
||||
[Tuple](../../../sql-reference/data-types/tuple.md), for example embeddings. Function `Distance` computes the distance between two vectors.
|
||||
Often, the the Euclidean (L2) distance is chosen as distance function but [other
|
||||
distance functions](/docs/en/sql-reference/functions/distance-functions.md) are also possible. `Point` is the reference point, e.g. `(0.17,
|
||||
0.33, ...)`, and `N` limits the number of search results.
|
||||
|
||||
An alternative formulation of the nearest neighborhood search problem looks as follows:
|
||||
|
||||
``` sql
|
||||
SELECT *
|
||||
FROM table
|
||||
ORDER BY L2Distance(column, Point)
|
||||
WHERE Distance(vectors, Point) < MaxDistance
|
||||
LIMIT N
|
||||
```
|
||||
|
||||
The queries are expensive because the L2 (Euclidean) distance between `Point` and all points in `column` and must be computed. To speed this process up, Approximate Nearest Neighbor Search Indexes (ANN indexes) store a compact representation of the search space (using clustering, search trees, etc.) which allows to compute an approximate answer quickly.
|
||||
While the first query returns the top-`N` closest points to the reference point, the second query returns all points closer to the reference
|
||||
point than a maximally allowed radius `MaxDistance`. Parameter `N` limits the number of returned values which is useful for situations where
|
||||
`MaxDistance` is difficult to determine in advance.
|
||||
|
||||
# Creating ANN Indexes
|
||||
With brute force search, both queries are expensive (linear in the number of points) because the distance between all points in `vectors` and
|
||||
`Point` must be computed. To speed this process up, Approximate Nearest Neighbor Search Indexes (ANN indexes) store a compact representation
|
||||
of the search space (using clustering, search trees, etc.) which allows to compute an approximate answer much quicker (in sub-linear time).
|
||||
|
||||
As long as ANN indexes are experimental, you first need to `SET allow_experimental_annoy_index = 1`.
|
||||
# Creating and Using ANN Indexes
|
||||
|
||||
Syntax to create an ANN index over an `Array` column:
|
||||
Syntax to create an ANN index over an [Array](../../../sql-reference/data-types/array.md) column:
|
||||
|
||||
```sql
|
||||
CREATE TABLE table
|
||||
(
|
||||
`id` Int64,
|
||||
`embedding` Array(Float32),
|
||||
INDEX <ann_index_name> embedding TYPE <ann_index_type>(<ann_index_parameters>) GRANULARITY <N>
|
||||
`vectors` Array(Float32),
|
||||
INDEX [ann_index_name vectors TYPE [ann_index_type]([ann_index_parameters]) [GRANULARITY [N]]
|
||||
)
|
||||
ENGINE = MergeTree
|
||||
ORDER BY id;
|
||||
```
|
||||
|
||||
Syntax to create an ANN index over a `Tuple` column:
|
||||
Syntax to create an ANN index over a [Tuple](../../../sql-reference/data-types/tuple.md) column:
|
||||
|
||||
```sql
|
||||
CREATE TABLE table
|
||||
(
|
||||
`id` Int64,
|
||||
`embedding` Tuple(Float32[, Float32[, ...]]),
|
||||
INDEX <ann_index_name> embedding TYPE <ann_index_type>(<ann_index_parameters>) GRANULARITY <N>
|
||||
`vectors` Tuple(Float32[, Float32[, ...]]),
|
||||
INDEX [ann_index_name] vectors TYPE [ann_index_type]([ann_index_parameters]) [GRANULARITY [N]]
|
||||
)
|
||||
ENGINE = MergeTree
|
||||
ORDER BY id;
|
||||
```
|
||||
|
||||
ANN indexes are built during column insertion and merge and `INSERT` and `OPTIMIZE` statements will be slower than for ordinary tables. ANNIndexes are ideally used only with immutable or rarely changed data, respectively there are much more read requests than write requests.
|
||||
|
||||
Similar to regular skip indexes, ANN indexes are constructed over granules and each indexed block consists of `GRANULARITY = <N>`-many
|
||||
granules. For example, if the primary index granularity of the table is 8192 (setting `index_granularity = 8192`) and `GRANULARITY = 2`,
|
||||
then each indexed block will consist of 16384 rows. However, unlike skip indexes, ANN indexes are not only able to skip the entire indexed
|
||||
block, they are able to skip individual granules in indexed blocks. As a result, the `GRANULARITY` parameter has a different meaning in ANN
|
||||
indexes than in normal skip indexes. Basically, the bigger `GRANULARITY` is chosen, the more data is provided to a single ANN index, and the
|
||||
higher the chance that with the right hyper parameters, the index will remember the data structure better.
|
||||
|
||||
# Using ANN Indexes
|
||||
ANN indexes are built during column insertion and merge. As a result, `INSERT` and `OPTIMIZE` statements will be slower than for ordinary
|
||||
tables. ANN indexes are ideally used only with immutable or rarely changed data, i.e. when there are far more read requests than write
|
||||
requests.
|
||||
|
||||
ANN indexes support two types of queries:
|
||||
|
||||
- WHERE queries:
|
||||
|
||||
``` sql
|
||||
SELECT *
|
||||
FROM table
|
||||
WHERE DistanceFunction(column, Point) < MaxDistance
|
||||
LIMIT N
|
||||
```
|
||||
|
||||
- ORDER BY queries:
|
||||
|
||||
``` sql
|
||||
SELECT *
|
||||
FROM table
|
||||
[WHERE ...]
|
||||
ORDER BY DistanceFunction(column, Point)
|
||||
ORDER BY Distance(vectors, Point)
|
||||
LIMIT N
|
||||
```
|
||||
|
||||
`DistanceFunction` is a [distance function](/docs/en/sql-reference/functions/distance-functions.md), `Point` is a reference vector (e.g. `(0.17, 0.33, ...)`) and `MaxDistance` is a floating point value which restricts the size of the neighbourhood.
|
||||
- WHERE queries:
|
||||
|
||||
``` sql
|
||||
SELECT *
|
||||
FROM table
|
||||
WHERE Distance(vectors, Point) < MaxDistance
|
||||
LIMIT N
|
||||
```
|
||||
|
||||
:::tip
|
||||
To avoid writing out large vectors, you can use [query parameters](/docs/en//interfaces/cli.md#queries-with-parameters-cli-queries-with-parameters), e.g.
|
||||
To avoid writing out large vectors, you can use [query
|
||||
parameters](/docs/en/interfaces/cli.md#queries-with-parameters-cli-queries-with-parameters), e.g.
|
||||
|
||||
```bash
|
||||
clickhouse-client --param_vec='hello' --query="SELECT * FROM table WHERE L2Distance(embedding, {vec: Array(Float32)}) < 1.0"
|
||||
clickhouse-client --param_vec='hello' --query="SELECT * FROM table WHERE L2Distance(vectors, {vec: Array(Float32)}) < 1.0"
|
||||
```
|
||||
:::
|
||||
|
||||
ANN indexes cannot speed up queries that contain both a `WHERE DistanceFunction(column, Point) < MaxDistance` and an `ORDER BY DistanceFunction(column, Point)` clause. Also, the approximate algorithms used to determine the nearest neighbors require a limit, hence queries that use an ANN index must have a `LIMIT` clause.
|
||||
**Restrictions**: Queries that contain both a `WHERE Distance(vectors, Point) < MaxDistance` and an `ORDER BY Distance(vectors, Point)`
|
||||
clause cannot use ANN indexes. Also, the approximate algorithms used to determine the nearest neighbors require a limit, hence queries
|
||||
without `LIMIT` clause cannot utilize ANN indexes. Also ANN indexes are only used if the query has a `LIMIT` value smaller than setting
|
||||
`max_limit_for_ann_queries` (default: 1 million rows). This is a safeguard to prevent large memory allocations by external libraries for
|
||||
approximate neighbor search.
|
||||
|
||||
**Differences to Skip Indexes** Similar to regular [skip indexes](https://clickhouse.com/docs/en/optimize/skipping-indexes), ANN indexes are
|
||||
constructed over granules and each indexed block consists of `GRANULARITY = [N]`-many granules (`[N]` = 1 by default for normal skip
|
||||
indexes). For example, if the primary index granularity of the table is 8192 (setting `index_granularity = 8192`) and `GRANULARITY = 2`,
|
||||
then each indexed block will contain 16384 rows. However, data structures and algorithms for approximate neighborhood search (usually
|
||||
provided by external libraries) are inherently row-oriented. They store a compact representation of a set of rows and also return rows for
|
||||
ANN queries. This causes some rather unintuitive differences in the way ANN indexes behave compared to normal skip indexes.
|
||||
|
||||
When a user defines a ANN index on a column, ClickHouse internally creates a ANN "sub-index" for each index block. The sub-index is "local"
|
||||
in the sense that it only knows about the rows of its containing index block. In the previous example and assuming that a column has 65536
|
||||
rows, we obtain four index blocks (spanning eight granules) and a ANN sub-index for each index block. A sub-index is theoretically able to
|
||||
return the rows with the N closest points within its index block directly. However, since ClickHouse loads data from disk to memory at the
|
||||
granularity of granules, sub-indexes extrapolate matching rows to granule granularity. This is different from regular skip indexes which
|
||||
skip data at the granularity of index blocks.
|
||||
|
||||
The `GRANULARITY` parameter determines how many ANN sub-indexes are created. Bigger `GRANULARITY` values mean fewer but larger ANN
|
||||
sub-indexes, up to the point where a column (or a column's data part) has only a single sub-index. In that case, the sub-index has a
|
||||
"global" view of all column rows and can directly return all granules of the column (part) with relevant rows (there are at most
|
||||
`LIMIT [N]`-many such granules). In a second step, ClickHouse will load these granules and identify the actually best rows by performing a
|
||||
brute-force distance calculation over all rows of the granules. With a small `GRANULARITY` value, each of the sub-indexes returns up to
|
||||
`LIMIT N`-many granules. As a result, more granules need to be loaded and post-filtered. Note that the search accuracy is with both cases
|
||||
equally good, only the processing performance differs. It is generally recommended to use a large `GRANULARITY` for ANN indexes and fall
|
||||
back to a smaller `GRANULARITY` values only in case of problems like excessive memory consumption of the ANN structures. If no `GRANULARITY`
|
||||
was specified for ANN indexes, the default value is 100 million.
|
||||
|
||||
An ANN index is only used if the query has a `LIMIT` value smaller than setting `max_limit_for_ann_queries` (default: 1 million rows). This is a safety measure which helps to avoid large memory consumption by external libraries for approximate neighbor search.
|
||||
|
||||
# Available ANN Indexes
|
||||
|
||||
@ -106,51 +144,68 @@ An ANN index is only used if the query has a `LIMIT` value smaller than setting
|
||||
|
||||
## Annoy {#annoy}
|
||||
|
||||
(currently disabled on ARM due to memory safety problems with the algorithm)
|
||||
Annoy indexes are currently experimental, to use them you first need to `SET allow_experimental_annoy_index = 1`. They are also currently
|
||||
disabled on ARM due to memory safety problems with the algorithm.
|
||||
|
||||
This type of ANN index implements [the Annoy algorithm](https://github.com/spotify/annoy) which uses a recursive division of the space in random linear surfaces (lines in 2D, planes in 3D etc.).
|
||||
This type of ANN index implements [the Annoy algorithm](https://github.com/spotify/annoy) which is based on a recursive division of the
|
||||
space in random linear surfaces (lines in 2D, planes in 3D etc.).
|
||||
|
||||
Syntax to create a Annoy index over a `Array` column:
|
||||
<div class='vimeo-container'>
|
||||
<iframe src="//www.youtube.com/embed/QkCCyLW0ehU"
|
||||
width="640"
|
||||
height="360"
|
||||
frameborder="0"
|
||||
allow="autoplay;
|
||||
fullscreen;
|
||||
picture-in-picture"
|
||||
allowfullscreen>
|
||||
</iframe>
|
||||
</div>
|
||||
|
||||
Syntax to create an Annoy index over an [Array](../../../sql-reference/data-types/array.md) column:
|
||||
|
||||
```sql
|
||||
CREATE TABLE table
|
||||
(
|
||||
id Int64,
|
||||
embedding Array(Float32),
|
||||
INDEX <ann_index_name> embedding TYPE annoy([DistanceName[, NumTrees]]) GRANULARITY N
|
||||
vectors Array(Float32),
|
||||
INDEX [ann_index_name] vectors TYPE annoy([Distance[, NumTrees]]) [GRANULARITY N]
|
||||
)
|
||||
ENGINE = MergeTree
|
||||
ORDER BY id;
|
||||
```
|
||||
|
||||
Syntax to create a Annoy index over a `Tuple` column:
|
||||
Syntax to create an ANN index over a [Tuple](../../../sql-reference/data-types/tuple.md) column:
|
||||
|
||||
```sql
|
||||
CREATE TABLE table
|
||||
(
|
||||
id Int64,
|
||||
embedding Tuple(Float32[, Float32[, ...]]),
|
||||
INDEX <ann_index_name> embedding TYPE annoy([DistanceName[, NumTrees]]) GRANULARITY N
|
||||
vectors Tuple(Float32[, Float32[, ...]]),
|
||||
INDEX [ann_index_name] vectors TYPE annoy([Distance[, NumTrees]]) [GRANULARITY N]
|
||||
)
|
||||
ENGINE = MergeTree
|
||||
ORDER BY id;
|
||||
```
|
||||
|
||||
Parameter `DistanceName` is name of a distance function (default `L2Distance`). Annoy currently supports `L2Distance` and `cosineDistance` as distance functions. Parameter `NumTrees` (default: 100) is the number of trees which the algorithm will create. Higher values of `NumTree` mean slower `CREATE` and `SELECT` statements (approximately linearly), but increase the accuracy of search results.
|
||||
Annoy currently supports `L2Distance` and `cosineDistance` as distance function `Distance`. If no distance function was specified during
|
||||
index creation, `L2Distance` is used as default. Parameter `NumTrees` is the number of trees which the algorithm creates (default if not
|
||||
specified: 100). Higher values of `NumTree` mean more accurate search results but slower index creation / query times (approximately
|
||||
linearly) as well as larger index sizes.
|
||||
|
||||
:::note
|
||||
Indexes over columns of type `Array` will generally work faster than indexes on `Tuple` columns. All arrays **must** have same length. Use [CONSTRAINT](/docs/en/sql-reference/statements/create/table.md#constraints) to avoid errors. For example, `CONSTRAINT constraint_name_1 CHECK length(embedding) = 256`.
|
||||
Indexes over columns of type `Array` will generally work faster than indexes on `Tuple` columns. All arrays **must** have same length. Use
|
||||
[CONSTRAINT](/docs/en/sql-reference/statements/create/table.md#constraints) to avoid errors. For example, `CONSTRAINT constraint_name_1
|
||||
CHECK length(vectors) = 256`.
|
||||
:::
|
||||
|
||||
Setting `annoy_index_search_k_nodes` (default: `NumTrees * LIMIT`) determines how many tree nodes are inspected during SELECTs. It can be used to
|
||||
balance runtime and accuracy at runtime.
|
||||
Setting `annoy_index_search_k_nodes` (default: `NumTrees * LIMIT`) determines how many tree nodes are inspected during SELECTs. Larger
|
||||
values mean more accurate results at the cost of longer query runtime:
|
||||
|
||||
Example:
|
||||
|
||||
``` sql
|
||||
```sql
|
||||
SELECT *
|
||||
FROM table_name [WHERE ...]
|
||||
ORDER BY L2Distance(column, Point)
|
||||
FROM table_name
|
||||
ORDER BY L2Distance(vectors, Point)
|
||||
LIMIT N
|
||||
SETTINGS annoy_index_search_k_nodes=100
|
||||
SETTINGS annoy_index_search_k_nodes=100;
|
||||
```
|
||||
|
@ -491,7 +491,7 @@ Syntax: `tokenbf_v1(size_of_bloom_filter_in_bytes, number_of_hash_functions, ran
|
||||
|
||||
#### Special-purpose
|
||||
|
||||
- An experimental index to support approximate nearest neighbor (ANN) search. See [here](annindexes.md) for details.
|
||||
- Experimental indexes to support approximate nearest neighbor (ANN) search. See [here](annindexes.md) for details.
|
||||
- An experimental inverted index to support full-text search. See [here](invertedindexes.md) for details.
|
||||
|
||||
### Functions Support {#functions-support}
|
||||
@ -853,7 +853,7 @@ Tags:
|
||||
- `max_data_part_size_bytes` — the maximum size of a part that can be stored on any of the volume’s disks. If the a size of a merged part estimated to be bigger than `max_data_part_size_bytes` then this part will be written to a next volume. Basically this feature allows to keep new/small parts on a hot (SSD) volume and move them to a cold (HDD) volume when they reach large size. Do not use this setting if your policy has only one volume.
|
||||
- `move_factor` — when the amount of available space gets lower than this factor, data automatically starts to move on the next volume if any (by default, 0.1). ClickHouse sorts existing parts by size from largest to smallest (in descending order) and selects parts with the total size that is sufficient to meet the `move_factor` condition. If the total size of all parts is insufficient, all parts will be moved.
|
||||
- `prefer_not_to_merge` — Disables merging of data parts on this volume. When this setting is enabled, merging data on this volume is not allowed. This allows controlling how ClickHouse works with slow disks.
|
||||
- `perform_ttl_move_on_insert` — Disables TTL move on data part INSERT. By default if we insert a data part that already expired by the TTL move rule it immediately goes to a volume/disk declared in move rule. This can significantly slowdown insert in case if destination volume/disk is slow (e.g. S3).
|
||||
- `perform_ttl_move_on_insert` — Disables TTL move on data part INSERT. By default (if enabled) if we insert a data part that already expired by the TTL move rule it immediately goes to a volume/disk declared in move rule. This can significantly slowdown insert in case if destination volume/disk is slow (e.g. S3). If disabled then already expired data part is written into a default volume and then right after moved to TTL volume.
|
||||
- `load_balancing` - Policy for disk balancing, `round_robin` or `least_used`.
|
||||
|
||||
Configuration examples:
|
||||
@ -1138,7 +1138,7 @@ These parameters define the cache layer:
|
||||
|
||||
Cache parameters:
|
||||
- `path` — The path where metadata for the cache is stored.
|
||||
- `max_size` — The size (amount of memory) that the cache can grow to.
|
||||
- `max_size` — The size (amount of disk space) that the cache can grow to.
|
||||
|
||||
:::tip
|
||||
There are several other cache parameters that you can use to tune your storage, see [using local cache](/docs/en/operations/storing-data.md/#using-local-cache) for the details.
|
||||
|
@ -92,3 +92,11 @@ $ echo -e "1,2\n3,4" | clickhouse-local -q "CREATE TABLE table (a Int64, b Int64
|
||||
`PARTITION BY` — Optional. It is possible to create separate files by partitioning the data on a partition key. In most cases, you don't need a partition key, and if it is needed you generally don't need a partition key more granular than by month. Partitioning does not speed up queries (in contrast to the ORDER BY expression). You should never use too granular partitioning. Don't partition your data by client identifiers or names (instead, make client identifier or name the first column in the ORDER BY expression).
|
||||
|
||||
For partitioning by month, use the `toYYYYMM(date_column)` expression, where `date_column` is a column with a date of the type [Date](/docs/en/sql-reference/data-types/date.md). The partition names here have the `"YYYYMM"` format.
|
||||
|
||||
## Settings {#settings}
|
||||
|
||||
- [engine_file_empty_if_not_exists](/docs/en/operations/settings/settings.md#engine-file-emptyif-not-exists) - allows to select empty data from a file that doesn't exist. Disabled by default.
|
||||
- [engine_file_truncate_on_insert](/docs/en/operations/settings/settings.md#engine-file-truncate-on-insert) - allows to truncate file before insert into it. Disabled by default.
|
||||
- [engine_file_allow_create_multiple_files](/docs/en/operations/settings/settings.md#engine_file_allow_create_multiple_files) - allows to create a new file on each insert if format has suffix. Disabled by default.
|
||||
- [engine_file_skip_empty_files](/docs/en/operations/settings/settings.md#engine_file_skip_empty_files) - allows to skip empty files while reading. Disabled by default.
|
||||
- [storage_file_read_method](/docs/en/operations/settings/settings.md#engine-file-emptyif-not-exists) - method of reading data from storage file, one of: `read`, `pread`, `mmap`. The mmap method does not apply to clickhouse-server (it's intended for clickhouse-local). Default value: `pread` for clickhouse-server, `mmap` for clickhouse-local.
|
||||
|
@ -102,3 +102,7 @@ SELECT * FROM url_engine_table
|
||||
`PARTITION BY` — Optional. It is possible to create separate files by partitioning the data on a partition key. In most cases, you don't need a partition key, and if it is needed you generally don't need a partition key more granular than by month. Partitioning does not speed up queries (in contrast to the ORDER BY expression). You should never use too granular partitioning. Don't partition your data by client identifiers or names (instead, make client identifier or name the first column in the ORDER BY expression).
|
||||
|
||||
For partitioning by month, use the `toYYYYMM(date_column)` expression, where `date_column` is a column with a date of the type [Date](/docs/en/sql-reference/data-types/date.md). The partition names here have the `"YYYYMM"` format.
|
||||
|
||||
## Storage Settings {#storage-settings}
|
||||
|
||||
- [engine_url_skip_empty_files](/docs/en/operations/settings/settings.md#engine_url_skip_empty_files) - allows to skip empty files while reading. Disabled by default.
|
||||
|
@ -194,7 +194,129 @@ You can pass parameters to `clickhouse-client` (all parameters have a default va
|
||||
- `--print-profile-events` – Print `ProfileEvents` packets.
|
||||
- `--profile-events-delay-ms` – Delay between printing `ProfileEvents` packets (-1 - print only totals, 0 - print every single packet).
|
||||
|
||||
Since version 20.5, `clickhouse-client` has automatic syntax highlighting (always enabled).
|
||||
Instead of `--host`, `--port`, `--user` and `--password` options, ClickHouse client also supports connection strings (see next section).
|
||||
|
||||
|
||||
## Connection string {#connection_string}
|
||||
|
||||
clickhouse-client alternatively supports connecting to clickhouse server using a connection string similar to [MongoDB](https://www.mongodb.com/docs/manual/reference/connection-string/), [PostgreSQL](https://www.postgresql.org/docs/current/libpq-connect.html#LIBPQ-CONNSTRING), [MySQL](https://dev.mysql.com/doc/refman/8.0/en/connecting-using-uri-or-key-value-pairs.html#connecting-using-uri). It has the following syntax:
|
||||
|
||||
```text
|
||||
clickhouse:[//[user[:password]@][hosts_and_ports]][/database][?query_parameters]
|
||||
```
|
||||
|
||||
Where
|
||||
|
||||
- `user` - (optional) is a user name,
|
||||
- `password` - (optional) is a user password. If `:` is specified and the password is blank, the client will prompt for the user's password.
|
||||
- `hosts_and_ports` - (optional) is a list of hosts and optional ports `host[:port] [, host:[port]], ...`,
|
||||
- `database` - (optional) is the database name,
|
||||
- `query_parameters` - (optional) is a list of key-value pairs `param1=value1[,&param2=value2], ...`. For some parameters, no value is required. Parameter names and values are case-sensitive.
|
||||
|
||||
If no user is specified, `default` user without password will be used.
|
||||
If no host is specified, `localhost` will be used.
|
||||
If no port is specified, `9000` will be used as the port.
|
||||
If no database is specified, the `default` database will be used.
|
||||
|
||||
If the user name, password or database was specified in the connection string, it cannot be specified using `--user`, `--password` or `--database` (and vice versa).
|
||||
|
||||
The host component can either be a host name or an IP address. Put an IPv6 address in square brackets to specify it:
|
||||
|
||||
```text
|
||||
clickhouse://[2001:db8::1234]
|
||||
```
|
||||
|
||||
URI allows multiple hosts to be connected to. Connection strings can contain multiple hosts. ClickHouse-client will try to connect to these hosts in order (i.e. from left to right). After the connection is established, no attempt to connect to the remaining hosts is made.
|
||||
|
||||
The connection string must be specified as the first argument of clickhouse-client. The connection string can be combined with arbitrary other [command-line-options](#command-line-options) except `--host/-h` and `--port`.
|
||||
|
||||
The following keys are allowed for component `query_parameter`:
|
||||
|
||||
- `secure` or shorthanded `s` - no value. If specified, client will connect to the server over a secure connection (TLS). See `secure` in [command-line-options](#command-line-options)
|
||||
|
||||
### Percent encoding {#connection_string_uri_percent_encoding}
|
||||
|
||||
Non-US ASCII, spaces and special characters in the `user`, `password`, `hosts`, `database` and `query parameters` must be [percent-encoded](https://en.wikipedia.org/wiki/URL_encoding).
|
||||
|
||||
### Examples {#connection_string_examples}
|
||||
|
||||
Connect to localhost using port 9000 and execute the query `SELECT 1`.
|
||||
|
||||
``` bash
|
||||
clickhouse-client clickhouse://localhost:9000 --query "SELECT 1"
|
||||
```
|
||||
|
||||
Connect to localhost using user `john` with password `secret`, host `127.0.0.1` and port `9000`
|
||||
|
||||
``` bash
|
||||
clickhouse-client clickhouse://john:secret@127.0.0.1:9000
|
||||
```
|
||||
|
||||
Connect to localhost using default user, host with IPV6 address `[::1]` and port `9000`.
|
||||
|
||||
``` bash
|
||||
clickhouse-client clickhouse://[::1]:9000
|
||||
```
|
||||
|
||||
Connect to localhost using port 9000 in multiline mode.
|
||||
|
||||
``` bash
|
||||
clickhouse-client clickhouse://localhost:9000 '-m'
|
||||
```
|
||||
|
||||
Connect to localhost using port 9000 with the user `default`.
|
||||
|
||||
``` bash
|
||||
clickhouse-client clickhouse://default@localhost:9000
|
||||
|
||||
# equivalent to:
|
||||
clickhouse-client clickhouse://localhost:9000 --user default
|
||||
```
|
||||
|
||||
Connect to localhost using port 9000 to `my_database` database.
|
||||
|
||||
``` bash
|
||||
clickhouse-client clickhouse://localhost:9000/my_database
|
||||
|
||||
# equivalent to:
|
||||
clickhouse-client clickhouse://localhost:9000 --database my_database
|
||||
```
|
||||
|
||||
Connect to localhost using port 9000 to `my_database` database specified in the connection string and a secure connection using shorthanded 's' URI parameter.
|
||||
|
||||
```bash
|
||||
clickhouse-client clickhouse://localhost/my_database?s
|
||||
|
||||
# equivalent to:
|
||||
clickhouse-client clickhouse://localhost/my_database -s
|
||||
```
|
||||
|
||||
Connect to default host using default port, default user, and default database.
|
||||
|
||||
``` bash
|
||||
clickhouse-client clickhouse:
|
||||
```
|
||||
|
||||
Connect to the default host using the default port, using user `my_user` and no password.
|
||||
|
||||
``` bash
|
||||
clickhouse-client clickhouse://my_user@
|
||||
|
||||
# Using a blank password between : and @ means asking the user to enter the password before starting the connection.
|
||||
clickhouse-client clickhouse://my_user:@
|
||||
```
|
||||
|
||||
Connect to localhost using email as the user name. `@` symbol is percent encoded to `%40`.
|
||||
|
||||
``` bash
|
||||
clickhouse-client clickhouse://some_user%40some_mail.com@localhost:9000
|
||||
```
|
||||
|
||||
Connect to one of the provided hosts: `192.168.1.15`, `192.168.1.25`.
|
||||
|
||||
``` bash
|
||||
clickhouse-client clickhouse://192.168.1.15,192.168.1.25
|
||||
```
|
||||
|
||||
### Configuration Files {#configuration_files}
|
||||
|
||||
|
@ -193,6 +193,7 @@ SELECT * FROM nestedt FORMAT TSV
|
||||
- [output_format_tsv_crlf_end_of_line](/docs/en/operations/settings/settings-formats.md/#output_format_tsv_crlf_end_of_line) - if it is set true, end of line in TSV output format will be `\r\n` instead of `\n`. Default value - `false`.
|
||||
- [input_format_tsv_skip_first_lines](/docs/en/operations/settings/settings-formats.md/#input_format_tsv_skip_first_lines) - skip specified number of lines at the beginning of data. Default value - `0`.
|
||||
- [input_format_tsv_detect_header](/docs/en/operations/settings/settings-formats.md/#input_format_tsv_detect_header) - automatically detect header with names and types in TSV format. Default value - `true`.
|
||||
- [input_format_tsv_skip_trailing_empty_lines](/docs/en/operations/settings/settings-formats.md/#input_format_tsv_skip_trailing_empty_lines) - skip trailing empty lines at the end of data. Default value - `false`.
|
||||
|
||||
## TabSeparatedRaw {#tabseparatedraw}
|
||||
|
||||
@ -467,7 +468,9 @@ The CSV format supports the output of totals and extremes the same way as `TabSe
|
||||
- [output_format_csv_crlf_end_of_line](/docs/en/operations/settings/settings-formats.md/#output_format_csv_crlf_end_of_line) - if it is set to true, end of line in CSV output format will be `\r\n` instead of `\n`. Default value - `false`.
|
||||
- [input_format_csv_skip_first_lines](/docs/en/operations/settings/settings-formats.md/#input_format_csv_skip_first_lines) - skip the specified number of lines at the beginning of data. Default value - `0`.
|
||||
- [input_format_csv_detect_header](/docs/en/operations/settings/settings-formats.md/#input_format_csv_detect_header) - automatically detect header with names and types in CSV format. Default value - `true`.
|
||||
- [input_format_csv_skip_trailing_empty_lines](/docs/en/operations/settings/settings-formats.md/#input_format_csv_skip_trailing_empty_lines) - skip trailing empty lines at the end of data. Default value - `false`.
|
||||
- [input_format_csv_trim_whitespaces](/docs/en/operations/settings/settings-formats.md/#input_format_csv_trim_whitespaces) - trim spaces and tabs in non-quoted CSV strings. Default value - `true`.
|
||||
- [input_format_csv_allow_whitespace_or_tab_as_delimiter](/docs/en/operations/settings/settings-formats.md/#input_format_csv_allow_whitespace_or_tab_as_delimiter) - Allows to use whitespace or tab as field delimiter in CSV strings. Default value - `false`.
|
||||
|
||||
## CSVWithNames {#csvwithnames}
|
||||
|
||||
@ -495,7 +498,9 @@ the types from input data will be compared with the types of the corresponding c
|
||||
|
||||
Similar to [Template](#format-template), but it prints or reads all names and types of columns and uses escaping rule from [format_custom_escaping_rule](/docs/en/operations/settings/settings-formats.md/#format_custom_escaping_rule) setting and delimiters from [format_custom_field_delimiter](/docs/en/operations/settings/settings-formats.md/#format_custom_field_delimiter), [format_custom_row_before_delimiter](/docs/en/operations/settings/settings-formats.md/#format_custom_row_before_delimiter), [format_custom_row_after_delimiter](/docs/en/operations/settings/settings-formats.md/#format_custom_row_after_delimiter), [format_custom_row_between_delimiter](/docs/en/operations/settings/settings-formats.md/#format_custom_row_between_delimiter), [format_custom_result_before_delimiter](/docs/en/operations/settings/settings-formats.md/#format_custom_result_before_delimiter) and [format_custom_result_after_delimiter](/docs/en/operations/settings/settings-formats.md/#format_custom_result_after_delimiter) settings, not from format strings.
|
||||
|
||||
If setting [input_format_custom_detect_header](/docs/en/operations/settings/settings.md/#input_format_custom_detect_header) is enabled, ClickHouse will automatically detect header with names and types if any.
|
||||
If setting [input_format_custom_detect_header](/docs/en/operations/settings/settings-formats.md/#input_format_custom_detect_header) is enabled, ClickHouse will automatically detect header with names and types if any.
|
||||
|
||||
If setting [input_format_tsv_skip_trailing_empty_lines](/docs/en/operations/settings/settings-formats.md/#input_format_custom_detect_header) is enabled, trailing empty lines at the end of file will be skipped.
|
||||
|
||||
There is also `CustomSeparatedIgnoreSpaces` format, which is similar to [TemplateIgnoreSpaces](#templateignorespaces).
|
||||
|
||||
@ -1873,13 +1878,13 @@ The table below shows supported data types and how they match ClickHouse [data t
|
||||
| `string (uuid)` \** | [UUID](/docs/en/sql-reference/data-types/uuid.md) | `string (uuid)` \** |
|
||||
| `fixed(16)` | [Int128/UInt128](/docs/en/sql-reference/data-types/int-uint.md) | `fixed(16)` |
|
||||
| `fixed(32)` | [Int256/UInt256](/docs/en/sql-reference/data-types/int-uint.md) | `fixed(32)` |
|
||||
| `record` | [Tuple](/docs/en/sql-reference/data-types/tuple.md) | `record` |
|
||||
|
||||
|
||||
|
||||
\* `bytes` is default, controlled by [output_format_avro_string_column_pattern](/docs/en/operations/settings/settings-formats.md/#output_format_avro_string_column_pattern)
|
||||
\** [Avro logical types](https://avro.apache.org/docs/current/spec.html#Logical+Types)
|
||||
|
||||
Unsupported Avro data types: `record` (non-root), `map`
|
||||
|
||||
Unsupported Avro logical data types: `time-millis`, `time-micros`, `duration`
|
||||
|
||||
### Inserting Data {#inserting-data-1}
|
||||
@ -1918,7 +1923,26 @@ Output Avro file compression and sync interval can be configured with [output_fo
|
||||
|
||||
Using the ClickHouse [DESCRIBE](/docs/en/sql-reference/statements/describe-table) function, you can quickly view the inferred format of an Avro file like the following example. This example includes the URL of a publicly accessible Avro file in the ClickHouse S3 public bucket:
|
||||
|
||||
``` DESCRIBE url('https://clickhouse-public-datasets.s3.eu-central-1.amazonaws.com/hits.avro','Avro');
|
||||
```
|
||||
DESCRIBE url('https://clickhouse-public-datasets.s3.eu-central-1.amazonaws.com/hits.avro','Avro');
|
||||
```
|
||||
```
|
||||
┌─name───────────────────────┬─type────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐
|
||||
│ WatchID │ Int64 │ │ │ │ │ │
|
||||
│ JavaEnable │ Int32 │ │ │ │ │ │
|
||||
│ Title │ String │ │ │ │ │ │
|
||||
│ GoodEvent │ Int32 │ │ │ │ │ │
|
||||
│ EventTime │ Int32 │ │ │ │ │ │
|
||||
│ EventDate │ Date32 │ │ │ │ │ │
|
||||
│ CounterID │ Int32 │ │ │ │ │ │
|
||||
│ ClientIP │ Int32 │ │ │ │ │ │
|
||||
│ ClientIP6 │ FixedString(16) │ │ │ │ │ │
|
||||
│ RegionID │ Int32 │ │ │ │ │ │
|
||||
...
|
||||
│ IslandID │ FixedString(16) │ │ │ │ │ │
|
||||
│ RequestNum │ Int32 │ │ │ │ │ │
|
||||
│ RequestTry │ Int32 │ │ │ │ │ │
|
||||
└────────────────────────────┴─────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
|
||||
```
|
||||
|
||||
## AvroConfluent {#data-format-avro-confluent}
|
||||
|
@ -329,8 +329,8 @@ SELECT count() FROM system.schema_inference_cache WHERE storage='S3'
|
||||
## Text formats {#text-formats}
|
||||
|
||||
For text formats, ClickHouse reads the data row by row, extracts column values according to the format,
|
||||
and then uses some recursive parsers and heuristics to determine the type for each value. The maximum number of rows read from the data in schema inference
|
||||
is controlled by the setting `input_format_max_rows_to_read_for_schema_inference` with default value 25000.
|
||||
and then uses some recursive parsers and heuristics to determine the type for each value. The maximum number of rows and bytes read from the data in schema inference
|
||||
is controlled by the settings `input_format_max_rows_to_read_for_schema_inference` (25000 by default) and `input_format_max_bytes_to_read_for_schema_inference` (32Mb by default).
|
||||
By default, all inferred types are [Nullable](../sql-reference/data-types/nullable.md), but you can change this by setting `schema_inference_make_columns_nullable` (see examples in the [settings](#settings-for-text-formats) section).
|
||||
|
||||
### JSON formats {#json-formats}
|
||||
@ -1144,13 +1144,15 @@ Line: value_1=2, value_2="Some string 2", value_3="[4, 5, NULL]"$$)
|
||||
|
||||
### Settings for text formats {#settings-for-text-formats}
|
||||
|
||||
#### input_format_max_rows_to_read_for_schema_inference
|
||||
#### input_format_max_rows_to_read_for_schema_inference/input_format_max_bytes_to_read_for_schema_inference
|
||||
|
||||
This setting controls the maximum number of rows to be read while schema inference.
|
||||
The more rows are read, the more time is spent on schema inference, but the greater the chance to
|
||||
These settings control the amount of data to be read while schema inference.
|
||||
The more rows/bytes are read, the more time is spent on schema inference, but the greater the chance to
|
||||
correctly determine the types (especially when the data contains a lot of nulls).
|
||||
|
||||
Default value: `25000`.
|
||||
Default values:
|
||||
- `25000` for `input_format_max_rows_to_read_for_schema_inference`.
|
||||
- `33554432` (32 Mb) for `input_format_max_bytes_to_read_for_schema_inference`.
|
||||
|
||||
#### column_names_for_schema_inference
|
||||
|
||||
@ -1643,7 +1645,7 @@ In schema inference for CapnProto format ClickHouse uses the following type matc
|
||||
## Strong-typed binary formats {#strong-typed-binary-formats}
|
||||
|
||||
In such formats, each serialized value contains information about its type (and possibly about its name), but there is no information about the whole table.
|
||||
In schema inference for such formats, ClickHouse reads data row by row (up to `input_format_max_rows_to_read_for_schema_inference` rows) and extracts
|
||||
In schema inference for such formats, ClickHouse reads data row by row (up to `input_format_max_rows_to_read_for_schema_inference` rows or `input_format_max_bytes_to_read_for_schema_inference` bytes) and extracts
|
||||
the type (and possibly name) for each value from the data and then converts these types to ClickHouse types.
|
||||
|
||||
### MsgPack {#msgpack}
|
||||
|
@ -83,6 +83,7 @@ The BACKUP and RESTORE statements take a list of DATABASE and TABLE names, a des
|
||||
- [`compression_method`](/docs/en/sql-reference/statements/create/table.md/#column-compression-codecs) and compression_level
|
||||
- `password` for the file on disk
|
||||
- `base_backup`: the destination of the previous backup of this source. For example, `Disk('backups', '1.zip')`
|
||||
- `structure_only`: if enabled, allows to only backup or restore the CREATE statements without the data of tables
|
||||
|
||||
### Usage examples
|
||||
|
||||
@ -398,4 +399,4 @@ To disallow concurrent backup/restore, you can use these settings respectively.
|
||||
```
|
||||
|
||||
The default value for both is true, so by default concurrent backup/restores are allowed.
|
||||
When these settings are false on a cluster, only 1 backup/restore is allowed to run on a cluster at a time.
|
||||
When these settings are false on a cluster, only 1 backup/restore is allowed to run on a cluster at a time.
|
||||
|
@ -202,7 +202,7 @@ Default: 15
|
||||
|
||||
## dns_max_consecutive_failures
|
||||
|
||||
Max connection failures before dropping host from ClickHouse DNS cache
|
||||
Max consecutive resolving failures before dropping a host from ClickHouse DNS cache
|
||||
|
||||
Type: UInt32
|
||||
|
||||
|
@ -137,6 +137,12 @@ The maximum rows of data to read for automatic schema inference.
|
||||
|
||||
Default value: `25'000`.
|
||||
|
||||
## input_format_max_bytes_to_read_for_schema_inference {#input_format_max_bytes_to_read_for_schema_inference}
|
||||
|
||||
The maximum amount of data in bytes to read for automatic schema inference.
|
||||
|
||||
Default value: `33554432` (32 Mb).
|
||||
|
||||
## column_names_for_schema_inference {#column_names_for_schema_inference}
|
||||
|
||||
The list of column names to use in schema inference for formats without column names. The format: 'column1,column2,column3,...'
|
||||
@ -728,6 +734,12 @@ My NULL
|
||||
My NULL
|
||||
```
|
||||
|
||||
### input_format_tsv_skip_trailing_empty_lines {#input_format_tsv_skip_trailing_empty_lines}
|
||||
|
||||
When enabled, trailing empty lines at the end of TSV file will be skipped.
|
||||
|
||||
Disabled by default.
|
||||
|
||||
## CSV format settings {#csv-format-settings}
|
||||
|
||||
### format_csv_delimiter {#format_csv_delimiter}
|
||||
@ -882,6 +894,12 @@ My NULL
|
||||
My NULL
|
||||
```
|
||||
|
||||
### input_format_csv_skip_trailing_empty_lines {#input_format_csv_skip_trailing_empty_lines}
|
||||
|
||||
When enabled, trailing empty lines at the end of CSV file will be skipped.
|
||||
|
||||
Disabled by default.
|
||||
|
||||
### input_format_csv_trim_whitespaces {#input_format_csv_trim_whitespaces}
|
||||
|
||||
Trims spaces and tabs in non-quoted CSV strings.
|
||||
@ -914,6 +932,38 @@ Result
|
||||
" string "
|
||||
```
|
||||
|
||||
### input_format_csv_allow_whitespace_or_tab_as_delimiter {#input_format_csv_allow_whitespace_or_tab_as_delimiter}
|
||||
|
||||
Allow to use whitespace or tab as field delimiter in CSV strings.
|
||||
|
||||
Default value: `false`.
|
||||
|
||||
**Examples**
|
||||
|
||||
Query
|
||||
|
||||
```bash
|
||||
echo 'a b' | ./clickhouse local -q "select * from table FORMAT CSV" --input-format="CSV" --input_format_csv_allow_whitespace_or_tab_as_delimiter=true --format_csv_delimiter=' '
|
||||
```
|
||||
|
||||
Result
|
||||
|
||||
```text
|
||||
a b
|
||||
```
|
||||
|
||||
Query
|
||||
|
||||
```bash
|
||||
echo 'a b' | ./clickhouse local -q "select * from table FORMAT CSV" --input-format="CSV" --input_format_csv_allow_whitespace_or_tab_as_delimiter=true --format_csv_delimiter='\t'
|
||||
```
|
||||
|
||||
Result
|
||||
|
||||
```text
|
||||
a b
|
||||
```
|
||||
|
||||
## Values format settings {#values-format-settings}
|
||||
|
||||
### input_format_values_interpret_expressions {#input_format_values_interpret_expressions}
|
||||
@ -1475,6 +1525,12 @@ Sets the character that is interpreted as a suffix after the result set for [Cus
|
||||
|
||||
Default value: `''`.
|
||||
|
||||
### input_format_custom_skip_trailing_empty_lines {#input_format_custom_skip_trailing_empty_lines}
|
||||
|
||||
When enabled, trailing empty lines at the end of file in CustomSeparated format will be skipped.
|
||||
|
||||
Disabled by default.
|
||||
|
||||
## Regexp format settings {#regexp-format-settings}
|
||||
|
||||
### format_regexp_escaping_rule {#format_regexp_escaping_rule}
|
||||
|
@ -1957,6 +1957,10 @@ Default value: empty string (disabled)
|
||||
For the replicated tables by default the only 100 of the most recent inserts for each partition are deduplicated (see [replicated_deduplication_window](merge-tree-settings.md/#replicated-deduplication-window), [replicated_deduplication_window_seconds](merge-tree-settings.md/#replicated-deduplication-window-seconds)).
|
||||
For not replicated tables see [non_replicated_deduplication_window](merge-tree-settings.md/#non-replicated-deduplication-window).
|
||||
|
||||
:::note
|
||||
`insert_deduplication_token` works on a partition level (the same as `insert_deduplication` checksum). Multiple partitions can have the same `insert_deduplication_token`.
|
||||
:::
|
||||
|
||||
Example:
|
||||
|
||||
```sql
|
||||
@ -3324,7 +3328,35 @@ Possible values:
|
||||
|
||||
Default value: `0`.
|
||||
|
||||
## s3_truncate_on_insert
|
||||
## engine_file_allow_create_multiple_files {#engine_file_allow_create_multiple_files}
|
||||
|
||||
Enables or disables creating a new file on each insert in file engine tables if the format has the suffix (`JSON`, `ORC`, `Parquet`, etc.). If enabled, on each insert a new file will be created with a name following this pattern:
|
||||
|
||||
`data.Parquet` -> `data.1.Parquet` -> `data.2.Parquet`, etc.
|
||||
|
||||
Possible values:
|
||||
- 0 — `INSERT` query appends new data to the end of the file.
|
||||
- 1 — `INSERT` query creates a new file.
|
||||
|
||||
Default value: `0`.
|
||||
|
||||
## engine_file_skip_empty_files {#engine_file_skip_empty_files}
|
||||
|
||||
Enables or disables skipping empty files in [File](../../engines/table-engines/special/file.md) engine tables.
|
||||
|
||||
Possible values:
|
||||
- 0 — `SELECT` throws an exception if empty file is not compatible with requested format.
|
||||
- 1 — `SELECT` returns empty result for empty file.
|
||||
|
||||
Default value: `0`.
|
||||
|
||||
## storage_file_read_method {#storage_file_read_method}
|
||||
|
||||
Method of reading data from storage file, one of: `read`, `pread`, `mmap`. The mmap method does not apply to clickhouse-server (it's intended for clickhouse-local).
|
||||
|
||||
Default value: `pread` for clickhouse-server, `mmap` for clickhouse-local.
|
||||
|
||||
## s3_truncate_on_insert {#s3_truncate_on_insert}
|
||||
|
||||
Enables or disables truncate before inserts in s3 engine tables. If disabled, an exception will be thrown on insert attempts if an S3 object already exists.
|
||||
|
||||
@ -3334,7 +3366,29 @@ Possible values:
|
||||
|
||||
Default value: `0`.
|
||||
|
||||
## hdfs_truncate_on_insert
|
||||
## s3_create_new_file_on_insert {#s3_create_new_file_on_insert}
|
||||
|
||||
Enables or disables creating a new file on each insert in s3 engine tables. If enabled, on each insert a new S3 object will be created with the key, similar to this pattern:
|
||||
|
||||
initial: `data.Parquet.gz` -> `data.1.Parquet.gz` -> `data.2.Parquet.gz`, etc.
|
||||
|
||||
Possible values:
|
||||
- 0 — `INSERT` query appends new data to the end of the file.
|
||||
- 1 — `INSERT` query creates a new file.
|
||||
|
||||
Default value: `0`.
|
||||
|
||||
## s3_skip_empty_files {#s3_skip_empty_files}
|
||||
|
||||
Enables or disables skipping empty files in [S3](../../engines/table-engines/integrations/s3.md) engine tables.
|
||||
|
||||
Possible values:
|
||||
- 0 — `SELECT` throws an exception if empty file is not compatible with requested format.
|
||||
- 1 — `SELECT` returns empty result for empty file.
|
||||
|
||||
Default value: `0`.
|
||||
|
||||
## hdfs_truncate_on_insert {#hdfs_truncate_on_insert}
|
||||
|
||||
Enables or disables truncation before an insert in hdfs engine tables. If disabled, an exception will be thrown on an attempt to insert if a file in HDFS already exists.
|
||||
|
||||
@ -3344,31 +3398,7 @@ Possible values:
|
||||
|
||||
Default value: `0`.
|
||||
|
||||
## engine_file_allow_create_multiple_files
|
||||
|
||||
Enables or disables creating a new file on each insert in file engine tables if the format has the suffix (`JSON`, `ORC`, `Parquet`, etc.). If enabled, on each insert a new file will be created with a name following this pattern:
|
||||
|
||||
`data.Parquet` -> `data.1.Parquet` -> `data.2.Parquet`, etc.
|
||||
|
||||
Possible values:
|
||||
- 0 — `INSERT` query appends new data to the end of the file.
|
||||
- 1 — `INSERT` query replaces existing content of the file with the new data.
|
||||
|
||||
Default value: `0`.
|
||||
|
||||
## s3_create_new_file_on_insert
|
||||
|
||||
Enables or disables creating a new file on each insert in s3 engine tables. If enabled, on each insert a new S3 object will be created with the key, similar to this pattern:
|
||||
|
||||
initial: `data.Parquet.gz` -> `data.1.Parquet.gz` -> `data.2.Parquet.gz`, etc.
|
||||
|
||||
Possible values:
|
||||
- 0 — `INSERT` query appends new data to the end of the file.
|
||||
- 1 — `INSERT` query replaces existing content of the file with the new data.
|
||||
|
||||
Default value: `0`.
|
||||
|
||||
## hdfs_create_new_file_on_insert
|
||||
## hdfs_create_new_file_on_insert {#hdfs_create_new_file_on_insert}
|
||||
|
||||
Enables or disables creating a new file on each insert in HDFS engine tables. If enabled, on each insert a new HDFS file will be created with the name, similar to this pattern:
|
||||
|
||||
@ -3376,7 +3406,27 @@ initial: `data.Parquet.gz` -> `data.1.Parquet.gz` -> `data.2.Parquet.gz`, etc.
|
||||
|
||||
Possible values:
|
||||
- 0 — `INSERT` query appends new data to the end of the file.
|
||||
- 1 — `INSERT` query replaces existing content of the file with the new data.
|
||||
- 1 — `INSERT` query creates a new file.
|
||||
|
||||
Default value: `0`.
|
||||
|
||||
## hdfs_skip_empty_files {#hdfs_skip_empty_files}
|
||||
|
||||
Enables or disables skipping empty files in [HDFS](../../engines/table-engines/integrations/hdfs.md) engine tables.
|
||||
|
||||
Possible values:
|
||||
- 0 — `SELECT` throws an exception if empty file is not compatible with requested format.
|
||||
- 1 — `SELECT` returns empty result for empty file.
|
||||
|
||||
Default value: `0`.
|
||||
|
||||
## engine_url_skip_empty_files {#engine_url_skip_empty_files}
|
||||
|
||||
Enables or disables skipping empty files in [URL](../../engines/table-engines/special/url.md) engine tables.
|
||||
|
||||
Possible values:
|
||||
- 0 — `SELECT` throws an exception if empty file is not compatible with requested format.
|
||||
- 1 — `SELECT` returns empty result for empty file.
|
||||
|
||||
Default value: `0`.
|
||||
|
||||
|
@ -11,7 +11,8 @@ Columns:
|
||||
- `host` ([String](../../sql-reference/data-types/string.md)) — The hostname/IP of the ZooKeeper node that ClickHouse connected to.
|
||||
- `port` ([String](../../sql-reference/data-types/string.md)) — The port of the ZooKeeper node that ClickHouse connected to.
|
||||
- `index` ([UInt8](../../sql-reference/data-types/int-uint.md)) — The index of the ZooKeeper node that ClickHouse connected to. The index is from ZooKeeper config.
|
||||
- `connected_time` ([String](../../sql-reference/data-types/string.md)) — When the connection was established
|
||||
- `connected_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — When the connection was established
|
||||
- `session_uptime_elapsed_seconds` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Seconds elapsed since the connection was established
|
||||
- `is_expired` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Is the current connection expired.
|
||||
- `keeper_api_version` ([String](../../sql-reference/data-types/string.md)) — Keeper API version.
|
||||
- `client_id` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Session id of the connection.
|
||||
@ -23,7 +24,7 @@ SELECT * FROM system.zookeeper_connection;
|
||||
```
|
||||
|
||||
``` text
|
||||
┌─name──────────────┬─host─────────┬─port─┬─index─┬──────connected_time─┬─is_expired─┬─keeper_api_version─┬──────────client_id─┐
|
||||
│ default_zookeeper │ 127.0.0.1 │ 2181 │ 0 │ 2023-05-19 14:30:16 │ 0 │ 0 │ 216349144108826660 │
|
||||
└───────────────────┴──────────────┴──────┴───────┴─────────────────────┴────────────┴────────────────────┴────────────────────┘
|
||||
┌─name────┬─host──────┬─port─┬─index─┬──────connected_time─┬─session_uptime_elapsed_seconds─┬─is_expired─┬─keeper_api_version─┬─client_id─┐
|
||||
│ default │ 127.0.0.1 │ 9181 │ 0 │ 2023-06-15 14:36:01 │ 3058 │ 0 │ 3 │ 5 │
|
||||
└─────────┴───────────┴──────┴───────┴─────────────────────┴────────────────────────────────┴────────────┴────────────────────┴───────────┘
|
||||
```
|
||||
|
@ -32,7 +32,7 @@ For example, Decimal32(4) can contain numbers from -99999.9999 to 99999.9999 wit
|
||||
|
||||
Internally data is represented as normal signed integers with respective bit width. Real value ranges that can be stored in memory are a bit larger than specified above, which are checked only on conversion from a string.
|
||||
|
||||
Because modern CPUs do not support 128-bit integers natively, operations on Decimal128 are emulated. Because of this Decimal128 works significantly slower than Decimal32/Decimal64.
|
||||
Because modern CPUs do not support 128-bit and 256-bit integers natively, operations on Decimal128 and Decimal256 are emulated. Thus, Decimal128 and Decimal256 work significantly slower than Decimal32/Decimal64.
|
||||
|
||||
## Operations and Result Type
|
||||
|
||||
@ -59,6 +59,10 @@ Some functions on Decimal return result as Float64 (for example, var or stddev).
|
||||
|
||||
During calculations on Decimal, integer overflows might happen. Excessive digits in a fraction are discarded (not rounded). Excessive digits in integer part will lead to an exception.
|
||||
|
||||
:::warning
|
||||
Overflow checks are not implemented for Decimal128 and Decimal256. In case of overflow, an incorrect result is returned and no exception is thrown.
|
||||
:::
|
||||
|
||||
``` sql
|
||||
SELECT toDecimal32(2, 4) AS x, x / 3
|
||||
```
|
||||
|
@ -33,7 +33,7 @@ SELECT
|
||||
toTypeName(toNullable('') AS val) AS source_type,
|
||||
toTypeName(toString(val)) AS to_type_result_type,
|
||||
toTypeName(CAST(val, 'String')) AS cast_result_type
|
||||
|
||||
|
||||
┌─source_type──────┬─to_type_result_type─┬─cast_result_type─┐
|
||||
│ Nullable(String) │ Nullable(String) │ String │
|
||||
└──────────────────┴─────────────────────┴──────────────────┘
|
||||
@ -203,7 +203,7 @@ Result:
|
||||
|
||||
## toDate
|
||||
|
||||
Converts the argument to [Date](/docs/en/sql-reference/data-types/date.md) data type.
|
||||
Converts the argument to [Date](/docs/en/sql-reference/data-types/date.md) data type.
|
||||
|
||||
If the argument is [DateTime](/docs/en/sql-reference/data-types/datetime.md) or [DateTime64](/docs/en/sql-reference/data-types/datetime64.md), it truncates it and leaves the date component of the DateTime:
|
||||
|
||||
@ -232,7 +232,7 @@ SELECT
|
||||
│ 2022-12-30 │ Date │
|
||||
└────────────┴──────────────────────────────────┘
|
||||
|
||||
1 row in set. Elapsed: 0.001 sec.
|
||||
1 row in set. Elapsed: 0.001 sec.
|
||||
```
|
||||
|
||||
```sql
|
||||
@ -314,20 +314,183 @@ SELECT
|
||||
└─────────────────────┴───────────────┴─────────────┴─────────────────────┘
|
||||
```
|
||||
|
||||
|
||||
## toDateOrZero
|
||||
|
||||
The same as [toDate](#todate) but returns lower boundary of [Date](/docs/en/sql-reference/data-types/date.md) if an invalid argument is received. Only [String](/docs/en/sql-reference/data-types/string.md) argument is supported.
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT toDateOrZero('2022-12-30'), toDateOrZero('');
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```response
|
||||
┌─toDateOrZero('2022-12-30')─┬─toDateOrZero('')─┐
|
||||
│ 2022-12-30 │ 1970-01-01 │
|
||||
└────────────────────────────┴──────────────────┘
|
||||
```
|
||||
|
||||
|
||||
## toDateOrNull
|
||||
|
||||
The same as [toDate](#todate) but returns `NULL` if an invalid argument is received. Only [String](/docs/en/sql-reference/data-types/string.md) argument is supported.
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT toDateOrNull('2022-12-30'), toDateOrNull('');
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```response
|
||||
┌─toDateOrNull('2022-12-30')─┬─toDateOrNull('')─┐
|
||||
│ 2022-12-30 │ ᴺᵁᴸᴸ │
|
||||
└────────────────────────────┴──────────────────┘
|
||||
```
|
||||
|
||||
|
||||
## toDateOrDefault
|
||||
|
||||
Like [toDate](#todate) but if unsuccessful, returns a default value which is either the second argument (if specified), or otherwise the lower boundary of [Date](/docs/en/sql-reference/data-types/date.md).
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
toDateOrDefault(expr [, default_value])
|
||||
```
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT toDateOrDefault('2022-12-30'), toDateOrDefault('', '2023-01-01'::Date);
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```response
|
||||
┌─toDateOrDefault('2022-12-30')─┬─toDateOrDefault('', CAST('2023-01-01', 'Date'))─┐
|
||||
│ 2022-12-30 │ 2023-01-01 │
|
||||
└───────────────────────────────┴─────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
|
||||
## toDateTime
|
||||
|
||||
Converts an input value to [DateTime](/docs/en/sql-reference/data-types/datetime.md).
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
toDateTime(expr[, time_zone ])
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `expr` — The value. [String](/docs/en/sql-reference/data-types/string.md), [Int](/docs/en/sql-reference/data-types/int-uint.md), [Date](/docs/en/sql-reference/data-types/date.md) or [DateTime](/docs/en/sql-reference/data-types/datetime.md).
|
||||
- `time_zone` — Time zone. [String](/docs/en/sql-reference/data-types/string.md).
|
||||
|
||||
If `expr` is a number, it is interpreted as the number of seconds since the beginning of the Unix Epoch (as Unix timestamp).
|
||||
|
||||
**Returned value**
|
||||
|
||||
- A date time. [DateTime](/docs/en/sql-reference/data-types/datetime.md)
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT toDateTime('2022-12-30 13:44:17'), toDateTime(1685457500, 'UTC');
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```response
|
||||
┌─toDateTime('2022-12-30 13:44:17')─┬─toDateTime(1685457500, 'UTC')─┐
|
||||
│ 2022-12-30 13:44:17 │ 2023-05-30 14:38:20 │
|
||||
└───────────────────────────────────┴───────────────────────────────┘
|
||||
```
|
||||
|
||||
|
||||
## toDateTimeOrZero
|
||||
|
||||
The same as [toDateTime](#todatetime) but returns lower boundary of [DateTime](/docs/en/sql-reference/data-types/datetime.md) if an invalid argument is received. Only [String](/docs/en/sql-reference/data-types/string.md) argument is supported.
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT toDateTimeOrZero('2022-12-30 13:44:17'), toDateTimeOrZero('');
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```response
|
||||
┌─toDateTimeOrZero('2022-12-30 13:44:17')─┬─toDateTimeOrZero('')─┐
|
||||
│ 2022-12-30 13:44:17 │ 1970-01-01 00:00:00 │
|
||||
└─────────────────────────────────────────┴──────────────────────┘
|
||||
```
|
||||
|
||||
|
||||
## toDateTimeOrNull
|
||||
|
||||
The same as [toDateTime](#todatetime) but returns `NULL` if an invalid argument is received. Only [String](/docs/en/sql-reference/data-types/string.md) argument is supported.
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT toDateTimeOrNull('2022-12-30 13:44:17'), toDateTimeOrNull('');
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```response
|
||||
┌─toDateTimeOrNull('2022-12-30 13:44:17')─┬─toDateTimeOrNull('')─┐
|
||||
│ 2022-12-30 13:44:17 │ ᴺᵁᴸᴸ │
|
||||
└─────────────────────────────────────────┴──────────────────────┘
|
||||
```
|
||||
|
||||
|
||||
## toDateTimeOrDefault
|
||||
|
||||
Like [toDateTime](#todatetime) but if unsuccessful, returns a default value which is either the third argument (if specified), or otherwise the lower boundary of [DateTime](/docs/en/sql-reference/data-types/datetime.md).
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
toDateTimeOrDefault(expr [, time_zone [, default_value]])
|
||||
```
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT toDateTimeOrDefault('2022-12-30 13:44:17'), toDateTimeOrDefault('', 'UTC', '2023-01-01'::DateTime('UTC'));
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```response
|
||||
┌─toDateTimeOrDefault('2022-12-30 13:44:17')─┬─toDateTimeOrDefault('', 'UTC', CAST('2023-01-01', 'DateTime(\'UTC\')'))─┐
|
||||
│ 2022-12-30 13:44:17 │ 2023-01-01 00:00:00 │
|
||||
└────────────────────────────────────────────┴─────────────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
|
||||
## toDate32
|
||||
|
||||
Converts the argument to the [Date32](/docs/en/sql-reference/data-types/date32.md) data type. If the value is outside the range, `toDate32` returns the border values supported by [Date32](/docs/en/sql-reference/data-types/date32.md). If the argument has [Date](/docs/en/sql-reference/data-types/date.md) type, its borders are taken into account.
|
||||
@ -519,6 +682,11 @@ SELECT toDateTime64('2019-01-01 00:00:00', 3, 'Asia/Istanbul') AS value, toTypeN
|
||||
└─────────────────────────┴─────────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## toDateTime64OrZero
|
||||
|
||||
## toDateTime64OrNull
|
||||
|
||||
## toDateTime64OrDefault
|
||||
|
||||
## toDecimal(32\|64\|128\|256)
|
||||
|
||||
@ -1247,7 +1415,7 @@ Returns DateTime values parsed from input string according to a MySQL style form
|
||||
**Supported format specifiers**
|
||||
|
||||
All format specifiers listed in [formatDateTime](/docs/en/sql-reference/functions/date-time-functions.md#date_time_functions-formatDateTime) except:
|
||||
- %Q: Quarter (1-4)
|
||||
- %Q: Quarter (1-4)
|
||||
|
||||
**Example**
|
||||
|
||||
@ -1341,10 +1509,12 @@ parseDateTimeBestEffort(time_string [, time_zone])
|
||||
- A string containing 9..10 digit [unix timestamp](https://en.wikipedia.org/wiki/Unix_time).
|
||||
- A string with a date and a time component: `YYYYMMDDhhmmss`, `DD/MM/YYYY hh:mm:ss`, `DD-MM-YY hh:mm`, `YYYY-MM-DD hh:mm:ss`, etc.
|
||||
- A string with a date, but no time component: `YYYY`, `YYYYMM`, `YYYY*MM`, `DD/MM/YYYY`, `DD-MM-YY` etc.
|
||||
- A string with a day and time: `DD`, `DD hh`, `DD hh:mm`. In this case `YYYY-MM` are substituted as `2000-01`.
|
||||
- A string with a day and time: `DD`, `DD hh`, `DD hh:mm`. In this case `MM` is substituted by `01`.
|
||||
- A string that includes the date and time along with time zone offset information: `YYYY-MM-DD hh:mm:ss ±h:mm`, etc. For example, `2020-12-12 17:36:00 -5:00`.
|
||||
- A [syslog timestamp](https://datatracker.ietf.org/doc/html/rfc3164#section-4.1.2): `Mmm dd hh:mm:ss`. For example, `Jun 9 14:20:32`.
|
||||
|
||||
For all of the formats with separator the function parses months names expressed by their full name or by the first three letters of a month name. Examples: `24/DEC/18`, `24-Dec-18`, `01-September-2018`.
|
||||
If the year is not specified, it is considered to be equal to the current year. If the resulting DateTime happens to be in the future (even by a second after the current moment), then the current year is substituted by the previous year.
|
||||
|
||||
**Returned value**
|
||||
|
||||
@ -1415,23 +1585,46 @@ Result:
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT parseDateTimeBestEffort('10 20:19');
|
||||
SELECT toYear(now()) as year, parseDateTimeBestEffort('10 20:19');
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```response
|
||||
┌─parseDateTimeBestEffort('10 20:19')─┐
|
||||
│ 2000-01-10 20:19:00 │
|
||||
└─────────────────────────────────────┘
|
||||
┌─year─┬─parseDateTimeBestEffort('10 20:19')─┐
|
||||
│ 2023 │ 2023-01-10 20:19:00 │
|
||||
└──────┴─────────────────────────────────────┘
|
||||
```
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
WITH
|
||||
now() AS ts_now,
|
||||
formatDateTime(ts_around, '%b %e %T') AS syslog_arg
|
||||
SELECT
|
||||
ts_now,
|
||||
syslog_arg,
|
||||
parseDateTimeBestEffort(syslog_arg)
|
||||
FROM (SELECT arrayJoin([ts_now - 30, ts_now + 30]) AS ts_around);
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```response
|
||||
┌──────────────ts_now─┬─syslog_arg──────┬─parseDateTimeBestEffort(syslog_arg)─┐
|
||||
│ 2023-06-30 23:59:30 │ Jun 30 23:59:00 │ 2023-06-30 23:59:00 │
|
||||
│ 2023-06-30 23:59:30 │ Jul 1 00:00:00 │ 2022-07-01 00:00:00 │
|
||||
└─────────────────────┴─────────────────┴─────────────────────────────────────┘
|
||||
```
|
||||
|
||||
**See Also**
|
||||
|
||||
- [RFC 1123](https://tools.ietf.org/html/rfc1123)
|
||||
- [RFC 1123](https://datatracker.ietf.org/doc/html/rfc1123)
|
||||
- [toDate](#todate)
|
||||
- [toDateTime](#todatetime)
|
||||
- [ISO 8601 announcement by @xkcd](https://xkcd.com/1179/)
|
||||
- [RFC 3164](https://datatracker.ietf.org/doc/html/rfc3164#section-4.1.2)
|
||||
|
||||
## parseDateTimeBestEffortUS
|
||||
|
||||
|
@ -82,6 +82,35 @@ LIFETIME(MIN 0 MAX 1000)
|
||||
LAYOUT(FLAT())
|
||||
```
|
||||
|
||||
:::note
|
||||
When using the SQL console in [ClickHouse Cloud](https://clickhouse.com), you must specify a user (`default` or any other user with the role `default_role`) and password when creating a dictionary.
|
||||
:::
|
||||
|
||||
```sql
|
||||
CREATE USER IF NOT EXISTS clickhouse_admin
|
||||
IDENTIFIED WITH sha256_password BY 'passworD43$x';
|
||||
|
||||
GRANT default_role TO clickhouse_admin;
|
||||
|
||||
CREATE DATABASE foo_db;
|
||||
|
||||
CREATE TABLE foo_db.source_table (
|
||||
id UInt64,
|
||||
value String
|
||||
) ENGINE = MergeTree
|
||||
PRIMARY KEY id;
|
||||
|
||||
CREATE DICTIONARY foo_db.id_value_dictionary
|
||||
(
|
||||
id UInt64,
|
||||
value String
|
||||
)
|
||||
PRIMARY KEY id
|
||||
SOURCE(CLICKHOUSE(TABLE 'source_table' USER 'clickhouse_admin' PASSWORD 'passworD43$x' DB 'foo_db' ))
|
||||
LAYOUT(FLAT())
|
||||
LIFETIME(MIN 0 MAX 1000);
|
||||
```
|
||||
|
||||
### Create a dictionary from a table in a remote ClickHouse service
|
||||
|
||||
Input table (in the remote ClickHouse service) `source_table`:
|
||||
|
@ -55,6 +55,9 @@ With the described implementation now we can see what can negatively affect 'DEL
|
||||
- Table having a very large number of data parts
|
||||
- Having a lot of data in Compact parts—in a Compact part, all columns are stored in one file.
|
||||
|
||||
:::note
|
||||
Currently, Lightweight delete does not work for tables with projection as rows in projection may be affected and require the projection to be rebuilt. Rebuilding projection makes the deletion not lightweight, so this is not supported.
|
||||
:::
|
||||
|
||||
## Related content
|
||||
|
||||
|
@ -1,10 +1,10 @@
|
||||
---
|
||||
slug: /en/sql-reference/table-functions/azure_blob_storage
|
||||
sidebar_label: azure_blob_storage
|
||||
slug: /en/sql-reference/table-functions/azureBlobStorage
|
||||
sidebar_label: azureBlobStorage
|
||||
keywords: [azure blob storage]
|
||||
---
|
||||
|
||||
# azure\_blob\_storage Table Function
|
||||
# azureBlobStorage Table Function
|
||||
|
||||
Provides a table-like interface to select/insert files in [Azure Blob Storage](https://azure.microsoft.com/en-us/products/storage/blobs). This table function is similar to the [s3 function](../../sql-reference/table-functions/s3.md).
|
||||
|
||||
|
@ -196,6 +196,16 @@ SELECT count(*) FROM file('big_dir/**/file002', 'CSV', 'name String, value UInt3
|
||||
- `_path` — Path to the file.
|
||||
- `_file` — Name of the file.
|
||||
|
||||
## Settings
|
||||
|
||||
- [engine_file_empty_if_not_exists](/docs/en/operations/settings/settings.md#engine-file-emptyif-not-exists) - allows to select empty data from a file that doesn't exist. Disabled by default.
|
||||
- [engine_file_truncate_on_insert](/docs/en/operations/settings/settings.md#engine-file-truncate-on-insert) - allows to truncate file before insert into it. Disabled by default.
|
||||
- [engine_file_allow_create_multiple_files](/docs/en/operations/settings/settings.md#engine_file_allow_create_multiple_files) - allows to create a new file on each insert if format has suffix. Disabled by default.
|
||||
- [engine_file_skip_empty_files](/docs/en/operations/settings/settings.md#engine_file_skip_empty_files) - allows to skip empty files while reading. Disabled by default.
|
||||
- [storage_file_read_method](/docs/en/operations/settings/settings.md#storage_file_read_method) - method of reading data from storage file, one of: read, pread, mmap (only for clickhouse-local). Default value: `pread` for clickhouse-server, `mmap` for clickhouse-local.
|
||||
|
||||
|
||||
|
||||
**See Also**
|
||||
|
||||
- [Virtual columns](/docs/en/engines/table-engines/index.md#table_engines-virtual_columns)
|
||||
|
@ -1,7 +1,7 @@
|
||||
---
|
||||
slug: /en/sql-reference/table-functions/gcs
|
||||
sidebar_position: 45
|
||||
sidebar_label: s3
|
||||
sidebar_label: gcs
|
||||
keywords: [gcs, bucket]
|
||||
---
|
||||
|
||||
|
@ -97,6 +97,12 @@ FROM hdfs('hdfs://hdfs1:9000/big_dir/file{0..9}{0..9}{0..9}', 'CSV', 'name Strin
|
||||
- `_path` — Path to the file.
|
||||
- `_file` — Name of the file.
|
||||
|
||||
## Storage Settings {#storage-settings}
|
||||
|
||||
- [hdfs_truncate_on_insert](/docs/en/operations/settings/settings.md#hdfs-truncate-on-insert) - allows to truncate file before insert into it. Disabled by default.
|
||||
- [hdfs_create_multiple_files](/docs/en/operations/settings/settings.md#hdfs_allow_create_multiple_files) - allows to create a new file on each insert if format has suffix. Disabled by default.
|
||||
- [hdfs_skip_empty_files](/docs/en/operations/settings/settings.md#hdfs_skip_empty_files) - allows to skip empty files while reading. Disabled by default.
|
||||
|
||||
**See Also**
|
||||
|
||||
- [Virtual columns](../../engines/table-engines/index.md#table_engines-virtual_columns)
|
||||
|
@ -107,6 +107,30 @@ SELECT * FROM mysql('localhost:3306', 'test', 'test', 'bayonet', '123');
|
||||
└────────┴───────┘
|
||||
```
|
||||
|
||||
Copying data from MySQL table into ClickHouse table:
|
||||
|
||||
```sql
|
||||
CREATE TABLE mysql_copy
|
||||
(
|
||||
`id` UInt64,
|
||||
`datetime` DateTime('UTC'),
|
||||
`description` String,
|
||||
)
|
||||
ENGINE = MergeTree
|
||||
ORDER BY (id,datetime);
|
||||
|
||||
INSERT INTO mysql_copy
|
||||
SELECT * FROM mysql('host:port', 'database', 'table', 'user', 'password');
|
||||
```
|
||||
|
||||
Or if copying only an incremental batch from MySQL based on the max current id:
|
||||
|
||||
```sql
|
||||
INSERT INTO mysql_copy
|
||||
SELECT * FROM mysql('host:port', 'database', 'table', 'user', 'password')
|
||||
WHERE id > (SELECT max(id) from mysql_copy);
|
||||
```
|
||||
|
||||
**See Also**
|
||||
|
||||
- [The ‘MySQL’ table engine](../../engines/table-engines/integrations/mysql.md)
|
||||
|
67
docs/en/sql-reference/table-functions/redis.md
Normal file
67
docs/en/sql-reference/table-functions/redis.md
Normal file
@ -0,0 +1,67 @@
|
||||
---
|
||||
slug: /en/sql-reference/table-functions/redis
|
||||
sidebar_position: 43
|
||||
sidebar_label: redis
|
||||
---
|
||||
|
||||
# redis
|
||||
|
||||
This table function allows integrating ClickHouse with [Redis](https://redis.io/).
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
redis(host:port, key, structure[, db_index[, password[, pool_size]]])
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `host:port` — Redis server address, you can ignore port and default Redis port 6379 will be used.
|
||||
|
||||
- `key` — any column name in the column list.
|
||||
|
||||
- `structure` — The schema for the ClickHouse table returned from this function.
|
||||
|
||||
- `db_index` — Redis db index range from 0 to 15, default is 0.
|
||||
|
||||
- `password` — User password, default is blank string.
|
||||
|
||||
- `pool_size` — Redis max connection pool size, default is 16.
|
||||
|
||||
- `primary` must be specified, it supports only one column in the primary key. The primary key will be serialized in binary as a Redis key.
|
||||
|
||||
- columns other than the primary key will be serialized in binary as Redis value in corresponding order.
|
||||
|
||||
- queries with key equals or in filtering will be optimized to multi keys lookup from Redis. If queries without filtering key full table scan will happen which is a heavy operation.
|
||||
|
||||
|
||||
**Returned Value**
|
||||
|
||||
A table object with key as Redis key, other columns packaged together as Redis value.
|
||||
|
||||
## Usage Example {#usage-example}
|
||||
|
||||
Create a table in ClickHouse which allows to read data from Redis:
|
||||
|
||||
``` sql
|
||||
CREATE TABLE redis_table
|
||||
(
|
||||
`k` String,
|
||||
`m` String,
|
||||
`n` UInt32
|
||||
)
|
||||
ENGINE = Redis('redis1:6379') PRIMARY KEY(k);
|
||||
```
|
||||
|
||||
```sql
|
||||
SELECT * FROM redis(
|
||||
'redis1:6379',
|
||||
'key',
|
||||
'key String, v1 String, v2 UInt32'
|
||||
)
|
||||
```
|
||||
|
||||
**See Also**
|
||||
|
||||
- [The `Redis` table engine](/docs/en/engines/table-engines/integrations/redis.md)
|
||||
- [Using redis as a dictionary source](/docs/en/sql-reference/dictionaries/index.md#redis)
|
@ -202,6 +202,12 @@ FROM s3(
|
||||
LIMIT 5;
|
||||
```
|
||||
|
||||
## Storage Settings {#storage-settings}
|
||||
|
||||
- [s3_truncate_on_insert](/docs/en/operations/settings/settings.md#s3-truncate-on-insert) - allows to truncate file before insert into it. Disabled by default.
|
||||
- [s3_create_multiple_files](/docs/en/operations/settings/settings.md#s3_allow_create_multiple_files) - allows to create a new file on each insert if format has suffix. Disabled by default.
|
||||
- [s3_skip_empty_files](/docs/en/operations/settings/settings.md#s3_skip_empty_files) - allows to skip empty files while reading. Disabled by default.
|
||||
|
||||
**See Also**
|
||||
|
||||
- [S3 engine](../../engines/table-engines/integrations/s3.md)
|
||||
|
@ -53,6 +53,10 @@ Character `|` inside patterns is used to specify failover addresses. They are it
|
||||
- `_path` — Path to the `URL`.
|
||||
- `_file` — Resource name of the `URL`.
|
||||
|
||||
## Storage Settings {#storage-settings}
|
||||
|
||||
- [engine_url_skip_empty_files](/docs/en/operations/settings/settings.md#engine_url_skip_empty_files) - allows to skip empty files while reading. Disabled by default.
|
||||
|
||||
**See Also**
|
||||
|
||||
- [Virtual columns](/docs/en/engines/table-engines/index.md#table_engines-virtual_columns)
|
||||
|
@ -142,7 +142,129 @@ $ clickhouse-client --param_tbl="numbers" --param_db="system" --param_col="numbe
|
||||
- `--history_file` - путь к файлу с историей команд.
|
||||
- `--param_<name>` — значение параметра для [запроса с параметрами](#cli-queries-with-parameters).
|
||||
|
||||
Начиная с версии 20.5, в `clickhouse-client` есть автоматическая подсветка синтаксиса (включена всегда).
|
||||
Вместо параметров `--host`, `--port`, `--user` и `--password` клиент ClickHouse также поддерживает строки подключения (смотри следующий раздел).
|
||||
|
||||
## Строка подключения {#connection_string}
|
||||
|
||||
clickhouse-client также поддерживает подключение к серверу clickhouse с помощью строки подключения, аналогичной [MongoDB](https://www.mongodb.com/docs/manual/reference/connection-string/), [PostgreSQL](https://www.postgresql.org/docs/current/libpq-connect.html#LIBPQ-CONNSTRING), [MySQL](https://dev.mysql.com/doc/refman/8.0/en/connecting-using-uri-or-key-value-pairs.html#connecting-using-uri). Она имеет следующий синтаксис:
|
||||
|
||||
```text
|
||||
clickhouse:[//[user[:password]@][hosts_and_ports]][/database][?query_parameters]
|
||||
```
|
||||
|
||||
Где
|
||||
|
||||
- `user` - (необязательно) - это имя пользователя,
|
||||
- `password` - (необязательно) - Пароль пользователя. Если символ `:` указан, а пароль пуст, то клиент запросит пароль у пользователя.
|
||||
- `hosts_and_ports` - (необязательно) - список хостов и необязательных портов. `host[:port] [, host:[port]], ...`,
|
||||
- `database` - (необязательно) - это имя базы данных,
|
||||
- `query_parameters` - (опционально) список пар ключ-значение `param1=value1[,¶m2=value2], ...`. Для некоторых параметров значение не требуется. Имена и значения параметров чувствительны к регистру.
|
||||
|
||||
Если user не указан, будет использоваться имя пользователя `default`.
|
||||
Если host не указан, будет использован хост `localhost`.
|
||||
Если port не указан, будет использоваться порт `9000`.
|
||||
Если база данных не указана, будет использоваться база данных `default`.
|
||||
|
||||
Если имя пользователя, пароль или база данных были указаны в строке подключения, их нельзя указать с помощью `--user`, `--password` или `--database` (и наоборот).
|
||||
|
||||
Параметр host может быть либо именем хоста, либо IP-адресом. Для указания IPv6-адреса поместите его в квадратные скобки:
|
||||
|
||||
```text
|
||||
clickhouse://[2001:db8::1234]
|
||||
```
|
||||
|
||||
URI позволяет подключаться к нескольким хостам. Строки подключения могут содержать несколько хостов. ClickHouse-client будет пытаться подключиться к этим хостам по порядку (т.е. слева направо). После установления соединения попытки подключения к оставшимся хостам не предпринимаются.
|
||||
|
||||
|
||||
|
||||
Строка подключения должна быть указана в первом аргументе clickhouse-client. Строка подключения может комбинироваться с другими [параметрами командной строки] (#command-line-options) кроме `--host/-h` и `--port`.
|
||||
|
||||
Для компонента `query_parameter` разрешены следующие ключи:
|
||||
|
||||
- `secure` или сокращенно `s` - без значения. Если параметр указан, то соединение с сервером будет осуществляться по защищенному каналу (TLS). См. `secure` в [command-line-options](#command-line-options).
|
||||
|
||||
### Кодирование URI {#connection_string_uri_percent_encoding}
|
||||
|
||||
Не US ASCII и специальные символы в имени пользователя, пароле, хостах, базе данных и параметрах запроса должны быть [закодированы](https://ru.wikipedia.org/wiki/URL#%D0%9A%D0%BE%D0%B4%D0%B8%D1%80%D0%BE%D0%B2%D0%B0%D0%BD%D0%B8%D0%B5_URL).
|
||||
|
||||
### Примеры {#connection_string_examples}
|
||||
|
||||
Подключиться к localhost через порт 9000 и выполнить запрос `SELECT 1`
|
||||
|
||||
``` bash
|
||||
clickhouse-client clickhouse://localhost:9000 --query "SELECT 1"
|
||||
```
|
||||
Подключиться к localhost, используя пользователя `john` с паролем `secret`, хост `127.0.0.1` и порт `9000`
|
||||
|
||||
``` bash
|
||||
clickhouse-client clickhouse://john:secret@127.0.0.1:9000
|
||||
```
|
||||
|
||||
Подключиться к localhost, используя пользователя по умолчанию, хост с IPV6 адресом `[::1]` и порт `9000`.
|
||||
|
||||
``` bash
|
||||
clickhouse-client clickhouse://[::1]:9000
|
||||
```
|
||||
|
||||
Подключиться к localhost через порт 9000 в многострочном режиме.
|
||||
|
||||
``` bash
|
||||
clickhouse-client clickhouse://localhost:9000 '-m'
|
||||
```
|
||||
|
||||
Подключиться к localhost через порт 9000 с пользователем default.
|
||||
|
||||
``` bash
|
||||
clickhouse-client clickhouse://default@localhost:9000
|
||||
|
||||
# Эквивалентно:
|
||||
clickhouse-client clickhouse://localhost:9000 --user default
|
||||
```
|
||||
|
||||
Подключиться к localhost через порт 9000 с базой данных `my_database`
|
||||
|
||||
``` bash
|
||||
clickhouse-client clickhouse://localhost:9000/my_database
|
||||
|
||||
# Эквивалентно:
|
||||
clickhouse-client clickhouse://localhost:9000 --database my_database
|
||||
```
|
||||
|
||||
Подключиться к localhost через порт 9000 с базой данных `my_database`, указанной в строке подключения, используя безопасное соединение при помощи короткого варианта параметра URI 's'.
|
||||
|
||||
``` bash
|
||||
clickhouse-client clickhouse://localhost/my_database?s
|
||||
|
||||
# Эквивалентно:
|
||||
clickhouse-client clickhouse://localhost/my_database -s
|
||||
```
|
||||
|
||||
Подключиться к хосту по умолчанию с использованием порта по умолчанию, пользователя по умолчанию, и базы данных по умолчанию.
|
||||
|
||||
``` bash
|
||||
clickhouse-client clickhouse:
|
||||
```
|
||||
|
||||
Подключиться к хосту по умолчанию через порт по умолчанию, используя имя пользователя `my_user` без пароля.
|
||||
|
||||
``` bash
|
||||
clickhouse-client clickhouse://my_user@
|
||||
|
||||
# Использование пустого пароля между : и @ означает, что пользователь должен ввести пароль перед началом соединения.
|
||||
clickhouse-client clickhouse://my_user:@
|
||||
```
|
||||
|
||||
Подключиться к localhost, используя электронную почту, как имя пользователя. Символ `@` закодирован как `%40`.
|
||||
|
||||
``` bash
|
||||
clickhouse-client clickhouse://some_user%40some_mail.com@localhost:9000
|
||||
```
|
||||
|
||||
Подключится к одному из хостов: `192.168.1.15`, `192.168.1.25`.
|
||||
|
||||
``` bash
|
||||
clickhouse-client clickhouse://192.168.1.15,192.168.1.25
|
||||
```
|
||||
|
||||
### Конфигурационные файлы {#configuration_files}
|
||||
|
||||
|
@ -31,7 +31,7 @@ sidebar_label: Decimal
|
||||
## Внутреннее представление {#vnutrennee-predstavlenie}
|
||||
|
||||
Внутри данные представляются как знаковые целые числа соответствующей разрядности. Реальные диапазоны, хранящиеся в ячейках памяти, несколько больше заявленных. Заявленные диапазоны Decimal проверяются только при вводе числа из строкового представления.
|
||||
Поскольку современные CPU не поддерживают 128-битные числа, операции над Decimal128 эмулируются программно. Decimal128 работает в разы медленней чем Decimal32/Decimal64.
|
||||
Поскольку современные CPU не поддерживают 128-битные и 256-битные числа, операции над Decimal128 и Decimal256 эмулируются программно. Данные типы работают в разы медленнее, чем Decimal32/Decimal64.
|
||||
|
||||
## Операции и типы результата {#operatsii-i-tipy-rezultata}
|
||||
|
||||
@ -59,6 +59,10 @@ sidebar_label: Decimal
|
||||
|
||||
При выполнении операций над типом Decimal могут происходить целочисленные переполнения. Лишняя дробная часть отбрасывается (не округляется). Лишняя целочисленная часть приводит к исключению.
|
||||
|
||||
:::warning
|
||||
Проверка переполнения не реализована для Decimal128 и Decimal256. В случае переполнения неверный результат будет возвращён без выбрасывания исключения.
|
||||
:::
|
||||
|
||||
``` sql
|
||||
SELECT toDecimal32(2, 4) AS x, x / 3
|
||||
```
|
||||
|
@ -165,22 +165,217 @@ SELECT toUInt64(nan), toUInt32(-32), toUInt16('16'), toUInt8(8.8);
|
||||
|
||||
## toDate {#todate}
|
||||
|
||||
Синоним: `DATE`.
|
||||
Конвертирует аргумент в значение [Date](/docs/ru/sql-reference/data-types/date.md).
|
||||
|
||||
**Синтаксис**
|
||||
|
||||
``` sql
|
||||
toDate(expr)
|
||||
```
|
||||
|
||||
**Аргументы**
|
||||
|
||||
- `expr` — Значение для преобразования. [String](/docs/ru/sql-reference/data-types/string.md), [Int](/docs/ru/sql-reference/data-types/int-uint.md), [Date](/docs/ru/sql-reference/data-types/date.md) или [DateTime](/docs/ru/sql-reference/data-types/datetime.md).
|
||||
|
||||
Если `expr` является числом и выглядит как UNIX timestamp (больше чем 65535), оно интерпретируется как DateTime, затем обрезается до Date с учётом текущего часового пояса. Если `expr` является числом и меньше чем 65536, оно интерпретируется как количество дней с 1970-01-01.
|
||||
|
||||
**Возвращаемое значение**
|
||||
|
||||
- Календарная дата. [Date](/docs/ru/sql-reference/data-types/date.md).
|
||||
|
||||
**Пример**
|
||||
|
||||
Запрос:
|
||||
|
||||
``` sql
|
||||
SELECT toDate('2022-12-30'), toDate(1685457500);
|
||||
```
|
||||
|
||||
Результат:
|
||||
|
||||
```response
|
||||
┌─toDate('2022-12-30')─┬─toDate(1685457500)─┐
|
||||
│ 2022-12-30 │ 2023-05-30 │
|
||||
└──────────────────────┴────────────────────┘
|
||||
```
|
||||
|
||||
|
||||
## toDateOrZero {#todateorzero}
|
||||
|
||||
Как [toDate](#todate), но в случае неудачи возвращает нижнюю границу [Date](/docs/ru/sql-reference/data-types/date.md). Поддерживается только аргумент типа [String](/docs/ru/sql-reference/data-types/string.md).
|
||||
|
||||
**Пример**
|
||||
|
||||
Запрос:
|
||||
|
||||
``` sql
|
||||
SELECT toDateOrZero('2022-12-30'), toDateOrZero('');
|
||||
```
|
||||
|
||||
Результат:
|
||||
|
||||
```response
|
||||
┌─toDateOrZero('2022-12-30')─┬─toDateOrZero('')─┐
|
||||
│ 2022-12-30 │ 1970-01-01 │
|
||||
└────────────────────────────┴──────────────────┘
|
||||
```
|
||||
|
||||
|
||||
## toDateOrNull {#todateornull}
|
||||
|
||||
Как [toDate](#todate), но в случае неудачи возвращает `NULL`. Поддерживается только аргумент типа [String](/docs/ru/sql-reference/data-types/string.md).
|
||||
|
||||
**Пример**
|
||||
|
||||
Запрос:
|
||||
|
||||
``` sql
|
||||
SELECT toDateOrNull('2022-12-30'), toDateOrNull('');
|
||||
```
|
||||
|
||||
Результат:
|
||||
|
||||
```response
|
||||
┌─toDateOrNull('2022-12-30')─┬─toDateOrNull('')─┐
|
||||
│ 2022-12-30 │ ᴺᵁᴸᴸ │
|
||||
└────────────────────────────┴──────────────────┘
|
||||
```
|
||||
|
||||
|
||||
## toDateOrDefault {#todateordefault}
|
||||
|
||||
Как [toDate](#todate), но в случае неудачи возвращает значение по умолчанию (второй аргумент, если указан, или нижнюю границу [Date](/docs/ru/sql-reference/data-types/date.md)).
|
||||
|
||||
**Синтаксис**
|
||||
|
||||
``` sql
|
||||
toDateOrDefault(expr [, default_value])
|
||||
```
|
||||
|
||||
**Пример**
|
||||
|
||||
Запрос:
|
||||
|
||||
``` sql
|
||||
SELECT toDateOrDefault('2022-12-30'), toDateOrDefault('', '2023-01-01'::Date);
|
||||
```
|
||||
|
||||
Результат:
|
||||
|
||||
```response
|
||||
┌─toDateOrDefault('2022-12-30')─┬─toDateOrDefault('', CAST('2023-01-01', 'Date'))─┐
|
||||
│ 2022-12-30 │ 2023-01-01 │
|
||||
└───────────────────────────────┴─────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
|
||||
## toDateTime {#todatetime}
|
||||
|
||||
Конвертирует аргумент в значение [DateTime](/docs/ru/sql-reference/data-types/datetime.md).
|
||||
|
||||
**Синтаксис**
|
||||
|
||||
``` sql
|
||||
toDateTime(expr[, time_zone ])
|
||||
```
|
||||
|
||||
**Аргументы**
|
||||
|
||||
- `expr` — Значение для преобразования. [String](/docs/ru/sql-reference/data-types/string.md), [Int](/docs/ru/sql-reference/data-types/int-uint.md), [Date](/docs/ru/sql-reference/data-types/date.md) или [DateTime](/docs/ru/sql-reference/data-types/datetime.md).
|
||||
- `time_zone` — Часовой пояс. [String](/docs/ru/sql-reference/data-types/string.md).
|
||||
|
||||
Если `expr` является числом, оно интерпретируется как количество секунд от начала unix эпохи.
|
||||
|
||||
**Возвращаемое значение**
|
||||
|
||||
- Время. [DateTime](/docs/ru/sql-reference/data-types/datetime.md)
|
||||
|
||||
**Пример**
|
||||
|
||||
Запрос:
|
||||
|
||||
``` sql
|
||||
SELECT toDateTime('2022-12-30 13:44:17'), toDateTime(1685457500, 'UTC');
|
||||
```
|
||||
|
||||
Результат:
|
||||
|
||||
```response
|
||||
┌─toDateTime('2022-12-30 13:44:17')─┬─toDateTime(1685457500, 'UTC')─┐
|
||||
│ 2022-12-30 13:44:17 │ 2023-05-30 14:38:20 │
|
||||
└───────────────────────────────────┴───────────────────────────────┘
|
||||
```
|
||||
|
||||
|
||||
## toDateTimeOrZero {#todatetimeorzero}
|
||||
|
||||
Как [toDateTime](#todatetime), но в случае неудачи возвращает нижнюю границу [DateTime](/docs/ru/sql-reference/data-types/datetime.md). Поддерживается только аргумент типа [String](/docs/ru/sql-reference/data-types/string.md).
|
||||
|
||||
**Пример**
|
||||
|
||||
Запрос:
|
||||
|
||||
``` sql
|
||||
SELECT toDateTimeOrZero('2022-12-30 13:44:17'), toDateTimeOrZero('');
|
||||
```
|
||||
|
||||
Результат:
|
||||
|
||||
```response
|
||||
┌─toDateTimeOrZero('2022-12-30 13:44:17')─┬─toDateTimeOrZero('')─┐
|
||||
│ 2022-12-30 13:44:17 │ 1970-01-01 00:00:00 │
|
||||
└─────────────────────────────────────────┴──────────────────────┘
|
||||
```
|
||||
|
||||
|
||||
## toDateTimeOrNull {#todatetimeornull}
|
||||
|
||||
Как [toDateTime](#todatetime), но в случае неудачи возвращает `NULL`. Поддерживается только аргумент типа [String](/docs/ru/sql-reference/data-types/string.md).
|
||||
|
||||
**Пример**
|
||||
|
||||
Запрос:
|
||||
|
||||
``` sql
|
||||
SELECT toDateTimeOrNull('2022-12-30 13:44:17'), toDateTimeOrNull('');
|
||||
```
|
||||
|
||||
Результат:
|
||||
|
||||
```response
|
||||
┌─toDateTimeOrNull('2022-12-30 13:44:17')─┬─toDateTimeOrNull('')─┐
|
||||
│ 2022-12-30 13:44:17 │ ᴺᵁᴸᴸ │
|
||||
└─────────────────────────────────────────┴──────────────────────┘
|
||||
```
|
||||
|
||||
|
||||
## toDateTimeOrDefault {#todatetimeordefault}
|
||||
|
||||
Как [toDateTime](#todatetime), но в случае неудачи возвращает значение по умолчанию (третий аргумент, если указан, или нижнюю границу [DateTime](/docs/ru/sql-reference/data-types/datetime.md)).
|
||||
|
||||
**Синтаксис**
|
||||
|
||||
``` sql
|
||||
toDateTimeOrDefault(expr, [, time_zone [, default_value]])
|
||||
```
|
||||
|
||||
**Пример**
|
||||
|
||||
Запрос:
|
||||
|
||||
``` sql
|
||||
SELECT toDateTimeOrDefault('2022-12-30 13:44:17'), toDateTimeOrDefault('', 'UTC', '2023-01-01'::DateTime('UTC'));
|
||||
```
|
||||
|
||||
Результат:
|
||||
|
||||
```response
|
||||
┌─toDateTimeOrDefault('2022-12-30 13:44:17')─┬─toDateTimeOrDefault('', 'UTC', CAST('2023-01-01', 'DateTime(\'UTC\')'))─┐
|
||||
│ 2022-12-30 13:44:17 │ 2023-01-01 00:00:00 │
|
||||
└────────────────────────────────────────────┴─────────────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
|
||||
## toDate32 {#todate32}
|
||||
|
||||
Конвертирует аргумент в значение типа [Date32](../../sql-reference/data-types/date32.md). Если значение выходит за границы диапазона, возвращается пограничное значение `Date32`. Если аргумент имеет тип [Date](../../sql-reference/data-types/date.md), учитываются границы типа `Date`.
|
||||
@ -301,6 +496,14 @@ SELECT
|
||||
└─────────────────────────────────────────────────────────┴───────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## toDateTime64
|
||||
|
||||
## toDateTime64OrZero
|
||||
|
||||
## toDateTime64OrNull
|
||||
|
||||
## toDateTime64OrDefault
|
||||
|
||||
## toDecimal(32\|64\|128\|256) {#todecimal3264128}
|
||||
|
||||
Преобразует `value` к типу данных [Decimal](../../sql-reference/functions/type-conversion-functions.md) с точностью `S`. `value` может быть числом или строкой. Параметр `S` (scale) задаёт число десятичных знаков.
|
||||
@ -1020,10 +1223,12 @@ parseDateTimeBestEffort(time_string[, time_zone])
|
||||
- [Unix timestamp](https://ru.wikipedia.org/wiki/Unix-время) в строковом представлении. 9 или 10 символов.
|
||||
- Строка с датой и временем: `YYYYMMDDhhmmss`, `DD/MM/YYYY hh:mm:ss`, `DD-MM-YY hh:mm`, `YYYY-MM-DD hh:mm:ss`, etc.
|
||||
- Строка с датой, но без времени: `YYYY`, `YYYYMM`, `YYYY*MM`, `DD/MM/YYYY`, `DD-MM-YY` и т.д.
|
||||
- Строка с временем, и с днём: `DD`, `DD hh`, `DD hh:mm`. В этом случае `YYYY-MM` принимается равным `2000-01`.
|
||||
- Строка с временем, и с днём: `DD`, `DD hh`, `DD hh:mm`. В этом случае `MM` принимается равным `01`.
|
||||
- Строка, содержащая дату и время вместе с информацией о часовом поясе: `YYYY-MM-DD hh:mm:ss ±h:mm`, и т.д. Например, `2020-12-12 17:36:00 -5:00`.
|
||||
- Строка, содержащая дату и время в формате [syslog timestamp](https://datatracker.ietf.org/doc/html/rfc3164#section-4.1.2): `Mmm dd hh:mm:ss`. Например, `Jun 9 14:20:32`.
|
||||
|
||||
Для всех форматов с разделителями функция распознаёт названия месяцев, выраженных в виде полного англоязычного имени месяца или в виде первых трёх символов имени месяца. Примеры: `24/DEC/18`, `24-Dec-18`, `01-September-2018`.
|
||||
Если год не указан, вместо него подставляется текущий год. Если в результате получается будущее время (даже на одну секунду впереди текущего момента времени), то текущий год заменяется на прошлый.
|
||||
|
||||
**Возвращаемое значение**
|
||||
|
||||
@ -1094,23 +1299,46 @@ AS parseDateTimeBestEffort;
|
||||
Запрос:
|
||||
|
||||
``` sql
|
||||
SELECT parseDateTimeBestEffort('10 20:19');
|
||||
SELECT toYear(now()) as year, parseDateTimeBestEffort('10 20:19');
|
||||
```
|
||||
|
||||
Результат:
|
||||
|
||||
``` text
|
||||
┌─parseDateTimeBestEffort('10 20:19')─┐
|
||||
│ 2000-01-10 20:19:00 │
|
||||
└─────────────────────────────────────┘
|
||||
┌─year─┬─parseDateTimeBestEffort('10 20:19')─┐
|
||||
│ 2023 │ 2023-01-10 20:19:00 │
|
||||
└──────┴─────────────────────────────────────┘
|
||||
```
|
||||
|
||||
Запрос:
|
||||
|
||||
``` sql
|
||||
WITH
|
||||
now() AS ts_now,
|
||||
formatDateTime(ts_around, '%b %e %T') AS syslog_arg
|
||||
SELECT
|
||||
ts_now,
|
||||
syslog_arg,
|
||||
parseDateTimeBestEffort(syslog_arg)
|
||||
FROM (SELECT arrayJoin([ts_now - 30, ts_now + 30]) AS ts_around);
|
||||
```
|
||||
|
||||
Результат:
|
||||
|
||||
``` text
|
||||
┌──────────────ts_now─┬─syslog_arg──────┬─parseDateTimeBestEffort(syslog_arg)─┐
|
||||
│ 2023-06-30 23:59:30 │ Jun 30 23:59:00 │ 2023-06-30 23:59:00 │
|
||||
│ 2023-06-30 23:59:30 │ Jul 1 00:00:00 │ 2022-07-01 00:00:00 │
|
||||
└─────────────────────┴─────────────────┴─────────────────────────────────────┘
|
||||
```
|
||||
|
||||
**Смотрите также**
|
||||
|
||||
- [Информация о формате ISO 8601 от @xkcd](https://xkcd.com/1179/)
|
||||
- [RFC 1123](https://tools.ietf.org/html/rfc1123)
|
||||
- [RFC 1123](https://datatracker.ietf.org/doc/html/rfc1123)
|
||||
- [toDate](#todate)
|
||||
- [toDateTime](#todatetime)
|
||||
- [RFC 3164](https://datatracker.ietf.org/doc/html/rfc3164#section-4.1.2)
|
||||
|
||||
## parseDateTimeBestEffortUS {#parsedatetimebesteffortUS}
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
---
|
||||
slug: /ru/whats-new/changelog/2017
|
||||
sidebar_position: 6
|
||||
sidebar_position: 60
|
||||
sidebar_label: 2017
|
||||
title: 2017 Changelog
|
||||
---
|
||||
|
@ -1,6 +1,6 @@
|
||||
---
|
||||
slug: /ru/whats-new/changelog/2018
|
||||
sidebar_position: 5
|
||||
sidebar_position: 50
|
||||
sidebar_label: 2018
|
||||
title: 2018 Changelog
|
||||
---
|
||||
|
@ -1,6 +1,6 @@
|
||||
---
|
||||
slug: /ru/whats-new/changelog/2019
|
||||
sidebar_position: 4
|
||||
sidebar_position: 40
|
||||
sidebar_label: 2019
|
||||
title: 2019 Changelog
|
||||
---
|
||||
|
@ -1,6 +1,6 @@
|
||||
---
|
||||
slug: /ru/whats-new/changelog/2020
|
||||
sidebar_position: 3
|
||||
sidebar_position: 30
|
||||
sidebar_label: 2020
|
||||
title: 2020 Changelog
|
||||
---
|
||||
|
@ -1,6 +1,6 @@
|
||||
---
|
||||
slug: /ru/whats-new/changelog/2021
|
||||
sidebar_position: 2
|
||||
sidebar_position: 20
|
||||
sidebar_label: 2021
|
||||
title: 2021 Changelog
|
||||
---
|
||||
|
10
docs/ru/whats-new/changelog/2022.mdx
Normal file
10
docs/ru/whats-new/changelog/2022.mdx
Normal file
@ -0,0 +1,10 @@
|
||||
---
|
||||
slug: /ru/whats-new/changelog/2022
|
||||
sidebar_position: 10
|
||||
sidebar_label: 2022
|
||||
title: 2022 Changelog
|
||||
---
|
||||
|
||||
import Changelog from '@site/docs/en/whats-new/changelog/2022.md';
|
||||
|
||||
<Changelog />
|
@ -2,5 +2,5 @@ label: 'Changelog'
|
||||
collapsible: true
|
||||
collapsed: true
|
||||
link:
|
||||
type: doc
|
||||
id: ru/whats-new/changelog/index
|
||||
type: generated-index
|
||||
title: Changelog
|
||||
|
@ -1,7 +1,7 @@
|
||||
---
|
||||
sidebar_position: 1
|
||||
sidebar_label: 2022
|
||||
title: 2022 Changelog
|
||||
sidebar_label: 2023
|
||||
title: 2023 Changelog
|
||||
slug: /ru/whats-new/changelog/index
|
||||
---
|
||||
|
||||
|
@ -6,4 +6,4 @@ sidebar_label: Changelog
|
||||
|
||||
# Changelog
|
||||
|
||||
You can view the latest Changelog at [https://clickhouse.com/docs/en/whats-new/changelog/](https://clickhouse.com/docs/en/whats-new/changelog/)
|
||||
You can view the latest Changelog at [https://clickhouse.com/docs/en/whats-new/changelog/](/docs/en/whats-new/changelog/index.md)
|
||||
|
@ -409,8 +409,15 @@ if (ENABLE_CLICKHOUSE_KEEPER_CONVERTER)
|
||||
list(APPEND CLICKHOUSE_BUNDLE clickhouse-keeper-converter)
|
||||
endif ()
|
||||
if (ENABLE_CLICKHOUSE_KEEPER_CLIENT)
|
||||
add_custom_target (clickhouse-keeper-client ALL COMMAND ${CMAKE_COMMAND} -E create_symlink clickhouse clickhouse-keeper-client DEPENDS clickhouse)
|
||||
install (FILES "${CMAKE_CURRENT_BINARY_DIR}/clickhouse-keeper-client" DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
|
||||
if (NOT BUILD_STANDALONE_KEEPER)
|
||||
add_custom_target (clickhouse-keeper-client ALL COMMAND ${CMAKE_COMMAND} -E create_symlink clickhouse clickhouse-keeper-client DEPENDS clickhouse)
|
||||
install (FILES "${CMAKE_CURRENT_BINARY_DIR}/clickhouse-keeper-client" DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
|
||||
# symlink to standalone keeper binary
|
||||
else ()
|
||||
add_custom_target (clickhouse-keeper-client ALL COMMAND ${CMAKE_COMMAND} -E create_symlink clickhouse-keeper clickhouse-keeper-client DEPENDS clickhouse-keeper)
|
||||
install (FILES "${CMAKE_CURRENT_BINARY_DIR}/clickhouse-keeper-client" DESTINATION "${CMAKE_INSTALL_BINDIR}" COMPONENT clickhouse-keeper)
|
||||
endif ()
|
||||
|
||||
list(APPEND CLICKHOUSE_BUNDLE clickhouse-keeper-client)
|
||||
endif ()
|
||||
if (ENABLE_CLICKHOUSE_DISKS)
|
||||
|
@ -5,13 +5,13 @@
|
||||
#include <iostream>
|
||||
#include <iomanip>
|
||||
#include <optional>
|
||||
#include <string_view>
|
||||
#include <Common/scope_guard_safe.h>
|
||||
#include <boost/program_options.hpp>
|
||||
#include <boost/algorithm/string/replace.hpp>
|
||||
#include <filesystem>
|
||||
#include <string>
|
||||
#include "Client.h"
|
||||
#include "Client/ConnectionString.h"
|
||||
#include "Core/Protocol.h"
|
||||
#include "Parsers/formatAST.h"
|
||||
|
||||
@ -977,13 +977,7 @@ void Client::addOptions(OptionsDescription & options_description)
|
||||
("connection", po::value<std::string>(), "connection to use (from the client config), by default connection name is hostname")
|
||||
("secure,s", "Use TLS connection")
|
||||
("user,u", po::value<std::string>()->default_value("default"), "user")
|
||||
/** If "--password [value]" is used but the value is omitted, the bad argument exception will be thrown.
|
||||
* implicit_value is used to avoid this exception (to allow user to type just "--password")
|
||||
* Since currently boost provides no way to check if a value has been set implicitly for an option,
|
||||
* the "\n" is used to distinguish this case because there is hardly a chance a user would use "\n"
|
||||
* as the password.
|
||||
*/
|
||||
("password", po::value<std::string>()->implicit_value("\n", ""), "password")
|
||||
("password", po::value<std::string>(), "password")
|
||||
("ask-password", "ask-password")
|
||||
("quota_key", po::value<std::string>(), "A string to differentiate quotas when the user have keyed quotas configured on server")
|
||||
|
||||
@ -1248,6 +1242,9 @@ void Client::readArguments(
|
||||
std::vector<Arguments> & external_tables_arguments,
|
||||
std::vector<Arguments> & hosts_and_ports_arguments)
|
||||
{
|
||||
bool has_connection_string = argc >= 2 && tryParseConnectionString(std::string_view(argv[1]), common_arguments, hosts_and_ports_arguments);
|
||||
int start_argument_index = has_connection_string ? 2 : 1;
|
||||
|
||||
/** We allow different groups of arguments:
|
||||
* - common arguments;
|
||||
* - arguments for any number of external tables each in form "--external args...",
|
||||
@ -1260,10 +1257,13 @@ void Client::readArguments(
|
||||
std::string prev_host_arg;
|
||||
std::string prev_port_arg;
|
||||
|
||||
for (int arg_num = 1; arg_num < argc; ++arg_num)
|
||||
for (int arg_num = start_argument_index; arg_num < argc; ++arg_num)
|
||||
{
|
||||
std::string_view arg = argv[arg_num];
|
||||
|
||||
if (has_connection_string)
|
||||
checkIfCmdLineOptionCanBeUsedWithConnectionString(arg);
|
||||
|
||||
if (arg == "--external")
|
||||
{
|
||||
in_external_group = true;
|
||||
@ -1391,6 +1391,14 @@ void Client::readArguments(
|
||||
arg = argv[arg_num];
|
||||
addMultiquery(arg, common_arguments);
|
||||
}
|
||||
else if (arg == "--password" && ((arg_num + 1) >= argc || std::string_view(argv[arg_num + 1]).starts_with('-')))
|
||||
{
|
||||
common_arguments.emplace_back(arg);
|
||||
/// No password was provided by user. Add '\n' as implicit password,
|
||||
/// which encodes that client should ask user for the password.
|
||||
/// '\n' is used because there is hardly a chance that a user would use '\n' as a password.
|
||||
common_arguments.emplace_back("\n");
|
||||
}
|
||||
else
|
||||
common_arguments.emplace_back(arg);
|
||||
}
|
||||
|
@ -127,42 +127,42 @@ void KeeperClient::defineOptions(Poco::Util::OptionSet & options)
|
||||
|
||||
options.addOption(
|
||||
Poco::Util::Option("host", "h", "server hostname. default `localhost`")
|
||||
.argument("host")
|
||||
.argument("<host>")
|
||||
.binding("host"));
|
||||
|
||||
options.addOption(
|
||||
Poco::Util::Option("port", "p", "server port. default `2181`")
|
||||
.argument("port")
|
||||
.argument("<port>")
|
||||
.binding("port"));
|
||||
|
||||
options.addOption(
|
||||
Poco::Util::Option("query", "q", "will execute given query, then exit.")
|
||||
.argument("query")
|
||||
.argument("<query>")
|
||||
.binding("query"));
|
||||
|
||||
options.addOption(
|
||||
Poco::Util::Option("connection-timeout", "", "set connection timeout in seconds. default 10s.")
|
||||
.argument("connection-timeout")
|
||||
.argument("<seconds>")
|
||||
.binding("connection-timeout"));
|
||||
|
||||
options.addOption(
|
||||
Poco::Util::Option("session-timeout", "", "set session timeout in seconds. default 10s.")
|
||||
.argument("session-timeout")
|
||||
.argument("<seconds>")
|
||||
.binding("session-timeout"));
|
||||
|
||||
options.addOption(
|
||||
Poco::Util::Option("operation-timeout", "", "set operation timeout in seconds. default 10s.")
|
||||
.argument("operation-timeout")
|
||||
.argument("<seconds>")
|
||||
.binding("operation-timeout"));
|
||||
|
||||
options.addOption(
|
||||
Poco::Util::Option("history-file", "", "set path of history file. default `~/.keeper-client-history`")
|
||||
.argument("history-file")
|
||||
.argument("<file>")
|
||||
.binding("history-file"));
|
||||
|
||||
options.addOption(
|
||||
Poco::Util::Option("log-level", "", "set log level")
|
||||
.argument("log-level")
|
||||
.argument("<level>")
|
||||
.binding("log-level"));
|
||||
}
|
||||
|
||||
|
@ -112,6 +112,18 @@ if (BUILD_STANDALONE_KEEPER)
|
||||
clickhouse-keeper.cpp
|
||||
)
|
||||
|
||||
# List of resources for clickhouse-keeper client
|
||||
if (ENABLE_CLICKHOUSE_KEEPER_CLIENT)
|
||||
list(APPEND CLICKHOUSE_KEEPER_STANDALONE_SOURCES
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/../../programs/keeper-client/KeeperClient.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/../../programs/keeper-client/Commands.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/../../programs/keeper-client/Parser.cpp
|
||||
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Client/LineReader.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Client/ReplxxLineReader.cpp
|
||||
)
|
||||
endif()
|
||||
|
||||
clickhouse_add_executable(clickhouse-keeper ${CLICKHOUSE_KEEPER_STANDALONE_SOURCES})
|
||||
|
||||
# Remove some redundant dependencies
|
||||
@ -122,6 +134,10 @@ if (BUILD_STANDALONE_KEEPER)
|
||||
target_include_directories(clickhouse-keeper PUBLIC "${CMAKE_CURRENT_BINARY_DIR}/../../src/Core/include") # uses some includes from core
|
||||
target_include_directories(clickhouse-keeper PUBLIC "${CMAKE_CURRENT_BINARY_DIR}/../../src") # uses some includes from common
|
||||
|
||||
if (ENABLE_CLICKHOUSE_KEEPER_CLIENT AND TARGET ch_rust::skim)
|
||||
target_link_libraries(clickhouse-keeper PRIVATE ch_rust::skim)
|
||||
endif()
|
||||
|
||||
target_link_libraries(clickhouse-keeper
|
||||
PRIVATE
|
||||
ch_contrib::abseil_swiss_tables
|
||||
|
@ -34,6 +34,8 @@
|
||||
#include "Core/Defines.h"
|
||||
#include "config.h"
|
||||
#include "config_version.h"
|
||||
#include "config_tools.h"
|
||||
|
||||
|
||||
#if USE_SSL
|
||||
# include <Poco/Net/Context.h>
|
||||
@ -131,7 +133,10 @@ int Keeper::run()
|
||||
if (config().hasOption("help"))
|
||||
{
|
||||
Poco::Util::HelpFormatter help_formatter(Keeper::options());
|
||||
auto header_str = fmt::format("{} [OPTION] [-- [ARG]...]\n"
|
||||
auto header_str = fmt::format("{0} [OPTION] [-- [ARG]...]\n"
|
||||
#if ENABLE_CLICKHOUSE_KEEPER_CLIENT
|
||||
"{0} client [OPTION]\n"
|
||||
#endif
|
||||
"positional arguments can be used to rewrite config.xml properties, for example, --http_port=8010",
|
||||
commandName());
|
||||
help_formatter.setHeader(header_str);
|
||||
|
@ -1,6 +1,30 @@
|
||||
#include <Common/StringUtils/StringUtils.h>
|
||||
#include "config_tools.h"
|
||||
|
||||
|
||||
int mainEntryClickHouseKeeper(int argc, char ** argv);
|
||||
|
||||
#if ENABLE_CLICKHOUSE_KEEPER_CLIENT
|
||||
int mainEntryClickHouseKeeperClient(int argc, char ** argv);
|
||||
#endif
|
||||
|
||||
int main(int argc_, char ** argv_)
|
||||
{
|
||||
#if ENABLE_CLICKHOUSE_KEEPER_CLIENT
|
||||
|
||||
if (argc_ >= 2)
|
||||
{
|
||||
/// 'clickhouse-keeper --client ...' and 'clickhouse-keeper client ...' are OK
|
||||
if (strcmp(argv_[1], "--client") == 0 || strcmp(argv_[1], "client") == 0)
|
||||
{
|
||||
argv_[1] = argv_[0];
|
||||
return mainEntryClickHouseKeeperClient(--argc_, argv_ + 1);
|
||||
}
|
||||
}
|
||||
|
||||
if (argc_ > 0 && (strcmp(argv_[0], "clickhouse-keeper-client") == 0 || endsWith(argv_[0], "/clickhouse-keeper-client")))
|
||||
return mainEntryClickHouseKeeperClient(argc_, argv_);
|
||||
#endif
|
||||
|
||||
return mainEntryClickHouseKeeper(argc_, argv_);
|
||||
}
|
||||
|
@ -1705,7 +1705,6 @@ try
|
||||
#endif
|
||||
|
||||
/// Must be done after initialization of `servers`, because async_metrics will access `servers` variable from its thread.
|
||||
|
||||
async_metrics.start();
|
||||
|
||||
{
|
||||
|
@ -449,7 +449,7 @@ let queries = [
|
||||
FROM system.metric_log
|
||||
WHERE event_date >= toDate(now() - {seconds:UInt32}) AND event_time >= now() - {seconds:UInt32}
|
||||
GROUP BY t
|
||||
ORDER BY t`
|
||||
ORDER BY t WITH FILL STEP {rounding:UInt32}`
|
||||
},
|
||||
{
|
||||
"title": "CPU Usage (cores)",
|
||||
@ -457,7 +457,7 @@ ORDER BY t`
|
||||
FROM system.metric_log
|
||||
WHERE event_date >= toDate(now() - {seconds:UInt32}) AND event_time >= now() - {seconds:UInt32}
|
||||
GROUP BY t
|
||||
ORDER BY t`
|
||||
ORDER BY t WITH FILL STEP {rounding:UInt32}`
|
||||
},
|
||||
{
|
||||
"title": "Queries Running",
|
||||
@ -465,7 +465,7 @@ ORDER BY t`
|
||||
FROM system.metric_log
|
||||
WHERE event_date >= toDate(now() - {seconds:UInt32}) AND event_time >= now() - {seconds:UInt32}
|
||||
GROUP BY t
|
||||
ORDER BY t`
|
||||
ORDER BY t WITH FILL STEP {rounding:UInt32}`
|
||||
},
|
||||
{
|
||||
"title": "Merges Running",
|
||||
@ -473,7 +473,7 @@ ORDER BY t`
|
||||
FROM system.metric_log
|
||||
WHERE event_date >= toDate(now() - {seconds:UInt32}) AND event_time >= now() - {seconds:UInt32}
|
||||
GROUP BY t
|
||||
ORDER BY t`
|
||||
ORDER BY t WITH FILL STEP {rounding:UInt32}`
|
||||
},
|
||||
{
|
||||
"title": "Selected Bytes/second",
|
||||
@ -481,7 +481,7 @@ ORDER BY t`
|
||||
FROM system.metric_log
|
||||
WHERE event_date >= toDate(now() - {seconds:UInt32}) AND event_time >= now() - {seconds:UInt32}
|
||||
GROUP BY t
|
||||
ORDER BY t`
|
||||
ORDER BY t WITH FILL STEP {rounding:UInt32}`
|
||||
},
|
||||
{
|
||||
"title": "IO Wait",
|
||||
@ -489,7 +489,7 @@ ORDER BY t`
|
||||
FROM system.metric_log
|
||||
WHERE event_date >= toDate(now() - {seconds:UInt32}) AND event_time >= now() - {seconds:UInt32}
|
||||
GROUP BY t
|
||||
ORDER BY t`
|
||||
ORDER BY t WITH FILL STEP {rounding:UInt32}`
|
||||
},
|
||||
{
|
||||
"title": "CPU Wait",
|
||||
@ -497,7 +497,7 @@ ORDER BY t`
|
||||
FROM system.metric_log
|
||||
WHERE event_date >= toDate(now() - {seconds:UInt32}) AND event_time >= now() - {seconds:UInt32}
|
||||
GROUP BY t
|
||||
ORDER BY t`
|
||||
ORDER BY t WITH FILL STEP {rounding:UInt32}`
|
||||
},
|
||||
{
|
||||
"title": "OS CPU Usage (Userspace)",
|
||||
@ -506,7 +506,7 @@ FROM system.asynchronous_metric_log
|
||||
WHERE event_date >= toDate(now() - {seconds:UInt32}) AND event_time >= now() - {seconds:UInt32}
|
||||
AND metric = 'OSUserTimeNormalized'
|
||||
GROUP BY t
|
||||
ORDER BY t`
|
||||
ORDER BY t WITH FILL STEP {rounding:UInt32}`
|
||||
},
|
||||
{
|
||||
"title": "OS CPU Usage (Kernel)",
|
||||
@ -515,7 +515,7 @@ FROM system.asynchronous_metric_log
|
||||
WHERE event_date >= toDate(now() - {seconds:UInt32}) AND event_time >= now() - {seconds:UInt32}
|
||||
AND metric = 'OSSystemTimeNormalized'
|
||||
GROUP BY t
|
||||
ORDER BY t`
|
||||
ORDER BY t WITH FILL STEP {rounding:UInt32}`
|
||||
},
|
||||
{
|
||||
"title": "Read From Disk",
|
||||
@ -523,7 +523,7 @@ ORDER BY t`
|
||||
FROM system.metric_log
|
||||
WHERE event_date >= toDate(now() - {seconds:UInt32}) AND event_time >= now() - {seconds:UInt32}
|
||||
GROUP BY t
|
||||
ORDER BY t`
|
||||
ORDER BY t WITH FILL STEP {rounding:UInt32}`
|
||||
},
|
||||
{
|
||||
"title": "Read From Filesystem",
|
||||
@ -531,7 +531,7 @@ ORDER BY t`
|
||||
FROM system.metric_log
|
||||
WHERE event_date >= toDate(now() - {seconds:UInt32}) AND event_time >= now() - {seconds:UInt32}
|
||||
GROUP BY t
|
||||
ORDER BY t`
|
||||
ORDER BY t WITH FILL STEP {rounding:UInt32}`
|
||||
},
|
||||
{
|
||||
"title": "Memory (tracked)",
|
||||
@ -539,7 +539,7 @@ ORDER BY t`
|
||||
FROM system.metric_log
|
||||
WHERE event_date >= toDate(now() - {seconds:UInt32}) AND event_time >= now() - {seconds:UInt32}
|
||||
GROUP BY t
|
||||
ORDER BY t`
|
||||
ORDER BY t WITH FILL STEP {rounding:UInt32}`
|
||||
},
|
||||
{
|
||||
"title": "Load Average (15 minutes)",
|
||||
@ -548,7 +548,7 @@ FROM system.asynchronous_metric_log
|
||||
WHERE event_date >= toDate(now() - {seconds:UInt32}) AND event_time >= now() - {seconds:UInt32}
|
||||
AND metric = 'LoadAverage15'
|
||||
GROUP BY t
|
||||
ORDER BY t`
|
||||
ORDER BY t WITH FILL STEP {rounding:UInt32}`
|
||||
},
|
||||
{
|
||||
"title": "Selected Rows/second",
|
||||
@ -556,7 +556,7 @@ ORDER BY t`
|
||||
FROM system.metric_log
|
||||
WHERE event_date >= toDate(now() - {seconds:UInt32}) AND event_time >= now() - {seconds:UInt32}
|
||||
GROUP BY t
|
||||
ORDER BY t`
|
||||
ORDER BY t WITH FILL STEP {rounding:UInt32}`
|
||||
},
|
||||
{
|
||||
"title": "Inserted Rows/second",
|
||||
@ -564,7 +564,7 @@ ORDER BY t`
|
||||
FROM system.metric_log
|
||||
WHERE event_date >= toDate(now() - {seconds:UInt32}) AND event_time >= now() - {seconds:UInt32}
|
||||
GROUP BY t
|
||||
ORDER BY t`
|
||||
ORDER BY t WITH FILL STEP {rounding:UInt32}`
|
||||
},
|
||||
{
|
||||
"title": "Total MergeTree Parts",
|
||||
@ -573,7 +573,7 @@ FROM system.asynchronous_metric_log
|
||||
WHERE event_date >= toDate(now() - {seconds:UInt32}) AND event_time >= now() - {seconds:UInt32}
|
||||
AND metric = 'TotalPartsOfMergeTreeTables'
|
||||
GROUP BY t
|
||||
ORDER BY t`
|
||||
ORDER BY t WITH FILL STEP {rounding:UInt32}`
|
||||
},
|
||||
{
|
||||
"title": "Max Parts For Partition",
|
||||
@ -582,7 +582,7 @@ FROM system.asynchronous_metric_log
|
||||
WHERE event_date >= toDate(now() - {seconds:UInt32}) AND event_time >= now() - {seconds:UInt32}
|
||||
AND metric = 'MaxPartCountForPartition'
|
||||
GROUP BY t
|
||||
ORDER BY t`
|
||||
ORDER BY t WITH FILL STEP {rounding:UInt32}`
|
||||
}
|
||||
];
|
||||
|
||||
|
@ -201,6 +201,7 @@ enum class AccessType
|
||||
M(URL, "", GLOBAL, SOURCES) \
|
||||
M(REMOTE, "", GLOBAL, SOURCES) \
|
||||
M(MONGO, "", GLOBAL, SOURCES) \
|
||||
M(REDIS, "", GLOBAL, SOURCES) \
|
||||
M(MEILISEARCH, "", GLOBAL, SOURCES) \
|
||||
M(MYSQL, "", GLOBAL, SOURCES) \
|
||||
M(POSTGRES, "", GLOBAL, SOURCES) \
|
||||
|
@ -333,7 +333,7 @@ void ContextAccess::calculateAccessRights() const
|
||||
boost::algorithm::join(roles_info->getCurrentRolesNames(), ", "),
|
||||
boost::algorithm::join(roles_info->getEnabledRolesNames(), ", "));
|
||||
}
|
||||
LOG_TRACE(trace_log, "Settings: readonly={}, allow_ddl={}, allow_introspection_functions={}", params.readonly, params.allow_ddl, params.allow_introspection);
|
||||
LOG_TRACE(trace_log, "Settings: readonly = {}, allow_ddl = {}, allow_introspection_functions = {}", params.readonly, params.allow_ddl, params.allow_introspection);
|
||||
LOG_TRACE(trace_log, "List of all grants: {}", access->toString());
|
||||
LOG_TRACE(trace_log, "List of all grants including implicit: {}", access_with_implicit->toString());
|
||||
}
|
||||
|
@ -5,6 +5,7 @@
|
||||
#include <Access/GSSAcceptor.h>
|
||||
#include <base/defines.h>
|
||||
#include <base/types.h>
|
||||
#include <base/extended_types.h>
|
||||
|
||||
#include <chrono>
|
||||
#include <map>
|
||||
@ -42,7 +43,7 @@ public:
|
||||
private:
|
||||
struct LDAPCacheEntry
|
||||
{
|
||||
std::size_t last_successful_params_hash = 0;
|
||||
UInt128 last_successful_params_hash = 0;
|
||||
std::chrono::steady_clock::time_point last_successful_authentication_timestamp;
|
||||
LDAPClient::SearchResultsList last_successful_role_search_results;
|
||||
};
|
||||
|
@ -122,7 +122,7 @@ public:
|
||||
size_t size;
|
||||
readVarUInt(size, in);
|
||||
|
||||
static constexpr size_t max_size = 1_GiB;
|
||||
static constexpr size_t max_size = 100_GiB;
|
||||
|
||||
if (size == 0)
|
||||
throw Exception(ErrorCodes::INCORRECT_DATA, "Incorrect size (0) in groupBitmap.");
|
||||
|
@ -157,8 +157,8 @@ public:
|
||||
void read(DB::ReadBuffer & buf)
|
||||
{
|
||||
size_t size = 0;
|
||||
DB::readIntBinary<size_t>(size, buf);
|
||||
DB::readIntBinary<size_t>(total_values, buf);
|
||||
readBinaryLittleEndian(size, buf);
|
||||
readBinaryLittleEndian(total_values, buf);
|
||||
|
||||
/// Compatibility with old versions.
|
||||
if (size > total_values)
|
||||
@ -171,16 +171,16 @@ public:
|
||||
|
||||
samples.resize(size);
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
DB::readPODBinary(samples[i], buf);
|
||||
readBinaryLittleEndian(samples[i], buf);
|
||||
|
||||
sorted = false;
|
||||
}
|
||||
|
||||
void write(DB::WriteBuffer & buf) const
|
||||
{
|
||||
size_t size = samples.size();
|
||||
DB::writeIntBinary<size_t>(size, buf);
|
||||
DB::writeIntBinary<size_t>(total_values, buf);
|
||||
const size_t size = samples.size();
|
||||
writeBinaryLittleEndian(size, buf);
|
||||
writeBinaryLittleEndian(total_values, buf);
|
||||
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
{
|
||||
@ -190,12 +190,12 @@ public:
|
||||
/// Here we ensure that padding is zero without changing the protocol.
|
||||
/// TODO: After implementation of "versioning aggregate function state",
|
||||
/// change the serialization format.
|
||||
|
||||
Element elem;
|
||||
memset(&elem, 0, sizeof(elem));
|
||||
elem = samples[i];
|
||||
|
||||
DB::writePODBinary(elem, buf);
|
||||
DB::transformEndianness<std::endian::little>(elem);
|
||||
DB::writeString(reinterpret_cast<const char*>(&elem), sizeof(elem), buf);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -185,11 +185,10 @@ void BackupCoordinationReplicatedTables::addPartNames(PartNamesForTableReplica &
|
||||
const String & other_replica_name = **other.replica_names.begin();
|
||||
throw Exception(
|
||||
ErrorCodes::CANNOT_BACKUP_TABLE,
|
||||
"Table {} on replica {} has part {} which is different from the part on replica {}. Must be the same",
|
||||
table_name_for_logs,
|
||||
replica_name,
|
||||
part_name,
|
||||
other_replica_name);
|
||||
"Table {} on replica {} has part {} different from the part on replica {} "
|
||||
"(checksum '{}' on replica {} != checksum '{}' on replica {})",
|
||||
table_name_for_logs, replica_name, part_name, other_replica_name,
|
||||
getHexUIntLowercase(checksum), replica_name, getHexUIntLowercase(other.checksum), other_replica_name);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -85,6 +85,9 @@ void BackupCoordinationStageSync::setError(const String & current_host, const Ex
|
||||
writeException(exception, buf, true);
|
||||
zookeeper->createIfNotExists(zookeeper_path + "/error", buf.str());
|
||||
|
||||
/// When backup/restore fails, it removes the nodes from Zookeeper.
|
||||
/// Sometimes it fails to remove all nodes. It's possible that it removes /error node, but fails to remove /stage node,
|
||||
/// so the following line tries to preserve the error status.
|
||||
auto code = zookeeper->trySet(zookeeper_path, Stage::ERROR);
|
||||
if (code != Coordination::Error::ZOK)
|
||||
throw zkutil::KeeperException(code, zookeeper_path);
|
||||
|
@ -144,6 +144,7 @@ void BackupImpl::open(const ContextPtr & context)
|
||||
if (!uuid)
|
||||
uuid = UUIDHelpers::generateV4();
|
||||
lock_file_name = use_archive ? (archive_params.archive_name + ".lock") : ".lock";
|
||||
lock_file_before_first_file_checked = false;
|
||||
writing_finalized = false;
|
||||
|
||||
/// Check that we can write a backup there and create the lock file to own this destination.
|
||||
@ -833,13 +834,10 @@ void BackupImpl::writeFile(const BackupFileInfo & info, BackupEntryPtr entry)
|
||||
if (writing_finalized)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Backup is already finalized");
|
||||
|
||||
bool should_check_lock_file = false;
|
||||
{
|
||||
std::lock_guard lock{mutex};
|
||||
++num_files;
|
||||
total_size += info.size;
|
||||
if (!num_entries)
|
||||
should_check_lock_file = true;
|
||||
}
|
||||
|
||||
auto src_disk = entry->getDisk();
|
||||
@ -859,7 +857,7 @@ void BackupImpl::writeFile(const BackupFileInfo & info, BackupEntryPtr entry)
|
||||
return;
|
||||
}
|
||||
|
||||
if (!should_check_lock_file)
|
||||
if (!lock_file_before_first_file_checked.exchange(true))
|
||||
checkLockFile(true);
|
||||
|
||||
/// NOTE: `mutex` must be unlocked during copying otherwise writing will be in one thread maximum and hence slow.
|
||||
|
@ -141,6 +141,7 @@ private:
|
||||
std::shared_ptr<IArchiveReader> archive_reader;
|
||||
std::shared_ptr<IArchiveWriter> archive_writer;
|
||||
String lock_file_name;
|
||||
std::atomic<bool> lock_file_before_first_file_checked = false;
|
||||
|
||||
bool writing_finalized = false;
|
||||
bool deduplicate_files = true;
|
||||
|
@ -152,8 +152,7 @@ namespace
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
if (coordination)
|
||||
coordination->setError(Exception(getCurrentExceptionMessageAndPattern(true, true), getCurrentExceptionCode()));
|
||||
sendExceptionToCoordination(coordination, Exception(getCurrentExceptionMessageAndPattern(true, true), getCurrentExceptionCode()));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -413,6 +413,7 @@ dbms_target_link_libraries (
|
||||
boost::system
|
||||
clickhouse_common_io
|
||||
Poco::MongoDB
|
||||
Poco::Redis
|
||||
)
|
||||
|
||||
if (TARGET ch::mysqlxx)
|
||||
|
@ -18,7 +18,7 @@ ConnectionPoolPtr ConnectionPoolFactory::get(
|
||||
String client_name,
|
||||
Protocol::Compression compression,
|
||||
Protocol::Secure secure,
|
||||
Int64 priority)
|
||||
Priority priority)
|
||||
{
|
||||
Key key{
|
||||
max_connections, host, port, default_database, user, password, quota_key, cluster, cluster_secret, client_name, compression, secure, priority};
|
||||
@ -74,7 +74,7 @@ size_t ConnectionPoolFactory::KeyHash::operator()(const ConnectionPoolFactory::K
|
||||
hash_combine(seed, hash_value(k.client_name));
|
||||
hash_combine(seed, hash_value(k.compression));
|
||||
hash_combine(seed, hash_value(k.secure));
|
||||
hash_combine(seed, hash_value(k.priority));
|
||||
hash_combine(seed, hash_value(k.priority.value));
|
||||
return seed;
|
||||
}
|
||||
|
||||
|
@ -1,6 +1,7 @@
|
||||
#pragma once
|
||||
|
||||
#include <Common/PoolBase.h>
|
||||
#include <Common/Priority.h>
|
||||
#include <Client/Connection.h>
|
||||
#include <IO/ConnectionTimeouts.h>
|
||||
#include <Core/Settings.h>
|
||||
@ -34,7 +35,7 @@ public:
|
||||
const Settings * settings = nullptr,
|
||||
bool force_connected = true) = 0;
|
||||
|
||||
virtual Int64 getPriority() const { return 1; }
|
||||
virtual Priority getPriority() const { return Priority{1}; }
|
||||
};
|
||||
|
||||
using ConnectionPoolPtr = std::shared_ptr<IConnectionPool>;
|
||||
@ -60,7 +61,7 @@ public:
|
||||
const String & client_name_,
|
||||
Protocol::Compression compression_,
|
||||
Protocol::Secure secure_,
|
||||
Int64 priority_ = 1)
|
||||
Priority priority_ = Priority{1})
|
||||
: Base(max_connections_,
|
||||
&Poco::Logger::get("ConnectionPool (" + host_ + ":" + toString(port_) + ")")),
|
||||
host(host_),
|
||||
@ -103,7 +104,7 @@ public:
|
||||
return host + ":" + toString(port);
|
||||
}
|
||||
|
||||
Int64 getPriority() const override
|
||||
Priority getPriority() const override
|
||||
{
|
||||
return priority;
|
||||
}
|
||||
@ -134,7 +135,7 @@ private:
|
||||
String client_name;
|
||||
Protocol::Compression compression; /// Whether to compress data when interacting with the server.
|
||||
Protocol::Secure secure; /// Whether to encrypt data when interacting with the server.
|
||||
Int64 priority; /// priority from <remote_servers>
|
||||
Priority priority; /// priority from <remote_servers>
|
||||
};
|
||||
|
||||
/**
|
||||
@ -157,7 +158,7 @@ public:
|
||||
String client_name;
|
||||
Protocol::Compression compression;
|
||||
Protocol::Secure secure;
|
||||
Int64 priority;
|
||||
Priority priority;
|
||||
};
|
||||
|
||||
struct KeyHash
|
||||
@ -180,7 +181,7 @@ public:
|
||||
String client_name,
|
||||
Protocol::Compression compression,
|
||||
Protocol::Secure secure,
|
||||
Int64 priority);
|
||||
Priority priority);
|
||||
private:
|
||||
mutable std::mutex mutex;
|
||||
using ConnectionPoolWeakPtr = std::weak_ptr<IConnectionPool>;
|
||||
|
@ -71,7 +71,7 @@ IConnectionPool::Entry ConnectionPoolWithFailover::get(const ConnectionTimeouts
|
||||
return Base::get(max_ignored_errors, fallback_to_stale_replicas, try_get_entry, get_priority);
|
||||
}
|
||||
|
||||
Int64 ConnectionPoolWithFailover::getPriority() const
|
||||
Priority ConnectionPoolWithFailover::getPriority() const
|
||||
{
|
||||
return (*std::max_element(nested_pools.begin(), nested_pools.end(), [](const auto & a, const auto & b)
|
||||
{
|
||||
|
@ -48,7 +48,7 @@ public:
|
||||
const Settings * settings,
|
||||
bool force_connected) override; /// From IConnectionPool
|
||||
|
||||
Int64 getPriority() const override; /// From IConnectionPool
|
||||
Priority getPriority() const override; /// From IConnectionPool
|
||||
|
||||
/** Allocates up to the specified number of connections to work.
|
||||
* Connections provide access to different replicas of one shard.
|
||||
|
239
src/Client/ConnectionString.cpp
Normal file
239
src/Client/ConnectionString.cpp
Normal file
@ -0,0 +1,239 @@
|
||||
#include "ConnectionString.h"
|
||||
|
||||
#include <Common/Exception.h>
|
||||
#include <Poco/Exception.h>
|
||||
#include <Poco/URI.h>
|
||||
|
||||
#include <array>
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int BAD_ARGUMENTS;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
using namespace std::string_literals;
|
||||
using namespace std::literals::string_view_literals;
|
||||
|
||||
constexpr auto CONNECTION_URI_SCHEME = "clickhouse:"sv;
|
||||
|
||||
const std::unordered_map<std::string_view, std::string_view> PROHIBITED_CLIENT_OPTIONS = {
|
||||
/// Client option, client option long name
|
||||
{"-h", "--host"},
|
||||
{"--host", "--host"},
|
||||
{"--port", "--port"},
|
||||
{"--connection", "--connection"},
|
||||
};
|
||||
|
||||
std::string uriDecode(const std::string & uri_encoded_string, bool plus_as_space)
|
||||
{
|
||||
std::string decoded_string;
|
||||
Poco::URI::decode(uri_encoded_string, decoded_string, plus_as_space);
|
||||
return decoded_string;
|
||||
}
|
||||
|
||||
void getHostAndPort(const Poco::URI & uri, std::vector<std::vector<std::string>> & hosts_and_ports_arguments)
|
||||
{
|
||||
std::vector<std::string> host_and_port;
|
||||
const std::string & host = uri.getHost();
|
||||
if (!host.empty())
|
||||
{
|
||||
host_and_port.push_back("--host=" + uriDecode(host, false));
|
||||
}
|
||||
|
||||
// Port can be written without host (":9000"). Empty host name equals to default host.
|
||||
auto port = uri.getPort();
|
||||
if (port != 0)
|
||||
host_and_port.push_back("--port=" + std::to_string(port));
|
||||
|
||||
if (!host_and_port.empty())
|
||||
hosts_and_ports_arguments.push_back(std::move(host_and_port));
|
||||
}
|
||||
|
||||
void buildConnectionString(
|
||||
std::string_view host_and_port,
|
||||
std::string_view right_part,
|
||||
Poco::URI & uri,
|
||||
std::vector<std::vector<std::string>> & hosts_and_ports_arguments)
|
||||
{
|
||||
// User info does not matter in sub URI
|
||||
auto uri_string = std::string(CONNECTION_URI_SCHEME);
|
||||
if (!host_and_port.empty())
|
||||
{
|
||||
uri_string.append("//");
|
||||
uri_string.append(host_and_port);
|
||||
}
|
||||
|
||||
// Right part from string includes '/database?[params]'
|
||||
uri_string.append(right_part);
|
||||
try
|
||||
{
|
||||
uri = Poco::URI(uri_string);
|
||||
}
|
||||
catch (const Poco::URISyntaxException & invalid_uri_exception)
|
||||
{
|
||||
throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS,
|
||||
"Invalid connection string syntax {}: {}", uri_string, invalid_uri_exception.what());
|
||||
}
|
||||
|
||||
getHostAndPort(uri, hosts_and_ports_arguments);
|
||||
}
|
||||
|
||||
std::string makeArgument(const std::string & connection_string_parameter_name)
|
||||
{
|
||||
return (connection_string_parameter_name.size() == 1 ? "-"s : "--"s) + connection_string_parameter_name;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
bool tryParseConnectionString(
|
||||
std::string_view connection_string,
|
||||
std::vector<std::string> & common_arguments,
|
||||
std::vector<std::vector<std::string>> & hosts_and_ports_arguments)
|
||||
{
|
||||
if (connection_string == CONNECTION_URI_SCHEME)
|
||||
return true;
|
||||
|
||||
if (!connection_string.starts_with(CONNECTION_URI_SCHEME))
|
||||
return false;
|
||||
|
||||
size_t offset = CONNECTION_URI_SCHEME.size();
|
||||
if ((connection_string.substr(offset).starts_with("//")))
|
||||
offset += 2;
|
||||
|
||||
auto hosts_end_pos = std::string_view::npos;
|
||||
auto hosts_or_user_info_end_pos = connection_string.find_first_of("?/@", offset);
|
||||
|
||||
auto has_user_info = hosts_or_user_info_end_pos != std::string_view::npos && connection_string[hosts_or_user_info_end_pos] == '@';
|
||||
if (has_user_info)
|
||||
{
|
||||
// Move offset right after user info
|
||||
offset = hosts_or_user_info_end_pos + 1;
|
||||
hosts_end_pos = connection_string.find_first_of("?/@", offset);
|
||||
// Several '@' symbols in connection string is prohibited.
|
||||
// If user name contains '@' then it should be percent-encoded.
|
||||
// several users: 'usr1@host1,@usr2@host2' is invalid.
|
||||
if (hosts_end_pos != std::string_view::npos && connection_string[hosts_end_pos] == '@')
|
||||
{
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS,
|
||||
"Symbols '@' in URI in password or user name should be percent-encoded. Individual user names for different hosts also prohibited. {}",
|
||||
connection_string);
|
||||
}
|
||||
}
|
||||
else
|
||||
hosts_end_pos = hosts_or_user_info_end_pos;
|
||||
|
||||
const auto * hosts_end = hosts_end_pos != std::string_view::npos ? connection_string.begin() + hosts_end_pos
|
||||
: connection_string.end();
|
||||
|
||||
try
|
||||
{
|
||||
/** Poco::URI doesn't support several hosts in URI.
|
||||
* Split string clickhouse:[user[:password]@]host1:port1, ... , hostN:portN[database]?[query_parameters]
|
||||
* into multiple string for each host:
|
||||
* clickhouse:[user[:password]@]host1:port1[database]?[query_parameters]
|
||||
* ...
|
||||
* clickhouse:[user[:password]@]hostN:portN[database]?[query_parameters]
|
||||
*/
|
||||
Poco::URI uri;
|
||||
const auto * last_host_begin = connection_string.begin() + offset;
|
||||
for (const auto * it = last_host_begin; it != hosts_end; ++it)
|
||||
{
|
||||
if (*it == ',')
|
||||
{
|
||||
buildConnectionString({last_host_begin, it}, {hosts_end, connection_string.end()}, uri, hosts_and_ports_arguments);
|
||||
last_host_begin = it + 1;
|
||||
}
|
||||
}
|
||||
|
||||
if (uri.empty())
|
||||
{
|
||||
// URI has no host specified
|
||||
uri = std::string(connection_string);
|
||||
getHostAndPort(uri, hosts_and_ports_arguments);
|
||||
}
|
||||
else
|
||||
buildConnectionString({last_host_begin, hosts_end}, {hosts_end, connection_string.end()}, uri, hosts_and_ports_arguments);
|
||||
|
||||
Poco::URI::QueryParameters params = uri.getQueryParameters();
|
||||
for (const auto & param : params)
|
||||
{
|
||||
if (param.first == "secure" || param.first == "s")
|
||||
{
|
||||
if (!param.second.empty())
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "secure URI query parameter does not allow value");
|
||||
|
||||
common_arguments.push_back(makeArgument(param.first));
|
||||
}
|
||||
else
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "URI query parameter {} is not supported", param.first);
|
||||
}
|
||||
|
||||
auto user_info = uri.getUserInfo();
|
||||
if (!user_info.empty())
|
||||
{
|
||||
// Poco::URI doesn't decode user name/password by default.
|
||||
// But ClickHouse allows to have users with email user name like: 'john@some_mail.com'
|
||||
// john@some_mail.com should be percent-encoded: 'john%40some_mail.com'
|
||||
size_t pos = user_info.find(':');
|
||||
if (pos != std::string::npos)
|
||||
{
|
||||
common_arguments.push_back("--user");
|
||||
common_arguments.push_back(uriDecode(user_info.substr(0, pos), true));
|
||||
|
||||
++pos; // Skip ':'
|
||||
common_arguments.push_back("--password");
|
||||
if (user_info.size() > pos + 1)
|
||||
common_arguments.push_back(uriDecode(user_info.substr(pos), true));
|
||||
else
|
||||
{
|
||||
// in case of user_info == 'user:', ':' is specified, but password is empty
|
||||
// then add password argument "\n" which means: Ask user for a password.
|
||||
common_arguments.push_back("\n");
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
common_arguments.push_back("--user");
|
||||
common_arguments.push_back(uriDecode(user_info, true));
|
||||
}
|
||||
}
|
||||
|
||||
const auto & database_name = uri.getPath();
|
||||
size_t start_symbol = !database_name.empty() && database_name[0] == '/' ? 1u : 0u;
|
||||
if (database_name.size() > start_symbol)
|
||||
{
|
||||
common_arguments.push_back("--database");
|
||||
common_arguments.push_back(start_symbol == 0u ? database_name : database_name.substr(start_symbol));
|
||||
}
|
||||
}
|
||||
catch (const Poco::URISyntaxException & invalid_uri_exception)
|
||||
{
|
||||
throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS,
|
||||
"Invalid connection string '{}': {}", connection_string, invalid_uri_exception.what());
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void checkIfCmdLineOptionCanBeUsedWithConnectionString(std::string_view command_line_option)
|
||||
{
|
||||
if (PROHIBITED_CLIENT_OPTIONS.contains(command_line_option))
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS,
|
||||
"Mixing a connection string and {} option is prohibited", PROHIBITED_CLIENT_OPTIONS.at(command_line_option));
|
||||
}
|
||||
|
||||
}
|
27
src/Client/ConnectionString.h
Normal file
27
src/Client/ConnectionString.h
Normal file
@ -0,0 +1,27 @@
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
#include <vector>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
/** Tries to parse ClickHouse connection string.
|
||||
* if @connection_string starts with 'clickhouse:' then connection string will be parsed
|
||||
* and converted into a set of arguments for the client.
|
||||
* Connection string format is similar to URI "clickhouse:[//[user[:password]@][hosts_and_ports]][/dbname][?query_parameters]"
|
||||
* with the difference that hosts_and_ports can contain multiple hosts separated by ','.
|
||||
* example: clickhouse://user@host1:port1,host2:port2
|
||||
* @return Returns false if no connection string was specified. If a connection string was specified, returns true if it is valid, and throws an exception if it is invalid.
|
||||
* @exception Throws DB::Exception if URI has valid scheme (clickhouse:), but invalid internals.
|
||||
*/
|
||||
bool tryParseConnectionString(
|
||||
std::string_view connection_string,
|
||||
std::vector<std::string> & common_arguments,
|
||||
std::vector<std::vector<std::string>> & hosts_and_ports_arguments);
|
||||
|
||||
// Throws DB::Exception with BAD_ARGUMENTS if the given command line argument
|
||||
// is not allowed to be used with a connection string.
|
||||
void checkIfCmdLineOptionCanBeUsedWithConnectionString(std::string_view command_line_option);
|
||||
|
||||
}
|
@ -151,13 +151,13 @@ public:
|
||||
|
||||
ColumnPtr compress() const override;
|
||||
|
||||
void forEachSubcolumn(ColumnCallback callback) const override
|
||||
void forEachSubcolumn(MutableColumnCallback callback) override
|
||||
{
|
||||
callback(offsets);
|
||||
callback(data);
|
||||
}
|
||||
|
||||
void forEachSubcolumnRecursively(RecursiveColumnCallback callback) const override
|
||||
void forEachSubcolumnRecursively(RecursiveMutableColumnCallback callback) override
|
||||
{
|
||||
callback(*offsets);
|
||||
offsets->forEachSubcolumnRecursively(callback);
|
||||
|
@ -230,12 +230,12 @@ public:
|
||||
data->getExtremes(min, max);
|
||||
}
|
||||
|
||||
void forEachSubcolumn(ColumnCallback callback) const override
|
||||
void forEachSubcolumn(MutableColumnCallback callback) override
|
||||
{
|
||||
callback(data);
|
||||
}
|
||||
|
||||
void forEachSubcolumnRecursively(RecursiveColumnCallback callback) const override
|
||||
void forEachSubcolumnRecursively(RecursiveMutableColumnCallback callback) override
|
||||
{
|
||||
callback(*data);
|
||||
data->forEachSubcolumnRecursively(callback);
|
||||
|
@ -166,7 +166,7 @@ public:
|
||||
size_t byteSizeAt(size_t n) const override { return getDictionary().byteSizeAt(getIndexes().getUInt(n)); }
|
||||
size_t allocatedBytes() const override { return idx.getPositions()->allocatedBytes() + getDictionary().allocatedBytes(); }
|
||||
|
||||
void forEachSubcolumn(ColumnCallback callback) const override
|
||||
void forEachSubcolumn(MutableColumnCallback callback) override
|
||||
{
|
||||
callback(idx.getPositionsPtr());
|
||||
|
||||
@ -175,7 +175,7 @@ public:
|
||||
callback(dictionary.getColumnUniquePtr());
|
||||
}
|
||||
|
||||
void forEachSubcolumnRecursively(RecursiveColumnCallback callback) const override
|
||||
void forEachSubcolumnRecursively(RecursiveMutableColumnCallback callback) override
|
||||
{
|
||||
callback(*idx.getPositionsPtr());
|
||||
idx.getPositionsPtr()->forEachSubcolumnRecursively(callback);
|
||||
@ -340,7 +340,7 @@ private:
|
||||
explicit Dictionary(MutableColumnPtr && column_unique, bool is_shared);
|
||||
explicit Dictionary(ColumnPtr column_unique, bool is_shared);
|
||||
|
||||
const ColumnPtr & getColumnUniquePtr() const { return column_unique; }
|
||||
const WrappedPtr & getColumnUniquePtr() const { return column_unique; }
|
||||
WrappedPtr & getColumnUniquePtr() { return column_unique; }
|
||||
|
||||
const IColumnUnique & getColumnUnique() const { return static_cast<const IColumnUnique &>(*column_unique); }
|
||||
|
@ -273,12 +273,12 @@ void ColumnMap::getExtremes(Field & min, Field & max) const
|
||||
max = std::move(map_max_value);
|
||||
}
|
||||
|
||||
void ColumnMap::forEachSubcolumn(ColumnCallback callback) const
|
||||
void ColumnMap::forEachSubcolumn(MutableColumnCallback callback)
|
||||
{
|
||||
callback(nested);
|
||||
}
|
||||
|
||||
void ColumnMap::forEachSubcolumnRecursively(RecursiveColumnCallback callback) const
|
||||
void ColumnMap::forEachSubcolumnRecursively(RecursiveMutableColumnCallback callback)
|
||||
{
|
||||
callback(*nested);
|
||||
nested->forEachSubcolumnRecursively(callback);
|
||||
|
@ -88,8 +88,8 @@ public:
|
||||
size_t byteSizeAt(size_t n) const override;
|
||||
size_t allocatedBytes() const override;
|
||||
void protect() override;
|
||||
void forEachSubcolumn(ColumnCallback callback) const override;
|
||||
void forEachSubcolumnRecursively(RecursiveColumnCallback callback) const override;
|
||||
void forEachSubcolumn(MutableColumnCallback callback) override;
|
||||
void forEachSubcolumnRecursively(RecursiveMutableColumnCallback callback) override;
|
||||
bool structureEquals(const IColumn & rhs) const override;
|
||||
double getRatioOfDefaultRows(double sample_ratio) const override;
|
||||
UInt64 getNumberOfDefaultRows() const override;
|
||||
|
@ -130,13 +130,13 @@ public:
|
||||
|
||||
ColumnPtr compress() const override;
|
||||
|
||||
void forEachSubcolumn(ColumnCallback callback) const override
|
||||
void forEachSubcolumn(MutableColumnCallback callback) override
|
||||
{
|
||||
callback(nested_column);
|
||||
callback(null_map);
|
||||
}
|
||||
|
||||
void forEachSubcolumnRecursively(RecursiveColumnCallback callback) const override
|
||||
void forEachSubcolumnRecursively(RecursiveMutableColumnCallback callback) override
|
||||
{
|
||||
callback(*nested_column);
|
||||
nested_column->forEachSubcolumnRecursively(callback);
|
||||
|
@ -664,18 +664,18 @@ size_t ColumnObject::allocatedBytes() const
|
||||
return res;
|
||||
}
|
||||
|
||||
void ColumnObject::forEachSubcolumn(ColumnCallback callback) const
|
||||
void ColumnObject::forEachSubcolumn(MutableColumnCallback callback)
|
||||
{
|
||||
for (const auto & entry : subcolumns)
|
||||
for (const auto & part : entry->data.data)
|
||||
for (auto & entry : subcolumns)
|
||||
for (auto & part : entry->data.data)
|
||||
callback(part);
|
||||
}
|
||||
|
||||
void ColumnObject::forEachSubcolumnRecursively(RecursiveColumnCallback callback) const
|
||||
void ColumnObject::forEachSubcolumnRecursively(RecursiveMutableColumnCallback callback)
|
||||
{
|
||||
for (const auto & entry : subcolumns)
|
||||
for (auto & entry : subcolumns)
|
||||
{
|
||||
for (const auto & part : entry->data.data)
|
||||
for (auto & part : entry->data.data)
|
||||
{
|
||||
callback(*part);
|
||||
part->forEachSubcolumnRecursively(callback);
|
||||
|
@ -206,8 +206,8 @@ public:
|
||||
size_t size() const override;
|
||||
size_t byteSize() const override;
|
||||
size_t allocatedBytes() const override;
|
||||
void forEachSubcolumn(ColumnCallback callback) const override;
|
||||
void forEachSubcolumnRecursively(RecursiveColumnCallback callback) const override;
|
||||
void forEachSubcolumn(MutableColumnCallback callback) override;
|
||||
void forEachSubcolumnRecursively(RecursiveMutableColumnCallback callback) override;
|
||||
void insert(const Field & field) override;
|
||||
void insertDefault() override;
|
||||
void insertFrom(const IColumn & src, size_t n) override;
|
||||
|
@ -751,13 +751,13 @@ bool ColumnSparse::structureEquals(const IColumn & rhs) const
|
||||
return false;
|
||||
}
|
||||
|
||||
void ColumnSparse::forEachSubcolumn(ColumnCallback callback) const
|
||||
void ColumnSparse::forEachSubcolumn(MutableColumnCallback callback)
|
||||
{
|
||||
callback(values);
|
||||
callback(offsets);
|
||||
}
|
||||
|
||||
void ColumnSparse::forEachSubcolumnRecursively(RecursiveColumnCallback callback) const
|
||||
void ColumnSparse::forEachSubcolumnRecursively(RecursiveMutableColumnCallback callback)
|
||||
{
|
||||
callback(*values);
|
||||
values->forEachSubcolumnRecursively(callback);
|
||||
|
@ -140,8 +140,8 @@ public:
|
||||
|
||||
ColumnPtr compress() const override;
|
||||
|
||||
void forEachSubcolumn(ColumnCallback callback) const override;
|
||||
void forEachSubcolumnRecursively(RecursiveColumnCallback callback) const override;
|
||||
void forEachSubcolumn(MutableColumnCallback callback) override;
|
||||
void forEachSubcolumnRecursively(RecursiveMutableColumnCallback callback) override;
|
||||
|
||||
bool structureEquals(const IColumn & rhs) const override;
|
||||
|
||||
|
@ -31,14 +31,12 @@ ColumnString::ColumnString(const ColumnString & src)
|
||||
offsets(src.offsets.begin(), src.offsets.end()),
|
||||
chars(src.chars.begin(), src.chars.end())
|
||||
{
|
||||
if (!offsets.empty())
|
||||
{
|
||||
Offset last_offset = offsets.back();
|
||||
|
||||
/// This will also prevent possible overflow in offset.
|
||||
if (chars.size() != last_offset)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "String offsets has data inconsistent with chars array");
|
||||
}
|
||||
Offset last_offset = offsets.empty() ? 0 : offsets.back();
|
||||
/// This will also prevent possible overflow in offset.
|
||||
if (last_offset != chars.size())
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR,
|
||||
"String offsets has data inconsistent with chars array. Last offset: {}, array length: {}",
|
||||
last_offset, chars.size());
|
||||
}
|
||||
|
||||
|
||||
@ -157,6 +155,7 @@ ColumnPtr ColumnString::filter(const Filter & filt, ssize_t result_size_hint) co
|
||||
Offsets & res_offsets = res->offsets;
|
||||
|
||||
filterArraysImpl<UInt8>(chars, offsets, res_chars, res_offsets, filt, result_size_hint);
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
@ -571,10 +570,11 @@ void ColumnString::protect()
|
||||
|
||||
void ColumnString::validate() const
|
||||
{
|
||||
if (!offsets.empty() && offsets.back() != chars.size())
|
||||
Offset last_offset = offsets.empty() ? 0 : offsets.back();
|
||||
if (last_offset != chars.size())
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR,
|
||||
"ColumnString validation failed: size mismatch (internal logical error) {} != {}",
|
||||
offsets.back(), chars.size());
|
||||
last_offset, chars.size());
|
||||
}
|
||||
|
||||
}
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user