Merge branch 'master' into non-experimental-qpl-deflate

This commit is contained in:
Robert Schulze 2023-06-20 20:06:01 +02:00 committed by GitHub
commit bc7df2baca
470 changed files with 10863 additions and 3581 deletions


@ -74,6 +74,7 @@ ConstructorInitializerIndentWidth: 4
ContinuationIndentWidth: 4
DerivePointerAlignment: false
DisableFormat: false
IndentRequiresClause: false
IndentWidth: 4
IndentWrappedFunctionNames: false
MacroBlockBegin: ''


@ -12,6 +12,7 @@ jobs:
# don't use dockerhub push because this image updates so rarely
WoboqCodebrowser:
runs-on: [self-hosted, style-checker]
timeout-minutes: 420 # the task is pretty heavy, so there's an additional hour
steps:
- name: Set envs
run: |

.gitmodules vendored

@ -19,7 +19,7 @@
url = https://github.com/google/googletest
[submodule "contrib/capnproto"]
path = contrib/capnproto
url = https://github.com/capnproto/capnproto
url = https://github.com/ClickHouse/capnproto
[submodule "contrib/double-conversion"]
path = contrib/double-conversion
url = https://github.com/google/double-conversion


@ -6,8 +6,10 @@ rules:
level: warning
indent-sequences: consistent
line-length:
# there are some bash -c "", so this is OK
max: 300
# there are:
# - bash -c "", so this is OK
# - yaml in tests
max: 1000
level: warning
comments:
min-spaces-from-content: 1


@ -11,3 +11,8 @@ constexpr double interpolateExponential(double min, double max, double ratio)
assert(min > 0 && ratio >= 0 && ratio <= 1);
return min * std::pow(max / min, ratio);
}
constexpr double interpolateLinear(double min, double max, double ratio)
{
return std::lerp(min, max, ratio);
}

contrib/capnproto vendored

@ -1 +1 @@
Subproject commit dc8b50b999777bcb23c89bb5907c785c3f654441
Subproject commit 976209a6d18074804f60d18ef99b6a809d27dadf


@ -4,7 +4,7 @@ if (SANITIZE OR NOT (
))
if (ENABLE_JEMALLOC)
message (${RECONFIGURE_MESSAGE_LEVEL}
"jemalloc is disabled implicitly: it doesn't work with sanitizers and can only be used with x86_64, aarch64, or ppc64le Linux or FreeBSD builds and RelWithDebInfo macOS builds.")
"jemalloc is disabled implicitly: it doesn't work with sanitizers and can only be used with x86_64, aarch64, or ppc64le Linux or FreeBSD builds and RelWithDebInfo macOS builds. Use -DENABLE_JEMALLOC=0")
endif ()
set (ENABLE_JEMALLOC OFF)
else ()


@ -32,7 +32,7 @@ RUN arch=${TARGETARCH:-amd64} \
esac
ARG REPOSITORY="https://s3.amazonaws.com/clickhouse-builds/22.4/31c367d3cd3aefd316778601ff6565119fe36682/package_release"
ARG VERSION="23.5.2.7"
ARG VERSION="23.5.3.24"
ARG PACKAGES="clickhouse-keeper"
# user/group precreated explicitly with fixed uid/gid on purpose.


@ -33,7 +33,7 @@ RUN arch=${TARGETARCH:-amd64} \
# lts / testing / prestable / etc
ARG REPO_CHANNEL="stable"
ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
ARG VERSION="23.5.2.7"
ARG VERSION="23.5.3.24"
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
# user/group precreated explicitly with fixed uid/gid on purpose.


@ -22,7 +22,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list
ARG REPO_CHANNEL="stable"
ARG REPOSITORY="deb https://packages.clickhouse.com/deb ${REPO_CHANNEL} main"
ARG VERSION="23.5.2.7"
ARG VERSION="23.5.3.24"
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
# set non-empty deb_location_url url to create a docker image


@ -20,6 +20,7 @@ For more information and documentation see https://clickhouse.com/.
- The amd64 image requires support for [SSE3 instructions](https://en.wikipedia.org/wiki/SSE3). Virtually all x86 CPUs after 2005 support SSE3.
- The arm64 image requires support for the [ARMv8.2-A architecture](https://en.wikipedia.org/wiki/AArch64#ARMv8.2-A). Most ARM CPUs after 2017 support ARMv8.2-A. A notable exception is Raspberry Pi 4 from 2019 whose CPU only supports ARMv8.0-A.
- Since ClickHouse 23.3, the Ubuntu image uses `ubuntu:22.04` as its base image. It requires Docker version >= `20.10.10`; with older versions, run the container with `docker run --privileged` instead. Alternatively, try the ClickHouse Alpine image.
## How to use this image


@ -12,10 +12,10 @@ RUN apt-get update --yes && \
# We need to get the repository's HEAD each time, so we invalidate the layers' cache
ARG CACHE_INVALIDATOR=0
RUN mkdir /sqlancer && \
wget -q -O- https://github.com/sqlancer/sqlancer/archive/master.tar.gz | \
wget -q -O- https://github.com/sqlancer/sqlancer/archive/main.tar.gz | \
tar zx -C /sqlancer && \
cd /sqlancer/sqlancer-master && \
mvn package -DskipTests && \
cd /sqlancer/sqlancer-main && \
mvn --no-transfer-progress package -DskipTests && \
rm -r /root/.m2
COPY run.sh /


@ -16,7 +16,6 @@ def process_result(result_folder):
"TLPGroupBy",
"TLPHaving",
"TLPWhere",
"TLPWhereGroupBy",
"NoREC",
]
failed_tests = []


@ -33,7 +33,7 @@ cd /workspace
for _ in $(seq 1 60); do if [[ $(wget -q 'localhost:8123' -O-) == 'Ok.' ]]; then break ; else sleep 1; fi ; done
cd /sqlancer/sqlancer-master
cd /sqlancer/sqlancer-main
TIMEOUT=300
NUM_QUERIES=1000


@ -15,6 +15,9 @@ dpkg -i package_folder/clickhouse-client_*.deb
ln -s /usr/share/clickhouse-test/clickhouse-test /usr/bin/clickhouse-test
# shellcheck disable=SC1091
source /usr/share/clickhouse-test/ci/attach_gdb.lib || true # FIXME: to not break old builds, clean on 2023-09-01
# install test configs
/usr/share/clickhouse-test/config/install.sh
@ -85,6 +88,8 @@ fi
sleep 5
attach_gdb_to_clickhouse || true # FIXME: to not break old builds, clean on 2023-09-01
function run_tests()
{
set -x


@ -61,6 +61,7 @@ configure
# it contains some new settings, but we can safely remove it
rm /etc/clickhouse-server/config.d/merge_tree.xml
rm /etc/clickhouse-server/users.d/nonconst_timezone.xml
start
stop
@ -88,6 +89,7 @@ configure
# it contains some new settings, but we can safely remove it
rm /etc/clickhouse-server/config.d/merge_tree.xml
rm /etc/clickhouse-server/users.d/nonconst_timezone.xml
start


@ -0,0 +1,19 @@
---
sidebar_position: 1
sidebar_label: 2023
---
# 2023 Changelog
### ClickHouse release v22.8.19.10-lts (989bc2fe8b0) FIXME as compared to v22.8.18.31-lts (4de7a95a544)
#### Bug Fix (user-visible misbehavior in an official stable release)
* Fix subtly broken copy-on-write of ColumnLowCardinality dictionary [#51064](https://github.com/ClickHouse/ClickHouse/pull/51064) ([Michael Kolupaev](https://github.com/al13n321)).
* Generate safe IVs [#51086](https://github.com/ClickHouse/ClickHouse/pull/51086) ([Salvatore Mesoraca](https://github.com/aiven-sal)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* Fix a versions' tweak for tagged commits, improve version_helper [#51035](https://github.com/ClickHouse/ClickHouse/pull/51035) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Sqlancer has changed master to main [#51060](https://github.com/ClickHouse/ClickHouse/pull/51060) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).


@ -0,0 +1,22 @@
---
sidebar_position: 1
sidebar_label: 2023
---
# 2023 Changelog
### ClickHouse release v23.3.4.17-lts (2c99b73ff40) FIXME as compared to v23.3.3.52-lts (cb963c474db)
#### Bug Fix (user-visible misbehavior in an official stable release)
* Fix crash when Pool::Entry::disconnect() is called [#50334](https://github.com/ClickHouse/ClickHouse/pull/50334) ([Val Doroshchuk](https://github.com/valbok)).
* Avoid storing logs in Keeper containing unknown operation [#50751](https://github.com/ClickHouse/ClickHouse/pull/50751) ([Antonio Andelic](https://github.com/antonio2368)).
* Fix subtly broken copy-on-write of ColumnLowCardinality dictionary [#51064](https://github.com/ClickHouse/ClickHouse/pull/51064) ([Michael Kolupaev](https://github.com/al13n321)).
* Generate safe IVs [#51086](https://github.com/ClickHouse/ClickHouse/pull/51086) ([Salvatore Mesoraca](https://github.com/aiven-sal)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* Don't mark a part as broken on `Poco::TimeoutException` [#50811](https://github.com/ClickHouse/ClickHouse/pull/50811) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Fix a versions' tweak for tagged commits, improve version_helper [#51035](https://github.com/ClickHouse/ClickHouse/pull/51035) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Sqlancer has changed master to main [#51060](https://github.com/ClickHouse/ClickHouse/pull/51060) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).


@ -0,0 +1,22 @@
---
sidebar_position: 1
sidebar_label: 2023
---
# 2023 Changelog
### ClickHouse release v23.4.4.16-stable (747ba4fc6a0) FIXME as compared to v23.4.3.48-stable (d9199f8d3cc)
#### Bug Fix (user-visible misbehavior in an official stable release)
* Fix crash when Pool::Entry::disconnect() is called [#50334](https://github.com/ClickHouse/ClickHouse/pull/50334) ([Val Doroshchuk](https://github.com/valbok)).
* Fix iceberg V2 optional metadata parsing [#50974](https://github.com/ClickHouse/ClickHouse/pull/50974) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix subtly broken copy-on-write of ColumnLowCardinality dictionary [#51064](https://github.com/ClickHouse/ClickHouse/pull/51064) ([Michael Kolupaev](https://github.com/al13n321)).
* Generate safe IVs [#51086](https://github.com/ClickHouse/ClickHouse/pull/51086) ([Salvatore Mesoraca](https://github.com/aiven-sal)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* Don't mark a part as broken on `Poco::TimeoutException` [#50811](https://github.com/ClickHouse/ClickHouse/pull/50811) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Fix a versions' tweak for tagged commits, improve version_helper [#51035](https://github.com/ClickHouse/ClickHouse/pull/51035) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Sqlancer has changed master to main [#51060](https://github.com/ClickHouse/ClickHouse/pull/51060) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).


@ -0,0 +1,26 @@
---
sidebar_position: 1
sidebar_label: 2023
---
# 2023 Changelog
### ClickHouse release v23.5.3.24-stable (76f54616d3b) FIXME as compared to v23.5.2.7-stable (5751aa1ab9f)
#### Bug Fix (user-visible misbehavior in an official stable release)
* Fix Log family table return wrong rows count after truncate [#50585](https://github.com/ClickHouse/ClickHouse/pull/50585) ([flynn](https://github.com/ucasfl)).
* Fix bug in `uniqExact` parallel merging [#50590](https://github.com/ClickHouse/ClickHouse/pull/50590) ([Nikita Taranov](https://github.com/nickitat)).
* Revert recent grace hash join changes [#50699](https://github.com/ClickHouse/ClickHouse/pull/50699) ([vdimir](https://github.com/vdimir)).
* Avoid storing logs in Keeper containing unknown operation [#50751](https://github.com/ClickHouse/ClickHouse/pull/50751) ([Antonio Andelic](https://github.com/antonio2368)).
* Add compat setting for non-const timezones [#50834](https://github.com/ClickHouse/ClickHouse/pull/50834) ([Robert Schulze](https://github.com/rschu1ze)).
* Fix iceberg V2 optional metadata parsing [#50974](https://github.com/ClickHouse/ClickHouse/pull/50974) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix subtly broken copy-on-write of ColumnLowCardinality dictionary [#51064](https://github.com/ClickHouse/ClickHouse/pull/51064) ([Michael Kolupaev](https://github.com/al13n321)).
* Generate safe IVs [#51086](https://github.com/ClickHouse/ClickHouse/pull/51086) ([Salvatore Mesoraca](https://github.com/aiven-sal)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* Don't mark a part as broken on `Poco::TimeoutException` [#50811](https://github.com/ClickHouse/ClickHouse/pull/50811) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Fix a versions' tweak for tagged commits, improve version_helper [#51035](https://github.com/ClickHouse/ClickHouse/pull/51035) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Sqlancer has changed master to main [#51060](https://github.com/ClickHouse/ClickHouse/pull/51060) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).


@ -53,6 +53,7 @@ Engines in the family:
- [JDBC](../../engines/table-engines/integrations/jdbc.md)
- [MySQL](../../engines/table-engines/integrations/mysql.md)
- [MongoDB](../../engines/table-engines/integrations/mongodb.md)
- [Redis](../../engines/table-engines/integrations/redis.md)
- [HDFS](../../engines/table-engines/integrations/hdfs.md)
- [S3](../../engines/table-engines/integrations/s3.md)
- [Kafka](../../engines/table-engines/integrations/kafka.md)


@ -48,4 +48,4 @@ SELECT * FROM test_table;
## See also
[Azure Blob Storage Table Function](/docs/en/sql-reference/table-functions/azureBlobStorage.md)
[Azure Blob Storage Table Function](/docs/en/sql-reference/table-functions/azureBlobStorage)


@ -233,6 +233,12 @@ libhdfs3 support HDFS namenode HA.
- `_path` — Path to the file.
- `_file` — Name of the file.
## Storage Settings {#storage-settings}
- [hdfs_truncate_on_insert](/docs/en/operations/settings/settings.md#hdfs-truncate-on-insert) - allows truncating the file before inserting into it. Disabled by default.
- [hdfs_create_multiple_files](/docs/en/operations/settings/settings.md#hdfs_allow_create_multiple_files) - allows creating a new file on each insert if the format has a suffix. Disabled by default.
- [hdfs_skip_empty_files](/docs/en/operations/settings/settings.md#hdfs_skip_empty_files) - allows skipping empty files while reading. Disabled by default.
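For illustration, a minimal sketch of applying these settings per query (the namenode URI, path, and table name below are hypothetical):
```sql
-- Hypothetical HDFS engine table; adjust the namenode URI and path to your cluster.
CREATE TABLE hdfs_engine_table (name String, value UInt32)
ENGINE = HDFS('hdfs://hdfs1:9000/other_storage', 'TSV');

-- Overwrite the target file instead of failing when it already exists.
INSERT INTO hdfs_engine_table SETTINGS hdfs_truncate_on_insert = 1 VALUES ('one', 1);

-- Ignore empty files while reading.
SELECT * FROM hdfs_engine_table SETTINGS hdfs_skip_empty_files = 1;
```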
**See Also**
- [Virtual columns](../../../engines/table-engines/index.md#table_engines-virtual_columns)


@ -7,4 +7,3 @@ sidebar_label: Integrations
# Table Engines for Integrations
ClickHouse provides various means for integrating with external systems, including table engines. Like with all other table engines, the configuration is done using `CREATE TABLE` or `ALTER TABLE` queries. Then from a user perspective, the configured integration looks like a normal table, but queries to it are proxied to the external system. This transparent querying is one of the key advantages of this approach over alternative integration methods, like dictionaries or table functions, which require the use of custom query methods on each use.


@ -35,6 +35,10 @@ The table structure can differ from the original MySQL table structure:
- Column types may differ from those in the original MySQL table. ClickHouse tries to [cast](../../../engines/database-engines/mysql.md#data_types-support) values to the ClickHouse data types.
- The [external_table_functions_use_nulls](../../../operations/settings/settings.md#external-table-functions-use-nulls) setting defines how to handle Nullable columns. Default value: 1. If 0, the table function does not make Nullable columns and inserts default values instead of nulls. This is also applicable for NULL values inside arrays.
:::note
The MySQL Table Engine is currently not available on the ClickHouse builds for MacOS ([issue](https://github.com/ClickHouse/ClickHouse/issues/21191))
:::
**Engine Parameters**
- `host:port` — MySQL server address.


@ -136,7 +136,7 @@ postgresql> SELECT * FROM test;
### Creating Table in ClickHouse, and connecting to PostgreSQL table created above
This example uses the [PostgreSQL table engine](/docs/en/engines/table-engines/integrations/postgresql.md) to connect the ClickHouse table to the PostgreSQL table:
This example uses the [PostgreSQL table engine](/docs/en/engines/table-engines/integrations/postgresql.md) to connect the ClickHouse table to the PostgreSQL table and to issue both SELECT and INSERT statements against the PostgreSQL database:
``` sql
CREATE TABLE default.postgresql_table
@ -150,10 +150,21 @@ ENGINE = PostgreSQL('localhost:5432', 'public', 'test', 'postges_user', 'postgre
### Inserting initial data from PostgreSQL table into ClickHouse table, using a SELECT query
The [postgresql table function](/docs/en/sql-reference/table-functions/postgresql.md) copies the data from PostgreSQL to ClickHouse, which is often used for improving the query performance of the data by querying or performing analytics in ClickHouse rather than in PostgreSQL, or can also be used for migrating data from PostgreSQL to ClickHouse:
The [postgresql table function](/docs/en/sql-reference/table-functions/postgresql.md) copies the data from PostgreSQL to ClickHouse. This is often used to improve query performance by querying or performing analytics in ClickHouse rather than in PostgreSQL, and it can also be used for migrating data from PostgreSQL to ClickHouse. Since we will be copying the data from PostgreSQL to ClickHouse, we will use a MergeTree table engine in ClickHouse and call it `postgresql_copy`:
``` sql
INSERT INTO default.postgresql_table
CREATE TABLE default.postgresql_copy
(
`float_nullable` Nullable(Float32),
`str` String,
`int_id` Int32
)
ENGINE = MergeTree
ORDER BY (int_id);
```
``` sql
INSERT INTO default.postgresql_copy
SELECT * FROM postgresql('localhost:5432', 'public', 'test', 'postges_user', 'postgres_password');
```
@ -164,13 +175,13 @@ If then performing ongoing synchronization between the PostgreSQL table and Clic
This would require keeping track of the max ID or timestamp previously added, such as the following:
``` sql
SELECT max(`int_id`) AS maxIntID FROM default.postgresql_table;
SELECT max(`int_id`) AS maxIntID FROM default.postgresql_copy;
```
Then inserting values from PostgreSQL table greater than the max
``` sql
INSERT INTO default.postgresql_table
INSERT INTO default.postgresql_copy
SELECT * FROM postgresql('localhost:5432', 'public', 'test', 'postges_user', 'postgres_password')
WHERE int_id > maxIntID;
```
@ -178,7 +189,7 @@ WHERE int_id > maxIntID;
### Selecting data from the resulting ClickHouse table
``` sql
SELECT * FROM postgresql_table WHERE str IN ('test');
SELECT * FROM postgresql_copy WHERE str IN ('test');
```
``` text


@ -0,0 +1,119 @@
---
slug: /en/engines/table-engines/integrations/redis
sidebar_position: 43
sidebar_label: Redis
---
# Redis
This engine allows integrating ClickHouse with [Redis](https://redis.io/). Since Redis uses a key-value model, we strongly recommend querying it only in a point-wise fashion, such as `WHERE k = xx` or `WHERE k IN (xx, xx)`.
## Creating a Table {#creating-a-table}
``` sql
CREATE TABLE [IF NOT EXISTS] [db.]table_name
(
name1 [type1],
name2 [type2],
...
) ENGINE = Redis(host:port[, db_index[, password[, pool_size]]]) PRIMARY KEY(primary_key_name);
```
**Engine Parameters**
- `host:port` — Redis server address. The port can be omitted, in which case the default Redis port 6379 is used.
- `db_index` — Redis database index, in the range 0 to 15. Default is 0.
- `password` — User password. Default is an empty string.
- `pool_size` — Maximum size of the Redis connection pool. Default is 16.
- `primary_key_name` - any column name in the column list.
- The primary key must be specified and supports only one column. The primary key is serialized in binary as the Redis key.
- Columns other than the primary key are serialized in binary as the Redis value, in the corresponding order.
- Queries that filter the key with equality or `IN` are optimized into a multi-key lookup in Redis. Queries without a key filter cause a full table scan, which is a heavy operation.
## Usage Example {#usage-example}
Create a table in ClickHouse that reads data from Redis:
``` sql
CREATE TABLE redis_table
(
`k` String,
`m` String,
`n` UInt32
)
ENGINE = Redis('redis1:6379') PRIMARY KEY(k);
```
Insert:
```sql
INSERT INTO redis_table Values('1', '1', 1), ('2', '2', 2);
```
Query:
``` sql
SELECT COUNT(*) FROM redis_table;
```
``` text
┌─count()─┐
│ 2 │
└─────────┘
```
``` sql
SELECT * FROM redis_table WHERE k='1';
```
```text
┌─k─┬─m─┬─n─┐
│ 1 │ 1 │ 1 │
└───┴───┴───┘
```
``` sql
SELECT * FROM redis_table WHERE n=2;
```
```text
┌─k─┬─m─┬─n─┐
│ 2 │ 2 │ 2 │
└───┴───┴───┘
```
Update:
Note that the primary key cannot be updated.
```sql
ALTER TABLE redis_table UPDATE n=2 WHERE k='1';
```
Delete:
```sql
ALTER TABLE redis_table DELETE WHERE k='1';
```
Truncate:
Flushes the Redis database asynchronously. `TRUNCATE` also supports a SYNC mode.
```sql
TRUNCATE TABLE redis_table SYNC;
```
## Limitations {#limitations}
The Redis engine also supports scanning queries, such as `WHERE k > xx`, but with some limitations:
1. A scanning query may produce duplicated keys in very rare cases while Redis is rehashing. See details in [Redis Scan](https://github.com/redis/redis/blob/e4d183afd33e0b2e6e8d1c79a832f678a04a7886/src/dict.c#L1186-L1269).
2. During the scan, keys can be created and deleted, so the resulting dataset may not represent a valid point in time.
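As a sketch, a scanning query against the `redis_table` created above might look like this (subject to the limitations listed above):
```sql
-- Range scan over keys; may return rare duplicates while Redis is rehashing.
SELECT * FROM redis_table WHERE k > '1';
```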


@ -127,6 +127,12 @@ CREATE TABLE table_with_asterisk (name String, value UInt32)
ENGINE = S3('https://clickhouse-public-datasets.s3.amazonaws.com/my-bucket/{some,another}_folder/*', 'CSV');
```
## Storage Settings {#storage-settings}
- [s3_truncate_on_insert](/docs/en/operations/settings/settings.md#s3-truncate-on-insert) - allows truncating the file before inserting into it. Disabled by default.
- [s3_create_multiple_files](/docs/en/operations/settings/settings.md#s3_allow_create_multiple_files) - allows creating a new file on each insert if the format has a suffix. Disabled by default.
- [s3_skip_empty_files](/docs/en/operations/settings/settings.md#s3_skip_empty_files) - allows skipping empty files while reading. Disabled by default.
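For example, a sketch of applying one of these settings to the glob-based table defined above:
```sql
-- Skip empty objects matched by the glob pattern while reading.
SELECT * FROM table_with_asterisk SETTINGS s3_skip_empty_files = 1;
```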
## S3-related Settings {#settings}
The following settings can be set before query execution or placed into configuration file.


@ -1,104 +1,142 @@
# Approximate Nearest Neighbor Search Indexes [experimental] {#table_engines-ANNIndex}
Nearest neighborhood search refers to the problem of finding the point(s) with the smallest distance to a given point in an n-dimensional
space. Since exact search is in practice usually too slow, the task is often solved with approximate algorithms. A popular use
case of neighbor search is finding similar pictures (texts) for a given picture (text). Pictures (texts) can be decomposed into
[embeddings](https://cloud.google.com/architecture/overview-extracting-and-serving-feature-embeddings-for-machine-learning), and instead of
comparing pictures (texts) pixel-by-pixel (character-by-character), only the embeddings are compared.
Nearest neighborhood search is the problem of finding the M closest points for a given point in an N-dimensional vector space. The most
straightforward approach to solve this problem is a brute force search where the distance between all points in the vector space and the
reference point is computed. This method guarantees perfect accuracy but it is usually too slow for practical applications. Thus, nearest
neighborhood search problems are often solved with [approximative algorithms](https://github.com/erikbern/ann-benchmarks). Approximative
nearest neighborhood search techniques, in conjunction with [embedding
methods](https://cloud.google.com/architecture/overview-extracting-and-serving-feature-embeddings-for-machine-learning) allow searching huge
amounts of media (pictures, songs, articles, etc.) in milliseconds.
In terms of SQL, the problem can be expressed as follows:
Blogs:
- [Vector Search with ClickHouse - Part 1](https://clickhouse.com/blog/vector-search-clickhouse-p1)
- [Vector Search with ClickHouse - Part 2](https://clickhouse.com/blog/vector-search-clickhouse-p2)
In terms of SQL, the nearest neighborhood problem can be expressed as follows:
``` sql
SELECT *
FROM table
WHERE L2Distance(column, Point) < MaxDistance
ORDER BY Distance(vectors, Point)
LIMIT N
```
`vectors` contains N-dimensional values of type [Array](../../../sql-reference/data-types/array.md) or
[Tuple](../../../sql-reference/data-types/tuple.md), for example embeddings. Function `Distance` computes the distance between two vectors.
Often, the Euclidean (L2) distance is chosen as the distance function, but [other
distance functions](/docs/en/sql-reference/functions/distance-functions.md) are also possible. `Point` is the reference point, e.g. `(0.17,
0.33, ...)`, and `N` limits the number of search results.
An alternative formulation of the nearest neighborhood search problem looks as follows:
``` sql
SELECT *
FROM table
ORDER BY L2Distance(column, Point)
WHERE Distance(vectors, Point) < MaxDistance
LIMIT N
```
The queries are expensive because the L2 (Euclidean) distance between `Point` and all points in `column` must be computed. To speed this process up, Approximate Nearest Neighbor Search Indexes (ANN indexes) store a compact representation of the search space (using clustering, search trees, etc.) which allows computing an approximate answer quickly.
While the first query returns the top-`N` closest points to the reference point, the second query returns all points closer to the reference
point than a maximally allowed radius `MaxDistance`. Parameter `N` limits the number of returned values which is useful for situations where
`MaxDistance` is difficult to determine in advance.
# Creating ANN Indexes
With brute force search, both queries are expensive (linear in the number of points) because the distance between all points in `vectors` and
`Point` must be computed. To speed this process up, Approximate Nearest Neighbor Search Indexes (ANN indexes) store a compact representation
of the search space (using clustering, search trees, etc.) which allows computing an approximate answer much faster (in sub-linear time).
As long as ANN indexes are experimental, you first need to `SET allow_experimental_annoy_index = 1`.
# Creating and Using ANN Indexes
Syntax to create an ANN index over an `Array` column:
Syntax to create an ANN index over an [Array](../../../sql-reference/data-types/array.md) column:
```sql
CREATE TABLE table
(
`id` Int64,
`embedding` Array(Float32),
INDEX <ann_index_name> embedding TYPE <ann_index_type>(<ann_index_parameters>) GRANULARITY <N>
`vectors` Array(Float32),
INDEX [ann_index_name] vectors TYPE [ann_index_type]([ann_index_parameters]) [GRANULARITY [N]]
)
ENGINE = MergeTree
ORDER BY id;
```
Syntax to create an ANN index over a `Tuple` column:
Syntax to create an ANN index over a [Tuple](../../../sql-reference/data-types/tuple.md) column:
```sql
CREATE TABLE table
(
`id` Int64,
`embedding` Tuple(Float32[, Float32[, ...]]),
INDEX <ann_index_name> embedding TYPE <ann_index_type>(<ann_index_parameters>) GRANULARITY <N>
`vectors` Tuple(Float32[, Float32[, ...]]),
INDEX [ann_index_name] vectors TYPE [ann_index_type]([ann_index_parameters]) [GRANULARITY [N]]
)
ENGINE = MergeTree
ORDER BY id;
```
ANN indexes are built during column insertion and merge and `INSERT` and `OPTIMIZE` statements will be slower than for ordinary tables. ANNIndexes are ideally used only with immutable or rarely changed data, respectively there are much more read requests than write requests.
Similar to regular skip indexes, ANN indexes are constructed over granules and each indexed block consists of `GRANULARITY = <N>`-many
granules. For example, if the primary index granularity of the table is 8192 (setting `index_granularity = 8192`) and `GRANULARITY = 2`,
then each indexed block will consist of 16384 rows. However, unlike skip indexes, ANN indexes are not only able to skip the entire indexed
block, they are able to skip individual granules in indexed blocks. As a result, the `GRANULARITY` parameter has a different meaning in ANN
indexes than in normal skip indexes. Basically, the bigger `GRANULARITY` is chosen, the more data is provided to a single ANN index, and the
higher the chance that with the right hyper parameters, the index will remember the data structure better.
# Using ANN Indexes
ANN indexes are built during column insertion and merge. As a result, `INSERT` and `OPTIMIZE` statements will be slower than for ordinary
tables. ANN indexes are ideally used only with immutable or rarely changed data, i.e. when there are far more read requests than write
requests.
ANN indexes support two types of queries:
- WHERE queries:
``` sql
SELECT *
FROM table
WHERE DistanceFunction(column, Point) < MaxDistance
LIMIT N
```
- ORDER BY queries:
``` sql
SELECT *
FROM table
[WHERE ...]
ORDER BY DistanceFunction(column, Point)
ORDER BY Distance(vectors, Point)
LIMIT N
```
`DistanceFunction` is a [distance function](/docs/en/sql-reference/functions/distance-functions.md), `Point` is a reference vector (e.g. `(0.17, 0.33, ...)`) and `MaxDistance` is a floating point value which restricts the size of the neighbourhood.
- WHERE queries:
``` sql
SELECT *
FROM table
WHERE Distance(vectors, Point) < MaxDistance
LIMIT N
```
:::tip
To avoid writing out large vectors, you can use [query parameters](/docs/en//interfaces/cli.md#queries-with-parameters-cli-queries-with-parameters), e.g.
To avoid writing out large vectors, you can use [query
parameters](/docs/en/interfaces/cli.md#queries-with-parameters-cli-queries-with-parameters), e.g.
```bash
clickhouse-client --param_vec='hello' --query="SELECT * FROM table WHERE L2Distance(embedding, {vec: Array(Float32)}) < 1.0"
clickhouse-client --param_vec='hello' --query="SELECT * FROM table WHERE L2Distance(vectors, {vec: Array(Float32)}) < 1.0"
```
:::
ANN indexes cannot speed up queries that contain both a `WHERE DistanceFunction(column, Point) < MaxDistance` and an `ORDER BY DistanceFunction(column, Point)` clause. Also, the approximate algorithms used to determine the nearest neighbors require a limit, hence queries that use an ANN index must have a `LIMIT` clause.
**Restrictions**: Queries that contain both a `WHERE Distance(vectors, Point) < MaxDistance` and an `ORDER BY Distance(vectors, Point)`
clause cannot use ANN indexes. Also, the approximate algorithms used to determine the nearest neighbors require a limit, hence queries
without a `LIMIT` clause cannot utilize ANN indexes. In addition, ANN indexes are only used if the query has a `LIMIT` value smaller than the
setting `max_limit_for_ann_queries` (default: 1 million rows). This is a safeguard to prevent large memory allocations by external libraries
for approximate neighbor search.
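As a sketch (the table name and reference vector below are hypothetical), a query shaped so that an ANN index can be used looks like this:
```sql
-- ORDER BY on the distance plus a LIMIT below max_limit_for_ann_queries,
-- and no WHERE clause on the distance: eligible for an ANN index.
SELECT id
FROM table_with_ann_index
ORDER BY L2Distance(vectors, [0.17, 0.33, 0.75])
LIMIT 10;
```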
**Differences to Skip Indexes** Similar to regular [skip indexes](https://clickhouse.com/docs/en/optimize/skipping-indexes), ANN indexes are
constructed over granules and each indexed block consists of `GRANULARITY = [N]`-many granules (`[N]` = 1 by default for normal skip
indexes). For example, if the primary index granularity of the table is 8192 (setting `index_granularity = 8192`) and `GRANULARITY = 2`,
then each indexed block will contain 16384 rows. However, data structures and algorithms for approximate neighborhood search (usually
provided by external libraries) are inherently row-oriented. They store a compact representation of a set of rows and also return rows for
ANN queries. This causes some rather unintuitive differences in the way ANN indexes behave compared to normal skip indexes.
When a user defines an ANN index on a column, ClickHouse internally creates an ANN "sub-index" for each index block. The sub-index is "local"
in the sense that it only knows about the rows of its containing index block. In the previous example and assuming that a column has 65536
rows, we obtain four index blocks (spanning eight granules) and an ANN sub-index for each index block. A sub-index is theoretically able to
return the rows with the N closest points within its index block directly. However, since ClickHouse loads data from disk to memory at the
granularity of granules, sub-indexes extrapolate matching rows to granule granularity. This is different from regular skip indexes which
skip data at the granularity of index blocks.
The `GRANULARITY` parameter determines how many ANN sub-indexes are created. Bigger `GRANULARITY` values mean fewer but larger ANN
sub-indexes, up to the point where a column (or a column's data part) has only a single sub-index. In that case, the sub-index has a
"global" view of all column rows and can directly return all granules of the column (part) with relevant rows (there are at most
`LIMIT [N]`-many such granules). In a second step, ClickHouse will load these granules and identify the actually best rows by performing a
brute-force distance calculation over all rows of the granules. With a small `GRANULARITY` value, each of the sub-indexes returns up to
`LIMIT N`-many granules. As a result, more granules need to be loaded and post-filtered. Note that the search accuracy is equally good in
both cases; only the processing performance differs. It is generally recommended to use a large `GRANULARITY` for ANN indexes and to fall
back to smaller `GRANULARITY` values only in case of problems like excessive memory consumption of the ANN structures. If no `GRANULARITY`
was specified for ANN indexes, the default value is 100 million.
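For illustration, a sketch of a table definition that follows this recommendation (the table name is hypothetical; Annoy indexes are experimental and require `SET allow_experimental_annoy_index = 1`):
```sql
CREATE TABLE ann_demo
(
    id Int64,
    vectors Array(Float32),
    -- One large sub-index per part: GRANULARITY is chosen far bigger than any expected part size.
    INDEX ann_idx vectors TYPE annoy('L2Distance') GRANULARITY 100000000
)
ENGINE = MergeTree
ORDER BY id;
```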
An ANN index is only used if the query has a `LIMIT` value smaller than setting `max_limit_for_ann_queries` (default: 1 million rows). This is a safety measure which helps to avoid large memory consumption by external libraries for approximate neighbor search.
# Available ANN Indexes
@ -106,51 +144,68 @@ An ANN index is only used if the query has a `LIMIT` value smaller than setting
## Annoy {#annoy}
(currently disabled on ARM due to memory safety problems with the algorithm)
Annoy indexes are currently experimental; to use them, you first need to `SET allow_experimental_annoy_index = 1`. They are also currently
disabled on ARM due to memory safety problems with the algorithm.
This type of ANN index implements [the Annoy algorithm](https://github.com/spotify/annoy) which uses a recursive division of the space in random linear surfaces (lines in 2D, planes in 3D etc.).
This type of ANN index implements [the Annoy algorithm](https://github.com/spotify/annoy) which is based on a recursive division of the
space in random linear surfaces (lines in 2D, planes in 3D etc.).
Syntax to create a Annoy index over a `Array` column:
<div class='vimeo-container'>
<iframe src="//www.youtube.com/embed/QkCCyLW0ehU"
width="640"
height="360"
frameborder="0"
allow="autoplay;
fullscreen;
picture-in-picture"
allowfullscreen>
</iframe>
</div>
Syntax to create an Annoy index over an [Array](../../../sql-reference/data-types/array.md) column:
```sql
CREATE TABLE table
(
id Int64,
embedding Array(Float32),
INDEX <ann_index_name> embedding TYPE annoy([DistanceName[, NumTrees]]) GRANULARITY N
vectors Array(Float32),
INDEX [ann_index_name] vectors TYPE annoy([Distance[, NumTrees]]) [GRANULARITY N]
)
ENGINE = MergeTree
ORDER BY id;
```
Syntax to create a Annoy index over a `Tuple` column:
Syntax to create an ANN index over a [Tuple](../../../sql-reference/data-types/tuple.md) column:
```sql
CREATE TABLE table
(
id Int64,
embedding Tuple(Float32[, Float32[, ...]]),
INDEX <ann_index_name> embedding TYPE annoy([DistanceName[, NumTrees]]) GRANULARITY N
vectors Tuple(Float32[, Float32[, ...]]),
INDEX [ann_index_name] vectors TYPE annoy([Distance[, NumTrees]]) [GRANULARITY N]
)
ENGINE = MergeTree
ORDER BY id;
```
Parameter `DistanceName` is name of a distance function (default `L2Distance`). Annoy currently supports `L2Distance` and `cosineDistance` as distance functions. Parameter `NumTrees` (default: 100) is the number of trees which the algorithm will create. Higher values of `NumTree` mean slower `CREATE` and `SELECT` statements (approximately linearly), but increase the accuracy of search results.
Annoy currently supports `L2Distance` and `cosineDistance` as distance function `Distance`. If no distance function is specified during
index creation, `L2Distance` is used by default. Parameter `NumTrees` is the number of trees which the algorithm creates (default if not
specified: 100). Higher values of `NumTrees` mean more accurate search results but slower index creation / query times (approximately
linearly) as well as larger index sizes.
:::note
Indexes over columns of type `Array` will generally work faster than indexes on `Tuple` columns. All arrays **must** have same length. Use [CONSTRAINT](/docs/en/sql-reference/statements/create/table.md#constraints) to avoid errors. For example, `CONSTRAINT constraint_name_1 CHECK length(embedding) = 256`.
Indexes over columns of type `Array` will generally work faster than indexes on `Tuple` columns. All arrays **must** have the same length.
Use [CONSTRAINT](/docs/en/sql-reference/statements/create/table.md#constraints) to avoid errors. For example, `CONSTRAINT constraint_name_1
CHECK length(vectors) = 256`.
:::
Setting `annoy_index_search_k_nodes` (default: `NumTrees * LIMIT`) determines how many tree nodes are inspected during SELECTs. It can be used to
balance runtime and accuracy at runtime.
Setting `annoy_index_search_k_nodes` (default: `NumTrees * LIMIT`) determines how many tree nodes are inspected during SELECTs. Larger
values mean more accurate results at the cost of longer query runtime:
Example:
``` sql
```sql
SELECT *
FROM table_name [WHERE ...]
ORDER BY L2Distance(column, Point)
FROM table_name
ORDER BY L2Distance(vectors, Point)
LIMIT N
SETTINGS annoy_index_search_k_nodes=100
SETTINGS annoy_index_search_k_nodes=100;
```


@ -491,7 +491,7 @@ Syntax: `tokenbf_v1(size_of_bloom_filter_in_bytes, number_of_hash_functions, ran
#### Special-purpose
- An experimental index to support approximate nearest neighbor (ANN) search. See [here](annindexes.md) for details.
- Experimental indexes to support approximate nearest neighbor (ANN) search. See [here](annindexes.md) for details.
- An experimental inverted index to support full-text search. See [here](invertedindexes.md) for details.
### Functions Support {#functions-support}
@ -853,7 +853,7 @@ Tags:
- `max_data_part_size_bytes` — the maximum size of a part that can be stored on any of the volume's disks. If the size of a merged part is estimated to be bigger than `max_data_part_size_bytes`, then this part will be written to the next volume. Basically this feature allows keeping new/small parts on a hot (SSD) volume and moving them to a cold (HDD) volume when they reach a large size. Do not use this setting if your policy has only one volume.
- `move_factor` — when the amount of available space gets lower than this factor, data automatically starts to move on the next volume if any (by default, 0.1). ClickHouse sorts existing parts by size from largest to smallest (in descending order) and selects parts with the total size that is sufficient to meet the `move_factor` condition. If the total size of all parts is insufficient, all parts will be moved.
- `prefer_not_to_merge` — Disables merging of data parts on this volume. When this setting is enabled, merging data on this volume is not allowed. This allows controlling how ClickHouse works with slow disks.
- `perform_ttl_move_on_insert` — Disables TTL move on data part INSERT. By default if we insert a data part that already expired by the TTL move rule it immediately goes to a volume/disk declared in move rule. This can significantly slowdown insert in case if destination volume/disk is slow (e.g. S3).
- `perform_ttl_move_on_insert` — Disables TTL move on data part INSERT. By default (if enabled), a data part that is inserted and has already expired according to the TTL move rule immediately goes to the volume/disk declared in the move rule. This can significantly slow down inserts when the destination volume/disk is slow (e.g. S3). If disabled, the already expired data part is written to the default volume and then moved to the TTL volume right after.
- `load_balancing` - Policy for disk balancing, `round_robin` or `least_used`.
Configuration examples:
@ -1138,7 +1138,7 @@ These parameters define the cache layer:
Cache parameters:
- `path` — The path where metadata for the cache is stored.
- `max_size` — The size (amount of memory) that the cache can grow to.
- `max_size` — The size (amount of disk space) that the cache can grow to.
:::tip
There are several other cache parameters that you can use to tune your storage, see [using local cache](/docs/en/operations/storing-data.md/#using-local-cache) for the details.


@ -92,3 +92,11 @@ $ echo -e "1,2\n3,4" | clickhouse-local -q "CREATE TABLE table (a Int64, b Int64
`PARTITION BY` — Optional. It is possible to create separate files by partitioning the data on a partition key. In most cases, you don't need a partition key, and if it is needed you generally don't need a partition key more granular than by month. Partitioning does not speed up queries (in contrast to the ORDER BY expression). You should never use too granular partitioning. Don't partition your data by client identifiers or names (instead, make client identifier or name the first column in the ORDER BY expression).
For partitioning by month, use the `toYYYYMM(date_column)` expression, where `date_column` is a column with a date of the type [Date](/docs/en/sql-reference/data-types/date.md). The partition names here have the `"YYYYMM"` format.
## Settings {#settings}
- [engine_file_empty_if_not_exists](/docs/en/operations/settings/settings.md#engine-file-emptyif-not-exists) - allows selecting empty data from a file that doesn't exist. Disabled by default.
- [engine_file_truncate_on_insert](/docs/en/operations/settings/settings.md#engine-file-truncate-on-insert) - allows truncating the file before inserting into it. Disabled by default.
- [engine_file_allow_create_multiple_files](/docs/en/operations/settings/settings.md#engine_file_allow_create_multiple_files) - allows creating a new file on each insert if the format has a suffix. Disabled by default.
- [engine_file_skip_empty_files](/docs/en/operations/settings/settings.md#engine_file_skip_empty_files) - allows skipping empty files while reading. Disabled by default.
- [storage_file_read_method](/docs/en/operations/settings/settings.md#storage_file_read_method) - method of reading data from the storage file, one of: `read`, `pread`, `mmap`. The mmap method does not apply to clickhouse-server (it's intended for clickhouse-local). Default value: `pread` for clickhouse-server, `mmap` for clickhouse-local.
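A minimal sketch of using these settings with a File engine table (the table name is hypothetical):
```sql
-- File engine table stored under the server's data directory.
CREATE TABLE file_engine_table (name String, value UInt32) ENGINE = File(TabSeparated);

-- Truncate the underlying file before this insert (instead of appending to it).
INSERT INTO file_engine_table SETTINGS engine_file_truncate_on_insert = 1 VALUES ('one', 1);

-- Read with an explicit read method (pread is the clickhouse-server default).
SELECT * FROM file_engine_table SETTINGS storage_file_read_method = 'pread';
```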


@ -102,3 +102,7 @@ SELECT * FROM url_engine_table
`PARTITION BY` — Optional. It is possible to create separate files by partitioning the data on a partition key. In most cases, you don't need a partition key, and if it is needed you generally don't need a partition key more granular than by month. Partitioning does not speed up queries (in contrast to the ORDER BY expression). You should never use too granular partitioning. Don't partition your data by client identifiers or names (instead, make client identifier or name the first column in the ORDER BY expression).
For partitioning by month, use the `toYYYYMM(date_column)` expression, where `date_column` is a column with a date of the type [Date](/docs/en/sql-reference/data-types/date.md). The partition names here have the `"YYYYMM"` format.
## Storage Settings {#storage-settings}
- [engine_url_skip_empty_files](/docs/en/operations/settings/settings.md#engine_url_skip_empty_files) - allows skipping empty files while reading. Disabled by default.


@ -194,7 +194,129 @@ You can pass parameters to `clickhouse-client` (all parameters have a default va
- `--print-profile-events` Print `ProfileEvents` packets.
- `--profile-events-delay-ms` Delay between printing `ProfileEvents` packets (-1 - print only totals, 0 - print every single packet).
Since version 20.5, `clickhouse-client` has automatic syntax highlighting (always enabled).
Instead of `--host`, `--port`, `--user` and `--password` options, ClickHouse client also supports connection strings (see next section).
## Connection string {#connection_string}
Alternatively, clickhouse-client supports connecting to a ClickHouse server using a connection string similar to [MongoDB](https://www.mongodb.com/docs/manual/reference/connection-string/), [PostgreSQL](https://www.postgresql.org/docs/current/libpq-connect.html#LIBPQ-CONNSTRING), or [MySQL](https://dev.mysql.com/doc/refman/8.0/en/connecting-using-uri-or-key-value-pairs.html#connecting-using-uri). It has the following syntax:
```text
clickhouse:[//[user[:password]@][hosts_and_ports]][/database][?query_parameters]
```
Where
- `user` - (optional) is a user name,
- `password` - (optional) is a user password. If `:` is specified and the password is blank, the client will prompt for the user's password.
- `hosts_and_ports` - (optional) is a list of hosts and optional ports `host[:port] [, host[:port]], ...`,
- `database` - (optional) is the database name,
- `query_parameters` - (optional) is a list of key-value pairs `param1=value1[&param2=value2], ...`. For some parameters, no value is required. Parameter names and values are case-sensitive.
If no user is specified, the `default` user without a password will be used.
If no host is specified, `localhost` will be used.
If no port is specified, `9000` will be used.
If no database is specified, the `default` database will be used.
If the user name, password or database was specified in the connection string, it cannot be specified using `--user`, `--password` or `--database` (and vice versa).
The host component can be either a host name or an IP address. Put an IPv6 address in square brackets to specify it:
```text
clickhouse://[2001:db8::1234]
```
A connection string can contain multiple hosts. clickhouse-client will try to connect to these hosts in order (i.e. from left to right). After a connection is established, no attempt is made to connect to the remaining hosts.
The connection string must be specified as the first argument of clickhouse-client. The connection string can be combined with arbitrary other [command-line-options](#command-line-options) except `--host/-h` and `--port`.
The following keys are allowed for component `query_parameters`:
- `secure` or shorthand `s` - no value. If specified, the client will connect to the server over a secure connection (TLS). See `secure` in [command-line-options](#command-line-options).
### Percent encoding {#connection_string_uri_percent_encoding}
Non-US ASCII, spaces and special characters in the `user`, `password`, `hosts`, `database` and `query parameters` must be [percent-encoded](https://en.wikipedia.org/wiki/URL_encoding).
### Examples {#connection_string_examples}
Connect to localhost using port 9000 and execute the query `SELECT 1`.
``` bash
clickhouse-client clickhouse://localhost:9000 --query "SELECT 1"
```
Connect to localhost using user `john` with password `secret`, host `127.0.0.1` and port `9000`
``` bash
clickhouse-client clickhouse://john:secret@127.0.0.1:9000
```
Connect to localhost using default user, host with IPV6 address `[::1]` and port `9000`.
``` bash
clickhouse-client clickhouse://[::1]:9000
```
Connect to localhost using port 9000 in multiline mode.
``` bash
clickhouse-client clickhouse://localhost:9000 '-m'
```
Connect to localhost using port 9000 with the user `default`.
``` bash
clickhouse-client clickhouse://default@localhost:9000
# equivalent to:
clickhouse-client clickhouse://localhost:9000 --user default
```
Connect to localhost using port 9000 to `my_database` database.
``` bash
clickhouse-client clickhouse://localhost:9000/my_database
# equivalent to:
clickhouse-client clickhouse://localhost:9000 --database my_database
```
Connect to localhost using port 9000 to the `my_database` database specified in the connection string, with a secure connection enabled via the shorthand `s` URI parameter.
```bash
clickhouse-client clickhouse://localhost/my_database?s
# equivalent to:
clickhouse-client clickhouse://localhost/my_database -s
```
Connect to default host using default port, default user, and default database.
``` bash
clickhouse-client clickhouse:
```
Connect to the default host using the default port, using user `my_user` and no password.
``` bash
clickhouse-client clickhouse://my_user@
# A blank password between : and @ means the client will ask the user to enter the password before starting the connection.
clickhouse-client clickhouse://my_user:@
```
Connect to localhost using an email address as the user name. The `@` symbol is percent-encoded to `%40`.
``` bash
clickhouse-client clickhouse://some_user%40some_mail.com@localhost:9000
```
Connect to one of the provided hosts: `192.168.1.15`, `192.168.1.25`.
``` bash
clickhouse-client clickhouse://192.168.1.15,192.168.1.25
```
### Configuration Files {#configuration_files}


@ -193,6 +193,7 @@ SELECT * FROM nestedt FORMAT TSV
- [output_format_tsv_crlf_end_of_line](/docs/en/operations/settings/settings-formats.md/#output_format_tsv_crlf_end_of_line) - if it is set true, end of line in TSV output format will be `\r\n` instead of `\n`. Default value - `false`.
- [input_format_tsv_skip_first_lines](/docs/en/operations/settings/settings-formats.md/#input_format_tsv_skip_first_lines) - skip specified number of lines at the beginning of data. Default value - `0`.
- [input_format_tsv_detect_header](/docs/en/operations/settings/settings-formats.md/#input_format_tsv_detect_header) - automatically detect header with names and types in TSV format. Default value - `true`.
- [input_format_tsv_skip_trailing_empty_lines](/docs/en/operations/settings/settings-formats.md/#input_format_tsv_skip_trailing_empty_lines) - skip trailing empty lines at the end of data. Default value - `false`.
## TabSeparatedRaw {#tabseparatedraw}
@ -467,7 +468,9 @@ The CSV format supports the output of totals and extremes the same way as `TabSe
- [output_format_csv_crlf_end_of_line](/docs/en/operations/settings/settings-formats.md/#output_format_csv_crlf_end_of_line) - if it is set to true, end of line in CSV output format will be `\r\n` instead of `\n`. Default value - `false`.
- [input_format_csv_skip_first_lines](/docs/en/operations/settings/settings-formats.md/#input_format_csv_skip_first_lines) - skip the specified number of lines at the beginning of data. Default value - `0`.
- [input_format_csv_detect_header](/docs/en/operations/settings/settings-formats.md/#input_format_csv_detect_header) - automatically detect header with names and types in CSV format. Default value - `true`.
- [input_format_csv_skip_trailing_empty_lines](/docs/en/operations/settings/settings-formats.md/#input_format_csv_skip_trailing_empty_lines) - skip trailing empty lines at the end of data. Default value - `false`.
- [input_format_csv_trim_whitespaces](/docs/en/operations/settings/settings-formats.md/#input_format_csv_trim_whitespaces) - trim spaces and tabs in non-quoted CSV strings. Default value - `true`.
- [input_format_csv_allow_whitespace_or_tab_as_delimiter](/docs/en/operations/settings/settings-formats.md/#input_format_csv_allow_whitespace_or_tab_as_delimiter) - Allows using whitespace or tab as the field delimiter in CSV strings. Default value - `false`.
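For example, a sketch of reading a (hypothetical) CSV file while ignoring trailing blank lines:
```sql
SELECT * FROM file('data.csv', CSVWithNames)
SETTINGS input_format_csv_skip_trailing_empty_lines = 1;
```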
## CSVWithNames {#csvwithnames}
@ -495,7 +498,9 @@ the types from input data will be compared with the types of the corresponding c
Similar to [Template](#format-template), but it prints or reads all names and types of columns and uses escaping rule from [format_custom_escaping_rule](/docs/en/operations/settings/settings-formats.md/#format_custom_escaping_rule) setting and delimiters from [format_custom_field_delimiter](/docs/en/operations/settings/settings-formats.md/#format_custom_field_delimiter), [format_custom_row_before_delimiter](/docs/en/operations/settings/settings-formats.md/#format_custom_row_before_delimiter), [format_custom_row_after_delimiter](/docs/en/operations/settings/settings-formats.md/#format_custom_row_after_delimiter), [format_custom_row_between_delimiter](/docs/en/operations/settings/settings-formats.md/#format_custom_row_between_delimiter), [format_custom_result_before_delimiter](/docs/en/operations/settings/settings-formats.md/#format_custom_result_before_delimiter) and [format_custom_result_after_delimiter](/docs/en/operations/settings/settings-formats.md/#format_custom_result_after_delimiter) settings, not from format strings.
If setting [input_format_custom_detect_header](/docs/en/operations/settings/settings.md/#input_format_custom_detect_header) is enabled, ClickHouse will automatically detect header with names and types if any.
If setting [input_format_custom_detect_header](/docs/en/operations/settings/settings-formats.md/#input_format_custom_detect_header) is enabled, ClickHouse will automatically detect header with names and types if any.
If setting [input_format_tsv_skip_trailing_empty_lines](/docs/en/operations/settings/settings-formats.md/#input_format_tsv_skip_trailing_empty_lines) is enabled, trailing empty lines at the end of the file will be skipped.
There is also `CustomSeparatedIgnoreSpaces` format, which is similar to [TemplateIgnoreSpaces](#templateignorespaces).
@ -1873,13 +1878,13 @@ The table below shows supported data types and how they match ClickHouse [data t
| `string (uuid)` \** | [UUID](/docs/en/sql-reference/data-types/uuid.md) | `string (uuid)` \** |
| `fixed(16)` | [Int128/UInt128](/docs/en/sql-reference/data-types/int-uint.md) | `fixed(16)` |
| `fixed(32)` | [Int256/UInt256](/docs/en/sql-reference/data-types/int-uint.md) | `fixed(32)` |
| `record` | [Tuple](/docs/en/sql-reference/data-types/tuple.md) | `record` |
\* `bytes` is default, controlled by [output_format_avro_string_column_pattern](/docs/en/operations/settings/settings-formats.md/#output_format_avro_string_column_pattern)
\** [Avro logical types](https://avro.apache.org/docs/current/spec.html#Logical+Types)
Unsupported Avro data types: `record` (non-root), `map`
Unsupported Avro logical data types: `time-millis`, `time-micros`, `duration`
### Inserting Data {#inserting-data-1}
@ -1918,7 +1923,26 @@ Output Avro file compression and sync interval can be configured with [output_fo
Using the ClickHouse [DESCRIBE](/docs/en/sql-reference/statements/describe-table) function, you can quickly view the inferred format of an Avro file like the following example. This example includes the URL of a publicly accessible Avro file in the ClickHouse S3 public bucket:
``` sql
DESCRIBE url('https://clickhouse-public-datasets.s3.eu-central-1.amazonaws.com/hits.avro','Avro');
```
```
┌─name───────────────────────┬─type────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐
│ WatchID │ Int64 │ │ │ │ │ │
│ JavaEnable │ Int32 │ │ │ │ │ │
│ Title │ String │ │ │ │ │ │
│ GoodEvent │ Int32 │ │ │ │ │ │
│ EventTime │ Int32 │ │ │ │ │ │
│ EventDate │ Date32 │ │ │ │ │ │
│ CounterID │ Int32 │ │ │ │ │ │
│ ClientIP │ Int32 │ │ │ │ │ │
│ ClientIP6 │ FixedString(16) │ │ │ │ │ │
│ RegionID │ Int32 │ │ │ │ │ │
...
│ IslandID │ FixedString(16) │ │ │ │ │ │
│ RequestNum │ Int32 │ │ │ │ │ │
│ RequestTry │ Int32 │ │ │ │ │ │
└────────────────────────────┴─────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
```
## AvroConfluent {#data-format-avro-confluent}


@ -329,8 +329,8 @@ SELECT count() FROM system.schema_inference_cache WHERE storage='S3'
## Text formats {#text-formats}
For text formats, ClickHouse reads the data row by row, extracts column values according to the format,
and then uses some recursive parsers and heuristics to determine the type for each value. The maximum number of rows read from the data in schema inference
is controlled by the setting `input_format_max_rows_to_read_for_schema_inference` with default value 25000.
and then uses some recursive parsers and heuristics to determine the type for each value. The maximum number of rows and bytes read from the data in schema inference
is controlled by the settings `input_format_max_rows_to_read_for_schema_inference` (25000 by default) and `input_format_max_bytes_to_read_for_schema_inference` (32Mb by default).
By default, all inferred types are [Nullable](../sql-reference/data-types/nullable.md), but you can change this by setting `schema_inference_make_columns_nullable` (see examples in the [settings](#settings-for-text-formats) section).
### JSON formats {#json-formats}
@ -1144,13 +1144,15 @@ Line: value_1=2, value_2="Some string 2", value_3="[4, 5, NULL]"$$)
### Settings for text formats {#settings-for-text-formats}
#### input_format_max_rows_to_read_for_schema_inference
#### input_format_max_rows_to_read_for_schema_inference/input_format_max_bytes_to_read_for_schema_inference
This setting controls the maximum number of rows to be read while schema inference.
The more rows are read, the more time is spent on schema inference, but the greater the chance to
These settings control the amount of data to be read during schema inference.
The more rows/bytes are read, the more time is spent on schema inference, but the greater the chance to
correctly determine the types (especially when the data contains a lot of nulls).
Default value: `25000`.
Default values:
- `25000` for `input_format_max_rows_to_read_for_schema_inference`.
- `33554432` (32 MB) for `input_format_max_bytes_to_read_for_schema_inference`.
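As a sketch of how these limits might be applied per query (the file name here is hypothetical), both settings can be passed directly to a schema-inference query:

```sql
-- stop schema inference after the first 1000 rows or 1 MiB of data, whichever is reached first
DESC file('hits.tsv')
SETTINGS input_format_max_rows_to_read_for_schema_inference = 1000,
         input_format_max_bytes_to_read_for_schema_inference = 1048576;
```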
#### column_names_for_schema_inference
@ -1643,7 +1645,7 @@ In schema inference for CapnProto format ClickHouse uses the following type matc
## Strong-typed binary formats {#strong-typed-binary-formats}
In such formats, each serialized value contains information about its type (and possibly about its name), but there is no information about the whole table.
In schema inference for such formats, ClickHouse reads data row by row (up to `input_format_max_rows_to_read_for_schema_inference` rows) and extracts
In schema inference for such formats, ClickHouse reads data row by row (up to `input_format_max_rows_to_read_for_schema_inference` rows or `input_format_max_bytes_to_read_for_schema_inference` bytes) and extracts
the type (and possibly name) for each value from the data and then converts these types to ClickHouse types.
### MsgPack {#msgpack}

View File

@ -83,6 +83,7 @@ The BACKUP and RESTORE statements take a list of DATABASE and TABLE names, a des
- [`compression_method`](/docs/en/sql-reference/statements/create/table.md/#column-compression-codecs) and compression_level
- `password` for the file on disk
- `base_backup`: the destination of the previous backup of this source. For example, `Disk('backups', '1.zip')`
- `structure_only`: if enabled, allows backing up or restoring only the CREATE statements, without table data (see the sketch below)
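A minimal sketch of `structure_only`, assuming a configured `backups` disk and a hypothetical `test.hits` table:

```sql
-- back up only the CREATE statements, without any table data
BACKUP TABLE test.hits TO Disk('backups', 'schema_only.zip') SETTINGS structure_only = 1;

-- restore only the table definition from that backup
RESTORE TABLE test.hits FROM Disk('backups', 'schema_only.zip') SETTINGS structure_only = 1;
```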
### Usage examples
@ -398,4 +399,4 @@ To disallow concurrent backup/restore, you can use these settings respectively.
```
The default value for both is true, so by default concurrent backup/restores are allowed.
When these settings are false on a cluster, only 1 backup/restore is allowed to run on a cluster at a time.
When these settings are false on a cluster, only 1 backup/restore is allowed to run on a cluster at a time.

View File

@ -202,7 +202,7 @@ Default: 15
## dns_max_consecutive_failures
Max connection failures before dropping host from ClickHouse DNS cache
Max consecutive resolving failures before dropping a host from ClickHouse DNS cache
Type: UInt32

View File

@ -137,6 +137,12 @@ The maximum rows of data to read for automatic schema inference.
Default value: `25'000`.
## input_format_max_bytes_to_read_for_schema_inference {#input_format_max_bytes_to_read_for_schema_inference}
The maximum amount of data in bytes to read for automatic schema inference.
Default value: `33554432` (32 MB).
## column_names_for_schema_inference {#column_names_for_schema_inference}
The list of column names to use in schema inference for formats without column names. The format: 'column1,column2,column3,...'
@ -728,6 +734,12 @@ My NULL
My NULL
```
### input_format_tsv_skip_trailing_empty_lines {#input_format_tsv_skip_trailing_empty_lines}
When enabled, trailing empty lines at the end of a TSV file will be skipped.
Disabled by default.
## CSV format settings {#csv-format-settings}
### format_csv_delimiter {#format_csv_delimiter}
@ -882,6 +894,12 @@ My NULL
My NULL
```
### input_format_csv_skip_trailing_empty_lines {#input_format_csv_skip_trailing_empty_lines}
When enabled, trailing empty lines at the end of a CSV file will be skipped.
Disabled by default.
### input_format_csv_trim_whitespaces {#input_format_csv_trim_whitespaces}
Trims spaces and tabs in non-quoted CSV strings.
@ -914,6 +932,38 @@ Result
" string "
```
### input_format_csv_allow_whitespace_or_tab_as_delimiter {#input_format_csv_allow_whitespace_or_tab_as_delimiter}
Allows using whitespace or tab as a field delimiter in CSV strings.
Default value: `false`.
**Examples**
Query
```bash
echo 'a b' | ./clickhouse local -q "select * from table FORMAT CSV" --input-format="CSV" --input_format_csv_allow_whitespace_or_tab_as_delimiter=true --format_csv_delimiter=' '
```
Result
```text
a b
```
Query
```bash
echo 'a b' | ./clickhouse local -q "select * from table FORMAT CSV" --input-format="CSV" --input_format_csv_allow_whitespace_or_tab_as_delimiter=true --format_csv_delimiter='\t'
```
Result
```text
a b
```
## Values format settings {#values-format-settings}
### input_format_values_interpret_expressions {#input_format_values_interpret_expressions}
@ -1475,6 +1525,12 @@ Sets the character that is interpreted as a suffix after the result set for [Cus
Default value: `''`.
### input_format_custom_skip_trailing_empty_lines {#input_format_custom_skip_trailing_empty_lines}
When enabled, trailing empty lines at the end of a file in CustomSeparated format will be skipped.
Disabled by default.
## Regexp format settings {#regexp-format-settings}
### format_regexp_escaping_rule {#format_regexp_escaping_rule}

View File

@ -1957,6 +1957,10 @@ Default value: empty string (disabled)
For the replicated tables, by default only the 100 most recent inserts for each partition are deduplicated (see [replicated_deduplication_window](merge-tree-settings.md/#replicated-deduplication-window), [replicated_deduplication_window_seconds](merge-tree-settings.md/#replicated-deduplication-window-seconds)).
For non-replicated tables, see [non_replicated_deduplication_window](merge-tree-settings.md/#non-replicated-deduplication-window).
:::note
`insert_deduplication_token` works on a partition level (the same as `insert_deduplication` checksum). Multiple partitions can have the same `insert_deduplication_token`.
:::
Example:
```sql
@ -3324,7 +3328,35 @@ Possible values:
Default value: `0`.
## s3_truncate_on_insert
## engine_file_allow_create_multiple_files {#engine_file_allow_create_multiple_files}
Enables or disables creating a new file on each insert in file engine tables if the format has the suffix (`JSON`, `ORC`, `Parquet`, etc.). If enabled, on each insert a new file will be created with a name following this pattern:
`data.Parquet` -> `data.1.Parquet` -> `data.2.Parquet`, etc.
Possible values:
- 0 — `INSERT` query appends new data to the end of the file.
- 1 — `INSERT` query creates a new file.
Default value: `0`.
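A minimal sketch of this behavior with a hypothetical `file_multi` table:

```sql
CREATE TABLE file_multi (x UInt32) ENGINE = File(Parquet);

SET engine_file_allow_create_multiple_files = 1;

INSERT INTO file_multi VALUES (1); -- writes data.Parquet
INSERT INTO file_multi VALUES (2); -- writes data.1.Parquet instead of appending
```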
## engine_file_skip_empty_files {#engine_file_skip_empty_files}
Enables or disables skipping empty files in [File](../../engines/table-engines/special/file.md) engine tables.
Possible values:
- 0 — `SELECT` throws an exception if empty file is not compatible with requested format.
- 1 — `SELECT` returns empty result for empty file.
Default value: `0`.
## storage_file_read_method {#storage_file_read_method}
Method of reading data from storage file, one of: `read`, `pread`, `mmap`. The mmap method does not apply to clickhouse-server (it's intended for clickhouse-local).
Default value: `pread` for clickhouse-server, `mmap` for clickhouse-local.
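A sketch combining the two read-side settings above on a hypothetical set of files:

```sql
-- skip empty files and use the plain pread read method for this query
SELECT count()
FROM file('logs/*.tsv', 'TSV', 'line String')
SETTINGS engine_file_skip_empty_files = 1, storage_file_read_method = 'pread';
```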
## s3_truncate_on_insert {#s3_truncate_on_insert}
Enables or disables truncation before inserts in s3 engine tables. If disabled, an exception will be thrown on insert attempts if an S3 object already exists.
@ -3334,7 +3366,29 @@ Possible values:
Default value: `0`.
## hdfs_truncate_on_insert
## s3_create_new_file_on_insert {#s3_create_new_file_on_insert}
Enables or disables creating a new file on each insert in s3 engine tables. If enabled, on each insert a new S3 object will be created with the key, similar to this pattern:
initial: `data.Parquet.gz` -> `data.1.Parquet.gz` -> `data.2.Parquet.gz`, etc.
Possible values:
- 0 — `INSERT` query appends new data to the end of the file.
- 1 — `INSERT` query creates a new file.
Default value: `0`.
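A sketch of the setting in use; the bucket URL and credentials are placeholders:

```sql
-- with the setting enabled, a second INSERT to the same key writes data.1.parquet
INSERT INTO FUNCTION s3('https://my-bucket.s3.amazonaws.com/data.parquet', 'access_key', 'secret_key', 'Parquet', 'x UInt32')
SETTINGS s3_create_new_file_on_insert = 1
VALUES (1);
```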
## s3_skip_empty_files {#s3_skip_empty_files}
Enables or disables skipping empty files in [S3](../../engines/table-engines/integrations/s3.md) engine tables.
Possible values:
- 0 — `SELECT` throws an exception if empty file is not compatible with requested format.
- 1 — `SELECT` returns empty result for empty file.
Default value: `0`.
## hdfs_truncate_on_insert {#hdfs_truncate_on_insert}
Enables or disables truncation before an insert in hdfs engine tables. If disabled, an exception will be thrown on an attempt to insert if a file in HDFS already exists.
@ -3344,31 +3398,7 @@ Possible values:
Default value: `0`.
## engine_file_allow_create_multiple_files
Enables or disables creating a new file on each insert in file engine tables if the format has the suffix (`JSON`, `ORC`, `Parquet`, etc.). If enabled, on each insert a new file will be created with a name following this pattern:
`data.Parquet` -> `data.1.Parquet` -> `data.2.Parquet`, etc.
Possible values:
- 0 — `INSERT` query appends new data to the end of the file.
- 1 — `INSERT` query replaces existing content of the file with the new data.
Default value: `0`.
## s3_create_new_file_on_insert
Enables or disables creating a new file on each insert in s3 engine tables. If enabled, on each insert a new S3 object will be created with the key, similar to this pattern:
initial: `data.Parquet.gz` -> `data.1.Parquet.gz` -> `data.2.Parquet.gz`, etc.
Possible values:
- 0 — `INSERT` query appends new data to the end of the file.
- 1 — `INSERT` query replaces existing content of the file with the new data.
Default value: `0`.
## hdfs_create_new_file_on_insert
## hdfs_create_new_file_on_insert {#hdfs_create_new_file_on_insert}
Enables or disables creating a new file on each insert in HDFS engine tables. If enabled, on each insert a new HDFS file will be created with the name, similar to this pattern:
@ -3376,7 +3406,27 @@ initial: `data.Parquet.gz` -> `data.1.Parquet.gz` -> `data.2.Parquet.gz`, etc.
Possible values:
- 0 — `INSERT` query appends new data to the end of the file.
- 1 — `INSERT` query replaces existing content of the file with the new data.
- 1 — `INSERT` query creates a new file.
Default value: `0`.
## hdfs_skip_empty_files {#hdfs_skip_empty_files}
Enables or disables skipping empty files in [HDFS](../../engines/table-engines/integrations/hdfs.md) engine tables.
Possible values:
- 0 — `SELECT` throws an exception if empty file is not compatible with requested format.
- 1 — `SELECT` returns empty result for empty file.
Default value: `0`.
## engine_url_skip_empty_files {#engine_url_skip_empty_files}
Enables or disables skipping empty files in [URL](../../engines/table-engines/special/url.md) engine tables.
Possible values:
- 0 — `SELECT` throws an exception if empty file is not compatible with requested format.
- 1 — `SELECT` returns empty result for empty file.
Default value: `0`.
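A sketch with a placeholder URL showing the setting applied per query:

```sql
-- returns an empty result instead of throwing if the remote file is empty
SELECT * FROM url('https://example.com/empty.csv', 'CSV', 'x UInt32')
SETTINGS engine_url_skip_empty_files = 1;
```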

View File

@ -11,7 +11,8 @@ Columns:
- `host` ([String](../../sql-reference/data-types/string.md)) — The hostname/IP of the ZooKeeper node that ClickHouse connected to.
- `port` ([String](../../sql-reference/data-types/string.md)) — The port of the ZooKeeper node that ClickHouse connected to.
- `index` ([UInt8](../../sql-reference/data-types/int-uint.md)) — The index of the ZooKeeper node that ClickHouse connected to. The index is from ZooKeeper config.
- `connected_time` ([String](../../sql-reference/data-types/string.md)) — When the connection was established
- `connected_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — When the connection was established
- `session_uptime_elapsed_seconds` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Seconds elapsed since the connection was established
- `is_expired` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Is the current connection expired.
- `keeper_api_version` ([String](../../sql-reference/data-types/string.md)) — Keeper API version.
- `client_id` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Session id of the connection.
@ -23,7 +24,7 @@ SELECT * FROM system.zookeeper_connection;
```
``` text
┌─name──────────────┬─host─────────┬─port─┬─index─┬──────connected_time─┬─is_expired─┬─keeper_api_version─┬──────────client_id─┐
│ default_zookeeper │ 127.0.0.1 │ 2181 │ 0 │ 2023-05-19 14:30:16 │ 0 │ 0 │ 216349144108826660
└─────────────────────────────────┴──────┴───────┴─────────────────────┴────────────────────────────────┴────────────────────┘
┌─name────┬─host──────┬─port─┬─index─┬──────connected_time─┬─session_uptime_elapsed_seconds─┬─is_expired─┬─keeper_api_version─┬─client_id─┐
│ default │ 127.0.0.1 │ 9181 │ 0 │ 2023-06-15 14:36:01 │ 3058 │ 0 │ 3 │ 5
└─────────┴───────────┴──────┴───────┴─────────────────────┴────────────────────────────────┴────────────┴────────────────────┴───────────┘
```

View File

@ -32,7 +32,7 @@ For example, Decimal32(4) can contain numbers from -99999.9999 to 99999.9999 wit
Internally data is represented as normal signed integers with respective bit width. Real value ranges that can be stored in memory are a bit larger than specified above, which are checked only on conversion from a string.
Because modern CPUs do not support 128-bit integers natively, operations on Decimal128 are emulated. Because of this Decimal128 works significantly slower than Decimal32/Decimal64.
Because modern CPUs do not support 128-bit and 256-bit integers natively, operations on Decimal128 and Decimal256 are emulated. Thus, Decimal128 and Decimal256 work significantly slower than Decimal32/Decimal64.
## Operations and Result Type
@ -59,6 +59,10 @@ Some functions on Decimal return result as Float64 (for example, var or stddev).
During calculations on Decimal, integer overflows might happen. Excessive digits in a fraction are discarded (not rounded). Excessive digits in integer part will lead to an exception.
:::warning
Overflow check is not implemented for Decimal128 and Decimal256. In case of overflow, an incorrect result is returned and no exception is thrown.
:::
``` sql
SELECT toDecimal32(2, 4) AS x, x / 3
```

View File

@ -33,7 +33,7 @@ SELECT
toTypeName(toNullable('') AS val) AS source_type,
toTypeName(toString(val)) AS to_type_result_type,
toTypeName(CAST(val, 'String')) AS cast_result_type
┌─source_type──────┬─to_type_result_type─┬─cast_result_type─┐
│ Nullable(String) │ Nullable(String) │ String │
└──────────────────┴─────────────────────┴──────────────────┘
@ -203,7 +203,7 @@ Result:
## toDate
Converts the argument to [Date](/docs/en/sql-reference/data-types/date.md) data type.
Converts the argument to [Date](/docs/en/sql-reference/data-types/date.md) data type.
If the argument is [DateTime](/docs/en/sql-reference/data-types/datetime.md) or [DateTime64](/docs/en/sql-reference/data-types/datetime64.md), it truncates it and leaves the date component of the DateTime:
@ -232,7 +232,7 @@ SELECT
│ 2022-12-30 │ Date │
└────────────┴──────────────────────────────────┘
1 row in set. Elapsed: 0.001 sec.
1 row in set. Elapsed: 0.001 sec.
```
```sql
@ -314,20 +314,183 @@ SELECT
└─────────────────────┴───────────────┴─────────────┴─────────────────────┘
```
## toDateOrZero
The same as [toDate](#todate) but returns the lower boundary of [Date](/docs/en/sql-reference/data-types/date.md) if an invalid argument is received. Only [String](/docs/en/sql-reference/data-types/string.md) argument is supported.
**Example**
Query:
``` sql
SELECT toDateOrZero('2022-12-30'), toDateOrZero('');
```
Result:
```response
┌─toDateOrZero('2022-12-30')─┬─toDateOrZero('')─┐
│ 2022-12-30 │ 1970-01-01 │
└────────────────────────────┴──────────────────┘
```
## toDateOrNull
The same as [toDate](#todate) but returns `NULL` if an invalid argument is received. Only [String](/docs/en/sql-reference/data-types/string.md) argument is supported.
**Example**
Query:
``` sql
SELECT toDateOrNull('2022-12-30'), toDateOrNull('');
```
Result:
```response
┌─toDateOrNull('2022-12-30')─┬─toDateOrNull('')─┐
│ 2022-12-30 │ ᴺᵁᴸᴸ │
└────────────────────────────┴──────────────────┘
```
## toDateOrDefault
Like [toDate](#todate) but if unsuccessful, returns a default value which is either the second argument (if specified), or otherwise the lower boundary of [Date](/docs/en/sql-reference/data-types/date.md).
**Syntax**
``` sql
toDateOrDefault(expr [, default_value])
```
**Example**
Query:
``` sql
SELECT toDateOrDefault('2022-12-30'), toDateOrDefault('', '2023-01-01'::Date);
```
Result:
```response
┌─toDateOrDefault('2022-12-30')─┬─toDateOrDefault('', CAST('2023-01-01', 'Date'))─┐
│ 2022-12-30 │ 2023-01-01 │
└───────────────────────────────┴─────────────────────────────────────────────────┘
```
## toDateTime
Converts an input value to [DateTime](/docs/en/sql-reference/data-types/datetime.md).
**Syntax**
``` sql
toDateTime(expr[, time_zone ])
```
**Arguments**
- `expr` — The value. [String](/docs/en/sql-reference/data-types/string.md), [Int](/docs/en/sql-reference/data-types/int-uint.md), [Date](/docs/en/sql-reference/data-types/date.md) or [DateTime](/docs/en/sql-reference/data-types/datetime.md).
- `time_zone` — Time zone. [String](/docs/en/sql-reference/data-types/string.md).
If `expr` is a number, it is interpreted as the number of seconds since the beginning of the Unix Epoch (as Unix timestamp).
**Returned value**
- A date time. [DateTime](/docs/en/sql-reference/data-types/datetime.md)
**Example**
Query:
``` sql
SELECT toDateTime('2022-12-30 13:44:17'), toDateTime(1685457500, 'UTC');
```
Result:
```response
┌─toDateTime('2022-12-30 13:44:17')─┬─toDateTime(1685457500, 'UTC')─┐
│ 2022-12-30 13:44:17 │ 2023-05-30 14:38:20 │
└───────────────────────────────────┴───────────────────────────────┘
```
## toDateTimeOrZero
The same as [toDateTime](#todatetime) but returns the lower boundary of [DateTime](/docs/en/sql-reference/data-types/datetime.md) if an invalid argument is received. Only [String](/docs/en/sql-reference/data-types/string.md) argument is supported.
**Example**
Query:
``` sql
SELECT toDateTimeOrZero('2022-12-30 13:44:17'), toDateTimeOrZero('');
```
Result:
```response
┌─toDateTimeOrZero('2022-12-30 13:44:17')─┬─toDateTimeOrZero('')─┐
│ 2022-12-30 13:44:17 │ 1970-01-01 00:00:00 │
└─────────────────────────────────────────┴──────────────────────┘
```
## toDateTimeOrNull
The same as [toDateTime](#todatetime) but returns `NULL` if an invalid argument is received. Only [String](/docs/en/sql-reference/data-types/string.md) argument is supported.
**Example**
Query:
``` sql
SELECT toDateTimeOrNull('2022-12-30 13:44:17'), toDateTimeOrNull('');
```
Result:
```response
┌─toDateTimeOrNull('2022-12-30 13:44:17')─┬─toDateTimeOrNull('')─┐
│ 2022-12-30 13:44:17 │ ᴺᵁᴸᴸ │
└─────────────────────────────────────────┴──────────────────────┘
```
## toDateTimeOrDefault
Like [toDateTime](#todatetime) but if unsuccessful, returns a default value which is either the third argument (if specified), or otherwise the lower boundary of [DateTime](/docs/en/sql-reference/data-types/datetime.md).
**Syntax**
``` sql
toDateTimeOrDefault(expr [, time_zone [, default_value]])
```
**Example**
Query:
``` sql
SELECT toDateTimeOrDefault('2022-12-30 13:44:17'), toDateTimeOrDefault('', 'UTC', '2023-01-01'::DateTime('UTC'));
```
Result:
```response
┌─toDateTimeOrDefault('2022-12-30 13:44:17')─┬─toDateTimeOrDefault('', 'UTC', CAST('2023-01-01', 'DateTime(\'UTC\')'))─┐
│ 2022-12-30 13:44:17 │ 2023-01-01 00:00:00 │
└────────────────────────────────────────────┴─────────────────────────────────────────────────────────────────────────┘
```
## toDate32
Converts the argument to the [Date32](/docs/en/sql-reference/data-types/date32.md) data type. If the value is outside the range, `toDate32` returns the border values supported by [Date32](/docs/en/sql-reference/data-types/date32.md). If the argument has [Date](/docs/en/sql-reference/data-types/date.md) type, its borders are taken into account.
@ -519,6 +682,11 @@ SELECT toDateTime64('2019-01-01 00:00:00', 3, 'Asia/Istanbul') AS value, toTypeN
└─────────────────────────┴─────────────────────────────────────────────────────────────────────┘
```
## toDateTime64OrZero
## toDateTime64OrNull
## toDateTime64OrDefault
## toDecimal(32\|64\|128\|256)
@ -1247,7 +1415,7 @@ Returns DateTime values parsed from input string according to a MySQL style form
**Supported format specifiers**
All format specifiers listed in [formatDateTime](/docs/en/sql-reference/functions/date-time-functions.md#date_time_functions-formatDateTime) except:
- %Q: Quarter (1-4)
- %Q: Quarter (1-4)
**Example**
@ -1341,10 +1509,12 @@ parseDateTimeBestEffort(time_string [, time_zone])
- A string containing 9..10 digit [unix timestamp](https://en.wikipedia.org/wiki/Unix_time).
- A string with a date and a time component: `YYYYMMDDhhmmss`, `DD/MM/YYYY hh:mm:ss`, `DD-MM-YY hh:mm`, `YYYY-MM-DD hh:mm:ss`, etc.
- A string with a date, but no time component: `YYYY`, `YYYYMM`, `YYYY*MM`, `DD/MM/YYYY`, `DD-MM-YY` etc.
- A string with a day and time: `DD`, `DD hh`, `DD hh:mm`. In this case `YYYY-MM` are substituted as `2000-01`.
- A string with a day and time: `DD`, `DD hh`, `DD hh:mm`. In this case `MM` is substituted by `01`.
- A string that includes the date and time along with time zone offset information: `YYYY-MM-DD hh:mm:ss ±h:mm`, etc. For example, `2020-12-12 17:36:00 -5:00`.
- A [syslog timestamp](https://datatracker.ietf.org/doc/html/rfc3164#section-4.1.2): `Mmm dd hh:mm:ss`. For example, `Jun 9 14:20:32`.
For all formats with a separator, the function parses month names given either in full or abbreviated to the first three letters. Examples: `24/DEC/18`, `24-Dec-18`, `01-September-2018`.
If the year is not specified, it is considered to be equal to the current year. If the resulting DateTime happens to be in the future (even by a second after the current moment), then the current year is replaced with the previous year.
**Returned value**
@ -1415,23 +1585,46 @@ Result:
Query:
``` sql
SELECT parseDateTimeBestEffort('10 20:19');
SELECT toYear(now()) as year, parseDateTimeBestEffort('10 20:19');
```
Result:
```response
┌─parseDateTimeBestEffort('10 20:19')─┐
│ 2000-01-10 20:19:00 │
└─────────────────────────────────────┘
┌─year─┬─parseDateTimeBestEffort('10 20:19')─┐
│ 2023 │ 2023-01-10 20:19:00 │
└──────┴─────────────────────────────────────┘
```
Query:
``` sql
WITH
now() AS ts_now,
formatDateTime(ts_around, '%b %e %T') AS syslog_arg
SELECT
ts_now,
syslog_arg,
parseDateTimeBestEffort(syslog_arg)
FROM (SELECT arrayJoin([ts_now - 30, ts_now + 30]) AS ts_around);
```
Result:
```response
┌──────────────ts_now─┬─syslog_arg──────┬─parseDateTimeBestEffort(syslog_arg)─┐
│ 2023-06-30 23:59:30 │ Jun 30 23:59:00 │ 2023-06-30 23:59:00 │
│ 2023-06-30 23:59:30 │ Jul 1 00:00:00 │ 2022-07-01 00:00:00 │
└─────────────────────┴─────────────────┴─────────────────────────────────────┘
```
**See Also**
- [RFC 1123](https://tools.ietf.org/html/rfc1123)
- [RFC 1123](https://datatracker.ietf.org/doc/html/rfc1123)
- [toDate](#todate)
- [toDateTime](#todatetime)
- [ISO 8601 announcement by @xkcd](https://xkcd.com/1179/)
- [RFC 3164](https://datatracker.ietf.org/doc/html/rfc3164#section-4.1.2)
## parseDateTimeBestEffortUS

View File

@ -82,6 +82,35 @@ LIFETIME(MIN 0 MAX 1000)
LAYOUT(FLAT())
```
:::note
When using the SQL console in [ClickHouse Cloud](https://clickhouse.com), you must specify a user (`default` or any other user with the role `default_role`) and password when creating a dictionary.
:::
```sql
CREATE USER IF NOT EXISTS clickhouse_admin
IDENTIFIED WITH sha256_password BY 'passworD43$x';
GRANT default_role TO clickhouse_admin;
CREATE DATABASE foo_db;
CREATE TABLE foo_db.source_table (
id UInt64,
value String
) ENGINE = MergeTree
PRIMARY KEY id;
CREATE DICTIONARY foo_db.id_value_dictionary
(
id UInt64,
value String
)
PRIMARY KEY id
SOURCE(CLICKHOUSE(TABLE 'source_table' USER 'clickhouse_admin' PASSWORD 'passworD43$x' DB 'foo_db' ))
LAYOUT(FLAT())
LIFETIME(MIN 0 MAX 1000);
```
### Create a dictionary from a table in a remote ClickHouse service
Input table (in the remote ClickHouse service) `source_table`:

View File

@ -55,6 +55,9 @@ With the described implementation now we can see what can negatively affect 'DEL
- Table having a very large number of data parts
- Having a lot of data in Compact parts—in a Compact part, all columns are stored in one file.
:::note
Currently, lightweight delete does not work for tables with projections, because rows in a projection may be affected and would require the projection to be rebuilt. Rebuilding a projection makes the deletion no longer lightweight, so this is not supported.
:::
## Related content

View File

@ -1,10 +1,10 @@
---
slug: /en/sql-reference/table-functions/azure_blob_storage
sidebar_label: azure_blob_storage
slug: /en/sql-reference/table-functions/azureBlobStorage
sidebar_label: azureBlobStorage
keywords: [azure blob storage]
---
# azure\_blob\_storage Table Function
# azureBlobStorage Table Function
Provides a table-like interface to select/insert files in [Azure Blob Storage](https://azure.microsoft.com/en-us/products/storage/blobs). This table function is similar to the [s3 function](../../sql-reference/table-functions/s3.md).

View File

@ -196,6 +196,16 @@ SELECT count(*) FROM file('big_dir/**/file002', 'CSV', 'name String, value UInt3
- `_path` — Path to the file.
- `_file` — Name of the file.
## Settings
- [engine_file_empty_if_not_exists](/docs/en/operations/settings/settings.md#engine-file-emptyif-not-exists) - allows selecting empty data from a file that doesn't exist. Disabled by default.
- [engine_file_truncate_on_insert](/docs/en/operations/settings/settings.md#engine-file-truncate-on-insert) - allows truncating the file before inserting into it. Disabled by default.
- [engine_file_allow_create_multiple_files](/docs/en/operations/settings/settings.md#engine_file_allow_create_multiple_files) - allows creating a new file on each insert if the format has a suffix. Disabled by default.
- [engine_file_skip_empty_files](/docs/en/operations/settings/settings.md#engine_file_skip_empty_files) - allows skipping empty files while reading. Disabled by default.
- [storage_file_read_method](/docs/en/operations/settings/settings.md#storage_file_read_method) - method of reading data from storage file, one of: read, pread, mmap (mmap is only for clickhouse-local). Default value: `pread` for clickhouse-server, `mmap` for clickhouse-local.
**See Also**
- [Virtual columns](/docs/en/engines/table-engines/index.md#table_engines-virtual_columns)

View File

@ -1,7 +1,7 @@
---
slug: /en/sql-reference/table-functions/gcs
sidebar_position: 45
sidebar_label: s3
sidebar_label: gcs
keywords: [gcs, bucket]
---

View File

@ -97,6 +97,12 @@ FROM hdfs('hdfs://hdfs1:9000/big_dir/file{0..9}{0..9}{0..9}', 'CSV', 'name Strin
- `_path` — Path to the file.
- `_file` — Name of the file.
## Storage Settings {#storage-settings}
- [hdfs_truncate_on_insert](/docs/en/operations/settings/settings.md#hdfs_truncate_on_insert) - allows truncating the file before inserting into it. Disabled by default.
- [hdfs_create_new_file_on_insert](/docs/en/operations/settings/settings.md#hdfs_create_new_file_on_insert) - allows creating a new file on each insert if the format has a suffix. Disabled by default.
- [hdfs_skip_empty_files](/docs/en/operations/settings/settings.md#hdfs_skip_empty_files) - allows skipping empty files while reading. Disabled by default.
**See Also**
- [Virtual columns](../../engines/table-engines/index.md#table_engines-virtual_columns)

View File

@ -107,6 +107,30 @@ SELECT * FROM mysql('localhost:3306', 'test', 'test', 'bayonet', '123');
└────────┴───────┘
```
Copying data from a MySQL table into a ClickHouse table:
```sql
CREATE TABLE mysql_copy
(
`id` UInt64,
`datetime` DateTime('UTC'),
`description` String,
)
ENGINE = MergeTree
ORDER BY (id,datetime);
INSERT INTO mysql_copy
SELECT * FROM mysql('host:port', 'database', 'table', 'user', 'password');
```
Or if copying only an incremental batch from MySQL based on the max current id:
```sql
INSERT INTO mysql_copy
SELECT * FROM mysql('host:port', 'database', 'table', 'user', 'password')
WHERE id > (SELECT max(id) from mysql_copy);
```
**See Also**
- [The MySQL table engine](../../engines/table-engines/integrations/mysql.md)

View File

@ -0,0 +1,67 @@
---
slug: /en/sql-reference/table-functions/redis
sidebar_position: 43
sidebar_label: redis
---
# redis
This table function allows integrating ClickHouse with [Redis](https://redis.io/).
**Syntax**
```sql
redis(host:port, key, structure[, db_index[, password[, pool_size]]])
```
**Arguments**
- `host:port` — Redis server address; the port can be omitted, in which case the default Redis port 6379 is used.
- `key` — any column name in the column list.
- `structure` — The schema for the ClickHouse table returned from this function.
- `db_index` — Redis db index, in the range 0 to 15; default is 0.
- `password` — User password; default is an empty string.
- `pool_size` — Redis max connection pool size; default is 16.
- `primary` must be specified; only one column is supported in the primary key. The primary key will be serialized in binary as the Redis key.
- Columns other than the primary key will be serialized in binary as the Redis value, in the corresponding order.
- Queries with equality or `IN` filtering on the key will be optimized into a multi-key lookup in Redis. Queries without a filter on the key cause a full table scan, which is a heavy operation (see the filtered query sketch after the usage example below).
**Returned Value**
A table object with the key mapped to the Redis key and the other columns packed together as the Redis value.
## Usage Example {#usage-example}
Create a table in ClickHouse that allows reading data from Redis:
``` sql
CREATE TABLE redis_table
(
`k` String,
`m` String,
`n` UInt32
)
ENGINE = Redis('redis1:6379') PRIMARY KEY(k);
```
```sql
SELECT * FROM redis(
'redis1:6379',
'key',
'key String, v1 String, v2 UInt32'
)
```
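As a sketch of the key-filtering optimization mentioned above (the key values are hypothetical), a query that filters on the primary-key column is served as a multi-key lookup rather than a full scan:

```sql
SELECT * FROM redis(
    'redis1:6379',
    'key',
    'key String, v1 String, v2 UInt32'
)
WHERE key IN ('user:1001', 'user:1002');
```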
**See Also**
- [The `Redis` table engine](/docs/en/engines/table-engines/integrations/redis.md)
- [Using redis as a dictionary source](/docs/en/sql-reference/dictionaries/index.md#redis)

View File

@ -202,6 +202,12 @@ FROM s3(
LIMIT 5;
```
## Storage Settings {#storage-settings}
- [s3_truncate_on_insert](/docs/en/operations/settings/settings.md#s3_truncate_on_insert) - allows truncating the file before inserting into it. Disabled by default.
- [s3_create_new_file_on_insert](/docs/en/operations/settings/settings.md#s3_create_new_file_on_insert) - allows creating a new file on each insert if the format has a suffix. Disabled by default.
- [s3_skip_empty_files](/docs/en/operations/settings/settings.md#s3_skip_empty_files) - allows skipping empty files while reading. Disabled by default.
**See Also**
- [S3 engine](../../engines/table-engines/integrations/s3.md)

View File

@ -53,6 +53,10 @@ Character `|` inside patterns is used to specify failover addresses. They are it
- `_path` — Path to the `URL`.
- `_file` — Resource name of the `URL`.
## Storage Settings {#storage-settings}
- [engine_url_skip_empty_files](/docs/en/operations/settings/settings.md#engine_url_skip_empty_files) - allows skipping empty files while reading. Disabled by default.
**See Also**
- [Virtual columns](/docs/en/engines/table-engines/index.md#table_engines-virtual_columns)

View File

@ -142,7 +142,129 @@ $ clickhouse-client --param_tbl="numbers" --param_db="system" --param_col="numbe
- `--history_file` - путь к файлу с историей команд.
- `--param_<name>` — значение параметра для [запроса с параметрами](#cli-queries-with-parameters).
Начиная с версии 20.5, в `clickhouse-client` есть автоматическая подсветка синтаксиса (включена всегда).
Вместо параметров `--host`, `--port`, `--user` и `--password` клиент ClickHouse также поддерживает строки подключения (смотри следующий раздел).
## Строка подключения {#connection_string}
clickhouse-client также поддерживает подключение к серверу clickhouse с помощью строки подключения, аналогичной [MongoDB](https://www.mongodb.com/docs/manual/reference/connection-string/), [PostgreSQL](https://www.postgresql.org/docs/current/libpq-connect.html#LIBPQ-CONNSTRING), [MySQL](https://dev.mysql.com/doc/refman/8.0/en/connecting-using-uri-or-key-value-pairs.html#connecting-using-uri). Она имеет следующий синтаксис:
```text
clickhouse:[//[user[:password]@][hosts_and_ports]][/database][?query_parameters]
```
Где
- `user` - (необязательно) - это имя пользователя,
- `password` - (необязательно) - Пароль пользователя. Если символ `:` указан, а пароль пуст, то клиент запросит у пользователя пароль.
- `hosts_and_ports` - (необязательно) - список хостов и необязательных портов. `host[:port] [, host:[port]], ...`,
- `database` - (необязательно) - это имя базы данных,
- `query_parameters` - (опционально) список пар ключ-значение `param1=value1[,&param2=value2], ...`. Для некоторых параметров значение не требуется. Имена и значения параметров чувствительны к регистру.
Если user не указан, будет использоваться имя пользователя `default`.
Если host не указан, будет использован хост `localhost`.
Если port не указан, будет использоваться порт `9000`.
Если база данных не указана, будет использоваться база данных `default`.
Если имя пользователя, пароль или база данных были указаны в строке подключения, их нельзя указать с помощью `--user`, `--password` или `--database` (и наоборот).
Параметр host может быть либо именем хоста, либо IP-адресом. Для указания IPv6-адреса поместите его в квадратные скобки:
```text
clickhouse://[2001:db8::1234]
```
URI позволяет подключаться к нескольким хостам. Строки подключения могут содержать несколько хостов. ClickHouse-client будет пытаться подключиться к этим хостам по порядку (т.е. слева направо). После установления соединения попытки подключения к оставшимся хостам не предпринимаются.
Строка подключения должна быть указана в первом аргументе clickhouse-client. Строка подключения может комбинироваться с другими [параметрами командной строки](#command-line-options), кроме `--host/-h` и `--port`.
Для компонента `query_parameter` разрешены следующие ключи:
- `secure` или сокращенно `s` - без значения. Если параметр указан, то соединение с сервером будет осуществляться по защищенному каналу (TLS). См. `secure` в [command-line-options](#command-line-options).
### Кодирование URI {#connection_string_uri_percent_encoding}
Не US ASCII и специальные символы в имени пользователя, пароле, хостах, базе данных и параметрах запроса должны быть [закодированы](https://ru.wikipedia.org/wiki/URL#%D0%9A%D0%BE%D0%B4%D0%B8%D1%80%D0%BE%D0%B2%D0%B0%D0%BD%D0%B8%D0%B5_URL).
### Примеры {#connection_string_examples}
Подключиться к localhost через порт 9000 и выполнить запрос `SELECT 1`
``` bash
clickhouse-client clickhouse://localhost:9000 --query "SELECT 1"
```
Подключиться к localhost, используя пользователя `john` с паролем `secret`, хост `127.0.0.1` и порт `9000`
``` bash
clickhouse-client clickhouse://john:secret@127.0.0.1:9000
```
Подключиться к localhost, используя пользователя по умолчанию, хост с IPV6 адресом `[::1]` и порт `9000`.
``` bash
clickhouse-client clickhouse://[::1]:9000
```
Подключиться к localhost через порт 9000 в многострочном режиме.
``` bash
clickhouse-client clickhouse://localhost:9000 '-m'
```
Подключиться к localhost через порт 9000 с пользователем default.
``` bash
clickhouse-client clickhouse://default@localhost:9000
# Эквивалентно:
clickhouse-client clickhouse://localhost:9000 --user default
```
Подключиться к localhost через порт 9000 с базой данных `my_database`
``` bash
clickhouse-client clickhouse://localhost:9000/my_database
# Эквивалентно:
clickhouse-client clickhouse://localhost:9000 --database my_database
```
Подключиться к localhost через порт 9000 с базой данных `my_database`, указанной в строке подключения, используя безопасное соединение при помощи короткого варианта параметра URI 's'.
``` bash
clickhouse-client clickhouse://localhost/my_database?s
# Эквивалентно:
clickhouse-client clickhouse://localhost/my_database -s
```
Подключиться к хосту по умолчанию с использованием порта по умолчанию, пользователя по умолчанию, и базы данных по умолчанию.
``` bash
clickhouse-client clickhouse:
```
Подключиться к хосту по умолчанию через порт по умолчанию, используя имя пользователя `my_user` без пароля.
``` bash
clickhouse-client clickhouse://my_user@
# Использование пустого пароля между : и @ означает, что пользователь должен ввести пароль перед началом соединения.
clickhouse-client clickhouse://my_user:@
```
Подключиться к localhost, используя электронную почту, как имя пользователя. Символ `@` закодирован как `%40`.
``` bash
clickhouse-client clickhouse://some_user%40some_mail.com@localhost:9000
```
Подключиться к одному из хостов: `192.168.1.15`, `192.168.1.25`.
``` bash
clickhouse-client clickhouse://192.168.1.15,192.168.1.25
```
### Конфигурационные файлы {#configuration_files}

View File

@ -31,7 +31,7 @@ sidebar_label: Decimal
## Внутреннее представление {#vnutrennee-predstavlenie}
Внутри данные представляются как знаковые целые числа, соответсвующей разрядности. Реальные диапазоны, хранящиеся в ячейках памяти несколько больше заявленных. Заявленные диапазоны Decimal проверяются только при вводе числа из строкового представления.
Поскольку современные CPU не поддерживают 128-битные числа, операции над Decimal128 эмулируются программно. Decimal128 работает в разы медленней чем Decimal32/Decimal64.
Поскольку современные CPU не поддерживают 128-битные и 256-битные числа, операции над Decimal128 и Decimal256 эмулируются программно. Данные типы работают в разы медленнее, чем Decimal32/Decimal64.
## Операции и типы результата {#operatsii-i-tipy-rezultata}
@ -59,6 +59,10 @@ sidebar_label: Decimal
При выполнении операций над типом Decimal могут происходить целочисленные переполнения. Лишняя дробная часть отбрасывается (не округляется). Лишняя целочисленная часть приводит к исключению.
:::warning
Проверка переполнения не реализована для Decimal128 и Decimal256. В случае переполнения неверный результат будет возвращён без выбрасывания исключения.
:::
``` sql
SELECT toDecimal32(2, 4) AS x, x / 3
```

View File

@ -165,22 +165,217 @@ SELECT toUInt64(nan), toUInt32(-32), toUInt16('16'), toUInt8(8.8);
## toDate {#todate}
Cиноним: `DATE`.
Конвертирует аргумент в значение [Date](/docs/ru/sql-reference/data-types/date.md).
**Синтаксис**
``` sql
toDate(expr)
```
**Аргументы**
- `expr` — Значение для преобразования. [String](/docs/ru/sql-reference/data-types/string.md), [Int](/docs/ru/sql-reference/data-types/int-uint.md), [Date](/docs/ru/sql-reference/data-types/date.md) или [DateTime](/docs/ru/sql-reference/data-types/datetime.md).
Если `expr` является числом и выглядит как UNIX timestamp (больше чем 65535), оно интерпретируется как DateTime и затем обрезается до Date с учётом текущего часового пояса. Если `expr` является числом и меньше 65536, оно интерпретируется как количество дней с 1970-01-01.
**Возвращаемое значение**
- Календарная дата. [Date](/docs/ru/sql-reference/data-types/date.md).
**Пример**
Запрос:
``` sql
SELECT toDate('2022-12-30'), toDate(1685457500);
```
Результат:
```response
┌─toDate('2022-12-30')─┬─toDate(1685457500)─┐
│ 2022-12-30 │ 2023-05-30 │
└──────────────────────┴────────────────────┘
```
## toDateOrZero {#todateorzero}
Как [toDate](#todate), но в случае неудачи возвращает нижнюю границу [Date](/docs/ru/sql-reference/data-types/date.md). Поддерживается только аргумент типа [String](/docs/ru/sql-reference/data-types/string.md).
**Пример**
Запрос:
``` sql
SELECT toDateOrZero('2022-12-30'), toDateOrZero('');
```
Результат:
```response
┌─toDateOrZero('2022-12-30')─┬─toDateOrZero('')─┐
│ 2022-12-30 │ 1970-01-01 │
└────────────────────────────┴──────────────────┘
```
## toDateOrNull {#todateornull}
Как [toDate](#todate), но в случае неудачи возвращает `NULL`. Поддерживается только аргумент типа [String](/docs/ru/sql-reference/data-types/string.md).
**Пример**
Запрос:
``` sql
SELECT toDateOrNull('2022-12-30'), toDateOrNull('');
```
Результат:
```response
┌─toDateOrNull('2022-12-30')─┬─toDateOrNull('')─┐
│ 2022-12-30 │ ᴺᵁᴸᴸ │
└────────────────────────────┴──────────────────┘
```
## toDateOrDefault {#todateordefault}
Как [toDate](#todate), но в случае неудачи возвращает значение по умолчанию (или второй аргумент (если указан), или нижняя граница [Date](/docs/ru/sql-reference/data-types/date.md)).
**Синтаксис**
``` sql
toDateOrDefault(expr [, default_value])
```
**Пример**
Запрос:
``` sql
SELECT toDateOrDefault('2022-12-30'), toDateOrDefault('', '2023-01-01'::Date);
```
Результат:
```response
┌─toDateOrDefault('2022-12-30')─┬─toDateOrDefault('', CAST('2023-01-01', 'Date'))─┐
│ 2022-12-30 │ 2023-01-01 │
└───────────────────────────────┴─────────────────────────────────────────────────┘
```
## toDateTime {#todatetime}
Конвертирует аргумент в значение [DateTime](/docs/ru/sql-reference/data-types/datetime.md).
**Синтаксис**
``` sql
toDateTime(expr[, time_zone ])
```
**Аргументы**
- `expr` — Значение для преобразования. [String](/docs/ru/sql-reference/data-types/string.md), [Int](/docs/ru/sql-reference/data-types/int-uint.md), [Date](/docs/ru/sql-reference/data-types/date.md) или [DateTime](/docs/ru/sql-reference/data-types/datetime.md).
- `time_zone` — Часовой пояс. [String](/docs/ru/sql-reference/data-types/string.md).
Если `expr` является числом, оно интерпретируется как количество секунд от начала unix эпохи.
**Возвращаемое значение**
- Время. [DateTime](/docs/ru/sql-reference/data-types/datetime.md)
**Пример**
Запрос:
``` sql
SELECT toDateTime('2022-12-30 13:44:17'), toDateTime(1685457500, 'UTC');
```
Результат:
```response
┌─toDateTime('2022-12-30 13:44:17')─┬─toDateTime(1685457500, 'UTC')─┐
│ 2022-12-30 13:44:17 │ 2023-05-30 14:38:20 │
└───────────────────────────────────┴───────────────────────────────┘
```
## toDateTimeOrZero {#todatetimeorzero}
Как [toDateTime](#todatetime), но в случае неудачи возвращает нижнюю границу [DateTime](/docs/ru/sql-reference/data-types/datetime.md). Поддерживается только аргумент типа [String](/docs/ru/sql-reference/data-types/string.md).
**Пример**
Запрос:
``` sql
SELECT toDateTimeOrZero('2022-12-30 13:44:17'), toDateTimeOrZero('');
```
Результат:
```response
┌─toDateTimeOrZero('2022-12-30 13:44:17')─┬─toDateTimeOrZero('')─┐
│ 2022-12-30 13:44:17 │ 1970-01-01 00:00:00 │
└─────────────────────────────────────────┴──────────────────────┘
```
## toDateTimeOrNull {#todatetimeornull}
Как [toDateTime](#todatetime), но в случае неудачи возвращает `NULL`. Поддерживается только аргумент типа [String](/docs/ru/sql-reference/data-types/string.md).
**Example**
Query:
``` sql
SELECT toDateTimeOrNull('2022-12-30 13:44:17'), toDateTimeOrNull('');
```
Result:
```response
┌─toDateTimeOrNull('2022-12-30 13:44:17')─┬─toDateTimeOrNull('')─┐
│ 2022-12-30 13:44:17 │ ᴺᵁᴸᴸ │
└─────────────────────────────────────────┴──────────────────────┘
```
## toDateTimeOrDefault {#todatetimeordefault}
Как [toDateTime](#todatetime), но в случае неудачи возвращает значение по умолчанию (или третий аргумент (если указан), или нижняя граница [DateTime](/docs/ru/sql-reference/data-types/datetime.md)).
**Синтаксис**
``` sql
toDateTimeOrDefault(expr [, time_zone [, default_value]])
```
**Пример**
Запрос:
``` sql
SELECT toDateTimeOrDefault('2022-12-30 13:44:17'), toDateTimeOrDefault('', 'UTC', '2023-01-01'::DateTime('UTC'));
```
Результат:
```response
┌─toDateTimeOrDefault('2022-12-30 13:44:17')─┬─toDateTimeOrDefault('', 'UTC', CAST('2023-01-01', 'DateTime(\'UTC\')'))─┐
│ 2022-12-30 13:44:17 │ 2023-01-01 00:00:00 │
└────────────────────────────────────────────┴─────────────────────────────────────────────────────────────────────────┘
```
## toDate32 {#todate32}
Конвертирует аргумент в значение типа [Date32](../../sql-reference/data-types/date32.md). Если значение выходит за границы диапазона, возвращается пограничное значение `Date32`. Если аргумент имеет тип [Date](../../sql-reference/data-types/date.md), учитываются границы типа `Date`.
@ -301,6 +496,14 @@ SELECT
└─────────────────────────────────────────────────────────┴───────────────────────────────────────────────────────────┘
```
## toDateTime64
## toDateTime64OrZero
## toDateTime64OrNull
## toDateTime64OrDefault
## toDecimal(32\|64\|128\|256) {#todecimal3264128}
Преобразует `value` к типу данных [Decimal](../../sql-reference/functions/type-conversion-functions.md) с точностью `S`. `value` может быть числом или строкой. Параметр `S` (scale) задаёт число десятичных знаков.
@ -1020,10 +1223,12 @@ parseDateTimeBestEffort(time_string[, time_zone])
- [Unix timestamp](https://ru.wikipedia.org/wiki/Unix-время) в строковом представлении. 9 или 10 символов.
- Строка с датой и временем: `YYYYMMDDhhmmss`, `DD/MM/YYYY hh:mm:ss`, `DD-MM-YY hh:mm`, `YYYY-MM-DD hh:mm:ss`, etc.
- Строка с датой, но без времени: `YYYY`, `YYYYMM`, `YYYY*MM`, `DD/MM/YYYY`, `DD-MM-YY` и т.д.
- Строка с временем, и с днём: `DD`, `DD hh`, `DD hh:mm`. В этом случае `YYYY-MM` принимается равным `2000-01`.
- Строка с временем, и с днём: `DD`, `DD hh`, `DD hh:mm`. В этом случае `MM` принимается равным `01`.
- Строка, содержащая дату и время вместе с информацией о часовом поясе: `YYYY-MM-DD hh:mm:ss ±h:mm`, и т.д. Например, `2020-12-12 17:36:00 -5:00`.
- Строка, содержащая дату и время в формате [syslog timestamp](https://datatracker.ietf.org/doc/html/rfc3164#section-4.1.2): `Mmm dd hh:mm:ss`. Например, `Jun 9 14:20:32`.
Для всех форматов с разделителями функция распознаёт названия месяцев, выраженных в виде полного англоязычного имени месяца или в виде первых трёх символов имени месяца. Примеры: `24/DEC/18`, `24-Dec-18`, `01-September-2018`.
Если год не указан, вместо него подставляется текущий год. Если в результате получается будущее время (даже на одну секунду впереди текущего момента времени), то текущий год заменяется на прошлый.
**Возвращаемое значение**
@ -1094,23 +1299,46 @@ AS parseDateTimeBestEffort;
Запрос:
``` sql
SELECT parseDateTimeBestEffort('10 20:19');
SELECT toYear(now()) as year, parseDateTimeBestEffort('10 20:19');
```
Результат:
``` text
┌─parseDateTimeBestEffort('10 20:19')─┐
│ 2000-01-10 20:19:00 │
└─────────────────────────────────────┘
┌─year─┬─parseDateTimeBestEffort('10 20:19')─┐
│ 2023 │ 2023-01-10 20:19:00 │
└──────┴─────────────────────────────────────┘
```
Запрос:
``` sql
WITH
now() AS ts_now,
formatDateTime(ts_around, '%b %e %T') AS syslog_arg
SELECT
ts_now,
syslog_arg,
parseDateTimeBestEffort(syslog_arg)
FROM (SELECT arrayJoin([ts_now - 30, ts_now + 30]) AS ts_around);
```
Результат:
``` text
┌──────────────ts_now─┬─syslog_arg──────┬─parseDateTimeBestEffort(syslog_arg)─┐
│ 2023-06-30 23:59:30 │ Jun 30 23:59:00 │ 2023-06-30 23:59:00 │
│ 2023-06-30 23:59:30 │ Jul 1 00:00:00 │ 2022-07-01 00:00:00 │
└─────────────────────┴─────────────────┴─────────────────────────────────────┘
```
**Смотрите также**
- [Информация о формате ISO 8601 от @xkcd](https://xkcd.com/1179/)
- [RFC 1123](https://tools.ietf.org/html/rfc1123)
- [RFC 1123](https://datatracker.ietf.org/doc/html/rfc1123)
- [toDate](#todate)
- [toDateTime](#todatetime)
- [RFC 3164](https://datatracker.ietf.org/doc/html/rfc3164#section-4.1.2)
## parseDateTimeBestEffortUS {#parsedatetimebesteffortUS}

View File

@ -1,6 +1,6 @@
---
slug: /ru/whats-new/changelog/2017
sidebar_position: 6
sidebar_position: 60
sidebar_label: 2017
title: 2017 Changelog
---

View File

@ -1,6 +1,6 @@
---
slug: /ru/whats-new/changelog/2018
sidebar_position: 5
sidebar_position: 50
sidebar_label: 2018
title: 2018 Changelog
---

View File

@ -1,6 +1,6 @@
---
slug: /ru/whats-new/changelog/2019
sidebar_position: 4
sidebar_position: 40
sidebar_label: 2019
title: 2019 Changelog
---

View File

@ -1,6 +1,6 @@
---
slug: /ru/whats-new/changelog/2020
sidebar_position: 3
sidebar_position: 30
sidebar_label: 2020
title: 2020 Changelog
---

View File

@ -1,6 +1,6 @@
---
slug: /ru/whats-new/changelog/2021
sidebar_position: 2
sidebar_position: 20
sidebar_label: 2021
title: 2021 Changelog
---

View File

@ -0,0 +1,10 @@
---
slug: /ru/whats-new/changelog/2022
sidebar_position: 10
sidebar_label: 2022
title: 2022 Changelog
---
import Changelog from '@site/docs/en/whats-new/changelog/2022.md';
<Changelog />

View File

@ -2,5 +2,5 @@ label: 'Changelog'
collapsible: true
collapsed: true
link:
type: doc
id: ru/whats-new/changelog/index
type: generated-index
title: Changelog

View File

@ -1,7 +1,7 @@
---
sidebar_position: 1
sidebar_label: 2022
title: 2022 Changelog
sidebar_label: 2023
title: 2023 Changelog
slug: /ru/whats-new/changelog/index
---

View File

@ -6,4 +6,4 @@ sidebar_label: Changelog
# Changelog
You can view the latest Changelog at [https://clickhouse.com/docs/en/whats-new/changelog/](https://clickhouse.com/docs/en/whats-new/changelog/)
You can view the latest Changelog at [https://clickhouse.com/docs/en/whats-new/changelog/](/docs/en/whats-new/changelog/index.md)

View File

@ -409,8 +409,15 @@ if (ENABLE_CLICKHOUSE_KEEPER_CONVERTER)
list(APPEND CLICKHOUSE_BUNDLE clickhouse-keeper-converter)
endif ()
if (ENABLE_CLICKHOUSE_KEEPER_CLIENT)
add_custom_target (clickhouse-keeper-client ALL COMMAND ${CMAKE_COMMAND} -E create_symlink clickhouse clickhouse-keeper-client DEPENDS clickhouse)
install (FILES "${CMAKE_CURRENT_BINARY_DIR}/clickhouse-keeper-client" DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
if (NOT BUILD_STANDALONE_KEEPER)
add_custom_target (clickhouse-keeper-client ALL COMMAND ${CMAKE_COMMAND} -E create_symlink clickhouse clickhouse-keeper-client DEPENDS clickhouse)
install (FILES "${CMAKE_CURRENT_BINARY_DIR}/clickhouse-keeper-client" DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
# symlink to standalone keeper binary
else ()
add_custom_target (clickhouse-keeper-client ALL COMMAND ${CMAKE_COMMAND} -E create_symlink clickhouse-keeper clickhouse-keeper-client DEPENDS clickhouse-keeper)
install (FILES "${CMAKE_CURRENT_BINARY_DIR}/clickhouse-keeper-client" DESTINATION "${CMAKE_INSTALL_BINDIR}" COMPONENT clickhouse-keeper)
endif ()
list(APPEND CLICKHOUSE_BUNDLE clickhouse-keeper-client)
endif ()
if (ENABLE_CLICKHOUSE_DISKS)

View File

@ -5,13 +5,13 @@
#include <iostream>
#include <iomanip>
#include <optional>
#include <string_view>
#include <Common/scope_guard_safe.h>
#include <boost/program_options.hpp>
#include <boost/algorithm/string/replace.hpp>
#include <filesystem>
#include <string>
#include "Client.h"
#include "Client/ConnectionString.h"
#include "Core/Protocol.h"
#include "Parsers/formatAST.h"
@ -977,13 +977,7 @@ void Client::addOptions(OptionsDescription & options_description)
("connection", po::value<std::string>(), "connection to use (from the client config), by default connection name is hostname")
("secure,s", "Use TLS connection")
("user,u", po::value<std::string>()->default_value("default"), "user")
/** If "--password [value]" is used but the value is omitted, the bad argument exception will be thrown.
* implicit_value is used to avoid this exception (to allow user to type just "--password")
* Since currently boost provides no way to check if a value has been set implicitly for an option,
* the "\n" is used to distinguish this case because there is hardly a chance a user would use "\n"
* as the password.
*/
("password", po::value<std::string>()->implicit_value("\n", ""), "password")
("password", po::value<std::string>(), "password")
("ask-password", "ask-password")
("quota_key", po::value<std::string>(), "A string to differentiate quotas when the user have keyed quotas configured on server")
@ -1248,6 +1242,9 @@ void Client::readArguments(
std::vector<Arguments> & external_tables_arguments,
std::vector<Arguments> & hosts_and_ports_arguments)
{
bool has_connection_string = argc >= 2 && tryParseConnectionString(std::string_view(argv[1]), common_arguments, hosts_and_ports_arguments);
int start_argument_index = has_connection_string ? 2 : 1;
/** We allow different groups of arguments:
* - common arguments;
* - arguments for any number of external tables each in form "--external args...",
@ -1260,10 +1257,13 @@ void Client::readArguments(
std::string prev_host_arg;
std::string prev_port_arg;
for (int arg_num = 1; arg_num < argc; ++arg_num)
for (int arg_num = start_argument_index; arg_num < argc; ++arg_num)
{
std::string_view arg = argv[arg_num];
if (has_connection_string)
checkIfCmdLineOptionCanBeUsedWithConnectionString(arg);
if (arg == "--external")
{
in_external_group = true;
@ -1391,6 +1391,14 @@ void Client::readArguments(
arg = argv[arg_num];
addMultiquery(arg, common_arguments);
}
else if (arg == "--password" && ((arg_num + 1) >= argc || std::string_view(argv[arg_num + 1]).starts_with('-')))
{
common_arguments.emplace_back(arg);
/// No password was provided by user. Add '\n' as implicit password,
/// which encodes that client should ask user for the password.
/// '\n' is used because there is hardly a chance that a user would use '\n' as a password.
common_arguments.emplace_back("\n");
}
else
common_arguments.emplace_back(arg);
}

View File

@ -127,42 +127,42 @@ void KeeperClient::defineOptions(Poco::Util::OptionSet & options)
options.addOption(
Poco::Util::Option("host", "h", "server hostname. default `localhost`")
.argument("host")
.argument("<host>")
.binding("host"));
options.addOption(
Poco::Util::Option("port", "p", "server port. default `2181`")
.argument("port")
.argument("<port>")
.binding("port"));
options.addOption(
Poco::Util::Option("query", "q", "will execute given query, then exit.")
.argument("query")
.argument("<query>")
.binding("query"));
options.addOption(
Poco::Util::Option("connection-timeout", "", "set connection timeout in seconds. default 10s.")
.argument("connection-timeout")
.argument("<seconds>")
.binding("connection-timeout"));
options.addOption(
Poco::Util::Option("session-timeout", "", "set session timeout in seconds. default 10s.")
.argument("session-timeout")
.argument("<seconds>")
.binding("session-timeout"));
options.addOption(
Poco::Util::Option("operation-timeout", "", "set operation timeout in seconds. default 10s.")
.argument("operation-timeout")
.argument("<seconds>")
.binding("operation-timeout"));
options.addOption(
Poco::Util::Option("history-file", "", "set path of history file. default `~/.keeper-client-history`")
.argument("history-file")
.argument("<file>")
.binding("history-file"));
options.addOption(
Poco::Util::Option("log-level", "", "set log level")
.argument("log-level")
.argument("<level>")
.binding("log-level"));
}

View File

@ -112,6 +112,18 @@ if (BUILD_STANDALONE_KEEPER)
clickhouse-keeper.cpp
)
# List of resources for clickhouse-keeper client
if (ENABLE_CLICKHOUSE_KEEPER_CLIENT)
list(APPEND CLICKHOUSE_KEEPER_STANDALONE_SOURCES
${CMAKE_CURRENT_SOURCE_DIR}/../../programs/keeper-client/KeeperClient.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../programs/keeper-client/Commands.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../programs/keeper-client/Parser.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Client/LineReader.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Client/ReplxxLineReader.cpp
)
endif()
clickhouse_add_executable(clickhouse-keeper ${CLICKHOUSE_KEEPER_STANDALONE_SOURCES})
# Remove some redundant dependencies
@ -122,6 +134,10 @@ if (BUILD_STANDALONE_KEEPER)
target_include_directories(clickhouse-keeper PUBLIC "${CMAKE_CURRENT_BINARY_DIR}/../../src/Core/include") # uses some includes from core
target_include_directories(clickhouse-keeper PUBLIC "${CMAKE_CURRENT_BINARY_DIR}/../../src") # uses some includes from common
if (ENABLE_CLICKHOUSE_KEEPER_CLIENT AND TARGET ch_rust::skim)
target_link_libraries(clickhouse-keeper PRIVATE ch_rust::skim)
endif()
target_link_libraries(clickhouse-keeper
PRIVATE
ch_contrib::abseil_swiss_tables

View File

@ -34,6 +34,8 @@
#include "Core/Defines.h"
#include "config.h"
#include "config_version.h"
#include "config_tools.h"
#if USE_SSL
# include <Poco/Net/Context.h>
@ -131,7 +133,10 @@ int Keeper::run()
if (config().hasOption("help"))
{
Poco::Util::HelpFormatter help_formatter(Keeper::options());
auto header_str = fmt::format("{} [OPTION] [-- [ARG]...]\n"
auto header_str = fmt::format("{0} [OPTION] [-- [ARG]...]\n"
#if ENABLE_CLICKHOUSE_KEEPER_CLIENT
"{0} client [OPTION]\n"
#endif
"positional arguments can be used to rewrite config.xml properties, for example, --http_port=8010",
commandName());
help_formatter.setHeader(header_str);

View File

@ -1,6 +1,30 @@
#include <Common/StringUtils/StringUtils.h>
#include "config_tools.h"
int mainEntryClickHouseKeeper(int argc, char ** argv);
#if ENABLE_CLICKHOUSE_KEEPER_CLIENT
int mainEntryClickHouseKeeperClient(int argc, char ** argv);
#endif
int main(int argc_, char ** argv_)
{
#if ENABLE_CLICKHOUSE_KEEPER_CLIENT
if (argc_ >= 2)
{
/// 'clickhouse-keeper --client ...' and 'clickhouse-keeper client ...' are OK
if (strcmp(argv_[1], "--client") == 0 || strcmp(argv_[1], "client") == 0)
{
argv_[1] = argv_[0];
return mainEntryClickHouseKeeperClient(--argc_, argv_ + 1);
}
}
if (argc_ > 0 && (strcmp(argv_[0], "clickhouse-keeper-client") == 0 || endsWith(argv_[0], "/clickhouse-keeper-client")))
return mainEntryClickHouseKeeperClient(argc_, argv_);
#endif
return mainEntryClickHouseKeeper(argc_, argv_);
}

View File

@ -1705,7 +1705,6 @@ try
#endif
/// Must be done after initialization of `servers`, because async_metrics will access `servers` variable from its thread.
async_metrics.start();
{

View File

@ -449,7 +449,7 @@ let queries = [
FROM system.metric_log
WHERE event_date >= toDate(now() - {seconds:UInt32}) AND event_time >= now() - {seconds:UInt32}
GROUP BY t
ORDER BY t`
ORDER BY t WITH FILL STEP {rounding:UInt32}`
},
{
"title": "CPU Usage (cores)",
@ -457,7 +457,7 @@ ORDER BY t`
FROM system.metric_log
WHERE event_date >= toDate(now() - {seconds:UInt32}) AND event_time >= now() - {seconds:UInt32}
GROUP BY t
ORDER BY t`
ORDER BY t WITH FILL STEP {rounding:UInt32}`
},
{
"title": "Queries Running",
@ -465,7 +465,7 @@ ORDER BY t`
FROM system.metric_log
WHERE event_date >= toDate(now() - {seconds:UInt32}) AND event_time >= now() - {seconds:UInt32}
GROUP BY t
ORDER BY t`
ORDER BY t WITH FILL STEP {rounding:UInt32}`
},
{
"title": "Merges Running",
@ -473,7 +473,7 @@ ORDER BY t`
FROM system.metric_log
WHERE event_date >= toDate(now() - {seconds:UInt32}) AND event_time >= now() - {seconds:UInt32}
GROUP BY t
ORDER BY t`
ORDER BY t WITH FILL STEP {rounding:UInt32}`
},
{
"title": "Selected Bytes/second",
@ -481,7 +481,7 @@ ORDER BY t`
FROM system.metric_log
WHERE event_date >= toDate(now() - {seconds:UInt32}) AND event_time >= now() - {seconds:UInt32}
GROUP BY t
ORDER BY t`
ORDER BY t WITH FILL STEP {rounding:UInt32}`
},
{
"title": "IO Wait",
@ -489,7 +489,7 @@ ORDER BY t`
FROM system.metric_log
WHERE event_date >= toDate(now() - {seconds:UInt32}) AND event_time >= now() - {seconds:UInt32}
GROUP BY t
ORDER BY t`
ORDER BY t WITH FILL STEP {rounding:UInt32}`
},
{
"title": "CPU Wait",
@ -497,7 +497,7 @@ ORDER BY t`
FROM system.metric_log
WHERE event_date >= toDate(now() - {seconds:UInt32}) AND event_time >= now() - {seconds:UInt32}
GROUP BY t
ORDER BY t`
ORDER BY t WITH FILL STEP {rounding:UInt32}`
},
{
"title": "OS CPU Usage (Userspace)",
@ -506,7 +506,7 @@ FROM system.asynchronous_metric_log
WHERE event_date >= toDate(now() - {seconds:UInt32}) AND event_time >= now() - {seconds:UInt32}
AND metric = 'OSUserTimeNormalized'
GROUP BY t
ORDER BY t`
ORDER BY t WITH FILL STEP {rounding:UInt32}`
},
{
"title": "OS CPU Usage (Kernel)",
@ -515,7 +515,7 @@ FROM system.asynchronous_metric_log
WHERE event_date >= toDate(now() - {seconds:UInt32}) AND event_time >= now() - {seconds:UInt32}
AND metric = 'OSSystemTimeNormalized'
GROUP BY t
ORDER BY t`
ORDER BY t WITH FILL STEP {rounding:UInt32}`
},
{
"title": "Read From Disk",
@ -523,7 +523,7 @@ ORDER BY t`
FROM system.metric_log
WHERE event_date >= toDate(now() - {seconds:UInt32}) AND event_time >= now() - {seconds:UInt32}
GROUP BY t
ORDER BY t`
ORDER BY t WITH FILL STEP {rounding:UInt32}`
},
{
"title": "Read From Filesystem",
@ -531,7 +531,7 @@ ORDER BY t`
FROM system.metric_log
WHERE event_date >= toDate(now() - {seconds:UInt32}) AND event_time >= now() - {seconds:UInt32}
GROUP BY t
ORDER BY t`
ORDER BY t WITH FILL STEP {rounding:UInt32}`
},
{
"title": "Memory (tracked)",
@ -539,7 +539,7 @@ ORDER BY t`
FROM system.metric_log
WHERE event_date >= toDate(now() - {seconds:UInt32}) AND event_time >= now() - {seconds:UInt32}
GROUP BY t
ORDER BY t`
ORDER BY t WITH FILL STEP {rounding:UInt32}`
},
{
"title": "Load Average (15 minutes)",
@ -548,7 +548,7 @@ FROM system.asynchronous_metric_log
WHERE event_date >= toDate(now() - {seconds:UInt32}) AND event_time >= now() - {seconds:UInt32}
AND metric = 'LoadAverage15'
GROUP BY t
ORDER BY t`
ORDER BY t WITH FILL STEP {rounding:UInt32}`
},
{
"title": "Selected Rows/second",
@ -556,7 +556,7 @@ ORDER BY t`
FROM system.metric_log
WHERE event_date >= toDate(now() - {seconds:UInt32}) AND event_time >= now() - {seconds:UInt32}
GROUP BY t
ORDER BY t`
ORDER BY t WITH FILL STEP {rounding:UInt32}`
},
{
"title": "Inserted Rows/second",
@ -564,7 +564,7 @@ ORDER BY t`
FROM system.metric_log
WHERE event_date >= toDate(now() - {seconds:UInt32}) AND event_time >= now() - {seconds:UInt32}
GROUP BY t
ORDER BY t`
ORDER BY t WITH FILL STEP {rounding:UInt32}`
},
{
"title": "Total MergeTree Parts",
@ -573,7 +573,7 @@ FROM system.asynchronous_metric_log
WHERE event_date >= toDate(now() - {seconds:UInt32}) AND event_time >= now() - {seconds:UInt32}
AND metric = 'TotalPartsOfMergeTreeTables'
GROUP BY t
ORDER BY t`
ORDER BY t WITH FILL STEP {rounding:UInt32}`
},
{
"title": "Max Parts For Partition",
@ -582,7 +582,7 @@ FROM system.asynchronous_metric_log
WHERE event_date >= toDate(now() - {seconds:UInt32}) AND event_time >= now() - {seconds:UInt32}
AND metric = 'MaxPartCountForPartition'
GROUP BY t
ORDER BY t`
ORDER BY t WITH FILL STEP {rounding:UInt32}`
}
];

View File

@ -201,6 +201,7 @@ enum class AccessType
M(URL, "", GLOBAL, SOURCES) \
M(REMOTE, "", GLOBAL, SOURCES) \
M(MONGO, "", GLOBAL, SOURCES) \
M(REDIS, "", GLOBAL, SOURCES) \
M(MEILISEARCH, "", GLOBAL, SOURCES) \
M(MYSQL, "", GLOBAL, SOURCES) \
M(POSTGRES, "", GLOBAL, SOURCES) \

View File

@ -333,7 +333,7 @@ void ContextAccess::calculateAccessRights() const
boost::algorithm::join(roles_info->getCurrentRolesNames(), ", "),
boost::algorithm::join(roles_info->getEnabledRolesNames(), ", "));
}
LOG_TRACE(trace_log, "Settings: readonly={}, allow_ddl={}, allow_introspection_functions={}", params.readonly, params.allow_ddl, params.allow_introspection);
LOG_TRACE(trace_log, "Settings: readonly = {}, allow_ddl = {}, allow_introspection_functions = {}", params.readonly, params.allow_ddl, params.allow_introspection);
LOG_TRACE(trace_log, "List of all grants: {}", access->toString());
LOG_TRACE(trace_log, "List of all grants including implicit: {}", access_with_implicit->toString());
}

View File

@ -5,6 +5,7 @@
#include <Access/GSSAcceptor.h>
#include <base/defines.h>
#include <base/types.h>
#include <base/extended_types.h>
#include <chrono>
#include <map>
@ -42,7 +43,7 @@ public:
private:
struct LDAPCacheEntry
{
std::size_t last_successful_params_hash = 0;
UInt128 last_successful_params_hash = 0;
std::chrono::steady_clock::time_point last_successful_authentication_timestamp;
LDAPClient::SearchResultsList last_successful_role_search_results;
};

View File

@ -122,7 +122,7 @@ public:
size_t size;
readVarUInt(size, in);
static constexpr size_t max_size = 1_GiB;
static constexpr size_t max_size = 100_GiB;
if (size == 0)
throw Exception(ErrorCodes::INCORRECT_DATA, "Incorrect size (0) in groupBitmap.");

View File

@ -157,8 +157,8 @@ public:
void read(DB::ReadBuffer & buf)
{
size_t size = 0;
DB::readIntBinary<size_t>(size, buf);
DB::readIntBinary<size_t>(total_values, buf);
readBinaryLittleEndian(size, buf);
readBinaryLittleEndian(total_values, buf);
/// Compatibility with old versions.
if (size > total_values)
@ -171,16 +171,16 @@ public:
samples.resize(size);
for (size_t i = 0; i < size; ++i)
DB::readPODBinary(samples[i], buf);
readBinaryLittleEndian(samples[i], buf);
sorted = false;
}
void write(DB::WriteBuffer & buf) const
{
size_t size = samples.size();
DB::writeIntBinary<size_t>(size, buf);
DB::writeIntBinary<size_t>(total_values, buf);
const size_t size = samples.size();
writeBinaryLittleEndian(size, buf);
writeBinaryLittleEndian(total_values, buf);
for (size_t i = 0; i < size; ++i)
{
@ -190,12 +190,12 @@ public:
/// Here we ensure that padding is zero without changing the protocol.
/// TODO: After implementation of "versioning aggregate function state",
/// change the serialization format.
Element elem;
memset(&elem, 0, sizeof(elem));
elem = samples[i];
DB::writePODBinary(elem, buf);
DB::transformEndianness<std::endian::little>(elem);
DB::writeString(reinterpret_cast<const char*>(&elem), sizeof(elem), buf);
}
}
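
The hunk above replaces writeIntBinary/writePODBinary with the explicit little-endian helpers (readBinaryLittleEndian, writeBinaryLittleEndian, transformEndianness), so the serialized sampler state no longer depends on host byte order. A standard-C++ sketch of the underlying idea, not the ClickHouse helpers themselves:

    #include <bit>
    #include <cassert>
    #include <cstddef>
    #include <cstdint>
    #include <cstring>
    #include <vector>

    /// Append `value` to `out` in little-endian byte order, independent of host endianness.
    template <typename T>
    void writeLittleEndian(T value, std::vector<unsigned char> & out)
    {
        unsigned char bytes[sizeof(T)];
        std::memcpy(bytes, &value, sizeof(T));
        if constexpr (std::endian::native == std::endian::big)
        {
            for (std::size_t i = sizeof(T); i-- > 0;)
                out.push_back(bytes[i]);  /// reverse the big-endian representation
        }
        else
        {
            out.insert(out.end(), bytes, bytes + sizeof(T));
        }
    }

    int main()
    {
        std::vector<unsigned char> buf;
        writeLittleEndian<std::uint32_t>(0x01020304u, buf);
        /// The least significant byte comes first, whatever the host byte order is.
        assert(buf.size() == 4 && buf[0] == 0x04 && buf[3] == 0x01);
        return 0;
    }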

View File

@ -185,11 +185,10 @@ void BackupCoordinationReplicatedTables::addPartNames(PartNamesForTableReplica &
const String & other_replica_name = **other.replica_names.begin();
throw Exception(
ErrorCodes::CANNOT_BACKUP_TABLE,
"Table {} on replica {} has part {} which is different from the part on replica {}. Must be the same",
table_name_for_logs,
replica_name,
part_name,
other_replica_name);
"Table {} on replica {} has part {} different from the part on replica {} "
"(checksum '{}' on replica {} != checksum '{}' on replica {})",
table_name_for_logs, replica_name, part_name, other_replica_name,
getHexUIntLowercase(checksum), replica_name, getHexUIntLowercase(other.checksum), other_replica_name);
}
}

View File

@ -85,6 +85,9 @@ void BackupCoordinationStageSync::setError(const String & current_host, const Ex
writeException(exception, buf, true);
zookeeper->createIfNotExists(zookeeper_path + "/error", buf.str());
/// When a backup/restore fails, it removes its nodes from ZooKeeper.
/// Sometimes it fails to remove all of them: for example, it may remove the /error node but fail to remove the /stage node,
/// so the following line tries to preserve the error status.
auto code = zookeeper->trySet(zookeeper_path, Stage::ERROR);
if (code != Coordination::Error::ZOK)
throw zkutil::KeeperException(code, zookeeper_path);

View File

@ -144,6 +144,7 @@ void BackupImpl::open(const ContextPtr & context)
if (!uuid)
uuid = UUIDHelpers::generateV4();
lock_file_name = use_archive ? (archive_params.archive_name + ".lock") : ".lock";
lock_file_before_first_file_checked = false;
writing_finalized = false;
/// Check that we can write a backup there and create the lock file to own this destination.
@ -833,13 +834,10 @@ void BackupImpl::writeFile(const BackupFileInfo & info, BackupEntryPtr entry)
if (writing_finalized)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Backup is already finalized");
bool should_check_lock_file = false;
{
std::lock_guard lock{mutex};
++num_files;
total_size += info.size;
if (!num_entries)
should_check_lock_file = true;
}
auto src_disk = entry->getDisk();
@ -859,7 +857,7 @@ void BackupImpl::writeFile(const BackupFileInfo & info, BackupEntryPtr entry)
return;
}
if (!should_check_lock_file)
if (!lock_file_before_first_file_checked.exchange(true))
checkLockFile(true);
/// NOTE: `mutex` must be unlocked during copying otherwise writing will be in one thread maximum and hence slow.
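
The change above drops the mutex-guarded should_check_lock_file flag in favour of a std::atomic<bool> and exchange(), so exactly one writer performs the one-time lock-file check without holding the mutex. A minimal standalone sketch of this "first caller wins" pattern (names are illustrative):

    #include <atomic>
    #include <cstdio>
    #include <thread>
    #include <vector>

    std::atomic<bool> first_file_written{false};

    void writeFile(int id)
    {
        /// exchange() returns the previous value: only the first caller observes `false`
        /// and performs the one-time check; everyone else skips it, no mutex required.
        if (!first_file_written.exchange(true))
            std::printf("thread %d performed the one-time lock-file check\n", id);
    }

    int main()
    {
        std::vector<std::thread> threads;
        for (int i = 0; i < 4; ++i)
            threads.emplace_back(writeFile, i);
        for (auto & t : threads)
            t.join();
        return 0;
    }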

View File

@ -141,6 +141,7 @@ private:
std::shared_ptr<IArchiveReader> archive_reader;
std::shared_ptr<IArchiveWriter> archive_writer;
String lock_file_name;
std::atomic<bool> lock_file_before_first_file_checked = false;
bool writing_finalized = false;
bool deduplicate_files = true;

View File

@ -152,8 +152,7 @@ namespace
}
catch (...)
{
if (coordination)
coordination->setError(Exception(getCurrentExceptionMessageAndPattern(true, true), getCurrentExceptionCode()));
sendExceptionToCoordination(coordination, Exception(getCurrentExceptionMessageAndPattern(true, true), getCurrentExceptionCode()));
}
}

View File

@ -413,6 +413,7 @@ dbms_target_link_libraries (
boost::system
clickhouse_common_io
Poco::MongoDB
Poco::Redis
)
if (TARGET ch::mysqlxx)

View File

@ -18,7 +18,7 @@ ConnectionPoolPtr ConnectionPoolFactory::get(
String client_name,
Protocol::Compression compression,
Protocol::Secure secure,
Int64 priority)
Priority priority)
{
Key key{
max_connections, host, port, default_database, user, password, quota_key, cluster, cluster_secret, client_name, compression, secure, priority};
@ -74,7 +74,7 @@ size_t ConnectionPoolFactory::KeyHash::operator()(const ConnectionPoolFactory::K
hash_combine(seed, hash_value(k.client_name));
hash_combine(seed, hash_value(k.compression));
hash_combine(seed, hash_value(k.secure));
hash_combine(seed, hash_value(k.priority));
hash_combine(seed, hash_value(k.priority.value));
return seed;
}

View File

@ -1,6 +1,7 @@
#pragma once
#include <Common/PoolBase.h>
#include <Common/Priority.h>
#include <Client/Connection.h>
#include <IO/ConnectionTimeouts.h>
#include <Core/Settings.h>
@ -34,7 +35,7 @@ public:
const Settings * settings = nullptr,
bool force_connected = true) = 0;
virtual Int64 getPriority() const { return 1; }
virtual Priority getPriority() const { return Priority{1}; }
};
using ConnectionPoolPtr = std::shared_ptr<IConnectionPool>;
@ -60,7 +61,7 @@ public:
const String & client_name_,
Protocol::Compression compression_,
Protocol::Secure secure_,
Int64 priority_ = 1)
Priority priority_ = Priority{1})
: Base(max_connections_,
&Poco::Logger::get("ConnectionPool (" + host_ + ":" + toString(port_) + ")")),
host(host_),
@ -103,7 +104,7 @@ public:
return host + ":" + toString(port);
}
Int64 getPriority() const override
Priority getPriority() const override
{
return priority;
}
@ -134,7 +135,7 @@ private:
String client_name;
Protocol::Compression compression; /// Whether to compress data when interacting with the server.
Protocol::Secure secure; /// Whether to encrypt data when interacting with the server.
Int64 priority; /// priority from <remote_servers>
Priority priority; /// priority from <remote_servers>
};
/**
@ -157,7 +158,7 @@ public:
String client_name;
Protocol::Compression compression;
Protocol::Secure secure;
Int64 priority;
Priority priority;
};
struct KeyHash
@ -180,7 +181,7 @@ public:
String client_name,
Protocol::Compression compression,
Protocol::Secure secure,
Int64 priority);
Priority priority);
private:
mutable std::mutex mutex;
using ConnectionPoolWeakPtr = std::weak_ptr<IConnectionPool>;
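
These hunks replace the raw Int64 priority with a dedicated Priority type from Common/Priority.h, and the hash now reads k.priority.value. A hedged sketch of what such a strong-typedef wrapper looks like — this is an assumption for illustration, not a copy of the real header:

    #include <compare>
    #include <cstdint>
    #include <functional>
    #include <iostream>

    /// Illustrative strong typedef around Int64: it avoids implicit conversions from
    /// unrelated integers while keeping the wrapped value accessible as `.value`
    /// (assumed here: lower value means higher priority, as in scheduler-style priorities).
    struct Priority
    {
        std::int64_t value = 0;
        auto operator<=>(const Priority &) const = default;
    };

    int main()
    {
        Priority a{1};
        Priority b{2};
        std::cout << std::boolalpha << (a < b) << '\n';           /// true
        std::cout << std::hash<std::int64_t>{}(a.value) << '\n';  /// hash the wrapped value, as KeyHash now does
        return 0;
    }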

View File

@ -71,7 +71,7 @@ IConnectionPool::Entry ConnectionPoolWithFailover::get(const ConnectionTimeouts
return Base::get(max_ignored_errors, fallback_to_stale_replicas, try_get_entry, get_priority);
}
Int64 ConnectionPoolWithFailover::getPriority() const
Priority ConnectionPoolWithFailover::getPriority() const
{
return (*std::max_element(nested_pools.begin(), nested_pools.end(), [](const auto & a, const auto & b)
{

View File

@ -48,7 +48,7 @@ public:
const Settings * settings,
bool force_connected) override; /// From IConnectionPool
Int64 getPriority() const override; /// From IConnectionPool
Priority getPriority() const override; /// From IConnectionPool
/** Allocates up to the specified number of connections to work.
* Connections provide access to different replicas of one shard.

View File

@ -0,0 +1,239 @@
#include "ConnectionString.h"
#include <Common/Exception.h>
#include <Poco/Exception.h>
#include <Poco/URI.h>
#include <array>
#include <iostream>
#include <string>
#include <unordered_map>
namespace DB
{
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
}
}
namespace
{
using namespace std::string_literals;
using namespace std::literals::string_view_literals;
constexpr auto CONNECTION_URI_SCHEME = "clickhouse:"sv;
const std::unordered_map<std::string_view, std::string_view> PROHIBITED_CLIENT_OPTIONS = {
/// Client option, client option long name
{"-h", "--host"},
{"--host", "--host"},
{"--port", "--port"},
{"--connection", "--connection"},
};
std::string uriDecode(const std::string & uri_encoded_string, bool plus_as_space)
{
std::string decoded_string;
Poco::URI::decode(uri_encoded_string, decoded_string, plus_as_space);
return decoded_string;
}
void getHostAndPort(const Poco::URI & uri, std::vector<std::vector<std::string>> & hosts_and_ports_arguments)
{
std::vector<std::string> host_and_port;
const std::string & host = uri.getHost();
if (!host.empty())
{
host_and_port.push_back("--host=" + uriDecode(host, false));
}
// The port can be written without a host (":9000"). An empty host name means the default host.
auto port = uri.getPort();
if (port != 0)
host_and_port.push_back("--port=" + std::to_string(port));
if (!host_and_port.empty())
hosts_and_ports_arguments.push_back(std::move(host_and_port));
}
void buildConnectionString(
std::string_view host_and_port,
std::string_view right_part,
Poco::URI & uri,
std::vector<std::vector<std::string>> & hosts_and_ports_arguments)
{
// User info does not matter in the sub-URI
auto uri_string = std::string(CONNECTION_URI_SCHEME);
if (!host_and_port.empty())
{
uri_string.append("//");
uri_string.append(host_and_port);
}
// The right part of the string includes '/database?[params]'
uri_string.append(right_part);
try
{
uri = Poco::URI(uri_string);
}
catch (const Poco::URISyntaxException & invalid_uri_exception)
{
throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS,
"Invalid connection string syntax {}: {}", uri_string, invalid_uri_exception.what());
}
getHostAndPort(uri, hosts_and_ports_arguments);
}
std::string makeArgument(const std::string & connection_string_parameter_name)
{
return (connection_string_parameter_name.size() == 1 ? "-"s : "--"s) + connection_string_parameter_name;
}
}
namespace DB
{
bool tryParseConnectionString(
std::string_view connection_string,
std::vector<std::string> & common_arguments,
std::vector<std::vector<std::string>> & hosts_and_ports_arguments)
{
if (connection_string == CONNECTION_URI_SCHEME)
return true;
if (!connection_string.starts_with(CONNECTION_URI_SCHEME))
return false;
size_t offset = CONNECTION_URI_SCHEME.size();
if ((connection_string.substr(offset).starts_with("//")))
offset += 2;
auto hosts_end_pos = std::string_view::npos;
auto hosts_or_user_info_end_pos = connection_string.find_first_of("?/@", offset);
auto has_user_info = hosts_or_user_info_end_pos != std::string_view::npos && connection_string[hosts_or_user_info_end_pos] == '@';
if (has_user_info)
{
// Move offset right after user info
offset = hosts_or_user_info_end_pos + 1;
hosts_end_pos = connection_string.find_first_of("?/@", offset);
// Several '@' symbols in a connection string are prohibited.
// If a user name contains '@', it should be percent-encoded.
// Multiple users, e.g. 'usr1@host1,@usr2@host2', are invalid.
if (hosts_end_pos != std::string_view::npos && connection_string[hosts_end_pos] == '@')
{
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Symbols '@' in URI in password or user name should be percent-encoded. Individual user names for different hosts also prohibited. {}",
connection_string);
}
}
else
hosts_end_pos = hosts_or_user_info_end_pos;
const auto * hosts_end = hosts_end_pos != std::string_view::npos ? connection_string.begin() + hosts_end_pos
: connection_string.end();
try
{
/** Poco::URI doesn't support several hosts in a URI.
* Split the string clickhouse:[user[:password]@]host1:port1, ... , hostN:portN[database]?[query_parameters]
* into multiple strings, one for each host:
* clickhouse:[user[:password]@]host1:port1[database]?[query_parameters]
* ...
* clickhouse:[user[:password]@]hostN:portN[database]?[query_parameters]
*/
Poco::URI uri;
const auto * last_host_begin = connection_string.begin() + offset;
for (const auto * it = last_host_begin; it != hosts_end; ++it)
{
if (*it == ',')
{
buildConnectionString({last_host_begin, it}, {hosts_end, connection_string.end()}, uri, hosts_and_ports_arguments);
last_host_begin = it + 1;
}
}
if (uri.empty())
{
// URI has no host specified
uri = std::string(connection_string);
getHostAndPort(uri, hosts_and_ports_arguments);
}
else
buildConnectionString({last_host_begin, hosts_end}, {hosts_end, connection_string.end()}, uri, hosts_and_ports_arguments);
Poco::URI::QueryParameters params = uri.getQueryParameters();
for (const auto & param : params)
{
if (param.first == "secure" || param.first == "s")
{
if (!param.second.empty())
throw Exception(ErrorCodes::BAD_ARGUMENTS, "secure URI query parameter does not allow value");
common_arguments.push_back(makeArgument(param.first));
}
else
throw Exception(ErrorCodes::BAD_ARGUMENTS, "URI query parameter {} is not supported", param.first);
}
auto user_info = uri.getUserInfo();
if (!user_info.empty())
{
// Poco::URI doesn't decode the user name/password by default.
// But ClickHouse allows users with an email-like user name, e.g. 'john@some_mail.com',
// so it should be percent-encoded: 'john%40some_mail.com'.
size_t pos = user_info.find(':');
if (pos != std::string::npos)
{
common_arguments.push_back("--user");
common_arguments.push_back(uriDecode(user_info.substr(0, pos), true));
++pos; // Skip ':'
common_arguments.push_back("--password");
if (user_info.size() > pos + 1)
common_arguments.push_back(uriDecode(user_info.substr(pos), true));
else
{
// In the case of user_info == 'user:', the ':' is present but the password is empty;
// then add the password argument "\n", which means: ask the user for a password.
common_arguments.push_back("\n");
}
}
else
{
common_arguments.push_back("--user");
common_arguments.push_back(uriDecode(user_info, true));
}
}
const auto & database_name = uri.getPath();
size_t start_symbol = !database_name.empty() && database_name[0] == '/' ? 1u : 0u;
if (database_name.size() > start_symbol)
{
common_arguments.push_back("--database");
common_arguments.push_back(start_symbol == 0u ? database_name : database_name.substr(start_symbol));
}
}
catch (const Poco::URISyntaxException & invalid_uri_exception)
{
throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS,
"Invalid connection string '{}': {}", connection_string, invalid_uri_exception.what());
}
return true;
}
void checkIfCmdLineOptionCanBeUsedWithConnectionString(std::string_view command_line_option)
{
if (PROHIBITED_CLIENT_OPTIONS.contains(command_line_option))
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Mixing a connection string and {} option is prohibited", PROHIBITED_CLIENT_OPTIONS.at(command_line_option));
}
}

View File

@ -0,0 +1,27 @@
#pragma once
#include <string>
#include <string_view>
#include <vector>
namespace DB
{
/** Tries to parse a ClickHouse connection string.
* If @connection_string starts with 'clickhouse:', the connection string is parsed
* and converted into a set of arguments for the client.
* The connection string format is similar to the URI "clickhouse:[//[user[:password]@][hosts_and_ports]][/dbname][?query_parameters]"
* with the difference that hosts_and_ports can contain multiple hosts separated by ','.
* Example: clickhouse://user@host1:port1,host2:port2
* @return Returns false if no connection string was specified. If a connection string was specified, returns true if it is valid, and throws an exception if it is invalid.
* @exception Throws DB::Exception if the URI has a valid scheme (clickhouse:) but invalid internals.
*/
bool tryParseConnectionString(
std::string_view connection_string,
std::vector<std::string> & common_arguments,
std::vector<std::vector<std::string>> & hosts_and_ports_arguments);
// Throws DB::Exception with BAD_ARGUMENTS if the given command line argument
// is not allowed to be used with a connection string.
void checkIfCmdLineOptionCanBeUsedWithConnectionString(std::string_view command_line_option);
}
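
A hedged usage sketch of the API declared above; it only builds inside the ClickHouse source tree, the include path is assumed, and the printed results depend on the parser's behaviour rather than being asserted here:

    #include <Client/ConnectionString.h>  /// assumed include path for the header above

    #include <iostream>
    #include <string>
    #include <vector>

    int main()
    {
        std::vector<std::string> common_arguments;
        std::vector<std::vector<std::string>> hosts_and_ports_arguments;

        /// Per the documented format: two hosts, a user, a database and the 'secure' flag.
        bool parsed = DB::tryParseConnectionString(
            "clickhouse://user@host1:9000,host2:9440/analytics?secure",
            common_arguments,
            hosts_and_ports_arguments);

        std::cout << "parsed: " << parsed << ", host groups: " << hosts_and_ports_arguments.size() << '\n';
        for (const auto & argument : common_arguments)
            std::cout << argument << '\n';
        return 0;
    }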

View File

@ -151,13 +151,13 @@ public:
ColumnPtr compress() const override;
void forEachSubcolumn(ColumnCallback callback) const override
void forEachSubcolumn(MutableColumnCallback callback) override
{
callback(offsets);
callback(data);
}
void forEachSubcolumnRecursively(RecursiveColumnCallback callback) const override
void forEachSubcolumnRecursively(RecursiveMutableColumnCallback callback) override
{
callback(*offsets);
offsets->forEachSubcolumnRecursively(callback);

View File

@ -230,12 +230,12 @@ public:
data->getExtremes(min, max);
}
void forEachSubcolumn(ColumnCallback callback) const override
void forEachSubcolumn(MutableColumnCallback callback) override
{
callback(data);
}
void forEachSubcolumnRecursively(RecursiveColumnCallback callback) const override
void forEachSubcolumnRecursively(RecursiveMutableColumnCallback callback) override
{
callback(*data);
data->forEachSubcolumnRecursively(callback);

View File

@ -166,7 +166,7 @@ public:
size_t byteSizeAt(size_t n) const override { return getDictionary().byteSizeAt(getIndexes().getUInt(n)); }
size_t allocatedBytes() const override { return idx.getPositions()->allocatedBytes() + getDictionary().allocatedBytes(); }
void forEachSubcolumn(ColumnCallback callback) const override
void forEachSubcolumn(MutableColumnCallback callback) override
{
callback(idx.getPositionsPtr());
@ -175,7 +175,7 @@ public:
callback(dictionary.getColumnUniquePtr());
}
void forEachSubcolumnRecursively(RecursiveColumnCallback callback) const override
void forEachSubcolumnRecursively(RecursiveMutableColumnCallback callback) override
{
callback(*idx.getPositionsPtr());
idx.getPositionsPtr()->forEachSubcolumnRecursively(callback);
@ -340,7 +340,7 @@ private:
explicit Dictionary(MutableColumnPtr && column_unique, bool is_shared);
explicit Dictionary(ColumnPtr column_unique, bool is_shared);
const ColumnPtr & getColumnUniquePtr() const { return column_unique; }
const WrappedPtr & getColumnUniquePtr() const { return column_unique; }
WrappedPtr & getColumnUniquePtr() { return column_unique; }
const IColumnUnique & getColumnUnique() const { return static_cast<const IColumnUnique &>(*column_unique); }

View File

@ -273,12 +273,12 @@ void ColumnMap::getExtremes(Field & min, Field & max) const
max = std::move(map_max_value);
}
void ColumnMap::forEachSubcolumn(ColumnCallback callback) const
void ColumnMap::forEachSubcolumn(MutableColumnCallback callback)
{
callback(nested);
}
void ColumnMap::forEachSubcolumnRecursively(RecursiveColumnCallback callback) const
void ColumnMap::forEachSubcolumnRecursively(RecursiveMutableColumnCallback callback)
{
callback(*nested);
nested->forEachSubcolumnRecursively(callback);

View File

@ -88,8 +88,8 @@ public:
size_t byteSizeAt(size_t n) const override;
size_t allocatedBytes() const override;
void protect() override;
void forEachSubcolumn(ColumnCallback callback) const override;
void forEachSubcolumnRecursively(RecursiveColumnCallback callback) const override;
void forEachSubcolumn(MutableColumnCallback callback) override;
void forEachSubcolumnRecursively(RecursiveMutableColumnCallback callback) override;
bool structureEquals(const IColumn & rhs) const override;
double getRatioOfDefaultRows(double sample_ratio) const override;
UInt64 getNumberOfDefaultRows() const override;

View File

@ -130,13 +130,13 @@ public:
ColumnPtr compress() const override;
void forEachSubcolumn(ColumnCallback callback) const override
void forEachSubcolumn(MutableColumnCallback callback) override
{
callback(nested_column);
callback(null_map);
}
void forEachSubcolumnRecursively(RecursiveColumnCallback callback) const override
void forEachSubcolumnRecursively(RecursiveMutableColumnCallback callback) override
{
callback(*nested_column);
nested_column->forEachSubcolumnRecursively(callback);

View File

@ -664,18 +664,18 @@ size_t ColumnObject::allocatedBytes() const
return res;
}
void ColumnObject::forEachSubcolumn(ColumnCallback callback) const
void ColumnObject::forEachSubcolumn(MutableColumnCallback callback)
{
for (const auto & entry : subcolumns)
for (const auto & part : entry->data.data)
for (auto & entry : subcolumns)
for (auto & part : entry->data.data)
callback(part);
}
void ColumnObject::forEachSubcolumnRecursively(RecursiveColumnCallback callback) const
void ColumnObject::forEachSubcolumnRecursively(RecursiveMutableColumnCallback callback)
{
for (const auto & entry : subcolumns)
for (auto & entry : subcolumns)
{
for (const auto & part : entry->data.data)
for (auto & part : entry->data.data)
{
callback(*part);
part->forEachSubcolumnRecursively(callback);

View File

@ -206,8 +206,8 @@ public:
size_t size() const override;
size_t byteSize() const override;
size_t allocatedBytes() const override;
void forEachSubcolumn(ColumnCallback callback) const override;
void forEachSubcolumnRecursively(RecursiveColumnCallback callback) const override;
void forEachSubcolumn(MutableColumnCallback callback) override;
void forEachSubcolumnRecursively(RecursiveMutableColumnCallback callback) override;
void insert(const Field & field) override;
void insertDefault() override;
void insertFrom(const IColumn & src, size_t n) override;

View File

@ -751,13 +751,13 @@ bool ColumnSparse::structureEquals(const IColumn & rhs) const
return false;
}
void ColumnSparse::forEachSubcolumn(ColumnCallback callback) const
void ColumnSparse::forEachSubcolumn(MutableColumnCallback callback)
{
callback(values);
callback(offsets);
}
void ColumnSparse::forEachSubcolumnRecursively(RecursiveColumnCallback callback) const
void ColumnSparse::forEachSubcolumnRecursively(RecursiveMutableColumnCallback callback)
{
callback(*values);
values->forEachSubcolumnRecursively(callback);

View File

@ -140,8 +140,8 @@ public:
ColumnPtr compress() const override;
void forEachSubcolumn(ColumnCallback callback) const override;
void forEachSubcolumnRecursively(RecursiveColumnCallback callback) const override;
void forEachSubcolumn(MutableColumnCallback callback) override;
void forEachSubcolumnRecursively(RecursiveMutableColumnCallback callback) override;
bool structureEquals(const IColumn & rhs) const override;
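
The repeated change in the Column* hunks above turns forEachSubcolumn / forEachSubcolumnRecursively from const members taking const callbacks into non-const members taking mutable callbacks, so a caller can modify or replace the nested column pointers in place. A toy sketch of that visitor shape (ToyColumn and friends are invented stand-ins, not the real IColumn hierarchy):

    #include <functional>
    #include <iostream>
    #include <memory>
    #include <vector>

    /// Toy stand-ins for IColumn / WrappedPtr, only to show the callback shape.
    struct ToyColumn
    {
        std::vector<int> data;
    };
    using ToyColumnPtr = std::shared_ptr<ToyColumn>;

    struct ToyNullableColumn
    {
        ToyColumnPtr nested;
        ToyColumnPtr null_map;

        /// Non-const visitor: the callback receives the owning pointer by reference,
        /// so it can mutate the subcolumn or even swap the pointer for a new one.
        void forEachSubcolumn(const std::function<void(ToyColumnPtr &)> & callback)
        {
            callback(nested);
            callback(null_map);
        }
    };

    int main()
    {
        ToyNullableColumn column{std::make_shared<ToyColumn>(), std::make_shared<ToyColumn>()};
        column.forEachSubcolumn([](ToyColumnPtr & subcolumn) { subcolumn->data.push_back(42); });
        std::cout << column.nested->data.size() << ' ' << column.null_map->data.size() << '\n';  /// prints "1 1"
        return 0;
    }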

View File

@ -31,14 +31,12 @@ ColumnString::ColumnString(const ColumnString & src)
offsets(src.offsets.begin(), src.offsets.end()),
chars(src.chars.begin(), src.chars.end())
{
if (!offsets.empty())
{
Offset last_offset = offsets.back();
/// This will also prevent possible overflow in offset.
if (chars.size() != last_offset)
throw Exception(ErrorCodes::LOGICAL_ERROR, "String offsets has data inconsistent with chars array");
}
Offset last_offset = offsets.empty() ? 0 : offsets.back();
/// This will also prevent possible overflow in offset.
if (last_offset != chars.size())
throw Exception(ErrorCodes::LOGICAL_ERROR,
"String offsets has data inconsistent with chars array. Last offset: {}, array length: {}",
last_offset, chars.size());
}
@ -157,6 +155,7 @@ ColumnPtr ColumnString::filter(const Filter & filt, ssize_t result_size_hint) co
Offsets & res_offsets = res->offsets;
filterArraysImpl<UInt8>(chars, offsets, res_chars, res_offsets, filt, result_size_hint);
return res;
}
@ -571,10 +570,11 @@ void ColumnString::protect()
void ColumnString::validate() const
{
if (!offsets.empty() && offsets.back() != chars.size())
Offset last_offset = offsets.empty() ? 0 : offsets.back();
if (last_offset != chars.size())
throw Exception(ErrorCodes::LOGICAL_ERROR,
"ColumnString validation failed: size mismatch (internal logical error) {} != {}",
offsets.back(), chars.size());
last_offset, chars.size());
}
}
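
The two ColumnString hunks tighten the same invariant in the constructor and in validate(): the last offset (or 0 for an empty column) must equal chars.size(), and the error message now reports both values. A toy sketch of the arena layout this invariant protects (invented types, loosely mirroring ColumnString):

    #include <cstdint>
    #include <stdexcept>
    #include <string>
    #include <vector>

    /// Toy layout mirroring ColumnString: all string bytes (each terminated by '\0')
    /// live in one `chars` array and `offsets[i]` is the end position of string i.
    struct ToyStringColumn
    {
        std::vector<std::uint64_t> offsets;
        std::vector<char> chars;

        void append(const std::string & s)
        {
            chars.insert(chars.end(), s.begin(), s.end());
            chars.push_back('\0');
            offsets.push_back(chars.size());
        }

        /// The invariant the patched validate() enforces: the last offset (or 0 for an
        /// empty column) must match the size of the chars array.
        void validate() const
        {
            std::uint64_t last_offset = offsets.empty() ? 0 : offsets.back();
            if (last_offset != chars.size())
                throw std::logic_error(
                    "offsets/chars mismatch: " + std::to_string(last_offset) + " != " + std::to_string(chars.size()));
        }
    };

    int main()
    {
        ToyStringColumn column;
        column.append("abc");
        column.append("de");
        column.validate();  /// passes: offsets.back() == chars.size()
        return 0;
    }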

Some files were not shown because too many files have changed in this diff.