Merge remote-tracking branch 'origin/master' into ADQM-870
Commit 30bea857fd
.github/workflows/woboq.yml (vendored)
@@ -12,6 +12,7 @@ jobs:
# don't use dockerhub push because this image updates so rarely
WoboqCodebrowser:
runs-on: [self-hosted, style-checker]
+timeout-minutes: 420 # the task is pretty heavy, so there's an additional hour
steps:
- name: Set envs
run: |
.gitmodules (vendored)
@@ -19,7 +19,7 @@
url = https://github.com/google/googletest
[submodule "contrib/capnproto"]
path = contrib/capnproto
-url = https://github.com/capnproto/capnproto
+url = https://github.com/ClickHouse/capnproto
[submodule "contrib/double-conversion"]
path = contrib/double-conversion
url = https://github.com/google/double-conversion
@@ -6,8 +6,10 @@ rules:
level: warning
indent-sequences: consistent
line-length:
-# there are some bash -c "", so this is OK
-max: 300
+# there are:
+# - bash -c "", so this is OK
+# - yaml in tests
+max: 1000
level: warning
comments:
min-spaces-from-content: 1
@@ -4,7 +4,7 @@ if (SANITIZE OR NOT (
))
if (ENABLE_JEMALLOC)
message (${RECONFIGURE_MESSAGE_LEVEL}
-"jemalloc is disabled implicitly: it doesn't work with sanitizers and can only be used with x86_64, aarch64, or ppc64le Linux or FreeBSD builds and RelWithDebInfo macOS builds.")
+"jemalloc is disabled implicitly: it doesn't work with sanitizers and can only be used with x86_64, aarch64, or ppc64le Linux or FreeBSD builds and RelWithDebInfo macOS builds. Use -DENABLE_JEMALLOC=0")
endif ()
set (ENABLE_JEMALLOC OFF)
else ()
@@ -32,7 +32,7 @@ RUN arch=${TARGETARCH:-amd64} \
esac

ARG REPOSITORY="https://s3.amazonaws.com/clickhouse-builds/22.4/31c367d3cd3aefd316778601ff6565119fe36682/package_release"
-ARG VERSION="23.5.2.7"
+ARG VERSION="23.5.3.24"
ARG PACKAGES="clickhouse-keeper"

# user/group precreated explicitly with fixed uid/gid on purpose.
@@ -33,7 +33,7 @@ RUN arch=${TARGETARCH:-amd64} \
# lts / testing / prestable / etc
ARG REPO_CHANNEL="stable"
ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
-ARG VERSION="23.5.2.7"
+ARG VERSION="23.5.3.24"
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"

# user/group precreated explicitly with fixed uid/gid on purpose.
@@ -22,7 +22,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list

ARG REPO_CHANNEL="stable"
ARG REPOSITORY="deb https://packages.clickhouse.com/deb ${REPO_CHANNEL} main"
-ARG VERSION="23.5.2.7"
+ARG VERSION="23.5.3.24"
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"

# set non-empty deb_location_url url to create a docker image
@@ -20,6 +20,7 @@ For more information and documentation see https://clickhouse.com/.

- The amd64 image requires support for [SSE3 instructions](https://en.wikipedia.org/wiki/SSE3). Virtually all x86 CPUs after 2005 support SSE3.
- The arm64 image requires support for the [ARMv8.2-A architecture](https://en.wikipedia.org/wiki/AArch64#ARMv8.2-A). Most ARM CPUs after 2017 support ARMv8.2-A. A notable exception is Raspberry Pi 4 from 2019 whose CPU only supports ARMv8.0-A.
+- Since the ClickHouse 23.3 Ubuntu image started using `ubuntu:22.04` as its base image, it requires docker version >= `20.10.10`, or use `docker run --privileged` instead. Alternatively, try the ClickHouse Alpine image.

## How to use this image
@@ -12,10 +12,10 @@ RUN apt-get update --yes && \
# We need to get the repository's HEAD each time despite, so we invalidate layers' cache
ARG CACHE_INVALIDATOR=0
RUN mkdir /sqlancer && \
-wget -q -O- https://github.com/sqlancer/sqlancer/archive/master.tar.gz | \
+wget -q -O- https://github.com/sqlancer/sqlancer/archive/main.tar.gz | \
tar zx -C /sqlancer && \
-cd /sqlancer/sqlancer-master && \
-mvn package -DskipTests && \
+cd /sqlancer/sqlancer-main && \
+mvn --no-transfer-progress package -DskipTests && \
rm -r /root/.m2

COPY run.sh /
@@ -59,8 +59,7 @@ install_packages previous_release_package_folder
# available for dump via clickhouse-local
configure

# it contains some new settings, but we can safely remove it
rm /etc/clickhouse-server/config.d/merge_tree.xml
rm /etc/clickhouse-server/users.d/nonconst_timezone.xml

start
stop

@@ -86,8 +85,7 @@ export USE_S3_STORAGE_FOR_MERGE_TREE=1
export ZOOKEEPER_FAULT_INJECTION=0
configure

# it contains some new settings, but we can safely remove it
rm /etc/clickhouse-server/config.d/merge_tree.xml
rm /etc/clickhouse-server/users.d/nonconst_timezone.xml

start
docs/changelogs/v22.8.19.10-lts.md (new file)
@@ -0,0 +1,19 @@
---
sidebar_position: 1
sidebar_label: 2023
---

# 2023 Changelog

### ClickHouse release v22.8.19.10-lts (989bc2fe8b0) FIXME as compared to v22.8.18.31-lts (4de7a95a544)

#### Bug Fix (user-visible misbehavior in an official stable release)

* Fix subtly broken copy-on-write of ColumnLowCardinality dictionary [#51064](https://github.com/ClickHouse/ClickHouse/pull/51064) ([Michael Kolupaev](https://github.com/al13n321)).
* Generate safe IVs [#51086](https://github.com/ClickHouse/ClickHouse/pull/51086) ([Salvatore Mesoraca](https://github.com/aiven-sal)).

#### NOT FOR CHANGELOG / INSIGNIFICANT

* Fix a versions' tweak for tagged commits, improve version_helper [#51035](https://github.com/ClickHouse/ClickHouse/pull/51035) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Sqlancer has changed master to main [#51060](https://github.com/ClickHouse/ClickHouse/pull/51060) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
docs/changelogs/v23.3.4.17-lts.md (new file)
@@ -0,0 +1,22 @@
---
sidebar_position: 1
sidebar_label: 2023
---

# 2023 Changelog

### ClickHouse release v23.3.4.17-lts (2c99b73ff40) FIXME as compared to v23.3.3.52-lts (cb963c474db)

#### Bug Fix (user-visible misbehavior in an official stable release)

* Fix crash when Pool::Entry::disconnect() is called [#50334](https://github.com/ClickHouse/ClickHouse/pull/50334) ([Val Doroshchuk](https://github.com/valbok)).
* Avoid storing logs in Keeper containing unknown operation [#50751](https://github.com/ClickHouse/ClickHouse/pull/50751) ([Antonio Andelic](https://github.com/antonio2368)).
* Fix subtly broken copy-on-write of ColumnLowCardinality dictionary [#51064](https://github.com/ClickHouse/ClickHouse/pull/51064) ([Michael Kolupaev](https://github.com/al13n321)).
* Generate safe IVs [#51086](https://github.com/ClickHouse/ClickHouse/pull/51086) ([Salvatore Mesoraca](https://github.com/aiven-sal)).

#### NOT FOR CHANGELOG / INSIGNIFICANT

* Don't mark a part as broken on `Poco::TimeoutException` [#50811](https://github.com/ClickHouse/ClickHouse/pull/50811) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Fix a versions' tweak for tagged commits, improve version_helper [#51035](https://github.com/ClickHouse/ClickHouse/pull/51035) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Sqlancer has changed master to main [#51060](https://github.com/ClickHouse/ClickHouse/pull/51060) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
docs/changelogs/v23.4.4.16-stable.md (new file)
@@ -0,0 +1,22 @@
---
sidebar_position: 1
sidebar_label: 2023
---

# 2023 Changelog

### ClickHouse release v23.4.4.16-stable (747ba4fc6a0) FIXME as compared to v23.4.3.48-stable (d9199f8d3cc)

#### Bug Fix (user-visible misbehavior in an official stable release)

* Fix crash when Pool::Entry::disconnect() is called [#50334](https://github.com/ClickHouse/ClickHouse/pull/50334) ([Val Doroshchuk](https://github.com/valbok)).
* Fix iceberg V2 optional metadata parsing [#50974](https://github.com/ClickHouse/ClickHouse/pull/50974) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix subtly broken copy-on-write of ColumnLowCardinality dictionary [#51064](https://github.com/ClickHouse/ClickHouse/pull/51064) ([Michael Kolupaev](https://github.com/al13n321)).
* Generate safe IVs [#51086](https://github.com/ClickHouse/ClickHouse/pull/51086) ([Salvatore Mesoraca](https://github.com/aiven-sal)).

#### NOT FOR CHANGELOG / INSIGNIFICANT

* Don't mark a part as broken on `Poco::TimeoutException` [#50811](https://github.com/ClickHouse/ClickHouse/pull/50811) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Fix a versions' tweak for tagged commits, improve version_helper [#51035](https://github.com/ClickHouse/ClickHouse/pull/51035) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Sqlancer has changed master to main [#51060](https://github.com/ClickHouse/ClickHouse/pull/51060) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
docs/changelogs/v23.5.3.24-stable.md (new file)
@@ -0,0 +1,26 @@
---
sidebar_position: 1
sidebar_label: 2023
---

# 2023 Changelog

### ClickHouse release v23.5.3.24-stable (76f54616d3b) FIXME as compared to v23.5.2.7-stable (5751aa1ab9f)

#### Bug Fix (user-visible misbehavior in an official stable release)

* Fix Log family table return wrong rows count after truncate [#50585](https://github.com/ClickHouse/ClickHouse/pull/50585) ([flynn](https://github.com/ucasfl)).
* Fix bug in `uniqExact` parallel merging [#50590](https://github.com/ClickHouse/ClickHouse/pull/50590) ([Nikita Taranov](https://github.com/nickitat)).
* Revert recent grace hash join changes [#50699](https://github.com/ClickHouse/ClickHouse/pull/50699) ([vdimir](https://github.com/vdimir)).
* Avoid storing logs in Keeper containing unknown operation [#50751](https://github.com/ClickHouse/ClickHouse/pull/50751) ([Antonio Andelic](https://github.com/antonio2368)).
* Add compat setting for non-const timezones [#50834](https://github.com/ClickHouse/ClickHouse/pull/50834) ([Robert Schulze](https://github.com/rschu1ze)).
* Fix iceberg V2 optional metadata parsing [#50974](https://github.com/ClickHouse/ClickHouse/pull/50974) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix subtly broken copy-on-write of ColumnLowCardinality dictionary [#51064](https://github.com/ClickHouse/ClickHouse/pull/51064) ([Michael Kolupaev](https://github.com/al13n321)).
* Generate safe IVs [#51086](https://github.com/ClickHouse/ClickHouse/pull/51086) ([Salvatore Mesoraca](https://github.com/aiven-sal)).

#### NOT FOR CHANGELOG / INSIGNIFICANT

* Don't mark a part as broken on `Poco::TimeoutException` [#50811](https://github.com/ClickHouse/ClickHouse/pull/50811) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Fix a versions' tweak for tagged commits, improve version_helper [#51035](https://github.com/ClickHouse/ClickHouse/pull/51035) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Sqlancer has changed master to main [#51060](https://github.com/ClickHouse/ClickHouse/pull/51060) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
@@ -233,6 +233,12 @@ libhdfs3 support HDFS namenode HA.
- `_path` — Path to the file.
- `_file` — Name of the file.

## Storage Settings {#storage-settings}

- [hdfs_truncate_on_insert](/docs/en/operations/settings/settings.md#hdfs-truncate-on-insert) - allows to truncate file before insert into it. Disabled by default.
- [hdfs_create_multiple_files](/docs/en/operations/settings/settings.md#hdfs_allow_create_multiple_files) - allows to create a new file on each insert if format has suffix. Disabled by default.
- [hdfs_skip_empty_files](/docs/en/operations/settings/settings.md#hdfs_skip_empty_files) - allows to skip empty files while reading. Disabled by default.
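
For example, a minimal sketch of overriding one of these per query (the table name `hdfs_engine_table` is illustrative and assumes an HDFS-backed table with `name String, value UInt32` columns, as in the usage example on this page):

``` sql
-- Overwrite the existing HDFS file instead of failing on a repeated insert
INSERT INTO hdfs_engine_table SETTINGS hdfs_truncate_on_insert = 1 VALUES ('one', 1);
```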

**See Also**

- [Virtual columns](../../../engines/table-engines/index.md#table_engines-virtual_columns)
@@ -136,7 +136,7 @@ postgresql> SELECT * FROM test;

### Creating Table in ClickHouse, and connecting to PostgreSQL table created above

-This example uses the [PostgreSQL table engine](/docs/en/engines/table-engines/integrations/postgresql.md) to connect the ClickHouse table to the PostgreSQL table:
+This example uses the [PostgreSQL table engine](/docs/en/engines/table-engines/integrations/postgresql.md) to connect the ClickHouse table to the PostgreSQL table and use both SELECT and INSERT statements to the PostgreSQL database:

``` sql
CREATE TABLE default.postgresql_table

@@ -150,10 +150,21 @@ ENGINE = PostgreSQL('localhost:5432', 'public', 'test', 'postges_user', 'postgre

### Inserting initial data from PostgreSQL table into ClickHouse table, using a SELECT query

-The [postgresql table function](/docs/en/sql-reference/table-functions/postgresql.md) copies the data from PostgreSQL to ClickHouse, which is often used for improving the query performance of the data by querying or performing analytics in ClickHouse rather than in PostgreSQL, or can also be used for migrating data from PostgreSQL to ClickHouse:
+The [postgresql table function](/docs/en/sql-reference/table-functions/postgresql.md) copies the data from PostgreSQL to ClickHouse, which is often used for improving the query performance of the data by querying or performing analytics in ClickHouse rather than in PostgreSQL, or can also be used for migrating data from PostgreSQL to ClickHouse. Since we will be copying the data from PostgreSQL to ClickHouse, we will use a MergeTree table engine in ClickHouse and call it postgresql_copy:

``` sql
-INSERT INTO default.postgresql_table
+CREATE TABLE default.postgresql_copy
(
    `float_nullable` Nullable(Float32),
    `str` String,
    `int_id` Int32
)
ENGINE = MergeTree
ORDER BY (int_id);
```

``` sql
INSERT INTO default.postgresql_copy
SELECT * FROM postgresql('localhost:5432', 'public', 'test', 'postges_user', 'postgres_password');
```
@@ -164,13 +175,13 @@ If then performing ongoing synchronization between the PostgreSQL table and Clic
This would require keeping track of the max ID or timestamp previously added, such as the following:

``` sql
-SELECT max(`int_id`) AS maxIntID FROM default.postgresql_table;
+SELECT max(`int_id`) AS maxIntID FROM default.postgresql_copy;
```

Then inserting values from PostgreSQL table greater than the max

``` sql
-INSERT INTO default.postgresql_table
+INSERT INTO default.postgresql_copy
SELECT * FROM postgresql('localhost:5432', 'public', 'test', 'postges_user', 'postgres_password')
WHERE int_id > maxIntID;
```
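
The two steps can also be folded into a single statement; a minimal sketch, reusing the same `postgresql_copy` table and connection parameters from this example:

``` sql
INSERT INTO default.postgresql_copy
SELECT * FROM postgresql('localhost:5432', 'public', 'test', 'postges_user', 'postgres_password')
WHERE int_id > (SELECT max(int_id) FROM default.postgresql_copy);
```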
@@ -178,7 +189,7 @@ WHERE int_id > maxIntID;

### Selecting data from the resulting ClickHouse table

``` sql
-SELECT * FROM postgresql_table WHERE str IN ('test');
+SELECT * FROM postgresql_copy WHERE str IN ('test');
```

``` text
@@ -1,5 +1,5 @@
---
-slug: /en/sql-reference/table-functions/redis
+slug: /en/engines/table-engines/integrations/redis
sidebar_position: 43
sidebar_label: Redis
---

@@ -34,7 +34,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name
- `primary` must be specified, it supports only one column in the primary key. The primary key will be serialized in binary as a Redis key.

- columns other than the primary key will be serialized in binary as Redis value in corresponding order.

- queries with key equals or in filtering will be optimized to a multi-key lookup from Redis. Queries without a filtering key result in a full table scan, which is a heavy operation.
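
A minimal sketch of the difference (the table and column names are illustrative and assume a Redis-backed table with primary key column `key` and a value column `v`):

``` sql
-- Optimized: filters on the primary key, so this becomes a multi-key lookup in Redis
SELECT * FROM redis_table WHERE key IN ('1', '2');

-- Not optimized: no filter on the primary key, so the whole table is scanned
SELECT * FROM redis_table WHERE v > 10;
```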

## Usage Example {#usage-example}
@@ -127,6 +127,12 @@ CREATE TABLE table_with_asterisk (name String, value UInt32)
ENGINE = S3('https://clickhouse-public-datasets.s3.amazonaws.com/my-bucket/{some,another}_folder/*', 'CSV');
```

## Storage Settings {#storage-settings}

- [s3_truncate_on_insert](/docs/en/operations/settings/settings.md#s3-truncate-on-insert) - allows to truncate file before insert into it. Disabled by default.
- [s3_create_multiple_files](/docs/en/operations/settings/settings.md#s3_allow_create_multiple_files) - allows to create a new file on each insert if format has suffix. Disabled by default.
- [s3_skip_empty_files](/docs/en/operations/settings/settings.md#s3_skip_empty_files) - allows to skip empty files while reading. Disabled by default.
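
As a sketch, one of these can be applied to a single INSERT (the table name is illustrative and assumes an S3-backed table like the ones above):

``` sql
-- With the setting enabled, each INSERT creates a new S3 object instead of throwing because the key already exists
INSERT INTO s3_engine_table SETTINGS s3_create_new_file_on_insert = 1 VALUES ('two', 2);
```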

## S3-related Settings {#settings}

The following settings can be set before query execution or placed into configuration file.
@@ -131,7 +131,6 @@ The `GRANULARITY` parameter determines how many ANN sub-indexes are created. Big
sub-indexes, up to the point where a column (or a column's data part) has only a single sub-index. In that case, the sub-index has a
"global" view of all column rows and can directly return all granules of the column (part) with relevant rows (there are at most
`LIMIT [N]`-many such granules). In a second step, ClickHouse will load these granules and identify the actually best rows by performing a
brute-force distance calculation over all rows of the granules. With a small `GRANULARITY` value, each of the sub-indexes returns up to
`LIMIT N`-many granules. As a result, more granules need to be loaded and post-filtered. Note that the search accuracy is with both cases
equally good, only the processing performance differs. It is generally recommended to use a large `GRANULARITY` for ANN indexes and fall

@@ -152,7 +151,7 @@ This type of ANN index implements [the Annoy algorithm](https://github.com/spoti
space in random linear surfaces (lines in 2D, planes in 3D etc.).

<div class='vimeo-container'>
-  <iframe src="//www.youtube.com/watch?v=QkCCyLW0ehU"
+  <iframe src="//www.youtube.com/embed/QkCCyLW0ehU"
width="640"
height="360"
frameborder="0"
@@ -853,7 +853,7 @@ Tags:
- `max_data_part_size_bytes` — the maximum size of a part that can be stored on any of the volume’s disks. If the a size of a merged part estimated to be bigger than `max_data_part_size_bytes` then this part will be written to a next volume. Basically this feature allows to keep new/small parts on a hot (SSD) volume and move them to a cold (HDD) volume when they reach large size. Do not use this setting if your policy has only one volume.
- `move_factor` — when the amount of available space gets lower than this factor, data automatically starts to move on the next volume if any (by default, 0.1). ClickHouse sorts existing parts by size from largest to smallest (in descending order) and selects parts with the total size that is sufficient to meet the `move_factor` condition. If the total size of all parts is insufficient, all parts will be moved.
- `prefer_not_to_merge` — Disables merging of data parts on this volume. When this setting is enabled, merging data on this volume is not allowed. This allows controlling how ClickHouse works with slow disks.
-- `perform_ttl_move_on_insert` — Disables TTL move on data part INSERT. By default if we insert a data part that already expired by the TTL move rule it immediately goes to a volume/disk declared in move rule. This can significantly slowdown insert in case if destination volume/disk is slow (e.g. S3).
+- `perform_ttl_move_on_insert` — Disables TTL move on data part INSERT. By default (if enabled) if we insert a data part that already expired by the TTL move rule it immediately goes to a volume/disk declared in move rule. This can significantly slowdown insert in case if destination volume/disk is slow (e.g. S3). If disabled then already expired data part is written into a default volume and then right after moved to TTL volume.
- `load_balancing` - Policy for disk balancing, `round_robin` or `least_used`.

Configuration examples:
@@ -92,3 +92,11 @@ $ echo -e "1,2\n3,4" | clickhouse-local -q "CREATE TABLE table (a Int64, b Int64
`PARTITION BY` — Optional. It is possible to create separate files by partitioning the data on a partition key. In most cases, you don't need a partition key, and if it is needed you generally don't need a partition key more granular than by month. Partitioning does not speed up queries (in contrast to the ORDER BY expression). You should never use too granular partitioning. Don't partition your data by client identifiers or names (instead, make client identifier or name the first column in the ORDER BY expression).

For partitioning by month, use the `toYYYYMM(date_column)` expression, where `date_column` is a column with a date of the type [Date](/docs/en/sql-reference/data-types/date.md). The partition names here have the `"YYYYMM"` format.

## Settings {#settings}

- [engine_file_empty_if_not_exists](/docs/en/operations/settings/settings.md#engine-file-emptyif-not-exists) - allows to select empty data from a file that doesn't exist. Disabled by default.
- [engine_file_truncate_on_insert](/docs/en/operations/settings/settings.md#engine-file-truncate-on-insert) - allows to truncate file before insert into it. Disabled by default.
- [engine_file_allow_create_multiple_files](/docs/en/operations/settings/settings.md#engine_file_allow_create_multiple_files) - allows to create a new file on each insert if format has suffix. Disabled by default.
- [engine_file_skip_empty_files](/docs/en/operations/settings/settings.md#engine_file_skip_empty_files) - allows to skip empty files while reading. Disabled by default.
- [storage_file_read_method](/docs/en/operations/settings/settings.md#engine-file-emptyif-not-exists) - method of reading data from storage file, one of: `read`, `pread`, `mmap`. The mmap method does not apply to clickhouse-server (it's intended for clickhouse-local). Default value: `pread` for clickhouse-server, `mmap` for clickhouse-local.
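
For instance, a minimal sketch of overwriting a File-engine table's data file on a single insert (the table name `file_engine_table` is illustrative and assumes the two-column example table from this page):

``` sql
INSERT INTO file_engine_table SETTINGS engine_file_truncate_on_insert = 1 VALUES ('one', 1);
```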
@@ -102,3 +102,7 @@ SELECT * FROM url_engine_table
`PARTITION BY` — Optional. It is possible to create separate files by partitioning the data on a partition key. In most cases, you don't need a partition key, and if it is needed you generally don't need a partition key more granular than by month. Partitioning does not speed up queries (in contrast to the ORDER BY expression). You should never use too granular partitioning. Don't partition your data by client identifiers or names (instead, make client identifier or name the first column in the ORDER BY expression).

For partitioning by month, use the `toYYYYMM(date_column)` expression, where `date_column` is a column with a date of the type [Date](/docs/en/sql-reference/data-types/date.md). The partition names here have the `"YYYYMM"` format.

## Storage Settings {#storage-settings}

- [engine_url_skip_empty_files](/docs/en/operations/settings/settings.md#engine_url_skip_empty_files) - allows to skip empty files while reading. Disabled by default.
@@ -194,7 +194,129 @@ You can pass parameters to `clickhouse-client` (all parameters have a default va
- `--print-profile-events` – Print `ProfileEvents` packets.
- `--profile-events-delay-ms` – Delay between printing `ProfileEvents` packets (-1 - print only totals, 0 - print every single packet).

Since version 20.5, `clickhouse-client` has automatic syntax highlighting (always enabled).
Instead of `--host`, `--port`, `--user` and `--password` options, ClickHouse client also supports connection strings (see next section).

## Connection string {#connection_string}

clickhouse-client alternatively supports connecting to clickhouse server using a connection string similar to [MongoDB](https://www.mongodb.com/docs/manual/reference/connection-string/), [PostgreSQL](https://www.postgresql.org/docs/current/libpq-connect.html#LIBPQ-CONNSTRING), [MySQL](https://dev.mysql.com/doc/refman/8.0/en/connecting-using-uri-or-key-value-pairs.html#connecting-using-uri). It has the following syntax:

```text
clickhouse:[//[user[:password]@][hosts_and_ports]][/database][?query_parameters]
```

Where

- `user` - (optional) is a user name,
- `password` - (optional) is a user password. If `:` is specified and the password is blank, the client will prompt for the user's password.
- `hosts_and_ports` - (optional) is a list of hosts and optional ports `host[:port] [, host:[port]], ...`,
- `database` - (optional) is the database name,
- `query_parameters` - (optional) is a list of key-value pairs `param1=value1[,&param2=value2], ...`. For some parameters, no value is required. Parameter names and values are case-sensitive.

If no user is specified, the `default` user without password will be used.
If no host is specified, `localhost` will be used.
If no port is specified, `9000` will be used as the port.
If no database is specified, the `default` database will be used.

If the user name, password or database was specified in the connection string, it cannot be specified using `--user`, `--password` or `--database` (and vice versa).

The host component can be either a host name or an IP address. Put an IPv6 address in square brackets to specify it:

```text
clickhouse://[2001:db8::1234]
```

A connection string can contain multiple hosts. clickhouse-client will try to connect to these hosts in order (i.e. from left to right). After the connection is established, no attempt to connect to the remaining hosts is made.

The connection string must be specified as the first argument of clickhouse-client. The connection string can be combined with arbitrary other [command-line-options](#command-line-options) except `--host/-h` and `--port`.

The following keys are allowed for the `query_parameters` component:

- `secure` or shorthanded `s` - no value. If specified, the client will connect to the server over a secure connection (TLS). See `secure` in [command-line-options](#command-line-options)

### Percent encoding {#connection_string_uri_percent_encoding}

Non-US ASCII, spaces and special characters in the `user`, `password`, `hosts`, `database` and `query parameters` must be [percent-encoded](https://en.wikipedia.org/wiki/URL_encoding).

### Examples {#connection_string_examples}

Connect to localhost using port 9000 and execute the query `SELECT 1`.

``` bash
clickhouse-client clickhouse://localhost:9000 --query "SELECT 1"
```

Connect to localhost using user `john` with password `secret`, host `127.0.0.1` and port `9000`

``` bash
clickhouse-client clickhouse://john:secret@127.0.0.1:9000
```

Connect to localhost using the default user, host with IPV6 address `[::1]` and port `9000`.

``` bash
clickhouse-client clickhouse://[::1]:9000
```

Connect to localhost using port 9000 in multiline mode.

``` bash
clickhouse-client clickhouse://localhost:9000 '-m'
```

Connect to localhost using port 9000 with the user `default`.

``` bash
clickhouse-client clickhouse://default@localhost:9000

# equivalent to:
clickhouse-client clickhouse://localhost:9000 --user default
```

Connect to localhost using port 9000 to `my_database` database.

``` bash
clickhouse-client clickhouse://localhost:9000/my_database

# equivalent to:
clickhouse-client clickhouse://localhost:9000 --database my_database
```

Connect to localhost using port 9000 to the `my_database` database specified in the connection string and a secure connection using the shorthanded 's' URI parameter.

```bash
clickhouse-client clickhouse://localhost/my_database?s

# equivalent to:
clickhouse-client clickhouse://localhost/my_database -s
```

Connect to the default host using the default port, default user, and default database.

``` bash
clickhouse-client clickhouse:
```

Connect to the default host using the default port, using user `my_user` and no password.

``` bash
clickhouse-client clickhouse://my_user@

# Using a blank password between : and @ means asking the user to enter the password before starting the connection.
clickhouse-client clickhouse://my_user:@
```

Connect to localhost using an email as the user name. The `@` symbol is percent-encoded to `%40`.

``` bash
clickhouse-client clickhouse://some_user%40some_mail.com@localhost:9000
```

Connect to one of the provided hosts: `192.168.1.15`, `192.168.1.25`.

``` bash
clickhouse-client clickhouse://192.168.1.15,192.168.1.25
```

### Configuration Files {#configuration_files}
@@ -470,6 +470,7 @@ The CSV format supports the output of totals and extremes the same way as `TabSe
- [input_format_csv_detect_header](/docs/en/operations/settings/settings-formats.md/#input_format_csv_detect_header) - automatically detect header with names and types in CSV format. Default value - `true`.
- [input_format_csv_skip_trailing_empty_lines](/docs/en/operations/settings/settings-formats.md/#input_format_csv_skip_trailing_empty_lines) - skip trailing empty lines at the end of data. Default value - `false`.
- [input_format_csv_trim_whitespaces](/docs/en/operations/settings/settings-formats.md/#input_format_csv_trim_whitespaces) - trim spaces and tabs in non-quoted CSV strings. Default value - `true`.
+- [input_format_csv_allow_whitespace_or_tab_as_delimiter](/docs/en/operations/settings/settings-formats.md/#input_format_csv_allow_whitespace_or_tab_as_delimiter) - Allow to use whitespace or tab as field delimiter in CSV strings. Default value - `false`.
- [input_format_csv_ignore_extra_columns](/docs/en/operations/settings/settings-formats.md/#input_format_csv_ignore_extra_columns) - ignore extra columns in CSV input (if your file has more columns than expected). Default value - `false`.
- [input_format_csv_missing_as_default](/docs/en/operations/settings/settings-formats.md/#input_format_csv_missing_as_default) - treat missing fields in CSV input as default values. Default value - `false`.
@@ -1879,13 +1880,13 @@ The table below shows supported data types and how they match ClickHouse [data t
| `string (uuid)` \** | [UUID](/docs/en/sql-reference/data-types/uuid.md) | `string (uuid)` \** |
| `fixed(16)` | [Int128/UInt128](/docs/en/sql-reference/data-types/int-uint.md) | `fixed(16)` |
| `fixed(32)` | [Int256/UInt256](/docs/en/sql-reference/data-types/int-uint.md) | `fixed(32)` |
| `record` | [Tuple](/docs/en/sql-reference/data-types/tuple.md) | `record` |

\* `bytes` is default, controlled by [output_format_avro_string_column_pattern](/docs/en/operations/settings/settings-formats.md/#output_format_avro_string_column_pattern)
\** [Avro logical types](https://avro.apache.org/docs/current/spec.html#Logical+Types)

Unsupported Avro data types: `record` (non-root), `map`

Unsupported Avro logical data types: `time-millis`, `time-micros`, `duration`

### Inserting Data {#inserting-data-1}
@@ -1924,7 +1925,26 @@ Output Avro file compression and sync interval can be configured with [output_fo

Using the ClickHouse [DESCRIBE](/docs/en/sql-reference/statements/describe-table) function, you can quickly view the inferred format of an Avro file like the following example. This example includes the URL of a publicly accessible Avro file in the ClickHouse S3 public bucket:

``` sql
DESCRIBE url('https://clickhouse-public-datasets.s3.eu-central-1.amazonaws.com/hits.avro','Avro');
```
```
┌─name───────────────────────┬─type────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐
│ WatchID                    │ Int64           │              │                    │         │                  │                │
│ JavaEnable                 │ Int32           │              │                    │         │                  │                │
│ Title                      │ String          │              │                    │         │                  │                │
│ GoodEvent                  │ Int32           │              │                    │         │                  │                │
│ EventTime                  │ Int32           │              │                    │         │                  │                │
│ EventDate                  │ Date32          │              │                    │         │                  │                │
│ CounterID                  │ Int32           │              │                    │         │                  │                │
│ ClientIP                   │ Int32           │              │                    │         │                  │                │
│ ClientIP6                  │ FixedString(16) │              │                    │         │                  │                │
│ RegionID                   │ Int32           │              │                    │         │                  │                │
...
│ IslandID                   │ FixedString(16) │              │                    │         │                  │                │
│ RequestNum                 │ Int32           │              │                    │         │                  │                │
│ RequestTry                 │ Int32           │              │                    │         │                  │                │
└────────────────────────────┴─────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
```

## AvroConfluent {#data-format-avro-confluent}
@@ -83,6 +83,7 @@ The BACKUP and RESTORE statements take a list of DATABASE and TABLE names, a des
- [`compression_method`](/docs/en/sql-reference/statements/create/table.md/#column-compression-codecs) and compression_level
- `password` for the file on disk
- `base_backup`: the destination of the previous backup of this source. For example, `Disk('backups', '1.zip')`
+- `structure_only`: if enabled, allows to only backup or restore the CREATE statements without the data of tables

### Usage examples

@@ -398,4 +399,4 @@ To disallow concurrent backup/restore, you can use these settings respectively.
```

The default value for both is true, so by default concurrent backup/restores are allowed.
When these settings are false on a cluster, only 1 backup/restore is allowed to run on a cluster at a time.
@@ -202,7 +202,7 @@ Default: 15

## dns_max_consecutive_failures

-Max connection failures before dropping host from ClickHouse DNS cache
+Max consecutive resolving failures before dropping a host from ClickHouse DNS cache

Type: UInt32
@@ -943,6 +943,38 @@ Treat missing fields in CSV input as default values.

Disabled by default.

### input_format_csv_allow_whitespace_or_tab_as_delimiter {#input_format_csv_allow_whitespace_or_tab_as_delimiter}

Allow to use whitespace or tab as field delimiter in CSV strings.

Default value: `false`.

**Examples**

Query

```bash
echo 'a b' | ./clickhouse local -q "select * from table FORMAT CSV" --input-format="CSV" --input_format_csv_allow_whitespace_or_tab_as_delimiter=true --format_csv_delimiter=' '
```

Result

```text
a b
```

Query

```bash
echo 'a b' | ./clickhouse local -q "select * from table FORMAT CSV" --input-format="CSV" --input_format_csv_allow_whitespace_or_tab_as_delimiter=true --format_csv_delimiter='\t'
```

Result

```text
a b
```

## Values format settings {#values-format-settings}

### input_format_values_interpret_expressions {#input_format_values_interpret_expressions}
@@ -3328,7 +3328,35 @@ Possible values:

Default value: `0`.

-## s3_truncate_on_insert
+## engine_file_allow_create_multiple_files {#engine_file_allow_create_multiple_files}

Enables or disables creating a new file on each insert in file engine tables if the format has the suffix (`JSON`, `ORC`, `Parquet`, etc.). If enabled, on each insert a new file will be created with a name following this pattern:

`data.Parquet` -> `data.1.Parquet` -> `data.2.Parquet`, etc.

Possible values:
- 0 — `INSERT` query appends new data to the end of the file.
- 1 — `INSERT` query creates a new file.

Default value: `0`.
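
For example, a minimal sketch (the table name and values are illustrative):

``` sql
CREATE TABLE parquet_out (x UInt32) ENGINE = File(Parquet);
-- With the setting enabled, each INSERT creates a new file (data.Parquet, data.1.Parquet, ...)
INSERT INTO parquet_out SETTINGS engine_file_allow_create_multiple_files = 1 VALUES (1);
INSERT INTO parquet_out SETTINGS engine_file_allow_create_multiple_files = 1 VALUES (2);
```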

## engine_file_skip_empty_files {#engine_file_skip_empty_files}

Enables or disables skipping empty files in [File](../../engines/table-engines/special/file.md) engine tables.

Possible values:
- 0 — `SELECT` throws an exception if empty file is not compatible with requested format.
- 1 — `SELECT` returns empty result for empty file.

Default value: `0`.

## storage_file_read_method {#storage_file_read_method}

Method of reading data from storage file, one of: `read`, `pread`, `mmap`. The mmap method does not apply to clickhouse-server (it's intended for clickhouse-local).

Default value: `pread` for clickhouse-server, `mmap` for clickhouse-local.

## s3_truncate_on_insert {#s3_truncate_on_insert}

Enables or disables truncate before inserts in s3 engine tables. If disabled, an exception will be thrown on insert attempts if an S3 object already exists.

@@ -3338,7 +3366,29 @@ Possible values:

Default value: `0`.

-## hdfs_truncate_on_insert
+## s3_create_new_file_on_insert {#s3_create_new_file_on_insert}

Enables or disables creating a new file on each insert in s3 engine tables. If enabled, on each insert a new S3 object will be created with the key, similar to this pattern:

initial: `data.Parquet.gz` -> `data.1.Parquet.gz` -> `data.2.Parquet.gz`, etc.

Possible values:
- 0 — `INSERT` query appends new data to the end of the file.
- 1 — `INSERT` query creates a new file.

Default value: `0`.

## s3_skip_empty_files {#s3_skip_empty_files}

Enables or disables skipping empty files in [S3](../../engines/table-engines/integrations/s3.md) engine tables.

Possible values:
- 0 — `SELECT` throws an exception if empty file is not compatible with requested format.
- 1 — `SELECT` returns empty result for empty file.

Default value: `0`.

## hdfs_truncate_on_insert {#hdfs_truncate_on_insert}

Enables or disables truncation before an insert in hdfs engine tables. If disabled, an exception will be thrown on an attempt to insert if a file in HDFS already exists.

@@ -3348,31 +3398,7 @@ Possible values:

Default value: `0`.

-## engine_file_allow_create_multiple_files
-
-Enables or disables creating a new file on each insert in file engine tables if the format has the suffix (`JSON`, `ORC`, `Parquet`, etc.). If enabled, on each insert a new file will be created with a name following this pattern:
-
-`data.Parquet` -> `data.1.Parquet` -> `data.2.Parquet`, etc.
-
-Possible values:
-- 0 — `INSERT` query appends new data to the end of the file.
-- 1 — `INSERT` query replaces existing content of the file with the new data.
-
-Default value: `0`.
-
-## s3_create_new_file_on_insert
-
-Enables or disables creating a new file on each insert in s3 engine tables. If enabled, on each insert a new S3 object will be created with the key, similar to this pattern:
-
-initial: `data.Parquet.gz` -> `data.1.Parquet.gz` -> `data.2.Parquet.gz`, etc.
-
-Possible values:
-- 0 — `INSERT` query appends new data to the end of the file.
-- 1 — `INSERT` query replaces existing content of the file with the new data.
-
-Default value: `0`.
-
-## hdfs_create_new_file_on_insert
+## hdfs_create_new_file_on_insert {#hdfs_create_new_file_on_insert}

Enables or disables creating a new file on each insert in HDFS engine tables. If enabled, on each insert a new HDFS file will be created with the name, similar to this pattern:

@@ -3380,7 +3406,27 @@ initial: `data.Parquet.gz` -> `data.1.Parquet.gz` -> `data.2.Parquet.gz`, etc.

Possible values:
- 0 — `INSERT` query appends new data to the end of the file.
-- 1 — `INSERT` query replaces existing content of the file with the new data.
+- 1 — `INSERT` query creates a new file.

Default value: `0`.

## hdfs_skip_empty_files {#hdfs_skip_empty_files}

Enables or disables skipping empty files in [HDFS](../../engines/table-engines/integrations/hdfs.md) engine tables.

Possible values:
- 0 — `SELECT` throws an exception if empty file is not compatible with requested format.
- 1 — `SELECT` returns empty result for empty file.

Default value: `0`.

## engine_url_skip_empty_files {#engine_url_skip_empty_files}

Enables or disables skipping empty files in [URL](../../engines/table-engines/special/url.md) engine tables.

Possible values:
- 0 — `SELECT` throws an exception if empty file is not compatible with requested format.
- 1 — `SELECT` returns empty result for empty file.

Default value: `0`.
@@ -1509,10 +1509,12 @@ parseDateTimeBestEffort(time_string [, time_zone])
- A string containing 9..10 digit [unix timestamp](https://en.wikipedia.org/wiki/Unix_time).
- A string with a date and a time component: `YYYYMMDDhhmmss`, `DD/MM/YYYY hh:mm:ss`, `DD-MM-YY hh:mm`, `YYYY-MM-DD hh:mm:ss`, etc.
- A string with a date, but no time component: `YYYY`, `YYYYMM`, `YYYY*MM`, `DD/MM/YYYY`, `DD-MM-YY` etc.
-- A string with a day and time: `DD`, `DD hh`, `DD hh:mm`. In this case `YYYY-MM` are substituted as `2000-01`.
+- A string with a day and time: `DD`, `DD hh`, `DD hh:mm`. In this case `MM` is substituted by `01`.
- A string that includes the date and time along with time zone offset information: `YYYY-MM-DD hh:mm:ss ±h:mm`, etc. For example, `2020-12-12 17:36:00 -5:00`.
+- A [syslog timestamp](https://datatracker.ietf.org/doc/html/rfc3164#section-4.1.2): `Mmm dd hh:mm:ss`. For example, `Jun 9 14:20:32`.

For all of the formats with separator the function parses months names expressed by their full name or by the first three letters of a month name. Examples: `24/DEC/18`, `24-Dec-18`, `01-September-2018`.
+If the year is not specified, it is considered to be equal to the current year. If the resulting DateTime happen to be in the future (even by a second after the current moment), then the current year is substituted by the previous year.

**Returned value**

@@ -1583,23 +1585,46 @@ Result:
Query:

``` sql
-SELECT parseDateTimeBestEffort('10 20:19');
+SELECT toYear(now()) as year, parseDateTimeBestEffort('10 20:19');
```

Result:

```response
-┌─parseDateTimeBestEffort('10 20:19')─┐
-│                 2000-01-10 20:19:00 │
-└─────────────────────────────────────┘
+┌─year─┬─parseDateTimeBestEffort('10 20:19')─┐
+│ 2023 │                 2023-01-10 20:19:00 │
+└──────┴─────────────────────────────────────┘
```

Query:

``` sql
WITH
    now() AS ts_now,
    formatDateTime(ts_around, '%b %e %T') AS syslog_arg
SELECT
    ts_now,
    syslog_arg,
    parseDateTimeBestEffort(syslog_arg)
FROM (SELECT arrayJoin([ts_now - 30, ts_now + 30]) AS ts_around);
```

Result:

```response
┌──────────────ts_now─┬─syslog_arg──────┬─parseDateTimeBestEffort(syslog_arg)─┐
│ 2023-06-30 23:59:30 │ Jun 30 23:59:00 │                 2023-06-30 23:59:00 │
│ 2023-06-30 23:59:30 │ Jul  1 00:00:00 │                 2022-07-01 00:00:00 │
└─────────────────────┴─────────────────┴─────────────────────────────────────┘
```

**See Also**

-- [RFC 1123](https://tools.ietf.org/html/rfc1123)
+- [RFC 1123](https://datatracker.ietf.org/doc/html/rfc1123)
- [toDate](#todate)
- [toDateTime](#todatetime)
- [ISO 8601 announcement by @xkcd](https://xkcd.com/1179/)
+- [RFC 3164](https://datatracker.ietf.org/doc/html/rfc3164#section-4.1.2)

## parseDateTimeBestEffortUS
@@ -55,6 +55,9 @@ With the described implementation now we can see what can negatively affect 'DEL
- Table having a very large number of data parts
- Having a lot of data in Compact parts—in a Compact part, all columns are stored in one file.

+:::note
+Currently, Lightweight delete does not work for tables with projection as rows in projection may be affected and require the projection to be rebuilt. Rebuilding projection makes the deletion not lightweight, so this is not supported.
+:::

## Related content
@@ -196,6 +196,16 @@ SELECT count(*) FROM file('big_dir/**/file002', 'CSV', 'name String, value UInt3
- `_path` — Path to the file.
- `_file` — Name of the file.

## Settings

- [engine_file_empty_if_not_exists](/docs/en/operations/settings/settings.md#engine-file-emptyif-not-exists) - allows to select empty data from a file that doesn't exist. Disabled by default.
- [engine_file_truncate_on_insert](/docs/en/operations/settings/settings.md#engine-file-truncate-on-insert) - allows to truncate file before insert into it. Disabled by default.
- [engine_file_allow_create_multiple_files](/docs/en/operations/settings/settings.md#engine_file_allow_create_multiple_files) - allows to create a new file on each insert if format has suffix. Disabled by default.
- [engine_file_skip_empty_files](/docs/en/operations/settings/settings.md#engine_file_skip_empty_files) - allows to skip empty files while reading. Disabled by default.
- [storage_file_read_method](/docs/en/operations/settings/settings.md#engine-file-emptyif-not-exists) - method of reading data from storage file, one of: read, pread, mmap (only for clickhouse-local). Default value: `pread` for clickhouse-server, `mmap` for clickhouse-local.
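
For example, a minimal sketch of applying one of these to a single query (the path and schema are illustrative):

``` sql
-- Return an empty result for empty CSV files instead of throwing
SELECT * FROM file('dir/*.csv', 'CSV', 'name String, value UInt32')
SETTINGS engine_file_skip_empty_files = 1;
```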

**See Also**

- [Virtual columns](/docs/en/engines/table-engines/index.md#table_engines-virtual_columns)
@@ -1,7 +1,7 @@
---
slug: /en/sql-reference/table-functions/gcs
sidebar_position: 45
-sidebar_label: s3
+sidebar_label: gcs
+keywords: [gcs, bucket]
---
@@ -97,6 +97,12 @@ FROM hdfs('hdfs://hdfs1:9000/big_dir/file{0..9}{0..9}{0..9}', 'CSV', 'name Strin
- `_path` — Path to the file.
- `_file` — Name of the file.

## Storage Settings {#storage-settings}

- [hdfs_truncate_on_insert](/docs/en/operations/settings/settings.md#hdfs-truncate-on-insert) - allows to truncate file before insert into it. Disabled by default.
- [hdfs_create_multiple_files](/docs/en/operations/settings/settings.md#hdfs_allow_create_multiple_files) - allows to create a new file on each insert if format has suffix. Disabled by default.
- [hdfs_skip_empty_files](/docs/en/operations/settings/settings.md#hdfs_skip_empty_files) - allows to skip empty files while reading. Disabled by default.
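
A minimal sketch of using one of these with the `hdfs` table function (the URI and schema are illustrative):

``` sql
-- Skip empty files instead of throwing while reading a set of files
SELECT * FROM hdfs('hdfs://hdfs1:9000/dir/*', 'CSV', 'name String, value UInt32')
SETTINGS hdfs_skip_empty_files = 1;
```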

**See Also**

- [Virtual columns](../../engines/table-engines/index.md#table_engines-virtual_columns)
@@ -107,6 +107,30 @@ SELECT * FROM mysql('localhost:3306', 'test', 'test', 'bayonet', '123');
└────────┴───────┘
```

Copying data from MySQL table into ClickHouse table:

```sql
CREATE TABLE mysql_copy
(
    `id` UInt64,
    `datetime` DateTime('UTC'),
    `description` String
)
ENGINE = MergeTree
ORDER BY (id,datetime);

INSERT INTO mysql_copy
SELECT * FROM mysql('host:port', 'database', 'table', 'user', 'password');
```

Or if copying only an incremental batch from MySQL based on the max current id:

```sql
INSERT INTO mysql_copy
SELECT * FROM mysql('host:port', 'database', 'table', 'user', 'password')
WHERE id > (SELECT max(id) from mysql_copy);
```

**See Also**

- [The ‘MySQL’ table engine](../../engines/table-engines/integrations/mysql.md)
@@ -1,10 +1,10 @@
---
slug: /en/sql-reference/table-functions/redis
-sidebar_position: 10
-sidebar_label: Redis
+sidebar_position: 43
+sidebar_label: redis
---

-# Redis
+# redis

This table function allows integrating ClickHouse with [Redis](https://redis.io/).
@@ -202,6 +202,12 @@ FROM s3(
LIMIT 5;
```

## Storage Settings {#storage-settings}

- [s3_truncate_on_insert](/docs/en/operations/settings/settings.md#s3-truncate-on-insert) - allows to truncate file before insert into it. Disabled by default.
- [s3_create_multiple_files](/docs/en/operations/settings/settings.md#s3_allow_create_multiple_files) - allows to create a new file on each insert if format has suffix. Disabled by default.
- [s3_skip_empty_files](/docs/en/operations/settings/settings.md#s3_skip_empty_files) - allows to skip empty files while reading. Disabled by default.
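
For example, a minimal sketch of overwriting an existing object when writing through the `s3` table function (the URL, bucket and schema are illustrative):

``` sql
INSERT INTO FUNCTION s3('https://my-bucket.s3.amazonaws.com/data.csv', 'CSV', 'name String, value UInt32')
SETTINGS s3_truncate_on_insert = 1
VALUES ('one', 1);
```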

**See Also**

- [S3 engine](../../engines/table-engines/integrations/s3.md)
@@ -53,6 +53,10 @@ Character `|` inside patterns is used to specify failover addresses. They are it
- `_path` — Path to the `URL`.
- `_file` — Resource name of the `URL`.

## Storage Settings {#storage-settings}

- [engine_url_skip_empty_files](/docs/en/operations/settings/settings.md#engine_url_skip_empty_files) - allows to skip empty files while reading. Disabled by default.
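
A minimal sketch of enabling it for a single query over the `url` table function (the URL and schema are illustrative):

``` sql
SELECT * FROM url('http://example.com/data.csv', 'CSV', 'name String, value UInt32')
SETTINGS engine_url_skip_empty_files = 1;
```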

**See Also**

- [Virtual columns](/docs/en/engines/table-engines/index.md#table_engines-virtual_columns)
@@ -142,7 +142,129 @@ $ clickhouse-client --param_tbl="numbers" --param_db="system" --param_col="numbe
- `--history_file` - path to the file with the command history.
- `--param_<name>` — value of a parameter for a [query with parameters](#cli-queries-with-parameters).

Since version 20.5, `clickhouse-client` has automatic syntax highlighting (always enabled).
Instead of the `--host`, `--port`, `--user` and `--password` options, the ClickHouse client also supports connection strings (see the next section).

## Connection string {#connection_string}

clickhouse-client also supports connecting to a ClickHouse server using a connection string similar to [MongoDB](https://www.mongodb.com/docs/manual/reference/connection-string/), [PostgreSQL](https://www.postgresql.org/docs/current/libpq-connect.html#LIBPQ-CONNSTRING), [MySQL](https://dev.mysql.com/doc/refman/8.0/en/connecting-using-uri-or-key-value-pairs.html#connecting-using-uri). It has the following syntax:

```text
clickhouse:[//[user[:password]@][hosts_and_ports]][/database][?query_parameters]
```

Where

- `user` - (optional) the user name,
- `password` - (optional) the user's password. If `:` is specified and the password is blank, the client will prompt for the user's password.
- `hosts_and_ports` - (optional) a list of hosts and optional ports `host[:port] [, host:[port]], ...`,
- `database` - (optional) the database name,
- `query_parameters` - (optional) a list of key-value pairs `param1=value1[,&param2=value2], ...`. For some parameters, no value is required. Parameter names and values are case-sensitive.

If no user is specified, the user name `default` will be used.
If no host is specified, `localhost` will be used.
If no port is specified, `9000` will be used.
If no database is specified, the `default` database will be used.

If the user name, password or database was specified in the connection string, it cannot be specified using `--user`, `--password` or `--database` (and vice versa).

The host parameter can be either a host name or an IP address. Put an IPv6 address in square brackets to specify it:

```text
clickhouse://[2001:db8::1234]
```

Connection strings can contain multiple hosts; clickhouse-client will try to connect to these hosts in order (i.e. from left to right). After the connection is established, no attempt to connect to the remaining hosts is made.

The connection string must be specified as the first argument of clickhouse-client. The connection string can be combined with other [command-line options](#command-line-options) except `--host/-h` and `--port`.

The following keys are allowed for the `query_parameters` component:

- `secure` or, for short, `s` - no value. If specified, the connection to the server will be made over a secure channel (TLS). See `secure` in [command-line-options](#command-line-options).

### Percent encoding {#connection_string_uri_percent_encoding}

Non-US ASCII and special characters in the user name, password, hosts, database and query parameters must be [percent-encoded](https://ru.wikipedia.org/wiki/URL#%D0%9A%D0%BE%D0%B4%D0%B8%D1%80%D0%BE%D0%B2%D0%B0%D0%BD%D0%B8%D0%B5_URL).

### Examples {#connection_string_examples}

Connect to localhost using port 9000 and execute the query `SELECT 1`

``` bash
clickhouse-client clickhouse://localhost:9000 --query "SELECT 1"
```

Connect to localhost using the user `john` with the password `secret`, host `127.0.0.1` and port `9000`

``` bash
clickhouse-client clickhouse://john:secret@127.0.0.1:9000
```

Connect to localhost using the default user, a host with the IPV6 address `[::1]` and port `9000`.

``` bash
clickhouse-client clickhouse://[::1]:9000
```

Connect to localhost using port 9000 in multiline mode.

``` bash
clickhouse-client clickhouse://localhost:9000 '-m'
```

Connect to localhost using port 9000 with the user `default`.

``` bash
clickhouse-client clickhouse://default@localhost:9000

# equivalent to:
clickhouse-client clickhouse://localhost:9000 --user default
```

Connect to localhost using port 9000 with the database `my_database`

``` bash
clickhouse-client clickhouse://localhost:9000/my_database

# equivalent to:
clickhouse-client clickhouse://localhost:9000 --database my_database
```

Connect to localhost using port 9000 with the database `my_database` specified in the connection string, using a secure connection via the short 's' URI parameter.

``` bash
clickhouse-client clickhouse://localhost/my_database?s

# equivalent to:
clickhouse-client clickhouse://localhost/my_database -s
```

Connect to the default host using the default port, the default user, and the default database.

``` bash
clickhouse-client clickhouse:
```

Connect to the default host using the default port, using the user name `my_user` and no password.

``` bash
clickhouse-client clickhouse://my_user@

# Using a blank password between : and @ means the user will be asked to enter the password before the connection starts.
clickhouse-client clickhouse://my_user:@
```

Connect to localhost using an email address as the user name. The `@` symbol is encoded as `%40`.

``` bash
clickhouse-client clickhouse://some_user%40some_mail.com@localhost:9000
```

Connect to one of the hosts: `192.168.1.15`, `192.168.1.25`.

``` bash
clickhouse-client clickhouse://192.168.1.15,192.168.1.25
```

### Configuration Files {#configuration_files}
@ -1223,10 +1223,12 @@ parseDateTimeBestEffort(time_string[, time_zone])

- A [Unix timestamp](https://ru.wikipedia.org/wiki/Unix-время) in string form, 9 or 10 characters.
- A string with a date and time: `YYYYMMDDhhmmss`, `DD/MM/YYYY hh:mm:ss`, `DD-MM-YY hh:mm`, `YYYY-MM-DD hh:mm:ss`, etc.
- A string with a date but no time: `YYYY`, `YYYYMM`, `YYYY*MM`, `DD/MM/YYYY`, `DD-MM-YY`, etc.
- A string with a time and a day: `DD`, `DD hh`, `DD hh:mm`. In this case `YYYY-MM` is assumed to be `2000-01`.
- A string with a time and a day: `DD`, `DD hh`, `DD hh:mm`. In this case `MM` is assumed to be `01`.
- A string containing a date and time together with time zone information: `YYYY-MM-DD hh:mm:ss ±h:mm`, etc. For example, `2020-12-12 17:36:00 -5:00`.
- A string containing a date and time in the [syslog timestamp](https://datatracker.ietf.org/doc/html/rfc3164#section-4.1.2) format: `Mmm dd hh:mm:ss`. For example, `Jun 9 14:20:32`.

For all formats with a delimiter, the function parses month names written either as the full English month name or as the first three letters of the month name. Examples: `24/DEC/18`, `24-Dec-18`, `01-September-2018`.
If the year is not specified, the current year is substituted. If the resulting time turns out to be in the future (even one second ahead of the current moment), the current year is replaced with the previous one.

**Returned value**

@ -1297,23 +1299,46 @@ AS parseDateTimeBestEffort;

Query:

``` sql
SELECT parseDateTimeBestEffort('10 20:19');
SELECT toYear(now()) as year, parseDateTimeBestEffort('10 20:19');
```

Result:

``` text
┌─parseDateTimeBestEffort('10 20:19')─┐
│ 2000-01-10 20:19:00                 │
└─────────────────────────────────────┘
┌─year─┬─parseDateTimeBestEffort('10 20:19')─┐
│ 2023 │ 2023-01-10 20:19:00                 │
└──────┴─────────────────────────────────────┘
```

Query:

``` sql
WITH
    now() AS ts_now,
    formatDateTime(ts_around, '%b %e %T') AS syslog_arg
SELECT
    ts_now,
    syslog_arg,
    parseDateTimeBestEffort(syslog_arg)
FROM (SELECT arrayJoin([ts_now - 30, ts_now + 30]) AS ts_around);
```

Result:

``` text
┌──────────────ts_now─┬─syslog_arg──────┬─parseDateTimeBestEffort(syslog_arg)─┐
│ 2023-06-30 23:59:30 │ Jun 30 23:59:00 │ 2023-06-30 23:59:00                 │
│ 2023-06-30 23:59:30 │ Jul  1 00:00:00 │ 2022-07-01 00:00:00                 │
└─────────────────────┴─────────────────┴─────────────────────────────────────┘
```

**See also**

- [ISO 8601 format information from @xkcd](https://xkcd.com/1179/)
- [RFC 1123](https://tools.ietf.org/html/rfc1123)
- [RFC 1123](https://datatracker.ietf.org/doc/html/rfc1123)
- [toDate](#todate)
- [toDateTime](#todatetime)
- [RFC 3164](https://datatracker.ietf.org/doc/html/rfc3164#section-4.1.2)

## parseDateTimeBestEffortUS {#parsedatetimebesteffortUS}

@ -1,6 +1,6 @@
---
slug: /ru/whats-new/changelog/2017
sidebar_position: 6
sidebar_position: 60
sidebar_label: 2017
title: 2017 Changelog
---

@ -1,6 +1,6 @@
---
slug: /ru/whats-new/changelog/2018
sidebar_position: 5
sidebar_position: 50
sidebar_label: 2018
title: 2018 Changelog
---

@ -1,6 +1,6 @@
---
slug: /ru/whats-new/changelog/2019
sidebar_position: 4
sidebar_position: 40
sidebar_label: 2019
title: 2019 Changelog
---

@ -1,6 +1,6 @@
---
slug: /ru/whats-new/changelog/2020
sidebar_position: 3
sidebar_position: 30
sidebar_label: 2020
title: 2020 Changelog
---

@ -1,6 +1,6 @@
---
slug: /ru/whats-new/changelog/2021
sidebar_position: 2
sidebar_position: 20
sidebar_label: 2021
title: 2021 Changelog
---

10
docs/ru/whats-new/changelog/2022.mdx
Normal file
@ -0,0 +1,10 @@
---
slug: /ru/whats-new/changelog/2022
sidebar_position: 10
sidebar_label: 2022
title: 2022 Changelog
---

import Changelog from '@site/docs/en/whats-new/changelog/2022.md';

<Changelog />

@ -2,5 +2,5 @@ label: 'Changelog'
collapsible: true
collapsed: true
link:
  type: doc
  id: ru/whats-new/changelog/index
  type: generated-index
  title: Changelog

@ -1,7 +1,7 @@
---
sidebar_position: 1
sidebar_label: 2022
title: 2022 Changelog
sidebar_label: 2023
title: 2023 Changelog
slug: /ru/whats-new/changelog/index
---

@ -6,4 +6,4 @@ sidebar_label: Changelog

# Changelog

You can view the latest Changelog at [https://clickhouse.com/docs/en/whats-new/changelog/](https://clickhouse.com/docs/en/whats-new/changelog/)
You can view the latest Changelog at [https://clickhouse.com/docs/en/whats-new/changelog/](/docs/en/whats-new/changelog/index.md)

@ -409,8 +409,15 @@ if (ENABLE_CLICKHOUSE_KEEPER_CONVERTER)
    list(APPEND CLICKHOUSE_BUNDLE clickhouse-keeper-converter)
endif ()
if (ENABLE_CLICKHOUSE_KEEPER_CLIENT)
    add_custom_target (clickhouse-keeper-client ALL COMMAND ${CMAKE_COMMAND} -E create_symlink clickhouse clickhouse-keeper-client DEPENDS clickhouse)
    install (FILES "${CMAKE_CURRENT_BINARY_DIR}/clickhouse-keeper-client" DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
    if (NOT BUILD_STANDALONE_KEEPER)
        add_custom_target (clickhouse-keeper-client ALL COMMAND ${CMAKE_COMMAND} -E create_symlink clickhouse clickhouse-keeper-client DEPENDS clickhouse)
        install (FILES "${CMAKE_CURRENT_BINARY_DIR}/clickhouse-keeper-client" DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
    # symlink to standalone keeper binary
    else ()
        add_custom_target (clickhouse-keeper-client ALL COMMAND ${CMAKE_COMMAND} -E create_symlink clickhouse-keeper clickhouse-keeper-client DEPENDS clickhouse-keeper)
        install (FILES "${CMAKE_CURRENT_BINARY_DIR}/clickhouse-keeper-client" DESTINATION "${CMAKE_INSTALL_BINDIR}" COMPONENT clickhouse-keeper)
    endif ()

    list(APPEND CLICKHOUSE_BUNDLE clickhouse-keeper-client)
endif ()
if (ENABLE_CLICKHOUSE_DISKS)

@ -5,13 +5,13 @@
|
||||
#include <iostream>
|
||||
#include <iomanip>
|
||||
#include <optional>
|
||||
#include <string_view>
|
||||
#include <Common/scope_guard_safe.h>
|
||||
#include <boost/program_options.hpp>
|
||||
#include <boost/algorithm/string/replace.hpp>
|
||||
#include <filesystem>
|
||||
#include <string>
|
||||
#include "Client.h"
|
||||
#include "Client/ConnectionString.h"
|
||||
#include "Core/Protocol.h"
|
||||
#include "Parsers/formatAST.h"
|
||||
|
||||
@ -977,13 +977,7 @@ void Client::addOptions(OptionsDescription & options_description)
|
||||
("connection", po::value<std::string>(), "connection to use (from the client config), by default connection name is hostname")
|
||||
("secure,s", "Use TLS connection")
|
||||
("user,u", po::value<std::string>()->default_value("default"), "user")
|
||||
/** If "--password [value]" is used but the value is omitted, the bad argument exception will be thrown.
|
||||
* implicit_value is used to avoid this exception (to allow user to type just "--password")
|
||||
* Since currently boost provides no way to check if a value has been set implicitly for an option,
|
||||
* the "\n" is used to distinguish this case because there is hardly a chance a user would use "\n"
|
||||
* as the password.
|
||||
*/
|
||||
("password", po::value<std::string>()->implicit_value("\n", ""), "password")
|
||||
("password", po::value<std::string>(), "password")
|
||||
("ask-password", "ask-password")
|
||||
("quota_key", po::value<std::string>(), "A string to differentiate quotas when the user have keyed quotas configured on server")
|
||||
|
||||
@ -1248,6 +1242,9 @@ void Client::readArguments(
|
||||
std::vector<Arguments> & external_tables_arguments,
|
||||
std::vector<Arguments> & hosts_and_ports_arguments)
|
||||
{
|
||||
bool has_connection_string = argc >= 2 && tryParseConnectionString(std::string_view(argv[1]), common_arguments, hosts_and_ports_arguments);
|
||||
int start_argument_index = has_connection_string ? 2 : 1;
|
||||
|
||||
/** We allow different groups of arguments:
|
||||
* - common arguments;
|
||||
* - arguments for any number of external tables each in form "--external args...",
|
||||
@ -1260,10 +1257,13 @@ void Client::readArguments(
|
||||
std::string prev_host_arg;
|
||||
std::string prev_port_arg;
|
||||
|
||||
for (int arg_num = 1; arg_num < argc; ++arg_num)
|
||||
for (int arg_num = start_argument_index; arg_num < argc; ++arg_num)
|
||||
{
|
||||
std::string_view arg = argv[arg_num];
|
||||
|
||||
if (has_connection_string)
|
||||
checkIfCmdLineOptionCanBeUsedWithConnectionString(arg);
|
||||
|
||||
if (arg == "--external")
|
||||
{
|
||||
in_external_group = true;
|
||||
@ -1391,6 +1391,14 @@ void Client::readArguments(
|
||||
arg = argv[arg_num];
|
||||
addMultiquery(arg, common_arguments);
|
||||
}
|
||||
else if (arg == "--password" && ((arg_num + 1) >= argc || std::string_view(argv[arg_num + 1]).starts_with('-')))
|
||||
{
|
||||
common_arguments.emplace_back(arg);
|
||||
/// No password was provided by user. Add '\n' as implicit password,
|
||||
/// which encodes that client should ask user for the password.
|
||||
/// '\n' is used because there is hardly a chance that a user would use '\n' as a password.
|
||||
common_arguments.emplace_back("\n");
|
||||
}
|
||||
else
|
||||
common_arguments.emplace_back(arg);
|
||||
}
|
||||
|
@ -127,42 +127,42 @@ void KeeperClient::defineOptions(Poco::Util::OptionSet & options)
|
||||
|
||||
options.addOption(
|
||||
Poco::Util::Option("host", "h", "server hostname. default `localhost`")
|
||||
.argument("host")
|
||||
.argument("<host>")
|
||||
.binding("host"));
|
||||
|
||||
options.addOption(
|
||||
Poco::Util::Option("port", "p", "server port. default `2181`")
|
||||
.argument("port")
|
||||
.argument("<port>")
|
||||
.binding("port"));
|
||||
|
||||
options.addOption(
|
||||
Poco::Util::Option("query", "q", "will execute given query, then exit.")
|
||||
.argument("query")
|
||||
.argument("<query>")
|
||||
.binding("query"));
|
||||
|
||||
options.addOption(
|
||||
Poco::Util::Option("connection-timeout", "", "set connection timeout in seconds. default 10s.")
|
||||
.argument("connection-timeout")
|
||||
.argument("<seconds>")
|
||||
.binding("connection-timeout"));
|
||||
|
||||
options.addOption(
|
||||
Poco::Util::Option("session-timeout", "", "set session timeout in seconds. default 10s.")
|
||||
.argument("session-timeout")
|
||||
.argument("<seconds>")
|
||||
.binding("session-timeout"));
|
||||
|
||||
options.addOption(
|
||||
Poco::Util::Option("operation-timeout", "", "set operation timeout in seconds. default 10s.")
|
||||
.argument("operation-timeout")
|
||||
.argument("<seconds>")
|
||||
.binding("operation-timeout"));
|
||||
|
||||
options.addOption(
|
||||
Poco::Util::Option("history-file", "", "set path of history file. default `~/.keeper-client-history`")
|
||||
.argument("history-file")
|
||||
.argument("<file>")
|
||||
.binding("history-file"));
|
||||
|
||||
options.addOption(
|
||||
Poco::Util::Option("log-level", "", "set log level")
|
||||
.argument("log-level")
|
||||
.argument("<level>")
|
||||
.binding("log-level"));
|
||||
}
|
||||
|
||||
|
@ -112,6 +112,18 @@ if (BUILD_STANDALONE_KEEPER)
|
||||
clickhouse-keeper.cpp
|
||||
)
|
||||
|
||||
# List of resources for clickhouse-keeper client
|
||||
if (ENABLE_CLICKHOUSE_KEEPER_CLIENT)
|
||||
list(APPEND CLICKHOUSE_KEEPER_STANDALONE_SOURCES
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/../../programs/keeper-client/KeeperClient.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/../../programs/keeper-client/Commands.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/../../programs/keeper-client/Parser.cpp
|
||||
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Client/LineReader.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Client/ReplxxLineReader.cpp
|
||||
)
|
||||
endif()
|
||||
|
||||
clickhouse_add_executable(clickhouse-keeper ${CLICKHOUSE_KEEPER_STANDALONE_SOURCES})
|
||||
|
||||
# Remove some redundant dependencies
|
||||
@ -122,6 +134,10 @@ if (BUILD_STANDALONE_KEEPER)
|
||||
target_include_directories(clickhouse-keeper PUBLIC "${CMAKE_CURRENT_BINARY_DIR}/../../src/Core/include") # uses some includes from core
|
||||
target_include_directories(clickhouse-keeper PUBLIC "${CMAKE_CURRENT_BINARY_DIR}/../../src") # uses some includes from common
|
||||
|
||||
if (ENABLE_CLICKHOUSE_KEEPER_CLIENT AND TARGET ch_rust::skim)
|
||||
target_link_libraries(clickhouse-keeper PRIVATE ch_rust::skim)
|
||||
endif()
|
||||
|
||||
target_link_libraries(clickhouse-keeper
|
||||
PRIVATE
|
||||
ch_contrib::abseil_swiss_tables
|
||||
|
@ -34,6 +34,8 @@
|
||||
#include "Core/Defines.h"
|
||||
#include "config.h"
|
||||
#include "config_version.h"
|
||||
#include "config_tools.h"
|
||||
|
||||
|
||||
#if USE_SSL
|
||||
# include <Poco/Net/Context.h>
|
||||
@ -131,7 +133,10 @@ int Keeper::run()
|
||||
if (config().hasOption("help"))
|
||||
{
|
||||
Poco::Util::HelpFormatter help_formatter(Keeper::options());
|
||||
auto header_str = fmt::format("{} [OPTION] [-- [ARG]...]\n"
|
||||
auto header_str = fmt::format("{0} [OPTION] [-- [ARG]...]\n"
|
||||
#if ENABLE_CLICKHOUSE_KEEPER_CLIENT
|
||||
"{0} client [OPTION]\n"
|
||||
#endif
|
||||
"positional arguments can be used to rewrite config.xml properties, for example, --http_port=8010",
|
||||
commandName());
|
||||
help_formatter.setHeader(header_str);
|
||||
|
@ -1,6 +1,30 @@
|
||||
#include <Common/StringUtils/StringUtils.h>
|
||||
#include "config_tools.h"
|
||||
|
||||
|
||||
int mainEntryClickHouseKeeper(int argc, char ** argv);
|
||||
|
||||
#if ENABLE_CLICKHOUSE_KEEPER_CLIENT
|
||||
int mainEntryClickHouseKeeperClient(int argc, char ** argv);
|
||||
#endif
|
||||
|
||||
int main(int argc_, char ** argv_)
|
||||
{
|
||||
#if ENABLE_CLICKHOUSE_KEEPER_CLIENT
|
||||
|
||||
if (argc_ >= 2)
|
||||
{
|
||||
/// 'clickhouse-keeper --client ...' and 'clickhouse-keeper client ...' are OK
|
||||
if (strcmp(argv_[1], "--client") == 0 || strcmp(argv_[1], "client") == 0)
|
||||
{
|
||||
argv_[1] = argv_[0];
|
||||
return mainEntryClickHouseKeeperClient(--argc_, argv_ + 1);
|
||||
}
|
||||
}
|
||||
|
||||
if (argc_ > 0 && (strcmp(argv_[0], "clickhouse-keeper-client") == 0 || endsWith(argv_[0], "/clickhouse-keeper-client")))
|
||||
return mainEntryClickHouseKeeperClient(argc_, argv_);
|
||||
#endif
|
||||
|
||||
return mainEntryClickHouseKeeper(argc_, argv_);
|
||||
}
|
||||
|
@ -8,7 +8,9 @@
|
||||
#include <Poco/Logger.h>
|
||||
#include <Poco/NullChannel.h>
|
||||
#include <Poco/SimpleFileChannel.h>
|
||||
#include <Databases/DatabaseFilesystem.h>
|
||||
#include <Databases/DatabaseMemory.h>
|
||||
#include <Databases/DatabasesOverlay.h>
|
||||
#include <Storages/System/attachSystemTables.h>
|
||||
#include <Storages/System/attachInformationSchemaTables.h>
|
||||
#include <Interpreters/DatabaseCatalog.h>
|
||||
@ -50,6 +52,8 @@
|
||||
#include <base/argsToConfig.h>
|
||||
#include <filesystem>
|
||||
|
||||
#include "config.h"
|
||||
|
||||
#if defined(FUZZING_MODE)
|
||||
#include <Functions/getFuzzerData.h>
|
||||
#endif
|
||||
@ -170,6 +174,13 @@ static DatabasePtr createMemoryDatabaseIfNotExists(ContextPtr context, const Str
|
||||
return system_database;
|
||||
}
|
||||
|
||||
static DatabasePtr createClickHouseLocalDatabaseOverlay(const String & name_, ContextPtr context_)
|
||||
{
|
||||
auto databaseCombiner = std::make_shared<DatabasesOverlay>(name_, context_);
|
||||
databaseCombiner->registerNextDatabase(std::make_shared<DatabaseFilesystem>(name_, "", context_));
|
||||
databaseCombiner->registerNextDatabase(std::make_shared<DatabaseMemory>(name_, context_));
|
||||
return databaseCombiner;
|
||||
}
|
||||
|
||||
/// If path is specified and not empty, will try to setup server environment and load existing metadata
|
||||
void LocalServer::tryInitPath()
|
||||
@ -669,7 +680,7 @@ void LocalServer::processConfig()
|
||||
* if such tables will not be dropped, clickhouse-server will not be able to load them due to security reasons.
|
||||
*/
|
||||
std::string default_database = config().getString("default_database", "_local");
|
||||
DatabaseCatalog::instance().attachDatabase(default_database, std::make_shared<DatabaseMemory>(default_database, global_context));
|
||||
DatabaseCatalog::instance().attachDatabase(default_database, createClickHouseLocalDatabaseOverlay(default_database, global_context));
|
||||
global_context->setCurrentDatabase(default_database);
|
||||
applyCmdOptions(global_context);
|
||||
|
||||
|
@ -1705,7 +1705,6 @@ try
|
||||
#endif
|
||||
|
||||
/// Must be done after initialization of `servers`, because async_metrics will access `servers` variable from its thread.
|
||||
|
||||
async_metrics.start();
|
||||
|
||||
{
|
||||
|
@ -5,6 +5,7 @@
|
||||
#include <Access/GSSAcceptor.h>
|
||||
#include <base/defines.h>
|
||||
#include <base/types.h>
|
||||
#include <base/extended_types.h>
|
||||
|
||||
#include <chrono>
|
||||
#include <map>
|
||||
@ -42,7 +43,7 @@ public:
|
||||
private:
|
||||
struct LDAPCacheEntry
|
||||
{
|
||||
std::size_t last_successful_params_hash = 0;
|
||||
UInt128 last_successful_params_hash = 0;
|
||||
std::chrono::steady_clock::time_point last_successful_authentication_timestamp;
|
||||
LDAPClient::SearchResultsList last_successful_role_search_results;
|
||||
};
|
||||
|
@ -185,11 +185,10 @@ void BackupCoordinationReplicatedTables::addPartNames(PartNamesForTableReplica &
|
||||
const String & other_replica_name = **other.replica_names.begin();
|
||||
throw Exception(
|
||||
ErrorCodes::CANNOT_BACKUP_TABLE,
|
||||
"Table {} on replica {} has part {} which is different from the part on replica {}. Must be the same",
|
||||
table_name_for_logs,
|
||||
replica_name,
|
||||
part_name,
|
||||
other_replica_name);
|
||||
"Table {} on replica {} has part {} different from the part on replica {} "
|
||||
"(checksum '{}' on replica {} != checksum '{}' on replica {})",
|
||||
table_name_for_logs, replica_name, part_name, other_replica_name,
|
||||
getHexUIntLowercase(checksum), replica_name, getHexUIntLowercase(other.checksum), other_replica_name);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -85,6 +85,9 @@ void BackupCoordinationStageSync::setError(const String & current_host, const Ex
|
||||
writeException(exception, buf, true);
|
||||
zookeeper->createIfNotExists(zookeeper_path + "/error", buf.str());
|
||||
|
||||
/// When backup/restore fails, it removes the nodes from Zookeeper.
|
||||
/// Sometimes it fails to remove all nodes. It's possible that it removes /error node, but fails to remove /stage node,
|
||||
/// so the following line tries to preserve the error status.
|
||||
auto code = zookeeper->trySet(zookeeper_path, Stage::ERROR);
|
||||
if (code != Coordination::Error::ZOK)
|
||||
throw zkutil::KeeperException(code, zookeeper_path);
|
||||
|
@ -152,8 +152,7 @@ namespace
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
if (coordination)
|
||||
coordination->setError(Exception(getCurrentExceptionMessageAndPattern(true, true), getCurrentExceptionCode()));
|
||||
sendExceptionToCoordination(coordination, Exception(getCurrentExceptionMessageAndPattern(true, true), getCurrentExceptionCode()));
|
||||
}
|
||||
}
|
||||
|
||||
|
239
src/Client/ConnectionString.cpp
Normal file
@ -0,0 +1,239 @@
|
||||
#include "ConnectionString.h"
|
||||
|
||||
#include <Common/Exception.h>
|
||||
#include <Poco/Exception.h>
|
||||
#include <Poco/URI.h>
|
||||
|
||||
#include <array>
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int BAD_ARGUMENTS;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
using namespace std::string_literals;
|
||||
using namespace std::literals::string_view_literals;
|
||||
|
||||
constexpr auto CONNECTION_URI_SCHEME = "clickhouse:"sv;
|
||||
|
||||
const std::unordered_map<std::string_view, std::string_view> PROHIBITED_CLIENT_OPTIONS = {
|
||||
/// Client option, client option long name
|
||||
{"-h", "--host"},
|
||||
{"--host", "--host"},
|
||||
{"--port", "--port"},
|
||||
{"--connection", "--connection"},
|
||||
};
|
||||
|
||||
std::string uriDecode(const std::string & uri_encoded_string, bool plus_as_space)
|
||||
{
|
||||
std::string decoded_string;
|
||||
Poco::URI::decode(uri_encoded_string, decoded_string, plus_as_space);
|
||||
return decoded_string;
|
||||
}
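
The helper above delegates percent-decoding to Poco. A tiny standalone check of that behaviour (a sketch; it assumes the Poco headers are available and mirrors the docs' email-as-user example):

``` cpp
// Minimal check of Poco::URI::decode, the call wrapped by uriDecode above (illustrative only).
#include <Poco/URI.h>

#include <cassert>
#include <string>

int main()
{
    std::string decoded;
    Poco::URI::decode("john%40some_mail.com", decoded, /*plusAsSpace=*/ true);
    assert(decoded == "john@some_mail.com"); // '%40' decodes back to '@'
    return 0;
}
```
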
|
||||
|
||||
void getHostAndPort(const Poco::URI & uri, std::vector<std::vector<std::string>> & hosts_and_ports_arguments)
|
||||
{
|
||||
std::vector<std::string> host_and_port;
|
||||
const std::string & host = uri.getHost();
|
||||
if (!host.empty())
|
||||
{
|
||||
host_and_port.push_back("--host=" + uriDecode(host, false));
|
||||
}
|
||||
|
||||
// Port can be written without host (":9000"). Empty host name equals to default host.
|
||||
auto port = uri.getPort();
|
||||
if (port != 0)
|
||||
host_and_port.push_back("--port=" + std::to_string(port));
|
||||
|
||||
if (!host_and_port.empty())
|
||||
hosts_and_ports_arguments.push_back(std::move(host_and_port));
|
||||
}
|
||||
|
||||
void buildConnectionString(
|
||||
std::string_view host_and_port,
|
||||
std::string_view right_part,
|
||||
Poco::URI & uri,
|
||||
std::vector<std::vector<std::string>> & hosts_and_ports_arguments)
|
||||
{
|
||||
// User info does not matter in sub URI
|
||||
auto uri_string = std::string(CONNECTION_URI_SCHEME);
|
||||
if (!host_and_port.empty())
|
||||
{
|
||||
uri_string.append("//");
|
||||
uri_string.append(host_and_port);
|
||||
}
|
||||
|
||||
// Right part from string includes '/database?[params]'
|
||||
uri_string.append(right_part);
|
||||
try
|
||||
{
|
||||
uri = Poco::URI(uri_string);
|
||||
}
|
||||
catch (const Poco::URISyntaxException & invalid_uri_exception)
|
||||
{
|
||||
throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS,
|
||||
"Invalid connection string syntax {}: {}", uri_string, invalid_uri_exception.what());
|
||||
}
|
||||
|
||||
getHostAndPort(uri, hosts_and_ports_arguments);
|
||||
}
|
||||
|
||||
std::string makeArgument(const std::string & connection_string_parameter_name)
|
||||
{
|
||||
return (connection_string_parameter_name.size() == 1 ? "-"s : "--"s) + connection_string_parameter_name;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
bool tryParseConnectionString(
|
||||
std::string_view connection_string,
|
||||
std::vector<std::string> & common_arguments,
|
||||
std::vector<std::vector<std::string>> & hosts_and_ports_arguments)
|
||||
{
|
||||
if (connection_string == CONNECTION_URI_SCHEME)
|
||||
return true;
|
||||
|
||||
if (!connection_string.starts_with(CONNECTION_URI_SCHEME))
|
||||
return false;
|
||||
|
||||
size_t offset = CONNECTION_URI_SCHEME.size();
|
||||
if ((connection_string.substr(offset).starts_with("//")))
|
||||
offset += 2;
|
||||
|
||||
auto hosts_end_pos = std::string_view::npos;
|
||||
auto hosts_or_user_info_end_pos = connection_string.find_first_of("?/@", offset);
|
||||
|
||||
auto has_user_info = hosts_or_user_info_end_pos != std::string_view::npos && connection_string[hosts_or_user_info_end_pos] == '@';
|
||||
if (has_user_info)
|
||||
{
|
||||
// Move offset right after user info
|
||||
offset = hosts_or_user_info_end_pos + 1;
|
||||
hosts_end_pos = connection_string.find_first_of("?/@", offset);
|
||||
// Several '@' symbols in connection string is prohibited.
|
||||
// If user name contains '@' then it should be percent-encoded.
|
||||
// several users: 'usr1@host1,@usr2@host2' is invalid.
|
||||
if (hosts_end_pos != std::string_view::npos && connection_string[hosts_end_pos] == '@')
|
||||
{
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS,
|
||||
"Symbols '@' in URI in password or user name should be percent-encoded. Individual user names for different hosts also prohibited. {}",
|
||||
connection_string);
|
||||
}
|
||||
}
|
||||
else
|
||||
hosts_end_pos = hosts_or_user_info_end_pos;
|
||||
|
||||
const auto * hosts_end = hosts_end_pos != std::string_view::npos ? connection_string.begin() + hosts_end_pos
|
||||
: connection_string.end();
|
||||
|
||||
try
|
||||
{
|
||||
/** Poco::URI doesn't support several hosts in URI.
|
||||
* Split string clickhouse:[user[:password]@]host1:port1, ... , hostN:portN[database]?[query_parameters]
|
||||
* into multiple string for each host:
|
||||
* clickhouse:[user[:password]@]host1:port1[database]?[query_parameters]
|
||||
* ...
|
||||
* clickhouse:[user[:password]@]hostN:portN[database]?[query_parameters]
|
||||
*/
|
||||
Poco::URI uri;
|
||||
const auto * last_host_begin = connection_string.begin() + offset;
|
||||
for (const auto * it = last_host_begin; it != hosts_end; ++it)
|
||||
{
|
||||
if (*it == ',')
|
||||
{
|
||||
buildConnectionString({last_host_begin, it}, {hosts_end, connection_string.end()}, uri, hosts_and_ports_arguments);
|
||||
last_host_begin = it + 1;
|
||||
}
|
||||
}
|
||||
|
||||
if (uri.empty())
|
||||
{
|
||||
// URI has no host specified
|
||||
uri = std::string(connection_string);
|
||||
getHostAndPort(uri, hosts_and_ports_arguments);
|
||||
}
|
||||
else
|
||||
buildConnectionString({last_host_begin, hosts_end}, {hosts_end, connection_string.end()}, uri, hosts_and_ports_arguments);
|
||||
|
||||
Poco::URI::QueryParameters params = uri.getQueryParameters();
|
||||
for (const auto & param : params)
|
||||
{
|
||||
if (param.first == "secure" || param.first == "s")
|
||||
{
|
||||
if (!param.second.empty())
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "secure URI query parameter does not allow value");
|
||||
|
||||
common_arguments.push_back(makeArgument(param.first));
|
||||
}
|
||||
else
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "URI query parameter {} is not supported", param.first);
|
||||
}
|
||||
|
||||
auto user_info = uri.getUserInfo();
|
||||
if (!user_info.empty())
|
||||
{
|
||||
// Poco::URI doesn't decode user name/password by default.
|
||||
// But ClickHouse allows to have users with email user name like: 'john@some_mail.com'
|
||||
// john@some_mail.com should be percent-encoded: 'john%40some_mail.com'
|
||||
size_t pos = user_info.find(':');
|
||||
if (pos != std::string::npos)
|
||||
{
|
||||
common_arguments.push_back("--user");
|
||||
common_arguments.push_back(uriDecode(user_info.substr(0, pos), true));
|
||||
|
||||
++pos; // Skip ':'
|
||||
common_arguments.push_back("--password");
|
||||
if (user_info.size() > pos + 1)
|
||||
common_arguments.push_back(uriDecode(user_info.substr(pos), true));
|
||||
else
|
||||
{
|
||||
// in case of user_info == 'user:', ':' is specified, but password is empty
|
||||
// then add password argument "\n" which means: Ask user for a password.
|
||||
common_arguments.push_back("\n");
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
common_arguments.push_back("--user");
|
||||
common_arguments.push_back(uriDecode(user_info, true));
|
||||
}
|
||||
}
|
||||
|
||||
const auto & database_name = uri.getPath();
|
||||
size_t start_symbol = !database_name.empty() && database_name[0] == '/' ? 1u : 0u;
|
||||
if (database_name.size() > start_symbol)
|
||||
{
|
||||
common_arguments.push_back("--database");
|
||||
common_arguments.push_back(start_symbol == 0u ? database_name : database_name.substr(start_symbol));
|
||||
}
|
||||
}
|
||||
catch (const Poco::URISyntaxException & invalid_uri_exception)
|
||||
{
|
||||
throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS,
|
||||
"Invalid connection string '{}': {}", connection_string, invalid_uri_exception.what());
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void checkIfCmdLineOptionCanBeUsedWithConnectionString(std::string_view command_line_option)
|
||||
{
|
||||
if (PROHIBITED_CLIENT_OPTIONS.contains(command_line_option))
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS,
|
||||
"Mixing a connection string and {} option is prohibited", PROHIBITED_CLIENT_OPTIONS.at(command_line_option));
|
||||
}
|
||||
|
||||
}
|
27
src/Client/ConnectionString.h
Normal file
@ -0,0 +1,27 @@
#pragma once

#include <string>
#include <string_view>
#include <vector>

namespace DB
{
/** Tries to parse ClickHouse connection string.
  * if @connection_string starts with 'clickhouse:' then connection string will be parsed
  * and converted into a set of arguments for the client.
  * Connection string format is similar to URI "clickhouse:[//[user[:password]@][hosts_and_ports]][/dbname][?query_parameters]"
  * with the difference that hosts_and_ports can contain multiple hosts separated by ','.
  * example: clickhouse://user@host1:port1,host2:port2
  * @return Returns false if no connection string was specified. If a connection string was specified, returns true if it is valid, and throws an exception if it is invalid.
  * @exception Throws DB::Exception if URI has valid scheme (clickhouse:), but invalid internals.
  */
bool tryParseConnectionString(
    std::string_view connection_string,
    std::vector<std::string> & common_arguments,
    std::vector<std::vector<std::string>> & hosts_and_ports_arguments);

// Throws DB::Exception with BAD_ARGUMENTS if the given command line argument
// is not allowed to be used with a connection string.
void checkIfCmdLineOptionCanBeUsedWithConnectionString(std::string_view command_line_option);

}
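
As a quick illustration of how this new API is meant to be consumed, here is a minimal sketch. The include path and the exact arguments listed in the comments are assumptions based on the header and the implementation above; this is not part of the commit.

``` cpp
// Hypothetical standalone usage of DB::tryParseConnectionString (sketch, not ClickHouse client code).
#include <Client/ConnectionString.h>

#include <iostream>
#include <string>
#include <vector>

int main()
{
    std::vector<std::string> common_arguments;
    std::vector<std::vector<std::string>> hosts_and_ports_arguments;

    // Same shape as the docs examples: user, two hosts, a database and the short "s" (secure) flag.
    const std::string connection_string = "clickhouse://john@192.168.1.15:9000,192.168.1.25/my_database?s";

    if (DB::tryParseConnectionString(connection_string, common_arguments, hosts_and_ports_arguments))
    {
        // Expected to contain entries such as --user john, --database my_database, -s.
        for (const auto & arg : common_arguments)
            std::cout << "common: " << arg << '\n';

        // One inner vector per host, e.g. {--host=192.168.1.15, --port=9000} and {--host=192.168.1.25}.
        for (const auto & host_args : hosts_and_ports_arguments)
            for (const auto & arg : host_args)
                std::cout << "host/port: " << arg << '\n';
    }
    else
    {
        std::cout << "not a connection string, fall back to regular argument parsing\n";
    }
    // Note: a string that starts with "clickhouse:" but is malformed makes the function throw DB::Exception.
}
```
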
|
@ -151,13 +151,13 @@ public:
|
||||
|
||||
ColumnPtr compress() const override;
|
||||
|
||||
void forEachSubcolumn(ColumnCallback callback) const override
|
||||
void forEachSubcolumn(MutableColumnCallback callback) override
|
||||
{
|
||||
callback(offsets);
|
||||
callback(data);
|
||||
}
|
||||
|
||||
void forEachSubcolumnRecursively(RecursiveColumnCallback callback) const override
|
||||
void forEachSubcolumnRecursively(RecursiveMutableColumnCallback callback) override
|
||||
{
|
||||
callback(*offsets);
|
||||
offsets->forEachSubcolumnRecursively(callback);
|
||||
|
@ -230,12 +230,12 @@ public:
|
||||
data->getExtremes(min, max);
|
||||
}
|
||||
|
||||
void forEachSubcolumn(ColumnCallback callback) const override
|
||||
void forEachSubcolumn(MutableColumnCallback callback) override
|
||||
{
|
||||
callback(data);
|
||||
}
|
||||
|
||||
void forEachSubcolumnRecursively(RecursiveColumnCallback callback) const override
|
||||
void forEachSubcolumnRecursively(RecursiveMutableColumnCallback callback) override
|
||||
{
|
||||
callback(*data);
|
||||
data->forEachSubcolumnRecursively(callback);
|
||||
|
@ -166,7 +166,7 @@ public:
|
||||
size_t byteSizeAt(size_t n) const override { return getDictionary().byteSizeAt(getIndexes().getUInt(n)); }
|
||||
size_t allocatedBytes() const override { return idx.getPositions()->allocatedBytes() + getDictionary().allocatedBytes(); }
|
||||
|
||||
void forEachSubcolumn(ColumnCallback callback) const override
|
||||
void forEachSubcolumn(MutableColumnCallback callback) override
|
||||
{
|
||||
callback(idx.getPositionsPtr());
|
||||
|
||||
@ -175,7 +175,7 @@ public:
|
||||
callback(dictionary.getColumnUniquePtr());
|
||||
}
|
||||
|
||||
void forEachSubcolumnRecursively(RecursiveColumnCallback callback) const override
|
||||
void forEachSubcolumnRecursively(RecursiveMutableColumnCallback callback) override
|
||||
{
|
||||
callback(*idx.getPositionsPtr());
|
||||
idx.getPositionsPtr()->forEachSubcolumnRecursively(callback);
|
||||
@ -340,7 +340,7 @@ private:
|
||||
explicit Dictionary(MutableColumnPtr && column_unique, bool is_shared);
|
||||
explicit Dictionary(ColumnPtr column_unique, bool is_shared);
|
||||
|
||||
const ColumnPtr & getColumnUniquePtr() const { return column_unique; }
|
||||
const WrappedPtr & getColumnUniquePtr() const { return column_unique; }
|
||||
WrappedPtr & getColumnUniquePtr() { return column_unique; }
|
||||
|
||||
const IColumnUnique & getColumnUnique() const { return static_cast<const IColumnUnique &>(*column_unique); }
|
||||
|
@ -273,12 +273,12 @@ void ColumnMap::getExtremes(Field & min, Field & max) const
|
||||
max = std::move(map_max_value);
|
||||
}
|
||||
|
||||
void ColumnMap::forEachSubcolumn(ColumnCallback callback) const
|
||||
void ColumnMap::forEachSubcolumn(MutableColumnCallback callback)
|
||||
{
|
||||
callback(nested);
|
||||
}
|
||||
|
||||
void ColumnMap::forEachSubcolumnRecursively(RecursiveColumnCallback callback) const
|
||||
void ColumnMap::forEachSubcolumnRecursively(RecursiveMutableColumnCallback callback)
|
||||
{
|
||||
callback(*nested);
|
||||
nested->forEachSubcolumnRecursively(callback);
|
||||
|
@ -88,8 +88,8 @@ public:
|
||||
size_t byteSizeAt(size_t n) const override;
|
||||
size_t allocatedBytes() const override;
|
||||
void protect() override;
|
||||
void forEachSubcolumn(ColumnCallback callback) const override;
|
||||
void forEachSubcolumnRecursively(RecursiveColumnCallback callback) const override;
|
||||
void forEachSubcolumn(MutableColumnCallback callback) override;
|
||||
void forEachSubcolumnRecursively(RecursiveMutableColumnCallback callback) override;
|
||||
bool structureEquals(const IColumn & rhs) const override;
|
||||
double getRatioOfDefaultRows(double sample_ratio) const override;
|
||||
UInt64 getNumberOfDefaultRows() const override;
|
||||
|
@ -130,13 +130,13 @@ public:
|
||||
|
||||
ColumnPtr compress() const override;
|
||||
|
||||
void forEachSubcolumn(ColumnCallback callback) const override
|
||||
void forEachSubcolumn(MutableColumnCallback callback) override
|
||||
{
|
||||
callback(nested_column);
|
||||
callback(null_map);
|
||||
}
|
||||
|
||||
void forEachSubcolumnRecursively(RecursiveColumnCallback callback) const override
|
||||
void forEachSubcolumnRecursively(RecursiveMutableColumnCallback callback) override
|
||||
{
|
||||
callback(*nested_column);
|
||||
nested_column->forEachSubcolumnRecursively(callback);
|
||||
|
@ -664,18 +664,18 @@ size_t ColumnObject::allocatedBytes() const
|
||||
return res;
|
||||
}
|
||||
|
||||
void ColumnObject::forEachSubcolumn(ColumnCallback callback) const
|
||||
void ColumnObject::forEachSubcolumn(MutableColumnCallback callback)
|
||||
{
|
||||
for (const auto & entry : subcolumns)
|
||||
for (const auto & part : entry->data.data)
|
||||
for (auto & entry : subcolumns)
|
||||
for (auto & part : entry->data.data)
|
||||
callback(part);
|
||||
}
|
||||
|
||||
void ColumnObject::forEachSubcolumnRecursively(RecursiveColumnCallback callback) const
|
||||
void ColumnObject::forEachSubcolumnRecursively(RecursiveMutableColumnCallback callback)
|
||||
{
|
||||
for (const auto & entry : subcolumns)
|
||||
for (auto & entry : subcolumns)
|
||||
{
|
||||
for (const auto & part : entry->data.data)
|
||||
for (auto & part : entry->data.data)
|
||||
{
|
||||
callback(*part);
|
||||
part->forEachSubcolumnRecursively(callback);
|
||||
|
@ -206,8 +206,8 @@ public:
|
||||
size_t size() const override;
|
||||
size_t byteSize() const override;
|
||||
size_t allocatedBytes() const override;
|
||||
void forEachSubcolumn(ColumnCallback callback) const override;
|
||||
void forEachSubcolumnRecursively(RecursiveColumnCallback callback) const override;
|
||||
void forEachSubcolumn(MutableColumnCallback callback) override;
|
||||
void forEachSubcolumnRecursively(RecursiveMutableColumnCallback callback) override;
|
||||
void insert(const Field & field) override;
|
||||
void insertDefault() override;
|
||||
void insertFrom(const IColumn & src, size_t n) override;
|
||||
|
@ -751,13 +751,13 @@ bool ColumnSparse::structureEquals(const IColumn & rhs) const
|
||||
return false;
|
||||
}
|
||||
|
||||
void ColumnSparse::forEachSubcolumn(ColumnCallback callback) const
|
||||
void ColumnSparse::forEachSubcolumn(MutableColumnCallback callback)
|
||||
{
|
||||
callback(values);
|
||||
callback(offsets);
|
||||
}
|
||||
|
||||
void ColumnSparse::forEachSubcolumnRecursively(RecursiveColumnCallback callback) const
|
||||
void ColumnSparse::forEachSubcolumnRecursively(RecursiveMutableColumnCallback callback)
|
||||
{
|
||||
callback(*values);
|
||||
values->forEachSubcolumnRecursively(callback);
|
||||
|
@ -140,8 +140,8 @@ public:
|
||||
|
||||
ColumnPtr compress() const override;
|
||||
|
||||
void forEachSubcolumn(ColumnCallback callback) const override;
|
||||
void forEachSubcolumnRecursively(RecursiveColumnCallback callback) const override;
|
||||
void forEachSubcolumn(MutableColumnCallback callback) override;
|
||||
void forEachSubcolumnRecursively(RecursiveMutableColumnCallback callback) override;
|
||||
|
||||
bool structureEquals(const IColumn & rhs) const override;
|
||||
|
||||
|
@ -31,14 +31,12 @@ ColumnString::ColumnString(const ColumnString & src)
|
||||
offsets(src.offsets.begin(), src.offsets.end()),
|
||||
chars(src.chars.begin(), src.chars.end())
|
||||
{
|
||||
if (!offsets.empty())
|
||||
{
|
||||
Offset last_offset = offsets.back();
|
||||
|
||||
/// This will also prevent possible overflow in offset.
|
||||
if (chars.size() != last_offset)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "String offsets has data inconsistent with chars array");
|
||||
}
|
||||
Offset last_offset = offsets.empty() ? 0 : offsets.back();
|
||||
/// This will also prevent possible overflow in offset.
|
||||
if (last_offset != chars.size())
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR,
|
||||
"String offsets has data inconsistent with chars array. Last offset: {}, array length: {}",
|
||||
last_offset, chars.size());
|
||||
}
|
||||
|
||||
|
||||
@ -157,6 +155,7 @@ ColumnPtr ColumnString::filter(const Filter & filt, ssize_t result_size_hint) co
|
||||
Offsets & res_offsets = res->offsets;
|
||||
|
||||
filterArraysImpl<UInt8>(chars, offsets, res_chars, res_offsets, filt, result_size_hint);
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
@ -571,10 +570,11 @@ void ColumnString::protect()
|
||||
|
||||
void ColumnString::validate() const
|
||||
{
|
||||
if (!offsets.empty() && offsets.back() != chars.size())
|
||||
Offset last_offset = offsets.empty() ? 0 : offsets.back();
|
||||
if (last_offset != chars.size())
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR,
|
||||
"ColumnString validation failed: size mismatch (internal logical error) {} != {}",
|
||||
offsets.back(), chars.size());
|
||||
last_offset, chars.size());
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -495,15 +495,15 @@ void ColumnTuple::getExtremes(Field & min, Field & max) const
|
||||
max = max_tuple;
|
||||
}
|
||||
|
||||
void ColumnTuple::forEachSubcolumn(ColumnCallback callback) const
|
||||
void ColumnTuple::forEachSubcolumn(MutableColumnCallback callback)
|
||||
{
|
||||
for (const auto & column : columns)
|
||||
for (auto & column : columns)
|
||||
callback(column);
|
||||
}
|
||||
|
||||
void ColumnTuple::forEachSubcolumnRecursively(RecursiveColumnCallback callback) const
|
||||
void ColumnTuple::forEachSubcolumnRecursively(RecursiveMutableColumnCallback callback)
|
||||
{
|
||||
for (const auto & column : columns)
|
||||
for (auto & column : columns)
|
||||
{
|
||||
callback(*column);
|
||||
column->forEachSubcolumnRecursively(callback);
|
||||
|
@ -96,8 +96,8 @@ public:
|
||||
size_t byteSizeAt(size_t n) const override;
|
||||
size_t allocatedBytes() const override;
|
||||
void protect() override;
|
||||
void forEachSubcolumn(ColumnCallback callback) const override;
|
||||
void forEachSubcolumnRecursively(RecursiveColumnCallback callback) const override;
|
||||
void forEachSubcolumn(MutableColumnCallback callback) override;
|
||||
void forEachSubcolumnRecursively(RecursiveMutableColumnCallback callback) override;
|
||||
bool structureEquals(const IColumn & rhs) const override;
|
||||
bool isCollationSupported() const override;
|
||||
ColumnPtr compress() const override;
|
||||
|
@ -62,19 +62,19 @@ ColumnPtr IColumn::createWithOffsets(const Offsets & offsets, const Field & defa
|
||||
return res;
|
||||
}
|
||||
|
||||
void IColumn::forEachSubcolumn(MutableColumnCallback callback)
|
||||
void IColumn::forEachSubcolumn(ColumnCallback callback) const
|
||||
{
|
||||
std::as_const(*this).forEachSubcolumn([&callback](const WrappedPtr & subcolumn)
|
||||
const_cast<IColumn*>(this)->forEachSubcolumn([&callback](WrappedPtr & subcolumn)
|
||||
{
|
||||
callback(const_cast<WrappedPtr &>(subcolumn));
|
||||
callback(std::as_const(subcolumn));
|
||||
});
|
||||
}
|
||||
|
||||
void IColumn::forEachSubcolumnRecursively(RecursiveMutableColumnCallback callback)
|
||||
void IColumn::forEachSubcolumnRecursively(RecursiveColumnCallback callback) const
|
||||
{
|
||||
std::as_const(*this).forEachSubcolumnRecursively([&callback](const IColumn & subcolumn)
|
||||
const_cast<IColumn*>(this)->forEachSubcolumnRecursively([&callback](IColumn & subcolumn)
|
||||
{
|
||||
callback(const_cast<IColumn &>(subcolumn));
|
||||
callback(std::as_const(subcolumn));
|
||||
});
|
||||
}
|
||||
|
||||
|
@ -418,21 +418,23 @@ public:
|
||||
/// If the column contains subcolumns (such as Array, Nullable, etc), do callback on them.
|
||||
/// Shallow: doesn't do recursive calls; don't do call for itself.
|
||||
|
||||
using ColumnCallback = std::function<void(const WrappedPtr &)>;
|
||||
virtual void forEachSubcolumn(ColumnCallback) const {}
|
||||
|
||||
using MutableColumnCallback = std::function<void(WrappedPtr &)>;
|
||||
virtual void forEachSubcolumn(MutableColumnCallback callback);
|
||||
virtual void forEachSubcolumn(MutableColumnCallback) {}
|
||||
|
||||
/// Default implementation calls the mutable overload using const_cast.
|
||||
using ColumnCallback = std::function<void(const WrappedPtr &)>;
|
||||
virtual void forEachSubcolumn(ColumnCallback) const;
|
||||
|
||||
/// Similar to forEachSubcolumn but it also do recursive calls.
|
||||
/// In recursive calls it's prohibited to replace pointers
|
||||
/// to subcolumns, so we use another callback function.
|
||||
|
||||
using RecursiveColumnCallback = std::function<void(const IColumn &)>;
|
||||
virtual void forEachSubcolumnRecursively(RecursiveColumnCallback) const {}
|
||||
|
||||
using RecursiveMutableColumnCallback = std::function<void(IColumn &)>;
|
||||
virtual void forEachSubcolumnRecursively(RecursiveMutableColumnCallback callback);
|
||||
virtual void forEachSubcolumnRecursively(RecursiveMutableColumnCallback) {}
|
||||
|
||||
/// Default implementation calls the mutable overload using const_cast.
|
||||
using RecursiveColumnCallback = std::function<void(const IColumn &)>;
|
||||
virtual void forEachSubcolumnRecursively(RecursiveColumnCallback) const;
|
||||
|
||||
/// Columns have equal structure.
|
||||
/// If true - you can use "compareAt", "insertFrom", etc. methods.
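
The IColumn change above flips the primary `forEachSubcolumn`/`forEachSubcolumnRecursively` overloads from const to mutable and implements the const overloads once via const_cast delegation (see the IColumn.cpp hunk earlier). A self-contained sketch of that delegation pattern, using hypothetical names (`Node`, `forEachChild`) rather than the actual ClickHouse classes:

``` cpp
// Sketch of "const overload delegates to the mutable one" (illustrative only).
#include <functional>
#include <iostream>
#include <utility>
#include <vector>

struct Node
{
    std::vector<Node> children;

    using MutableCallback = std::function<void(Node &)>;
    using ConstCallback = std::function<void(const Node &)>;

    /// Only the mutable traversal is written by hand.
    void forEachChild(MutableCallback callback)
    {
        for (auto & child : children)
            callback(child);
    }

    /// The const overload is implemented once: cast away const to reuse the
    /// mutable traversal, then hand each child back to the caller as const.
    void forEachChild(ConstCallback callback) const
    {
        const_cast<Node *>(this)->forEachChild([&callback](Node & child)
        {
            callback(std::as_const(child));
        });
    }
};

int main()
{
    const Node root{{Node{}, Node{}}};
    size_t count = 0;
    root.forEachChild([&](const Node &) { ++count; });
    std::cout << count << " children\n"; // prints "2 children"
}
```
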
|
||||
|
@ -197,8 +197,9 @@
    M(IOUringInFlightEvents, "Number of io_uring SQEs in flight") \
    M(ReadTaskRequestsSent, "The current number of callback requests in flight from the remote server back to the initiator server to choose the read task (for s3Cluster table function and similar). Measured on the remote server side.") \
    M(MergeTreeReadTaskRequestsSent, "The current number of callback requests in flight from the remote server back to the initiator server to choose the read task (for MergeTree tables). Measured on the remote server side.") \
    M(MergeTreeAllRangesAnnouncementsSent, "The current number of announcement being sent in flight from the remote server to the initiator server about the set of data parts (for MergeTree tables). Measured on the remote server side.")

    M(MergeTreeAllRangesAnnouncementsSent, "The current number of announcement being sent in flight from the remote server to the initiator server about the set of data parts (for MergeTree tables). Measured on the remote server side.") \
    M(CreatedTimersInQueryProfiler, "Number of Created thread local timers in QueryProfiler") \
    M(ActiveTimersInQueryProfiler, "Number of Active thread local timers in QueryProfiler")

namespace CurrentMetrics
{

@ -104,7 +104,7 @@ DNSResolver::IPAddresses hostByName(const std::string & host)
    }
    catch (const Poco::Net::DNSException & e)
    {
        LOG_ERROR(&Poco::Logger::get("DNSResolver"), "Cannot resolve host ({}), error {}: {}.", host, e.code(), e.name());
        LOG_WARNING(&Poco::Logger::get("DNSResolver"), "Cannot resolve host ({}), error {}: {}.", host, e.code(), e.name());
        addresses.clear();
    }

@ -2,6 +2,7 @@
|
||||
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <Common/TraceSender.h>
|
||||
#include <Common/CurrentMetrics.h>
|
||||
#include <Common/Exception.h>
|
||||
#include <Common/StackTrace.h>
|
||||
#include <Common/thread_local_rng.h>
|
||||
@ -12,6 +13,11 @@
|
||||
|
||||
#include <random>
|
||||
|
||||
namespace CurrentMetrics
|
||||
{
|
||||
extern const Metric CreatedTimersInQueryProfiler;
|
||||
extern const Metric ActiveTimersInQueryProfiler;
|
||||
}
|
||||
|
||||
namespace ProfileEvents
|
||||
{
|
||||
@ -85,48 +91,14 @@ namespace ErrorCodes
|
||||
extern const int NOT_IMPLEMENTED;
|
||||
}
|
||||
|
||||
template <typename ProfilerImpl>
|
||||
QueryProfilerBase<ProfilerImpl>::QueryProfilerBase(UInt64 thread_id, int clock_type, UInt32 period, int pause_signal_)
|
||||
: log(&Poco::Logger::get("QueryProfiler"))
|
||||
, pause_signal(pause_signal_)
|
||||
#if USE_UNWIND
|
||||
Timer::Timer()
|
||||
: log(&Poco::Logger::get("Timer"))
|
||||
{}
|
||||
|
||||
void Timer::createIfNecessary(UInt64 thread_id, int clock_type, int pause_signal)
|
||||
{
|
||||
#if defined(SANITIZER)
|
||||
UNUSED(thread_id);
|
||||
UNUSED(clock_type);
|
||||
UNUSED(period);
|
||||
UNUSED(pause_signal);
|
||||
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "QueryProfiler disabled because they cannot work under sanitizers");
|
||||
#elif !USE_UNWIND
|
||||
UNUSED(thread_id);
|
||||
UNUSED(clock_type);
|
||||
UNUSED(period);
|
||||
UNUSED(pause_signal);
|
||||
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "QueryProfiler cannot work with stock libunwind");
|
||||
#else
|
||||
/// Sanity check.
|
||||
if (!hasPHDRCache())
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "QueryProfiler cannot be used without PHDR cache, that is not available for TSan build");
|
||||
|
||||
/// Too high frequency can introduce infinite busy loop of signal handlers. We will limit maximum frequency (with 1000 signals per second).
|
||||
if (period < 1000000)
|
||||
period = 1000000;
|
||||
|
||||
struct sigaction sa{};
|
||||
sa.sa_sigaction = ProfilerImpl::signalHandler;
|
||||
sa.sa_flags = SA_SIGINFO | SA_RESTART;
|
||||
|
||||
if (sigemptyset(&sa.sa_mask))
|
||||
throwFromErrno("Failed to clean signal mask for query profiler", ErrorCodes::CANNOT_MANIPULATE_SIGSET);
|
||||
|
||||
if (sigaddset(&sa.sa_mask, pause_signal))
|
||||
throwFromErrno("Failed to add signal to mask for query profiler", ErrorCodes::CANNOT_MANIPULATE_SIGSET);
|
||||
|
||||
if (sigaction(pause_signal, &sa, nullptr))
|
||||
throwFromErrno("Failed to setup signal handler for query profiler", ErrorCodes::CANNOT_SET_SIGNAL_HANDLER);
|
||||
|
||||
try
|
||||
if (!timer_id)
|
||||
{
|
||||
struct sigevent sev {};
|
||||
sev.sigev_notify = SIGEV_THREAD_ID;
|
||||
@ -151,25 +123,117 @@ QueryProfilerBase<ProfilerImpl>::QueryProfilerBase(UInt64 thread_id, int clock_t
|
||||
throwFromErrno("Failed to create thread timer", ErrorCodes::CANNOT_CREATE_TIMER);
|
||||
}
|
||||
timer_id.emplace(local_timer_id);
|
||||
CurrentMetrics::add(CurrentMetrics::CreatedTimersInQueryProfiler);
|
||||
}
|
||||
}
|
||||
|
||||
/// Randomize offset as uniform random value from 0 to period - 1.
|
||||
/// It will allow to sample short queries even if timer period is large.
|
||||
/// (For example, with period of 1 second, query with 50 ms duration will be sampled with 1 / 20 probability).
|
||||
/// It also helps to avoid interference (moire).
|
||||
UInt32 period_rand = std::uniform_int_distribution<UInt32>(0, period)(thread_local_rng);
|
||||
void Timer::set(UInt32 period)
|
||||
{
|
||||
/// Too high frequency can introduce infinite busy loop of signal handlers. We will limit maximum frequency (with 1000 signals per second).
|
||||
if (period < 1000000)
|
||||
period = 1000000;
|
||||
/// Randomize offset as uniform random value from 0 to period - 1.
|
||||
/// It will allow to sample short queries even if timer period is large.
|
||||
/// (For example, with period of 1 second, query with 50 ms duration will be sampled with 1 / 20 probability).
|
||||
/// It also helps to avoid interference (moire).
|
||||
UInt32 period_rand = std::uniform_int_distribution<UInt32>(0, period)(thread_local_rng);
|
||||
|
||||
struct timespec interval{.tv_sec = period / TIMER_PRECISION, .tv_nsec = period % TIMER_PRECISION};
|
||||
struct timespec offset{.tv_sec = period_rand / TIMER_PRECISION, .tv_nsec = period_rand % TIMER_PRECISION};
|
||||
struct timespec interval{.tv_sec = period / TIMER_PRECISION, .tv_nsec = period % TIMER_PRECISION};
|
||||
struct timespec offset{.tv_sec = period_rand / TIMER_PRECISION, .tv_nsec = period_rand % TIMER_PRECISION};
|
||||
|
||||
struct itimerspec timer_spec = {.it_interval = interval, .it_value = offset};
|
||||
if (timer_settime(*timer_id, 0, &timer_spec, nullptr))
|
||||
throwFromErrno("Failed to set thread timer period", ErrorCodes::CANNOT_SET_TIMER_PERIOD);
|
||||
struct itimerspec timer_spec = {.it_interval = interval, .it_value = offset};
|
||||
if (timer_settime(*timer_id, 0, &timer_spec, nullptr))
|
||||
throwFromErrno("Failed to set thread timer period", ErrorCodes::CANNOT_SET_TIMER_PERIOD);
|
||||
CurrentMetrics::add(CurrentMetrics::ActiveTimersInQueryProfiler);
|
||||
}
|
||||
|
||||
void Timer::stop()
|
||||
{
|
||||
if (timer_id)
|
||||
{
|
||||
struct timespec stop_timer{.tv_sec = 0, .tv_nsec = 0};
|
||||
struct itimerspec timer_spec = {.it_interval = stop_timer, .it_value = stop_timer};
|
||||
int err = timer_settime(*timer_id, 0, &timer_spec, nullptr);
|
||||
if (err)
|
||||
LOG_ERROR(log, "Failed to stop query profiler timer {}", errnoToString());
|
||||
chassert(!err && "Failed to stop query profiler timer");
|
||||
CurrentMetrics::sub(CurrentMetrics::ActiveTimersInQueryProfiler);
|
||||
}
|
||||
}
|
||||
|
||||
Timer::~Timer()
|
||||
{
|
||||
try
|
||||
{
|
||||
cleanup();
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
tryLogCurrentException(log);
|
||||
}
|
||||
}
|
||||
|
||||
void Timer::cleanup()
|
||||
{
|
||||
if (timer_id)
|
||||
{
|
||||
int err = timer_delete(*timer_id);
|
||||
if (err)
|
||||
LOG_ERROR(log, "Failed to delete query profiler timer {}", errnoToString());
|
||||
chassert(!err && "Failed to delete query profiler timer");
|
||||
|
||||
timer_id.reset();
|
||||
CurrentMetrics::sub(CurrentMetrics::CreatedTimersInQueryProfiler);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
template <typename ProfilerImpl>
|
||||
QueryProfilerBase<ProfilerImpl>::QueryProfilerBase(UInt64 thread_id, int clock_type, UInt32 period, int pause_signal_)
|
||||
: log(&Poco::Logger::get("QueryProfiler"))
|
||||
, pause_signal(pause_signal_)
|
||||
{
|
||||
#if defined(SANITIZER)
|
||||
UNUSED(thread_id);
|
||||
UNUSED(clock_type);
|
||||
UNUSED(period);
|
||||
UNUSED(pause_signal);
|
||||
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "QueryProfiler disabled because they cannot work under sanitizers");
|
||||
#elif !USE_UNWIND
|
||||
UNUSED(thread_id);
|
||||
UNUSED(clock_type);
|
||||
UNUSED(period);
|
||||
UNUSED(pause_signal);
|
||||
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "QueryProfiler cannot work with stock libunwind");
|
||||
#else
|
||||
/// Sanity check.
|
||||
if (!hasPHDRCache())
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "QueryProfiler cannot be used without PHDR cache, that is not available for TSan build");
|
||||
|
||||
struct sigaction sa{};
|
||||
sa.sa_sigaction = ProfilerImpl::signalHandler;
|
||||
sa.sa_flags = SA_SIGINFO | SA_RESTART;
|
||||
|
||||
if (sigemptyset(&sa.sa_mask))
|
||||
throwFromErrno("Failed to clean signal mask for query profiler", ErrorCodes::CANNOT_MANIPULATE_SIGSET);
|
||||
|
||||
if (sigaddset(&sa.sa_mask, pause_signal))
|
||||
throwFromErrno("Failed to add signal to mask for query profiler", ErrorCodes::CANNOT_MANIPULATE_SIGSET);
|
||||
|
||||
if (sigaction(pause_signal, &sa, nullptr))
|
||||
throwFromErrno("Failed to setup signal handler for query profiler", ErrorCodes::CANNOT_SET_SIGNAL_HANDLER);
|
||||
|
||||
try
|
||||
{
|
||||
timer.createIfNecessary(thread_id, clock_type, pause_signal);
|
||||
timer.set(period);
|
||||
signal_handler_disarmed = false;
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
tryCleanup();
|
||||
timer.cleanup();
|
||||
throw;
|
||||
}
|
||||
#endif
|
||||
@ -178,22 +242,21 @@ QueryProfilerBase<ProfilerImpl>::QueryProfilerBase(UInt64 thread_id, int clock_t
|
||||
template <typename ProfilerImpl>
|
||||
QueryProfilerBase<ProfilerImpl>::~QueryProfilerBase()
|
||||
{
|
||||
tryCleanup();
|
||||
try
|
||||
{
|
||||
cleanup();
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
tryLogCurrentException(log);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename ProfilerImpl>
|
||||
void QueryProfilerBase<ProfilerImpl>::tryCleanup()
|
||||
void QueryProfilerBase<ProfilerImpl>::cleanup()
|
||||
{
|
||||
#if USE_UNWIND
|
||||
if (timer_id.has_value())
|
||||
{
|
||||
int err = timer_delete(*timer_id);
|
||||
if (err)
|
||||
LOG_ERROR(log, "Failed to delete query profiler timer {}", errnoToString());
|
||||
chassert(!err && "Failed to delete query profiler timer");
|
||||
timer_id.reset();
|
||||
}
|
||||
|
||||
timer.stop();
|
||||
signal_handler_disarmed = true;
|
||||
#endif
|
||||
}
|
||||
|
@ -27,6 +27,27 @@ namespace DB
|
||||
* Destructor tries to unset timer and restore previous signal handler.
|
||||
* Note that signal handler implementation is defined by template parameter. See QueryProfilerReal and QueryProfilerCPU.
|
||||
*/
|
||||
|
||||
#if USE_UNWIND
|
||||
class Timer
|
||||
{
|
||||
public:
|
||||
Timer();
|
||||
Timer(const Timer &) = delete;
|
||||
Timer & operator = (const Timer &) = delete;
|
||||
~Timer();
|
||||
|
||||
void createIfNecessary(UInt64 thread_id, int clock_type, int pause_signal);
|
||||
void set(UInt32 period);
|
||||
void stop();
|
||||
void cleanup();
|
||||
|
||||
private:
|
||||
Poco::Logger * log;
|
||||
std::optional<timer_t> timer_id;
|
||||
};
|
||||
#endif
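
The Timer class above wraps a POSIX per-thread timer. Below is a compact, self-contained sketch of the underlying timer_create / timer_settime / timer_delete sequence it manages. The sketch delivers a plain process-wide SIGALRM instead of the per-thread SIGEV_THREAD_ID delivery used by the profiler, and it is not the actual ClickHouse code.

``` cpp
// Sketch of the POSIX timer lifecycle wrapped by Timer (Linux; link with -lrt on older glibc).
#include <csignal>
#include <cstdio>
#include <ctime>
#include <unistd.h>

static volatile sig_atomic_t ticks = 0;

static void handler(int) { ticks = ticks + 1; } // a real profiler would collect a stack trace here

int main()
{
    struct sigaction sa {};
    sa.sa_handler = handler;
    sa.sa_flags = SA_RESTART;
    sigemptyset(&sa.sa_mask);
    if (sigaction(SIGALRM, &sa, nullptr) != 0) { perror("sigaction"); return 1; }

    timer_t timer_id;
    struct sigevent sev {};
    sev.sigev_notify = SIGEV_SIGNAL;   // the profiler targets a single thread with SIGEV_THREAD_ID
    sev.sigev_signo = SIGALRM;
    if (timer_create(CLOCK_MONOTONIC, &sev, &timer_id) != 0) { perror("timer_create"); return 1; }

    // Arm with a 1 ms period, mirroring Timer::set clamping the period to at least 1'000'000 ns.
    struct itimerspec spec {};
    spec.it_interval.tv_sec = 0;
    spec.it_interval.tv_nsec = 1000000;
    spec.it_value = spec.it_interval;
    if (timer_settime(timer_id, 0, &spec, nullptr) != 0) { perror("timer_settime"); return 1; }

    sleep(1);               // stand-in for the profiled workload

    timer_delete(timer_id); // Timer::cleanup() does the equivalent
    std::printf("received %d ticks\n", (int)ticks);
    return 0;
}
```
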
|
||||
|
||||
template <typename ProfilerImpl>
|
||||
class QueryProfilerBase
|
||||
{
|
||||
@ -35,13 +56,12 @@ public:
|
||||
~QueryProfilerBase();
|
||||
|
||||
private:
|
||||
void tryCleanup();
|
||||
void cleanup();
|
||||
|
||||
Poco::Logger * log;
|
||||
|
||||
#if USE_UNWIND
|
||||
/// Timer id from timer_create(2)
|
||||
std::optional<timer_t> timer_id;
|
||||
inline static thread_local Timer timer = Timer();
|
||||
#endif
|
||||
|
||||
/// Pause signal to interrupt threads to get traces
|
||||
|
@ -96,6 +96,7 @@ class IColumn;
|
||||
M(Bool, s3_truncate_on_insert, false, "Enables or disables truncate before insert in s3 engine tables.", 0) \
|
||||
M(Bool, azure_truncate_on_insert, false, "Enables or disables truncate before insert in azure engine tables.", 0) \
|
||||
M(Bool, s3_create_new_file_on_insert, false, "Enables or disables creating a new file on each insert in s3 engine tables", 0) \
|
||||
M(Bool, s3_skip_empty_files, false, "Allow to skip empty files in s3 table engine", 0) \
|
||||
M(Bool, azure_create_new_file_on_insert, false, "Enables or disables creating a new file on each insert in azure engine tables", 0) \
|
||||
M(Bool, s3_check_objects_after_upload, false, "Check each uploaded object to s3 with head request to be sure that upload was successful", 0) \
|
||||
M(Bool, s3_allow_parallel_part_upload, true, "Use multiple threads for s3 multipart upload. It may lead to slightly higher memory usage", 0) \
|
||||
@ -105,6 +106,7 @@ class IColumn;
|
||||
M(UInt64, hdfs_replication, 0, "The actual number of replications can be specified when the hdfs file is created.", 0) \
|
||||
M(Bool, hdfs_truncate_on_insert, false, "Enables or disables truncate before insert in hdfs engine tables", 0) \
|
||||
M(Bool, hdfs_create_new_file_on_insert, false, "Enables or disables creating a new file on each insert in hdfs engine tables", 0) \
|
||||
M(Bool, hdfs_skip_empty_files, false, "Allow to skip empty files in hdfs table engine", 0) \
|
||||
M(UInt64, hsts_max_age, 0, "Expired time for hsts. 0 means disable HSTS.", 0) \
|
||||
M(Bool, extremes, false, "Calculate minimums and maximums of the result columns. They can be output in JSON-formats.", IMPORTANT) \
|
||||
M(Bool, use_uncompressed_cache, false, "Whether to use the cache of uncompressed blocks.", 0) \
|
||||
@ -612,6 +614,8 @@ class IColumn;
|
||||
M(Bool, engine_file_empty_if_not_exists, false, "Allows to select data from a file engine table without file", 0) \
|
||||
M(Bool, engine_file_truncate_on_insert, false, "Enables or disables truncate before insert in file engine tables", 0) \
|
||||
M(Bool, engine_file_allow_create_multiple_files, false, "Enables or disables creating a new file on each insert in file engine tables if format has suffix.", 0) \
|
||||
M(Bool, engine_file_skip_empty_files, false, "Allows to skip empty files in file table engine", 0) \
|
||||
M(Bool, engine_url_skip_empty_files, false, "Allows to skip empty files in url table engine", 0) \
|
||||
M(Bool, allow_experimental_database_replicated, false, "Allow to create databases with Replicated engine", 0) \
|
||||
M(UInt64, database_replicated_initial_query_timeout_sec, 300, "How long initial DDL query should wait for Replicated database to process previous DDL queue entries", 0) \
|
||||
M(Bool, database_replicated_enforce_synchronous_settings, false, "Enforces synchronous waiting for some queries (see also database_atomic_wait_for_drop_and_detach_synchronously, mutation_sync, alter_sync). Not recommended to enable these settings.", 0) \
|
||||
@ -860,6 +864,7 @@ class IColumn;
|
||||
M(Bool, input_format_csv_use_best_effort_in_schema_inference, true, "Use some tweaks and heuristics to infer schema in CSV format", 0) \
|
||||
M(Bool, input_format_tsv_use_best_effort_in_schema_inference, true, "Use some tweaks and heuristics to infer schema in TSV format", 0) \
|
||||
M(Bool, input_format_csv_detect_header, true, "Automatically detect header with names and types in CSV format", 0) \
|
||||
M(Bool, input_format_csv_allow_whitespace_or_tab_as_delimiter, false, "Allow to use spaces and tabs (\\t) as field delimiter in the CSV strings", 0) \
|
||||
M(Bool, input_format_csv_trim_whitespaces, true, "Trims spaces and tabs (\\t) characters at the beginning and end in CSV strings", 0) \
|
||||
M(Bool, input_format_tsv_detect_header, true, "Automatically detect header with names and types in TSV format", 0) \
|
||||
M(Bool, input_format_custom_detect_header, true, "Automatically detect header with names and types in CustomSeparated format", 0) \
|
||||
|
@ -275,8 +275,8 @@ private:
|
||||
{
|
||||
size_t pos = message.find('\n');
|
||||
|
||||
LOG_FATAL(log, "(version {}{}, build id: {}) (from thread {}) {}",
|
||||
VERSION_STRING, VERSION_OFFICIAL, daemon.build_id, thread_num, message.substr(0, pos));
|
||||
LOG_FATAL(log, "(version {}{}, build id: {}, git hash: {}) (from thread {}) {}",
|
||||
VERSION_STRING, VERSION_OFFICIAL, daemon.build_id, daemon.git_hash, thread_num, message.substr(0, pos));
|
||||
|
||||
/// Print trace from std::terminate exception line-by-line to make it easy for grep.
|
||||
while (pos != std::string_view::npos)
|
||||
@ -332,14 +332,14 @@ private:
|
||||
|
||||
if (query_id.empty())
|
||||
{
|
||||
LOG_FATAL(log, "(version {}{}, build id: {}) (from thread {}) (no query) Received signal {} ({})",
|
||||
VERSION_STRING, VERSION_OFFICIAL, daemon.build_id,
|
||||
LOG_FATAL(log, "(version {}{}, build id: {}, git hash: {}) (from thread {}) (no query) Received signal {} ({})",
|
||||
VERSION_STRING, VERSION_OFFICIAL, daemon.build_id, daemon.git_hash,
|
||||
thread_num, signal_description, sig);
|
||||
}
|
||||
else
|
||||
{
|
||||
LOG_FATAL(log, "(version {}{}, build id: {}) (from thread {}) (query_id: {}) (query: {}) Received signal {} ({})",
|
||||
VERSION_STRING, VERSION_OFFICIAL, daemon.build_id,
|
||||
LOG_FATAL(log, "(version {}{}, build id: {}, git hash: {}) (from thread {}) (query_id: {}) (query: {}) Received signal {} ({})",
|
||||
VERSION_STRING, VERSION_OFFICIAL, daemon.build_id, daemon.git_hash,
|
||||
thread_num, query_id, query, signal_description, sig);
|
||||
}
|
||||
|
||||
|
@ -300,49 +300,6 @@ namespace
|
||||
MutableColumnPtr additional_keys_map;
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
IndexMapsWithAdditionalKeys mapIndexWithAdditionalKeysRef(PaddedPODArray<T> & index, size_t dict_size)
|
||||
{
|
||||
PaddedPODArray<T> copy(index.cbegin(), index.cend());
|
||||
|
||||
HashMap<T, T> dict_map;
|
||||
HashMap<T, T> add_keys_map;
|
||||
|
||||
for (auto val : index)
|
||||
{
|
||||
if (val < dict_size)
|
||||
dict_map.insert({val, dict_map.size()});
|
||||
else
|
||||
add_keys_map.insert({val, add_keys_map.size()});
|
||||
}
|
||||
|
||||
auto dictionary_map = ColumnVector<T>::create(dict_map.size());
|
||||
auto additional_keys_map = ColumnVector<T>::create(add_keys_map.size());
|
||||
auto & dict_data = dictionary_map->getData();
|
||||
auto & add_keys_data = additional_keys_map->getData();
|
||||
|
||||
for (auto val : dict_map)
|
||||
dict_data[val.second] = val.first;
|
||||
|
||||
for (auto val : add_keys_map)
|
||||
add_keys_data[val.second] = val.first - dict_size;
|
||||
|
||||
for (auto & val : index)
|
||||
val = val < dict_size ? dict_map[val]
|
||||
: add_keys_map[val] + dict_map.size();
|
||||
|
||||
for (size_t i = 0; i < index.size(); ++i)
|
||||
{
|
||||
T expected = index[i] < dict_data.size() ? dict_data[index[i]]
|
||||
: add_keys_data[index[i] - dict_data.size()] + dict_size;
|
||||
if (expected != copy[i])
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected {}, but got {}", toString(expected), toString(copy[i]));
|
||||
|
||||
}
|
||||
|
||||
return {std::move(dictionary_map), std::move(additional_keys_map)};
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
IndexMapsWithAdditionalKeys mapIndexWithAdditionalKeys(PaddedPODArray<T> & index, size_t dict_size)
|
||||
{
|
||||
|
@ -3,6 +3,7 @@
|
||||
#include <filesystem>
|
||||
#include <Databases/DatabaseAtomic.h>
|
||||
#include <Databases/DatabaseDictionary.h>
|
||||
#include <Databases/DatabaseFilesystem.h>
|
||||
#include <Databases/DatabaseLazy.h>
|
||||
#include <Databases/DatabaseMemory.h>
|
||||
#include <Databases/DatabaseOrdinary.h>
|
||||
@ -47,6 +48,14 @@
|
||||
#include <Databases/SQLite/DatabaseSQLite.h>
|
||||
#endif
|
||||
|
||||
#if USE_AWS_S3
|
||||
#include <Databases/DatabaseS3.h>
|
||||
#endif
|
||||
|
||||
#if USE_HDFS
|
||||
#include <Databases/DatabaseHDFS.h>
|
||||
#endif
|
||||
|
||||
namespace fs = std::filesystem;
|
||||
|
||||
namespace DB
|
||||
@ -131,13 +140,13 @@ DatabasePtr DatabaseFactory::getImpl(const ASTCreateQuery & create, const String
|
||||
|
||||
static const std::unordered_set<std::string_view> database_engines{"Ordinary", "Atomic", "Memory",
|
||||
"Dictionary", "Lazy", "Replicated", "MySQL", "MaterializeMySQL", "MaterializedMySQL",
|
||||
"PostgreSQL", "MaterializedPostgreSQL", "SQLite"};
|
||||
"PostgreSQL", "MaterializedPostgreSQL", "SQLite", "Filesystem", "S3", "HDFS"};
|
||||
|
||||
if (!database_engines.contains(engine_name))
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Database engine name `{}` does not exist", engine_name);
|
||||
|
||||
static const std::unordered_set<std::string_view> engines_with_arguments{"MySQL", "MaterializeMySQL", "MaterializedMySQL",
|
||||
"Lazy", "Replicated", "PostgreSQL", "MaterializedPostgreSQL", "SQLite"};
|
||||
"Lazy", "Replicated", "PostgreSQL", "MaterializedPostgreSQL", "SQLite", "Filesystem", "S3", "HDFS"};
|
||||
|
||||
static const std::unordered_set<std::string_view> engines_with_table_overrides{"MaterializeMySQL", "MaterializedMySQL", "MaterializedPostgreSQL"};
|
||||
bool engine_may_have_arguments = engines_with_arguments.contains(engine_name);
|
||||
@ -432,6 +441,63 @@ DatabasePtr DatabaseFactory::getImpl(const ASTCreateQuery & create, const String
|
||||
}
|
||||
#endif
|
||||
|
||||
else if (engine_name == "Filesystem")
|
||||
{
|
||||
const ASTFunction * engine = engine_define->engine;
|
||||
|
||||
/// If init_path is empty, then the current path will be used
|
||||
std::string init_path;
|
||||
|
||||
if (engine->arguments && !engine->arguments->children.empty())
|
||||
{
|
||||
if (engine->arguments->children.size() != 1)
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Filesystem database requires at most 1 argument: filesystem_path");
|
||||
|
||||
const auto & arguments = engine->arguments->children;
|
||||
init_path = safeGetLiteralValue<String>(arguments[0], engine_name);
|
||||
}
|
||||
|
||||
return std::make_shared<DatabaseFilesystem>(database_name, init_path, context);
|
||||
}
|
||||
|
||||
#if USE_AWS_S3
|
||||
else if (engine_name == "S3")
|
||||
{
|
||||
const ASTFunction * engine = engine_define->engine;
|
||||
|
||||
DatabaseS3::Configuration config;
|
||||
|
||||
if (engine->arguments && !engine->arguments->children.empty())
|
||||
{
|
||||
ASTs & engine_args = engine->arguments->children;
|
||||
config = DatabaseS3::parseArguments(engine_args, context);
|
||||
}
|
||||
|
||||
return std::make_shared<DatabaseS3>(database_name, config, context);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if USE_HDFS
|
||||
else if (engine_name == "HDFS")
|
||||
{
|
||||
const ASTFunction * engine = engine_define->engine;
|
||||
|
||||
/// If source_url is empty, then table name must contain full url
|
||||
std::string source_url;
|
||||
|
||||
if (engine->arguments && !engine->arguments->children.empty())
|
||||
{
|
||||
if (engine->arguments->children.size() != 1)
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "HDFS database requires at most 1 argument: source_url");
|
||||
|
||||
const auto & arguments = engine->arguments->children;
|
||||
source_url = safeGetLiteralValue<String>(arguments[0], engine_name);
|
||||
}
|
||||
|
||||
return std::make_shared<DatabaseHDFS>(database_name, source_url, context);
|
||||
}
|
||||
#endif
|
||||
|
||||
throw Exception(ErrorCodes::UNKNOWN_DATABASE_ENGINE, "Unknown database engine: {}", engine_name);
|
||||
}
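
For illustration only (database names, paths, and URLs below are hypothetical, not taken from this patch), the three engines registered above accept argument signatures along these lines:

CREATE DATABASE db_fs   ENGINE = Filesystem('/var/lib/clickhouse/user_files');
CREATE DATABASE db_s3   ENGINE = S3('https://example-bucket.s3.amazonaws.com/data/', 'NOSIGN');
CREATE DATABASE db_hdfs ENGINE = HDFS('hdfs://namenode:9000');

Filesystem and HDFS take at most one argument; S3 additionally accepts either 'NOSIGN' or an access_key_id/secret_access_key pair, as parsed in DatabaseS3::parseArguments further below.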
|
||||
|
||||
|
247
src/Databases/DatabaseFilesystem.cpp
Normal file
@ -0,0 +1,247 @@
|
||||
#include <Databases/DatabaseFilesystem.h>
|
||||
|
||||
#include <IO/Operators.h>
|
||||
#include <IO/WriteBufferFromString.h>
|
||||
#include <Interpreters/Context.h>
|
||||
#include <Parsers/ASTCreateQuery.h>
|
||||
#include <Parsers/ASTFunction.h>
|
||||
#include <Parsers/ASTLiteral.h>
|
||||
#include <Parsers/ParserCreateQuery.h>
|
||||
#include <Parsers/parseQuery.h>
|
||||
#include <Storages/IStorage.h>
|
||||
#include <TableFunctions/TableFunctionFactory.h>
|
||||
#include <Common/filesystemHelpers.h>
|
||||
|
||||
#include <filesystem>
|
||||
|
||||
namespace fs = std::filesystem;
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int LOGICAL_ERROR;
|
||||
extern const int UNKNOWN_TABLE;
|
||||
extern const int DATABASE_ACCESS_DENIED;
|
||||
extern const int BAD_ARGUMENTS;
|
||||
extern const int FILE_DOESNT_EXIST;
|
||||
}
|
||||
|
||||
DatabaseFilesystem::DatabaseFilesystem(const String & name_, const String & path_, ContextPtr context_)
|
||||
: IDatabase(name_), WithContext(context_->getGlobalContext()), path(path_), log(&Poco::Logger::get("DatabaseFileSystem(" + name_ + ")"))
|
||||
{
|
||||
bool is_local = context_->getApplicationType() == Context::ApplicationType::LOCAL;
|
||||
fs::path user_files_path = is_local ? "" : fs::canonical(getContext()->getUserFilesPath());
|
||||
|
||||
if (fs::path(path).is_relative())
|
||||
{
|
||||
path = user_files_path / path;
|
||||
}
|
||||
else if (!is_local && !pathStartsWith(fs::path(path), user_files_path))
|
||||
{
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS,
|
||||
"Path must be inside user-files path: {}", user_files_path.string());
|
||||
}
|
||||
|
||||
path = fs::absolute(path).lexically_normal();
|
||||
if (!fs::exists(path))
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Path does not exist: {}", path);
|
||||
}
|
||||
|
||||
std::string DatabaseFilesystem::getTablePath(const std::string & table_name) const
|
||||
{
|
||||
fs::path table_path = fs::path(path) / table_name;
|
||||
return table_path.lexically_normal().string();
|
||||
}
|
||||
|
||||
void DatabaseFilesystem::addTable(const std::string & table_name, StoragePtr table_storage) const
|
||||
{
|
||||
std::lock_guard lock(mutex);
|
||||
auto [_, inserted] = loaded_tables.emplace(table_name, table_storage);
|
||||
if (!inserted)
|
||||
throw Exception(
|
||||
ErrorCodes::LOGICAL_ERROR,
|
||||
"Table with name `{}` already exists in database `{}` (engine {})",
|
||||
table_name, getDatabaseName(), getEngineName());
|
||||
}
|
||||
|
||||
bool DatabaseFilesystem::checkTableFilePath(const std::string & table_path, ContextPtr context_, bool throw_on_error) const
|
||||
{
|
||||
/// If run in Local mode, no need for path checking.
|
||||
bool check_path = context_->getApplicationType() != Context::ApplicationType::LOCAL;
|
||||
const auto & user_files_path = context_->getUserFilesPath();
|
||||
|
||||
/// Check access for file before checking its existence.
|
||||
if (check_path && !fileOrSymlinkPathStartsWith(table_path, user_files_path))
|
||||
{
|
||||
if (throw_on_error)
|
||||
throw Exception(ErrorCodes::DATABASE_ACCESS_DENIED, "File is not inside {}", user_files_path);
|
||||
else
|
||||
return false;
|
||||
}
|
||||
|
||||
/// Check if the corresponding file exists.
|
||||
if (!fs::exists(table_path))
|
||||
{
|
||||
if (throw_on_error)
|
||||
throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "File does not exist: {}", table_path);
|
||||
else
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!fs::is_regular_file(table_path))
|
||||
{
|
||||
if (throw_on_error)
|
||||
throw Exception(ErrorCodes::FILE_DOESNT_EXIST,
|
||||
"File is a directory, but expected a file: {}", table_path);
|
||||
else
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
StoragePtr DatabaseFilesystem::tryGetTableFromCache(const std::string & name) const
|
||||
{
|
||||
StoragePtr table = nullptr;
|
||||
{
|
||||
std::lock_guard lock(mutex);
|
||||
auto it = loaded_tables.find(name);
|
||||
if (it != loaded_tables.end())
|
||||
table = it->second;
|
||||
}
|
||||
|
||||
/// Invalidate cache if file no longer exists.
|
||||
if (table && !fs::exists(getTablePath(name)))
|
||||
{
|
||||
std::lock_guard lock(mutex);
|
||||
loaded_tables.erase(name);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
return table;
|
||||
}
|
||||
|
||||
bool DatabaseFilesystem::isTableExist(const String & name, ContextPtr context_) const
|
||||
{
|
||||
if (tryGetTableFromCache(name))
|
||||
return true;
|
||||
|
||||
return checkTableFilePath(getTablePath(name), context_, /* throw_on_error */false);
|
||||
}
|
||||
|
||||
StoragePtr DatabaseFilesystem::getTableImpl(const String & name, ContextPtr context_) const
|
||||
{
|
||||
/// Check if table exists in loaded tables map.
|
||||
if (auto table = tryGetTableFromCache(name))
|
||||
return table;
|
||||
|
||||
auto table_path = getTablePath(name);
|
||||
checkTableFilePath(table_path, context_, /* throw_on_error */true);
|
||||
|
||||
/// If the file exists, create a new table using TableFunctionFile and return it.
|
||||
auto args = makeASTFunction("file", std::make_shared<ASTLiteral>(table_path));
|
||||
|
||||
auto table_function = TableFunctionFactory::instance().get(args, context_);
|
||||
if (!table_function)
|
||||
return nullptr;
|
||||
|
||||
/// TableFunctionFile throws exceptions, if table cannot be created.
|
||||
auto table_storage = table_function->execute(args, context_, name);
|
||||
if (table_storage)
|
||||
addTable(name, table_storage);
|
||||
|
||||
return table_storage;
|
||||
}
|
||||
|
||||
StoragePtr DatabaseFilesystem::getTable(const String & name, ContextPtr context_) const
|
||||
{
|
||||
/// getTableImpl can throw exceptions, do not catch them to show correct error to user.
|
||||
if (auto storage = getTableImpl(name, context_))
|
||||
return storage;
|
||||
|
||||
throw Exception(ErrorCodes::UNKNOWN_TABLE, "Table {}.{} doesn't exist",
|
||||
backQuoteIfNeed(getDatabaseName()), backQuoteIfNeed(name));
|
||||
}
|
||||
|
||||
StoragePtr DatabaseFilesystem::tryGetTable(const String & name, ContextPtr context_) const
|
||||
{
|
||||
try
|
||||
{
|
||||
return getTableImpl(name, context_);
|
||||
}
|
||||
catch (const Exception & e)
|
||||
{
|
||||
/// Ignore exceptions thrown by TableFunctionFile, which indicate that there is no table
|
||||
/// see tests/02722_database_filesystem.sh for more details.
|
||||
if (e.code() == ErrorCodes::BAD_ARGUMENTS
|
||||
|| e.code() == ErrorCodes::DATABASE_ACCESS_DENIED
|
||||
|| e.code() == ErrorCodes::FILE_DOESNT_EXIST)
|
||||
{
|
||||
return nullptr;
|
||||
}
|
||||
throw;
|
||||
}
|
||||
}
|
||||
|
||||
bool DatabaseFilesystem::empty() const
|
||||
{
|
||||
std::lock_guard lock(mutex);
|
||||
return loaded_tables.empty();
|
||||
}
|
||||
|
||||
ASTPtr DatabaseFilesystem::getCreateDatabaseQuery() const
|
||||
{
|
||||
const auto & settings = getContext()->getSettingsRef();
|
||||
const String query = fmt::format("CREATE DATABASE {} ENGINE = Filesystem('{}')", backQuoteIfNeed(getDatabaseName()), path);
|
||||
|
||||
ParserCreateQuery parser;
|
||||
ASTPtr ast = parseQuery(parser, query.data(), query.data() + query.size(), "", 0, settings.max_parser_depth);
|
||||
|
||||
if (const auto database_comment = getDatabaseComment(); !database_comment.empty())
|
||||
{
|
||||
auto & ast_create_query = ast->as<ASTCreateQuery &>();
|
||||
ast_create_query.set(ast_create_query.comment, std::make_shared<ASTLiteral>(database_comment));
|
||||
}
|
||||
|
||||
return ast;
|
||||
}
|
||||
|
||||
void DatabaseFilesystem::shutdown()
|
||||
{
|
||||
Tables tables_snapshot;
|
||||
{
|
||||
std::lock_guard lock(mutex);
|
||||
tables_snapshot = loaded_tables;
|
||||
}
|
||||
|
||||
for (const auto & kv : tables_snapshot)
|
||||
{
|
||||
auto table_id = kv.second->getStorageID();
|
||||
kv.second->flushAndShutdown();
|
||||
}
|
||||
|
||||
std::lock_guard lock(mutex);
|
||||
loaded_tables.clear();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns an empty vector because the database is read-only and no tables can be backed up
|
||||
*/
|
||||
std::vector<std::pair<ASTPtr, StoragePtr>> DatabaseFilesystem::getTablesForBackup(const FilterByNameFunction &, const ContextPtr &) const
|
||||
{
|
||||
return {};
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* Returns an empty iterator because the database does not have its own tables,
|
||||
* but only caches them for quick access.
|
||||
*/
|
||||
DatabaseTablesIteratorPtr DatabaseFilesystem::getTablesIterator(ContextPtr, const FilterByNameFunction &) const
|
||||
{
|
||||
return std::make_unique<DatabaseTablesSnapshotIterator>(Tables{}, getDatabaseName());
|
||||
}
|
||||
|
||||
}
|
67
src/Databases/DatabaseFilesystem.h
Normal file
@ -0,0 +1,67 @@
|
||||
#pragma once
|
||||
|
||||
#include <mutex>
|
||||
#include <Databases/IDatabase.h>
|
||||
#include <Parsers/IAST.h>
|
||||
#include <Storages/IStorage_fwd.h>
|
||||
#include <base/types.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
class Context;
|
||||
|
||||
/**
|
||||
* DatabaseFilesystem allows to interact with files stored on the local filesystem.
|
||||
* Uses TableFunctionFile to implicitly load the file when a user requests the table,
|
||||
* and provides a read-only access to the data in the file.
|
||||
* Tables are cached inside the database for quick access
|
||||
*
|
||||
* Used in clickhouse-local to access local files.
|
||||
* For clickhouse-server, access is allowed only to files inside the user_files directory.
|
||||
*/
|
||||
class DatabaseFilesystem : public IDatabase, protected WithContext
|
||||
{
|
||||
public:
|
||||
DatabaseFilesystem(const String & name, const String & path, ContextPtr context);
|
||||
|
||||
String getEngineName() const override { return "Filesystem"; }
|
||||
|
||||
bool isTableExist(const String & name, ContextPtr context) const override;
|
||||
|
||||
StoragePtr getTable(const String & name, ContextPtr context) const override;
|
||||
|
||||
StoragePtr tryGetTable(const String & name, ContextPtr context) const override;
|
||||
|
||||
bool shouldBeEmptyOnDetach() const override { return false; } /// Contains only temporary tables.
|
||||
|
||||
bool empty() const override;
|
||||
|
||||
bool isReadOnly() const override { return true; }
|
||||
|
||||
ASTPtr getCreateDatabaseQuery() const override;
|
||||
|
||||
void shutdown() override;
|
||||
|
||||
std::vector<std::pair<ASTPtr, StoragePtr>> getTablesForBackup(const FilterByNameFunction &, const ContextPtr &) const override;
|
||||
|
||||
DatabaseTablesIteratorPtr getTablesIterator(ContextPtr, const FilterByNameFunction &) const override;
|
||||
|
||||
protected:
|
||||
StoragePtr getTableImpl(const String & name, ContextPtr context) const;
|
||||
|
||||
StoragePtr tryGetTableFromCache(const std::string & name) const;
|
||||
|
||||
std::string getTablePath(const std::string & table_name) const;
|
||||
|
||||
void addTable(const std::string & table_name, StoragePtr table_storage) const;
|
||||
|
||||
bool checkTableFilePath(const std::string & table_path, ContextPtr context_, bool throw_on_error) const;
|
||||
|
||||
private:
|
||||
String path;
|
||||
mutable Tables loaded_tables TSA_GUARDED_BY(mutex);
|
||||
Poco::Logger * log;
|
||||
};
|
||||
|
||||
}
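
A minimal usage sketch of the Filesystem engine described above (database and file names are hypothetical): the table name is resolved against the configured directory and loaded on first access through the file table function, then cached in loaded_tables.

CREATE DATABASE db_fs ENGINE = Filesystem('/var/lib/clickhouse/user_files');
SELECT count() FROM db_fs.`data.csv`;  -- resolved to /var/lib/clickhouse/user_files/data.csv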
|
234
src/Databases/DatabaseHDFS.cpp
Normal file
@ -0,0 +1,234 @@
|
||||
#include "config.h"
|
||||
|
||||
#if USE_HDFS
|
||||
|
||||
#include <Databases/DatabaseHDFS.h>
|
||||
|
||||
#include <Interpreters/Context.h>
|
||||
#include <Parsers/ASTCreateQuery.h>
|
||||
#include <Parsers/ASTFunction.h>
|
||||
#include <Parsers/ASTLiteral.h>
|
||||
#include <Parsers/parseQuery.h>
|
||||
#include <Parsers/ParserCreateQuery.h>
|
||||
#include <Storages/HDFS/HDFSCommon.h>
|
||||
#include <Storages/IStorage.h>
|
||||
#include <TableFunctions/TableFunctionFactory.h>
|
||||
|
||||
#include <Poco/URI.h>
|
||||
#include <re2/re2.h>
|
||||
|
||||
#include <filesystem>
|
||||
|
||||
namespace fs = std::filesystem;
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int LOGICAL_ERROR;
|
||||
extern const int UNKNOWN_TABLE;
|
||||
extern const int BAD_ARGUMENTS;
|
||||
extern const int FILE_DOESNT_EXIST;
|
||||
extern const int UNACCEPTABLE_URL;
|
||||
extern const int ACCESS_DENIED;
|
||||
extern const int DATABASE_ACCESS_DENIED;
|
||||
extern const int HDFS_ERROR;
|
||||
extern const int CANNOT_EXTRACT_TABLE_STRUCTURE;
|
||||
}
|
||||
|
||||
static constexpr std::string_view HDFS_HOST_REGEXP = "^hdfs://[^/]*";
|
||||
|
||||
|
||||
DatabaseHDFS::DatabaseHDFS(const String & name_, const String & source_url, ContextPtr context_)
|
||||
: IDatabase(name_)
|
||||
, WithContext(context_->getGlobalContext())
|
||||
, source(source_url)
|
||||
, log(&Poco::Logger::get("DatabaseHDFS(" + name_ + ")"))
|
||||
{
|
||||
if (!source.empty())
|
||||
{
|
||||
if (!re2::RE2::FullMatch(source, std::string(HDFS_HOST_REGEXP)))
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Bad hdfs host: {}. "
|
||||
"It should have structure 'hdfs://<host_name>:<port>'", source);
|
||||
|
||||
context_->getGlobalContext()->getRemoteHostFilter().checkURL(Poco::URI(source));
|
||||
}
|
||||
}
|
||||
|
||||
void DatabaseHDFS::addTable(const std::string & table_name, StoragePtr table_storage) const
|
||||
{
|
||||
std::lock_guard lock(mutex);
|
||||
auto [_, inserted] = loaded_tables.emplace(table_name, table_storage);
|
||||
if (!inserted)
|
||||
throw Exception(
|
||||
ErrorCodes::LOGICAL_ERROR,
|
||||
"Table with name `{}` already exists in database `{}` (engine {})",
|
||||
table_name, getDatabaseName(), getEngineName());
|
||||
}
|
||||
|
||||
std::string DatabaseHDFS::getTablePath(const std::string & table_name) const
|
||||
{
|
||||
if (table_name.starts_with("hdfs://"))
|
||||
return table_name;
|
||||
|
||||
if (source.empty())
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Bad hdfs url: {}. "
|
||||
"It should have structure 'hdfs://<host_name>:<port>/path'", table_name);
|
||||
|
||||
return fs::path(source) / table_name;
|
||||
}
|
||||
|
||||
bool DatabaseHDFS::checkUrl(const std::string & url, ContextPtr context_, bool throw_on_error) const
|
||||
{
|
||||
try
|
||||
{
|
||||
checkHDFSURL(url);
|
||||
context_->getGlobalContext()->getRemoteHostFilter().checkURL(Poco::URI(url));
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
if (throw_on_error)
|
||||
throw;
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool DatabaseHDFS::isTableExist(const String & name, ContextPtr context_) const
|
||||
{
|
||||
std::lock_guard lock(mutex);
|
||||
if (loaded_tables.find(name) != loaded_tables.end())
|
||||
return true;
|
||||
|
||||
return checkUrl(name, context_, false);
|
||||
}
|
||||
|
||||
StoragePtr DatabaseHDFS::getTableImpl(const String & name, ContextPtr context_) const
|
||||
{
|
||||
/// Check if the table exists in the loaded tables map.
|
||||
{
|
||||
std::lock_guard lock(mutex);
|
||||
auto it = loaded_tables.find(name);
|
||||
if (it != loaded_tables.end())
|
||||
return it->second;
|
||||
}
|
||||
|
||||
auto url = getTablePath(name);
|
||||
|
||||
checkUrl(url, context_, true);
|
||||
|
||||
auto args = makeASTFunction("hdfs", std::make_shared<ASTLiteral>(url));
|
||||
|
||||
auto table_function = TableFunctionFactory::instance().get(args, context_);
|
||||
if (!table_function)
|
||||
return nullptr;
|
||||
|
||||
/// TableFunctionHDFS throws exceptions, if table cannot be created.
|
||||
auto table_storage = table_function->execute(args, context_, name);
|
||||
if (table_storage)
|
||||
addTable(name, table_storage);
|
||||
|
||||
return table_storage;
|
||||
}
|
||||
|
||||
StoragePtr DatabaseHDFS::getTable(const String & name, ContextPtr context_) const
|
||||
{
|
||||
/// Rethrow all exceptions from TableFunctionHDFS to show correct error to user.
|
||||
if (auto storage = getTableImpl(name, context_))
|
||||
return storage;
|
||||
|
||||
throw Exception(ErrorCodes::UNKNOWN_TABLE, "Table {}.{} doesn't exist",
|
||||
backQuoteIfNeed(getDatabaseName()), backQuoteIfNeed(name));
|
||||
}
|
||||
|
||||
StoragePtr DatabaseHDFS::tryGetTable(const String & name, ContextPtr context_) const
|
||||
{
|
||||
try
|
||||
{
|
||||
return getTableImpl(name, context_);
|
||||
}
|
||||
catch (const Exception & e)
|
||||
{
|
||||
// Ignore exceptions thrown by TableFunctionHDFS, which indicate that there is no table
|
||||
if (e.code() == ErrorCodes::BAD_ARGUMENTS
|
||||
|| e.code() == ErrorCodes::ACCESS_DENIED
|
||||
|| e.code() == ErrorCodes::DATABASE_ACCESS_DENIED
|
||||
|| e.code() == ErrorCodes::FILE_DOESNT_EXIST
|
||||
|| e.code() == ErrorCodes::UNACCEPTABLE_URL
|
||||
|| e.code() == ErrorCodes::HDFS_ERROR
|
||||
|| e.code() == ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE)
|
||||
{
|
||||
return nullptr;
|
||||
}
|
||||
throw;
|
||||
}
|
||||
catch (const Poco::URISyntaxException &)
|
||||
{
|
||||
return nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
bool DatabaseHDFS::empty() const
|
||||
{
|
||||
std::lock_guard lock(mutex);
|
||||
return loaded_tables.empty();
|
||||
}
|
||||
|
||||
ASTPtr DatabaseHDFS::getCreateDatabaseQuery() const
|
||||
{
|
||||
const auto & settings = getContext()->getSettingsRef();
|
||||
ParserCreateQuery parser;
|
||||
|
||||
const String query = fmt::format("CREATE DATABASE {} ENGINE = HDFS('{}')", backQuoteIfNeed(getDatabaseName()), source);
|
||||
ASTPtr ast = parseQuery(parser, query.data(), query.data() + query.size(), "", 0, settings.max_parser_depth);
|
||||
|
||||
if (const auto database_comment = getDatabaseComment(); !database_comment.empty())
|
||||
{
|
||||
auto & ast_create_query = ast->as<ASTCreateQuery &>();
|
||||
ast_create_query.set(ast_create_query.comment, std::make_shared<ASTLiteral>(database_comment));
|
||||
}
|
||||
|
||||
return ast;
|
||||
}
|
||||
|
||||
void DatabaseHDFS::shutdown()
|
||||
{
|
||||
Tables tables_snapshot;
|
||||
{
|
||||
std::lock_guard lock(mutex);
|
||||
tables_snapshot = loaded_tables;
|
||||
}
|
||||
|
||||
for (const auto & kv : tables_snapshot)
|
||||
{
|
||||
auto table_id = kv.second->getStorageID();
|
||||
kv.second->flushAndShutdown();
|
||||
}
|
||||
|
||||
std::lock_guard lock(mutex);
|
||||
loaded_tables.clear();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns an empty vector because the database is read-only and no tables can be backed up
|
||||
*/
|
||||
std::vector<std::pair<ASTPtr, StoragePtr>> DatabaseHDFS::getTablesForBackup(const FilterByNameFunction &, const ContextPtr &) const
|
||||
{
|
||||
return {};
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* Returns an empty iterator because the database does not have its own tables,
|
||||
* but only caches them for quick access.
|
||||
*/
|
||||
DatabaseTablesIteratorPtr DatabaseHDFS::getTablesIterator(ContextPtr, const FilterByNameFunction &) const
|
||||
{
|
||||
return std::make_unique<DatabaseTablesSnapshotIterator>(Tables{}, getDatabaseName());
|
||||
}
|
||||
|
||||
} // DB
|
||||
|
||||
#endif
|
68
src/Databases/DatabaseHDFS.h
Normal file
@ -0,0 +1,68 @@
|
||||
#pragma once
|
||||
|
||||
#include "config.h"
|
||||
|
||||
#if USE_HDFS
|
||||
|
||||
#include <mutex>
|
||||
#include <Databases/IDatabase.h>
|
||||
#include <Parsers/IAST.h>
|
||||
#include <Storages/IStorage_fwd.h>
|
||||
#include <base/types.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
class Context;
|
||||
|
||||
/**
|
||||
* DatabaseHDFS allows to interact with files stored in HDFS.
|
||||
* Uses TableFunctionHDFS to implicitly load the file when a user requests the table,
|
||||
* and provides read-only access to the data in the file.
|
||||
* Tables are cached inside the database for quick access.
|
||||
*/
|
||||
class DatabaseHDFS : public IDatabase, protected WithContext
|
||||
{
|
||||
public:
|
||||
DatabaseHDFS(const String & name, const String & source_url, ContextPtr context);
|
||||
|
||||
String getEngineName() const override { return "HDFS"; }
|
||||
|
||||
bool isTableExist(const String & name, ContextPtr context) const override;
|
||||
|
||||
StoragePtr getTable(const String & name, ContextPtr context) const override;
|
||||
|
||||
StoragePtr tryGetTable(const String & name, ContextPtr context) const override;
|
||||
|
||||
bool shouldBeEmptyOnDetach() const override { return false; } /// Contains only temporary tables.
|
||||
|
||||
bool empty() const override;
|
||||
|
||||
bool isReadOnly() const override { return true; }
|
||||
|
||||
ASTPtr getCreateDatabaseQuery() const override;
|
||||
|
||||
void shutdown() override;
|
||||
|
||||
std::vector<std::pair<ASTPtr, StoragePtr>> getTablesForBackup(const FilterByNameFunction &, const ContextPtr &) const override;
|
||||
DatabaseTablesIteratorPtr getTablesIterator(ContextPtr, const FilterByNameFunction &) const override;
|
||||
|
||||
protected:
|
||||
StoragePtr getTableImpl(const String & name, ContextPtr context) const;
|
||||
|
||||
void addTable(const std::string & table_name, StoragePtr table_storage) const;
|
||||
|
||||
bool checkUrl(const std::string & url, ContextPtr context_, bool throw_on_error) const;
|
||||
|
||||
std::string getTablePath(const std::string & table_name) const;
|
||||
|
||||
private:
|
||||
const String source;
|
||||
|
||||
mutable Tables loaded_tables TSA_GUARDED_BY(mutex);
|
||||
Poco::Logger * log;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
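
A minimal usage sketch of the HDFS engine (host and file names are hypothetical): per getTablePath above, a table name is appended to the configured 'hdfs://<host_name>:<port>' source unless it is already a full HDFS URL.

CREATE DATABASE db_hdfs ENGINE = HDFS('hdfs://namenode:9000');
SELECT * FROM db_hdfs.`/data/events.tsv`;                     -- resolved to hdfs://namenode:9000/data/events.tsv
SELECT * FROM db_hdfs.`hdfs://namenode:9000/other/part.tsv`;  -- full URLs are passed through unchanged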
|
@ -36,6 +36,7 @@
|
||||
#include <Parsers/ParserCreateQuery.h>
|
||||
#include <Parsers/queryToString.h>
|
||||
#include <Storages/StorageKeeperMap.h>
|
||||
#include <Storages/AlterCommands.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -1441,9 +1442,49 @@ bool DatabaseReplicated::shouldReplicateQuery(const ContextPtr & query_context,
|
||||
return table->as<StorageKeeperMap>() != nullptr;
|
||||
};
|
||||
|
||||
const auto is_replicated_table = [&](const ASTPtr & ast)
|
||||
{
|
||||
auto table_id = query_context->resolveStorageID(ast, Context::ResolveOrdinary);
|
||||
StoragePtr table = DatabaseCatalog::instance().getTable(table_id, query_context);
|
||||
|
||||
return table->supportsReplication();
|
||||
};
|
||||
|
||||
const auto has_many_shards = [&]()
|
||||
{
|
||||
/// If there is only 1 shard then there is no need to replicate some queries.
|
||||
auto current_cluster = tryGetCluster();
|
||||
return
|
||||
!current_cluster || /// Couldn't get the cluster, so we don't know how many shards there are.
|
||||
current_cluster->getShardsInfo().size() > 1;
|
||||
};
|
||||
|
||||
/// Some ALTERs are not replicated on database level
|
||||
if (const auto * alter = query_ptr->as<const ASTAlterQuery>())
|
||||
return !alter->isAttachAlter() && !alter->isFetchAlter() && !alter->isDropPartitionAlter() && !is_keeper_map_table(query_ptr);
|
||||
{
|
||||
if (alter->isAttachAlter() || alter->isFetchAlter() || alter->isDropPartitionAlter() || is_keeper_map_table(query_ptr))
|
||||
return false;
|
||||
|
||||
if (has_many_shards() || !is_replicated_table(query_ptr))
|
||||
return true;
|
||||
|
||||
try
|
||||
{
|
||||
/// Metadata alter should go through database
|
||||
for (const auto & child : alter->command_list->children)
|
||||
if (AlterCommand::parse(child->as<ASTAlterCommand>()))
|
||||
return true;
|
||||
|
||||
/// It's ALTER PARTITION or mutation, doesn't involve database
|
||||
return false;
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
tryLogCurrentException(log);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/// DROP DATABASE is not replicated
|
||||
if (const auto * drop = query_ptr->as<const ASTDropQuery>())
|
||||
@ -1459,11 +1500,7 @@ bool DatabaseReplicated::shouldReplicateQuery(const ContextPtr & query_context,
|
||||
if (is_keeper_map_table(query_ptr))
|
||||
return false;
|
||||
|
||||
/// If there is only 1 shard then there is no need to replicate DELETE query.
|
||||
auto current_cluster = tryGetCluster();
|
||||
return
|
||||
!current_cluster || /// Couldn't get the cluster, so we don't know how many shards there are.
|
||||
current_cluster->getShardsInfo().size() > 1;
|
||||
return has_many_shards() || !is_replicated_table(query_ptr);
|
||||
}
|
||||
|
||||
return true;
|
||||
|
312
src/Databases/DatabaseS3.cpp
Normal file
@ -0,0 +1,312 @@
|
||||
#include "config.h"
|
||||
|
||||
#if USE_AWS_S3
|
||||
|
||||
#include <Databases/DatabaseS3.h>
|
||||
|
||||
#include <Interpreters/Context.h>
|
||||
#include <Interpreters/evaluateConstantExpression.h>
|
||||
#include <IO/S3/URI.h>
|
||||
#include <Parsers/ASTCreateQuery.h>
|
||||
#include <Parsers/ASTFunction.h>
|
||||
#include <Parsers/ASTLiteral.h>
|
||||
#include <Parsers/parseQuery.h>
|
||||
#include <Parsers/ParserCreateQuery.h>
|
||||
#include <Storages/checkAndGetLiteralArgument.h>
|
||||
#include <Storages/IStorage.h>
|
||||
#include <Storages/NamedCollectionsHelpers.h>
|
||||
#include <TableFunctions/TableFunctionFactory.h>
|
||||
|
||||
#include <boost/algorithm/string.hpp>
|
||||
#include <filesystem>
|
||||
|
||||
namespace fs = std::filesystem;
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
static const std::unordered_set<std::string_view> optional_configuration_keys = {
|
||||
"url",
|
||||
"access_key_id",
|
||||
"secret_access_key",
|
||||
"no_sign_request"
|
||||
};
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int LOGICAL_ERROR;
|
||||
extern const int UNKNOWN_TABLE;
|
||||
extern const int BAD_ARGUMENTS;
|
||||
extern const int FILE_DOESNT_EXIST;
|
||||
extern const int UNACCEPTABLE_URL;
|
||||
extern const int S3_ERROR;
|
||||
|
||||
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
|
||||
}
|
||||
|
||||
DatabaseS3::DatabaseS3(const String & name_, const Configuration& config_, ContextPtr context_)
|
||||
: IDatabase(name_)
|
||||
, WithContext(context_->getGlobalContext())
|
||||
, config(config_)
|
||||
, log(&Poco::Logger::get("DatabaseS3(" + name_ + ")"))
|
||||
{
|
||||
}
|
||||
|
||||
void DatabaseS3::addTable(const std::string & table_name, StoragePtr table_storage) const
|
||||
{
|
||||
std::lock_guard lock(mutex);
|
||||
auto [_, inserted] = loaded_tables.emplace(table_name, table_storage);
|
||||
if (!inserted)
|
||||
throw Exception(
|
||||
ErrorCodes::LOGICAL_ERROR,
|
||||
"Table with name `{}` already exists in database `{}` (engine {})",
|
||||
table_name, getDatabaseName(), getEngineName());
|
||||
}
|
||||
|
||||
std::string DatabaseS3::getFullUrl(const std::string & name) const
|
||||
{
|
||||
if (!config.url_prefix.empty())
|
||||
return fs::path(config.url_prefix) / name;
|
||||
|
||||
return name;
|
||||
}
|
||||
|
||||
bool DatabaseS3::checkUrl(const std::string & url, ContextPtr context_, bool throw_on_error) const
|
||||
{
|
||||
try
|
||||
{
|
||||
S3::URI uri(url);
|
||||
context_->getGlobalContext()->getRemoteHostFilter().checkURL(uri.uri);
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
if (throw_on_error)
|
||||
throw;
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool DatabaseS3::isTableExist(const String & name, ContextPtr context_) const
|
||||
{
|
||||
std::lock_guard lock(mutex);
|
||||
if (loaded_tables.find(name) != loaded_tables.end())
|
||||
return true;
|
||||
|
||||
return checkUrl(getFullUrl(name), context_, false);
|
||||
}
|
||||
|
||||
StoragePtr DatabaseS3::getTableImpl(const String & name, ContextPtr context_) const
|
||||
{
|
||||
/// Check if the table exists in the loaded tables map.
|
||||
{
|
||||
std::lock_guard lock(mutex);
|
||||
auto it = loaded_tables.find(name);
|
||||
if (it != loaded_tables.end())
|
||||
return it->second;
|
||||
}
|
||||
|
||||
auto url = getFullUrl(name);
|
||||
checkUrl(url, context_, /* throw_on_error */true);
|
||||
|
||||
auto function = std::make_shared<ASTFunction>();
|
||||
function->name = "s3";
|
||||
function->arguments = std::make_shared<ASTExpressionList>();
|
||||
function->children.push_back(function->arguments);
|
||||
|
||||
function->arguments->children.push_back(std::make_shared<ASTLiteral>(url));
|
||||
if (config.no_sign_request)
|
||||
{
|
||||
function->arguments->children.push_back(std::make_shared<ASTLiteral>("NOSIGN"));
|
||||
}
|
||||
else if (config.access_key_id.has_value() && config.secret_access_key.has_value())
|
||||
{
|
||||
function->arguments->children.push_back(std::make_shared<ASTLiteral>(config.access_key_id.value()));
|
||||
function->arguments->children.push_back(std::make_shared<ASTLiteral>(config.secret_access_key.value()));
|
||||
}
|
||||
|
||||
auto table_function = TableFunctionFactory::instance().get(function, context_);
|
||||
if (!table_function)
|
||||
return nullptr;
|
||||
|
||||
/// TableFunctionS3 throws exceptions, if table cannot be created.
|
||||
auto table_storage = table_function->execute(function, context_, name);
|
||||
if (table_storage)
|
||||
addTable(name, table_storage);
|
||||
|
||||
return table_storage;
|
||||
}
|
||||
|
||||
StoragePtr DatabaseS3::getTable(const String & name, ContextPtr context_) const
|
||||
{
|
||||
/// Rethrow all exceptions from TableFunctionS3 to show correct error to user.
|
||||
if (auto storage = getTableImpl(name, context_))
|
||||
return storage;
|
||||
|
||||
throw Exception(ErrorCodes::UNKNOWN_TABLE, "Table {}.{} doesn't exist",
|
||||
backQuoteIfNeed(getDatabaseName()), backQuoteIfNeed(name));
|
||||
}
|
||||
|
||||
StoragePtr DatabaseS3::tryGetTable(const String & name, ContextPtr context_) const
|
||||
{
|
||||
try
|
||||
{
|
||||
return getTableImpl(name, context_);
|
||||
}
|
||||
catch (const Exception & e)
|
||||
{
|
||||
/// Ignore exceptions thrown by TableFunctionS3, which indicate that there is no table.
|
||||
if (e.code() == ErrorCodes::BAD_ARGUMENTS
|
||||
|| e.code() == ErrorCodes::S3_ERROR
|
||||
|| e.code() == ErrorCodes::FILE_DOESNT_EXIST
|
||||
|| e.code() == ErrorCodes::UNACCEPTABLE_URL)
|
||||
{
|
||||
return nullptr;
|
||||
}
|
||||
throw;
|
||||
}
|
||||
catch (const Poco::URISyntaxException &)
|
||||
{
|
||||
return nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
bool DatabaseS3::empty() const
|
||||
{
|
||||
std::lock_guard lock(mutex);
|
||||
return loaded_tables.empty();
|
||||
}
|
||||
|
||||
ASTPtr DatabaseS3::getCreateDatabaseQuery() const
|
||||
{
|
||||
const auto & settings = getContext()->getSettingsRef();
|
||||
ParserCreateQuery parser;
|
||||
|
||||
std::string creation_args;
|
||||
creation_args += fmt::format("'{}'", config.url_prefix);
|
||||
if (config.no_sign_request)
|
||||
creation_args += ", 'NOSIGN'";
|
||||
else if (config.access_key_id.has_value() && config.secret_access_key.has_value())
|
||||
creation_args += fmt::format(", '{}', '{}'", config.access_key_id.value(), config.secret_access_key.value());
|
||||
|
||||
const String query = fmt::format("CREATE DATABASE {} ENGINE = S3({})", backQuoteIfNeed(getDatabaseName()), creation_args);
|
||||
ASTPtr ast = parseQuery(parser, query.data(), query.data() + query.size(), "", 0, settings.max_parser_depth);
|
||||
|
||||
if (const auto database_comment = getDatabaseComment(); !database_comment.empty())
|
||||
{
|
||||
auto & ast_create_query = ast->as<ASTCreateQuery &>();
|
||||
ast_create_query.set(ast_create_query.comment, std::make_shared<ASTLiteral>(database_comment));
|
||||
}
|
||||
|
||||
return ast;
|
||||
}
|
||||
|
||||
void DatabaseS3::shutdown()
|
||||
{
|
||||
Tables tables_snapshot;
|
||||
{
|
||||
std::lock_guard lock(mutex);
|
||||
tables_snapshot = loaded_tables;
|
||||
}
|
||||
|
||||
for (const auto & kv : tables_snapshot)
|
||||
{
|
||||
auto table_id = kv.second->getStorageID();
|
||||
kv.second->flushAndShutdown();
|
||||
}
|
||||
|
||||
std::lock_guard lock(mutex);
|
||||
loaded_tables.clear();
|
||||
}
|
||||
|
||||
DatabaseS3::Configuration DatabaseS3::parseArguments(ASTs engine_args, ContextPtr context_)
|
||||
{
|
||||
Configuration result;
|
||||
|
||||
if (auto named_collection = tryGetNamedCollectionWithOverrides(engine_args, context_))
|
||||
{
|
||||
auto & collection = *named_collection;
|
||||
|
||||
validateNamedCollection(collection, {}, optional_configuration_keys);
|
||||
|
||||
result.url_prefix = collection.getOrDefault<String>("url", "");
|
||||
result.no_sign_request = collection.getOrDefault<bool>("no_sign_request", false);
|
||||
|
||||
auto key_id = collection.getOrDefault<String>("access_key_id", "");
|
||||
auto secret_key = collection.getOrDefault<String>("secret_access_key", "");
|
||||
|
||||
if (!key_id.empty())
|
||||
result.access_key_id = key_id;
|
||||
|
||||
if (!secret_key.empty())
|
||||
result.secret_access_key = secret_key;
|
||||
}
|
||||
else
|
||||
{
|
||||
const std::string supported_signature =
|
||||
" - S3()\n"
|
||||
" - S3('url')\n"
|
||||
" - S3('url', 'NOSIGN')\n"
|
||||
" - S3('url', 'access_key_id', 'secret_access_key')\n";
|
||||
const auto error_message =
|
||||
fmt::format("Engine DatabaseS3 must have one of the following argument signatures:\n{}", supported_signature);
|
||||
|
||||
for (auto & arg : engine_args)
|
||||
arg = evaluateConstantExpressionOrIdentifierAsLiteral(arg, context_);
|
||||
|
||||
if (engine_args.size() > 3)
|
||||
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, error_message.c_str());
|
||||
|
||||
if (engine_args.empty())
|
||||
return result;
|
||||
|
||||
result.url_prefix = checkAndGetLiteralArgument<String>(engine_args[0], "url");
|
||||
|
||||
// url, NOSIGN
|
||||
if (engine_args.size() == 2)
|
||||
{
|
||||
auto second_arg = checkAndGetLiteralArgument<String>(engine_args[1], "NOSIGN");
|
||||
if (boost::iequals(second_arg, "NOSIGN"))
|
||||
result.no_sign_request = true;
|
||||
else
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, error_message.c_str());
|
||||
}
|
||||
|
||||
// url, access_key_id, secret_access_key
|
||||
if (engine_args.size() == 3)
|
||||
{
|
||||
auto key_id = checkAndGetLiteralArgument<String>(engine_args[1], "access_key_id");
|
||||
auto secret_key = checkAndGetLiteralArgument<String>(engine_args[2], "secret_access_key");
|
||||
|
||||
if (key_id.empty() || secret_key.empty() || boost::iequals(key_id, "NOSIGN"))
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, error_message.c_str());
|
||||
|
||||
result.access_key_id = key_id;
|
||||
result.secret_access_key = secret_key;
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns an empty vector because the database is read-only and no tables can be backed up
|
||||
*/
|
||||
std::vector<std::pair<ASTPtr, StoragePtr>> DatabaseS3::getTablesForBackup(const FilterByNameFunction &, const ContextPtr &) const
|
||||
{
|
||||
return {};
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* Returns an empty iterator because the database does not have its own tables,
|
||||
* but only caches them for quick access.
|
||||
*/
|
||||
DatabaseTablesIteratorPtr DatabaseS3::getTablesIterator(ContextPtr, const FilterByNameFunction &) const
|
||||
{
|
||||
return std::make_unique<DatabaseTablesSnapshotIterator>(Tables{}, getDatabaseName());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#endif
|
81
src/Databases/DatabaseS3.h
Normal file
@ -0,0 +1,81 @@
|
||||
#pragma once
|
||||
|
||||
#include "config.h"
|
||||
|
||||
#if USE_AWS_S3
|
||||
|
||||
#include <mutex>
|
||||
#include <Databases/IDatabase.h>
|
||||
#include <Parsers/IAST.h>
|
||||
#include <Storages/IStorage_fwd.h>
|
||||
#include <base/types.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
class Context;
|
||||
|
||||
/**
|
||||
* DatabaseS3 provides access to data stored in S3.
|
||||
* Uses TableFunctionS3 to implicitly load the file when a user requests the table,
|
||||
* and provides read-only access to the data in the file.
|
||||
* Tables are cached inside the database for quick access.
|
||||
*/
|
||||
class DatabaseS3 : public IDatabase, protected WithContext
|
||||
{
|
||||
public:
|
||||
struct Configuration
|
||||
{
|
||||
std::string url_prefix;
|
||||
|
||||
bool no_sign_request = false;
|
||||
|
||||
std::optional<std::string> access_key_id;
|
||||
std::optional<std::string> secret_access_key;
|
||||
};
|
||||
|
||||
DatabaseS3(const String & name, const Configuration& config, ContextPtr context);
|
||||
|
||||
String getEngineName() const override { return "S3"; }
|
||||
|
||||
bool isTableExist(const String & name, ContextPtr context) const override;
|
||||
|
||||
StoragePtr getTable(const String & name, ContextPtr context) const override;
|
||||
|
||||
StoragePtr tryGetTable(const String & name, ContextPtr context) const override;
|
||||
|
||||
// Contains only temporary tables
|
||||
bool shouldBeEmptyOnDetach() const override { return false; }
|
||||
|
||||
bool empty() const override;
|
||||
|
||||
bool isReadOnly() const override { return true; }
|
||||
|
||||
ASTPtr getCreateDatabaseQuery() const override;
|
||||
|
||||
void shutdown() override;
|
||||
|
||||
std::vector<std::pair<ASTPtr, StoragePtr>> getTablesForBackup(const FilterByNameFunction &, const ContextPtr &) const override;
|
||||
DatabaseTablesIteratorPtr getTablesIterator(ContextPtr, const FilterByNameFunction &) const override;
|
||||
|
||||
static Configuration parseArguments(ASTs engine_args, ContextPtr context);
|
||||
|
||||
protected:
|
||||
StoragePtr getTableImpl(const String & name, ContextPtr context) const;
|
||||
|
||||
void addTable(const std::string & table_name, StoragePtr table_storage) const;
|
||||
|
||||
bool checkUrl(const std::string & url, ContextPtr context_, bool throw_on_error) const;
|
||||
|
||||
std::string getFullUrl(const std::string & name) const;
|
||||
|
||||
private:
|
||||
const Configuration config;
|
||||
|
||||
mutable Tables loaded_tables TSA_GUARDED_BY(mutex);
|
||||
Poco::Logger * log;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
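
A minimal usage sketch of the S3 engine (bucket and object names are hypothetical): getFullUrl prepends the configured url_prefix to the table name, and the NOSIGN flag or the credentials from the engine arguments are forwarded to the s3 table function.

CREATE DATABASE db_s3 ENGINE = S3('https://example-bucket.s3.amazonaws.com/data/', 'NOSIGN');
SELECT * FROM db_s3.`reports/2023.parquet`;  -- resolved to https://example-bucket.s3.amazonaws.com/data/reports/2023.parquet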
|
266
src/Databases/DatabasesOverlay.cpp
Normal file
@ -0,0 +1,266 @@
|
||||
#include <Databases/DatabasesOverlay.h>
|
||||
|
||||
#include <Common/typeid_cast.h>
|
||||
#include <Interpreters/Context.h>
|
||||
#include <Interpreters/InterpreterCreateQuery.h>
|
||||
#include <Parsers/ASTCreateQuery.h>
|
||||
|
||||
#include <Storages/IStorage_fwd.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int LOGICAL_ERROR;
|
||||
extern const int CANNOT_GET_CREATE_TABLE_QUERY;
|
||||
}
|
||||
|
||||
DatabasesOverlay::DatabasesOverlay(const String & name_, ContextPtr context_)
|
||||
: IDatabase(name_), WithContext(context_->getGlobalContext()), log(&Poco::Logger::get("DatabaseOverlay(" + name_ + ")"))
|
||||
{
|
||||
}
|
||||
|
||||
DatabasesOverlay & DatabasesOverlay::registerNextDatabase(DatabasePtr database)
|
||||
{
|
||||
databases.push_back(std::move(database));
|
||||
return *this;
|
||||
}
|
||||
|
||||
bool DatabasesOverlay::isTableExist(const String & table_name, ContextPtr context_) const
|
||||
{
|
||||
for (const auto & db : databases)
|
||||
{
|
||||
if (db->isTableExist(table_name, context_))
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
StoragePtr DatabasesOverlay::tryGetTable(const String & table_name, ContextPtr context_) const
|
||||
{
|
||||
StoragePtr result = nullptr;
|
||||
for (const auto & db : databases)
|
||||
{
|
||||
result = db->tryGetTable(table_name, context_);
|
||||
if (result)
|
||||
break;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
void DatabasesOverlay::createTable(ContextPtr context_, const String & table_name, const StoragePtr & table, const ASTPtr & query)
|
||||
{
|
||||
for (auto & db : databases)
|
||||
{
|
||||
if (!db->isReadOnly())
|
||||
{
|
||||
db->createTable(context_, table_name, table, query);
|
||||
return;
|
||||
}
|
||||
}
|
||||
throw Exception(
|
||||
ErrorCodes::LOGICAL_ERROR,
|
||||
"There are no databases for CREATE TABLE `{}` query in database `{}` (engine {})",
|
||||
table_name,
|
||||
getDatabaseName(),
|
||||
getEngineName());
|
||||
}
|
||||
|
||||
void DatabasesOverlay::dropTable(ContextPtr context_, const String & table_name, bool sync)
|
||||
{
|
||||
for (auto & db : databases)
|
||||
{
|
||||
if (db->isTableExist(table_name, context_))
|
||||
{
|
||||
db->dropTable(context_, table_name, sync);
|
||||
return;
|
||||
}
|
||||
}
|
||||
throw Exception(
|
||||
ErrorCodes::LOGICAL_ERROR,
|
||||
"There are no databases for DROP TABLE `{}` query in database `{}` (engine {})",
|
||||
table_name,
|
||||
getDatabaseName(),
|
||||
getEngineName());
|
||||
}
|
||||
|
||||
void DatabasesOverlay::attachTable(
|
||||
ContextPtr context_, const String & table_name, const StoragePtr & table, const String & relative_table_path)
|
||||
{
|
||||
for (auto & db : databases)
|
||||
{
|
||||
try
|
||||
{
|
||||
db->attachTable(context_, table_name, table, relative_table_path);
|
||||
return;
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
}
|
||||
throw Exception(
|
||||
ErrorCodes::LOGICAL_ERROR,
|
||||
"There are no databases for ATTACH TABLE `{}` query in database `{}` (engine {})",
|
||||
table_name,
|
||||
getDatabaseName(),
|
||||
getEngineName());
|
||||
}
|
||||
|
||||
StoragePtr DatabasesOverlay::detachTable(ContextPtr context_, const String & table_name)
|
||||
{
|
||||
StoragePtr result = nullptr;
|
||||
for (auto & db : databases)
|
||||
{
|
||||
if (db->isTableExist(table_name, context_))
|
||||
return db->detachTable(context_, table_name);
|
||||
}
|
||||
throw Exception(
|
||||
ErrorCodes::LOGICAL_ERROR,
|
||||
"There are no databases for DETACH TABLE `{}` query in database `{}` (engine {})",
|
||||
table_name,
|
||||
getDatabaseName(),
|
||||
getEngineName());
|
||||
}
|
||||
|
||||
ASTPtr DatabasesOverlay::getCreateTableQueryImpl(const String & name, ContextPtr context_, bool throw_on_error) const
|
||||
{
|
||||
ASTPtr result = nullptr;
|
||||
for (const auto & db : databases)
|
||||
{
|
||||
result = db->tryGetCreateTableQuery(name, context_);
|
||||
if (result)
|
||||
break;
|
||||
}
|
||||
if (!result && throw_on_error)
|
||||
throw Exception(
|
||||
ErrorCodes::CANNOT_GET_CREATE_TABLE_QUERY,
|
||||
"There is no metadata of table `{}` in database `{}` (engine {})",
|
||||
name,
|
||||
getDatabaseName(),
|
||||
getEngineName());
|
||||
return result;
|
||||
}
|
||||
|
||||
/*
|
||||
* DatabaseOverlay cannot be constructed by a "CREATE DATABASE" query, as it is not a traditional ClickHouse database
|
||||
* To use DatabaseOverlay, it must be constructed programmatically in code
|
||||
*/
|
||||
ASTPtr DatabasesOverlay::getCreateDatabaseQuery() const
|
||||
{
|
||||
return std::make_shared<ASTCreateQuery>();
|
||||
}
|
||||
|
||||
String DatabasesOverlay::getTableDataPath(const String & table_name) const
|
||||
{
|
||||
String result;
|
||||
for (const auto & db : databases)
|
||||
{
|
||||
result = db->getTableDataPath(table_name);
|
||||
if (!result.empty())
|
||||
break;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
String DatabasesOverlay::getTableDataPath(const ASTCreateQuery & query) const
|
||||
{
|
||||
String result;
|
||||
for (const auto & db : databases)
|
||||
{
|
||||
result = db->getTableDataPath(query);
|
||||
if (!result.empty())
|
||||
break;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
UUID DatabasesOverlay::tryGetTableUUID(const String & table_name) const
|
||||
{
|
||||
UUID result = UUIDHelpers::Nil;
|
||||
for (const auto & db : databases)
|
||||
{
|
||||
result = db->tryGetTableUUID(table_name);
|
||||
if (result != UUIDHelpers::Nil)
|
||||
break;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
void DatabasesOverlay::drop(ContextPtr context_)
|
||||
{
|
||||
for (auto & db : databases)
|
||||
db->drop(context_);
|
||||
}
|
||||
|
||||
void DatabasesOverlay::alterTable(ContextPtr local_context, const StorageID & table_id, const StorageInMemoryMetadata & metadata)
|
||||
{
|
||||
for (auto & db : databases)
|
||||
{
|
||||
if (!db->isReadOnly() && db->isTableExist(table_id.table_name, local_context))
|
||||
{
|
||||
db->alterTable(local_context, table_id, metadata);
|
||||
return;
|
||||
}
|
||||
}
|
||||
throw Exception(
|
||||
ErrorCodes::LOGICAL_ERROR,
|
||||
"There are no databases for ALTER TABLE `{}` query in database `{}` (engine {})",
|
||||
table_id.table_name,
|
||||
getDatabaseName(),
|
||||
getEngineName());
|
||||
}
|
||||
|
||||
std::vector<std::pair<ASTPtr, StoragePtr>>
|
||||
DatabasesOverlay::getTablesForBackup(const FilterByNameFunction & filter, const ContextPtr & local_context) const
|
||||
{
|
||||
std::vector<std::pair<ASTPtr, StoragePtr>> result;
|
||||
for (const auto & db : databases)
|
||||
{
|
||||
auto db_backup = db->getTablesForBackup(filter, local_context);
|
||||
result.insert(result.end(), std::make_move_iterator(db_backup.begin()), std::make_move_iterator(db_backup.end()));
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
void DatabasesOverlay::createTableRestoredFromBackup(
    const ASTPtr & create_table_query,
    ContextMutablePtr local_context,
    std::shared_ptr<IRestoreCoordination> /*restore_coordination*/,
    UInt64 /*timeout_ms*/)
{
    /// Creates the table by executing a "CREATE TABLE" query.
    InterpreterCreateQuery interpreter{create_table_query, local_context};
    interpreter.setInternal(true);
    interpreter.execute();
}

bool DatabasesOverlay::empty() const
{
    for (const auto & db : databases)
    {
        if (!db->empty())
            return false;
    }
    return true;
}

void DatabasesOverlay::shutdown()
{
    for (auto & db : databases)
        db->shutdown();
}

DatabaseTablesIteratorPtr DatabasesOverlay::getTablesIterator(ContextPtr context_, const FilterByNameFunction & filter_by_table_name) const
{
    Tables tables;
    for (const auto & db : databases)
    {
        for (auto table_it = db->getTablesIterator(context_, filter_by_table_name); table_it->isValid(); table_it->next())
            tables.insert({table_it->name(), table_it->table()});
    }
    return std::make_unique<DatabaseTablesSnapshotIterator>(std::move(tables), getDatabaseName());
}

}
66
src/Databases/DatabasesOverlay.h
Normal file
@ -0,0 +1,66 @@
#pragma once

#include <Storages/IStorage_fwd.h>
#include <Databases/IDatabase.h>

namespace DB
{

/**
 * Implements the IDatabase interface and combines multiple other databases
 * Searches for tables in each database in order until found, and delegates operations to the appropriate database
 * Useful for combining databases
 *
 * Used in clickhouse-local to combine DatabaseFileSystem and DatabaseMemory
 */
class DatabasesOverlay : public IDatabase, protected WithContext
{
public:
    DatabasesOverlay(const String & name_, ContextPtr context_);

    /// Not thread-safe. Use only as factory to initialize database
    DatabasesOverlay & registerNextDatabase(DatabasePtr database);

    String getEngineName() const override { return "Overlay"; }

public:
    bool isTableExist(const String & table_name, ContextPtr context) const override;

    StoragePtr tryGetTable(const String & table_name, ContextPtr context) const override;

    void createTable(ContextPtr context, const String & table_name, const StoragePtr & table, const ASTPtr & query) override;

    void dropTable(ContextPtr context, const String & table_name, bool sync) override;

    void attachTable(ContextPtr context, const String & table_name, const StoragePtr & table, const String & relative_table_path) override;

    StoragePtr detachTable(ContextPtr context, const String & table_name) override;

    ASTPtr getCreateTableQueryImpl(const String & name, ContextPtr context, bool throw_on_error) const override;
    ASTPtr getCreateDatabaseQuery() const override;

    String getTableDataPath(const String & table_name) const override;
    String getTableDataPath(const ASTCreateQuery & query) const override;

    UUID tryGetTableUUID(const String & table_name) const override;

    void drop(ContextPtr context) override;

    void alterTable(ContextPtr local_context, const StorageID & table_id, const StorageInMemoryMetadata & metadata) override;

    std::vector<std::pair<ASTPtr, StoragePtr>> getTablesForBackup(const FilterByNameFunction & filter, const ContextPtr & local_context) const override;

    void createTableRestoredFromBackup(const ASTPtr & create_table_query, ContextMutablePtr local_context, std::shared_ptr<IRestoreCoordination> restore_coordination, UInt64 timeout_ms) override;

    DatabaseTablesIteratorPtr getTablesIterator(ContextPtr context, const FilterByNameFunction & filter_by_table_name) const override;

    bool empty() const override;

    void shutdown() override;

protected:
    std::vector<DatabasePtr> databases;
    Poco::Logger * log;
};

}
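The comment above DatabasesOverlay::getCreateDatabaseQuery() notes that the overlay cannot be created with a "CREATE DATABASE" query and has to be wired up programmatically. A minimal sketch of what that wiring might look like, using the constructor and registerNextDatabase() declared in the header above; the DatabaseFilesystem and DatabaseMemory constructor signatures and header paths are assumptions for illustration and are not taken from this diff:

#include <Databases/DatabasesOverlay.h>
#include <Databases/DatabaseMemory.h>
#include <Databases/DatabaseFilesystem.h> /// assumed header name

/// Hypothetical start-up code, e.g. what clickhouse-local might do:
/// the overlay consults the filesystem-backed layer first and falls back to the in-memory one.
DatabasePtr makeLocalOverlayDatabase(const String & name, ContextPtr context)
{
    auto overlay = std::make_shared<DatabasesOverlay>(name, context);
    overlay->registerNextDatabase(std::make_shared<DatabaseFilesystem>(name, "", context)); /// assumed constructor
    overlay->registerNextDatabase(std::make_shared<DatabaseMemory>(name, context));         /// assumed constructor
    return overlay;
}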
@ -170,7 +170,7 @@ public:
    /// Get the table for work. Return nullptr if there is no table.
    virtual StoragePtr tryGetTable(const String & name, ContextPtr context) const = 0;

    StoragePtr getTable(const String & name, ContextPtr context) const;
    virtual StoragePtr getTable(const String & name, ContextPtr context) const;

    virtual UUID tryGetTableUUID(const String & /*table_name*/) const { return UUIDHelpers::Nil; }

@ -183,6 +183,8 @@ public:
    /// Is the database empty.
    virtual bool empty() const = 0;

    virtual bool isReadOnly() const { return false; }

    /// Add the table to the database. Record its presence in the metadata.
    virtual void createTable(
        ContextPtr /*context*/,

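The new isReadOnly() default is the hook that DatabasesOverlay::alterTable() above checks before delegating: read-only layers are skipped and the next database in the chain is tried. A minimal, hypothetical sketch of a layer opting out of ALTER (the example class is not part of this diff):

#include <Databases/DatabaseMemory.h>

/// Hypothetical read-only layer: inherits everything from DatabaseMemory but reports itself as read-only,
/// so DatabasesOverlay::alterTable() will not try to alter tables stored in it.
class DatabaseReadOnlyExample : public DatabaseMemory
{
public:
    using DatabaseMemory::DatabaseMemory;

    bool isReadOnly() const override { return true; }
};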
@ -301,9 +301,8 @@ off_t AsynchronousBoundedReadBuffer::seek(off_t offset, int whence)
     * Lazy ignore. Save number of bytes to ignore and ignore it either for prefetch buffer or current buffer.
     * Note: we read in range [file_offset_of_buffer_end, read_until_position).
     */
    if (file_offset_of_buffer_end && read_until_position && new_pos < *read_until_position
        && new_pos > file_offset_of_buffer_end
        && new_pos < file_offset_of_buffer_end + read_settings.remote_read_min_bytes_for_seek)
    if (!impl->seekIsCheap() && file_offset_of_buffer_end && read_until_position && new_pos < *read_until_position
        && new_pos > file_offset_of_buffer_end && new_pos < file_offset_of_buffer_end + read_settings.remote_read_min_bytes_for_seek)
    {
        ProfileEvents::increment(ProfileEvents::RemoteFSLazySeeks);
        bytes_to_ignore = new_pos - file_offset_of_buffer_end;

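The changed condition above adds !impl->seekIsCheap() to the "lazy seek" fast path: a short forward jump that stays inside the planned read range is handled by just remembering how many bytes to skip on the next read, but only when the underlying reader cannot reposition cheaply anyway. A standalone restatement of that predicate, as a sketch (simplified to plain integer offsets; not the actual member function):

#include <cstddef>
#include <optional>

/// Mirrors the guard in AsynchronousBoundedReadBuffer::seek(): skip bytes lazily instead of reseeking
/// when the target is a small forward offset within [buffer_end, read_until) and a real seek would be costly.
bool shouldSeekLazily(bool impl_seek_is_cheap, size_t buffer_end, std::optional<size_t> read_until, size_t new_pos, size_t min_bytes_for_seek)
{
    return !impl_seek_is_cheap
        && buffer_end != 0
        && read_until && new_pos < *read_until
        && new_pos > buffer_end
        && new_pos < buffer_end + min_bytes_for_seek;
}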
@ -50,6 +50,8 @@ public:

    off_t getPosition() override { return file_offset_of_buffer_end - available() + bytes_to_ignore; }

    bool seekIsCheap() override { return !current_buf; }

private:
    SeekableReadBufferPtr createImplementationBuffer(const StoredObject & object);

@ -751,7 +751,7 @@ namespace
private:
    using Reader = typename CapnpType::Reader;

    CapnpType::Reader getData(const ColumnPtr & column, size_t row_num)
    Reader getData(const ColumnPtr & column, size_t row_num)
    {
        auto data = column->getDataAt(row_num);
        if constexpr (std::is_same_v<CapnpType, capnp::Data>)

@ -801,7 +801,7 @@ namespace
private:
    using Reader = typename CapnpType::Reader;

    CapnpType::Reader getData(const ColumnPtr & column, size_t row_num)
    Reader getData(const ColumnPtr & column, size_t row_num)
    {
        auto data = column->getDataAt(row_num);
        if constexpr (std::is_same_v<CapnpType, capnp::Data>)

@ -71,6 +71,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings)
    format_settings.csv.try_detect_header = settings.input_format_csv_detect_header;
    format_settings.csv.skip_trailing_empty_lines = settings.input_format_csv_skip_trailing_empty_lines;
    format_settings.csv.trim_whitespaces = settings.input_format_csv_trim_whitespaces;
    format_settings.csv.allow_whitespace_or_tab_as_delimiter = settings.input_format_csv_allow_whitespace_or_tab_as_delimiter;
    format_settings.csv.ignore_extra_columns = settings.input_format_csv_ignore_extra_columns;
    format_settings.csv.missing_as_default = settings.input_format_csv_missing_as_default;
    format_settings.hive_text.fields_delimiter = settings.input_format_hive_text_fields_delimiter;

@ -139,6 +139,7 @@ struct FormatSettings
        bool try_detect_header = true;
        bool skip_trailing_empty_lines = false;
        bool trim_whitespaces = true;
        bool allow_whitespace_or_tab_as_delimiter = false;
        bool ignore_extra_columns = false;
        bool missing_as_default = false;
    } csv;
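The two new csv members are populated from the input_format_csv_ignore_extra_columns and input_format_csv_missing_as_default settings shown in the first hunk. A toy sketch of the per-row behaviour the names suggest (assumed semantics for illustration; the real handling lives in the CSV input format, not in this function):

#include <optional>
#include <string>
#include <vector>

/// Toy model: adjust one parsed CSV row to the expected column count according to the two flags.
/// Returns std::nullopt where the real reader would raise a parse error.
std::optional<std::vector<std::string>> adjustCsvRow(
    std::vector<std::string> fields, size_t expected_columns, bool ignore_extra_columns, bool missing_as_default)
{
    if (fields.size() > expected_columns)
    {
        if (!ignore_extra_columns)
            return std::nullopt;             /// extra columns are an error unless explicitly ignored
        fields.resize(expected_columns);     /// drop the trailing extra fields
    }
    else if (fields.size() < expected_columns)
    {
        if (!missing_as_default)
            return std::nullopt;             /// missing columns are an error unless defaults are allowed
        fields.resize(expected_columns, ""); /// fill the missing fields with (empty) default values
    }
    return fields;
}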
Some files were not shown because too many files have changed in this diff.