Merge branch 'master' into format-settings-parsing

This commit is contained in:
Alexey Milovidov 2024-06-29 04:25:34 +02:00
commit 0a493b88f2
358 changed files with 5568 additions and 2983 deletions

View File

@ -159,33 +159,24 @@ jobs:
############################################################################################
##################################### BUILD REPORTER #######################################
############################################################################################
BuilderReport:
Builds_Report:
# run report check for failed builds to indicate the CI error
if: ${{ !cancelled() }}
needs:
- RunConfig
- BuilderDebAarch64
- BuilderDebAsan
- BuilderDebDebug
- BuilderDebRelease
- BuilderDebTsan
uses: ./.github/workflows/reusable_test.yml
with:
test_name: ClickHouse build check
runner_type: style-checker-aarch64
data: ${{ needs.RunConfig.outputs.data }}
BuilderSpecialReport:
# run report check for failed builds to indicate the CI error
if: ${{ !cancelled() }}
needs:
- RunConfig
- BuilderBinDarwin
- BuilderBinDarwinAarch64
uses: ./.github/workflows/reusable_test.yml
with:
test_name: ClickHouse special build check
runner_type: style-checker-aarch64
data: ${{ needs.RunConfig.outputs.data }}
if: ${{ !cancelled() && needs.RunConfig.result == 'success' && contains(fromJson(needs.RunConfig.outputs.data).jobs_data.jobs_to_do, 'Builds') }}
needs: [RunConfig, BuilderDebAarch64, BuilderDebAsan, BuilderDebDebug, BuilderDebRelease, BuilderDebTsan, BuilderBinDarwin, BuilderBinDarwinAarch64]
runs-on: [self-hosted, style-checker-aarch64]
steps:
- name: Check out repository code
uses: ClickHouse/checkout@v1
- name: Download reports
run: |
python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --infile ${{ toJson(needs.RunConfig.outputs.data) }} --pre --job-name Builds
- name: Builds report
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
python3 ./build_report_check.py --reports package_release package_aarch64 package_asan package_tsan package_debug binary_darwin binary_darwin_aarch64
- name: Set status
run: |
python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --infile ${{ toJson(needs.RunConfig.outputs.data) }} --post --job-name Builds
############################################################################################
#################################### INSTALL PACKAGES ######################################
############################################################################################
@ -256,8 +247,7 @@ jobs:
FinishCheck:
if: ${{ !failure() && !cancelled() }}
needs:
- BuilderReport
- BuilderSpecialReport
- Builds_Report
- FunctionalStatelessTestAsan
- FunctionalStatefulTestDebug
- StressTestTsan
@ -273,5 +263,8 @@ jobs:
- name: Finish label
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
# update mergeable check
python3 merge_pr.py --set-ci-status --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }}
# update overall ci report
python3 finish_check.py --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }}
python3 merge_pr.py

29
.github/workflows/create_release.yml vendored Normal file
View File

@ -0,0 +1,29 @@
name: CreateRelease
concurrency:
group: release
'on':
workflow_dispatch:
inputs:
sha:
description: 'The SHA hash of the commit from which to create the release'
required: true
type: string
type:
description: 'The type of release: "new" for a new release or "patch" for a patch release'
required: true
type: choice
options:
- new
- patch
jobs:
Release:
runs-on: [self-hosted, style-checker-aarch64]
steps:
- name: Check out repository code
uses: ClickHouse/checkout@v1
- name: Print greeting
run: |
python3 ./tests/ci/release.py --commit ${{ inputs.sha }} --type ${{ inputs.type }} --dry-run

View File

@ -117,11 +117,11 @@ jobs:
# Reports should run even if Builds_1/2 fail - run them separately, not in Tests_1/2/3
Builds_Report:
# run report check for failed builds to indicate the CI error
if: ${{ !cancelled() && needs.RunConfig.result == 'success' && contains(fromJson(needs.RunConfig.outputs.data).jobs_data.jobs_to_do, 'ClickHouse build check') }}
if: ${{ !cancelled() && needs.RunConfig.result == 'success' && contains(fromJson(needs.RunConfig.outputs.data).jobs_data.jobs_to_do, 'Builds') }}
needs: [RunConfig, Builds_1, Builds_2]
uses: ./.github/workflows/reusable_test.yml
with:
test_name: ClickHouse build check
test_name: Builds
runner_type: style-checker-aarch64
data: ${{ needs.RunConfig.outputs.data }}

View File

@ -96,20 +96,15 @@ jobs:
stage: Tests_1
data: ${{ needs.RunConfig.outputs.data }}
################################# Stage Final #################################
#
FinishCheck:
if: ${{ !cancelled() }}
CheckReadyForMerge:
if: ${{ !cancelled() && needs.StyleCheck.result == 'success' }}
# Test_2 or Test_3 must not have jobs required for Mergeable check
needs: [RunConfig, BuildDockers, StyleCheck, FastTest, Builds_1, Tests_1]
runs-on: [self-hosted, style-checker-aarch64]
steps:
- name: Check out repository code
uses: ClickHouse/checkout@v1
- name: Check sync status
- name: Check and set merge status
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
python3 sync_pr.py --status
- name: Finish label
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
python3 finish_check.py --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }}
python3 merge_pr.py --set-ci-status --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }}

View File

@ -146,11 +146,11 @@ jobs:
# Reports should run even if Builds_1/2 fail - run them separately (not in Tests_1/2/3)
Builds_Report:
# run report check for failed builds to indicate the CI error
if: ${{ !cancelled() && needs.RunConfig.result == 'success' && contains(fromJson(needs.RunConfig.outputs.data).jobs_data.jobs_to_do, 'ClickHouse build check') }}
if: ${{ !cancelled() && needs.RunConfig.result == 'success' && contains(fromJson(needs.RunConfig.outputs.data).jobs_data.jobs_to_do, 'Builds') }}
needs: [RunConfig, StyleCheck, Builds_1, Builds_2]
uses: ./.github/workflows/reusable_test.yml
with:
test_name: ClickHouse build check
test_name: Builds
runner_type: style-checker-aarch64
data: ${{ needs.RunConfig.outputs.data }}

View File

@ -176,35 +176,24 @@ jobs:
############################################################################################
##################################### BUILD REPORTER #######################################
############################################################################################
BuilderReport:
Builds_Report:
# run report check for failed builds to indicate the CI error
if: ${{ !cancelled() }}
needs:
- RunConfig
- BuilderDebRelease
- BuilderDebAarch64
- BuilderDebAsan
- BuilderDebTsan
- BuilderDebUBsan
- BuilderDebMsan
- BuilderDebDebug
uses: ./.github/workflows/reusable_test.yml
with:
test_name: ClickHouse build check
runner_type: style-checker-aarch64
data: ${{ needs.RunConfig.outputs.data }}
BuilderSpecialReport:
# run report check for failed builds to indicate the CI error
if: ${{ !cancelled() }}
needs:
- RunConfig
- BuilderBinDarwin
- BuilderBinDarwinAarch64
uses: ./.github/workflows/reusable_test.yml
with:
test_name: ClickHouse special build check
runner_type: style-checker-aarch64
data: ${{ needs.RunConfig.outputs.data }}
if: ${{ !cancelled() && needs.RunConfig.result == 'success' && contains(fromJson(needs.RunConfig.outputs.data).jobs_data.jobs_to_do, 'Builds') }}
needs: [RunConfig, BuilderDebRelease, BuilderDebAarch64, BuilderDebAsan, BuilderDebUBsan, BuilderDebMsan, BuilderDebTsan, BuilderDebDebug, BuilderBinDarwin, BuilderBinDarwinAarch64]
runs-on: [self-hosted, style-checker-aarch64]
steps:
- name: Check out repository code
uses: ClickHouse/checkout@v1
- name: Download reports
run: |
python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --infile ${{ toJson(needs.RunConfig.outputs.data) }} --pre --job-name Builds
- name: Builds report
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
python3 ./build_report_check.py --reports package_release package_aarch64 package_asan package_msan package_ubsan package_tsan package_debug binary_darwin binary_darwin_aarch64
- name: Set status
run: |
python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --infile ${{ toJson(needs.RunConfig.outputs.data) }} --post --job-name Builds
MarkReleaseReady:
if: ${{ !failure() && !cancelled() }}
needs:
@ -460,8 +449,7 @@ jobs:
needs:
- DockerServerImage
- DockerKeeperImage
- BuilderReport
- BuilderSpecialReport
- Builds_Report
- MarkReleaseReady
- FunctionalStatelessTestDebug
- FunctionalStatelessTestRelease
@ -496,4 +484,7 @@ jobs:
- name: Finish label
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
# update mergeable check
python3 merge_pr.py --set-ci-status --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }}
# update overall ci report
python3 finish_check.py --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }}

View File

@ -29,17 +29,17 @@
* Added `merge_workload` and `mutation_workload` settings to regulate how resources are utilized and shared between merges, mutations and other workloads. [#64061](https://github.com/ClickHouse/ClickHouse/pull/64061) ([Sergei Trifonov](https://github.com/serxa)).
* Add support for comparing IPv4 and IPv6 types using the `=` operator. [#64292](https://github.com/ClickHouse/ClickHouse/pull/64292) ([Francisco J. Jurado Moreno](https://github.com/Beetelbrox)).
* Allow to store named collections in zookeeper. [#64574](https://github.com/ClickHouse/ClickHouse/pull/64574) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Support decimal arguments in binary math functions (pow(), atan2(), max2, min2(), hypot(). [#64582](https://github.com/ClickHouse/ClickHouse/pull/64582) ([Mikhail Gorshkov](https://github.com/mgorshkov)).
* Support decimal arguments in binary math functions (pow, atan2, max2, min2, hypot). [#64582](https://github.com/ClickHouse/ClickHouse/pull/64582) ([Mikhail Gorshkov](https://github.com/mgorshkov)).
* Add support for index analysis over `hilbertEncode`. [#64662](https://github.com/ClickHouse/ClickHouse/pull/64662) ([Artem Mustafin](https://github.com/Artemmm91)).
* Added SQL functions `parseReadableSize` (along with `OrNull` and `OrZero` variants). [#64742](https://github.com/ClickHouse/ClickHouse/pull/64742) ([Francisco J. Jurado Moreno](https://github.com/Beetelbrox)).
* Add server settings `max_table_num_to_throw` and `max_database_num_to_throw` to limit the number of databases or tables on `CREATE` queries. [#64781](https://github.com/ClickHouse/ClickHouse/pull/64781) ([Xu Jia](https://github.com/XuJia0210)).
* Add _time virtual column to file alike storages (s3/file/hdfs/url/azureBlobStorage). [#64947](https://github.com/ClickHouse/ClickHouse/pull/64947) ([Ilya Golshtein](https://github.com/ilejn)).
* Introduced new functions `base64UrlEncode`, `base64UrlDecode` and `tryBase64UrlDecode`. [#64991](https://github.com/ClickHouse/ClickHouse/pull/64991) ([Mikhail Gorshkov](https://github.com/mgorshkov)).
* Introduced new functions `base64URLEncode`, `base64URLDecode` and `tryBase64URLDecode`. [#64991](https://github.com/ClickHouse/ClickHouse/pull/64991) ([Mikhail Gorshkov](https://github.com/mgorshkov)).
* Add new function `editDistanceUTF8`, which calculates the [edit distance](https://en.wikipedia.org/wiki/Edit_distance) between two UTF8 strings. [#65269](https://github.com/ClickHouse/ClickHouse/pull/65269) ([LiuNeng](https://github.com/liuneng1994)).
#### Performance Improvement
* Add a native parquet reader, which can read parquet binary to ClickHouse Columns directly. It's controlled by the setting `input_format_parquet_use_native_reader` (disabled by default). [#60361](https://github.com/ClickHouse/ClickHouse/pull/60361) ([ZhiHong Zhang](https://github.com/copperybean)).
* Reduce the number of virtual function calls in ColumnNullable::size(). [#60556](https://github.com/ClickHouse/ClickHouse/pull/60556) ([HappenLee](https://github.com/HappenLee)).
* Reduce the number of virtual function calls in ColumnNullable::size. [#60556](https://github.com/ClickHouse/ClickHouse/pull/60556) ([HappenLee](https://github.com/HappenLee)).
* Speedup `splitByRegexp` when the regular expression argument is a single-character. [#62696](https://github.com/ClickHouse/ClickHouse/pull/62696) ([Robert Schulze](https://github.com/rschu1ze)).
* Speed up FixedHashTable by keeping track of the min and max keys used. This allows to reduce the number of cells that need to be verified. [#62746](https://github.com/ClickHouse/ClickHouse/pull/62746) ([Jiebin Sun](https://github.com/jiebinn)).
* Optimize the resolution of in(LowCardinality, ConstantSet). [#64060](https://github.com/ClickHouse/ClickHouse/pull/64060) ([Zhiguo Zhou](https://github.com/ZhiguoZh)).
@ -51,7 +51,7 @@
* Improve function least/greatest for nullable numberic type arguments. [#64668](https://github.com/ClickHouse/ClickHouse/pull/64668) ([KevinyhZou](https://github.com/KevinyhZou)).
* Allow merging two consequent `FilterSteps` of a query plan. This improves filter-push-down optimization if the filter condition can be pushed down from the parent step. [#64760](https://github.com/ClickHouse/ClickHouse/pull/64760) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Remove bad optimization in vertical final implementation and re-enable vertical final algorithm by default. [#64783](https://github.com/ClickHouse/ClickHouse/pull/64783) ([Duc Canh Le](https://github.com/canhld94)).
* Remove ALIAS nodes from the filter expression. This slightly improves performance for queries with `PREWHERE` (with new analyzer). [#64793](https://github.com/ClickHouse/ClickHouse/pull/64793) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Remove ALIAS nodes from the filter expression. This slightly improves performance for queries with `PREWHERE` (with the new analyzer). [#64793](https://github.com/ClickHouse/ClickHouse/pull/64793) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Fix performance regression in cross join introduced in [#60459](https://github.com/ClickHouse/ClickHouse/issues/60459) (24.5). [#65243](https://github.com/ClickHouse/ClickHouse/pull/65243) ([Nikita Taranov](https://github.com/nickitat)).
#### Improvement
@ -63,7 +63,7 @@
* Reduce the memory usage when using Azure object storage by using fixed memory allocation, avoiding the allocation of an extra buffer. [#63160](https://github.com/ClickHouse/ClickHouse/pull/63160) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
* Several minor corner case fixes to proxy support & tunneling. [#63427](https://github.com/ClickHouse/ClickHouse/pull/63427) ([Arthur Passos](https://github.com/arthurpassos)).
* Add `http_response_headers` setting to support custom response headers in custom HTTP handlers. [#63562](https://github.com/ClickHouse/ClickHouse/pull/63562) ([Grigorii](https://github.com/GSokol)).
* Improve io_uring resubmits visibility. Rename profile event `IOUringSQEsResubmits` -> `IOUringSQEsResubmitsAsync` and add a new one `IOUringSQEsResubmitsSync`. [#63699](https://github.com/ClickHouse/ClickHouse/pull/63699) ([Tomer Shafir](https://github.com/tomershafir)).
* Improve io_uring resubmit visibility. Rename profile event `IOUringSQEsResubmits` -> `IOUringSQEsResubmitsAsync` and add a new one `IOUringSQEsResubmitsSync`. [#63699](https://github.com/ClickHouse/ClickHouse/pull/63699) ([Tomer Shafir](https://github.com/tomershafir)).
* Introduce assertions to verify all functions are called with columns of the right size. [#63723](https://github.com/ClickHouse/ClickHouse/pull/63723) ([Raúl Marín](https://github.com/Algunenano)).
* `SHOW CREATE TABLE` executed on top of system tables will now show the super handy comment unique for each table which will explain why this table is needed. [#63788](https://github.com/ClickHouse/ClickHouse/pull/63788) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
* Added setting `metadata_storage_type` to keep free space on metadata storage disk. [#64128](https://github.com/ClickHouse/ClickHouse/pull/64128) ([MikhailBurdukov](https://github.com/MikhailBurdukov)).

View File

@ -6,6 +6,9 @@ namespace
{
std::string getFQDNOrHostNameImpl()
{
#if defined(OS_DARWIN)
return Poco::Net::DNS::hostName();
#else
try
{
return Poco::Net::DNS::thisHost().name();
@ -14,6 +17,7 @@ namespace
{
return Poco::Net::DNS::hostName();
}
#endif
}
}

View File

@ -34,7 +34,7 @@ if (OS_LINUX)
# avoid spurious latencies and additional work associated with
# MADV_DONTNEED. See
# https://github.com/ClickHouse/ClickHouse/issues/11121 for motivation.
set (JEMALLOC_CONFIG_MALLOC_CONF "percpu_arena:percpu,oversize_threshold:0,muzzy_decay_ms:0,dirty_decay_ms:5000")
set (JEMALLOC_CONFIG_MALLOC_CONF "percpu_arena:percpu,oversize_threshold:0,muzzy_decay_ms:0,dirty_decay_ms:5000,prof:true,prof_active:false,background_thread:true")
else()
set (JEMALLOC_CONFIG_MALLOC_CONF "oversize_threshold:0,muzzy_decay_ms:0,dirty_decay_ms:5000")
endif()

View File

@ -37,6 +37,7 @@ RUN pip3 install \
tqdm==4.66.4 \
types-requests \
unidiff \
jwt \
&& rm -rf /root/.cache/pip
RUN echo "en_US.UTF-8 UTF-8" > /etc/locale.gen && locale-gen en_US.UTF-8

View File

@ -267,7 +267,7 @@ A pull request can be created even if the work is not completed yet. In this cas
Testing will commence as soon as ClickHouse employees label your PR with a tag “can be tested”. The results of some first checks (e.g. code style) will come in within several minutes. Build check results will arrive within half an hour. And the main set of tests will report itself within an hour.
The system will prepare ClickHouse binary builds for your pull request individually. To retrieve these builds click the “Details” link next to “ClickHouse build check” entry in the list of checks. There you will find direct links to the built .deb packages of ClickHouse which you can deploy even on your production servers (if you have no fear).
The system will prepare ClickHouse binary builds for your pull request individually. To retrieve these builds click the “Details” link next to “Builds” entry in the list of checks. There you will find direct links to the built .deb packages of ClickHouse which you can deploy even on your production servers (if you have no fear).
Most probably some of the builds will fail at first times. This is due to the fact that we check builds both with gcc as well as with clang, with almost all of existing warnings (always with the `-Werror` flag) enabled for clang. On that same page, you can find all of the build logs so that you do not have to build ClickHouse in all of the possible ways.

View File

@ -28,7 +28,7 @@ run, for example, the test `01428_hash_set_nan_key`, change to the repository
folder and run the following command:
```
PATH=$PATH:<path to clickhouse-client> tests/clickhouse-test 01428_hash_set_nan_key
PATH=<path to clickhouse-client>:$PATH tests/clickhouse-test 01428_hash_set_nan_key
```
Test results (`stderr` and `stdout`) are written to files `01428_hash_set_nan_key.[stderr|stdout]` which

View File

@ -56,6 +56,15 @@ SELECT * FROM test_table;
- `_size` — Size of the file in bytes. Type: `Nullable(UInt64)`. If the size is unknown, the value is `NULL`.
- `_time` — Last modified time of the file. Type: `Nullable(DateTime)`. If the time is unknown, the value is `NULL`.
## Authentication
Currently there are 3 ways to authenticate:
- `Managed Identity` - Can be used by providing an `endpoint`, `connection_string` or `storage_account_url`.
- `SAS Token` - Can be used by providing an `endpoint`, `connection_string` or `storage_account_url`. It is identified by presence of '?' in the url.
- `Workload Identity` - Can be used by providing an `endpoint` or `storage_account_url`. If `use_workload_identity` parameter is set in config, ([workload identity](https://github.com/Azure/azure-sdk-for-cpp/tree/main/sdk/identity/azure-identity#authenticate-azure-hosted-applications)) is used for authentication.
## See also
[Azure Blob Storage Table Function](/docs/en/sql-reference/table-functions/azureBlobStorage)

View File

@ -28,6 +28,8 @@ CREATE TABLE s3_queue_engine_table (name String, value UInt32)
[s3queue_cleanup_interval_max_ms = 30000,]
```
Starting with `24.7` settings without `s3queue_` prefix are also supported.
**Engine parameters**
- `path` — Bucket url with path to file. Supports following wildcards in readonly mode: `*`, `**`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc'`, `'def'` — strings. For more information see [below](#wildcards-in-path).

View File

@ -314,7 +314,7 @@ For example, to download a aarch64 binary for ClickHouse v23.4, follow these ste
- Find the GitHub pull request for release v23.4: [Release pull request for branch 23.4](https://github.com/ClickHouse/ClickHouse/pull/49238)
- Click "Commits", then click a commit similar to "Update autogenerated version to 23.4.2.1 and contributors" for the particular version you like to install.
- Click the green check / yellow dot / red cross to open the list of CI checks.
- Click "Details" next to "ClickHouse Build Check" in the list, it will open a page similar to [this page](https://s3.amazonaws.com/clickhouse-test-reports/46793/b460eb70bf29b19eadd19a1f959b15d186705394/clickhouse_build_check/report.html)
- Click "Details" next to "Builds" in the list, it will open a page similar to [this page](https://s3.amazonaws.com/clickhouse-test-reports/46793/b460eb70bf29b19eadd19a1f959b15d186705394/clickhouse_build_check/report.html)
- Find the rows with compiler = "clang-*-aarch64" - there are multiple rows.
- Download the artifacts for these builds.

View File

@ -193,6 +193,7 @@ You can pass parameters to `clickhouse-client` (all parameters have a default va
- `--hardware-utilization` — Print hardware utilization information in progress bar.
- `--print-profile-events` Print `ProfileEvents` packets.
- `--profile-events-delay-ms` Delay between printing `ProfileEvents` packets (-1 - print only totals, 0 - print every single packet).
- `--jwt` If specified, enables authorization via JSON Web Token. Server JWT authorization is available only in ClickHouse Cloud.
Instead of `--host`, `--port`, `--user` and `--password` options, ClickHouse client also supports connection strings (see next section).

View File

@ -2169,6 +2169,7 @@ To exchange data with Hadoop, you can use [HDFS table engine](/docs/en/engines/t
- [output_format_parquet_compression_method](/docs/en/operations/settings/settings-formats.md/#output_format_parquet_compression_method) - compression method used in output Parquet format. Default value - `lz4`.
- [input_format_parquet_max_block_size](/docs/en/operations/settings/settings-formats.md/#input_format_parquet_max_block_size) - Max block row size for parquet reader. Default value - `65409`.
- [input_format_parquet_prefer_block_bytes](/docs/en/operations/settings/settings-formats.md/#input_format_parquet_prefer_block_bytes) - Average block bytes output by parquet reader. Default value - `16744704`.
- [output_format_parquet_write_page_index](/docs/en/operations/settings/settings-formats.md/#input_format_parquet_max_block_size) - Add a possibility to write page index into parquet files. Need to disable `output_format_parquet_use_custom_encoder` at present. Default value - `true`.
## ParquetMetadata {data-format-parquet-metadata}

View File

@ -5,6 +5,10 @@ sidebar_label: "Named collections"
title: "Named collections"
---
import CloudNotSupportedBadge from '@theme/badges/CloudNotSupportedBadge';
<CloudNotSupportedBadge />
Named collections provide a way to store collections of key-value pairs to be
used to configure integrations with external sources. You can use named collections with
dictionaries, tables, table functions, and object storage.

View File

@ -498,6 +498,8 @@ Default: 0.9
Interval in seconds during which the server's maximum allowed memory consumption is adjusted by the corresponding threshold in cgroups. (see
settings `cgroup_memory_watcher_hard_limit_ratio` and `cgroup_memory_watcher_soft_limit_ratio`).
To disable the cgroup observer, set this value to `0`.
Type: UInt64
Default: 15
@ -1463,6 +1465,9 @@ Keys:
- `size` Size of the file. Applies to `log` and `errorlog`. Once the file reaches `size`, ClickHouse archives and renames it, and creates a new log file in its place.
- `count` The number of archived log files that ClickHouse stores.
- `console` Send `log` and `errorlog` to the console instead of file. To enable, set to `1` or `true`.
- `console_log_level` Logging level for console. Default to `level`.
- `use_syslog` - Log to syslog as well.
- `syslog_level` - Logging level for logging to syslog.
- `stream_compress` Compress `log` and `errorlog` with `lz4` stream compression. To enable, set to `1` or `true`.
- `formatting` Specify log format to be printed in console log (currently only `json` supported).

View File

@ -1428,6 +1428,13 @@ Average block bytes output by parquet reader. Lowering the configuration in the
Default value: `65409 * 256 = 16744704`
### output_format_parquet_write_page_index {#input_format_parquet_max_block_size}
Could add page index into parquet files. To enable this, need set `output_format_parquet_use_custom_encoder`=`false` and
`output_format_parquet_write_page_index`=`true`.
Enable by default.
## Hive format settings {#hive-format-settings}
### input_format_hive_text_fields_delimiter {#input_format_hive_text_fields_delimiter}

View File

@ -36,9 +36,24 @@ $ echo 0 | sudo tee /proc/sys/vm/overcommit_memory
Use `perf top` to watch the time spent in the kernel for memory management.
Permanent huge pages also do not need to be allocated.
:::warning
If your system has less than 16 GB of RAM, you may experience various memory exceptions because default settings do not match this amount of memory. The recommended amount of RAM is 32 GB or more. You can use ClickHouse in a system with a small amount of RAM, even with 2 GB of RAM, but it requires additional tuning and can ingest at a low rate.
:::
### Using less than 16GB of RAM
The recommended amount of RAM is 32 GB or more.
If your system has less than 16 GB of RAM, you may experience various memory exceptions because default settings do not match this amount of memory. You can use ClickHouse in a system with a small amount of RAM (as low as 2 GB), but these setups require additional tuning and can only ingest at a low rate.
When using ClickHouse with less than 16GB of RAM, we recommend the following:
- Lower the size of the mark cache in the `config.xml`. It can be set as low as 500 MB, but it cannot be set to zero.
- Lower the number of query processing threads down to `1`.
- Lower the `max_block_size` to `8192`. Values as low as `1024` can still be practical.
- Lower `max_download_threads` to `1`.
- Set `input_format_parallel_parsing` and `output_format_parallel_formatting` to `0`.
Additional notes:
- To flush the memory cached by the memory allocator, you can run the `SYSTEM JEMALLOC PURGE`
command.
- We do not recommend using S3 or Kafka integrations on low-memory machines because they require significant memory for buffers.
## Storage Subsystem {#storage-subsystem}

View File

@ -236,10 +236,10 @@ Read 2 rows, 32.00 B in 0.000 sec., 5182 rows/sec., 80.97 KiB/sec.
Previous example is the same as:
``` bash
$ echo -e "1,2\n3,4" | clickhouse-local --query "
$ echo -e "1,2\n3,4" | clickhouse-local -n --query "
CREATE TABLE table (a Int64, b Int64) ENGINE = File(CSV, stdin);
SELECT a, b FROM table;
DROP TABLE table"
DROP TABLE table;"
Read 2 rows, 32.00 B in 0.000 sec., 4987 rows/sec., 77.93 KiB/sec.
1 2
3 4

View File

@ -6,26 +6,297 @@ sidebar_label: NLP (experimental)
# Natural Language Processing (NLP) Functions
:::note
:::warning
This is an experimental feature that is currently in development and is not ready for general use. It will change in unpredictable backwards-incompatible ways in future releases. Set `allow_experimental_nlp_functions = 1` to enable it.
:::
## detectCharset
The `detectCharset` function detects the character set of the non-UTF8-encoded input string.
*Syntax*
``` sql
detectCharset('text_to_be_analyzed')
```
*Arguments*
- `text_to_be_analyzed` — A collection (or sentences) of strings to analyze. [String](../data-types/string.md#string).
*Returned value*
- A `String` containing the code of the detected character set
*Examples*
Query:
```sql
SELECT detectCharset('Ich bleibe für ein paar Tage.');
```
Result:
```response
┌─detectCharset('Ich bleibe für ein paar Tage.')─┐
│ WINDOWS-1252 │
└────────────────────────────────────────────────┘
```
## detectLanguage
Detects the language of the UTF8-encoded input string. The function uses the [CLD2 library](https://github.com/CLD2Owners/cld2) for detection, and it returns the 2-letter ISO language code.
The `detectLanguage` function works best when providing over 200 characters in the input string.
*Syntax*
``` sql
detectLanguage('text_to_be_analyzed')
```
*Arguments*
- `text_to_be_analyzed` — A collection (or sentences) of strings to analyze. [String](../data-types/string.md#string).
*Returned value*
- The 2-letter ISO code of the detected language
Other possible results:
- `un` = unknown, can not detect any language.
- `other` = the detected language does not have 2 letter code.
*Examples*
Query:
```sql
SELECT detectLanguage('Je pense que je ne parviendrai jamais à parler français comme un natif. Where theres a will, theres a way.');
```
Result:
```response
fr
```
## detectLanguageMixed
Similar to the `detectLanguage` function, but `detectLanguageMixed` returns a `Map` of 2-letter language codes that are mapped to the percentage of the certain language in the text.
*Syntax*
``` sql
detectLanguageMixed('text_to_be_analyzed')
```
*Arguments*
- `text_to_be_analyzed` — A collection (or sentences) of strings to analyze. [String](../data-types/string.md#string).
*Returned value*
- `Map(String, Float32)`: The keys are 2-letter ISO codes and the values are a percentage of text found for that language
*Examples*
Query:
```sql
SELECT detectLanguageMixed('二兎を追う者は一兎をも得ず二兎を追う者は一兎をも得ず A vaincre sans peril, on triomphe sans gloire.');
```
Result:
```response
┌─detectLanguageMixed()─┐
│ {'ja':0.62,'fr':0.36 │
└───────────────────────┘
```
## detectProgrammingLanguage
Determines the programming language from the source code. Calculates all the unigrams and bigrams of commands in the source code.
Then using a marked-up dictionary with weights of unigrams and bigrams of commands for various programming languages finds the biggest weight of the programming language and returns it.
*Syntax*
``` sql
detectProgrammingLanguage('source_code')
```
*Arguments*
- `source_code` — String representation of the source code to analyze. [String](../data-types/string.md#string).
*Returned value*
- Programming language. [String](../data-types/string.md).
*Examples*
Query:
```sql
SELECT detectProgrammingLanguage('#include <iostream>');
```
Result:
```response
┌─detectProgrammingLanguage('#include <iostream>')─┐
│ C++ │
└──────────────────────────────────────────────────┘
```
## detectLanguageUnknown
Similar to the `detectLanguage` function, except the `detectLanguageUnknown` function works with non-UTF8-encoded strings. Prefer this version when your character set is UTF-16 or UTF-32.
*Syntax*
``` sql
detectLanguageUnknown('text_to_be_analyzed')
```
*Arguments*
- `text_to_be_analyzed` — A collection (or sentences) of strings to analyze. [String](../data-types/string.md#string).
*Returned value*
- The 2-letter ISO code of the detected language
Other possible results:
- `un` = unknown, can not detect any language.
- `other` = the detected language does not have 2 letter code.
*Examples*
Query:
```sql
SELECT detectLanguageUnknown('Ich bleibe für ein paar Tage.');
```
Result:
```response
┌─detectLanguageUnknown('Ich bleibe für ein paar Tage.')─┐
│ de │
└────────────────────────────────────────────────────────┘
```
## detectTonality
Determines the sentiment of text data. Uses a marked-up sentiment dictionary, in which each word has a tonality ranging from `-12` to `6`.
For each text, it calculates the average sentiment value of its words and returns it in the range `[-1,1]`.
:::note
This function is limited in its current form. Currently it makes use of the embedded emotional dictionary at `/contrib/nlp-data/tonality_ru.zst` and only works for the Russian language.
:::
*Syntax*
``` sql
detectTonality(text)
```
*Arguments*
- `text` — The text to be analyzed. [String](../data-types/string.md#string).
*Returned value*
- The average sentiment value of the words in `text`. [Float32](../data-types/float.md).
*Examples*
Query:
```sql
SELECT detectTonality('Шарик - хороший пёс'), -- Sharik is a good dog
detectTonality('Шарик - пёс'), -- Sharik is a dog
detectTonality('Шарик - плохой пёс'); -- Sharkik is a bad dog
```
Result:
```response
┌─detectTonality('Шарик - хороший пёс')─┬─detectTonality('Шарик - пёс')─┬─detectTonality('Шарик - плохой пёс')─┐
│ 0.44445 │ 0 │ -0.3 │
└───────────────────────────────────────┴───────────────────────────────┴──────────────────────────────────────┘
```
## lemmatize
Performs lemmatization on a given word. Needs dictionaries to operate, which can be obtained [here](https://github.com/vpodpecan/lemmagen3/tree/master/src/lemmagen3/models).
*Syntax*
``` sql
lemmatize('language', word)
```
*Arguments*
- `language` — Language which rules will be applied. [String](../data-types/string.md#string).
- `word` — Word that needs to be lemmatized. Must be lowercase. [String](../data-types/string.md#string).
*Examples*
Query:
``` sql
SELECT lemmatize('en', 'wolves');
```
Result:
``` text
┌─lemmatize("wolves")─┐
│ "wolf" │
└─────────────────────┘
```
*Configuration*
This configuration specifies that the dictionary `en.bin` should be used for lemmatization of English (`en`) words. The `.bin` files can be downloaded from
[here](https://github.com/vpodpecan/lemmagen3/tree/master/src/lemmagen3/models).
``` xml
<lemmatizers>
<lemmatizer>
<!-- highlight-start -->
<lang>en</lang>
<path>en.bin</path>
<!-- highlight-end -->
</lemmatizer>
</lemmatizers>
```
## stem
Performs stemming on a given word.
### Syntax
*Syntax*
``` sql
stem('language', word)
```
### Arguments
*Arguments*
- `language` — Language which rules will be applied. Use the two letter [ISO 639-1 code](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes).
- `word` — word that needs to be stemmed. Must be in lowercase. [String](../data-types/string.md#string).
### Examples
*Examples*
Query:
@ -40,7 +311,7 @@ Result:
│ ['I','think','it','is','a','bless','in','disguis'] │
└────────────────────────────────────────────────────┘
```
### Supported languages for stem()
*Supported languages for stem()*
:::note
The stem() function uses the [Snowball stemming](https://snowballstem.org/) library, see the Snowball website for updated languages etc.
@ -76,53 +347,6 @@ The stem() function uses the [Snowball stemming](https://snowballstem.org/) libr
- Turkish
- Yiddish
## lemmatize
Performs lemmatization on a given word. Needs dictionaries to operate, which can be obtained [here](https://github.com/vpodpecan/lemmagen3/tree/master/src/lemmagen3/models).
### Syntax
``` sql
lemmatize('language', word)
```
### Arguments
- `language` — Language which rules will be applied. [String](../data-types/string.md#string).
- `word` — Word that needs to be lemmatized. Must be lowercase. [String](../data-types/string.md#string).
### Examples
Query:
``` sql
SELECT lemmatize('en', 'wolves');
```
Result:
``` text
┌─lemmatize("wolves")─┐
│ "wolf" │
└─────────────────────┘
```
### Configuration
This configuration specifies that the dictionary `en.bin` should be used for lemmatization of English (`en`) words. The `.bin` files can be downloaded from
[here](https://github.com/vpodpecan/lemmagen3/tree/master/src/lemmagen3/models).
``` xml
<lemmatizers>
<lemmatizer>
<!-- highlight-start -->
<lang>en</lang>
<path>en.bin</path>
<!-- highlight-end -->
</lemmatizer>
</lemmatizers>
```
## synonyms
Finds synonyms to a given word. There are two types of synonym extensions: `plain` and `wordnet`.
@ -131,18 +355,18 @@ With the `plain` extension type we need to provide a path to a simple text file,
With the `wordnet` extension type we need to provide a path to a directory with WordNet thesaurus in it. Thesaurus must contain a WordNet sense index.
### Syntax
*Syntax*
``` sql
synonyms('extension_name', word)
```
### Arguments
*Arguments*
- `extension_name` — Name of the extension in which search will be performed. [String](../data-types/string.md#string).
- `word` — Word that will be searched in extension. [String](../data-types/string.md#string).
### Examples
*Examples*
Query:
@ -158,7 +382,7 @@ Result:
└──────────────────────────────────────────┘
```
### Configuration
*Configuration*
``` xml
<synonyms_extensions>
<extension>
@ -172,154 +396,4 @@ Result:
<path>en/</path>
</extension>
</synonyms_extensions>
```
## detectLanguage
Detects the language of the UTF8-encoded input string. The function uses the [CLD2 library](https://github.com/CLD2Owners/cld2) for detection, and it returns the 2-letter ISO language code.
The `detectLanguage` function works best when providing over 200 characters in the input string.
### Syntax
``` sql
detectLanguage('text_to_be_analyzed')
```
### Arguments
- `text_to_be_analyzed` — A collection (or sentences) of strings to analyze. [String](../data-types/string.md#string).
### Returned value
- The 2-letter ISO code of the detected language
Other possible results:
- `un` = unknown, can not detect any language.
- `other` = the detected language does not have 2 letter code.
### Examples
Query:
```sql
SELECT detectLanguage('Je pense que je ne parviendrai jamais à parler français comme un natif. Where theres a will, theres a way.');
```
Result:
```response
fr
```
## detectLanguageMixed
Similar to the `detectLanguage` function, but `detectLanguageMixed` returns a `Map` of 2-letter language codes that are mapped to the percentage of the certain language in the text.
### Syntax
``` sql
detectLanguageMixed('text_to_be_analyzed')
```
### Arguments
- `text_to_be_analyzed` — A collection (or sentences) of strings to analyze. [String](../data-types/string.md#string).
### Returned value
- `Map(String, Float32)`: The keys are 2-letter ISO codes and the values are a percentage of text found for that language
### Examples
Query:
```sql
SELECT detectLanguageMixed('二兎を追う者は一兎をも得ず二兎を追う者は一兎をも得ず A vaincre sans peril, on triomphe sans gloire.');
```
Result:
```response
┌─detectLanguageMixed()─┐
│ {'ja':0.62,'fr':0.36 │
└───────────────────────┘
```
## detectLanguageUnknown
Similar to the `detectLanguage` function, except the `detectLanguageUnknown` function works with non-UTF8-encoded strings. Prefer this version when your character set is UTF-16 or UTF-32.
### Syntax
``` sql
detectLanguageUnknown('text_to_be_analyzed')
```
### Arguments
- `text_to_be_analyzed` — A collection (or sentences) of strings to analyze. [String](../data-types/string.md#string).
### Returned value
- The 2-letter ISO code of the detected language
Other possible results:
- `un` = unknown, can not detect any language.
- `other` = the detected language does not have 2 letter code.
### Examples
Query:
```sql
SELECT detectLanguageUnknown('Ich bleibe für ein paar Tage.');
```
Result:
```response
┌─detectLanguageUnknown('Ich bleibe für ein paar Tage.')─┐
│ de │
└────────────────────────────────────────────────────────┘
```
## detectCharset
The `detectCharset` function detects the character set of the non-UTF8-encoded input string.
### Syntax
``` sql
detectCharset('text_to_be_analyzed')
```
### Arguments
- `text_to_be_analyzed` — A collection (or sentences) of strings to analyze. [String](../data-types/string.md#string).
### Returned value
- A `String` containing the code of the detected character set
### Examples
Query:
```sql
SELECT detectCharset('Ich bleibe für ein paar Tage.');
```
Result:
```response
┌─detectCharset('Ich bleibe für ein paar Tage.')─┐
│ WINDOWS-1252 │
└────────────────────────────────────────────────┘
```
```

View File

@ -3820,3 +3820,43 @@ Result:
10. │ df │ │
└────┴───────────────────────┘
```
## displayName
Returns the value of `display_name` from [config](../../operations/configuration-files.md/#configuration-files) or server Fully Qualified Domain Name (FQDN) if not set.
**Syntax**
```sql
displayName()
```
**Returned value**
- Value of `display_name` from config or server FQDN if not set. [String](../data-types/string.md).
**Example**
The `display_name` can be set in `config.xml`. Taking for example a server with `display_name` configured to 'production':
```xml
<!-- It is the name that will be shown in the clickhouse-client.
By default, anything with "production" will be highlighted in red in query prompt.
-->
<display_name>production</display_name>
```
Query:
```sql
SELECT displayName();
```
Result:
```response
┌─displayName()─┐
│ production │
└───────────────┘
```

View File

@ -1168,14 +1168,14 @@ Result:
└────────────────────────────┘
```
## base64UrlEncode
## base64URLEncode
Encodes an URL (String or FixedString) as base64 with URL-specific modifications, according to [RFC 4648](https://datatracker.ietf.org/doc/html/rfc4648#section-5).
**Syntax**
```sql
base64UrlEncode(url)
base64URLEncode(url)
```
**Arguments**
@ -1189,13 +1189,13 @@ base64UrlEncode(url)
**Example**
``` sql
SELECT base64UrlEncode('https://clickhouse.com');
SELECT base64URLEncode('https://clickhouse.com');
```
Result:
```result
┌─base64UrlEncode('https://clickhouse.com')─┐
┌─base64URLEncode('https://clickhouse.com')─┐
│ aHR0cDovL2NsaWNraG91c2UuY29t │
└───────────────────────────────────────────┘
```
@ -1234,19 +1234,19 @@ Result:
└──────────────────────────────────┘
```
## base64UrlDecode
## base64URLDecode
Accepts a base64-encoded URL and decodes it from base64 with URL-specific modifications, according to [RFC 4648](https://datatracker.ietf.org/doc/html/rfc4648#section-5). Throws an exception in case of an error.
**Syntax**
```sql
base64UrlDecode(encodedUrl)
base64URLDecode(encodedUrl)
```
**Arguments**
- `encodedUrl` — [String](../data-types/string.md) column or constant. If the string is not a valid Base64-encoded value with URL-specific modifications, an exception is thrown.
- `encodedURL` — [String](../data-types/string.md) column or constant. If the string is not a valid Base64-encoded value with URL-specific modifications, an exception is thrown.
**Returned value**
@ -1255,13 +1255,13 @@ base64UrlDecode(encodedUrl)
**Example**
``` sql
SELECT base64UrlDecode('aHR0cDovL2NsaWNraG91c2UuY29t');
SELECT base64URLDecode('aHR0cDovL2NsaWNraG91c2UuY29t');
```
Result:
```result
┌─base64UrlDecode('aHR0cDovL2NsaWNraG91c2UuY29t')─┐
┌─base64URLDecode('aHR0cDovL2NsaWNraG91c2UuY29t')─┐
│ https://clickhouse.com │
└─────────────────────────────────────────────────┘
```
@ -1298,19 +1298,19 @@ SELECT tryBase64Decode('RW5jb2RlZA==') as res, tryBase64Decode('invalid') as res
└────────────┴─────────────┘
```
## tryBase64UrlDecode
## tryBase64URLDecode
Like `base64UrlDecode` but returns an empty string in case of error.
Like `base64URLDecode` but returns an empty string in case of error.
**Syntax**
```sql
tryBase64UrlDecode(encodedUrl)
tryBase64URLDecode(encodedUrl)
```
**Parameters**
- `encodedUrl`: [String](../data-types/string.md) column or constant. If the string is not a valid Base64-encoded value with URL-specific modifications, returns an empty string.
- `encodedURL`: [String](../data-types/string.md) column or constant. If the string is not a valid Base64-encoded value with URL-specific modifications, returns an empty string.
**Returned value**
@ -1321,7 +1321,7 @@ tryBase64UrlDecode(encodedUrl)
Query:
```sql
SELECT tryBase64UrlDecode('aHR0cDovL2NsaWNraG91c2UuY29t') as res, tryBase64Decode('aHR0cHM6Ly9jbGlja') as res_invalid;
SELECT tryBase64URLDecode('aHR0cDovL2NsaWNraG91c2UuY29t') as res, tryBase64Decode('aHR0cHM6Ly9jbGlja') as res_invalid;
```
```response

View File

@ -818,6 +818,40 @@ The same as above, but including query string and fragment.
Example: `/top/news.html?page=2#comments`.
### protocol
Extracts the protocol from a URL.
**Syntax**
```sql
protocol(url)
```
**Arguments**
- `url` — URL to extract protocol from. [String](../data-types/string.md).
**Returned value**
- Protocol, or an empty string if it cannot be determined. [String](../data-types/string.md).
**Example**
Query:
```sql
SELECT protocol('https://clickhouse.com/');
```
Result:
```response
┌─protocol('https://clickhouse.com/')─┐
│ https │
└─────────────────────────────────────┘
```
### queryString
Returns the query string without the initial question mark, `#` and everything after `#`.

View File

@ -9,8 +9,8 @@ sidebar_label: CONSTRAINT
Constraints could be added or deleted using following syntax:
``` sql
ALTER TABLE [db].name [ON CLUSTER cluster] ADD CONSTRAINT constraint_name CHECK expression;
ALTER TABLE [db].name [ON CLUSTER cluster] DROP CONSTRAINT constraint_name;
ALTER TABLE [db].name [ON CLUSTER cluster] ADD CONSTRAINT [IF NOT EXISTS] constraint_name CHECK expression;
ALTER TABLE [db].name [ON CLUSTER cluster] DROP CONSTRAINT [IF EXISTS] constraint_name;
```
See more on [constraints](../../../sql-reference/statements/create/table.md#constraints).

View File

@ -3,6 +3,10 @@ slug: /en/sql-reference/statements/alter/named-collection
sidebar_label: NAMED COLLECTION
---
import CloudNotSupportedBadge from '@theme/badges/CloudNotSupportedBadge';
<CloudNotSupportedBadge />
# ALTER NAMED COLLECTION
This query intends to modify already existing named collections.

View File

@ -134,8 +134,8 @@ PRIMARY KEY (event_type, ts)
ORDER BY (event_type, ts, browser)
SETTINGS index_granularity = 8192
-- !!! The columns' definition is unchanged but it does not matter, we are not quering
-- MATERIALIZED VIEW, we are quering TO (storage) table.
-- !!! The columns' definition is unchanged but it does not matter, we are not querying
-- MATERIALIZED VIEW, we are querying TO (storage) table.
-- SELECT section is updated.
SHOW CREATE TABLE mv FORMAT TSVRaw;

View File

@ -3,6 +3,10 @@ slug: /en/sql-reference/statements/create/named-collection
sidebar_label: NAMED COLLECTION
---
import CloudNotSupportedBadge from '@theme/badges/CloudNotSupportedBadge';
<CloudNotSupportedBadge />
# CREATE NAMED COLLECTION
Creates a new named collection.

View File

@ -283,7 +283,7 @@ Pull request можно создать, даже если работа над з
Тесты будут запущены, как только сотрудники ClickHouse поставят для pull request тег «Can be tested». Результаты первых проверок (стиль кода) появятся уже через несколько минут. Результаты сборки появятся примерно через пол часа. Результаты основного набора тестов будут доступны в пределах часа.
Система подготовит сборки ClickHouse специально для вашего pull request. Для их получения, нажмите на ссылку «Details» у проверки «Clickhouse build check». Там вы сможете найти прямые ссылки на собранные .deb пакеты ClickHouse, которые, при желании, вы даже сможете установить на свои продакшен серверы (если не страшно).
Система подготовит сборки ClickHouse специально для вашего pull request. Для их получения, нажмите на ссылку «Details» у проверки «Builds». Там вы сможете найти прямые ссылки на собранные .deb пакеты ClickHouse, которые, при желании, вы даже сможете установить на свои продакшен серверы (если не страшно).
Вероятнее всего, часть сборок не будет успешной с первого раза. Ведь мы проверяем сборку кода и gcc и clang, а при сборке с помощью clang включаются почти все существующие в природе warnings (всегда с флагом `-Werror`). На той же странице, вы сможете найти логи сборки - вам не обязательно самому собирать ClickHouse всеми возможными способами.

View File

@ -141,6 +141,7 @@ $ clickhouse-client --param_tbl="numbers" --param_db="system" --param_col="numbe
- `--secure` — если указано, будет использован безопасный канал.
- `--history_file` - путь к файлу с историей команд.
- `--param_<name>` — значение параметра для [запроса с параметрами](#cli-queries-with-parameters).
- `--jwt` авторизация с использованием JSON Web Token. Доступно только в ClickHouse Cloud.
Вместо параметров `--host`, `--port`, `--user` и `--password` клиент ClickHouse также поддерживает строки подключения (смотри следующий раздел).

View File

@ -538,7 +538,7 @@ SELECT base58Decode('3dc8KtHrwM');
Синоним: `TO_BASE64`.
## base64UrlEncode(s)
## base64URLEncode(s)
Производит кодирование URL (String или FixedString) в base64-представление в соответствии с [RFC 4648](https://tools.ietf.org/html/rfc4648).
@ -548,7 +548,7 @@ SELECT base58Decode('3dc8KtHrwM');
Синоним: `FROM_BASE64`.
## base64UrlDecode(s)
## base64URLDecode(s)
Декодирует base64-представление URL в исходную строку в соответствии с [RFC 4648](https://tools.ietf.org/html/rfc4648). При невозможности декодирования выбрасывает исключение
@ -556,9 +556,9 @@ SELECT base58Decode('3dc8KtHrwM');
Функционал аналогичен base64Decode, но при невозможности декодирования возвращает пустую строку.
## tryBase64UrlDecode(s)
## tryBase64URLDecode(s)
Функционал аналогичен base64UrlDecode, но при невозможности декодирования возвращает пустую строку.
Функционал аналогичен base64URLDecode, но при невозможности декодирования возвращает пустую строку.
## endsWith(s, suffix) {#endswith}

View File

@ -11,8 +11,8 @@ sidebar_label: "Манипуляции с ограничениями"
Добавить или удалить ограничение можно с помощью запросов
``` sql
ALTER TABLE [db].name [ON CLUSTER cluster] ADD CONSTRAINT constraint_name CHECK expression;
ALTER TABLE [db].name [ON CLUSTER cluster] DROP CONSTRAINT constraint_name;
ALTER TABLE [db].name [ON CLUSTER cluster] ADD CONSTRAINT [IF NOT EXISTS] constraint_name CHECK expression;
ALTER TABLE [db].name [ON CLUSTER cluster] DROP CONSTRAINT [IF EXISTS] constraint_name;
```
Запросы выполняют добавление или удаление метаданных об ограничениях таблицы `[db].name`, поэтому выполняются мгновенно.

View File

@ -9,8 +9,8 @@ sidebar_label: 约束
约束可以使用以下语法添加或删除:
``` sql
ALTER TABLE [db].name ADD CONSTRAINT constraint_name CHECK expression;
ALTER TABLE [db].name DROP CONSTRAINT constraint_name;
ALTER TABLE [db].name [ON CLUSTER cluster] ADD CONSTRAINT [IF NOT EXISTS] constraint_name CHECK expression;
ALTER TABLE [db].name [ON CLUSTER cluster] DROP CONSTRAINT [IF EXISTS] constraint_name;
```
查看[constraints](../../../sql-reference/statements/create/table.mdx#constraints)。

View File

@ -63,6 +63,7 @@ namespace ErrorCodes
extern const int NETWORK_ERROR;
extern const int AUTHENTICATION_FAILED;
extern const int NO_ELEMENTS_IN_CONFIG;
extern const int USER_EXPIRED;
}
@ -73,6 +74,12 @@ void Client::processError(const String & query) const
fmt::print(stderr, "Received exception from server (version {}):\n{}\n",
server_version,
getExceptionMessage(*server_exception, print_stack_trace, true));
if (server_exception->code() == ErrorCodes::USER_EXPIRED)
{
server_exception->rethrow();
}
if (is_interactive)
{
fmt::print(stderr, "\n");
@ -240,6 +247,10 @@ std::vector<String> Client::loadWarningMessages()
}
}
Poco::Util::LayeredConfiguration & Client::getClientConfiguration()
{
return config();
}
void Client::initialize(Poco::Util::Application & self)
{
@ -680,9 +691,7 @@ bool Client::processWithFuzzing(const String & full_query)
const char * begin = full_query.data();
orig_ast = parseQuery(begin, begin + full_query.size(),
global_context->getSettingsRef(),
/*allow_multi_statements=*/ true,
/*is_interactive=*/ is_interactive,
/*ignore_error=*/ ignore_error);
/*allow_multi_statements=*/ true);
}
catch (const Exception & e)
{
@ -934,6 +943,7 @@ void Client::addOptions(OptionsDescription & options_description)
("ssh-key-file", po::value<std::string>(), "File containing the SSH private key for authenticate with the server.")
("ssh-key-passphrase", po::value<std::string>(), "Passphrase for the SSH private key specified by --ssh-key-file.")
("quota_key", po::value<std::string>(), "A string to differentiate quotas when the user have keyed quotas configured on server")
("jwt", po::value<std::string>(), "Use JWT for authentication")
("max_client_network_bandwidth", po::value<int>(), "the maximum speed of data exchange over the network for the client in bytes per second.")
("compression", po::value<bool>(), "enable or disable compression (enabled by default for remote communication and disabled for localhost communication).")
@ -1092,6 +1102,12 @@ void Client::processOptions(const OptionsDescription & options_description,
config().setBool("no-warnings", true);
if (options.count("fake-drop"))
config().setString("ignore_drop_queries_probability", "1");
if (options.count("jwt"))
{
if (!options["user"].defaulted())
throw Exception(ErrorCodes::BAD_ARGUMENTS, "User and JWT flags can't be specified together");
config().setString("jwt", options["jwt"].as<std::string>());
}
if (options.count("accept-invalid-certificate"))
{
config().setString("openSSL.client.invalidCertificateHandler.name", "AcceptCertificateHandler");

View File

@ -16,6 +16,9 @@ public:
int main(const std::vector<String> & /*args*/) override;
protected:
Poco::Util::LayeredConfiguration & getClientConfiguration() override;
bool processWithFuzzing(const String & full_query) override;
std::optional<bool> processFuzzingStep(const String & query_to_execute, const ASTPtr & parsed_query);

View File

@ -383,6 +383,9 @@ int KeeperClient::main(const std::vector<String> & /* args */)
for (const auto & key : keys)
{
if (key != "node")
continue;
String prefix = "zookeeper." + key;
String host = clickhouse_config.configuration->getString(prefix + ".host");
String port = clickhouse_config.configuration->getString(prefix + ".port");
@ -401,6 +404,7 @@ int KeeperClient::main(const std::vector<String> & /* args */)
zk_args.hosts.push_back(host + ":" + port);
}
zk_args.availability_zones.resize(zk_args.hosts.size());
zk_args.connection_timeout_ms = config().getInt("connection-timeout", 10) * 1000;
zk_args.session_timeout_ms = config().getInt("session-timeout", 10) * 1000;
zk_args.operation_timeout_ms = config().getInt("operation-timeout", 10) * 1000;

View File

@ -355,10 +355,7 @@ try
std::string include_from_path = config().getString("include_from", "/etc/metrika.xml");
if (config().has(DB::PlacementInfo::PLACEMENT_CONFIG_PREFIX))
{
PlacementInfo::PlacementInfo::instance().initialize(config());
}
PlacementInfo::PlacementInfo::instance().initialize(config());
GlobalThreadPool::initialize(
/// We need to have sufficient amount of threads for connections + nuraft workers + keeper workers, 1000 is an estimation
@ -577,8 +574,7 @@ try
#if USE_SSL
CertificateReloader::instance().tryLoad(*config);
#endif
},
/* already_loaded = */ false); /// Reload it right now (initial loading)
});
SCOPE_EXIT({
LOG_INFO(log, "Shutting down.");

View File

@ -11,7 +11,6 @@ set (CLICKHOUSE_LIBRARY_BRIDGE_SOURCES
LibraryBridgeHandlers.cpp
SharedLibrary.cpp
library-bridge.cpp
createFunctionBaseCast.cpp
)
clickhouse_add_executable(clickhouse-library-bridge ${CLICKHOUSE_LIBRARY_BRIDGE_SOURCES})
@ -20,6 +19,7 @@ target_link_libraries(clickhouse-library-bridge PRIVATE
daemon
dbms
bridge
clickhouse_functions
)
set_target_properties(clickhouse-library-bridge PROPERTIES RUNTIME_OUTPUT_DIRECTORY ..)

View File

@ -79,6 +79,11 @@ void applySettingsOverridesForLocal(ContextMutablePtr context)
context->setSettings(settings);
}
Poco::Util::LayeredConfiguration & LocalServer::getClientConfiguration()
{
return config();
}
void LocalServer::processError(const String &) const
{
if (ignore_error)
@ -114,19 +119,19 @@ void LocalServer::initialize(Poco::Util::Application & self)
Poco::Util::Application::initialize(self);
/// Load config files if exists
if (config().has("config-file") || fs::exists("config.xml"))
if (getClientConfiguration().has("config-file") || fs::exists("config.xml"))
{
const auto config_path = config().getString("config-file", "config.xml");
const auto config_path = getClientConfiguration().getString("config-file", "config.xml");
ConfigProcessor config_processor(config_path, false, true);
ConfigProcessor::setConfigPath(fs::path(config_path).parent_path());
auto loaded_config = config_processor.loadConfig();
config().add(loaded_config.configuration.duplicate(), PRIO_DEFAULT, false);
getClientConfiguration().add(loaded_config.configuration.duplicate(), PRIO_DEFAULT, false);
}
GlobalThreadPool::initialize(
config().getUInt("max_thread_pool_size", 10000),
config().getUInt("max_thread_pool_free_size", 1000),
config().getUInt("thread_pool_queue_size", 10000)
getClientConfiguration().getUInt("max_thread_pool_size", 10000),
getClientConfiguration().getUInt("max_thread_pool_free_size", 1000),
getClientConfiguration().getUInt("thread_pool_queue_size", 10000)
);
#if USE_AZURE_BLOB_STORAGE
@ -138,18 +143,18 @@ void LocalServer::initialize(Poco::Util::Application & self)
#endif
getIOThreadPool().initialize(
config().getUInt("max_io_thread_pool_size", 100),
config().getUInt("max_io_thread_pool_free_size", 0),
config().getUInt("io_thread_pool_queue_size", 10000));
getClientConfiguration().getUInt("max_io_thread_pool_size", 100),
getClientConfiguration().getUInt("max_io_thread_pool_free_size", 0),
getClientConfiguration().getUInt("io_thread_pool_queue_size", 10000));
const size_t active_parts_loading_threads = config().getUInt("max_active_parts_loading_thread_pool_size", 64);
const size_t active_parts_loading_threads = getClientConfiguration().getUInt("max_active_parts_loading_thread_pool_size", 64);
getActivePartsLoadingThreadPool().initialize(
active_parts_loading_threads,
0, // We don't need any threads one all the parts will be loaded
active_parts_loading_threads);
const size_t outdated_parts_loading_threads = config().getUInt("max_outdated_parts_loading_thread_pool_size", 32);
const size_t outdated_parts_loading_threads = getClientConfiguration().getUInt("max_outdated_parts_loading_thread_pool_size", 32);
getOutdatedPartsLoadingThreadPool().initialize(
outdated_parts_loading_threads,
0, // We don't need any threads one all the parts will be loaded
@ -157,7 +162,7 @@ void LocalServer::initialize(Poco::Util::Application & self)
getOutdatedPartsLoadingThreadPool().setMaxTurboThreads(active_parts_loading_threads);
const size_t unexpected_parts_loading_threads = config().getUInt("max_unexpected_parts_loading_thread_pool_size", 32);
const size_t unexpected_parts_loading_threads = getClientConfiguration().getUInt("max_unexpected_parts_loading_thread_pool_size", 32);
getUnexpectedPartsLoadingThreadPool().initialize(
unexpected_parts_loading_threads,
0, // We don't need any threads one all the parts will be loaded
@ -165,7 +170,7 @@ void LocalServer::initialize(Poco::Util::Application & self)
getUnexpectedPartsLoadingThreadPool().setMaxTurboThreads(active_parts_loading_threads);
const size_t cleanup_threads = config().getUInt("max_parts_cleaning_thread_pool_size", 128);
const size_t cleanup_threads = getClientConfiguration().getUInt("max_parts_cleaning_thread_pool_size", 128);
getPartsCleaningThreadPool().initialize(
cleanup_threads,
0, // We don't need any threads one all the parts will be deleted
@ -198,10 +203,10 @@ void LocalServer::tryInitPath()
{
std::string path;
if (config().has("path"))
if (getClientConfiguration().has("path"))
{
// User-supplied path.
path = config().getString("path");
path = getClientConfiguration().getString("path");
Poco::trimInPlace(path);
if (path.empty())
@ -260,13 +265,13 @@ void LocalServer::tryInitPath()
global_context->setUserFilesPath(""); /// user's files are everywhere
std::string user_scripts_path = config().getString("user_scripts_path", fs::path(path) / "user_scripts/");
std::string user_scripts_path = getClientConfiguration().getString("user_scripts_path", fs::path(path) / "user_scripts/");
global_context->setUserScriptsPath(user_scripts_path);
/// top_level_domains_lists
const std::string & top_level_domains_path = config().getString("top_level_domains_path", fs::path(path) / "top_level_domains/");
const std::string & top_level_domains_path = getClientConfiguration().getString("top_level_domains_path", fs::path(path) / "top_level_domains/");
if (!top_level_domains_path.empty())
TLDListsHolder::getInstance().parseConfig(fs::path(top_level_domains_path) / "", config());
TLDListsHolder::getInstance().parseConfig(fs::path(top_level_domains_path) / "", getClientConfiguration());
}
@ -308,14 +313,14 @@ void LocalServer::cleanup()
std::string LocalServer::getInitialCreateTableQuery()
{
if (!config().has("table-structure") && !config().has("table-file") && !config().has("table-data-format") && (!isRegularFile(STDIN_FILENO) || queries.empty()))
if (!getClientConfiguration().has("table-structure") && !getClientConfiguration().has("table-file") && !getClientConfiguration().has("table-data-format") && (!isRegularFile(STDIN_FILENO) || queries.empty()))
return {};
auto table_name = backQuoteIfNeed(config().getString("table-name", "table"));
auto table_structure = config().getString("table-structure", "auto");
auto table_name = backQuoteIfNeed(getClientConfiguration().getString("table-name", "table"));
auto table_structure = getClientConfiguration().getString("table-structure", "auto");
String table_file;
if (!config().has("table-file") || config().getString("table-file") == "-")
if (!getClientConfiguration().has("table-file") || getClientConfiguration().getString("table-file") == "-")
{
/// Use Unix tools stdin naming convention
table_file = "stdin";
@ -323,7 +328,7 @@ std::string LocalServer::getInitialCreateTableQuery()
else
{
/// Use regular file
auto file_name = config().getString("table-file");
auto file_name = getClientConfiguration().getString("table-file");
table_file = quoteString(file_name);
}
@ -371,18 +376,18 @@ void LocalServer::setupUsers()
ConfigurationPtr users_config;
auto & access_control = global_context->getAccessControl();
access_control.setNoPasswordAllowed(config().getBool("allow_no_password", true));
access_control.setPlaintextPasswordAllowed(config().getBool("allow_plaintext_password", true));
if (config().has("config-file") || fs::exists("config.xml"))
access_control.setNoPasswordAllowed(getClientConfiguration().getBool("allow_no_password", true));
access_control.setPlaintextPasswordAllowed(getClientConfiguration().getBool("allow_plaintext_password", true));
if (getClientConfiguration().has("config-file") || fs::exists("config.xml"))
{
String config_path = config().getString("config-file", "");
bool has_user_directories = config().has("user_directories");
String config_path = getClientConfiguration().getString("config-file", "");
bool has_user_directories = getClientConfiguration().has("user_directories");
const auto config_dir = fs::path{config_path}.remove_filename().string();
String users_config_path = config().getString("users_config", "");
String users_config_path = getClientConfiguration().getString("users_config", "");
if (users_config_path.empty() && has_user_directories)
{
users_config_path = config().getString("user_directories.users_xml.path");
users_config_path = getClientConfiguration().getString("user_directories.users_xml.path");
if (fs::path(users_config_path).is_relative() && fs::exists(fs::path(config_dir) / users_config_path))
users_config_path = fs::path(config_dir) / users_config_path;
}
@ -406,10 +411,10 @@ void LocalServer::setupUsers()
void LocalServer::connect()
{
connection_parameters = ConnectionParameters(config(), "localhost");
connection_parameters = ConnectionParameters(getClientConfiguration(), "localhost");
ReadBuffer * in;
auto table_file = config().getString("table-file", "-");
auto table_file = getClientConfiguration().getString("table-file", "-");
if (table_file == "-" || table_file == "stdin")
{
in = &std_in;
@ -429,7 +434,7 @@ try
{
UseSSL use_ssl;
StackTrace::setShowAddresses(config().getBool("show_addresses_in_stack_traces", true));
StackTrace::setShowAddresses(getClientConfiguration().getBool("show_addresses_in_stack_traces", true));
setupSignalHandler();
@ -444,7 +449,7 @@ try
if (rlim.rlim_cur < rlim.rlim_max)
{
rlim.rlim_cur = config().getUInt("max_open_files", static_cast<unsigned>(rlim.rlim_max));
rlim.rlim_cur = getClientConfiguration().getUInt("max_open_files", static_cast<unsigned>(rlim.rlim_max));
int rc = setrlimit(RLIMIT_NOFILE, &rlim);
if (rc != 0)
std::cerr << fmt::format("Cannot set max number of file descriptors to {}. Try to specify max_open_files according to your system limits. error: {}", rlim.rlim_cur, errnoToString()) << '\n';
@ -452,8 +457,8 @@ try
}
is_interactive = stdin_is_a_tty
&& (config().hasOption("interactive")
|| (queries.empty() && !config().has("table-structure") && queries_files.empty() && !config().has("table-file")));
&& (getClientConfiguration().hasOption("interactive")
|| (queries.empty() && !getClientConfiguration().has("table-structure") && queries_files.empty() && !getClientConfiguration().has("table-file")));
if (!is_interactive)
{
@ -477,7 +482,7 @@ try
SCOPE_EXIT({ cleanup(); });
initTTYBuffer(toProgressOption(config().getString("progress", "default")));
initTTYBuffer(toProgressOption(getClientConfiguration().getString("progress", "default")));
ASTAlterCommand::setFormatAlterCommandsWithParentheses(true);
applyCmdSettings(global_context);
@ -485,7 +490,7 @@ try
/// try to load user defined executable functions, throw on error and die
try
{
global_context->loadOrReloadUserDefinedExecutableFunctions(config());
global_context->loadOrReloadUserDefinedExecutableFunctions(getClientConfiguration());
}
catch (...)
{
@ -526,7 +531,7 @@ try
}
catch (const DB::Exception & e)
{
bool need_print_stack_trace = config().getBool("stacktrace", false);
bool need_print_stack_trace = getClientConfiguration().getBool("stacktrace", false);
std::cerr << getExceptionMessage(e, need_print_stack_trace, true) << std::endl;
return e.code() ? e.code() : -1;
}
@ -538,42 +543,42 @@ catch (...)
void LocalServer::updateLoggerLevel(const String & logs_level)
{
config().setString("logger.level", logs_level);
updateLevels(config(), logger());
getClientConfiguration().setString("logger.level", logs_level);
updateLevels(getClientConfiguration(), logger());
}
void LocalServer::processConfig()
{
if (!queries.empty() && config().has("queries-file"))
if (!queries.empty() && getClientConfiguration().has("queries-file"))
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Options '--query' and '--queries-file' cannot be specified at the same time");
if (config().has("multiquery"))
if (getClientConfiguration().has("multiquery"))
is_multiquery = true;
pager = config().getString("pager", "");
pager = getClientConfiguration().getString("pager", "");
delayed_interactive = config().has("interactive") && (!queries.empty() || config().has("queries-file"));
delayed_interactive = getClientConfiguration().has("interactive") && (!queries.empty() || getClientConfiguration().has("queries-file"));
if (!is_interactive || delayed_interactive)
{
echo_queries = config().hasOption("echo") || config().hasOption("verbose");
ignore_error = config().getBool("ignore-error", false);
echo_queries = getClientConfiguration().hasOption("echo") || getClientConfiguration().hasOption("verbose");
ignore_error = getClientConfiguration().getBool("ignore-error", false);
}
print_stack_trace = config().getBool("stacktrace", false);
print_stack_trace = getClientConfiguration().getBool("stacktrace", false);
const std::string clickhouse_dialect{"clickhouse"};
load_suggestions = (is_interactive || delayed_interactive) && !config().getBool("disable_suggestion", false)
&& config().getString("dialect", clickhouse_dialect) == clickhouse_dialect;
wait_for_suggestions_to_load = config().getBool("wait_for_suggestions_to_load", false);
load_suggestions = (is_interactive || delayed_interactive) && !getClientConfiguration().getBool("disable_suggestion", false)
&& getClientConfiguration().getString("dialect", clickhouse_dialect) == clickhouse_dialect;
wait_for_suggestions_to_load = getClientConfiguration().getBool("wait_for_suggestions_to_load", false);
auto logging = (config().has("logger.console")
|| config().has("logger.level")
|| config().has("log-level")
|| config().has("send_logs_level")
|| config().has("logger.log"));
auto logging = (getClientConfiguration().has("logger.console")
|| getClientConfiguration().has("logger.level")
|| getClientConfiguration().has("log-level")
|| getClientConfiguration().has("send_logs_level")
|| getClientConfiguration().has("logger.log"));
auto level = config().getString("log-level", "trace");
auto level = getClientConfiguration().getString("log-level", "trace");
if (config().has("server_logs_file"))
if (getClientConfiguration().has("server_logs_file"))
{
auto poco_logs_level = Poco::Logger::parseLevel(level);
Poco::Logger::root().setLevel(poco_logs_level);
@ -583,10 +588,10 @@ void LocalServer::processConfig()
}
else
{
config().setString("logger", "logger");
getClientConfiguration().setString("logger", "logger");
auto log_level_default = logging ? level : "fatal";
config().setString("logger.level", config().getString("log-level", config().getString("send_logs_level", log_level_default)));
buildLoggers(config(), logger(), "clickhouse-local");
getClientConfiguration().setString("logger.level", getClientConfiguration().getString("log-level", getClientConfiguration().getString("send_logs_level", log_level_default)));
buildLoggers(getClientConfiguration(), logger(), "clickhouse-local");
}
shared_context = Context::createShared();
@ -600,13 +605,13 @@ void LocalServer::processConfig()
LoggerRawPtr log = &logger();
/// Maybe useless
if (config().has("macros"))
global_context->setMacros(std::make_unique<Macros>(config(), "macros", log));
if (getClientConfiguration().has("macros"))
global_context->setMacros(std::make_unique<Macros>(getClientConfiguration(), "macros", log));
setDefaultFormatsAndCompressionFromConfiguration();
/// Sets external authenticators config (LDAP, Kerberos).
global_context->setExternalAuthenticatorsConfig(config());
global_context->setExternalAuthenticatorsConfig(getClientConfiguration());
setupUsers();
@ -615,12 +620,12 @@ void LocalServer::processConfig()
global_context->getProcessList().setMaxSize(0);
const size_t physical_server_memory = getMemoryAmount();
const double cache_size_to_ram_max_ratio = config().getDouble("cache_size_to_ram_max_ratio", 0.5);
const double cache_size_to_ram_max_ratio = getClientConfiguration().getDouble("cache_size_to_ram_max_ratio", 0.5);
const size_t max_cache_size = static_cast<size_t>(physical_server_memory * cache_size_to_ram_max_ratio);
String uncompressed_cache_policy = config().getString("uncompressed_cache_policy", DEFAULT_UNCOMPRESSED_CACHE_POLICY);
size_t uncompressed_cache_size = config().getUInt64("uncompressed_cache_size", DEFAULT_UNCOMPRESSED_CACHE_MAX_SIZE);
double uncompressed_cache_size_ratio = config().getDouble("uncompressed_cache_size_ratio", DEFAULT_UNCOMPRESSED_CACHE_SIZE_RATIO);
String uncompressed_cache_policy = getClientConfiguration().getString("uncompressed_cache_policy", DEFAULT_UNCOMPRESSED_CACHE_POLICY);
size_t uncompressed_cache_size = getClientConfiguration().getUInt64("uncompressed_cache_size", DEFAULT_UNCOMPRESSED_CACHE_MAX_SIZE);
double uncompressed_cache_size_ratio = getClientConfiguration().getDouble("uncompressed_cache_size_ratio", DEFAULT_UNCOMPRESSED_CACHE_SIZE_RATIO);
if (uncompressed_cache_size > max_cache_size)
{
uncompressed_cache_size = max_cache_size;
@ -628,9 +633,9 @@ void LocalServer::processConfig()
}
global_context->setUncompressedCache(uncompressed_cache_policy, uncompressed_cache_size, uncompressed_cache_size_ratio);
String mark_cache_policy = config().getString("mark_cache_policy", DEFAULT_MARK_CACHE_POLICY);
size_t mark_cache_size = config().getUInt64("mark_cache_size", DEFAULT_MARK_CACHE_MAX_SIZE);
double mark_cache_size_ratio = config().getDouble("mark_cache_size_ratio", DEFAULT_MARK_CACHE_SIZE_RATIO);
String mark_cache_policy = getClientConfiguration().getString("mark_cache_policy", DEFAULT_MARK_CACHE_POLICY);
size_t mark_cache_size = getClientConfiguration().getUInt64("mark_cache_size", DEFAULT_MARK_CACHE_MAX_SIZE);
double mark_cache_size_ratio = getClientConfiguration().getDouble("mark_cache_size_ratio", DEFAULT_MARK_CACHE_SIZE_RATIO);
if (!mark_cache_size)
LOG_ERROR(log, "Too low mark cache size will lead to severe performance degradation.");
if (mark_cache_size > max_cache_size)
@ -640,9 +645,9 @@ void LocalServer::processConfig()
}
global_context->setMarkCache(mark_cache_policy, mark_cache_size, mark_cache_size_ratio);
String index_uncompressed_cache_policy = config().getString("index_uncompressed_cache_policy", DEFAULT_INDEX_UNCOMPRESSED_CACHE_POLICY);
size_t index_uncompressed_cache_size = config().getUInt64("index_uncompressed_cache_size", DEFAULT_INDEX_UNCOMPRESSED_CACHE_MAX_SIZE);
double index_uncompressed_cache_size_ratio = config().getDouble("index_uncompressed_cache_size_ratio", DEFAULT_INDEX_UNCOMPRESSED_CACHE_SIZE_RATIO);
String index_uncompressed_cache_policy = getClientConfiguration().getString("index_uncompressed_cache_policy", DEFAULT_INDEX_UNCOMPRESSED_CACHE_POLICY);
size_t index_uncompressed_cache_size = getClientConfiguration().getUInt64("index_uncompressed_cache_size", DEFAULT_INDEX_UNCOMPRESSED_CACHE_MAX_SIZE);
double index_uncompressed_cache_size_ratio = getClientConfiguration().getDouble("index_uncompressed_cache_size_ratio", DEFAULT_INDEX_UNCOMPRESSED_CACHE_SIZE_RATIO);
if (index_uncompressed_cache_size > max_cache_size)
{
index_uncompressed_cache_size = max_cache_size;
@ -650,9 +655,9 @@ void LocalServer::processConfig()
}
global_context->setIndexUncompressedCache(index_uncompressed_cache_policy, index_uncompressed_cache_size, index_uncompressed_cache_size_ratio);
String index_mark_cache_policy = config().getString("index_mark_cache_policy", DEFAULT_INDEX_MARK_CACHE_POLICY);
size_t index_mark_cache_size = config().getUInt64("index_mark_cache_size", DEFAULT_INDEX_MARK_CACHE_MAX_SIZE);
double index_mark_cache_size_ratio = config().getDouble("index_mark_cache_size_ratio", DEFAULT_INDEX_MARK_CACHE_SIZE_RATIO);
String index_mark_cache_policy = getClientConfiguration().getString("index_mark_cache_policy", DEFAULT_INDEX_MARK_CACHE_POLICY);
size_t index_mark_cache_size = getClientConfiguration().getUInt64("index_mark_cache_size", DEFAULT_INDEX_MARK_CACHE_MAX_SIZE);
double index_mark_cache_size_ratio = getClientConfiguration().getDouble("index_mark_cache_size_ratio", DEFAULT_INDEX_MARK_CACHE_SIZE_RATIO);
if (index_mark_cache_size > max_cache_size)
{
index_mark_cache_size = max_cache_size;
@ -660,7 +665,7 @@ void LocalServer::processConfig()
}
global_context->setIndexMarkCache(index_mark_cache_policy, index_mark_cache_size, index_mark_cache_size_ratio);
size_t mmap_cache_size = config().getUInt64("mmap_cache_size", DEFAULT_MMAP_CACHE_MAX_SIZE);
size_t mmap_cache_size = getClientConfiguration().getUInt64("mmap_cache_size", DEFAULT_MMAP_CACHE_MAX_SIZE);
if (mmap_cache_size > max_cache_size)
{
mmap_cache_size = max_cache_size;
@ -672,8 +677,8 @@ void LocalServer::processConfig()
global_context->setQueryCache(0, 0, 0, 0);
#if USE_EMBEDDED_COMPILER
size_t compiled_expression_cache_max_size_in_bytes = config().getUInt64("compiled_expression_cache_size", DEFAULT_COMPILED_EXPRESSION_CACHE_MAX_SIZE);
size_t compiled_expression_cache_max_elements = config().getUInt64("compiled_expression_cache_elements_size", DEFAULT_COMPILED_EXPRESSION_CACHE_MAX_ENTRIES);
size_t compiled_expression_cache_max_size_in_bytes = getClientConfiguration().getUInt64("compiled_expression_cache_size", DEFAULT_COMPILED_EXPRESSION_CACHE_MAX_SIZE);
size_t compiled_expression_cache_max_elements = getClientConfiguration().getUInt64("compiled_expression_cache_elements_size", DEFAULT_COMPILED_EXPRESSION_CACHE_MAX_ENTRIES);
CompiledExpressionCacheFactory::instance().init(compiled_expression_cache_max_size_in_bytes, compiled_expression_cache_max_elements);
#endif
@ -685,16 +690,16 @@ void LocalServer::processConfig()
applyCmdOptions(global_context);
/// Load global settings from default_profile and system_profile.
global_context->setDefaultProfiles(config());
global_context->setDefaultProfiles(getClientConfiguration());
/// We load temporary database first, because projections need it.
DatabaseCatalog::instance().initializeAndLoadTemporaryDatabase();
std::string default_database = config().getString("default_database", "default");
std::string default_database = getClientConfiguration().getString("default_database", "default");
DatabaseCatalog::instance().attachDatabase(default_database, createClickHouseLocalDatabaseOverlay(default_database, global_context));
global_context->setCurrentDatabase(default_database);
if (config().has("path"))
if (getClientConfiguration().has("path"))
{
String path = global_context->getPath();
fs::create_directories(fs::path(path));
@ -709,7 +714,7 @@ void LocalServer::processConfig()
attachInformationSchema(global_context, *createMemoryDatabaseIfNotExists(global_context, DatabaseCatalog::INFORMATION_SCHEMA_UPPERCASE));
waitLoad(TablesLoaderForegroundPoolId, startup_system_tasks);
if (!config().has("only-system-tables"))
if (!getClientConfiguration().has("only-system-tables"))
{
DatabaseCatalog::instance().createBackgroundTasks();
waitLoad(loadMetadata(global_context));
@ -721,15 +726,15 @@ void LocalServer::processConfig()
LOG_DEBUG(log, "Loaded metadata.");
}
else if (!config().has("no-system-tables"))
else if (!getClientConfiguration().has("no-system-tables"))
{
attachSystemTablesServer(global_context, *createMemoryDatabaseIfNotExists(global_context, DatabaseCatalog::SYSTEM_DATABASE), false);
attachInformationSchema(global_context, *createMemoryDatabaseIfNotExists(global_context, DatabaseCatalog::INFORMATION_SCHEMA));
attachInformationSchema(global_context, *createMemoryDatabaseIfNotExists(global_context, DatabaseCatalog::INFORMATION_SCHEMA_UPPERCASE));
}
server_display_name = config().getString("display_name", "");
prompt_by_server_display_name = config().getRawString("prompt_by_server_display_name.default", ":) ");
server_display_name = getClientConfiguration().getString("display_name", "");
prompt_by_server_display_name = getClientConfiguration().getRawString("prompt_by_server_display_name.default", ":) ");
global_context->setQueryKindInitial();
global_context->setQueryKind(query_kind);
@ -807,7 +812,7 @@ void LocalServer::applyCmdSettings(ContextMutablePtr context)
void LocalServer::applyCmdOptions(ContextMutablePtr context)
{
context->setDefaultFormat(config().getString("output-format", config().getString("format", is_interactive ? "PrettyCompact" : "TSV")));
context->setDefaultFormat(getClientConfiguration().getString("output-format", getClientConfiguration().getString("format", is_interactive ? "PrettyCompact" : "TSV")));
applyCmdSettings(context);
}
@ -815,33 +820,33 @@ void LocalServer::applyCmdOptions(ContextMutablePtr context)
void LocalServer::processOptions(const OptionsDescription &, const CommandLineOptions & options, const std::vector<Arguments> &, const std::vector<Arguments> &)
{
if (options.count("table"))
config().setString("table-name", options["table"].as<std::string>());
getClientConfiguration().setString("table-name", options["table"].as<std::string>());
if (options.count("file"))
config().setString("table-file", options["file"].as<std::string>());
getClientConfiguration().setString("table-file", options["file"].as<std::string>());
if (options.count("structure"))
config().setString("table-structure", options["structure"].as<std::string>());
getClientConfiguration().setString("table-structure", options["structure"].as<std::string>());
if (options.count("no-system-tables"))
config().setBool("no-system-tables", true);
getClientConfiguration().setBool("no-system-tables", true);
if (options.count("only-system-tables"))
config().setBool("only-system-tables", true);
getClientConfiguration().setBool("only-system-tables", true);
if (options.count("database"))
config().setString("default_database", options["database"].as<std::string>());
getClientConfiguration().setString("default_database", options["database"].as<std::string>());
if (options.count("input-format"))
config().setString("table-data-format", options["input-format"].as<std::string>());
getClientConfiguration().setString("table-data-format", options["input-format"].as<std::string>());
if (options.count("output-format"))
config().setString("output-format", options["output-format"].as<std::string>());
getClientConfiguration().setString("output-format", options["output-format"].as<std::string>());
if (options.count("logger.console"))
config().setBool("logger.console", options["logger.console"].as<bool>());
getClientConfiguration().setBool("logger.console", options["logger.console"].as<bool>());
if (options.count("logger.log"))
config().setString("logger.log", options["logger.log"].as<std::string>());
getClientConfiguration().setString("logger.log", options["logger.log"].as<std::string>());
if (options.count("logger.level"))
config().setString("logger.level", options["logger.level"].as<std::string>());
getClientConfiguration().setString("logger.level", options["logger.level"].as<std::string>());
if (options.count("send_logs_level"))
config().setString("send_logs_level", options["send_logs_level"].as<std::string>());
getClientConfiguration().setString("send_logs_level", options["send_logs_level"].as<std::string>());
if (options.count("wait_for_suggestions_to_load"))
config().setBool("wait_for_suggestions_to_load", true);
getClientConfiguration().setBool("wait_for_suggestions_to_load", true);
}
void LocalServer::readArguments(int argc, char ** argv, Arguments & common_arguments, std::vector<Arguments> &, std::vector<Arguments> &)

View File

@ -30,6 +30,9 @@ public:
int main(const std::vector<String> & /*args*/) override;
protected:
Poco::Util::LayeredConfiguration & getClientConfiguration() override;
void connect() override;
void processError(const String & query) const override;

View File

@ -13,7 +13,6 @@ set (CLICKHOUSE_ODBC_BRIDGE_SOURCES
getIdentifierQuote.cpp
odbc-bridge.cpp
validateODBCConnectionString.cpp
createFunctionBaseCast.cpp
)
clickhouse_add_executable(clickhouse-odbc-bridge ${CLICKHOUSE_ODBC_BRIDGE_SOURCES})
@ -25,6 +24,7 @@ target_link_libraries(clickhouse-odbc-bridge PRIVATE
clickhouse_parsers
ch_contrib::nanodbc
ch_contrib::unixodbc
clickhouse_functions
)
set_target_properties(clickhouse-odbc-bridge PROPERTIES RUNTIME_OUTPUT_DIRECTORY ..)

View File

@ -1003,6 +1003,8 @@ try
ServerUUID::load(path / "uuid", log);
PlacementInfo::PlacementInfo::instance().initialize(config());
zkutil::validateZooKeeperConfig(config());
bool has_zookeeper = zkutil::hasZooKeeperConfig(config());
@ -1540,6 +1542,8 @@ try
global_context->setMaxDictionaryNumToWarn(new_server_settings.max_dictionary_num_to_warn);
global_context->setMaxDatabaseNumToWarn(new_server_settings.max_database_num_to_warn);
global_context->setMaxPartNumToWarn(new_server_settings.max_part_num_to_warn);
/// Only for system.server_settings
global_context->setConfigReloaderInterval(new_server_settings.config_reload_interval_ms);
SlotCount concurrent_threads_soft_limit = UnlimitedSlots;
if (new_server_settings.concurrent_threads_soft_limit_num > 0 && new_server_settings.concurrent_threads_soft_limit_num < concurrent_threads_soft_limit)
@ -1702,8 +1706,7 @@ try
/// Must be the last.
latest_config = config;
},
/* already_loaded = */ false); /// Reload it right now (initial loading)
});
const auto listen_hosts = getListenHosts(config());
const auto interserver_listen_hosts = getInterserverListenHosts(config());
@ -1816,11 +1819,6 @@ try
}
if (config().has(DB::PlacementInfo::PLACEMENT_CONFIG_PREFIX))
{
PlacementInfo::PlacementInfo::instance().initialize(config());
}
{
std::lock_guard lock(servers_lock);
/// We should start interserver communications before (and more important shutdown after) tables.

View File

@ -29,7 +29,14 @@
-->
<size>1000M</size>
<count>10</count>
<!-- <console>1</console> --> <!-- Default behavior is autodetection (log to console if not daemon mode and is tty) -->
<!-- <console_log_level>trace</console_log_level> -->
<!-- <use_syslog>0</use_syslog> -->
<!-- <syslog_level>trace</syslog_level> -->
<!-- <stream_compress>0</stream_compress> -->
<!-- Per level overrides (legacy):
@ -408,13 +415,11 @@
<!-- Approximate size of mark cache, used in tables of MergeTree family.
In bytes. Cache is single for server. Memory is allocated only on demand.
You should not lower this value.
-->
<mark_cache_size>5368709120</mark_cache_size>
You should not lower this value. -->
<!-- <mark_cache_size>5368709120</mark_cache_size> -->
<!-- For marks of secondary indices.
-->
<index_mark_cache_size>5368709120</index_mark_cache_size>
<!-- For marks of secondary indices. -->
<!-- <index_mark_cache_size>5368709120</index_mark_cache_size> -->
<!-- If you enable the `min_bytes_to_use_mmap_io` setting,
the data in MergeTree tables can be read with mmap to avoid copying from kernel to userspace.
@ -432,13 +437,23 @@
The cache is dropped (the files are closed) automatically on removal of old parts in MergeTree,
also it can be dropped manually by the SYSTEM DROP MMAP CACHE query.
-->
<mmap_cache_size>1000</mmap_cache_size>
<!-- <mmap_cache_size>1024</mmap_cache_size> -->
<!-- Cache size in bytes for compiled expressions.-->
<compiled_expression_cache_size>134217728</compiled_expression_cache_size>
<!-- <compiled_expression_cache_size>134217728</compiled_expression_cache_size> -->
<!-- Cache size in elements for compiled expressions.-->
<compiled_expression_cache_elements_size>10000</compiled_expression_cache_elements_size>
<!-- <compiled_expression_cache_elements_size>10000</compiled_expression_cache_elements_size> -->
<!-- Configuration for the query cache -->
<!--
<query_cache>
<max_size_in_bytes>1073741824</max_size_in_bytes>
<max_entries>1024</max_entries>
<max_entry_size_in_bytes>1048576</max_entry_size_in_bytes>
<max_entry_size_in_rows>30000000</max_entry_size_in_rows>
</query_cache>
-->
<!-- Cache path for custom (created from SQL) cached disks -->
<custom_cached_disks_base_directory>/var/lib/clickhouse/caches/</custom_cached_disks_base_directory>
@ -1642,14 +1657,6 @@
-->
<!-- </kafka> -->
<!-- Configuration for the query cache -->
<query_cache>
<max_size_in_bytes>1073741824</max_size_in_bytes>
<max_entries>1024</max_entries>
<max_entry_size_in_bytes>1048576</max_entry_size_in_bytes>
<max_entry_size_in_rows>30000000</max_entry_size_in_rows>
</query_cache>
<backups>
<allowed_path>backups</allowed_path>

View File

@ -260,7 +260,10 @@ uncompressed_cache_size: 8589934592
# Approximate size of mark cache, used in tables of MergeTree family.
# In bytes. Cache is single for server. Memory is allocated only on demand.
# You should not lower this value.
mark_cache_size: 5368709120
# mark_cache_size: 5368709120
# For marks of secondary indices.
# index_mark_cache_size: 5368709120
# If you enable the `min_bytes_to_use_mmap_io` setting,
# the data in MergeTree tables can be read with mmap to avoid copying from kernel to userspace.
@ -277,13 +280,20 @@ mark_cache_size: 5368709120
# in query or server memory usage - because this memory can be discarded similar to OS page cache.
# The cache is dropped (the files are closed) automatically on removal of old parts in MergeTree,
# also it can be dropped manually by the SYSTEM DROP MMAP CACHE query.
mmap_cache_size: 1000
# mmap_cache_size: 1024
# Cache size in bytes for compiled expressions.
compiled_expression_cache_size: 134217728
# compiled_expression_cache_size: 134217728
# Cache size in elements for compiled expressions.
compiled_expression_cache_elements_size: 10000
# compiled_expression_cache_elements_size: 10000
# Configuration for the query cache
# query_cache:
# max_size_in_bytes: 1073741824
# max_entries: 1024
# max_entry_size_in_bytes: 1048576
# max_entry_size_in_rows: 30000000
# Path to data directory, with trailing slash.
path: /var/lib/clickhouse/

View File

@ -108,6 +108,9 @@ bool Authentication::areCredentialsValid(
case AuthenticationType::HTTP:
throw Authentication::Require<BasicCredentials>("ClickHouse Basic Authentication");
case AuthenticationType::JWT:
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "JWT is available only in ClickHouse Cloud");
case AuthenticationType::KERBEROS:
return external_authenticators.checkKerberosCredentials(auth_data.getKerberosRealm(), *gss_acceptor_context);
@ -149,6 +152,9 @@ bool Authentication::areCredentialsValid(
case AuthenticationType::SSL_CERTIFICATE:
throw Authentication::Require<BasicCredentials>("ClickHouse X.509 Authentication");
case AuthenticationType::JWT:
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "JWT is available only in ClickHouse Cloud");
case AuthenticationType::SSH_KEY:
#if USE_SSH
throw Authentication::Require<SshCredentials>("SSH Keys Authentication");
@ -193,6 +199,9 @@ bool Authentication::areCredentialsValid(
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSH is disabled, because ClickHouse is built without libssh");
#endif
case AuthenticationType::JWT:
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "JWT is available only in ClickHouse Cloud");
case AuthenticationType::BCRYPT_PASSWORD:
return checkPasswordBcrypt(basic_credentials->getPassword(), auth_data.getPasswordHashBinary());
@ -222,6 +231,9 @@ bool Authentication::areCredentialsValid(
case AuthenticationType::HTTP:
throw Authentication::Require<BasicCredentials>("ClickHouse Basic Authentication");
case AuthenticationType::JWT:
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "JWT is available only in ClickHouse Cloud");
case AuthenticationType::KERBEROS:
throw Authentication::Require<GSSAcceptorContext>(auth_data.getKerberosRealm());
@ -254,6 +266,9 @@ bool Authentication::areCredentialsValid(
case AuthenticationType::HTTP:
throw Authentication::Require<BasicCredentials>("ClickHouse Basic Authentication");
case AuthenticationType::JWT:
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "JWT is available only in ClickHouse Cloud");
case AuthenticationType::KERBEROS:
throw Authentication::Require<GSSAcceptorContext>(auth_data.getKerberosRealm());

View File

@ -135,6 +135,7 @@ void AuthenticationData::setPassword(const String & password_)
case AuthenticationType::BCRYPT_PASSWORD:
case AuthenticationType::NO_PASSWORD:
case AuthenticationType::LDAP:
case AuthenticationType::JWT:
case AuthenticationType::KERBEROS:
case AuthenticationType::SSL_CERTIFICATE:
case AuthenticationType::SSH_KEY:
@ -251,6 +252,7 @@ void AuthenticationData::setPasswordHashBinary(const Digest & hash)
case AuthenticationType::NO_PASSWORD:
case AuthenticationType::LDAP:
case AuthenticationType::JWT:
case AuthenticationType::KERBEROS:
case AuthenticationType::SSL_CERTIFICATE:
case AuthenticationType::SSH_KEY:
@ -322,6 +324,10 @@ std::shared_ptr<ASTAuthenticationData> AuthenticationData::toAST() const
node->children.push_back(std::make_shared<ASTLiteral>(getLDAPServerName()));
break;
}
case AuthenticationType::JWT:
{
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "JWT is available only in ClickHouse Cloud");
}
case AuthenticationType::KERBEROS:
{
const auto & realm = getKerberosRealm();

View File

@ -72,6 +72,11 @@ const AuthenticationTypeInfo & AuthenticationTypeInfo::get(AuthenticationType ty
static const auto info = make_info(Keyword::HTTP);
return info;
}
case AuthenticationType::JWT:
{
static const auto info = make_info(Keyword::JWT);
return info;
}
case AuthenticationType::MAX:
break;
}

View File

@ -41,6 +41,9 @@ enum class AuthenticationType : uint8_t
/// Authentication through HTTP protocol
HTTP,
/// JSON Web Token
JWT,
MAX,
};

View File

@ -33,6 +33,8 @@ void User::setName(const String & name_)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "User name '{}' is reserved", name_);
if (name_.starts_with(EncodedUserInfo::SSH_KEY_AUTHENTICAION_MARKER))
throw Exception(ErrorCodes::BAD_ARGUMENTS, "User name '{}' is reserved", name_);
if (name_.starts_with(EncodedUserInfo::JWT_AUTHENTICAION_MARKER))
throw Exception(ErrorCodes::BAD_ARGUMENTS, "User name '{}' is reserved", name_);
name = name_;
}

View File

@ -880,8 +880,7 @@ void UsersConfigAccessStorage::load(
Settings::checkNoSettingNamesAtTopLevel(*new_config, users_config_path);
parseFromConfig(*new_config);
access_control.getChangesNotifier().sendNotifications();
},
/* already_loaded = */ false);
});
}
void UsersConfigAccessStorage::startPeriodicReloading()

View File

@ -0,0 +1,283 @@
#include <AggregateFunctions/IAggregateFunction.h>
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/FactoryHelpers.h>
#include <Columns/IColumn.h>
#include <Columns/ColumnNullable.h>
#include <Columns/ColumnString.h>
#include <Core/ServerSettings.h>
#include <Core/ColumnWithTypeAndName.h>
#include <Common/ArenaAllocator.h>
#include <Common/assert_cast.h>
#include <Interpreters/castColumn.h>
#include <DataTypes/IDataType.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypesNumber.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
namespace DB
{
struct Settings;
namespace ErrorCodes
{
extern const int TOO_MANY_ARGUMENTS_FOR_FUNCTION;
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int BAD_ARGUMENTS;
}
namespace
{
struct GroupConcatDataBase
{
UInt64 data_size = 0;
UInt64 allocated_size = 0;
char * data = nullptr;
void checkAndUpdateSize(UInt64 add, Arena * arena)
{
if (data_size + add >= allocated_size)
{
auto old_size = allocated_size;
allocated_size = std::max(2 * allocated_size, data_size + add);
data = arena->realloc(data, old_size, allocated_size);
}
}
void insertChar(const char * str, UInt64 str_size, Arena * arena)
{
checkAndUpdateSize(str_size, arena);
memcpy(data + data_size, str, str_size);
data_size += str_size;
}
void insert(const IColumn * column, const SerializationPtr & serialization, size_t row_num, Arena * arena)
{
WriteBufferFromOwnString buff;
serialization->serializeText(*column, row_num, buff, FormatSettings{});
auto string = buff.stringView();
insertChar(string.data(), string.size(), arena);
}
};
template <bool has_limit>
struct GroupConcatData;
template<>
struct GroupConcatData<false> final : public GroupConcatDataBase
{
};
template<>
struct GroupConcatData<true> final : public GroupConcatDataBase
{
using Offset = UInt64;
using Allocator = MixedAlignedArenaAllocator<alignof(Offset), 4096>;
using Offsets = PODArray<Offset, 32, Allocator>;
/// offset[i * 2] - beginning of the i-th row, offset[i * 2 + 1] - end of the i-th row
Offsets offsets;
UInt64 num_rows = 0;
UInt64 getSize(size_t i) const { return offsets[i * 2 + 1] - offsets[i * 2]; }
UInt64 getString(size_t i) const { return offsets[i * 2]; }
void insert(const IColumn * column, const SerializationPtr & serialization, size_t row_num, Arena * arena)
{
WriteBufferFromOwnString buff;
serialization->serializeText(*column, row_num, buff, {});
auto string = buff.stringView();
checkAndUpdateSize(string.size(), arena);
memcpy(data + data_size, string.data(), string.size());
offsets.push_back(data_size, arena);
data_size += string.size();
offsets.push_back(data_size, arena);
num_rows++;
}
};
template <bool has_limit>
class GroupConcatImpl final
: public IAggregateFunctionDataHelper<GroupConcatData<has_limit>, GroupConcatImpl<has_limit>>
{
static constexpr auto name = "groupConcat";
SerializationPtr serialization;
UInt64 limit;
const String delimiter;
public:
GroupConcatImpl(const DataTypePtr & data_type_, const Array & parameters_, UInt64 limit_, const String & delimiter_)
: IAggregateFunctionDataHelper<GroupConcatData<has_limit>, GroupConcatImpl<has_limit>>(
{data_type_}, parameters_, std::make_shared<DataTypeString>())
, serialization(this->argument_types[0]->getDefaultSerialization())
, limit(limit_)
, delimiter(delimiter_)
{
}
String getName() const override { return name; }
void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override
{
auto & cur_data = this->data(place);
if constexpr (has_limit)
if (cur_data.num_rows >= limit)
return;
if (cur_data.data_size != 0)
cur_data.insertChar(delimiter.c_str(), delimiter.size(), arena);
cur_data.insert(columns[0], serialization, row_num, arena);
}
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override
{
auto & cur_data = this->data(place);
auto & rhs_data = this->data(rhs);
if (rhs_data.data_size == 0)
return;
if constexpr (has_limit)
{
UInt64 new_elems_count = std::min(rhs_data.num_rows, limit - cur_data.num_rows);
for (UInt64 i = 0; i < new_elems_count; ++i)
{
if (cur_data.data_size != 0)
cur_data.insertChar(delimiter.c_str(), delimiter.size(), arena);
cur_data.offsets.push_back(cur_data.data_size, arena);
cur_data.insertChar(rhs_data.data + rhs_data.getString(i), rhs_data.getSize(i), arena);
cur_data.num_rows++;
cur_data.offsets.push_back(cur_data.data_size, arena);
}
}
else
{
if (cur_data.data_size != 0)
cur_data.insertChar(delimiter.c_str(), delimiter.size(), arena);
cur_data.insertChar(rhs_data.data, rhs_data.data_size, arena);
}
}
void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
{
auto & cur_data = this->data(place);
writeVarUInt(cur_data.data_size, buf);
buf.write(cur_data.data, cur_data.data_size);
if constexpr (has_limit)
{
writeVarUInt(cur_data.num_rows, buf);
for (const auto & offset : cur_data.offsets)
writeVarUInt(offset, buf);
}
}
void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena * arena) const override
{
auto & cur_data = this->data(place);
UInt64 temp_size = 0;
readVarUInt(temp_size, buf);
cur_data.checkAndUpdateSize(temp_size, arena);
buf.readStrict(cur_data.data + cur_data.data_size, temp_size);
cur_data.data_size = temp_size;
if constexpr (has_limit)
{
readVarUInt(cur_data.num_rows, buf);
cur_data.offsets.resize_exact(cur_data.num_rows * 2, arena);
for (auto & offset : cur_data.offsets)
readVarUInt(offset, buf);
}
}
void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
{
auto & cur_data = this->data(place);
if (cur_data.data_size == 0)
{
to.insertDefault();
return;
}
auto & column_string = assert_cast<ColumnString &>(to);
column_string.insertData(cur_data.data, cur_data.data_size);
}
bool allocatesMemoryInArena() const override { return true; }
};
AggregateFunctionPtr createAggregateFunctionGroupConcat(
const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings *)
{
assertUnary(name, argument_types);
bool has_limit = false;
UInt64 limit = 0;
String delimiter;
if (parameters.size() > 2)
throw Exception(ErrorCodes::TOO_MANY_ARGUMENTS_FOR_FUNCTION,
"Incorrect number of parameters for aggregate function {}, should be 0, 1 or 2, got: {}", name, parameters.size());
if (!parameters.empty())
{
auto type = parameters[0].getType();
if (type != Field::Types::String)
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "First parameter for aggregate function {} should be string", name);
delimiter = parameters[0].get<String>();
}
if (parameters.size() == 2)
{
auto type = parameters[1].getType();
if (type != Field::Types::Int64 && type != Field::Types::UInt64)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Second parameter for aggregate function {} should be a positive number", name);
if ((type == Field::Types::Int64 && parameters[1].get<Int64>() <= 0) ||
(type == Field::Types::UInt64 && parameters[1].get<UInt64>() == 0))
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Second parameter for aggregate function {} should be a positive number, got: {}", name, parameters[1].get<Int64>());
has_limit = true;
limit = parameters[1].get<UInt64>();
}
if (has_limit)
return std::make_shared<GroupConcatImpl</* has_limit= */ true>>(argument_types[0], parameters, limit, delimiter);
else
return std::make_shared<GroupConcatImpl</* has_limit= */ false>>(argument_types[0], parameters, limit, delimiter);
}
}
void registerAggregateFunctionGroupConcat(AggregateFunctionFactory & factory)
{
AggregateFunctionProperties properties = { .returns_default_when_only_null = false, .is_order_dependent = true };
factory.registerFunction("groupConcat", { createAggregateFunctionGroupConcat, properties });
factory.registerAlias("group_concat", "groupConcat", AggregateFunctionFactory::CaseInsensitive);
}
}

View File

@ -91,7 +91,8 @@ public:
return std::make_shared<DataTypeNumber<PointType>>();
}
bool allocatesMemoryInArena() const override { return false; }
/// MaxIntersectionsData::Allocator uses the arena
bool allocatesMemoryInArena() const override { return true; }
void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override
{

View File

@ -19,6 +19,7 @@ void registerAggregateFunctionGroupArraySorted(AggregateFunctionFactory & factor
void registerAggregateFunctionGroupUniqArray(AggregateFunctionFactory &);
void registerAggregateFunctionGroupArrayInsertAt(AggregateFunctionFactory &);
void registerAggregateFunctionGroupArrayIntersect(AggregateFunctionFactory &);
void registerAggregateFunctionGroupConcat(AggregateFunctionFactory &);
void registerAggregateFunctionsQuantile(AggregateFunctionFactory &);
void registerAggregateFunctionsQuantileDeterministic(AggregateFunctionFactory &);
void registerAggregateFunctionsQuantileExact(AggregateFunctionFactory &);
@ -120,6 +121,7 @@ void registerAggregateFunctions()
registerAggregateFunctionGroupUniqArray(factory);
registerAggregateFunctionGroupArrayInsertAt(factory);
registerAggregateFunctionGroupArrayIntersect(factory);
registerAggregateFunctionGroupConcat(factory);
registerAggregateFunctionsQuantile(factory);
registerAggregateFunctionsQuantileDeterministic(factory);
registerAggregateFunctionsQuantileExact(factory);

View File

@ -43,50 +43,56 @@ public:
bool replaced_argument = false;
auto replaced_uniq_function_arguments_nodes = function_node->getArguments().getNodes();
for (auto & uniq_function_argument_node : replaced_uniq_function_arguments_nodes)
/// Replace injective function with its single argument
auto remove_injective_function = [&replaced_argument](QueryTreeNodePtr & arg) -> bool
{
auto * uniq_function_argument_node_typed = uniq_function_argument_node->as<FunctionNode>();
if (!uniq_function_argument_node_typed || !uniq_function_argument_node_typed->isOrdinaryFunction())
continue;
auto & uniq_function_argument_node_argument_nodes = uniq_function_argument_node_typed->getArguments().getNodes();
auto * arg_typed = arg->as<FunctionNode>();
if (!arg_typed || !arg_typed->isOrdinaryFunction())
return false;
/// Do not apply optimization if injective function contains multiple arguments
if (uniq_function_argument_node_argument_nodes.size() != 1)
continue;
auto & arg_arguments_nodes = arg_typed->getArguments().getNodes();
if (arg_arguments_nodes.size() != 1)
return false;
const auto & uniq_function_argument_node_function = uniq_function_argument_node_typed->getFunction();
if (!uniq_function_argument_node_function->isInjective({}))
continue;
const auto & arg_function = arg_typed->getFunction();
if (!arg_function->isInjective({}))
return false;
/// Replace injective function with its single argument
uniq_function_argument_node = uniq_function_argument_node_argument_nodes[0];
replaced_argument = true;
arg = arg_arguments_nodes[0];
return replaced_argument = true;
};
for (auto & uniq_function_argument_node : replaced_uniq_function_arguments_nodes)
{
while (remove_injective_function(uniq_function_argument_node))
;
}
if (!replaced_argument)
return;
DataTypes argument_types;
argument_types.reserve(replaced_uniq_function_arguments_nodes.size());
DataTypes replaced_argument_types;
replaced_argument_types.reserve(replaced_uniq_function_arguments_nodes.size());
for (const auto & function_node_argument : replaced_uniq_function_arguments_nodes)
argument_types.emplace_back(function_node_argument->getResultType());
replaced_argument_types.emplace_back(function_node_argument->getResultType());
auto current_aggregate_function = function_node->getAggregateFunction();
AggregateFunctionProperties properties;
auto aggregate_function = AggregateFunctionFactory::instance().get(
auto replaced_aggregate_function = AggregateFunctionFactory::instance().get(
function_node->getFunctionName(),
NullsAction::EMPTY,
argument_types,
function_node->getAggregateFunction()->getParameters(),
replaced_argument_types,
current_aggregate_function->getParameters(),
properties);
/// uniqCombined returns nullable with nullable arguments so the result type might change which breaks the pass
if (!aggregate_function->getResultType()->equals(*function_node->getAggregateFunction()->getResultType()))
if (!replaced_aggregate_function->getResultType()->equals(*current_aggregate_function->getResultType()))
return;
function_node->getArguments().getNodes() = replaced_uniq_function_arguments_nodes;
function_node->resolveAsAggregateFunction(std::move(aggregate_function));
function_node->getArguments().getNodes() = std::move(replaced_uniq_function_arguments_nodes);
function_node->resolveAsAggregateFunction(std::move(replaced_aggregate_function));
}
};

View File

@ -1,3 +1,5 @@
#include <Common/FieldVisitorToString.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypeNullable.h>
@ -3495,7 +3497,8 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi
*
* 4. If node has alias, update its value in scope alias map. Deregister alias from expression_aliases_in_resolve_process.
*/
ProjectionNames QueryAnalyzer::resolveExpressionNode(QueryTreeNodePtr & node, IdentifierResolveScope & scope, bool allow_lambda_expression, bool allow_table_expression, bool ignore_alias)
ProjectionNames QueryAnalyzer::resolveExpressionNode(
QueryTreeNodePtr & node, IdentifierResolveScope & scope, bool allow_lambda_expression, bool allow_table_expression, bool ignore_alias)
{
checkStackSize();
@ -4505,7 +4508,36 @@ void QueryAnalyzer::resolveTableFunction(QueryTreeNodePtr & table_function_node,
table_name = table_identifier[1];
}
auto parametrized_view_storage = scope_context->getQueryContext()->buildParametrizedViewStorage(function_ast, database_name, table_name);
/// Collect parametrized view arguments
NameToNameMap view_params;
for (const auto & argument : table_function_node_typed.getArguments())
{
if (auto * arg_func = argument->as<FunctionNode>())
{
if (arg_func->getFunctionName() != "equals")
continue;
auto nodes = arg_func->getArguments().getNodes();
if (nodes.size() != 2)
continue;
if (auto * identifier_node = nodes[0]->as<IdentifierNode>())
{
resolveExpressionNode(nodes[1], scope, /* allow_lambda_expression */false, /* allow_table_function */false);
if (auto * constant = nodes[1]->as<ConstantNode>())
{
view_params[identifier_node->getIdentifier().getFullName()] = convertFieldToString(constant->getValue());
}
}
}
}
auto context = scope_context->getQueryContext();
auto parametrized_view_storage = context->buildParametrizedViewStorage(
database_name,
table_name,
view_params);
if (parametrized_view_storage)
{
auto fake_table_node = std::make_shared<TableNode>(parametrized_view_storage, scope_context);

View File

@ -438,7 +438,7 @@ void RestorerFromBackup::findTableInBackupImpl(const QualifiedTableName & table_
String create_table_query_str = serializeAST(*create_table_query);
bool is_predefined_table = DatabaseCatalog::instance().isPredefinedTable(StorageID{table_name.database, table_name.table});
auto table_dependencies = getDependenciesFromCreateQuery(context, table_name, create_table_query);
auto table_dependencies = getDependenciesFromCreateQuery(context, table_name, create_table_query, context->getCurrentDatabase());
bool table_has_data = backup->hasFiles(data_path_in_backup);
std::lock_guard lock{mutex};

View File

@ -222,7 +222,7 @@ add_object_library(clickhouse_storages_mergetree Storages/MergeTree)
add_object_library(clickhouse_storages_statistics Storages/Statistics)
add_object_library(clickhouse_storages_liveview Storages/LiveView)
add_object_library(clickhouse_storages_windowview Storages/WindowView)
add_object_library(clickhouse_storages_s3queue Storages/S3Queue)
add_object_library(clickhouse_storages_s3queue Storages/ObjectStorageQueue)
add_object_library(clickhouse_storages_materializedview Storages/MaterializedView)
add_object_library(clickhouse_client Client)
add_object_library(clickhouse_bridge BridgeHelper)

View File

@ -110,6 +110,7 @@ namespace ErrorCodes
extern const int USER_SESSION_LIMIT_EXCEEDED;
extern const int NOT_IMPLEMENTED;
extern const int CANNOT_READ_FROM_FILE_DESCRIPTOR;
extern const int USER_EXPIRED;
}
}
@ -302,8 +303,29 @@ public:
ClientBase::~ClientBase() = default;
ClientBase::ClientBase() = default;
ClientBase::ClientBase(
int in_fd_,
int out_fd_,
int err_fd_,
std::istream & input_stream_,
std::ostream & output_stream_,
std::ostream & error_stream_
)
: std_in(in_fd_)
, std_out(out_fd_)
, progress_indication(output_stream_, in_fd_, err_fd_)
, in_fd(in_fd_)
, out_fd(out_fd_)
, err_fd(err_fd_)
, input_stream(input_stream_)
, output_stream(output_stream_)
, error_stream(error_stream_)
{
stdin_is_a_tty = isatty(in_fd);
stdout_is_a_tty = isatty(out_fd);
stderr_is_a_tty = isatty(err_fd);
terminal_width = getTerminalWidth(in_fd, err_fd);
}
void ClientBase::setupSignalHandler()
{
@ -330,7 +352,7 @@ void ClientBase::setupSignalHandler()
}
ASTPtr ClientBase::parseQuery(const char *& pos, const char * end, const Settings & settings, bool allow_multi_statements, bool is_interactive, bool ignore_error)
ASTPtr ClientBase::parseQuery(const char *& pos, const char * end, const Settings & settings, bool allow_multi_statements)
{
std::unique_ptr<IParserBase> parser;
ASTPtr res;
@ -359,7 +381,7 @@ ASTPtr ClientBase::parseQuery(const char *& pos, const char * end, const Setting
if (!res)
{
std::cerr << std::endl << message << std::endl << std::endl;
error_stream << std::endl << message << std::endl << std::endl;
return nullptr;
}
}
@ -373,11 +395,11 @@ ASTPtr ClientBase::parseQuery(const char *& pos, const char * end, const Setting
if (is_interactive)
{
std::cout << std::endl;
WriteBufferFromOStream res_buf(std::cout, 4096);
output_stream << std::endl;
WriteBufferFromOStream res_buf(output_stream, 4096);
formatAST(*res, res_buf);
res_buf.finalize();
std::cout << std::endl << std::endl;
output_stream << std::endl << std::endl;
}
return res;
@ -481,7 +503,7 @@ void ClientBase::onData(Block & block, ASTPtr parsed_query)
if (need_render_progress && tty_buf)
{
if (select_into_file && !select_into_file_and_stdout)
std::cerr << "\r";
error_stream << "\r";
progress_indication.writeProgress(*tty_buf);
}
}
@ -741,17 +763,17 @@ bool ClientBase::isRegularFile(int fd)
void ClientBase::setDefaultFormatsAndCompressionFromConfiguration()
{
if (config().has("output-format"))
if (getClientConfiguration().has("output-format"))
{
default_output_format = config().getString("output-format");
default_output_format = getClientConfiguration().getString("output-format");
is_default_format = false;
}
else if (config().has("format"))
else if (getClientConfiguration().has("format"))
{
default_output_format = config().getString("format");
default_output_format = getClientConfiguration().getString("format");
is_default_format = false;
}
else if (config().has("vertical"))
else if (getClientConfiguration().has("vertical"))
{
default_output_format = "Vertical";
is_default_format = false;
@ -777,17 +799,17 @@ void ClientBase::setDefaultFormatsAndCompressionFromConfiguration()
default_output_format = "TSV";
}
if (config().has("input-format"))
if (getClientConfiguration().has("input-format"))
{
default_input_format = config().getString("input-format");
default_input_format = getClientConfiguration().getString("input-format");
}
else if (config().has("format"))
else if (getClientConfiguration().has("format"))
{
default_input_format = config().getString("format");
default_input_format = getClientConfiguration().getString("format");
}
else if (config().getString("table-file", "-") != "-")
else if (getClientConfiguration().getString("table-file", "-") != "-")
{
auto file_name = config().getString("table-file");
auto file_name = getClientConfiguration().getString("table-file");
std::optional<String> format_from_file_name = FormatFactory::instance().tryGetFormatFromFileName(file_name);
if (format_from_file_name)
default_input_format = *format_from_file_name;
@ -803,7 +825,7 @@ void ClientBase::setDefaultFormatsAndCompressionFromConfiguration()
default_input_format = "TSV";
}
format_max_block_size = config().getUInt64("format_max_block_size",
format_max_block_size = getClientConfiguration().getUInt64("format_max_block_size",
global_context->getSettingsRef().max_block_size);
/// Setting value from cmd arg overrides one from config
@ -813,7 +835,7 @@ void ClientBase::setDefaultFormatsAndCompressionFromConfiguration()
}
else
{
insert_format_max_block_size = config().getUInt64("insert_format_max_block_size",
insert_format_max_block_size = getClientConfiguration().getUInt64("insert_format_max_block_size",
global_context->getSettingsRef().max_insert_block_size);
}
}
@ -924,9 +946,7 @@ void ClientBase::processTextAsSingleQuery(const String & full_query)
const char * begin = full_query.data();
auto parsed_query = parseQuery(begin, begin + full_query.size(),
global_context->getSettingsRef(),
/*allow_multi_statements=*/ false,
is_interactive,
ignore_error);
/*allow_multi_statements=*/ false);
if (!parsed_query)
return;
@ -1100,7 +1120,7 @@ void ClientBase::processOrdinaryQuery(const String & query_to_execute, ASTPtr pa
/// has been received yet.
if (processed_rows == 0 && e.code() == ErrorCodes::DEADLOCK_AVOIDED && --retries_left)
{
std::cerr << "Got a transient error from the server, will"
error_stream << "Got a transient error from the server, will"
<< " retry (" << retries_left << " retries left)";
}
else
@ -1154,7 +1174,7 @@ void ClientBase::receiveResult(ASTPtr parsed_query, Int32 signals_before_stop, b
double elapsed = receive_watch.elapsedSeconds();
if (break_on_timeout && elapsed > receive_timeout.totalSeconds())
{
std::cout << "Timeout exceeded while receiving data from server."
output_stream << "Timeout exceeded while receiving data from server."
<< " Waited for " << static_cast<size_t>(elapsed) << " seconds,"
<< " timeout is " << receive_timeout.totalSeconds() << " seconds." << std::endl;
@ -1189,7 +1209,7 @@ void ClientBase::receiveResult(ASTPtr parsed_query, Int32 signals_before_stop, b
if (cancelled && is_interactive)
{
std::cout << "Query was cancelled." << std::endl;
output_stream << "Query was cancelled." << std::endl;
cancelled_printed = true;
}
}
@ -1308,9 +1328,9 @@ void ClientBase::onEndOfStream()
if (is_interactive)
{
if (cancelled && !cancelled_printed)
std::cout << "Query was cancelled." << std::endl;
output_stream << "Query was cancelled." << std::endl;
else if (!written_first_block)
std::cout << "Ok." << std::endl;
output_stream << "Ok." << std::endl;
}
}
@ -1863,7 +1883,7 @@ void ClientBase::cancelQuery()
progress_indication.clearProgressOutput(*tty_buf);
if (is_interactive)
std::cout << "Cancelling query." << std::endl;
output_stream << "Cancelling query." << std::endl;
cancelled = true;
}
@ -1993,7 +2013,7 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin
{
const String & new_database = use_query->getDatabase();
/// If the client initiates the reconnection, it takes the settings from the config.
config().setString("database", new_database);
getClientConfiguration().setString("database", new_database);
/// If the connection initiates the reconnection, it uses its variable.
connection->setDefaultDatabase(new_database);
}
@ -2013,21 +2033,21 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin
if (is_interactive)
{
std::cout << std::endl;
output_stream << std::endl;
if (!server_exception || processed_rows != 0)
std::cout << processed_rows << " row" << (processed_rows == 1 ? "" : "s") << " in set. ";
std::cout << "Elapsed: " << progress_indication.elapsedSeconds() << " sec. ";
output_stream << processed_rows << " row" << (processed_rows == 1 ? "" : "s") << " in set. ";
output_stream << "Elapsed: " << progress_indication.elapsedSeconds() << " sec. ";
progress_indication.writeFinalProgress();
std::cout << std::endl << std::endl;
output_stream << std::endl << std::endl;
}
else if (print_time_to_stderr)
else if (getClientConfiguration().getBool("print-time-to-stderr", false))
{
std::cerr << progress_indication.elapsedSeconds() << "\n";
error_stream << progress_indication.elapsedSeconds() << "\n";
}
if (!is_interactive && print_num_processed_rows)
if (!is_interactive && getClientConfiguration().getBool("print-num-processed-rows", false))
{
std::cout << "Processed rows: " << processed_rows << "\n";
output_stream << "Processed rows: " << processed_rows << "\n";
}
if (have_error && report_error)
@ -2077,9 +2097,7 @@ MultiQueryProcessingStage ClientBase::analyzeMultiQueryText(
{
parsed_query = parseQuery(this_query_end, all_queries_end,
global_context->getSettingsRef(),
/*allow_multi_statements=*/ true,
is_interactive,
ignore_error);
/*allow_multi_statements=*/ true);
}
catch (const Exception & e)
{
@ -2238,7 +2256,7 @@ bool ClientBase::executeMultiQuery(const String & all_queries_text)
catch (...)
{
// Surprisingly, this is a client error. A server error would
// have been reported without throwing (see onReceiveSeverException()).
// have been reported without throwing (see onReceiveExceptionFromServer()).
client_exception = std::make_unique<Exception>(getCurrentExceptionMessageAndPattern(print_stack_trace), getCurrentExceptionCode());
have_error = true;
}
@ -2395,12 +2413,12 @@ void ClientBase::initQueryIdFormats()
return;
/// Initialize query_id_formats if any
if (config().has("query_id_formats"))
if (getClientConfiguration().has("query_id_formats"))
{
Poco::Util::AbstractConfiguration::Keys keys;
config().keys("query_id_formats", keys);
getClientConfiguration().keys("query_id_formats", keys);
for (const auto & name : keys)
query_id_formats.emplace_back(name + ":", config().getString("query_id_formats." + name));
query_id_formats.emplace_back(name + ":", getClientConfiguration().getString("query_id_formats." + name));
}
if (query_id_formats.empty())
@ -2445,9 +2463,9 @@ bool ClientBase::addMergeTreeSettings(ASTCreateQuery & ast_create)
void ClientBase::runInteractive()
{
if (config().has("query_id"))
if (getClientConfiguration().has("query_id"))
throw Exception(ErrorCodes::BAD_ARGUMENTS, "query_id could be specified only in non-interactive mode");
if (print_time_to_stderr)
if (getClientConfiguration().getBool("print-time-to-stderr", false))
throw Exception(ErrorCodes::BAD_ARGUMENTS, "time option could be specified only in non-interactive mode");
initQueryIdFormats();
@ -2460,9 +2478,9 @@ void ClientBase::runInteractive()
{
/// Load suggestion data from the server.
if (global_context->getApplicationType() == Context::ApplicationType::CLIENT)
suggest->load<Connection>(global_context, connection_parameters, config().getInt("suggestion_limit"), wait_for_suggestions_to_load);
suggest->load<Connection>(global_context, connection_parameters, getClientConfiguration().getInt("suggestion_limit"), wait_for_suggestions_to_load);
else if (global_context->getApplicationType() == Context::ApplicationType::LOCAL)
suggest->load<LocalConnection>(global_context, connection_parameters, config().getInt("suggestion_limit"), wait_for_suggestions_to_load);
suggest->load<LocalConnection>(global_context, connection_parameters, getClientConfiguration().getInt("suggestion_limit"), wait_for_suggestions_to_load);
}
if (home_path.empty())
@ -2473,8 +2491,8 @@ void ClientBase::runInteractive()
}
/// Load command history if present.
if (config().has("history_file"))
history_file = config().getString("history_file");
if (getClientConfiguration().has("history_file"))
history_file = getClientConfiguration().getString("history_file");
else
{
auto * history_file_from_env = getenv("CLICKHOUSE_HISTORY_FILE"); // NOLINT(concurrency-mt-unsafe)
@ -2495,7 +2513,7 @@ void ClientBase::runInteractive()
{
if (e.getErrno() != EEXIST)
{
std::cerr << getCurrentExceptionMessage(false) << '\n';
error_stream << getCurrentExceptionMessage(false) << '\n';
}
}
}
@ -2506,13 +2524,13 @@ void ClientBase::runInteractive()
#if USE_REPLXX
replxx::Replxx::highlighter_callback_t highlight_callback{};
if (config().getBool("highlight", true))
if (getClientConfiguration().getBool("highlight", true))
highlight_callback = highlight;
ReplxxLineReader lr(
*suggest,
history_file,
config().has("multiline"),
getClientConfiguration().has("multiline"),
query_extenders,
query_delimiters,
word_break_characters,
@ -2520,7 +2538,7 @@ void ClientBase::runInteractive()
#else
LineReader lr(
history_file,
config().has("multiline"),
getClientConfiguration().has("multiline"),
query_extenders,
query_delimiters,
word_break_characters);
@ -2600,7 +2618,7 @@ void ClientBase::runInteractive()
{
// If a separate connection loading suggestions failed to open a new session,
// use the main session to receive them.
suggest->load(*connection, connection_parameters.timeouts, config().getInt("suggestion_limit"), global_context->getClientInfo());
suggest->load(*connection, connection_parameters.timeouts, getClientConfiguration().getInt("suggestion_limit"), global_context->getClientInfo());
}
try
@ -2611,8 +2629,11 @@ void ClientBase::runInteractive()
}
catch (const Exception & e)
{
if (e.code() == ErrorCodes::USER_EXPIRED)
break;
/// We don't need to handle the test hints in the interactive mode.
std::cerr << "Exception on client:" << std::endl << getExceptionMessage(e, print_stack_trace, true) << std::endl << std::endl;
error_stream << "Exception on client:" << std::endl << getExceptionMessage(e, print_stack_trace, true) << std::endl << std::endl;
client_exception.reset(e.clone());
}
@ -2629,11 +2650,11 @@ void ClientBase::runInteractive()
while (true);
if (isNewYearMode())
std::cout << "Happy new year." << std::endl;
output_stream << "Happy new year." << std::endl;
else if (isChineseNewYearMode(local_tz))
std::cout << "Happy Chinese new year. 春节快乐!" << std::endl;
output_stream << "Happy Chinese new year. 春节快乐!" << std::endl;
else
std::cout << "Bye." << std::endl;
output_stream << "Bye." << std::endl;
}
@ -2644,7 +2665,7 @@ bool ClientBase::processMultiQueryFromFile(const String & file_name)
ReadBufferFromFile in(file_name);
readStringUntilEOF(queries_from_file, in);
if (!has_log_comment)
if (!getClientConfiguration().has("log_comment"))
{
/// NOTE: cannot use even weakly_canonical() since it fails for /dev/stdin due to resolving of "pipe:[X]"
global_context->setSetting("log_comment", String(fs::absolute(fs::path(file_name))));
@ -2751,13 +2772,13 @@ void ClientBase::clearTerminal()
/// It is needed if garbage is left in terminal.
/// Show cursor. It can be left hidden by invocation of previous programs.
/// A test for this feature: perl -e 'print "x"x100000'; echo -ne '\033[0;0H\033[?25l'; clickhouse-client
std::cout << "\033[0J" "\033[?25h";
output_stream << "\033[0J" "\033[?25h";
}
void ClientBase::showClientVersion()
{
std::cout << VERSION_NAME << " " + getName() + " version " << VERSION_STRING << VERSION_OFFICIAL << "." << std::endl;
output_stream << VERSION_NAME << " " + getName() + " version " << VERSION_STRING << VERSION_OFFICIAL << "." << std::endl;
}
namespace
@ -2824,7 +2845,10 @@ private:
}
/// Enable optimizations even in debug builds because otherwise options parsing becomes extremely slow affecting .sh tests
#if defined(__clang__)
#pragma clang optimize on
#endif
void ClientBase::parseAndCheckOptions(OptionsDescription & options_description, po::variables_map & options, Arguments & arguments)
{
if (allow_repeated_settings)
@ -3043,18 +3067,18 @@ void ClientBase::init(int argc, char ** argv)
if (options.count("version-clean"))
{
std::cout << VERSION_STRING;
output_stream << VERSION_STRING;
exit(0); // NOLINT(concurrency-mt-unsafe)
}
if (options.count("verbose"))
config().setBool("verbose", true);
getClientConfiguration().setBool("verbose", true);
/// Output of help message.
if (options.count("help")
|| (options.count("host") && options["host"].as<std::string>() == "elp")) /// If user writes -help instead of --help.
{
if (config().getBool("verbose", false))
if (getClientConfiguration().getBool("verbose", false))
printHelpMessage(options_description, true);
else
printHelpMessage(options_description_non_verbose, false);
@ -3062,72 +3086,75 @@ void ClientBase::init(int argc, char ** argv)
}
/// Common options for clickhouse-client and clickhouse-local.
/// Output execution time to stderr in batch mode.
if (options.count("time"))
print_time_to_stderr = true;
getClientConfiguration().setBool("print-time-to-stderr", true);
if (options.count("query"))
queries = options["query"].as<std::vector<std::string>>();
if (options.count("query_id"))
config().setString("query_id", options["query_id"].as<std::string>());
getClientConfiguration().setString("query_id", options["query_id"].as<std::string>());
if (options.count("database"))
config().setString("database", options["database"].as<std::string>());
getClientConfiguration().setString("database", options["database"].as<std::string>());
if (options.count("config-file"))
config().setString("config-file", options["config-file"].as<std::string>());
getClientConfiguration().setString("config-file", options["config-file"].as<std::string>());
if (options.count("queries-file"))
queries_files = options["queries-file"].as<std::vector<std::string>>();
if (options.count("multiline"))
config().setBool("multiline", true);
getClientConfiguration().setBool("multiline", true);
if (options.count("multiquery"))
config().setBool("multiquery", true);
getClientConfiguration().setBool("multiquery", true);
if (options.count("ignore-error"))
config().setBool("ignore-error", true);
getClientConfiguration().setBool("ignore-error", true);
if (options.count("format"))
config().setString("format", options["format"].as<std::string>());
getClientConfiguration().setString("format", options["format"].as<std::string>());
if (options.count("output-format"))
config().setString("output-format", options["output-format"].as<std::string>());
getClientConfiguration().setString("output-format", options["output-format"].as<std::string>());
if (options.count("vertical"))
config().setBool("vertical", true);
getClientConfiguration().setBool("vertical", true);
if (options.count("stacktrace"))
config().setBool("stacktrace", true);
getClientConfiguration().setBool("stacktrace", true);
if (options.count("print-profile-events"))
config().setBool("print-profile-events", true);
getClientConfiguration().setBool("print-profile-events", true);
if (options.count("profile-events-delay-ms"))
config().setUInt64("profile-events-delay-ms", options["profile-events-delay-ms"].as<UInt64>());
getClientConfiguration().setUInt64("profile-events-delay-ms", options["profile-events-delay-ms"].as<UInt64>());
/// Whether to print the number of processed rows at
if (options.count("processed-rows"))
print_num_processed_rows = true;
getClientConfiguration().setBool("print-num-processed-rows", true);
if (options.count("progress"))
{
switch (options["progress"].as<ProgressOption>())
{
case DEFAULT:
config().setString("progress", "default");
getClientConfiguration().setString("progress", "default");
break;
case OFF:
config().setString("progress", "off");
getClientConfiguration().setString("progress", "off");
break;
case TTY:
config().setString("progress", "tty");
getClientConfiguration().setString("progress", "tty");
break;
case ERR:
config().setString("progress", "err");
getClientConfiguration().setString("progress", "err");
break;
}
}
if (options.count("echo"))
config().setBool("echo", true);
getClientConfiguration().setBool("echo", true);
if (options.count("disable_suggestion"))
config().setBool("disable_suggestion", true);
getClientConfiguration().setBool("disable_suggestion", true);
if (options.count("wait_for_suggestions_to_load"))
config().setBool("wait_for_suggestions_to_load", true);
getClientConfiguration().setBool("wait_for_suggestions_to_load", true);
if (options.count("suggestion_limit"))
config().setInt("suggestion_limit", options["suggestion_limit"].as<int>());
getClientConfiguration().setInt("suggestion_limit", options["suggestion_limit"].as<int>());
if (options.count("highlight"))
config().setBool("highlight", options["highlight"].as<bool>());
getClientConfiguration().setBool("highlight", options["highlight"].as<bool>());
if (options.count("history_file"))
config().setString("history_file", options["history_file"].as<std::string>());
getClientConfiguration().setString("history_file", options["history_file"].as<std::string>());
if (options.count("interactive"))
config().setBool("interactive", true);
getClientConfiguration().setBool("interactive", true);
if (options.count("pager"))
config().setString("pager", options["pager"].as<std::string>());
getClientConfiguration().setString("pager", options["pager"].as<std::string>());
if (options.count("log-level"))
Poco::Logger::root().setLevel(options["log-level"].as<std::string>());
@ -3145,13 +3172,13 @@ void ClientBase::init(int argc, char ** argv)
alias_names.reserve(options_description.main_description->options().size());
for (const auto& option : options_description.main_description->options())
alias_names.insert(option->long_name());
argsToConfig(common_arguments, config(), 100, &alias_names);
argsToConfig(common_arguments, getClientConfiguration(), 100, &alias_names);
}
clearPasswordFromCommandLine(argc, argv);
/// Limit on total memory usage
std::string max_client_memory_usage = config().getString("max_memory_usage_in_client", "0" /*default value*/);
std::string max_client_memory_usage = getClientConfiguration().getString("max_memory_usage_in_client", "0" /*default value*/);
if (max_client_memory_usage != "0")
{
UInt64 max_client_memory_usage_int = parseWithSizeSuffix<UInt64>(max_client_memory_usage.c_str(), max_client_memory_usage.length());
@ -3160,8 +3187,6 @@ void ClientBase::init(int argc, char ** argv)
total_memory_tracker.setDescription("(total)");
total_memory_tracker.setMetric(CurrentMetrics::MemoryTracking);
}
has_log_comment = config().has("log_comment");
}
}

View File

@ -18,7 +18,6 @@
#include <Storages/SelectQueryInfo.h>
#include <Storages/MergeTree/MergeTreeSettings.h>
namespace po = boost::program_options;
@ -67,13 +66,22 @@ class ClientBase : public Poco::Util::Application, public IHints<2>
public:
using Arguments = std::vector<String>;
ClientBase();
explicit ClientBase
(
int in_fd_ = STDIN_FILENO,
int out_fd_ = STDOUT_FILENO,
int err_fd_ = STDERR_FILENO,
std::istream & input_stream_ = std::cin,
std::ostream & output_stream_ = std::cout,
std::ostream & error_stream_ = std::cerr
);
~ClientBase() override;
void init(int argc, char ** argv);
std::vector<String> getAllRegisteredNames() const override { return cmd_options; }
static ASTPtr parseQuery(const char *& pos, const char * end, const Settings & settings, bool allow_multi_statements, bool is_interactive, bool ignore_error);
ASTPtr parseQuery(const char *& pos, const char * end, const Settings & settings, bool allow_multi_statements);
protected:
void runInteractive();
@ -82,6 +90,9 @@ protected:
char * argv0 = nullptr;
void runLibFuzzer();
/// This is the analogue of Poco::Application::config()
virtual Poco::Util::LayeredConfiguration & getClientConfiguration() = 0;
virtual bool processWithFuzzing(const String &)
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Query processing with fuzzing is not implemented");
@ -107,7 +118,7 @@ protected:
String & query_to_execute, ASTPtr & parsed_query, const String & all_queries_text,
std::unique_ptr<Exception> & current_exception);
static void clearTerminal();
void clearTerminal();
void showClientVersion();
using ProgramOptionsDescription = boost::program_options::options_description;
@ -129,6 +140,7 @@ protected:
const std::vector<Arguments> & hosts_and_ports_arguments) = 0;
virtual void processConfig() = 0;
/// Returns true if query processing was successful.
bool processQueryText(const String & text);
virtual void readArguments(
@ -206,7 +218,6 @@ protected:
bool echo_queries = false; /// Print queries before execution in batch mode.
bool ignore_error = false; /// In case of errors, don't print error message, continue to next query. Only applicable for non-interactive mode.
bool print_time_to_stderr = false; /// Output execution time to stderr in batch mode.
std::optional<Suggest> suggest;
bool load_suggestions = false;
@ -248,9 +259,9 @@ protected:
ConnectionParameters connection_parameters;
/// Buffer that reads from stdin in batch mode.
ReadBufferFromFileDescriptor std_in{STDIN_FILENO};
ReadBufferFromFileDescriptor std_in;
/// Console output.
WriteBufferFromFileDescriptor std_out{STDOUT_FILENO};
WriteBufferFromFileDescriptor std_out;
std::unique_ptr<ShellCommand> pager_cmd;
/// The user can specify to redirect query output to a file.
@ -281,7 +292,6 @@ protected:
bool need_render_profile_events = true;
bool written_first_block = false;
size_t processed_rows = 0; /// How many rows have been read or written.
bool print_num_processed_rows = false; /// Whether to print the number of processed rows at
bool print_stack_trace = false;
/// The last exception that was received from the server. Is used for the
@ -329,8 +339,14 @@ protected:
bool cancelled = false;
bool cancelled_printed = false;
/// Does log_comment has specified by user?
bool has_log_comment = false;
/// Unpacked descriptors and streams for the ease of use.
int in_fd = STDIN_FILENO;
int out_fd = STDOUT_FILENO;
int err_fd = STDERR_FILENO;
std::istream & input_stream;
std::ostream & output_stream;
std::ostream & error_stream;
};
}

View File

@ -1,3 +1,4 @@
#include <cstddef>
#include <memory>
#include <Poco/Net/NetException.h>
#include <Core/Defines.h>
@ -37,6 +38,7 @@
#include <Common/FailPoint.h>
#include <Common/config_version.h>
#include <Core/Types.h>
#include "config.h"
#if USE_SSL
@ -68,12 +70,23 @@ namespace ErrorCodes
extern const int EMPTY_DATA_PASSED;
}
Connection::~Connection() = default;
Connection::~Connection()
{
try{
if (connected)
Connection::disconnect();
}
catch (...)
{
tryLogCurrentException(__PRETTY_FUNCTION__);
}
}
Connection::Connection(const String & host_, UInt16 port_,
const String & default_database_,
const String & user_, const String & password_,
[[maybe_unused]] const SSHKey & ssh_private_key_,
const String & jwt_,
const String & quota_key_,
const String & cluster_,
const String & cluster_secret_,
@ -86,6 +99,7 @@ Connection::Connection(const String & host_, UInt16 port_,
, ssh_private_key(ssh_private_key_)
#endif
, quota_key(quota_key_)
, jwt(jwt_)
, cluster(cluster_)
, cluster_secret(cluster_secret_)
, client_name(client_name_)
@ -257,13 +271,31 @@ void Connection::connect(const ConnectionTimeouts & timeouts)
void Connection::disconnect()
{
maybe_compressed_out = nullptr;
in = nullptr;
last_input_packet_type.reset();
std::exception_ptr finalize_exception;
try
{
// finalize() can write and throw an exception.
if (maybe_compressed_out)
maybe_compressed_out->finalize();
}
catch (...)
{
/// Don't throw an exception here, it will leave Connection in invalid state.
finalize_exception = std::current_exception();
if (out)
{
out->cancel();
out = nullptr;
}
}
maybe_compressed_out = nullptr;
try
{
// finalize() can write to socket and throw an exception.
if (out)
out->finalize();
}
@ -276,6 +308,7 @@ void Connection::disconnect()
if (socket)
socket->close();
socket = nullptr;
connected = false;
nonce.reset();
@ -341,6 +374,11 @@ void Connection::sendHello()
performHandshakeForSSHAuth();
}
#endif
else if (!jwt.empty())
{
writeStringBinary(EncodedUserInfo::JWT_AUTHENTICAION_MARKER, *out);
writeStringBinary(jwt, *out);
}
else
{
writeStringBinary(user, *out);
@ -767,6 +805,8 @@ void Connection::sendQuery(
}
maybe_compressed_in.reset();
if (maybe_compressed_out && maybe_compressed_out != out)
maybe_compressed_out->cancel();
maybe_compressed_out.reset();
block_in.reset();
block_logs_in.reset();
@ -1310,6 +1350,7 @@ ServerConnectionPtr Connection::createConnection(const ConnectionParameters & pa
parameters.user,
parameters.password,
parameters.ssh_private_key,
parameters.jwt,
parameters.quota_key,
"", /* cluster */
"", /* cluster_secret */

View File

@ -53,6 +53,7 @@ public:
const String & default_database_,
const String & user_, const String & password_,
const SSHKey & ssh_private_key_,
const String & jwt_,
const String & quota_key_,
const String & cluster_,
const String & cluster_secret_,
@ -173,6 +174,7 @@ private:
SSHKey ssh_private_key;
#endif
String quota_key;
String jwt;
/// For inter-server authorization
String cluster;

View File

@ -52,31 +52,11 @@ ConnectionParameters::ConnectionParameters(const Poco::Util::AbstractConfigurati
/// changed the default value to "default" to fix the issue when the user in the prompt is blank
user = config.getString("user", "default");
if (!config.has("ssh-key-file"))
if (config.has("jwt"))
{
bool password_prompt = false;
if (config.getBool("ask-password", false))
{
if (config.has("password"))
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Specified both --password and --ask-password. Remove one of them");
password_prompt = true;
}
else
{
password = config.getString("password", "");
/// if the value of --password is omitted, the password will be set implicitly to "\n"
if (password == ASK_PASSWORD)
password_prompt = true;
}
if (password_prompt)
{
std::string prompt{"Password for user (" + user + "): "};
char buf[1000] = {};
if (auto * result = readpassphrase(prompt.c_str(), buf, sizeof(buf), 0))
password = result;
}
jwt = config.getString("jwt");
}
else
else if (config.has("ssh-key-file"))
{
#if USE_SSH
std::string filename = config.getString("ssh-key-file");
@ -102,6 +82,30 @@ ConnectionParameters::ConnectionParameters(const Poco::Util::AbstractConfigurati
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSH is disabled, because ClickHouse is built without libssh");
#endif
}
else
{
bool password_prompt = false;
if (config.getBool("ask-password", false))
{
if (config.has("password"))
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Specified both --password and --ask-password. Remove one of them");
password_prompt = true;
}
else
{
password = config.getString("password", "");
/// if the value of --password is omitted, the password will be set implicitly to "\n"
if (password == ASK_PASSWORD)
password_prompt = true;
}
if (password_prompt)
{
std::string prompt{"Password for user (" + user + "): "};
char buf[1000] = {};
if (auto * result = readpassphrase(prompt.c_str(), buf, sizeof(buf), 0))
password = result;
}
}
quota_key = config.getString("quota_key", "");
@ -139,7 +143,7 @@ ConnectionParameters::ConnectionParameters(const Poco::Util::AbstractConfigurati
}
UInt16 ConnectionParameters::getPortFromConfig(const Poco::Util::AbstractConfiguration & config,
std::string connection_host)
const std::string & connection_host)
{
bool is_secure = enableSecureConnection(config, connection_host);
return config.getInt("port",

View File

@ -22,6 +22,7 @@ struct ConnectionParameters
std::string password;
std::string quota_key;
SSHKey ssh_private_key;
std::string jwt;
Protocol::Secure security = Protocol::Secure::Disable;
Protocol::Compression compression = Protocol::Compression::Enable;
ConnectionTimeouts timeouts;
@ -30,7 +31,7 @@ struct ConnectionParameters
ConnectionParameters(const Poco::Util::AbstractConfiguration & config, std::string host);
ConnectionParameters(const Poco::Util::AbstractConfiguration & config, std::string host, std::optional<UInt16> port);
static UInt16 getPortFromConfig(const Poco::Util::AbstractConfiguration & config, std::string connection_host);
static UInt16 getPortFromConfig(const Poco::Util::AbstractConfiguration & config, const std::string & connection_host);
/// Ask to enter the user's password if password option contains this value.
/// "\n" is used because there is hardly a chance that a user would use '\n' as password.

View File

@ -123,7 +123,7 @@ protected:
{
return std::make_shared<Connection>(
host, port,
default_database, user, password, SSHKey(), quota_key,
default_database, user, password, SSHKey(), /*jwt*/ "", quota_key,
cluster, cluster_secret,
client_name, compression, secure);
}

View File

@ -23,14 +23,6 @@ void trim(String & s)
s.erase(std::find_if(s.rbegin(), s.rend(), [](int ch) { return !std::isspace(ch); }).base(), s.end());
}
/// Check if multi-line query is inserted from the paste buffer.
/// Allows delaying the start of query execution until the entirety of query is inserted.
bool hasInputData()
{
pollfd fd{STDIN_FILENO, POLLIN, 0};
return poll(&fd, 1, 0) == 1;
}
struct NoCaseCompare
{
bool operator()(const std::string & str1, const std::string & str2)
@ -63,6 +55,14 @@ void addNewWords(Words & to, const Words & from, Compare comp)
namespace DB
{
/// Check if multi-line query is inserted from the paste buffer.
/// Allows delaying the start of query execution until the entirety of query is inserted.
bool LineReader::hasInputData() const
{
pollfd fd{in_fd, POLLIN, 0};
return poll(&fd, 1, 0) == 1;
}
replxx::Replxx::completions_t LineReader::Suggest::getCompletions(const String & prefix, size_t prefix_length, const char * word_break_characters)
{
std::string_view last_word;
@ -131,11 +131,22 @@ void LineReader::Suggest::addWords(Words && new_words) // NOLINT(cppcoreguidelin
}
}
LineReader::LineReader(const String & history_file_path_, bool multiline_, Patterns extenders_, Patterns delimiters_)
LineReader::LineReader(
const String & history_file_path_,
bool multiline_,
Patterns extenders_,
Patterns delimiters_,
std::istream & input_stream_,
std::ostream & output_stream_,
int in_fd_
)
: history_file_path(history_file_path_)
, multiline(multiline_)
, extenders(std::move(extenders_))
, delimiters(std::move(delimiters_))
, input_stream(input_stream_)
, output_stream(output_stream_)
, in_fd(in_fd_)
{
/// FIXME: check extender != delimiter
}
@ -212,9 +223,9 @@ LineReader::InputStatus LineReader::readOneLine(const String & prompt)
input.clear();
{
std::cout << prompt;
std::getline(std::cin, input);
if (!std::cin.good())
output_stream << prompt;
std::getline(input_stream, input);
if (!input_stream.good())
return ABORT;
}

View File

@ -1,5 +1,7 @@
#pragma once
#include <iostream>
#include <unistd.h>
#include <mutex>
#include <atomic>
#include <vector>
@ -37,7 +39,16 @@ public:
using Patterns = std::vector<const char *>;
LineReader(const String & history_file_path, bool multiline, Patterns extenders, Patterns delimiters);
LineReader(
const String & history_file_path,
bool multiline,
Patterns extenders,
Patterns delimiters,
std::istream & input_stream_ = std::cin,
std::ostream & output_stream_ = std::cout,
int in_fd_ = STDIN_FILENO
);
virtual ~LineReader() = default;
/// Reads the whole line until delimiter (in multiline mode) or until the last line without extender.
@ -56,6 +67,8 @@ public:
virtual void enableBracketedPaste() {}
virtual void disableBracketedPaste() {}
bool hasInputData() const;
protected:
enum InputStatus
{
@ -77,6 +90,10 @@ protected:
virtual InputStatus readOneLine(const String & prompt);
virtual void addToHistory(const String &) {}
std::istream & input_stream;
std::ostream & output_stream;
int in_fd;
};
}

View File

@ -16,7 +16,10 @@
#include <Storages/IStorage.h>
#include <Common/ConcurrentBoundedQueue.h>
#include <Common/CurrentThread.h>
#include <Parsers/ParserQuery.h>
#include <Parsers/PRQL/ParserPRQLQuery.h>
#include <Parsers/Kusto/ParserKQLStatement.h>
#include <Parsers/Kusto/parseKQLQuery.h>
namespace DB
{
@ -150,12 +153,26 @@ void LocalConnection::sendQuery(
state->block = sample;
String current_format = "Values";
const auto & settings = context->getSettingsRef();
const char * begin = state->query.data();
auto parsed_query = ClientBase::parseQuery(begin, begin + state->query.size(),
context->getSettingsRef(),
/*allow_multi_statements=*/ false,
/*is_interactive=*/ false,
/*ignore_error=*/ false);
const char * end = begin + state->query.size();
const Dialect & dialect = settings.dialect;
std::unique_ptr<IParserBase> parser;
if (dialect == Dialect::kusto)
parser = std::make_unique<ParserKQLStatement>(end, settings.allow_settings_after_format_in_insert);
else if (dialect == Dialect::prql)
parser = std::make_unique<ParserPRQLQuery>(settings.max_query_size, settings.max_parser_depth, settings.max_parser_backtracks);
else
parser = std::make_unique<ParserQuery>(end, settings.allow_settings_after_format_in_insert);
ASTPtr parsed_query;
if (dialect == Dialect::kusto)
parsed_query = parseKQLQueryAndMovePosition(*parser, begin, end, "", /*allow_multi_statements*/false, settings.max_query_size, settings.max_parser_depth, settings.max_parser_backtracks);
else
parsed_query = parseQueryAndMovePosition(*parser, begin, end, "", /*allow_multi_statements*/false, settings.max_query_size, settings.max_parser_depth, settings.max_parser_backtracks);
if (const auto * insert = parsed_query->as<ASTInsertQuery>())
{
if (!insert->format.empty())

View File

@ -297,8 +297,15 @@ ReplxxLineReader::ReplxxLineReader(
Patterns extenders_,
Patterns delimiters_,
const char word_break_characters_[],
replxx::Replxx::highlighter_callback_t highlighter_)
: LineReader(history_file_path_, multiline_, std::move(extenders_), std::move(delimiters_)), highlighter(std::move(highlighter_))
replxx::Replxx::highlighter_callback_t highlighter_,
[[ maybe_unused ]] std::istream & input_stream_,
[[ maybe_unused ]] std::ostream & output_stream_,
[[ maybe_unused ]] int in_fd_,
[[ maybe_unused ]] int out_fd_,
[[ maybe_unused ]] int err_fd_
)
: LineReader(history_file_path_, multiline_, std::move(extenders_), std::move(delimiters_), input_stream_, output_stream_, in_fd_)
, highlighter(std::move(highlighter_))
, word_break_characters(word_break_characters_)
, editor(getEditor())
{
@ -471,7 +478,7 @@ ReplxxLineReader::ReplxxLineReader(
ReplxxLineReader::~ReplxxLineReader()
{
if (close(history_file_fd))
if (history_file_fd >= 0 && close(history_file_fd))
rx.print("Close of history file failed: %s\n", errnoToString().c_str());
}
@ -496,7 +503,7 @@ void ReplxxLineReader::addToHistory(const String & line)
// but replxx::Replxx::history_load() does not
// and that is why flock() is added here.
bool locked = false;
if (flock(history_file_fd, LOCK_EX))
if (history_file_fd >= 0 && flock(history_file_fd, LOCK_EX))
rx.print("Lock of history file failed: %s\n", errnoToString().c_str());
else
locked = true;
@ -507,7 +514,7 @@ void ReplxxLineReader::addToHistory(const String & line)
if (!rx.history_save(history_file_path))
rx.print("Saving history failed: %s\n", errnoToString().c_str());
if (locked && 0 != flock(history_file_fd, LOCK_UN))
if (history_file_fd >= 0 && locked && 0 != flock(history_file_fd, LOCK_UN))
rx.print("Unlock of history file failed: %s\n", errnoToString().c_str());
}

View File

@ -1,6 +1,7 @@
#pragma once
#include "LineReader.h"
#include <Client/LineReader.h>
#include <base/strong_typedef.h>
#include <replxx.hxx>
namespace DB
@ -9,14 +10,22 @@ namespace DB
class ReplxxLineReader : public LineReader
{
public:
ReplxxLineReader(
ReplxxLineReader
(
Suggest & suggest,
const String & history_file_path,
bool multiline,
Patterns extenders_,
Patterns delimiters_,
const char word_break_characters_[],
replxx::Replxx::highlighter_callback_t highlighter_);
replxx::Replxx::highlighter_callback_t highlighter_,
std::istream & input_stream_ = std::cin,
std::ostream & output_stream_ = std::cout,
int in_fd_ = STDIN_FILENO,
int out_fd_ = STDOUT_FILENO,
int err_fd_ = STDERR_FILENO
);
~ReplxxLineReader() override;
void enableBracketedPaste() override;

View File

@ -19,8 +19,7 @@ ConfigReloader::ConfigReloader(
const std::string & preprocessed_dir_,
zkutil::ZooKeeperNodeCache && zk_node_cache_,
const zkutil::EventPtr & zk_changed_event_,
Updater && updater_,
bool already_loaded)
Updater && updater_)
: config_path(config_path_)
, extra_paths(extra_paths_)
, preprocessed_dir(preprocessed_dir_)
@ -28,10 +27,15 @@ ConfigReloader::ConfigReloader(
, zk_changed_event(zk_changed_event_)
, updater(std::move(updater_))
{
if (!already_loaded)
reloadIfNewer(/* force = */ true, /* throw_on_error = */ true, /* fallback_to_preprocessed = */ true, /* initial_loading = */ true);
}
auto config = reloadIfNewer(/* force = */ true, /* throw_on_error = */ true, /* fallback_to_preprocessed = */ true, /* initial_loading = */ true);
if (config.has_value())
reload_interval = std::chrono::milliseconds(config->configuration->getInt64("config_reload_interval_ms", DEFAULT_RELOAD_INTERVAL.count()));
else
reload_interval = DEFAULT_RELOAD_INTERVAL;
LOG_TRACE(log, "Config reload interval set to {}ms", reload_interval.count());
}
void ConfigReloader::start()
{
@ -82,7 +86,17 @@ void ConfigReloader::run()
if (quit)
return;
reloadIfNewer(zk_changed, /* throw_on_error = */ false, /* fallback_to_preprocessed = */ false, /* initial_loading = */ false);
auto config = reloadIfNewer(zk_changed, /* throw_on_error = */ false, /* fallback_to_preprocessed = */ false, /* initial_loading = */ false);
if (config.has_value())
{
auto new_reload_interval = std::chrono::milliseconds(config->configuration->getInt64("config_reload_interval_ms", DEFAULT_RELOAD_INTERVAL.count()));
if (new_reload_interval != reload_interval)
{
reload_interval = new_reload_interval;
LOG_TRACE(log, "Config reload interval changed to {}ms", reload_interval.count());
}
}
}
catch (...)
{
@ -92,7 +106,7 @@ void ConfigReloader::run()
}
}
void ConfigReloader::reloadIfNewer(bool force, bool throw_on_error, bool fallback_to_preprocessed, bool initial_loading)
std::optional<ConfigProcessor::LoadedConfig> ConfigReloader::reloadIfNewer(bool force, bool throw_on_error, bool fallback_to_preprocessed, bool initial_loading)
{
std::lock_guard lock(reload_mutex);
@ -120,7 +134,7 @@ void ConfigReloader::reloadIfNewer(bool force, bool throw_on_error, bool fallbac
throw;
tryLogCurrentException(log, "ZooKeeper error when loading config from '" + config_path + "'");
return;
return std::nullopt;
}
catch (...)
{
@ -128,7 +142,7 @@ void ConfigReloader::reloadIfNewer(bool force, bool throw_on_error, bool fallbac
throw;
tryLogCurrentException(log, "Error loading config from '" + config_path + "'");
return;
return std::nullopt;
}
config_processor.savePreprocessedConfig(loaded_config, preprocessed_dir);
@ -154,11 +168,13 @@ void ConfigReloader::reloadIfNewer(bool force, bool throw_on_error, bool fallbac
if (throw_on_error)
throw;
tryLogCurrentException(log, "Error updating configuration from '" + config_path + "' config.");
return;
return std::nullopt;
}
LOG_DEBUG(log, "Loaded config '{}', performed update on configuration", config_path);
return loaded_config;
}
return std::nullopt;
}
struct ConfigReloader::FileWithTimestamp

View File

@ -17,8 +17,6 @@ namespace Poco { class Logger; }
namespace DB
{
class Context;
/** Every two seconds checks configuration files for update.
* If configuration is changed, then config will be reloaded by ConfigProcessor
* and the reloaded config will be applied via Updater functor.
@ -27,6 +25,8 @@ class Context;
class ConfigReloader
{
public:
static constexpr auto DEFAULT_RELOAD_INTERVAL = std::chrono::milliseconds(2000);
using Updater = std::function<void(ConfigurationPtr, bool)>;
ConfigReloader(
@ -35,8 +35,7 @@ public:
const std::string & preprocessed_dir,
zkutil::ZooKeeperNodeCache && zk_node_cache,
const zkutil::EventPtr & zk_changed_event,
Updater && updater,
bool already_loaded);
Updater && updater);
~ConfigReloader();
@ -53,7 +52,7 @@ public:
private:
void run();
void reloadIfNewer(bool force, bool throw_on_error, bool fallback_to_preprocessed, bool initial_loading);
std::optional<ConfigProcessor::LoadedConfig> reloadIfNewer(bool force, bool throw_on_error, bool fallback_to_preprocessed, bool initial_loading);
struct FileWithTimestamp;
@ -67,8 +66,6 @@ private:
FilesChangesTracker getNewFileList() const;
static constexpr auto reload_interval = std::chrono::seconds(2);
LoggerPtr log = getLogger("ConfigReloader");
std::string config_path;
@ -85,6 +82,8 @@ private:
std::atomic<bool> quit{false};
ThreadFromGlobalPool thread;
std::chrono::milliseconds reload_interval = DEFAULT_RELOAD_INTERVAL;
/// Locked inside reloadIfNewer.
std::mutex reload_mutex;
};

View File

@ -60,4 +60,26 @@ GetPriorityForLoadBalancing::getPriorityFunc(LoadBalancing load_balance, size_t
return get_priority;
}
/// Some load balancing strategies (such as "nearest hostname") have preferred nodes to connect to.
/// Usually it's a node in the same data center/availability zone.
/// For other strategies there's no difference between nodes.
bool GetPriorityForLoadBalancing::hasOptimalNode() const
{
switch (load_balancing)
{
case LoadBalancing::NEAREST_HOSTNAME:
return true;
case LoadBalancing::HOSTNAME_LEVENSHTEIN_DISTANCE:
return true;
case LoadBalancing::IN_ORDER:
return false;
case LoadBalancing::RANDOM:
return false;
case LoadBalancing::FIRST_OR_RANDOM:
return true;
case LoadBalancing::ROUND_ROBIN:
return false;
}
}
}

View File

@ -30,6 +30,8 @@ public:
Func getPriorityFunc(LoadBalancing load_balance, size_t offset, size_t pool_size) const;
bool hasOptimalNode() const;
std::vector<size_t> hostname_prefix_distance; /// Prefix distances from name of this host to the names of hosts of pools.
std::vector<size_t> hostname_levenshtein_distance; /// Levenshtein Distances from name of this host to the names of hosts of pools.

View File

@ -637,11 +637,11 @@ The server successfully detected this situation and will download merged part fr
M(S3QueueSetFileProcessingMicroseconds, "Time spent to set file as processing")\
M(S3QueueSetFileProcessedMicroseconds, "Time spent to set file as processed")\
M(S3QueueSetFileFailedMicroseconds, "Time spent to set file as failed")\
M(S3QueueFailedFiles, "Number of files which failed to be processed")\
M(S3QueueProcessedFiles, "Number of files which were processed")\
M(S3QueueCleanupMaxSetSizeOrTTLMicroseconds, "Time spent to set file as failed")\
M(S3QueuePullMicroseconds, "Time spent to read file data")\
M(S3QueueLockLocalFileStatusesMicroseconds, "Time spent to lock local file statuses")\
M(ObjectStorageQueueFailedFiles, "Number of files which failed to be processed")\
M(ObjectStorageQueueProcessedFiles, "Number of files which were processed")\
M(ObjectStorageQueueCleanupMaxSetSizeOrTTLMicroseconds, "Time spent to set file as failed")\
M(ObjectStorageQueuePullMicroseconds, "Time spent to read file data")\
M(ObjectStorageQueueLockLocalFileStatusesMicroseconds, "Time spent to lock local file statuses")\
\
M(ServerStartupMilliseconds, "Time elapsed from starting server to listening to sockets in milliseconds")\
M(IOUringSQEsSubmitted, "Total number of io_uring SQEs submitted") \

View File

@ -92,19 +92,19 @@ void ProgressIndication::writeFinalProgress()
if (progress.read_rows < 1000)
return;
std::cout << "Processed " << formatReadableQuantity(progress.read_rows) << " rows, "
output_stream << "Processed " << formatReadableQuantity(progress.read_rows) << " rows, "
<< formatReadableSizeWithDecimalSuffix(progress.read_bytes);
UInt64 elapsed_ns = getElapsedNanoseconds();
if (elapsed_ns)
std::cout << " (" << formatReadableQuantity(progress.read_rows * 1000000000.0 / elapsed_ns) << " rows/s., "
output_stream << " (" << formatReadableQuantity(progress.read_rows * 1000000000.0 / elapsed_ns) << " rows/s., "
<< formatReadableSizeWithDecimalSuffix(progress.read_bytes * 1000000000.0 / elapsed_ns) << "/s.)";
else
std::cout << ". ";
output_stream << ". ";
auto peak_memory_usage = getMemoryUsage().peak;
if (peak_memory_usage >= 0)
std::cout << "\nPeak memory usage: " << formatReadableSizeWithBinarySuffix(peak_memory_usage) << ".";
output_stream << "\nPeak memory usage: " << formatReadableSizeWithBinarySuffix(peak_memory_usage) << ".";
}
void ProgressIndication::writeProgress(WriteBufferFromFileDescriptor & message)
@ -125,7 +125,7 @@ void ProgressIndication::writeProgress(WriteBufferFromFileDescriptor & message)
const char * indicator = indicators[increment % 8];
size_t terminal_width = getTerminalWidth();
size_t terminal_width = getTerminalWidth(in_fd, err_fd);
if (!written_progress_chars)
{

View File

@ -32,6 +32,19 @@ using HostToTimesMap = std::unordered_map<String, ThreadEventData>;
class ProgressIndication
{
public:
explicit ProgressIndication
(
std::ostream & output_stream_ = std::cout,
int in_fd_ = STDIN_FILENO,
int err_fd_ = STDERR_FILENO
)
: output_stream(output_stream_),
in_fd(in_fd_),
err_fd(err_fd_)
{
}
/// Write progress bar.
void writeProgress(WriteBufferFromFileDescriptor & message);
void clearProgressOutput(WriteBufferFromFileDescriptor & message);
@ -103,6 +116,10 @@ private:
/// - hosts_data/cpu_usage_meter (guarded with profile_events_mutex)
mutable std::mutex profile_events_mutex;
mutable std::mutex progress_mutex;
std::ostream & output_stream;
int in_fd;
int err_fd;
};
}

View File

@ -11,7 +11,7 @@
#include <Interpreters/TextLog.h>
#include <Interpreters/TraceLog.h>
#include <Interpreters/FilesystemCacheLog.h>
#include <Interpreters/S3QueueLog.h>
#include <Interpreters/ObjectStorageQueueLog.h>
#include <Interpreters/FilesystemReadPrefetchesLog.h>
#include <Interpreters/ProcessorsProfileLog.h>
#include <Interpreters/ZooKeeperLog.h>

View File

@ -25,7 +25,7 @@
M(ZooKeeperLogElement) \
M(ProcessorProfileLogElement) \
M(TextLogElement) \
M(S3QueueLogElement) \
M(ObjectStorageQueueLogElement) \
M(FilesystemCacheLogElement) \
M(FilesystemReadPrefetchesLogElement) \
M(AsynchronousInsertLogElement) \

View File

@ -13,17 +13,17 @@ namespace DB::ErrorCodes
extern const int SYSTEM_ERROR;
}
uint16_t getTerminalWidth()
uint16_t getTerminalWidth(int in_fd, int err_fd)
{
struct winsize terminal_size {};
if (isatty(STDIN_FILENO))
if (isatty(in_fd))
{
if (ioctl(STDIN_FILENO, TIOCGWINSZ, &terminal_size))
if (ioctl(in_fd, TIOCGWINSZ, &terminal_size))
throw DB::ErrnoException(DB::ErrorCodes::SYSTEM_ERROR, "Cannot obtain terminal window size (ioctl TIOCGWINSZ)");
}
else if (isatty(STDERR_FILENO))
else if (isatty(err_fd))
{
if (ioctl(STDERR_FILENO, TIOCGWINSZ, &terminal_size))
if (ioctl(err_fd, TIOCGWINSZ, &terminal_size))
throw DB::ErrnoException(DB::ErrorCodes::SYSTEM_ERROR, "Cannot obtain terminal window size (ioctl TIOCGWINSZ)");
}
/// Default - 0.

View File

@ -1,16 +1,16 @@
#pragma once
#include <string>
#include <unistd.h>
#include <boost/program_options.hpp>
namespace po = boost::program_options;
uint16_t getTerminalWidth();
uint16_t getTerminalWidth(int in_fd = STDIN_FILENO, int err_fd = STDERR_FILENO);
/** Creates po::options_description with name and an appropriate size for option displaying
* when program is called with option --help
* */
po::options_description createOptionsDescription(const std::string &caption, unsigned short terminal_width); /// NOLINT

View File

@ -559,6 +559,8 @@ public:
/// Useful to check owner of ephemeral node.
virtual int64_t getSessionID() const = 0;
virtual String tryGetAvailabilityZone() { return ""; }
/// If the method will throw an exception, callbacks won't be called.
///
/// After the method is executed successfully, you must wait for callbacks
@ -635,10 +637,6 @@ public:
virtual const DB::KeeperFeatureFlags * getKeeperFeatureFlags() const { return nullptr; }
/// A ZooKeeper session can have an optional deadline set on it.
/// After it has been reached, the session needs to be finalized.
virtual bool hasReachedDeadline() const = 0;
/// Expire session and finish all pending requests
virtual void finalize(const String & reason) = 0;
};

View File

@ -39,7 +39,6 @@ public:
~TestKeeper() override;
bool isExpired() const override { return expired; }
bool hasReachedDeadline() const override { return false; }
Int8 getConnectedNodeIdx() const override { return 0; }
String getConnectedHostPort() const override { return "TestKeeper:0000"; }
int32_t getConnectionXid() const override { return 0; }

View File

@ -8,6 +8,7 @@
#include <functional>
#include <ranges>
#include <vector>
#include <chrono>
#include <Common/ZooKeeper/Types.h>
#include <Common/ZooKeeper/ZooKeeperCommon.h>
@ -16,10 +17,12 @@
#include <base/sort.h>
#include <base/getFQDNOrHostName.h>
#include <Core/ServerUUID.h>
#include <Core/BackgroundSchedulePool.h>
#include "Common/ZooKeeper/IKeeper.h"
#include <Common/DNSResolver.h>
#include <Common/StringUtils.h>
#include <Common/Exception.h>
#include <Interpreters/Context.h>
#include <Poco/Net/NetException.h>
#include <Poco/Net/DNS.h>
@ -55,70 +58,120 @@ static void check(Coordination::Error code, const std::string & path)
throw KeeperException::fromPath(code, path);
}
UInt64 getSecondsUntilReconnect(const ZooKeeperArgs & args)
{
std::uniform_int_distribution<UInt32> fallback_session_lifetime_distribution
{
args.fallback_session_lifetime.min_sec,
args.fallback_session_lifetime.max_sec,
};
UInt32 session_lifetime_seconds = fallback_session_lifetime_distribution(thread_local_rng);
return session_lifetime_seconds;
}
void ZooKeeper::init(ZooKeeperArgs args_)
void ZooKeeper::updateAvailabilityZones()
{
ShuffleHosts shuffled_hosts = shuffleHosts();
for (const auto & node : shuffled_hosts)
{
try
{
ShuffleHosts single_node{node};
auto tmp_impl = std::make_unique<Coordination::ZooKeeper>(single_node, args, zk_log);
auto idx = node.original_index;
availability_zones[idx] = tmp_impl->tryGetAvailabilityZone();
LOG_TEST(log, "Got availability zone for {}: {}", args.hosts[idx], availability_zones[idx]);
}
catch (...)
{
DB::tryLogCurrentException(log, "Failed to get availability zone for " + node.host);
}
}
LOG_DEBUG(log, "Updated availability zones: [{}]", fmt::join(availability_zones, ", "));
}
void ZooKeeper::init(ZooKeeperArgs args_, std::unique_ptr<Coordination::IKeeper> existing_impl)
{
args = std::move(args_);
log = getLogger("ZooKeeper");
if (args.implementation == "zookeeper")
if (existing_impl)
{
chassert(args.implementation == "zookeeper");
impl = std::move(existing_impl);
LOG_INFO(log, "Switching to connection to a more optimal node {}", impl->getConnectedHostPort());
}
else if (args.implementation == "zookeeper")
{
if (args.hosts.empty())
throw KeeperException::fromMessage(Coordination::Error::ZBADARGUMENTS, "No hosts passed to ZooKeeper constructor.");
Coordination::ZooKeeper::Nodes nodes;
nodes.reserve(args.hosts.size());
chassert(args.availability_zones.size() == args.hosts.size());
if (availability_zones.empty())
{
/// availability_zones is empty on server startup or after config reloading
/// We will keep the az info when starting new sessions
availability_zones = args.availability_zones;
LOG_TEST(log, "Availability zones from config: [{}], client: {}", fmt::join(availability_zones, ", "), args.client_availability_zone);
if (args.availability_zone_autodetect)
updateAvailabilityZones();
}
chassert(availability_zones.size() == args.hosts.size());
/// Shuffle the hosts to distribute the load among ZooKeeper nodes.
std::vector<ShuffleHost> shuffled_hosts = shuffleHosts();
ShuffleHosts shuffled_hosts = shuffleHosts();
bool dns_error = false;
for (auto & host : shuffled_hosts)
{
auto & host_string = host.host;
try
{
const bool secure = startsWith(host_string, "secure://");
if (secure)
host_string.erase(0, strlen("secure://"));
/// We want to resolve all hosts without DNS cache for keeper connection.
Coordination::DNSResolver::instance().removeHostFromCache(host_string);
const Poco::Net::SocketAddress host_socket_addr{host_string};
LOG_TEST(log, "Adding ZooKeeper host {} ({})", host_string, host_socket_addr.toString());
nodes.emplace_back(Coordination::ZooKeeper::Node{host_socket_addr, host.original_index, secure});
}
catch (const Poco::Net::HostNotFoundException & e)
{
/// Most likely it's misconfiguration and wrong hostname was specified
LOG_ERROR(log, "Cannot use ZooKeeper host {}, reason: {}", host_string, e.displayText());
}
catch (const Poco::Net::DNSException & e)
{
/// Most likely DNS is not available now
dns_error = true;
LOG_ERROR(log, "Cannot use ZooKeeper host {} due to DNS error: {}", host_string, e.displayText());
}
}
if (nodes.empty())
{
/// For DNS errors we throw exception with ZCONNECTIONLOSS code, so it will be considered as hardware error, not user error
if (dns_error)
throw KeeperException::fromMessage(Coordination::Error::ZCONNECTIONLOSS, "Cannot resolve any of provided ZooKeeper hosts due to DNS error");
else
throw KeeperException::fromMessage(Coordination::Error::ZCONNECTIONLOSS, "Cannot use any of provided ZooKeeper nodes");
}
impl = std::make_unique<Coordination::ZooKeeper>(nodes, args, zk_log);
impl = std::make_unique<Coordination::ZooKeeper>(shuffled_hosts, args, zk_log);
Int8 node_idx = impl->getConnectedNodeIdx();
if (args.chroot.empty())
LOG_TRACE(log, "Initialized, hosts: {}", fmt::join(args.hosts, ","));
else
LOG_TRACE(log, "Initialized, hosts: {}, chroot: {}", fmt::join(args.hosts, ","), args.chroot);
/// If the balancing strategy has an optimal node then it will be the first in the list
bool connected_to_suboptimal_node = node_idx != shuffled_hosts[0].original_index;
bool respect_az = args.prefer_local_availability_zone && !args.client_availability_zone.empty();
bool may_benefit_from_reconnecting = respect_az || args.get_priority_load_balancing.hasOptimalNode();
if (connected_to_suboptimal_node && may_benefit_from_reconnecting)
{
auto reconnect_timeout_sec = getSecondsUntilReconnect(args);
LOG_DEBUG(log, "Connected to a suboptimal ZooKeeper host ({}, index {})."
" To preserve balance in ZooKeeper usage, this ZooKeeper session will expire in {} seconds",
impl->getConnectedHostPort(), node_idx, reconnect_timeout_sec);
auto reconnect_task_holder = DB::Context::getGlobalContextInstance()->getSchedulePool().createTask("ZKReconnect", [this, optimal_host = shuffled_hosts[0]]()
{
try
{
LOG_DEBUG(log, "Trying to connect to a more optimal node {}", optimal_host.host);
ShuffleHosts node{optimal_host};
std::unique_ptr<Coordination::IKeeper> new_impl = std::make_unique<Coordination::ZooKeeper>(node, args, zk_log);
Int8 new_node_idx = new_impl->getConnectedNodeIdx();
/// Maybe the node was unavailable when getting AZs first time, update just in case
if (args.availability_zone_autodetect && availability_zones[new_node_idx].empty())
{
availability_zones[new_node_idx] = new_impl->tryGetAvailabilityZone();
LOG_DEBUG(log, "Got availability zone for {}: {}", optimal_host.host, availability_zones[new_node_idx]);
}
optimal_impl = std::move(new_impl);
impl->finalize("Connected to a more optimal node");
}
catch (...)
{
LOG_WARNING(log, "Failed to connect to a more optimal ZooKeeper, will try again later: {}", DB::getCurrentExceptionMessage(/*with_stacktrace*/ false));
(*reconnect_task)->scheduleAfter(getSecondsUntilReconnect(args) * 1000);
}
});
reconnect_task = std::make_unique<DB::BackgroundSchedulePoolTaskHolder>(std::move(reconnect_task_holder));
(*reconnect_task)->activate();
(*reconnect_task)->scheduleAfter(reconnect_timeout_sec * 1000);
}
}
else if (args.implementation == "testkeeper")
{
@ -152,29 +205,53 @@ void ZooKeeper::init(ZooKeeperArgs args_)
}
}
ZooKeeper::~ZooKeeper()
{
if (reconnect_task)
(*reconnect_task)->deactivate();
}
ZooKeeper::ZooKeeper(const ZooKeeperArgs & args_, std::shared_ptr<DB::ZooKeeperLog> zk_log_)
: zk_log(std::move(zk_log_))
{
init(args_);
init(args_, /*existing_impl*/ {});
}
ZooKeeper::ZooKeeper(const ZooKeeperArgs & args_, std::shared_ptr<DB::ZooKeeperLog> zk_log_, Strings availability_zones_, std::unique_ptr<Coordination::IKeeper> existing_impl)
: availability_zones(std::move(availability_zones_)), zk_log(std::move(zk_log_))
{
if (availability_zones.size() != args_.hosts.size())
throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Argument sizes mismatch: availability_zones count {} and hosts count {}",
availability_zones.size(), args_.hosts.size());
init(args_, std::move(existing_impl));
}
ZooKeeper::ZooKeeper(const Poco::Util::AbstractConfiguration & config, const std::string & config_name, std::shared_ptr<DB::ZooKeeperLog> zk_log_)
: zk_log(std::move(zk_log_))
{
init(ZooKeeperArgs(config, config_name));
init(ZooKeeperArgs(config, config_name), /*existing_impl*/ {});
}
std::vector<ShuffleHost> ZooKeeper::shuffleHosts() const
ShuffleHosts ZooKeeper::shuffleHosts() const
{
std::function<Priority(size_t index)> get_priority = args.get_priority_load_balancing.getPriorityFunc(args.get_priority_load_balancing.load_balancing, 0, args.hosts.size());
std::vector<ShuffleHost> shuffle_hosts;
std::function<Priority(size_t index)> get_priority = args.get_priority_load_balancing.getPriorityFunc(
args.get_priority_load_balancing.load_balancing, /* offset for first_or_random */ 0, args.hosts.size());
ShuffleHosts shuffle_hosts;
for (size_t i = 0; i < args.hosts.size(); ++i)
{
ShuffleHost shuffle_host;
shuffle_host.host = args.hosts[i];
shuffle_host.original_index = static_cast<UInt8>(i);
shuffle_host.secure = startsWith(shuffle_host.host, "secure://");
if (shuffle_host.secure)
shuffle_host.host.erase(0, strlen("secure://"));
if (!args.client_availability_zone.empty() && !availability_zones[i].empty())
shuffle_host.az_info = availability_zones[i] == args.client_availability_zone ? ShuffleHost::SAME : ShuffleHost::OTHER;
if (get_priority)
shuffle_host.priority = get_priority(i);
shuffle_host.randomize();
@ -1023,7 +1100,10 @@ ZooKeeperPtr ZooKeeper::create(const Poco::Util::AbstractConfiguration & config,
ZooKeeperPtr ZooKeeper::startNewSession() const
{
auto res = std::shared_ptr<ZooKeeper>(new ZooKeeper(args, zk_log));
if (reconnect_task)
(*reconnect_task)->deactivate();
auto res = std::shared_ptr<ZooKeeper>(new ZooKeeper(args, zk_log, availability_zones, std::move(optimal_impl)));
res->initSession();
return res;
}
@ -1456,6 +1536,16 @@ int32_t ZooKeeper::getConnectionXid() const
return impl->getConnectionXid();
}
String ZooKeeper::getConnectedHostAvailabilityZone() const
{
if (args.implementation != "zookeeper" || !impl)
return "";
Int8 idx = impl->getConnectedNodeIdx();
if (idx < 0)
return ""; /// session expired
return availability_zones.at(idx);
}
size_t getFailedOpIndex(Coordination::Error exception_code, const Coordination::Responses & responses)
{
if (responses.empty())

View File

@ -32,6 +32,7 @@ namespace DB
{
class ZooKeeperLog;
class ZooKeeperWithFaultInjection;
class BackgroundSchedulePoolTaskHolder;
namespace ErrorCodes
{
@ -48,11 +49,23 @@ constexpr size_t MULTI_BATCH_SIZE = 100;
struct ShuffleHost
{
enum AvailabilityZoneInfo
{
SAME = 0,
UNKNOWN = 1,
OTHER = 2,
};
String host;
bool secure = false;
UInt8 original_index = 0;
AvailabilityZoneInfo az_info = UNKNOWN;
Priority priority;
UInt64 random = 0;
/// We should resolve it each time without caching
mutable std::optional<Poco::Net::SocketAddress> address;
void randomize()
{
random = thread_local_rng();
@ -60,11 +73,13 @@ struct ShuffleHost
static bool compare(const ShuffleHost & lhs, const ShuffleHost & rhs)
{
return std::forward_as_tuple(lhs.priority, lhs.random)
< std::forward_as_tuple(rhs.priority, rhs.random);
return std::forward_as_tuple(lhs.az_info, lhs.priority, lhs.random)
< std::forward_as_tuple(rhs.az_info, rhs.priority, rhs.random);
}
};
using ShuffleHosts = std::vector<ShuffleHost>;
struct RemoveException
{
explicit RemoveException(std::string_view path_ = "", bool remove_subtree_ = true)
@ -197,6 +212,9 @@ class ZooKeeper
explicit ZooKeeper(const ZooKeeperArgs & args_, std::shared_ptr<DB::ZooKeeperLog> zk_log_ = nullptr);
/// Allows to keep info about availability zones when starting a new session
ZooKeeper(const ZooKeeperArgs & args_, std::shared_ptr<DB::ZooKeeperLog> zk_log_, Strings availability_zones_, std::unique_ptr<Coordination::IKeeper> existing_impl);
/** Config of the form:
<zookeeper>
<node>
@ -228,7 +246,9 @@ public:
using Ptr = std::shared_ptr<ZooKeeper>;
using ErrorsList = std::initializer_list<Coordination::Error>;
std::vector<ShuffleHost> shuffleHosts() const;
~ZooKeeper();
ShuffleHosts shuffleHosts() const;
static Ptr create(const Poco::Util::AbstractConfiguration & config,
const std::string & config_name,
@ -596,8 +616,6 @@ public:
UInt32 getSessionUptime() const { return static_cast<UInt32>(session_uptime.elapsedSeconds()); }
bool hasReachedDeadline() const { return impl->hasReachedDeadline(); }
uint64_t getSessionTimeoutMS() const { return args.session_timeout_ms; }
void setServerCompletelyStarted();
@ -606,6 +624,8 @@ public:
String getConnectedHostPort() const;
int32_t getConnectionXid() const;
String getConnectedHostAvailabilityZone() const;
const DB::KeeperFeatureFlags * getKeeperFeatureFlags() const { return impl->getKeeperFeatureFlags(); }
/// Checks that our session was not killed, and allows to avoid applying a request from an old lost session.
@ -625,7 +645,8 @@ public:
void addCheckSessionOp(Coordination::Requests & requests) const;
private:
void init(ZooKeeperArgs args_);
void init(ZooKeeperArgs args_, std::unique_ptr<Coordination::IKeeper> existing_impl);
void updateAvailabilityZones();
/// The following methods don't any throw exceptions but return error codes.
Coordination::Error createImpl(const std::string & path, const std::string & data, int32_t mode, std::string & path_created);
@ -690,15 +711,20 @@ private:
}
std::unique_ptr<Coordination::IKeeper> impl;
mutable std::unique_ptr<Coordination::IKeeper> optimal_impl;
ZooKeeperArgs args;
Strings availability_zones;
LoggerPtr log = nullptr;
std::shared_ptr<DB::ZooKeeperLog> zk_log;
AtomicStopwatch session_uptime;
int32_t session_node_version;
std::unique_ptr<DB::BackgroundSchedulePoolTaskHolder> reconnect_task;
};

View File

@ -5,6 +5,9 @@
#include <Poco/Util/AbstractConfiguration.h>
#include <Common/isLocalAddress.h>
#include <Common/StringUtils.h>
#include <Common/thread_local_rng.h>
#include <Server/CloudPlacementInfo.h>
#include <IO/S3/Credentials.h>
#include <Poco/String.h>
namespace DB
@ -53,6 +56,7 @@ ZooKeeperArgs::ZooKeeperArgs(const Poco::Util::AbstractConfiguration & config, c
ZooKeeperArgs::ZooKeeperArgs(const String & hosts_string)
{
splitInto<','>(hosts, hosts_string);
availability_zones.resize(hosts.size());
}
void ZooKeeperArgs::initFromKeeperServerSection(const Poco::Util::AbstractConfiguration & config)
@ -103,8 +107,11 @@ void ZooKeeperArgs::initFromKeeperServerSection(const Poco::Util::AbstractConfig
for (const auto & key : keys)
{
if (startsWith(key, "server"))
{
hosts.push_back(
(secure ? "secure://" : "") + config.getString(raft_configuration_key + "." + key + ".hostname") + ":" + tcp_port);
availability_zones.push_back(config.getString(raft_configuration_key + "." + key + ".availability_zone", ""));
}
}
static constexpr std::array load_balancing_keys
@ -123,11 +130,15 @@ void ZooKeeperArgs::initFromKeeperServerSection(const Poco::Util::AbstractConfig
auto load_balancing = magic_enum::enum_cast<DB::LoadBalancing>(Poco::toUpper(load_balancing_str));
if (!load_balancing)
throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Unknown load balancing: {}", load_balancing_str);
get_priority_load_balancing.load_balancing = *load_balancing;
get_priority_load_balancing = DB::GetPriorityForLoadBalancing(*load_balancing, thread_local_rng() % hosts.size());
break;
}
}
availability_zone_autodetect = config.getBool(std::string{config_name} + ".availability_zone_autodetect", false);
prefer_local_availability_zone = config.getBool(std::string{config_name} + ".prefer_local_availability_zone", false);
if (prefer_local_availability_zone)
client_availability_zone = DB::PlacementInfo::PlacementInfo::instance().getAvailabilityZone();
}
void ZooKeeperArgs::initFromKeeperSection(const Poco::Util::AbstractConfiguration & config, const std::string & config_name)
@ -137,6 +148,8 @@ void ZooKeeperArgs::initFromKeeperSection(const Poco::Util::AbstractConfiguratio
Poco::Util::AbstractConfiguration::Keys keys;
config.keys(config_name, keys);
std::optional<DB::LoadBalancing> load_balancing;
for (const auto & key : keys)
{
if (key.starts_with("node"))
@ -144,6 +157,7 @@ void ZooKeeperArgs::initFromKeeperSection(const Poco::Util::AbstractConfiguratio
hosts.push_back(
(config.getBool(config_name + "." + key + ".secure", false) ? "secure://" : "")
+ config.getString(config_name + "." + key + ".host") + ":" + config.getString(config_name + "." + key + ".port", "2181"));
availability_zones.push_back(config.getString(config_name + "." + key + ".availability_zone", ""));
}
else if (key == "session_timeout_ms")
{
@ -199,6 +213,10 @@ void ZooKeeperArgs::initFromKeeperSection(const Poco::Util::AbstractConfiguratio
{
sessions_path = config.getString(config_name + "." + key);
}
else if (key == "prefer_local_availability_zone")
{
prefer_local_availability_zone = config.getBool(config_name + "." + key);
}
else if (key == "implementation")
{
implementation = config.getString(config_name + "." + key);
@ -207,10 +225,9 @@ void ZooKeeperArgs::initFromKeeperSection(const Poco::Util::AbstractConfiguratio
{
String load_balancing_str = config.getString(config_name + "." + key);
/// Use magic_enum to avoid dependency from dbms (`SettingFieldLoadBalancingTraits::fromString(...)`)
auto load_balancing = magic_enum::enum_cast<DB::LoadBalancing>(Poco::toUpper(load_balancing_str));
load_balancing = magic_enum::enum_cast<DB::LoadBalancing>(Poco::toUpper(load_balancing_str));
if (!load_balancing)
throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Unknown load balancing: {}", load_balancing_str);
get_priority_load_balancing.load_balancing = *load_balancing;
}
else if (key == "fallback_session_lifetime")
{
@ -224,9 +241,19 @@ void ZooKeeperArgs::initFromKeeperSection(const Poco::Util::AbstractConfiguratio
{
use_compression = config.getBool(config_name + "." + key);
}
else if (key == "availability_zone_autodetect")
{
availability_zone_autodetect = config.getBool(config_name + "." + key);
}
else
throw KeeperException(Coordination::Error::ZBADARGUMENTS, "Unknown key {} in config file", key);
}
if (load_balancing)
get_priority_load_balancing = DB::GetPriorityForLoadBalancing(*load_balancing, thread_local_rng() % hosts.size());
if (prefer_local_availability_zone)
client_availability_zone = DB::PlacementInfo::PlacementInfo::instance().getAvailabilityZone();
}
}

View File

@ -32,10 +32,12 @@ struct ZooKeeperArgs
String zookeeper_name = "zookeeper";
String implementation = "zookeeper";
Strings hosts;
Strings availability_zones;
String auth_scheme;
String identity;
String chroot;
String sessions_path = "/clickhouse/sessions";
String client_availability_zone;
int32_t connection_timeout_ms = Coordination::DEFAULT_CONNECTION_TIMEOUT_MS;
int32_t session_timeout_ms = Coordination::DEFAULT_SESSION_TIMEOUT_MS;
int32_t operation_timeout_ms = Coordination::DEFAULT_OPERATION_TIMEOUT_MS;
@ -47,6 +49,8 @@ struct ZooKeeperArgs
UInt64 send_sleep_ms = 0;
UInt64 recv_sleep_ms = 0;
bool use_compression = false;
bool prefer_local_availability_zone = false;
bool availability_zone_autodetect = false;
SessionLifetimeConfiguration fallback_session_lifetime = {};
DB::GetPriorityForLoadBalancing get_priority_load_balancing;

View File

@ -23,6 +23,9 @@
#include <Common/setThreadName.h>
#include <Common/thread_local_rng.h>
#include <Poco/Net/NetException.h>
#include <Poco/Net/DNS.h>
#include "Coordination/KeeperConstants.h"
#include "config.h"
@ -338,7 +341,7 @@ ZooKeeper::~ZooKeeper()
ZooKeeper::ZooKeeper(
const Nodes & nodes,
const zkutil::ShuffleHosts & nodes,
const zkutil::ZooKeeperArgs & args_,
std::shared_ptr<ZooKeeperLog> zk_log_)
: args(args_)
@ -426,7 +429,7 @@ ZooKeeper::ZooKeeper(
void ZooKeeper::connect(
const Nodes & nodes,
const zkutil::ShuffleHosts & nodes,
Poco::Timespan connection_timeout)
{
if (nodes.empty())
@ -434,15 +437,51 @@ void ZooKeeper::connect(
static constexpr size_t num_tries = 3;
bool connected = false;
bool dns_error = false;
size_t resolved_count = 0;
for (const auto & node : nodes)
{
try
{
const Poco::Net::SocketAddress host_socket_addr{node.host};
LOG_TRACE(log, "Adding ZooKeeper host {} ({}), az: {}, priority: {}", node.host, host_socket_addr.toString(), node.az_info, node.priority);
node.address = host_socket_addr;
++resolved_count;
}
catch (const Poco::Net::HostNotFoundException & e)
{
/// Most likely it's misconfiguration and wrong hostname was specified
LOG_ERROR(log, "Cannot use ZooKeeper host {}, reason: {}", node.host, e.displayText());
}
catch (const Poco::Net::DNSException & e)
{
/// Most likely DNS is not available now
dns_error = true;
LOG_ERROR(log, "Cannot use ZooKeeper host {} due to DNS error: {}", node.host, e.displayText());
}
}
if (resolved_count == 0)
{
/// For DNS errors we throw exception with ZCONNECTIONLOSS code, so it will be considered as hardware error, not user error
if (dns_error)
throw zkutil::KeeperException::fromMessage(
Coordination::Error::ZCONNECTIONLOSS, "Cannot resolve any of provided ZooKeeper hosts due to DNS error");
else
throw zkutil::KeeperException::fromMessage(Coordination::Error::ZCONNECTIONLOSS, "Cannot use any of provided ZooKeeper nodes");
}
WriteBufferFromOwnString fail_reasons;
for (size_t try_no = 0; try_no < num_tries; ++try_no)
{
for (size_t i = 0; i < nodes.size(); ++i)
for (const auto & node : nodes)
{
const auto & node = nodes[i];
try
{
if (!node.address)
continue;
/// Reset the state of previous attempt.
if (node.secure)
{
@ -458,7 +497,7 @@ void ZooKeeper::connect(
socket = Poco::Net::StreamSocket();
}
socket.connect(node.address, connection_timeout);
socket.connect(*node.address, connection_timeout);
socket_address = socket.peerAddress();
socket.setReceiveTimeout(args.operation_timeout_ms * 1000);
@ -498,27 +537,11 @@ void ZooKeeper::connect(
}
original_index = static_cast<Int8>(node.original_index);
if (i != 0)
{
std::uniform_int_distribution<UInt32> fallback_session_lifetime_distribution
{
args.fallback_session_lifetime.min_sec,
args.fallback_session_lifetime.max_sec,
};
UInt32 session_lifetime_seconds = fallback_session_lifetime_distribution(thread_local_rng);
client_session_deadline = clock::now() + std::chrono::seconds(session_lifetime_seconds);
LOG_DEBUG(log, "Connected to a suboptimal ZooKeeper host ({}, index {})."
" To preserve balance in ZooKeeper usage, this ZooKeeper session will expire in {} seconds",
node.address.toString(), i, session_lifetime_seconds);
}
break;
}
catch (...)
{
fail_reasons << "\n" << getCurrentExceptionMessage(false) << ", " << node.address.toString();
fail_reasons << "\n" << getCurrentExceptionMessage(false) << ", " << node.address->toString();
}
}
@ -532,6 +555,9 @@ void ZooKeeper::connect(
bool first = true;
for (const auto & node : nodes)
{
if (!node.address)
continue;
if (first)
first = false;
else
@ -540,7 +566,7 @@ void ZooKeeper::connect(
if (node.secure)
message << "secure://";
message << node.address.toString();
message << node.address->toString();
}
message << fail_reasons.str() << "\n";
@ -1153,7 +1179,6 @@ void ZooKeeper::pushRequest(RequestInfo && info)
{
try
{
checkSessionDeadline();
info.time = clock::now();
auto maybe_zk_log = std::atomic_load(&zk_log);
if (maybe_zk_log)
@ -1201,44 +1226,44 @@ bool ZooKeeper::isFeatureEnabled(KeeperFeatureFlag feature_flag) const
return keeper_feature_flags.isEnabled(feature_flag);
}
void ZooKeeper::initFeatureFlags()
std::optional<String> ZooKeeper::tryGetSystemZnode(const std::string & path, const std::string & description)
{
const auto try_get = [&](const std::string & path, const std::string & description) -> std::optional<std::string>
auto promise = std::make_shared<std::promise<Coordination::GetResponse>>();
auto future = promise->get_future();
auto callback = [promise](const Coordination::GetResponse & response) mutable
{
auto promise = std::make_shared<std::promise<Coordination::GetResponse>>();
auto future = promise->get_future();
auto callback = [promise](const Coordination::GetResponse & response) mutable
{
promise->set_value(response);
};
get(path, std::move(callback), {});
if (future.wait_for(std::chrono::milliseconds(args.operation_timeout_ms)) != std::future_status::ready)
throw Exception(Error::ZOPERATIONTIMEOUT, "Failed to get {}: timeout", description);
auto response = future.get();
if (response.error == Coordination::Error::ZNONODE)
{
LOG_TRACE(log, "Failed to get {}", description);
return std::nullopt;
}
else if (response.error != Coordination::Error::ZOK)
{
throw Exception(response.error, "Failed to get {}", description);
}
return std::move(response.data);
promise->set_value(response);
};
if (auto feature_flags = try_get(keeper_api_feature_flags_path, "feature flags"); feature_flags.has_value())
get(path, std::move(callback), {});
if (future.wait_for(std::chrono::milliseconds(args.operation_timeout_ms)) != std::future_status::ready)
throw Exception(Error::ZOPERATIONTIMEOUT, "Failed to get {}: timeout", description);
auto response = future.get();
if (response.error == Coordination::Error::ZNONODE)
{
LOG_TRACE(log, "Failed to get {}", description);
return std::nullopt;
}
else if (response.error != Coordination::Error::ZOK)
{
throw Exception(response.error, "Failed to get {}", description);
}
return std::move(response.data);
}
void ZooKeeper::initFeatureFlags()
{
if (auto feature_flags = tryGetSystemZnode(keeper_api_feature_flags_path, "feature flags"); feature_flags.has_value())
{
keeper_feature_flags.setFeatureFlags(std::move(*feature_flags));
return;
}
auto keeper_api_version_string = try_get(keeper_api_version_path, "API version");
auto keeper_api_version_string = tryGetSystemZnode(keeper_api_version_path, "API version");
DB::KeeperApiVersion keeper_api_version{DB::KeeperApiVersion::ZOOKEEPER_COMPATIBLE};
@ -1256,6 +1281,17 @@ void ZooKeeper::initFeatureFlags()
keeper_feature_flags.fromApiVersion(keeper_api_version);
}
String ZooKeeper::tryGetAvailabilityZone()
{
auto res = tryGetSystemZnode(keeper_availability_zone_path, "availability zone");
if (res)
{
LOG_TRACE(log, "Availability zone for ZooKeeper at {}: {}", getConnectedHostPort(), *res);
return *res;
}
return "";
}
void ZooKeeper::executeGenericRequest(
const ZooKeeperRequestPtr & request,
@ -1587,17 +1623,6 @@ void ZooKeeper::setupFaultDistributions()
inject_setup.test_and_set();
}
void ZooKeeper::checkSessionDeadline() const
{
if (unlikely(hasReachedDeadline()))
throw Exception::fromMessage(Error::ZSESSIONEXPIRED, "Session expired (force expiry client-side)");
}
bool ZooKeeper::hasReachedDeadline() const
{
return client_session_deadline.has_value() && clock::now() >= client_session_deadline.value();
}
void ZooKeeper::maybeInjectSendFault()
{
if (unlikely(inject_setup.test() && send_inject_fault && send_inject_fault.value()(thread_local_rng)))

View File

@ -8,6 +8,7 @@
#include <Common/ZooKeeper/IKeeper.h>
#include <Common/ZooKeeper/ZooKeeperCommon.h>
#include <Common/ZooKeeper/ZooKeeperArgs.h>
#include <Common/ZooKeeper/ZooKeeper.h>
#include <Coordination/KeeperConstants.h>
#include <Coordination/KeeperFeatureFlags.h>
@ -102,21 +103,12 @@ using namespace DB;
class ZooKeeper final : public IKeeper
{
public:
struct Node
{
Poco::Net::SocketAddress address;
UInt8 original_index;
bool secure;
};
using Nodes = std::vector<Node>;
/** Connection to nodes is performed in order. If you want, shuffle them manually.
* Operation timeout couldn't be greater than session timeout.
* Operation timeout applies independently for network read, network write, waiting for events and synchronization.
*/
ZooKeeper(
const Nodes & nodes,
const zkutil::ShuffleHosts & nodes,
const zkutil::ZooKeeperArgs & args_,
std::shared_ptr<ZooKeeperLog> zk_log_);
@ -130,9 +122,7 @@ public:
String getConnectedHostPort() const override { return (original_index == -1) ? "" : args.hosts[original_index]; }
int32_t getConnectionXid() const override { return next_xid.load(); }
/// A ZooKeeper session can have an optional deadline set on it.
/// After it has been reached, the session needs to be finalized.
bool hasReachedDeadline() const override;
String tryGetAvailabilityZone() override;
/// Useful to check owner of ephemeral node.
int64_t getSessionID() const override { return session_id; }
@ -271,7 +261,6 @@ private:
clock::time_point time;
};
std::optional<clock::time_point> client_session_deadline {};
using RequestsQueue = ConcurrentBoundedQueue<RequestInfo>;
RequestsQueue requests_queue{1024};
@ -316,7 +305,7 @@ private:
LoggerPtr log;
void connect(
const Nodes & node,
const zkutil::ShuffleHosts & node,
Poco::Timespan connection_timeout);
void sendHandshake();
@ -346,9 +335,10 @@ private:
void logOperationIfNeeded(const ZooKeeperRequestPtr & request, const ZooKeeperResponsePtr & response = nullptr, bool finalize = false, UInt64 elapsed_microseconds = 0);
std::optional<String> tryGetSystemZnode(const std::string & path, const std::string & description);
void initFeatureFlags();
void checkSessionDeadline() const;
CurrentMetrics::Increment active_session_metric_increment{CurrentMetrics::ZooKeeperSession};
std::shared_ptr<ZooKeeperLog> zk_log;

View File

@ -1,15 +1,18 @@
clickhouse_add_executable(zkutil_test_commands zkutil_test_commands.cpp)
target_link_libraries(zkutil_test_commands PRIVATE
clickhouse_common_zookeeper_no_log
clickhouse_functions
dbms)
clickhouse_add_executable(zkutil_test_commands_new_lib zkutil_test_commands_new_lib.cpp)
target_link_libraries(zkutil_test_commands_new_lib PRIVATE
clickhouse_common_zookeeper_no_log
clickhouse_compression
clickhouse_functions
dbms)
clickhouse_add_executable(zkutil_test_async zkutil_test_async.cpp)
target_link_libraries(zkutil_test_async PRIVATE
clickhouse_common_zookeeper_no_log
clickhouse_functions
dbms)

View File

@ -25,24 +25,24 @@ try
Poco::Logger::root().setChannel(channel);
Poco::Logger::root().setLevel("trace");
std::string hosts_arg = argv[1];
std::vector<std::string> hosts_strings;
splitInto<','>(hosts_strings, hosts_arg);
ZooKeeper::Nodes nodes;
nodes.reserve(hosts_strings.size());
for (size_t i = 0; i < hosts_strings.size(); ++i)
zkutil::ZooKeeperArgs args{argv[1]};
zkutil::ShuffleHosts nodes;
nodes.reserve(args.hosts.size());
for (size_t i = 0; i < args.hosts.size(); ++i)
{
std::string host_string = hosts_strings[i];
bool secure = startsWith(host_string, "secure://");
zkutil::ShuffleHost node;
std::string host_string = args.hosts[i];
node.secure = startsWith(host_string, "secure://");
if (secure)
if (node.secure)
host_string.erase(0, strlen("secure://"));
nodes.emplace_back(ZooKeeper::Node{Poco::Net::SocketAddress{host_string}, static_cast<UInt8>(i) , secure});
node.host = host_string;
node.original_index = i;
nodes.emplace_back(node);
}
zkutil::ZooKeeperArgs args;
ZooKeeper zk(nodes, args, nullptr);
Poco::Event event(true);

View File

@ -11,10 +11,10 @@ clickhouse_add_executable (small_table small_table.cpp)
target_link_libraries (small_table PRIVATE clickhouse_common_io)
clickhouse_add_executable (parallel_aggregation parallel_aggregation.cpp)
target_link_libraries (parallel_aggregation PRIVATE dbms)
target_link_libraries (parallel_aggregation PRIVATE dbms clickhouse_functions)
clickhouse_add_executable (parallel_aggregation2 parallel_aggregation2.cpp)
target_link_libraries (parallel_aggregation2 PRIVATE dbms)
target_link_libraries (parallel_aggregation2 PRIVATE dbms clickhouse_functions)
clickhouse_add_executable (int_hashes_perf int_hashes_perf.cpp)
target_link_libraries (int_hashes_perf PRIVATE clickhouse_common_io)
@ -85,7 +85,7 @@ target_link_libraries (interval_tree PRIVATE dbms)
if (ENABLE_SSL)
clickhouse_add_executable (encrypt_decrypt encrypt_decrypt.cpp)
target_link_libraries (encrypt_decrypt PRIVATE dbms)
target_link_libraries (encrypt_decrypt PRIVATE dbms clickhouse_functions)
endif()
clickhouse_add_executable (check_pointer_valid check_pointer_valid.cpp)

View File

@ -57,14 +57,16 @@ void CompressedWriteBuffer::nextImpl()
}
}
CompressedWriteBuffer::~CompressedWriteBuffer()
{
finalize();
}
CompressedWriteBuffer::CompressedWriteBuffer(WriteBuffer & out_, CompressionCodecPtr codec_, size_t buf_size)
: BufferWithOwnMemory<WriteBuffer>(buf_size), out(out_), codec(std::move(codec_))
{
}
CompressedWriteBuffer::~CompressedWriteBuffer()
{
if (!canceled)
finalize();
}
}

View File

@ -90,13 +90,13 @@ static constexpr auto DEFAULT_UNCOMPRESSED_CACHE_POLICY = "SLRU";
static constexpr auto DEFAULT_UNCOMPRESSED_CACHE_MAX_SIZE = 0_MiB;
static constexpr auto DEFAULT_UNCOMPRESSED_CACHE_SIZE_RATIO = 0.5l;
static constexpr auto DEFAULT_MARK_CACHE_POLICY = "SLRU";
static constexpr auto DEFAULT_MARK_CACHE_MAX_SIZE = 5368_MiB;
static constexpr auto DEFAULT_MARK_CACHE_MAX_SIZE = 5_GiB;
static constexpr auto DEFAULT_MARK_CACHE_SIZE_RATIO = 0.5l;
static constexpr auto DEFAULT_INDEX_UNCOMPRESSED_CACHE_POLICY = "SLRU";
static constexpr auto DEFAULT_INDEX_UNCOMPRESSED_CACHE_MAX_SIZE = 0;
static constexpr auto DEFAULT_INDEX_UNCOMPRESSED_CACHE_SIZE_RATIO = 0.5;
static constexpr auto DEFAULT_INDEX_MARK_CACHE_POLICY = "SLRU";
static constexpr auto DEFAULT_INDEX_MARK_CACHE_MAX_SIZE = 5368_MiB;
static constexpr auto DEFAULT_INDEX_MARK_CACHE_MAX_SIZE = 5_GiB;
static constexpr auto DEFAULT_INDEX_MARK_CACHE_SIZE_RATIO = 0.3;
static constexpr auto DEFAULT_MMAP_CACHE_MAX_SIZE = 1_KiB; /// chosen by rolling dice
static constexpr auto DEFAULT_COMPILED_EXPRESSION_CACHE_MAX_SIZE = 128_MiB;

View File

@ -63,6 +63,9 @@ const char USER_INTERSERVER_MARKER[] = " INTERSERVER SECRET ";
/// Marker for SSH-keys-based authentication (passed as the user name)
const char SSH_KEY_AUTHENTICAION_MARKER[] = " SSH KEY AUTHENTICATION ";
/// Market for JSON Web Token authentication
const char JWT_AUTHENTICAION_MARKER[] = " JWT AUTHENTICATION ";
};
namespace Protocol

View File

@ -154,6 +154,7 @@ namespace DB
M(String, merge_workload, "default", "Name of workload to be used to access resources for all merges (may be overridden by a merge tree setting)", 0) \
M(String, mutation_workload, "default", "Name of workload to be used to access resources for all mutations (may be overridden by a merge tree setting)", 0) \
M(Double, gwp_asan_force_sample_probability, 0, "Probability that an allocation from specific places will be sampled by GWP Asan (i.e. PODArray allocations)", 0) \
M(UInt64, config_reload_interval_ms, 2000, "How often clickhouse will reload config and check for new changes", 0) \
/// If you add a setting which can be updated at runtime, please update 'changeable_settings' map in StorageSystemServerSettings.cpp

View File

@ -470,7 +470,7 @@ class IColumn;
M(UInt64, max_rows_in_join, 0, "Maximum size of the hash table for JOIN (in number of rows).", 0) \
M(UInt64, max_bytes_in_join, 0, "Maximum size of the hash table for JOIN (in number of bytes in memory).", 0) \
M(OverflowMode, join_overflow_mode, OverflowMode::THROW, "What to do when the limit is exceeded.", 0) \
M(Bool, join_any_take_last_row, false, "When disabled (default) ANY JOIN will take the first found row for a key. When enabled, it will take the last row seen if there are multiple rows for the same key.", IMPORTANT) \
M(Bool, join_any_take_last_row, false, "When disabled (default) ANY JOIN will take the first found row for a key. When enabled, it will take the last row seen if there are multiple rows for the same key. Can be applied only to hash join and storage join.", IMPORTANT) \
M(JoinAlgorithm, join_algorithm, JoinAlgorithm::DEFAULT, "Specify join algorithm.", 0) \
M(UInt64, cross_join_min_rows_to_compress, 10000000, "Minimal count of rows to compress block in CROSS JOIN. Zero value means - disable this threshold. This block is compressed when any of the two thresholds (by rows or by bytes) are reached.", 0) \
M(UInt64, cross_join_min_bytes_to_compress, 1_GiB, "Minimal size of block to compress in CROSS JOIN. Zero value means - disable this threshold. This block is compressed when any of the two thresholds (by rows or by bytes) are reached.", 0) \
@ -1092,6 +1092,7 @@ class IColumn;
M(Bool, input_format_json_defaults_for_missing_elements_in_named_tuple, true, "Insert default value in named tuple element if it's missing in json object", 0) \
M(Bool, input_format_json_throw_on_bad_escape_sequence, true, "Throw an exception if JSON string contains bad escape sequence in JSON input formats. If disabled, bad escape sequences will remain as is in the data", 0) \
M(Bool, input_format_json_ignore_unnecessary_fields, true, "Ignore unnecessary fields and not parse them. Enabling this may not throw exceptions on json strings of invalid format or with duplicated fields", 0) \
M(Bool, input_format_json_ignore_key_case, false, "Ignore json key case while read json field from string", 0) \
M(Bool, input_format_try_infer_integers, true, "Try to infer integers instead of floats while schema inference in text formats", 0) \
M(Bool, input_format_try_infer_dates, true, "Try to infer dates from string fields while schema inference in text formats", 0) \
M(Bool, input_format_try_infer_datetimes, true, "Try to infer datetimes from string fields while schema inference in text formats", 0) \
@ -1160,6 +1161,7 @@ class IColumn;
M(Bool, output_format_parquet_parallel_encoding, true, "Do Parquet encoding in multiple threads. Requires output_format_parquet_use_custom_encoder.", 0) \
M(UInt64, output_format_parquet_data_page_size, 1024 * 1024, "Target page size in bytes, before compression.", 0) \
M(UInt64, output_format_parquet_batch_size, 1024, "Check page size every this many rows. Consider decreasing if you have columns with average values size above a few KBs.", 0) \
M(Bool, output_format_parquet_write_page_index, true, "Add a possibility to write page index into parquet files.", 0) \
M(String, output_format_avro_codec, "", "Compression codec used for output. Possible values: 'null', 'deflate', 'snappy', 'zstd'.", 0) \
M(UInt64, output_format_avro_sync_interval, 16 * 1024, "Sync interval in bytes.", 0) \
M(String, output_format_avro_string_column_pattern, "", "For Avro format: regexp of String columns to select as AVRO string.", 0) \

View File

@ -86,6 +86,8 @@ namespace SettingsChangesHistory
/// It's used to implement `compatibility` setting (see https://github.com/ClickHouse/ClickHouse/issues/35972)
static const std::map<ClickHouseVersion, SettingsChangesHistory::SettingsChanges> settings_changes_history =
{
{"24.7", {{"output_format_parquet_write_page_index", false, true, "Add a possibility to write page index into parquet files."},
}},
{"24.6", {{"materialize_skip_indexes_on_insert", true, true, "Added new setting to allow to disable materialization of skip indexes on insert"},
{"materialize_statistics_on_insert", true, true, "Added new setting to allow to disable materialization of statistics on insert"},
{"input_format_parquet_use_native_reader", false, false, "When reading Parquet files, to use native reader instead of arrow reader."},
@ -115,6 +117,7 @@ static const std::map<ClickHouseVersion, SettingsChangesHistory::SettingsChanges
{"output_format_csv_serialize_tuple_into_separate_columns", true, true, "A new way of how interpret tuples in CSV format was added."},
{"input_format_csv_deserialize_separate_columns_into_tuple", true, true, "A new way of how interpret tuples in CSV format was added."},
{"input_format_csv_try_infer_strings_from_quoted_tuples", true, true, "A new way of how interpret tuples in CSV format was added."},
{"input_format_json_ignore_key_case", false, false, "Ignore json key case while read json field from string."},
}},
{"24.5", {{"allow_deprecated_error_prone_window_functions", true, false, "Allow usage of deprecated error prone window functions (neighbor, runningAccumulate, runningDifferenceStartingWithFirstValue, runningDifference)"},
{"allow_experimental_join_condition", false, false, "Support join with inequal conditions which involve columns from both left and right table. e.g. t1.y < t2.y."},

View File

@ -201,13 +201,13 @@ IMPLEMENT_SETTING_ENUM(ORCCompression, ErrorCodes::BAD_ARGUMENTS,
{"zlib", FormatSettings::ORCCompression::ZLIB},
{"lz4", FormatSettings::ORCCompression::LZ4}})
IMPLEMENT_SETTING_ENUM(S3QueueMode, ErrorCodes::BAD_ARGUMENTS,
{{"ordered", S3QueueMode::ORDERED},
{"unordered", S3QueueMode::UNORDERED}})
IMPLEMENT_SETTING_ENUM(ObjectStorageQueueMode, ErrorCodes::BAD_ARGUMENTS,
{{"ordered", ObjectStorageQueueMode::ORDERED},
{"unordered", ObjectStorageQueueMode::UNORDERED}})
IMPLEMENT_SETTING_ENUM(S3QueueAction, ErrorCodes::BAD_ARGUMENTS,
{{"keep", S3QueueAction::KEEP},
{"delete", S3QueueAction::DELETE}})
IMPLEMENT_SETTING_ENUM(ObjectStorageQueueAction, ErrorCodes::BAD_ARGUMENTS,
{{"keep", ObjectStorageQueueAction::KEEP},
{"delete", ObjectStorageQueueAction::DELETE}})
IMPLEMENT_SETTING_ENUM(ExternalCommandStderrReaction, ErrorCodes::BAD_ARGUMENTS,
{{"none", ExternalCommandStderrReaction::NONE},

Some files were not shown because too many files have changed in this diff Show More