mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-25 00:52:02 +00:00
Merge branch 'master' into add_statistics_cmsketch
# Conflicts: # docs/en/engines/table-engines/mergetree-family/mergetree.md # src/Storages/Statistics/Statistics.cpp # src/Storages/Statistics/Statistics.h # src/Storages/Statistics/StatisticsTDigest.h # src/Storages/Statistics/StatisticsUniq.h # src/Storages/Statistics/TDigestStatistics.cpp # tests/queries/0_stateless/02864_statistics_uniq.sql
This commit is contained in:
commit
0c5821e5b8
1
.gitattributes
vendored
1
.gitattributes
vendored
@ -2,3 +2,4 @@ contrib/* linguist-vendored
|
||||
*.h linguist-language=C++
|
||||
tests/queries/0_stateless/data_json/* binary
|
||||
tests/queries/0_stateless/*.reference -crlf
|
||||
src/Core/SettingsChangesHistory.cpp merge=union
|
||||
|
49
.github/workflows/backport_branches.yml
vendored
49
.github/workflows/backport_branches.yml
vendored
@ -159,33 +159,24 @@ jobs:
|
||||
############################################################################################
|
||||
##################################### BUILD REPORTER #######################################
|
||||
############################################################################################
|
||||
BuilderReport:
|
||||
Builds_Report:
|
||||
# run report check for failed builds to indicate the CI error
|
||||
if: ${{ !cancelled() }}
|
||||
needs:
|
||||
- RunConfig
|
||||
- BuilderDebAarch64
|
||||
- BuilderDebAsan
|
||||
- BuilderDebDebug
|
||||
- BuilderDebRelease
|
||||
- BuilderDebTsan
|
||||
uses: ./.github/workflows/reusable_test.yml
|
||||
with:
|
||||
test_name: ClickHouse build check
|
||||
runner_type: style-checker-aarch64
|
||||
data: ${{ needs.RunConfig.outputs.data }}
|
||||
BuilderSpecialReport:
|
||||
# run report check for failed builds to indicate the CI error
|
||||
if: ${{ !cancelled() }}
|
||||
needs:
|
||||
- RunConfig
|
||||
- BuilderBinDarwin
|
||||
- BuilderBinDarwinAarch64
|
||||
uses: ./.github/workflows/reusable_test.yml
|
||||
with:
|
||||
test_name: ClickHouse special build check
|
||||
runner_type: style-checker-aarch64
|
||||
data: ${{ needs.RunConfig.outputs.data }}
|
||||
if: ${{ !cancelled() && needs.RunConfig.result == 'success' && contains(fromJson(needs.RunConfig.outputs.data).jobs_data.jobs_to_do, 'Builds') }}
|
||||
needs: [RunConfig, BuilderDebAarch64, BuilderDebAsan, BuilderDebDebug, BuilderDebRelease, BuilderDebTsan, BuilderBinDarwin, BuilderBinDarwinAarch64]
|
||||
runs-on: [self-hosted, style-checker-aarch64]
|
||||
steps:
|
||||
- name: Check out repository code
|
||||
uses: ClickHouse/checkout@v1
|
||||
- name: Download reports
|
||||
run: |
|
||||
python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --infile ${{ toJson(needs.RunConfig.outputs.data) }} --pre --job-name Builds
|
||||
- name: Builds report
|
||||
run: |
|
||||
cd "$GITHUB_WORKSPACE/tests/ci"
|
||||
python3 ./build_report_check.py --reports package_release package_aarch64 package_asan package_tsan package_debug binary_darwin binary_darwin_aarch64
|
||||
- name: Set status
|
||||
run: |
|
||||
python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --infile ${{ toJson(needs.RunConfig.outputs.data) }} --post --job-name Builds
|
||||
############################################################################################
|
||||
#################################### INSTALL PACKAGES ######################################
|
||||
############################################################################################
|
||||
@ -256,8 +247,7 @@ jobs:
|
||||
FinishCheck:
|
||||
if: ${{ !failure() && !cancelled() }}
|
||||
needs:
|
||||
- BuilderReport
|
||||
- BuilderSpecialReport
|
||||
- Builds_Report
|
||||
- FunctionalStatelessTestAsan
|
||||
- FunctionalStatefulTestDebug
|
||||
- StressTestTsan
|
||||
@ -273,5 +263,8 @@ jobs:
|
||||
- name: Finish label
|
||||
run: |
|
||||
cd "$GITHUB_WORKSPACE/tests/ci"
|
||||
# update mergeable check
|
||||
python3 merge_pr.py --set-ci-status --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }}
|
||||
# update overall ci report
|
||||
python3 finish_check.py --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }}
|
||||
python3 merge_pr.py
|
||||
|
29
.github/workflows/create_release.yml
vendored
Normal file
29
.github/workflows/create_release.yml
vendored
Normal file
@ -0,0 +1,29 @@
|
||||
name: CreateRelease
|
||||
|
||||
concurrency:
|
||||
group: release
|
||||
|
||||
'on':
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
sha:
|
||||
description: 'The SHA hash of the commit from which to create the release'
|
||||
required: true
|
||||
type: string
|
||||
type:
|
||||
description: 'The type of release: "new" for a new release or "patch" for a patch release'
|
||||
required: true
|
||||
type: choice
|
||||
options:
|
||||
- new
|
||||
- patch
|
||||
|
||||
jobs:
|
||||
Release:
|
||||
runs-on: [self-hosted, style-checker-aarch64]
|
||||
steps:
|
||||
- name: Check out repository code
|
||||
uses: ClickHouse/checkout@v1
|
||||
- name: Print greeting
|
||||
run: |
|
||||
python3 ./tests/ci/release.py --commit ${{ inputs.sha }} --type ${{ inputs.type }} --dry-run
|
4
.github/workflows/master.yml
vendored
4
.github/workflows/master.yml
vendored
@ -117,11 +117,11 @@ jobs:
|
||||
# Reports should run even if Builds_1/2 fail - run them separately, not in Tests_1/2/3
|
||||
Builds_Report:
|
||||
# run report check for failed builds to indicate the CI error
|
||||
if: ${{ !cancelled() && needs.RunConfig.result == 'success' && contains(fromJson(needs.RunConfig.outputs.data).jobs_data.jobs_to_do, 'ClickHouse build check') }}
|
||||
if: ${{ !cancelled() && needs.RunConfig.result == 'success' && contains(fromJson(needs.RunConfig.outputs.data).jobs_data.jobs_to_do, 'Builds') }}
|
||||
needs: [RunConfig, Builds_1, Builds_2]
|
||||
uses: ./.github/workflows/reusable_test.yml
|
||||
with:
|
||||
test_name: ClickHouse build check
|
||||
test_name: Builds
|
||||
runner_type: style-checker-aarch64
|
||||
data: ${{ needs.RunConfig.outputs.data }}
|
||||
|
||||
|
15
.github/workflows/merge_queue.yml
vendored
15
.github/workflows/merge_queue.yml
vendored
@ -96,20 +96,15 @@ jobs:
|
||||
stage: Tests_1
|
||||
data: ${{ needs.RunConfig.outputs.data }}
|
||||
|
||||
################################# Stage Final #################################
|
||||
#
|
||||
FinishCheck:
|
||||
if: ${{ !cancelled() }}
|
||||
CheckReadyForMerge:
|
||||
if: ${{ !cancelled() && needs.StyleCheck.result == 'success' }}
|
||||
# Test_2 or Test_3 must not have jobs required for Mergeable check
|
||||
needs: [RunConfig, BuildDockers, StyleCheck, FastTest, Builds_1, Tests_1]
|
||||
runs-on: [self-hosted, style-checker-aarch64]
|
||||
steps:
|
||||
- name: Check out repository code
|
||||
uses: ClickHouse/checkout@v1
|
||||
- name: Check sync status
|
||||
- name: Check and set merge status
|
||||
run: |
|
||||
cd "$GITHUB_WORKSPACE/tests/ci"
|
||||
python3 sync_pr.py --status
|
||||
- name: Finish label
|
||||
run: |
|
||||
cd "$GITHUB_WORKSPACE/tests/ci"
|
||||
python3 finish_check.py --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }}
|
||||
python3 merge_pr.py --set-ci-status --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }}
|
||||
|
4
.github/workflows/pull_request.yml
vendored
4
.github/workflows/pull_request.yml
vendored
@ -146,11 +146,11 @@ jobs:
|
||||
# Reports should run even if Builds_1/2 fail - run them separately (not in Tests_1/2/3)
|
||||
Builds_Report:
|
||||
# run report check for failed builds to indicate the CI error
|
||||
if: ${{ !cancelled() && needs.RunConfig.result == 'success' && contains(fromJson(needs.RunConfig.outputs.data).jobs_data.jobs_to_do, 'ClickHouse build check') }}
|
||||
if: ${{ !cancelled() && needs.RunConfig.result == 'success' && contains(fromJson(needs.RunConfig.outputs.data).jobs_data.jobs_to_do, 'Builds') }}
|
||||
needs: [RunConfig, StyleCheck, Builds_1, Builds_2]
|
||||
uses: ./.github/workflows/reusable_test.yml
|
||||
with:
|
||||
test_name: ClickHouse build check
|
||||
test_name: Builds
|
||||
runner_type: style-checker-aarch64
|
||||
data: ${{ needs.RunConfig.outputs.data }}
|
||||
|
||||
|
51
.github/workflows/release_branches.yml
vendored
51
.github/workflows/release_branches.yml
vendored
@ -176,35 +176,24 @@ jobs:
|
||||
############################################################################################
|
||||
##################################### BUILD REPORTER #######################################
|
||||
############################################################################################
|
||||
BuilderReport:
|
||||
Builds_Report:
|
||||
# run report check for failed builds to indicate the CI error
|
||||
if: ${{ !cancelled() }}
|
||||
needs:
|
||||
- RunConfig
|
||||
- BuilderDebRelease
|
||||
- BuilderDebAarch64
|
||||
- BuilderDebAsan
|
||||
- BuilderDebTsan
|
||||
- BuilderDebUBsan
|
||||
- BuilderDebMsan
|
||||
- BuilderDebDebug
|
||||
uses: ./.github/workflows/reusable_test.yml
|
||||
with:
|
||||
test_name: ClickHouse build check
|
||||
runner_type: style-checker-aarch64
|
||||
data: ${{ needs.RunConfig.outputs.data }}
|
||||
BuilderSpecialReport:
|
||||
# run report check for failed builds to indicate the CI error
|
||||
if: ${{ !cancelled() }}
|
||||
needs:
|
||||
- RunConfig
|
||||
- BuilderBinDarwin
|
||||
- BuilderBinDarwinAarch64
|
||||
uses: ./.github/workflows/reusable_test.yml
|
||||
with:
|
||||
test_name: ClickHouse special build check
|
||||
runner_type: style-checker-aarch64
|
||||
data: ${{ needs.RunConfig.outputs.data }}
|
||||
if: ${{ !cancelled() && needs.RunConfig.result == 'success' && contains(fromJson(needs.RunConfig.outputs.data).jobs_data.jobs_to_do, 'Builds') }}
|
||||
needs: [RunConfig, BuilderDebRelease, BuilderDebAarch64, BuilderDebAsan, BuilderDebUBsan, BuilderDebMsan, BuilderDebTsan, BuilderDebDebug, BuilderBinDarwin, BuilderBinDarwinAarch64]
|
||||
runs-on: [self-hosted, style-checker-aarch64]
|
||||
steps:
|
||||
- name: Check out repository code
|
||||
uses: ClickHouse/checkout@v1
|
||||
- name: Download reports
|
||||
run: |
|
||||
python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --infile ${{ toJson(needs.RunConfig.outputs.data) }} --pre --job-name Builds
|
||||
- name: Builds report
|
||||
run: |
|
||||
cd "$GITHUB_WORKSPACE/tests/ci"
|
||||
python3 ./build_report_check.py --reports package_release package_aarch64 package_asan package_msan package_ubsan package_tsan package_debug binary_darwin binary_darwin_aarch64
|
||||
- name: Set status
|
||||
run: |
|
||||
python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --infile ${{ toJson(needs.RunConfig.outputs.data) }} --post --job-name Builds
|
||||
MarkReleaseReady:
|
||||
if: ${{ !failure() && !cancelled() }}
|
||||
needs:
|
||||
@ -460,8 +449,7 @@ jobs:
|
||||
needs:
|
||||
- DockerServerImage
|
||||
- DockerKeeperImage
|
||||
- BuilderReport
|
||||
- BuilderSpecialReport
|
||||
- Builds_Report
|
||||
- MarkReleaseReady
|
||||
- FunctionalStatelessTestDebug
|
||||
- FunctionalStatelessTestRelease
|
||||
@ -496,4 +484,7 @@ jobs:
|
||||
- name: Finish label
|
||||
run: |
|
||||
cd "$GITHUB_WORKSPACE/tests/ci"
|
||||
# update mergeable check
|
||||
python3 merge_pr.py --set-ci-status --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }}
|
||||
# update overall ci report
|
||||
python3 finish_check.py --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }}
|
||||
|
@ -13,5 +13,4 @@ rules:
|
||||
level: warning
|
||||
comments:
|
||||
min-spaces-from-content: 1
|
||||
document-start:
|
||||
present: false
|
||||
document-start: disable
|
||||
|
154
CHANGELOG.md
154
CHANGELOG.md
@ -1,5 +1,5 @@
|
||||
### Table of Contents
|
||||
**[ClickHouse release v24.6, 2024-06-27](#246)**<br/>
|
||||
**[ClickHouse release v24.6, 2024-07-01](#246)**<br/>
|
||||
**[ClickHouse release v24.5, 2024-05-30](#245)**<br/>
|
||||
**[ClickHouse release v24.4, 2024-04-30](#244)**<br/>
|
||||
**[ClickHouse release v24.3 LTS, 2024-03-26](#243)**<br/>
|
||||
@ -9,107 +9,105 @@
|
||||
|
||||
# 2024 Changelog
|
||||
|
||||
### <a id="246"></a> ClickHouse release 24.6, 2024-06-27
|
||||
### <a id="246"></a> ClickHouse release 24.6, 2024-07-01
|
||||
|
||||
#### Backward Incompatible Change
|
||||
* Enable asynchronous load of databases and tables by default. See the `async_load_databases` in config.xml. While this change is fully compatible, it can introduce a difference in behavior. When `async_load_databases` is false, as in the previous versions, the server will not accept connections until all tables are loaded. When `async_load_databases` is true, as in the new version, the server can accept connections before all the tables are loaded. If a query is made to a table that is not yet loaded, it will wait for the table's loading, which can take considerable time. It can change the behavior of the server if it is part of a large distributed system under a load balancer. In the first case, the load balancer can get a connection refusal and quickly failover to another server. In the second case, the load balancer can connect to a server that is still loading the tables, and the query will have a higher latency. Moreover, if many queries accumulate in the waiting state, it can lead to a "thundering herd" problem when they start processing simultaneously. This can make a difference only for highly loaded distributed backends. You can set the value of `async_load_databases` to false to avoid this problem. [#57695](https://github.com/ClickHouse/ClickHouse/pull/57695) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Setting `replace_long_file_name_to_hash` is enabled by default for `MergeTree` tables. [#64457](https://github.com/ClickHouse/ClickHouse/pull/64457) ([Anton Popov](https://github.com/CurtizJ)). This setting is fully compatible, and no actions needed during upgrade. The new data format is supported from all versions starting from 23.9. After enabling this setting, you can no longer downgrade to a version 23.8 or older.
|
||||
* Some invalid queries will fail earlier during parsing. Note: disabled the support for inline KQL expressions (the experimental Kusto language) when they are put into a `kql` table function without a string literal, e.g. `kql(garbage | trash)` instead of `kql('garbage | trash')` or `kql($$garbage | trash$$)`. This feature was introduced unintentionally and should not exist. [#61500](https://github.com/ClickHouse/ClickHouse/pull/61500) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Rework parallel processing in `Ordered` mode of storage `S3Queue`. This PR is backward incompatible for Ordered mode if you used settings `s3queue_processing_threads_num` or `s3queue_total_shards_num`. Setting `s3queue_total_shards_num` is deleted, previously it was allowed to use only under `s3queue_allow_experimental_sharded_mode`, which is now deprecated. A new setting is added - `s3queue_buckets`. [#64349](https://github.com/ClickHouse/ClickHouse/pull/64349) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
* New functions `snowflakeIDToDateTime`, `snowflakeIDToDateTime64`, `dateTimeToSnowflakeID`, and `dateTime64ToSnowflakeID` were added. Unlike the existing functions `snowflakeToDateTime`, `snowflakeToDateTime64`, `dateTimeToSnowflake`, and `dateTime64ToSnowflake`, the new functions are compatible with function `generateSnowflakeID`, i.e. they accept the snowflake IDs generated by `generateSnowflakeID` and produce snowflake IDs of the same type as `generateSnowflakeID` (i.e. `UInt64`). Furthermore, the new functions default to the UNIX epoch (aka. 1970-01-01), just like `generateSnowflakeID`. If necessary, a different epoch, e.g. Twitter's/X's epoch 2010-11-04 aka. 1288834974657 msec since UNIX epoch, can be passed. The old conversion functions are deprecated and will be removed after a transition period: to use them regardless, enable setting `allow_deprecated_snowflake_conversion_functions`. [#64948](https://github.com/ClickHouse/ClickHouse/pull/64948) ([Robert Schulze](https://github.com/rschu1ze)).
|
||||
|
||||
#### New Feature
|
||||
* Introduce statistics of type "number of distinct values". [#59357](https://github.com/ClickHouse/ClickHouse/pull/59357) ([Han Fei](https://github.com/hanfei1991)).
|
||||
* Allow to store named collections in ClickHouse Keeper. [#64574](https://github.com/ClickHouse/ClickHouse/pull/64574) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
* Support empty tuples. [#55061](https://github.com/ClickHouse/ClickHouse/pull/55061) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Add Hilbert Curve encode and decode functions. [#60156](https://github.com/ClickHouse/ClickHouse/pull/60156) ([Artem Mustafin](https://github.com/Artemmm91)).
|
||||
* Added support for reading LINESTRING geometry in WKT format using function `readWKTLineString`. [#62519](https://github.com/ClickHouse/ClickHouse/pull/62519) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
|
||||
* Allow to attach parts from a different disk. [#63087](https://github.com/ClickHouse/ClickHouse/pull/63087) ([Unalian](https://github.com/Unalian)).
|
||||
* Allow proxy to be bypassed for hosts specified in `no_proxy` env variable and ClickHouse proxy configuration. [#63314](https://github.com/ClickHouse/ClickHouse/pull/63314) ([Arthur Passos](https://github.com/arthurpassos)).
|
||||
* Added a new table function `loop` to support returning query results in an infinite loop. [#63452](https://github.com/ClickHouse/ClickHouse/pull/63452) ([Sariel](https://github.com/sarielwxm)).
|
||||
* Added new SQL functions `generateSnowflakeID` for generating Twitter-style Snowflake IDs. [#63577](https://github.com/ClickHouse/ClickHouse/pull/63577) ([Danila Puzov](https://github.com/kazalika)).
|
||||
* Add the ability to reshuffle rows during insert to optimize for size without violating the order set by `PRIMARY KEY`. It's controlled by the setting `optimize_row_order` (off by default). [#63578](https://github.com/ClickHouse/ClickHouse/pull/63578) ([Igor Markelov](https://github.com/ElderlyPassionFruit)).
|
||||
* Added `merge_workload` and `mutation_workload` settings to regulate how resources are utilized and shared between merges, mutations and other workloads. [#64061](https://github.com/ClickHouse/ClickHouse/pull/64061) ([Sergei Trifonov](https://github.com/serxa)).
|
||||
* Add support for comparing IPv4 and IPv6 types using the `=` operator. [#64292](https://github.com/ClickHouse/ClickHouse/pull/64292) ([Francisco J. Jurado Moreno](https://github.com/Beetelbrox)).
|
||||
* Allow to store named collections in zookeeper. [#64574](https://github.com/ClickHouse/ClickHouse/pull/64574) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
* Support decimal arguments in binary math functions (pow(), atan2(), max2, min2(), hypot(). [#64582](https://github.com/ClickHouse/ClickHouse/pull/64582) ([Mikhail Gorshkov](https://github.com/mgorshkov)).
|
||||
* Add support for index analysis over `hilbertEncode`. [#64662](https://github.com/ClickHouse/ClickHouse/pull/64662) ([Artem Mustafin](https://github.com/Artemmm91)).
|
||||
* Added support for reading `LINESTRING` geometry in the WKT format using function `readWKTLineString`. [#62519](https://github.com/ClickHouse/ClickHouse/pull/62519) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
|
||||
* Allow to attach parts from a different disk. [#63087](https://github.com/ClickHouse/ClickHouse/pull/63087) ([Unalian](https://github.com/Unalian)).
|
||||
* Added new SQL functions `generateSnowflakeID` for generating Twitter-style Snowflake IDs. [#63577](https://github.com/ClickHouse/ClickHouse/pull/63577) ([Danila Puzov](https://github.com/kazalika)).
|
||||
* Added `merge_workload` and `mutation_workload` settings to regulate how resources are utilized and shared between merges, mutations and other workloads. [#64061](https://github.com/ClickHouse/ClickHouse/pull/64061) ([Sergei Trifonov](https://github.com/serxa)).
|
||||
* Add support for comparing `IPv4` and `IPv6` types using the `=` operator. [#64292](https://github.com/ClickHouse/ClickHouse/pull/64292) ([Francisco J. Jurado Moreno](https://github.com/Beetelbrox)).
|
||||
* Support decimal arguments in binary math functions (pow, atan2, max2, min2, hypot). [#64582](https://github.com/ClickHouse/ClickHouse/pull/64582) ([Mikhail Gorshkov](https://github.com/mgorshkov)).
|
||||
* Added SQL functions `parseReadableSize` (along with `OrNull` and `OrZero` variants). [#64742](https://github.com/ClickHouse/ClickHouse/pull/64742) ([Francisco J. Jurado Moreno](https://github.com/Beetelbrox)).
|
||||
* Add server settings `max_table_num_to_throw` and `max_database_num_to_throw` to limit the number of databases or tables on `CREATE` queries. [#64781](https://github.com/ClickHouse/ClickHouse/pull/64781) ([Xu Jia](https://github.com/XuJia0210)).
|
||||
* Add _time virtual column to file alike storages (s3/file/hdfs/url/azureBlobStorage). [#64947](https://github.com/ClickHouse/ClickHouse/pull/64947) ([Ilya Golshtein](https://github.com/ilejn)).
|
||||
* Introduced new functions `base64UrlEncode`, `base64UrlDecode` and `tryBase64UrlDecode`. [#64991](https://github.com/ClickHouse/ClickHouse/pull/64991) ([Mikhail Gorshkov](https://github.com/mgorshkov)).
|
||||
* Add `_time` virtual column to file alike storages (s3/file/hdfs/url/azureBlobStorage). [#64947](https://github.com/ClickHouse/ClickHouse/pull/64947) ([Ilya Golshtein](https://github.com/ilejn)).
|
||||
* Introduced new functions `base64URLEncode`, `base64URLDecode` and `tryBase64URLDecode`. [#64991](https://github.com/ClickHouse/ClickHouse/pull/64991) ([Mikhail Gorshkov](https://github.com/mgorshkov)).
|
||||
* Add new function `editDistanceUTF8`, which calculates the [edit distance](https://en.wikipedia.org/wiki/Edit_distance) between two UTF8 strings. [#65269](https://github.com/ClickHouse/ClickHouse/pull/65269) ([LiuNeng](https://github.com/liuneng1994)).
|
||||
* Add `http_response_headers` configuration to support custom response headers in custom HTTP handlers. [#63562](https://github.com/ClickHouse/ClickHouse/pull/63562) ([Grigorii](https://github.com/GSokol)).
|
||||
* Added a new table function `loop` to support returning query results in an infinite loop. [#63452](https://github.com/ClickHouse/ClickHouse/pull/63452) ([Sariel](https://github.com/sarielwxm)). This is useful for testing.
|
||||
* Introduced two additional columns in the `system.query_log`: `used_privileges` and `missing_privileges`. `used_privileges` is populated with the privileges that were checked during query execution, and `missing_privileges` contains required privileges that are missing. [#64597](https://github.com/ClickHouse/ClickHouse/pull/64597) ([Alexey Katsman](https://github.com/alexkats)).
|
||||
* Added a setting `output_format_pretty_display_footer_column_names` which when enabled displays column names at the end of the table for long tables (50 rows by default), with the threshold value for minimum number of rows controlled by `output_format_pretty_display_footer_column_names_min_rows`. [#65144](https://github.com/ClickHouse/ClickHouse/pull/65144) ([Shaun Struwig](https://github.com/Blargian)).
|
||||
|
||||
#### Experimental Feature
|
||||
* Introduce statistics of type "number of distinct values". [#59357](https://github.com/ClickHouse/ClickHouse/pull/59357) ([Han Fei](https://github.com/hanfei1991)).
|
||||
* Support statistics with ReplicatedMergeTree. [#64934](https://github.com/ClickHouse/ClickHouse/pull/64934) ([Han Fei](https://github.com/hanfei1991)).
|
||||
* If "replica group" is configured for a `Replicated` database, automatically create a cluster that includes replicas from all groups. [#64312](https://github.com/ClickHouse/ClickHouse/pull/64312) ([Alexander Tokmakov](https://github.com/tavplubix)).
|
||||
* Add settings `parallel_replicas_custom_key_range_lower` and `parallel_replicas_custom_key_range_upper` to control how parallel replicas with dynamic shards parallelizes queries when using a range filter. [#64604](https://github.com/ClickHouse/ClickHouse/pull/64604) ([josh-hildred](https://github.com/josh-hildred)).
|
||||
|
||||
#### Performance Improvement
|
||||
* Add the ability to reshuffle rows during insert to optimize for size without violating the order set by `PRIMARY KEY`. It's controlled by the setting `optimize_row_order` (off by default). [#63578](https://github.com/ClickHouse/ClickHouse/pull/63578) ([Igor Markelov](https://github.com/ElderlyPassionFruit)).
|
||||
* Add a native parquet reader, which can read parquet binary to ClickHouse Columns directly. It's controlled by the setting `input_format_parquet_use_native_reader` (disabled by default). [#60361](https://github.com/ClickHouse/ClickHouse/pull/60361) ([ZhiHong Zhang](https://github.com/copperybean)).
|
||||
* Reduce the number of virtual function calls in ColumnNullable::size(). [#60556](https://github.com/ClickHouse/ClickHouse/pull/60556) ([HappenLee](https://github.com/HappenLee)).
|
||||
* Speedup `splitByRegexp` when the regular expression argument is a single-character. [#62696](https://github.com/ClickHouse/ClickHouse/pull/62696) ([Robert Schulze](https://github.com/rschu1ze)).
|
||||
* Speed up FixedHashTable by keeping track of the min and max keys used. This allows to reduce the number of cells that need to be verified. [#62746](https://github.com/ClickHouse/ClickHouse/pull/62746) ([Jiebin Sun](https://github.com/jiebinn)).
|
||||
* Optimize the resolution of in(LowCardinality, ConstantSet). [#64060](https://github.com/ClickHouse/ClickHouse/pull/64060) ([Zhiguo Zhou](https://github.com/ZhiguoZh)).
|
||||
* Use a thread pool to initialize and destroy hash tables inside `ConcurrentHashJoin`. [#64241](https://github.com/ClickHouse/ClickHouse/pull/64241) ([Nikita Taranov](https://github.com/nickitat)).
|
||||
* Optimized vertical merges in tables with sparse columns. [#64311](https://github.com/ClickHouse/ClickHouse/pull/64311) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Enabled prefetches of data from remote filesystem during vertical merges. It improves latency of vertical merges in tables with data stored on remote filesystem. [#64314](https://github.com/ClickHouse/ClickHouse/pull/64314) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Reduce redundant calls to `isDefault()` of `ColumnSparse::filter` to improve performance. [#64426](https://github.com/ClickHouse/ClickHouse/pull/64426) ([Jiebin Sun](https://github.com/jiebinn)).
|
||||
* Speedup `find_super_nodes` and `find_big_family` keeper-client commands by making multiple asynchronous getChildren requests. [#64628](https://github.com/ClickHouse/ClickHouse/pull/64628) ([Alexander Gololobov](https://github.com/davenger)).
|
||||
* Improve function least/greatest for nullable numberic type arguments. [#64668](https://github.com/ClickHouse/ClickHouse/pull/64668) ([KevinyhZou](https://github.com/KevinyhZou)).
|
||||
* Allow merging two consequent `FilterSteps` of a query plan. This improves filter-push-down optimization if the filter condition can be pushed down from the parent step. [#64760](https://github.com/ClickHouse/ClickHouse/pull/64760) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Remove bad optimization in vertical final implementation and re-enable vertical final algorithm by default. [#64783](https://github.com/ClickHouse/ClickHouse/pull/64783) ([Duc Canh Le](https://github.com/canhld94)).
|
||||
* Remove ALIAS nodes from the filter expression. This slightly improves performance for queries with `PREWHERE` (with new analyzer). [#64793](https://github.com/ClickHouse/ClickHouse/pull/64793) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Fix performance regression in cross join introduced in [#60459](https://github.com/ClickHouse/ClickHouse/issues/60459) (24.5). [#65243](https://github.com/ClickHouse/ClickHouse/pull/65243) ([Nikita Taranov](https://github.com/nickitat)).
|
||||
|
||||
#### Improvement
|
||||
* Support empty tuples. [#55061](https://github.com/ClickHouse/ClickHouse/pull/55061) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Hot reload storage policy for distributed tables when adding a new disk. [#58285](https://github.com/ClickHouse/ClickHouse/pull/58285) ([Duc Canh Le](https://github.com/canhld94)).
|
||||
* Avoid possible deadlock during MergeTree index analysis when scheduling threads in a saturated service. [#59427](https://github.com/ClickHouse/ClickHouse/pull/59427) ([Sean Haynes](https://github.com/seandhaynes)).
|
||||
* Support partial trivial count optimization when the query filter is able to select exact ranges from merge tree tables. [#60463](https://github.com/ClickHouse/ClickHouse/pull/60463) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Reduce max memory usage of multithreaded `INSERT`s by collecting chunks of multiple threads in a single transform. [#61047](https://github.com/ClickHouse/ClickHouse/pull/61047) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
|
||||
* Reduce the memory usage when using Azure object storage by using fixed memory allocation, avoiding the allocation of an extra buffer. [#63160](https://github.com/ClickHouse/ClickHouse/pull/63160) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
|
||||
* Several minor corner case fixes to proxy support & tunneling. [#63427](https://github.com/ClickHouse/ClickHouse/pull/63427) ([Arthur Passos](https://github.com/arthurpassos)).
|
||||
* Add `http_response_headers` setting to support custom response headers in custom HTTP handlers. [#63562](https://github.com/ClickHouse/ClickHouse/pull/63562) ([Grigorii](https://github.com/GSokol)).
|
||||
* Improve io_uring resubmits visibility. Rename profile event `IOUringSQEsResubmits` -> `IOUringSQEsResubmitsAsync` and add a new one `IOUringSQEsResubmitsSync`. [#63699](https://github.com/ClickHouse/ClickHouse/pull/63699) ([Tomer Shafir](https://github.com/tomershafir)).
|
||||
* Introduce assertions to verify all functions are called with columns of the right size. [#63723](https://github.com/ClickHouse/ClickHouse/pull/63723) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Reduce the number of virtual function calls in `ColumnNullable::size`. [#60556](https://github.com/ClickHouse/ClickHouse/pull/60556) ([HappenLee](https://github.com/HappenLee)).
|
||||
* Speedup `splitByRegexp` when the regular expression argument is a single-character. [#62696](https://github.com/ClickHouse/ClickHouse/pull/62696) ([Robert Schulze](https://github.com/rschu1ze)).
|
||||
* Speed up aggregation by 8-bit and 16-bit keys by keeping track of the min and max keys used. This allows to reduce the number of cells that need to be verified. [#62746](https://github.com/ClickHouse/ClickHouse/pull/62746) ([Jiebin Sun](https://github.com/jiebinn)).
|
||||
* Optimize operator IN when the left hand side is `LowCardinality` and the right is a set of constants. [#64060](https://github.com/ClickHouse/ClickHouse/pull/64060) ([Zhiguo Zhou](https://github.com/ZhiguoZh)).
|
||||
* Use a thread pool to initialize and destroy hash tables inside `ConcurrentHashJoin`. [#64241](https://github.com/ClickHouse/ClickHouse/pull/64241) ([Nikita Taranov](https://github.com/nickitat)).
|
||||
* Optimized vertical merges in tables with sparse columns. [#64311](https://github.com/ClickHouse/ClickHouse/pull/64311) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Enabled prefetches of data from remote filesystem during vertical merges. It improves latency of vertical merges in tables with data stored on remote filesystem. [#64314](https://github.com/ClickHouse/ClickHouse/pull/64314) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Reduce redundant calls to `isDefault` of `ColumnSparse::filter` to improve performance. [#64426](https://github.com/ClickHouse/ClickHouse/pull/64426) ([Jiebin Sun](https://github.com/jiebinn)).
|
||||
* Speedup `find_super_nodes` and `find_big_family` keeper-client commands by making multiple asynchronous getChildren requests. [#64628](https://github.com/ClickHouse/ClickHouse/pull/64628) ([Alexander Gololobov](https://github.com/davenger)).
|
||||
* Improve function `least`/`greatest` for nullable numberic type arguments. [#64668](https://github.com/ClickHouse/ClickHouse/pull/64668) ([KevinyhZou](https://github.com/KevinyhZou)).
|
||||
* Allow merging two consequent filtering steps of a query plan. This improves filter-push-down optimization if the filter condition can be pushed down from the parent step. [#64760](https://github.com/ClickHouse/ClickHouse/pull/64760) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Remove bad optimization in the vertical final implementation and re-enable vertical final algorithm by default. [#64783](https://github.com/ClickHouse/ClickHouse/pull/64783) ([Duc Canh Le](https://github.com/canhld94)).
|
||||
* Remove ALIAS nodes from the filter expression. This slightly improves performance for queries with `PREWHERE` (with the new analyzer). [#64793](https://github.com/ClickHouse/ClickHouse/pull/64793) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Re-enable OpenSSL session caching. [#65111](https://github.com/ClickHouse/ClickHouse/pull/65111) ([Robert Schulze](https://github.com/rschu1ze)).
|
||||
* Added settings to disable materialization of skip indexes and statistics on inserts (`materialize_skip_indexes_on_insert` and `materialize_statistics_on_insert`). [#64391](https://github.com/ClickHouse/ClickHouse/pull/64391) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Use the allocated memory size to calculate the row group size and reduce the peak memory of the parquet writer in the single-threaded mode. [#64424](https://github.com/ClickHouse/ClickHouse/pull/64424) ([LiuNeng](https://github.com/liuneng1994)).
|
||||
* Improve the iterator of sparse column to reduce call of `size`. [#64497](https://github.com/ClickHouse/ClickHouse/pull/64497) ([Jiebin Sun](https://github.com/jiebinn)).
|
||||
* Update condition to use server-side copy for backups to Azure blob storage. [#64518](https://github.com/ClickHouse/ClickHouse/pull/64518) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
|
||||
* Optimized memory usage of vertical merges for tables with high number of skip indexes. [#64580](https://github.com/ClickHouse/ClickHouse/pull/64580) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
|
||||
#### Improvement
|
||||
* `SHOW CREATE TABLE` executed on top of system tables will now show the super handy comment unique for each table which will explain why this table is needed. [#63788](https://github.com/ClickHouse/ClickHouse/pull/63788) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
|
||||
* Added setting `metadata_storage_type` to keep free space on metadata storage disk. [#64128](https://github.com/ClickHouse/ClickHouse/pull/64128) ([MikhailBurdukov](https://github.com/MikhailBurdukov)).
|
||||
* Add metrics to track the number of directories created and removed by the plain_rewritable metadata storage, and the number of entries in the local-to-remote in-memory map. [#64175](https://github.com/ClickHouse/ClickHouse/pull/64175) ([Julia Kartseva](https://github.com/jkartseva)).
|
||||
* The second argument (scale) of functions `round()`, `roundBankers()`, `floor()`, `ceil()` and `trunc()` can now be non-const. [#64798](https://github.com/ClickHouse/ClickHouse/pull/64798) ([Mikhail Gorshkov](https://github.com/mgorshkov)).
|
||||
* Hot reload storage policy for `Distributed` tables when adding a new disk. [#58285](https://github.com/ClickHouse/ClickHouse/pull/58285) ([Duc Canh Le](https://github.com/canhld94)).
|
||||
* Avoid possible deadlock during MergeTree index analysis when scheduling threads in a saturated service. [#59427](https://github.com/ClickHouse/ClickHouse/pull/59427) ([Sean Haynes](https://github.com/seandhaynes)).
|
||||
* Several minor corner case fixes to S3 proxy support & tunneling. [#63427](https://github.com/ClickHouse/ClickHouse/pull/63427) ([Arthur Passos](https://github.com/arthurpassos)).
|
||||
* Improve io_uring resubmit visibility. Rename profile event `IOUringSQEsResubmits` -> `IOUringSQEsResubmitsAsync` and add a new one `IOUringSQEsResubmitsSync`. [#63699](https://github.com/ClickHouse/ClickHouse/pull/63699) ([Tomer Shafir](https://github.com/tomershafir)).
|
||||
* Added a new setting, `metadata_keep_free_space_bytes` to keep free space on the metadata storage disk. [#64128](https://github.com/ClickHouse/ClickHouse/pull/64128) ([MikhailBurdukov](https://github.com/MikhailBurdukov)).
|
||||
* Add metrics to track the number of directories created and removed by the `plain_rewritable` metadata storage, and the number of entries in the local-to-remote in-memory map. [#64175](https://github.com/ClickHouse/ClickHouse/pull/64175) ([Julia Kartseva](https://github.com/jkartseva)).
|
||||
* The query cache now considers identical queries with different settings as different. This increases robustness in cases where different settings (e.g. `limit` or `additional_table_filters`) would affect the query result. [#64205](https://github.com/ClickHouse/ClickHouse/pull/64205) ([Robert Schulze](https://github.com/rschu1ze)).
|
||||
* Better Exception Message in Delete Table with Projection, users can understand the error and the steps should be taken. [#64212](https://github.com/ClickHouse/ClickHouse/pull/64212) ([jsc0218](https://github.com/jsc0218)).
|
||||
* Support the non standard error code `QpsLimitExceeded` in object storage as a retryable error. [#64225](https://github.com/ClickHouse/ClickHouse/pull/64225) ([Sema Checherinda](https://github.com/CheSema)).
|
||||
* Forbid converting a MergeTree table to replicated if the zookeeper path for this table already exists. [#64244](https://github.com/ClickHouse/ClickHouse/pull/64244) ([Kirill](https://github.com/kirillgarbar)).
|
||||
* If "replica group" is configured for a `Replicated` database, automatically create a cluster that includes replicas from all groups. [#64312](https://github.com/ClickHouse/ClickHouse/pull/64312) ([Alexander Tokmakov](https://github.com/tavplubix)).
|
||||
* Added settings to disable materialization of skip indexes and statistics on inserts (`materialize_skip_indexes_on_insert` and `materialize_statistics_on_insert`). [#64391](https://github.com/ClickHouse/ClickHouse/pull/64391) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Use the allocated memory size to calculate the row group size and reduce the peak memory of the parquet writer in single-threaded mode. [#64424](https://github.com/ClickHouse/ClickHouse/pull/64424) ([LiuNeng](https://github.com/liuneng1994)).
|
||||
* Added new configuration input_format_parquet_prefer_block_bytes to control the average output block bytes, and modified the default value of input_format_parquet_max_block_size to 65409. [#64427](https://github.com/ClickHouse/ClickHouse/pull/64427) ([LiuNeng](https://github.com/liuneng1994)).
|
||||
* Added a new setting `input_format_parquet_prefer_block_bytes` to control the average output block bytes, and modified the default value of `input_format_parquet_max_block_size` to 65409. [#64427](https://github.com/ClickHouse/ClickHouse/pull/64427) ([LiuNeng](https://github.com/liuneng1994)).
|
||||
* Allow proxy to be bypassed for hosts specified in `no_proxy` env variable and ClickHouse proxy configuration. [#63314](https://github.com/ClickHouse/ClickHouse/pull/63314) ([Arthur Passos](https://github.com/arthurpassos)).
|
||||
* Always start Keeper with sufficient amount of threads in global thread pool. [#64444](https://github.com/ClickHouse/ClickHouse/pull/64444) ([Duc Canh Le](https://github.com/canhld94)).
|
||||
* Settings from user config doesn't affect merges and mutations for MergeTree on top of object storage. [#64456](https://github.com/ClickHouse/ClickHouse/pull/64456) ([alesapin](https://github.com/alesapin)).
|
||||
* Setting `replace_long_file_name_to_hash` is enabled by default for `MergeTree` tables. [#64457](https://github.com/ClickHouse/ClickHouse/pull/64457) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Improve the iterator of sparse column to reduce call of size(). [#64497](https://github.com/ClickHouse/ClickHouse/pull/64497) ([Jiebin Sun](https://github.com/jiebinn)).
|
||||
* Update condition to use copy for azure blob storage. [#64518](https://github.com/ClickHouse/ClickHouse/pull/64518) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
|
||||
* Settings from the user's config don't affect merges and mutations for `MergeTree` on top of object storage. [#64456](https://github.com/ClickHouse/ClickHouse/pull/64456) ([alesapin](https://github.com/alesapin)).
|
||||
* Support the non standard error code `TotalQpsLimitExceeded` in object storage as a retryable error. [#64520](https://github.com/ClickHouse/ClickHouse/pull/64520) ([Sema Checherinda](https://github.com/CheSema)).
|
||||
* Optimized memory usage of vertical merges for tables with high number of skip indexes. [#64580](https://github.com/ClickHouse/ClickHouse/pull/64580) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Introduced two additional columns in the `system.query_log`: `used_privileges` and `missing_privileges`. `used_privileges` is populated with the privileges that were checked during query execution, and `missing_privileges` contains required privileges that are missing. [#64597](https://github.com/ClickHouse/ClickHouse/pull/64597) ([Alexey Katsman](https://github.com/alexkats)).
|
||||
* Add settings `parallel_replicas_custom_key_range_lower` and `parallel_replicas_custom_key_range_upper` to control how parallel replicas with dynamic shards parallelizes queries when using a range filter. [#64604](https://github.com/ClickHouse/ClickHouse/pull/64604) ([josh-hildred](https://github.com/josh-hildred)).
|
||||
* Updated Advanced Dashboard for both open-source and ClickHouse Cloud versions to include a chart for 'Maximum concurrent network connections'. [#64610](https://github.com/ClickHouse/ClickHouse/pull/64610) ([Thom O'Connor](https://github.com/thomoco)).
|
||||
* The second argument (scale) of functions `round()`, `roundBankers()`, `floor()`, `ceil()` and `trunc()` can now be non-const. [#64798](https://github.com/ClickHouse/ClickHouse/pull/64798) ([Mikhail Gorshkov](https://github.com/mgorshkov)).
|
||||
* Improve progress report on zeros_mt and generateRandom. [#64804](https://github.com/ClickHouse/ClickHouse/pull/64804) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Add an asynchronous metric jemalloc.profile.active to show whether sampling is currently active. This is an activation mechanism in addition to prof.active; both must be active for the calling thread to sample. [#64842](https://github.com/ClickHouse/ClickHouse/pull/64842) ([Unalian](https://github.com/Unalian)).
|
||||
* Support statistics with ReplicatedMergeTree. [#64934](https://github.com/ClickHouse/ClickHouse/pull/64934) ([Han Fei](https://github.com/hanfei1991)).
|
||||
* Improve progress report on `zeros_mt` and `generateRandom`. [#64804](https://github.com/ClickHouse/ClickHouse/pull/64804) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Add an asynchronous metric `jemalloc.profile.active` to show whether sampling is currently active. This is an activation mechanism in addition to prof.active; both must be active for the calling thread to sample. [#64842](https://github.com/ClickHouse/ClickHouse/pull/64842) ([Unalian](https://github.com/Unalian)).
|
||||
* Remove mark of `allow_experimental_join_condition` as important. This mark may have prevented distributed queries in a mixed versions cluster from being executed successfully. [#65008](https://github.com/ClickHouse/ClickHouse/pull/65008) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
|
||||
* Added server Asynchronous metrics `DiskGetObjectThrottler*` and `DiskGetObjectThrottler*` reflecting request per second rate limit defined with `s3_max_get_rps` and `s3_max_put_rps` disk settings and currently available number of requests that could be sent without hitting throttling limit on the disk. Metrics are defined for every disk that has a configured limit. [#65050](https://github.com/ClickHouse/ClickHouse/pull/65050) ([Sergei Trifonov](https://github.com/serxa)).
|
||||
* Added a setting `output_format_pretty_display_footer_column_names` which when enabled displays column names at the end of the table for long tables (50 rows by default), with the threshold value for minimum number of rows controlled by `output_format_pretty_display_footer_column_names_min_rows`. [#65144](https://github.com/ClickHouse/ClickHouse/pull/65144) ([Shaun Struwig](https://github.com/Blargian)).
|
||||
* Returned back the behaviour of how ClickHouse works and interprets Tuples in CSV format. This change effectively reverts https://github.com/ClickHouse/ClickHouse/pull/60994 and makes it available only under a few settings: `output_format_csv_serialize_tuple_into_separate_columns`, `input_format_csv_deserialize_separate_columns_into_tuple` and `input_format_csv_try_infer_strings_from_quoted_tuples`. [#65170](https://github.com/ClickHouse/ClickHouse/pull/65170) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
|
||||
* Initialize global trace collector for Poco::ThreadPool (needed for keeper, etc). [#65239](https://github.com/ClickHouse/ClickHouse/pull/65239) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
* Add validation when creating a user with bcrypt_hash. [#65242](https://github.com/ClickHouse/ClickHouse/pull/65242) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Unite s3/hdfs/azure storage implementations into a single class working with IObjectStorage. Same for *Cluster, data lakes and Queue storages. [#59767](https://github.com/ClickHouse/ClickHouse/pull/59767) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
* Refactor data part writer to remove dependencies on MergeTreeData and DataPart. [#63620](https://github.com/ClickHouse/ClickHouse/pull/63620) ([Alexander Gololobov](https://github.com/davenger)).
|
||||
* Add profile events for number of rows read during/after prewhere. [#64198](https://github.com/ClickHouse/ClickHouse/pull/64198) ([Nikita Taranov](https://github.com/nickitat)).
|
||||
* Print query in explain plan with parallel replicas. [#64298](https://github.com/ClickHouse/ClickHouse/pull/64298) ([vdimir](https://github.com/vdimir)).
|
||||
* Initialize global trace collector for `Poco::ThreadPool` (needed for Keeper, etc). [#65239](https://github.com/ClickHouse/ClickHouse/pull/65239) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
* Add a validation when creating a user with `bcrypt_hash`. [#65242](https://github.com/ClickHouse/ClickHouse/pull/65242) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Add profile events for number of rows read during/after `PREWHERE`. [#64198](https://github.com/ClickHouse/ClickHouse/pull/64198) ([Nikita Taranov](https://github.com/nickitat)).
|
||||
* Print query in `EXPLAIN PLAN` with parallel replicas. [#64298](https://github.com/ClickHouse/ClickHouse/pull/64298) ([vdimir](https://github.com/vdimir)).
|
||||
* Rename `allow_deprecated_functions` to `allow_deprecated_error_prone_window_functions`. [#64358](https://github.com/ClickHouse/ClickHouse/pull/64358) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Respect `max_read_buffer_size` setting for file descriptors as well in file() table function. [#64532](https://github.com/ClickHouse/ClickHouse/pull/64532) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Respect `max_read_buffer_size` setting for file descriptors as well in the `file` table function. [#64532](https://github.com/ClickHouse/ClickHouse/pull/64532) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Disable transactions for unsupported storages even for materialized views. [#64918](https://github.com/ClickHouse/ClickHouse/pull/64918) ([alesapin](https://github.com/alesapin)).
|
||||
* Refactor `KeyCondition` and key analysis to improve PartitionPruner and trivial count optimization. This is separated from [#60463](https://github.com/ClickHouse/ClickHouse/issues/60463) . [#61459](https://github.com/ClickHouse/ClickHouse/pull/61459) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Forbid `QUALIFY` clause in the old analyzer. The old analyzer ignored `QUALIFY`, so it could lead to unexpected data removal in mutations. [#65356](https://github.com/ClickHouse/ClickHouse/pull/65356) ([Dmitry Novik](https://github.com/novikd)).
|
||||
|
||||
#### Bug Fix (user-visible misbehavior in an official stable release)
|
||||
* A bug in Apache ORC library was fixed: Fixed ORC statistics calculation, when writing, for unsigned types on all platforms and Int8 on ARM. [#64563](https://github.com/ClickHouse/ClickHouse/pull/64563) ([Michael Kolupaev](https://github.com/al13n321)).
|
||||
* Returned back the behaviour of how ClickHouse works and interprets Tuples in CSV format. This change effectively reverts https://github.com/ClickHouse/ClickHouse/pull/60994 and makes it available only under a few settings: `output_format_csv_serialize_tuple_into_separate_columns`, `input_format_csv_deserialize_separate_columns_into_tuple` and `input_format_csv_try_infer_strings_from_quoted_tuples`. [#65170](https://github.com/ClickHouse/ClickHouse/pull/65170) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
|
||||
* Fix a permission error where a user in a specific situation can escalate their privileges on the default database without necessary grants. [#64769](https://github.com/ClickHouse/ClickHouse/pull/64769) ([pufit](https://github.com/pufit)).
|
||||
* Fix crash with UniqInjectiveFunctionsEliminationPass and uniqCombined. [#65188](https://github.com/ClickHouse/ClickHouse/pull/65188) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Fix a bug in ClickHouse Keeper that causes digest mismatch during closing session. [#65198](https://github.com/ClickHouse/ClickHouse/pull/65198) ([Aleksei Filatov](https://github.com/aalexfvk)).
|
||||
* Forbid `QUALIFY` clause in the old analyzer. The old analyzer ignored `QUALIFY`, so it could lead to unexpected data removal in mutations. [#65356](https://github.com/ClickHouse/ClickHouse/pull/65356) ([Dmitry Novik](https://github.com/novikd)).
|
||||
* Use correct memory alignment for Distinct combinator. Previously, crash could happen because of invalid memory allocation when the combinator was used. [#65379](https://github.com/ClickHouse/ClickHouse/pull/65379) ([Antonio Andelic](https://github.com/antonio2368)).
|
||||
* Fix crash with `DISTINCT` and window functions. [#64767](https://github.com/ClickHouse/ClickHouse/pull/64767) ([Igor Nikonov](https://github.com/devcrafter)).
|
||||
* Fixed 'set' skip index not working with IN and indexHint(). [#62083](https://github.com/ClickHouse/ClickHouse/pull/62083) ([Michael Kolupaev](https://github.com/al13n321)).
|
||||
@ -132,7 +130,6 @@
|
||||
* Fixed `optimize_read_in_order` behaviour for ORDER BY ... NULLS FIRST / LAST on tables with nullable keys. [#64483](https://github.com/ClickHouse/ClickHouse/pull/64483) ([Eduard Karacharov](https://github.com/korowa)).
|
||||
* Fix the `Expression nodes list expected 1 projection names` and `Unknown expression or identifier` errors for queries with aliases to `GLOBAL IN.`. [#64517](https://github.com/ClickHouse/ClickHouse/pull/64517) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Fix an error `Cannot find column` in distributed queries with constant CTE in the `GROUP BY` key. [#64519](https://github.com/ClickHouse/ClickHouse/pull/64519) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Fixed ORC statistics calculation, when writing, for unsigned types on all platforms and Int8 on ARM. [#64563](https://github.com/ClickHouse/ClickHouse/pull/64563) ([Michael Kolupaev](https://github.com/al13n321)).
|
||||
* Fix the crash loop when restoring from backup is blocked by creating an MV with a definer that hasn't been restored yet. [#64595](https://github.com/ClickHouse/ClickHouse/pull/64595) ([pufit](https://github.com/pufit)).
|
||||
* Fix the output of function `formatDateTimeInJodaSyntax` when a formatter generates an uneven number of characters and the last character is `0`. For example, `SELECT formatDateTimeInJodaSyntax(toDate('2012-05-29'), 'D')` now correctly returns `150` instead of previously `15`. [#64614](https://github.com/ClickHouse/ClickHouse/pull/64614) ([LiuNeng](https://github.com/liuneng1994)).
|
||||
* Do not rewrite aggregation if `-If` combinator is already used. [#64638](https://github.com/ClickHouse/ClickHouse/pull/64638) ([Dmitry Novik](https://github.com/novikd)).
|
||||
@ -166,21 +163,14 @@
|
||||
* This PR ensures that the type of the constant(IN operator's second parameter) is always visible during the IN operator's type conversion process. Otherwise, losing type information may cause some conversions to fail, such as the conversion from DateTime to Date. This fixes ([#64487](https://github.com/ClickHouse/ClickHouse/issues/64487)). [#65315](https://github.com/ClickHouse/ClickHouse/pull/65315) ([pn](https://github.com/chloro-pn)).
|
||||
|
||||
#### Build/Testing/Packaging Improvement
|
||||
* Make `network` service be required when using the rc init script to start the ClickHouse server daemon. [#60650](https://github.com/ClickHouse/ClickHouse/pull/60650) ([Chun-Sheng, Li](https://github.com/peter279k)).
|
||||
* Fix typo in test_hdfsCluster_unset_skip_unavailable_shards. The test writes data to unskip_unavailable_shards, but uses skip_unavailable_shards from the previous test. [#64243](https://github.com/ClickHouse/ClickHouse/pull/64243) ([Mikhail Artemenko](https://github.com/Michicosun)).
|
||||
* Reduce the size of some slow tests. [#64387](https://github.com/ClickHouse/ClickHouse/pull/64387) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Reduce the size of some slow tests. [#64452](https://github.com/ClickHouse/ClickHouse/pull/64452) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Fix test_lost_part_other_replica. [#64512](https://github.com/ClickHouse/ClickHouse/pull/64512) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Add tests for experimental unequal joins and randomize new settings in clickhouse-test. [#64535](https://github.com/ClickHouse/ClickHouse/pull/64535) ([Nikita Fomichev](https://github.com/fm4v)).
|
||||
* Upgrade tests: Update config and work with release candidates. [#64542](https://github.com/ClickHouse/ClickHouse/pull/64542) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Add support for LLVM XRay. [#64592](https://github.com/ClickHouse/ClickHouse/pull/64592) ([Tomer Shafir](https://github.com/tomershafir)).
|
||||
* Speed up 02995_forget_partition. [#64761](https://github.com/ClickHouse/ClickHouse/pull/64761) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Fix 02790_async_queries_in_query_log. [#64764](https://github.com/ClickHouse/ClickHouse/pull/64764) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Support LLVM XRay on Linux amd64 only. [#64837](https://github.com/ClickHouse/ClickHouse/pull/64837) ([Tomer Shafir](https://github.com/tomershafir)).
|
||||
* Get rid of custom code in `tests/ci/download_release_packages.py` and `tests/ci/get_previous_release_tag.py` to avoid issues after the https://github.com/ClickHouse/ClickHouse/pull/64759 is merged. [#64848](https://github.com/ClickHouse/ClickHouse/pull/64848) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Decrease the `unit-test` image a few times. [#65102](https://github.com/ClickHouse/ClickHouse/pull/65102) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Add support for LLVM XRay. [#64592](https://github.com/ClickHouse/ClickHouse/pull/64592) [#64837](https://github.com/ClickHouse/ClickHouse/pull/64837) ([Tomer Shafir](https://github.com/tomershafir)).
|
||||
* Unite s3/hdfs/azure storage implementations into a single class working with IObjectStorage. Same for *Cluster, data lakes and Queue storages. [#59767](https://github.com/ClickHouse/ClickHouse/pull/59767) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
* Refactor data part writer to remove dependencies on MergeTreeData and DataPart. [#63620](https://github.com/ClickHouse/ClickHouse/pull/63620) ([Alexander Gololobov](https://github.com/davenger)).
|
||||
* Refactor `KeyCondition` and key analysis to improve PartitionPruner and trivial count optimization. This is separated from [#60463](https://github.com/ClickHouse/ClickHouse/issues/60463) . [#61459](https://github.com/ClickHouse/ClickHouse/pull/61459) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Introduce assertions to verify all functions are called with columns of the right size. [#63723](https://github.com/ClickHouse/ClickHouse/pull/63723) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Make `network` service be required when using the `rc` init script to start the ClickHouse server daemon. [#60650](https://github.com/ClickHouse/ClickHouse/pull/60650) ([Chun-Sheng, Li](https://github.com/peter279k)).
|
||||
* Reduce the size of some slow tests. [#64387](https://github.com/ClickHouse/ClickHouse/pull/64387) [#64452](https://github.com/ClickHouse/ClickHouse/pull/64452) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Replay ZooKeeper logs using keeper-bench. [#62481](https://github.com/ClickHouse/ClickHouse/pull/62481) ([Antonio Andelic](https://github.com/antonio2368)).
|
||||
* Re-enable OpenSSL session caching. [#65111](https://github.com/ClickHouse/ClickHouse/pull/65111) ([Robert Schulze](https://github.com/rschu1ze)).
|
||||
|
||||
### <a id="245"></a> ClickHouse release 24.5, 2024-05-30
|
||||
|
||||
|
@ -319,7 +319,6 @@ endif()
|
||||
# Disable floating-point expression contraction in order to get consistent floating point calculation results across platforms
|
||||
set (COMPILER_FLAGS "${COMPILER_FLAGS} -ffp-contract=off")
|
||||
|
||||
# Our built-in unwinder only supports DWARF version up to 4.
|
||||
set (DEBUG_INFO_FLAGS "-g")
|
||||
|
||||
# Disable omit frame pointer compiler optimization using -fno-omit-frame-pointer
|
||||
@ -333,15 +332,15 @@ endif()
|
||||
|
||||
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${COMPILER_FLAGS} ${CMAKE_CXX_FLAGS_ADD}")
|
||||
set (CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -O3 ${DEBUG_INFO_FLAGS} ${CMAKE_CXX_FLAGS_ADD}")
|
||||
set (CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -O0 ${DEBUG_INFO_FLAGS} ${CMAKE_CXX_FLAGS_ADD}")
|
||||
set (CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -Og ${DEBUG_INFO_FLAGS} ${CMAKE_CXX_FLAGS_ADD}")
|
||||
|
||||
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${COMPILER_FLAGS} ${CMAKE_C_FLAGS_ADD}")
|
||||
set (CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS_RELWITHDEBINFO} -O3 ${DEBUG_INFO_FLAGS} ${CMAKE_C_FLAGS_ADD}")
|
||||
set (CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -O0 ${DEBUG_INFO_FLAGS} ${CMAKE_C_FLAGS_ADD}")
|
||||
set (CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -Og ${DEBUG_INFO_FLAGS} ${CMAKE_C_FLAGS_ADD}")
|
||||
|
||||
set (CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} ${COMPILER_FLAGS} ${CMAKE_ASM_FLAGS_ADD}")
|
||||
set (CMAKE_ASM_FLAGS_RELWITHDEBINFO "${CMAKE_ASM_FLAGS_RELWITHDEBINFO} -O3 ${DEBUG_INFO_FLAGS} ${CMAKE_ASM_FLAGS_ADD}")
|
||||
set (CMAKE_ASM_FLAGS_DEBUG "${CMAKE_ASM_FLAGS_DEBUG} -O0 ${DEBUG_INFO_FLAGS} ${CMAKE_ASM_FLAGS_ADD}")
|
||||
set (CMAKE_ASM_FLAGS_DEBUG "${CMAKE_ASM_FLAGS_DEBUG} -Og ${DEBUG_INFO_FLAGS} ${CMAKE_ASM_FLAGS_ADD}")
|
||||
|
||||
if (OS_DARWIN)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -stdlib=libc++")
|
||||
|
@ -14,6 +14,7 @@ The following versions of ClickHouse server are currently supported with securit
|
||||
|
||||
| Version | Supported |
|
||||
|:-|:-|
|
||||
| 24.6 | ✔️ |
|
||||
| 24.5 | ✔️ |
|
||||
| 24.4 | ✔️ |
|
||||
| 24.3 | ✔️ |
|
||||
|
@ -6,6 +6,9 @@ namespace
|
||||
{
|
||||
std::string getFQDNOrHostNameImpl()
|
||||
{
|
||||
#if defined(OS_DARWIN)
|
||||
return Poco::Net::DNS::hostName();
|
||||
#else
|
||||
try
|
||||
{
|
||||
return Poco::Net::DNS::thisHost().name();
|
||||
@ -14,6 +17,7 @@ namespace
|
||||
{
|
||||
return Poco::Net::DNS::hostName();
|
||||
}
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1,32 +1,3 @@
|
||||
// Based on https://github.com/amdn/itoa and combined with our optimizations
|
||||
//
|
||||
//=== itoa.cpp - Fast integer to ascii conversion --*- C++ -*-//
|
||||
//
|
||||
// The MIT License (MIT)
|
||||
// Copyright (c) 2016 Arturo Martin-de-Nicolas
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included
|
||||
// in all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <cstring>
|
||||
#include <type_traits>
|
||||
#include <base/defines.h>
|
||||
#include <base/extended_types.h>
|
||||
@ -34,99 +5,15 @@
|
||||
|
||||
namespace
|
||||
{
|
||||
template <typename T>
|
||||
ALWAYS_INLINE inline constexpr T pow10(size_t x)
|
||||
{
|
||||
return x ? 10 * pow10<T>(x - 1) : 1;
|
||||
}
|
||||
|
||||
// Division by a power of 10 is implemented using a multiplicative inverse.
|
||||
// This strength reduction is also done by optimizing compilers, but
|
||||
// presently the fastest results are produced by using the values
|
||||
// for the multiplication and the shift as given by the algorithm
|
||||
// described by Agner Fog in "Optimizing Subroutines in Assembly Language"
|
||||
//
|
||||
// http://www.agner.org/optimize/optimizing_assembly.pdf
|
||||
//
|
||||
// "Integer division by a constant (all processors)
|
||||
// A floating point number can be divided by a constant by multiplying
|
||||
// with the reciprocal. If we want to do the same with integers, we have
|
||||
// to scale the reciprocal by 2n and then shift the product to the right
|
||||
// by n. There are various algorithms for finding a suitable value of n
|
||||
// and compensating for rounding errors. The algorithm described below
|
||||
// was invented by Terje Mathisen, Norway, and not published elsewhere."
|
||||
|
||||
/// Division by constant is performed by:
|
||||
/// 1. Adding 1 if needed;
|
||||
/// 2. Multiplying by another constant;
|
||||
/// 3. Shifting right by another constant.
|
||||
template <typename UInt, bool add_, UInt multiplier_, unsigned shift_>
|
||||
struct Division
|
||||
{
|
||||
static constexpr bool add{add_};
|
||||
static constexpr UInt multiplier{multiplier_};
|
||||
static constexpr unsigned shift{shift_};
|
||||
};
|
||||
|
||||
/// Select a type with appropriate number of bytes from the list of types.
|
||||
/// First parameter is the number of bytes requested. Then goes a list of types with 1, 2, 4, ... number of bytes.
|
||||
/// Example: SelectType<4, uint8_t, uint16_t, uint32_t, uint64_t> will select uint32_t.
|
||||
template <size_t N, typename T, typename... Ts>
|
||||
struct SelectType
|
||||
{
|
||||
using Result = typename SelectType<N / 2, Ts...>::Result;
|
||||
};
|
||||
|
||||
template <typename T, typename... Ts>
|
||||
struct SelectType<1, T, Ts...>
|
||||
{
|
||||
using Result = T;
|
||||
};
|
||||
|
||||
|
||||
/// Division by 10^N where N is the size of the type.
|
||||
template <size_t N>
|
||||
using DivisionBy10PowN = typename SelectType<
|
||||
N,
|
||||
Division<uint8_t, false, 205U, 11>, /// divide by 10
|
||||
Division<uint16_t, true, 41943U, 22>, /// divide by 100
|
||||
Division<uint32_t, false, 3518437209U, 45>, /// divide by 10000
|
||||
Division<uint64_t, false, 12379400392853802749ULL, 90> /// divide by 100000000
|
||||
>::Result;
|
||||
|
||||
template <size_t N>
|
||||
using UnsignedOfSize = typename SelectType<N, uint8_t, uint16_t, uint32_t, uint64_t, __uint128_t>::Result;
|
||||
|
||||
/// Holds the result of dividing an unsigned N-byte variable by 10^N resulting in
|
||||
template <size_t N>
|
||||
struct QuotientAndRemainder
|
||||
{
|
||||
UnsignedOfSize<N> quotient; // quotient with fewer than 2*N decimal digits
|
||||
UnsignedOfSize<N / 2> remainder; // remainder with at most N decimal digits
|
||||
};
|
||||
|
||||
template <size_t N>
|
||||
QuotientAndRemainder<N> inline split(UnsignedOfSize<N> value)
|
||||
{
|
||||
constexpr DivisionBy10PowN<N> division;
|
||||
|
||||
UnsignedOfSize<N> quotient = (division.multiplier * (UnsignedOfSize<2 * N>(value) + division.add)) >> division.shift;
|
||||
UnsignedOfSize<N / 2> remainder = static_cast<UnsignedOfSize<N / 2>>(value - quotient * pow10<UnsignedOfSize<N / 2>>(N));
|
||||
|
||||
return {quotient, remainder};
|
||||
}
|
||||
|
||||
ALWAYS_INLINE inline char * outDigit(char * p, uint8_t value)
|
||||
ALWAYS_INLINE inline char * outOneDigit(char * p, uint8_t value)
|
||||
{
|
||||
*p = '0' + value;
|
||||
++p;
|
||||
return p;
|
||||
return p + 1;
|
||||
}
|
||||
|
||||
// Using a lookup table to convert binary numbers from 0 to 99
|
||||
// into ascii characters as described by Andrei Alexandrescu in
|
||||
// https://www.facebook.com/notes/facebook-engineering/three-optimization-tips-for-c/10151361643253920/
|
||||
|
||||
const char digits[201] = "00010203040506070809"
|
||||
"10111213141516171819"
|
||||
"20212223242526272829"
|
||||
@ -137,7 +24,6 @@ const char digits[201] = "00010203040506070809"
|
||||
"70717273747576777879"
|
||||
"80818283848586878889"
|
||||
"90919293949596979899";
|
||||
|
||||
ALWAYS_INLINE inline char * outTwoDigits(char * p, uint8_t value)
|
||||
{
|
||||
memcpy(p, &digits[value * 2], 2);
|
||||
@ -145,153 +31,260 @@ ALWAYS_INLINE inline char * outTwoDigits(char * p, uint8_t value)
|
||||
return p;
|
||||
}
|
||||
|
||||
namespace convert
|
||||
namespace jeaiii
|
||||
{
|
||||
template <typename UInt, size_t N = sizeof(UInt)>
|
||||
char * head(char * p, UInt u);
|
||||
template <typename UInt, size_t N = sizeof(UInt)>
|
||||
char * tail(char * p, UInt u);
|
||||
/*
|
||||
MIT License
|
||||
|
||||
//===----------------------------------------------------------===//
|
||||
// head: find most significant digit, skip leading zeros
|
||||
//===----------------------------------------------------------===//
|
||||
Copyright (c) 2022 James Edward Anhalt III - https://github.com/jeaiii/itoa
|
||||
|
||||
// "x" contains quotient and remainder after division by 10^N
|
||||
// quotient is less than 10^N
|
||||
template <size_t N>
|
||||
ALWAYS_INLINE inline char * head(char * p, QuotientAndRemainder<N> x)
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
*/
|
||||
struct pair
|
||||
{
|
||||
p = head(p, UnsignedOfSize<N / 2>(x.quotient));
|
||||
p = tail(p, x.remainder);
|
||||
return p;
|
||||
}
|
||||
char dd[2];
|
||||
constexpr pair(char c) : dd{c, '\0'} { } /// NOLINT(google-explicit-constructor)
|
||||
constexpr pair(int n) : dd{"0123456789"[n / 10], "0123456789"[n % 10]} { } /// NOLINT(google-explicit-constructor)
|
||||
};
|
||||
|
||||
// "u" is less than 10^2*N
|
||||
template <typename UInt, size_t N>
|
||||
ALWAYS_INLINE inline char * head(char * p, UInt u)
|
||||
constexpr struct
|
||||
{
|
||||
return u < pow10<UnsignedOfSize<N>>(N) ? head(p, UnsignedOfSize<N / 2>(u)) : head<N>(p, split<N>(u));
|
||||
}
|
||||
pair dd[100]{
|
||||
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, //
|
||||
10, 11, 12, 13, 14, 15, 16, 17, 18, 19, //
|
||||
20, 21, 22, 23, 24, 25, 26, 27, 28, 29, //
|
||||
30, 31, 32, 33, 34, 35, 36, 37, 38, 39, //
|
||||
40, 41, 42, 43, 44, 45, 46, 47, 48, 49, //
|
||||
50, 51, 52, 53, 54, 55, 56, 57, 58, 59, //
|
||||
60, 61, 62, 63, 64, 65, 66, 67, 68, 69, //
|
||||
70, 71, 72, 73, 74, 75, 76, 77, 78, 79, //
|
||||
80, 81, 82, 83, 84, 85, 86, 87, 88, 89, //
|
||||
90, 91, 92, 93, 94, 95, 96, 97, 98, 99, //
|
||||
};
|
||||
pair fd[100]{
|
||||
'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', //
|
||||
10, 11, 12, 13, 14, 15, 16, 17, 18, 19, //
|
||||
20, 21, 22, 23, 24, 25, 26, 27, 28, 29, //
|
||||
30, 31, 32, 33, 34, 35, 36, 37, 38, 39, //
|
||||
40, 41, 42, 43, 44, 45, 46, 47, 48, 49, //
|
||||
50, 51, 52, 53, 54, 55, 56, 57, 58, 59, //
|
||||
60, 61, 62, 63, 64, 65, 66, 67, 68, 69, //
|
||||
70, 71, 72, 73, 74, 75, 76, 77, 78, 79, //
|
||||
80, 81, 82, 83, 84, 85, 86, 87, 88, 89, //
|
||||
90, 91, 92, 93, 94, 95, 96, 97, 98, 99, //
|
||||
};
|
||||
} digits;
|
||||
|
||||
// recursion base case, selected when "u" is one byte
|
||||
template <>
|
||||
ALWAYS_INLINE inline char * head<UnsignedOfSize<1>, 1>(char * p, UnsignedOfSize<1> u)
|
||||
constexpr UInt64 mask24 = (UInt64(1) << 24) - 1;
|
||||
constexpr UInt64 mask32 = (UInt64(1) << 32) - 1;
|
||||
constexpr UInt64 mask57 = (UInt64(1) << 57) - 1;
|
||||
|
||||
template <bool, class, class F>
|
||||
struct _cond
|
||||
{
|
||||
return u < 10 ? outDigit(p, u) : outTwoDigits(p, u);
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------===//
|
||||
// tail: produce all digits including leading zeros
|
||||
//===----------------------------------------------------------===//
|
||||
|
||||
// recursive step, "u" is less than 10^2*N
|
||||
template <typename UInt, size_t N>
|
||||
ALWAYS_INLINE inline char * tail(char * p, UInt u)
|
||||
using type = F;
|
||||
};
|
||||
template <class T, class F>
|
||||
struct _cond<true, T, F>
|
||||
{
|
||||
QuotientAndRemainder<N> x = split<N>(u);
|
||||
p = tail(p, UnsignedOfSize<N / 2>(x.quotient));
|
||||
p = tail(p, x.remainder);
|
||||
return p;
|
||||
}
|
||||
using type = T;
|
||||
};
|
||||
template <bool B, class T, class F>
|
||||
using cond = typename _cond<B, T, F>::type;
|
||||
|
||||
// recursion base case, selected when "u" is one byte
|
||||
template <>
|
||||
ALWAYS_INLINE inline char * tail<UnsignedOfSize<1>, 1>(char * p, UnsignedOfSize<1> u)
|
||||
template <class T>
|
||||
inline ALWAYS_INLINE char * to_text_from_integer(char * b, T i)
|
||||
{
|
||||
return outTwoDigits(p, u);
|
||||
}
|
||||
constexpr auto q = sizeof(T);
|
||||
using U = cond<q == 1, char8_t, cond<q <= sizeof(UInt16), UInt16, cond<q <= sizeof(UInt32), UInt32, UInt64>>>;
|
||||
|
||||
//===----------------------------------------------------------===//
|
||||
// large values are >= 10^2*N
|
||||
// where x contains quotient and remainder after division by 10^N
|
||||
//===----------------------------------------------------------===//
|
||||
template <size_t N>
|
||||
ALWAYS_INLINE inline char * large(char * p, QuotientAndRemainder<N> x)
|
||||
{
|
||||
QuotientAndRemainder<N> y = split<N>(x.quotient);
|
||||
p = head(p, UnsignedOfSize<N / 2>(y.quotient));
|
||||
p = tail(p, y.remainder);
|
||||
p = tail(p, x.remainder);
|
||||
return p;
|
||||
}
|
||||
// convert bool to int before test with unary + to silence warning if T happens to be bool
|
||||
U const n = +i < 0 ? *b++ = '-', U(0) - U(i) : U(i);
|
||||
|
||||
//===----------------------------------------------------------===//
|
||||
// handle values of "u" that might be >= 10^2*N
|
||||
// where N is the size of "u" in bytes
|
||||
//===----------------------------------------------------------===//
|
||||
template <typename UInt, size_t N = sizeof(UInt)>
|
||||
ALWAYS_INLINE inline char * uitoa(char * p, UInt u)
|
||||
{
|
||||
if (u < pow10<UnsignedOfSize<N>>(N))
|
||||
return head(p, UnsignedOfSize<N / 2>(u));
|
||||
QuotientAndRemainder<N> x = split<N>(u);
|
||||
if (n < U(1e2))
|
||||
{
|
||||
/// This is changed from the original jeaiii implementation
|
||||
/// For small numbers the extra branch to call outOneDigit() is worth it as it saves some instructions
|
||||
/// and a memory access (no need to read digits.fd[n])
|
||||
/// This is not true for pure random numbers, but that's not the common use case of a database
|
||||
/// Original jeaii code
|
||||
// *reinterpret_cast<pair *>(b) = digits.fd[n];
|
||||
// return n < 10 ? b + 1 : b + 2;
|
||||
return n < 10 ? outOneDigit(b, n) : outTwoDigits(b, n);
|
||||
}
|
||||
if (n < UInt32(1e6))
|
||||
{
|
||||
if (sizeof(U) == 1 || n < U(1e4))
|
||||
{
|
||||
auto f0 = UInt32(10 * (1 << 24) / 1e3 + 1) * n;
|
||||
*reinterpret_cast<pair *>(b) = digits.fd[f0 >> 24];
|
||||
if constexpr (sizeof(U) == 1)
|
||||
b -= 1;
|
||||
else
|
||||
b -= n < U(1e3);
|
||||
auto f2 = (f0 & mask24) * 100;
|
||||
*reinterpret_cast<pair *>(b + 2) = digits.dd[f2 >> 24];
|
||||
return b + 4;
|
||||
}
|
||||
auto f0 = UInt64(10 * (1ull << 32ull) / 1e5 + 1) * n;
|
||||
*reinterpret_cast<pair *>(b) = digits.fd[f0 >> 32];
|
||||
if constexpr (sizeof(U) == 2)
|
||||
b -= 1;
|
||||
else
|
||||
b -= n < U(1e5);
|
||||
auto f2 = (f0 & mask32) * 100;
|
||||
*reinterpret_cast<pair *>(b + 2) = digits.dd[f2 >> 32];
|
||||
auto f4 = (f2 & mask32) * 100;
|
||||
*reinterpret_cast<pair *>(b + 4) = digits.dd[f4 >> 32];
|
||||
return b + 6;
|
||||
}
|
||||
if (sizeof(U) == 4 || n < UInt64(1ull << 32ull))
|
||||
{
|
||||
if (n < U(1e8))
|
||||
{
|
||||
auto f0 = UInt64(10 * (1ull << 48ull) / 1e7 + 1) * n >> 16;
|
||||
*reinterpret_cast<pair *>(b) = digits.fd[f0 >> 32];
|
||||
b -= n < U(1e7);
|
||||
auto f2 = (f0 & mask32) * 100;
|
||||
*reinterpret_cast<pair *>(b + 2) = digits.dd[f2 >> 32];
|
||||
auto f4 = (f2 & mask32) * 100;
|
||||
*reinterpret_cast<pair *>(b + 4) = digits.dd[f4 >> 32];
|
||||
auto f6 = (f4 & mask32) * 100;
|
||||
*reinterpret_cast<pair *>(b + 6) = digits.dd[f6 >> 32];
|
||||
return b + 8;
|
||||
}
|
||||
auto f0 = UInt64(10 * (1ull << 57ull) / 1e9 + 1) * n;
|
||||
*reinterpret_cast<pair *>(b) = digits.fd[f0 >> 57];
|
||||
b -= n < UInt32(1e9);
|
||||
auto f2 = (f0 & mask57) * 100;
|
||||
*reinterpret_cast<pair *>(b + 2) = digits.dd[f2 >> 57];
|
||||
auto f4 = (f2 & mask57) * 100;
|
||||
*reinterpret_cast<pair *>(b + 4) = digits.dd[f4 >> 57];
|
||||
auto f6 = (f4 & mask57) * 100;
|
||||
*reinterpret_cast<pair *>(b + 6) = digits.dd[f6 >> 57];
|
||||
auto f8 = (f6 & mask57) * 100;
|
||||
*reinterpret_cast<pair *>(b + 8) = digits.dd[f8 >> 57];
|
||||
return b + 10;
|
||||
}
|
||||
|
||||
return u < pow10<UnsignedOfSize<N>>(2 * N) ? head<N>(p, x) : large<N>(p, x);
|
||||
}
|
||||
// if we get here U must be UInt64 but some compilers don't know that, so reassign n to a UInt64 to avoid warnings
|
||||
UInt32 z = n % UInt32(1e8);
|
||||
UInt64 u = n / UInt32(1e8);
|
||||
|
||||
// selected when "u" is one byte
|
||||
template <>
|
||||
ALWAYS_INLINE inline char * uitoa<UnsignedOfSize<1>, 1>(char * p, UnsignedOfSize<1> u)
|
||||
{
|
||||
if (u < 10)
|
||||
return outDigit(p, u);
|
||||
else if (u < 100)
|
||||
return outTwoDigits(p, u);
|
||||
if (u < UInt32(1e2))
|
||||
{
|
||||
// u can't be 1 digit (if u < 10 it would have been handled above as a 9 digit 32bit number)
|
||||
*reinterpret_cast<pair *>(b) = digits.dd[u];
|
||||
b += 2;
|
||||
}
|
||||
else if (u < UInt32(1e6))
|
||||
{
|
||||
if (u < UInt32(1e4))
|
||||
{
|
||||
auto f0 = UInt32(10 * (1 << 24) / 1e3 + 1) * u;
|
||||
*reinterpret_cast<pair *>(b) = digits.fd[f0 >> 24];
|
||||
b -= u < UInt32(1e3);
|
||||
auto f2 = (f0 & mask24) * 100;
|
||||
*reinterpret_cast<pair *>(b + 2) = digits.dd[f2 >> 24];
|
||||
b += 4;
|
||||
}
|
||||
else
|
||||
{
|
||||
auto f0 = UInt64(10 * (1ull << 32ull) / 1e5 + 1) * u;
|
||||
*reinterpret_cast<pair *>(b) = digits.fd[f0 >> 32];
|
||||
b -= u < UInt32(1e5);
|
||||
auto f2 = (f0 & mask32) * 100;
|
||||
*reinterpret_cast<pair *>(b + 2) = digits.dd[f2 >> 32];
|
||||
auto f4 = (f2 & mask32) * 100;
|
||||
*reinterpret_cast<pair *>(b + 4) = digits.dd[f4 >> 32];
|
||||
b += 6;
|
||||
}
|
||||
}
|
||||
else if (u < UInt32(1e8))
|
||||
{
|
||||
auto f0 = UInt64(10 * (1ull << 48ull) / 1e7 + 1) * u >> 16;
|
||||
*reinterpret_cast<pair *>(b) = digits.fd[f0 >> 32];
|
||||
b -= u < UInt32(1e7);
|
||||
auto f2 = (f0 & mask32) * 100;
|
||||
*reinterpret_cast<pair *>(b + 2) = digits.dd[f2 >> 32];
|
||||
auto f4 = (f2 & mask32) * 100;
|
||||
*reinterpret_cast<pair *>(b + 4) = digits.dd[f4 >> 32];
|
||||
auto f6 = (f4 & mask32) * 100;
|
||||
*reinterpret_cast<pair *>(b + 6) = digits.dd[f6 >> 32];
|
||||
b += 8;
|
||||
}
|
||||
else if (u < UInt64(1ull << 32ull))
|
||||
{
|
||||
auto f0 = UInt64(10 * (1ull << 57ull) / 1e9 + 1) * u;
|
||||
*reinterpret_cast<pair *>(b) = digits.fd[f0 >> 57];
|
||||
b -= u < UInt32(1e9);
|
||||
auto f2 = (f0 & mask57) * 100;
|
||||
*reinterpret_cast<pair *>(b + 2) = digits.dd[f2 >> 57];
|
||||
auto f4 = (f2 & mask57) * 100;
|
||||
*reinterpret_cast<pair *>(b + 4) = digits.dd[f4 >> 57];
|
||||
auto f6 = (f4 & mask57) * 100;
|
||||
*reinterpret_cast<pair *>(b + 6) = digits.dd[f6 >> 57];
|
||||
auto f8 = (f6 & mask57) * 100;
|
||||
*reinterpret_cast<pair *>(b + 8) = digits.dd[f8 >> 57];
|
||||
b += 10;
|
||||
}
|
||||
else
|
||||
{
|
||||
p = outDigit(p, u / 100);
|
||||
p = outTwoDigits(p, u % 100);
|
||||
return p;
|
||||
UInt32 y = u % UInt32(1e8);
|
||||
u /= UInt32(1e8);
|
||||
|
||||
// u is 2, 3, or 4 digits (if u < 10 it would have been handled above)
|
||||
if (u < UInt32(1e2))
|
||||
{
|
||||
*reinterpret_cast<pair *>(b) = digits.dd[u];
|
||||
b += 2;
|
||||
}
|
||||
else
|
||||
{
|
||||
auto f0 = UInt32(10 * (1 << 24) / 1e3 + 1) * u;
|
||||
*reinterpret_cast<pair *>(b) = digits.fd[f0 >> 24];
|
||||
b -= u < UInt32(1e3);
|
||||
auto f2 = (f0 & mask24) * 100;
|
||||
*reinterpret_cast<pair *>(b + 2) = digits.dd[f2 >> 24];
|
||||
b += 4;
|
||||
}
|
||||
// do 8 digits
|
||||
auto f0 = (UInt64((1ull << 48ull) / 1e6 + 1) * y >> 16) + 1;
|
||||
*reinterpret_cast<pair *>(b) = digits.dd[f0 >> 32];
|
||||
auto f2 = (f0 & mask32) * 100;
|
||||
*reinterpret_cast<pair *>(b + 2) = digits.dd[f2 >> 32];
|
||||
auto f4 = (f2 & mask32) * 100;
|
||||
*reinterpret_cast<pair *>(b + 4) = digits.dd[f4 >> 32];
|
||||
auto f6 = (f4 & mask32) * 100;
|
||||
*reinterpret_cast<pair *>(b + 6) = digits.dd[f6 >> 32];
|
||||
b += 8;
|
||||
}
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------===//
|
||||
// handle unsigned and signed integral operands
|
||||
//===----------------------------------------------------------===//
|
||||
|
||||
// itoa: handle unsigned integral operands (selected by SFINAE)
|
||||
template <typename U>
|
||||
requires(!std::is_signed_v<U> && std::is_integral_v<U>)
|
||||
ALWAYS_INLINE inline char * itoa(U u, char * p)
|
||||
{
|
||||
return convert::uitoa(p, u);
|
||||
}
|
||||
|
||||
// itoa: handle signed integral operands (selected by SFINAE)
|
||||
template <typename I, size_t N = sizeof(I)>
|
||||
requires(std::is_signed_v<I> && std::is_integral_v<I>)
|
||||
ALWAYS_INLINE inline char * itoa(I i, char * p)
|
||||
{
|
||||
// Need "mask" to be filled with a copy of the sign bit.
|
||||
// If "i" is a negative value, then the result of "operator >>"
|
||||
// is implementation-defined, though usually it is an arithmetic
|
||||
// right shift that replicates the sign bit.
|
||||
// Use a conditional expression to be portable,
|
||||
// a good optimizing compiler generates an arithmetic right shift
|
||||
// and avoids the conditional branch.
|
||||
UnsignedOfSize<N> mask = i < 0 ? ~UnsignedOfSize<N>(0) : 0;
|
||||
// Now get the absolute value of "i" and cast to unsigned type UnsignedOfSize<N>.
|
||||
// Cannot use std::abs() because the result is undefined
|
||||
// in 2's complement systems for the most-negative value.
|
||||
// Want to avoid conditional branch for performance reasons since
|
||||
// CPU branch prediction will be ineffective when negative values
|
||||
// occur randomly.
|
||||
// Let "u" be "i" cast to unsigned type UnsignedOfSize<N>.
|
||||
// Subtract "u" from 2*u if "i" is positive or 0 if "i" is negative.
|
||||
// This yields the absolute value with the desired type without
|
||||
// using a conditional branch and without invoking undefined or
|
||||
// implementation defined behavior:
|
||||
UnsignedOfSize<N> u = ((2 * UnsignedOfSize<N>(i)) & ~mask) - UnsignedOfSize<N>(i);
|
||||
// Unconditionally store a minus sign when producing digits
|
||||
// in a forward direction and increment the pointer only if
|
||||
// the value is in fact negative.
|
||||
// This avoids a conditional branch and is safe because we will
|
||||
// always produce at least one digit and it will overwrite the
|
||||
// minus sign when the value is not negative.
|
||||
*p = '-';
|
||||
p += (mask & 1);
|
||||
p = convert::uitoa(p, u);
|
||||
return p;
|
||||
// do 8 digits
|
||||
auto f0 = (UInt64((1ull << 48ull) / 1e6 + 1) * z >> 16) + 1;
|
||||
*reinterpret_cast<pair *>(b) = digits.dd[f0 >> 32];
|
||||
auto f2 = (f0 & mask32) * 100;
|
||||
*reinterpret_cast<pair *>(b + 2) = digits.dd[f2 >> 32];
|
||||
auto f4 = (f2 & mask32) * 100;
|
||||
*reinterpret_cast<pair *>(b + 4) = digits.dd[f4 >> 32];
|
||||
auto f6 = (f4 & mask32) * 100;
|
||||
*reinterpret_cast<pair *>(b + 6) = digits.dd[f6 >> 32];
|
||||
return b + 8;
|
||||
}
|
||||
}
|
||||
|
||||
@ -303,7 +296,7 @@ ALWAYS_INLINE inline char * writeUIntText(UInt128 _x, char * p)
|
||||
{
|
||||
/// If we the highest 64bit item is empty, we can print just the lowest item as u64
|
||||
if (_x.items[UInt128::_impl::little(1)] == 0)
|
||||
return convert::itoa(_x.items[UInt128::_impl::little(0)], p);
|
||||
return jeaiii::to_text_from_integer(p, _x.items[UInt128::_impl::little(0)]);
|
||||
|
||||
/// Doing operations using __int128 is faster and we already rely on this feature
|
||||
using T = unsigned __int128;
|
||||
@ -334,7 +327,7 @@ ALWAYS_INLINE inline char * writeUIntText(UInt128 _x, char * p)
|
||||
current_block += max_multiple_of_hundred_blocks;
|
||||
}
|
||||
|
||||
char * highest_part_print = convert::itoa(uint64_t(x), p);
|
||||
char * highest_part_print = jeaiii::to_text_from_integer(p, uint64_t(x));
|
||||
for (int i = 0; i < current_block; i++)
|
||||
{
|
||||
outTwoDigits(highest_part_print, two_values[current_block - 1 - i]);
|
||||
@ -450,12 +443,12 @@ ALWAYS_INLINE inline char * writeSIntText(T x, char * pos)
|
||||
|
||||
char * itoa(UInt8 i, char * p)
|
||||
{
|
||||
return convert::itoa(uint8_t(i), p);
|
||||
return jeaiii::to_text_from_integer(p, uint8_t(i));
|
||||
}
|
||||
|
||||
char * itoa(Int8 i, char * p)
|
||||
{
|
||||
return convert::itoa(int8_t(i), p);
|
||||
return jeaiii::to_text_from_integer(p, int8_t(i));
|
||||
}
|
||||
|
||||
char * itoa(UInt128 i, char * p)
|
||||
@ -481,7 +474,7 @@ char * itoa(Int256 i, char * p)
|
||||
#define DEFAULT_ITOA(T) \
|
||||
char * itoa(T i, char * p) \
|
||||
{ \
|
||||
return convert::itoa(i, p); \
|
||||
return jeaiii::to_text_from_integer(p, i); \
|
||||
}
|
||||
|
||||
#define FOR_MISSING_INTEGER_TYPES(M) \
|
||||
|
@ -235,8 +235,6 @@ namespace Net
|
||||
/// Note that simply closing a socket is not sufficient
|
||||
/// to be able to re-use it again.
|
||||
|
||||
Poco::Timespan getMaxTimeout();
|
||||
|
||||
private:
|
||||
SecureSocketImpl(const SecureSocketImpl &);
|
||||
SecureSocketImpl & operator=(const SecureSocketImpl &);
|
||||
@ -250,6 +248,9 @@ namespace Net
|
||||
Session::Ptr _pSession;
|
||||
|
||||
friend class SecureStreamSocketImpl;
|
||||
|
||||
Poco::Timespan getMaxTimeoutOrLimit();
|
||||
//// Return max(send, receive) if non zero, otherwise maximum timeout
|
||||
};
|
||||
|
||||
|
||||
|
@ -199,7 +199,7 @@ void SecureSocketImpl::connectSSL(bool performHandshake)
|
||||
if (performHandshake && _pSocket->getBlocking())
|
||||
{
|
||||
int ret;
|
||||
Poco::Timespan remaining_time = getMaxTimeout();
|
||||
Poco::Timespan remaining_time = getMaxTimeoutOrLimit();
|
||||
do
|
||||
{
|
||||
RemainingTimeCounter counter(remaining_time);
|
||||
@ -302,7 +302,7 @@ int SecureSocketImpl::sendBytes(const void* buffer, int length, int flags)
|
||||
return rc;
|
||||
}
|
||||
|
||||
Poco::Timespan remaining_time = getMaxTimeout();
|
||||
Poco::Timespan remaining_time = getMaxTimeoutOrLimit();
|
||||
do
|
||||
{
|
||||
RemainingTimeCounter counter(remaining_time);
|
||||
@ -338,7 +338,7 @@ int SecureSocketImpl::receiveBytes(void* buffer, int length, int flags)
|
||||
return rc;
|
||||
}
|
||||
|
||||
Poco::Timespan remaining_time = getMaxTimeout();
|
||||
Poco::Timespan remaining_time = getMaxTimeoutOrLimit();
|
||||
do
|
||||
{
|
||||
/// SSL record may consist of several TCP packets,
|
||||
@ -372,7 +372,7 @@ int SecureSocketImpl::completeHandshake()
|
||||
poco_check_ptr (_pSSL);
|
||||
|
||||
int rc;
|
||||
Poco::Timespan remaining_time = getMaxTimeout();
|
||||
Poco::Timespan remaining_time = getMaxTimeoutOrLimit();
|
||||
do
|
||||
{
|
||||
RemainingTimeCounter counter(remaining_time);
|
||||
@ -453,18 +453,29 @@ X509* SecureSocketImpl::peerCertificate() const
|
||||
return 0;
|
||||
}
|
||||
|
||||
Poco::Timespan SecureSocketImpl::getMaxTimeout()
|
||||
Poco::Timespan SecureSocketImpl::getMaxTimeoutOrLimit()
|
||||
{
|
||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||||
Poco::Timespan remaining_time = _pSocket->getReceiveTimeout();
|
||||
Poco::Timespan send_timeout = _pSocket->getSendTimeout();
|
||||
if (remaining_time < send_timeout)
|
||||
remaining_time = send_timeout;
|
||||
/// zero SO_SNDTIMEO/SO_RCVTIMEO works as no timeout, let's replicate this
|
||||
///
|
||||
/// NOTE: we cannot use INT64_MAX (std::numeric_limits<Poco::Timespan::TimeDiff>::max()),
|
||||
/// since it will be later passed to poll() which accept int timeout, and
|
||||
/// even though poll() accepts milliseconds and Timespan() accepts
|
||||
/// microseconds, let's use smaller maximum value just to avoid some possible
|
||||
/// issues, this should be enough anyway (it is ~24 days).
|
||||
if (remaining_time == 0)
|
||||
remaining_time = Poco::Timespan(std::numeric_limits<int>::max());
|
||||
return remaining_time;
|
||||
}
|
||||
|
||||
bool SecureSocketImpl::mustRetry(int rc, Poco::Timespan& remaining_time)
|
||||
{
|
||||
if (remaining_time == 0)
|
||||
return false;
|
||||
std::lock_guard<std::recursive_mutex> lock(_mutex);
|
||||
if (rc <= 0)
|
||||
{
|
||||
@ -475,9 +486,7 @@ bool SecureSocketImpl::mustRetry(int rc, Poco::Timespan& remaining_time)
|
||||
case SSL_ERROR_WANT_READ:
|
||||
if (_pSocket->getBlocking())
|
||||
{
|
||||
/// Level-triggered mode of epoll_wait is used, so if SSL_read don't read all available data from socket,
|
||||
/// epoll_wait returns true without waiting for new data even if remaining_time == 0
|
||||
if (_pSocket->pollImpl(remaining_time, Poco::Net::Socket::SELECT_READ) && remaining_time != 0)
|
||||
if (_pSocket->pollImpl(remaining_time, Poco::Net::Socket::SELECT_READ))
|
||||
return true;
|
||||
else
|
||||
throw Poco::TimeoutException();
|
||||
@ -486,13 +495,15 @@ bool SecureSocketImpl::mustRetry(int rc, Poco::Timespan& remaining_time)
|
||||
case SSL_ERROR_WANT_WRITE:
|
||||
if (_pSocket->getBlocking())
|
||||
{
|
||||
/// The same as for SSL_ERROR_WANT_READ
|
||||
if (_pSocket->pollImpl(remaining_time, Poco::Net::Socket::SELECT_WRITE) && remaining_time != 0)
|
||||
if (_pSocket->pollImpl(remaining_time, Poco::Net::Socket::SELECT_WRITE))
|
||||
return true;
|
||||
else
|
||||
throw Poco::TimeoutException();
|
||||
}
|
||||
break;
|
||||
/// NOTE: POCO_EINTR is the same as SSL_ERROR_WANT_READ (at least in
|
||||
/// OpenSSL), so this likely dead code, but let's leave it for
|
||||
/// compatibility with other implementations
|
||||
case SSL_ERROR_SYSCALL:
|
||||
return socketError == POCO_EAGAIN || socketError == POCO_EINTR;
|
||||
default:
|
||||
|
@ -34,7 +34,7 @@ if (OS_LINUX)
|
||||
# avoid spurious latencies and additional work associated with
|
||||
# MADV_DONTNEED. See
|
||||
# https://github.com/ClickHouse/ClickHouse/issues/11121 for motivation.
|
||||
set (JEMALLOC_CONFIG_MALLOC_CONF "percpu_arena:percpu,oversize_threshold:0,muzzy_decay_ms:0,dirty_decay_ms:5000")
|
||||
set (JEMALLOC_CONFIG_MALLOC_CONF "percpu_arena:percpu,oversize_threshold:0,muzzy_decay_ms:0,dirty_decay_ms:5000,prof:true,prof_active:false,background_thread:true")
|
||||
else()
|
||||
set (JEMALLOC_CONFIG_MALLOC_CONF "oversize_threshold:0,muzzy_decay_ms:0,dirty_decay_ms:5000")
|
||||
endif()
|
||||
|
2
contrib/mariadb-connector-c
vendored
2
contrib/mariadb-connector-c
vendored
@ -1 +1 @@
|
||||
Subproject commit e39608998f5f6944ece9ec61f48e9172ec1de660
|
||||
Subproject commit d0a788c5b9fcaca2368d9233770d3ca91ea79f88
|
@ -34,7 +34,7 @@ RUN arch=${TARGETARCH:-amd64} \
|
||||
# lts / testing / prestable / etc
|
||||
ARG REPO_CHANNEL="stable"
|
||||
ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
|
||||
ARG VERSION="24.5.3.5"
|
||||
ARG VERSION="24.6.1.4423"
|
||||
ARG PACKAGES="clickhouse-keeper"
|
||||
ARG DIRECT_DOWNLOAD_URLS=""
|
||||
|
||||
|
@ -32,7 +32,7 @@ RUN arch=${TARGETARCH:-amd64} \
|
||||
# lts / testing / prestable / etc
|
||||
ARG REPO_CHANNEL="stable"
|
||||
ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
|
||||
ARG VERSION="24.5.3.5"
|
||||
ARG VERSION="24.6.1.4423"
|
||||
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
|
||||
ARG DIRECT_DOWNLOAD_URLS=""
|
||||
|
||||
|
@ -28,7 +28,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list
|
||||
|
||||
ARG REPO_CHANNEL="stable"
|
||||
ARG REPOSITORY="deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb ${REPO_CHANNEL} main"
|
||||
ARG VERSION="24.5.3.5"
|
||||
ARG VERSION="24.6.1.4423"
|
||||
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
|
||||
|
||||
#docker-official-library:off
|
||||
|
@ -253,7 +253,7 @@ function run_tests()
|
||||
try_run_with_retry 10 clickhouse-client -q "insert into system.zookeeper (name, path, value) values ('auxiliary_zookeeper2', '/test/chroot/', '')"
|
||||
|
||||
set +e
|
||||
clickhouse-test --testname --shard --zookeeper --check-zookeeper-session --hung-check --print-time \
|
||||
timeout -s TERM --preserve-status 120m clickhouse-test --testname --shard --zookeeper --check-zookeeper-session --hung-check --print-time \
|
||||
--no-drop-if-fail --test-runs "$NUM_TRIES" "${ADDITIONAL_OPTIONS[@]}" 2>&1 \
|
||||
| ts '%Y-%m-%d %H:%M:%S' \
|
||||
| tee -a test_output/test_result.txt
|
||||
|
@ -37,6 +37,7 @@ RUN pip3 install \
|
||||
tqdm==4.66.4 \
|
||||
types-requests \
|
||||
unidiff \
|
||||
jwt \
|
||||
&& rm -rf /root/.cache/pip
|
||||
|
||||
RUN echo "en_US.UTF-8 UTF-8" > /etc/locale.gen && locale-gen en_US.UTF-8
|
||||
|
41
docs/changelogs/v24.5.4.49-stable.md
Normal file
41
docs/changelogs/v24.5.4.49-stable.md
Normal file
@ -0,0 +1,41 @@
|
||||
---
|
||||
sidebar_position: 1
|
||||
sidebar_label: 2024
|
||||
---
|
||||
|
||||
# 2024 Changelog
|
||||
|
||||
### ClickHouse release v24.5.4.49-stable (63b760955a0) FIXME as compared to v24.5.3.5-stable (e0eb66f8e17)
|
||||
|
||||
#### Improvement
|
||||
* Backported in [#65886](https://github.com/ClickHouse/ClickHouse/issues/65886): Always start Keeper with sufficient amount of threads in global thread pool. [#64444](https://github.com/ClickHouse/ClickHouse/pull/64444) ([Duc Canh Le](https://github.com/canhld94)).
|
||||
* Backported in [#65304](https://github.com/ClickHouse/ClickHouse/issues/65304): Returned back the behaviour of how ClickHouse works and interprets Tuples in CSV format. This change effectively reverts https://github.com/ClickHouse/ClickHouse/pull/60994 and makes it available only under a few settings: `output_format_csv_serialize_tuple_into_separate_columns`, `input_format_csv_deserialize_separate_columns_into_tuple` and `input_format_csv_try_infer_strings_from_quoted_tuples`. [#65170](https://github.com/ClickHouse/ClickHouse/pull/65170) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
|
||||
* Backported in [#65896](https://github.com/ClickHouse/ClickHouse/issues/65896): Respect cgroup CPU limit in Keeper. [#65819](https://github.com/ClickHouse/ClickHouse/pull/65819) ([Antonio Andelic](https://github.com/antonio2368)).
|
||||
|
||||
#### Critical Bug Fix (crash, LOGICAL_ERROR, data loss, RBAC)
|
||||
* Backported in [#65287](https://github.com/ClickHouse/ClickHouse/issues/65287): Fix crash with UniqInjectiveFunctionsEliminationPass and uniqCombined. [#65188](https://github.com/ClickHouse/ClickHouse/pull/65188) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Backported in [#65374](https://github.com/ClickHouse/ClickHouse/issues/65374): Fix a bug in ClickHouse Keeper that causes digest mismatch during closing session. [#65198](https://github.com/ClickHouse/ClickHouse/pull/65198) ([Aleksei Filatov](https://github.com/aalexfvk)).
|
||||
* Backported in [#65437](https://github.com/ClickHouse/ClickHouse/issues/65437): Forbid `QUALIFY` clause in the old analyzer. The old analyzer ignored `QUALIFY`, so it could lead to unexpected data removal in mutations. [#65356](https://github.com/ClickHouse/ClickHouse/pull/65356) ([Dmitry Novik](https://github.com/novikd)).
|
||||
* Backported in [#65450](https://github.com/ClickHouse/ClickHouse/issues/65450): Use correct memory alignment for Distinct combinator. Previously, crash could happen because of invalid memory allocation when the combinator was used. [#65379](https://github.com/ClickHouse/ClickHouse/pull/65379) ([Antonio Andelic](https://github.com/antonio2368)).
|
||||
* Backported in [#65712](https://github.com/ClickHouse/ClickHouse/issues/65712): Fix crash in maxIntersections. [#65689](https://github.com/ClickHouse/ClickHouse/pull/65689) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
|
||||
#### Bug Fix (user-visible misbehavior in an official stable release)
|
||||
* Backported in [#65681](https://github.com/ClickHouse/ClickHouse/issues/65681): Fix `duplicate alias` error for distributed queries with `ARRAY JOIN`. [#64226](https://github.com/ClickHouse/ClickHouse/pull/64226) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Backported in [#65331](https://github.com/ClickHouse/ClickHouse/issues/65331): Fix the crash loop when restoring from backup is blocked by creating an MV with a definer that hasn't been restored yet. [#64595](https://github.com/ClickHouse/ClickHouse/pull/64595) ([pufit](https://github.com/pufit)).
|
||||
* Backported in [#64835](https://github.com/ClickHouse/ClickHouse/issues/64835): Fix bug which could lead to non-working TTLs with expressions. [#64694](https://github.com/ClickHouse/ClickHouse/pull/64694) ([alesapin](https://github.com/alesapin)).
|
||||
* Backported in [#65542](https://github.com/ClickHouse/ClickHouse/issues/65542): Fix crash for `ALTER TABLE ... ON CLUSTER ... MODIFY SQL SECURITY`. [#64957](https://github.com/ClickHouse/ClickHouse/pull/64957) ([pufit](https://github.com/pufit)).
|
||||
* Backported in [#65580](https://github.com/ClickHouse/ClickHouse/issues/65580): Fix crash on destroying AccessControl: add explicit shutdown. [#64993](https://github.com/ClickHouse/ClickHouse/pull/64993) ([Vitaly Baranov](https://github.com/vitlibar)).
|
||||
* Backported in [#65618](https://github.com/ClickHouse/ClickHouse/issues/65618): Fix possible infinite query duration in case of cyclic aliases. Fixes [#64849](https://github.com/ClickHouse/ClickHouse/issues/64849). [#65081](https://github.com/ClickHouse/ClickHouse/pull/65081) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Backported in [#65617](https://github.com/ClickHouse/ClickHouse/issues/65617): Fix aggregate function name rewriting in the new analyzer. [#65110](https://github.com/ClickHouse/ClickHouse/pull/65110) ([Dmitry Novik](https://github.com/novikd)).
|
||||
* Backported in [#65732](https://github.com/ClickHouse/ClickHouse/issues/65732): Eliminate injective function in argument of functions `uniq*` recursively. This used to work correctly but was broken in the new analyzer. [#65140](https://github.com/ClickHouse/ClickHouse/pull/65140) ([Duc Canh Le](https://github.com/canhld94)).
|
||||
* Backported in [#65265](https://github.com/ClickHouse/ClickHouse/issues/65265): Fix the bug in Hashed and Hashed_Array dictionary short circuit evaluation, which may read uninitialized number, leading to various errors. [#65256](https://github.com/ClickHouse/ClickHouse/pull/65256) ([jsc0218](https://github.com/jsc0218)).
|
||||
* Backported in [#65663](https://github.com/ClickHouse/ClickHouse/issues/65663): Disable `non-intersecting-parts` optimization for queries with `FINAL` in case of `read-in-order` optimization was enabled. This could lead to an incorrect query result. As a workaround, disable `do_not_merge_across_partitions_select_final` and `split_parts_ranges_into_intersecting_and_non_intersecting_final` before this fix is merged. [#65505](https://github.com/ClickHouse/ClickHouse/pull/65505) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Backported in [#65788](https://github.com/ClickHouse/ClickHouse/issues/65788): Fixed bug in MergeJoin. Column in sparse serialisation might be treated as a column of its nested type though the required conversion wasn't performed. [#65632](https://github.com/ClickHouse/ClickHouse/pull/65632) ([Nikita Taranov](https://github.com/nickitat)).
|
||||
* Backported in [#65812](https://github.com/ClickHouse/ClickHouse/issues/65812): Fix invalid exceptions in function `parseDateTime` with `%F` and `%D` placeholders. [#65768](https://github.com/ClickHouse/ClickHouse/pull/65768) ([Antonio Andelic](https://github.com/antonio2368)).
|
||||
* Backported in [#65828](https://github.com/ClickHouse/ClickHouse/issues/65828): Fix a bug in short circuit logic when old analyzer and dictGetOrDefault is used. [#65802](https://github.com/ClickHouse/ClickHouse/pull/65802) ([jsc0218](https://github.com/jsc0218)).
|
||||
|
||||
#### NOT FOR CHANGELOG / INSIGNIFICANT
|
||||
|
||||
* Backported in [#65412](https://github.com/ClickHouse/ClickHouse/issues/65412): Re-enable OpenSSL session caching. [#65111](https://github.com/ClickHouse/ClickHouse/pull/65111) ([Robert Schulze](https://github.com/rschu1ze)).
|
||||
* Backported in [#65905](https://github.com/ClickHouse/ClickHouse/issues/65905): Fix bug with session closing in Keeper. [#65735](https://github.com/ClickHouse/ClickHouse/pull/65735) ([Antonio Andelic](https://github.com/antonio2368)).
|
||||
|
735
docs/changelogs/v24.6.1.4423-stable.md
Normal file
735
docs/changelogs/v24.6.1.4423-stable.md
Normal file
@ -0,0 +1,735 @@
|
||||
---
|
||||
sidebar_position: 1
|
||||
sidebar_label: 2024
|
||||
---
|
||||
|
||||
# 2024 Changelog
|
||||
|
||||
### ClickHouse release v24.6.1.4423-stable (dcced7c8478) FIXME as compared to v24.4.1.2088-stable (6d4b31322d1)
|
||||
|
||||
#### Backward Incompatible Change
|
||||
* Enable asynchronous load of databases and tables by default. See the `async_load_databases` in config.xml. While this change is fully compatible, it can introduce a difference in behavior. When `async_load_databases` is false, as in the previous versions, the server will not accept connections until all tables are loaded. When `async_load_databases` is true, as in the new version, the server can accept connections before all the tables are loaded. If a query is made to a table that is not yet loaded, it will wait for the table's loading, which can take considerable time. It can change the behavior of the server if it is part of a large distributed system under a load balancer. In the first case, the load balancer can get a connection refusal and quickly failover to another server. In the second case, the load balancer can connect to a server that is still loading the tables, and the query will have a higher latency. Moreover, if many queries accumulate in the waiting state, it can lead to a "thundering herd" problem when they start processing simultaneously. This can make a difference only for highly loaded distributed backends. You can set the value of `async_load_databases` to false to avoid this problem. [#57695](https://github.com/ClickHouse/ClickHouse/pull/57695) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Some invalid queries will fail earlier during parsing. Note: disabled the support for inline KQL expressions (the experimental Kusto language) when they are put into a `kql` table function without a string literal, e.g. `kql(garbage | trash)` instead of `kql('garbage | trash')` or `kql($$garbage | trash$$)`. This feature was introduced unintentionally and should not exist. [#61500](https://github.com/ClickHouse/ClickHouse/pull/61500) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Renamed "inverted indexes" to "full-text indexes" which is a less technical / more user-friendly name. This also changes internal table metadata and breaks tables with existing (experimental) inverted indexes. Please make to drop such indexes before upgrade and re-create them after upgrade. [#62884](https://github.com/ClickHouse/ClickHouse/pull/62884) ([Robert Schulze](https://github.com/rschu1ze)).
|
||||
* Usage of functions `neighbor`, `runningAccumulate`, `runningDifferenceStartingWithFirstValue`, `runningDifference` deprecated (because it is error-prone). Proper window functions should be used instead. To enable them back, set `allow_deprecated_functions=1`. [#63132](https://github.com/ClickHouse/ClickHouse/pull/63132) ([Nikita Taranov](https://github.com/nickitat)).
|
||||
* Queries from `system.columns` will work faster if there is a large number of columns, but many databases or tables are not granted for `SHOW TABLES`. Note that in previous versions, if you grant `SHOW COLUMNS` to individual columns without granting `SHOW TABLES` to the corresponding tables, the `system.columns` table will show these columns, but in a new version, it will skip the table entirely. Remove trace log messages "Access granted" and "Access denied" that slowed down queries. [#63439](https://github.com/ClickHouse/ClickHouse/pull/63439) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Rework parallel processing in `Ordered` mode of storage `S3Queue`. This PR is backward incompatible for Ordered mode if you used settings `s3queue_processing_threads_num` or `s3queue_total_shards_num`. Setting `s3queue_total_shards_num` is deleted, previously it was allowed to use only under `s3queue_allow_experimental_sharded_mode`, which is now deprecated. A new setting is added - `s3queue_buckets`. [#64349](https://github.com/ClickHouse/ClickHouse/pull/64349) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
* New functions `snowflakeIDToDateTime`, `snowflakeIDToDateTime64`, `dateTimeToSnowflakeID`, and `dateTime64ToSnowflakeID` were added. Unlike the existing functions `snowflakeToDateTime`, `snowflakeToDateTime64`, `dateTimeToSnowflake`, and `dateTime64ToSnowflake`, the new functions are compatible with function `generateSnowflakeID`, i.e. they accept the snowflake IDs generated by `generateSnowflakeID` and produce snowflake IDs of the same type as `generateSnowflakeID` (i.e. `UInt64`). Furthermore, the new functions default to the UNIX epoch (aka. 1970-01-01), just like `generateSnowflakeID`. If necessary, a different epoch, e.g. Twitter's/X's epoch 2010-11-04 aka. 1288834974657 msec since UNIX epoch, can be passed. The old conversion functions are deprecated and will be removed after a transition period: to use them regardless, enable setting `allow_deprecated_snowflake_conversion_functions`. [#64948](https://github.com/ClickHouse/ClickHouse/pull/64948) ([Robert Schulze](https://github.com/rschu1ze)).
|
||||
|
||||
#### New Feature
|
||||
* Provide support for AzureBlobStorage function in ClickHouse server to use Azure Workload identity to authenticate against Azure blob storage. If `use_workload_identity` parameter is set in config, [workload identity](https://github.com/Azure/azure-sdk-for-cpp/tree/main/sdk/identity/azure-identity#authenticate-azure-hosted-applications) is used for authentication. [#57881](https://github.com/ClickHouse/ClickHouse/pull/57881) ([Vinay Suryadevara](https://github.com/vinay92-ch)).
|
||||
* Introduce bulk loading to StorageEmbeddedRocksDB by creating and ingesting SST file instead of relying on rocksdb build-in memtable. This help to increase importing speed, especially for long-running insert query to StorageEmbeddedRocksDB tables. Also, introduce `StorageEmbeddedRocksDB` table settings. [#59163](https://github.com/ClickHouse/ClickHouse/pull/59163) ([Duc Canh Le](https://github.com/canhld94)).
|
||||
* Introduce statistics of type "number of distinct values". [#59357](https://github.com/ClickHouse/ClickHouse/pull/59357) ([Han Fei](https://github.com/hanfei1991)).
|
||||
* User can now parse CRLF with TSV format using a setting `input_format_tsv_crlf_end_of_line`. Closes [#56257](https://github.com/ClickHouse/ClickHouse/issues/56257). [#59747](https://github.com/ClickHouse/ClickHouse/pull/59747) ([Shaun Struwig](https://github.com/Blargian)).
|
||||
* Add Hilbert Curve encode and decode functions. [#60156](https://github.com/ClickHouse/ClickHouse/pull/60156) ([Artem Mustafin](https://github.com/Artemmm91)).
|
||||
* Adds the Form Format to read/write a single record in the application/x-www-form-urlencoded format. [#60199](https://github.com/ClickHouse/ClickHouse/pull/60199) ([Shaun Struwig](https://github.com/Blargian)).
|
||||
* Added possibility to compress in CROSS JOIN. [#60459](https://github.com/ClickHouse/ClickHouse/pull/60459) ([p1rattttt](https://github.com/p1rattttt)).
|
||||
* New setting `input_format_force_null_for_omitted_fields` that forces NULL values for omitted fields. [#60887](https://github.com/ClickHouse/ClickHouse/pull/60887) ([Constantine Peresypkin](https://github.com/pkit)).
|
||||
* Support join with inequal conditions which involve columns from both left and right table. e.g. `t1.y < t2.y`. To enable, `SET allow_experimental_join_condition = 1`. [#60920](https://github.com/ClickHouse/ClickHouse/pull/60920) ([lgbo](https://github.com/lgbo-ustc)).
|
||||
* Earlier our s3 storage and s3 table function didn't support selecting from archive files. I created a solution that allows to iterate over files inside archives in S3. [#62259](https://github.com/ClickHouse/ClickHouse/pull/62259) ([Daniil Ivanik](https://github.com/divanik)).
|
||||
* Support for conditional function `clamp`. [#62377](https://github.com/ClickHouse/ClickHouse/pull/62377) ([skyoct](https://github.com/skyoct)).
|
||||
* Add npy output format. [#62430](https://github.com/ClickHouse/ClickHouse/pull/62430) ([豪肥肥](https://github.com/HowePa)).
|
||||
* Added support for reading LINESTRING geometry in WKT format using function `readWKTLineString`. [#62519](https://github.com/ClickHouse/ClickHouse/pull/62519) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
|
||||
* Added SQL functions `generateUUIDv7`, `generateUUIDv7ThreadMonotonic`, `generateUUIDv7NonMonotonic` (with different monotonicity/performance trade-offs) to generate version 7 UUIDs aka. timestamp-based UUIDs with random component. Also added a new function `UUIDToNum` to extract bytes from a UUID and a new function `UUIDv7ToDateTime` to extract timestamp component from a UUID version 7. [#62852](https://github.com/ClickHouse/ClickHouse/pull/62852) ([Alexey Petrunyaka](https://github.com/pet74alex)).
|
||||
* Implement Dynamic data type that allows to store values of any type inside it without knowing all of them in advance. Dynamic type is available under a setting `allow_experimental_dynamic_type`. [#63058](https://github.com/ClickHouse/ClickHouse/pull/63058) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* Allow to attach parts from a different disk. [#63087](https://github.com/ClickHouse/ClickHouse/pull/63087) ([Unalian](https://github.com/Unalian)).
|
||||
* Allow proxy to be bypassed for hosts specified in `no_proxy` env variable and ClickHouse proxy configuration. [#63314](https://github.com/ClickHouse/ClickHouse/pull/63314) ([Arthur Passos](https://github.com/arthurpassos)).
|
||||
* Introduce bulk loading to StorageEmbeddedRocksDB by creating and ingesting SST file instead of relying on rocksdb build-in memtable. This help to increase importing speed, especially for long-running insert query to StorageEmbeddedRocksDB tables. Also, introduce StorageEmbeddedRocksDB table settings. [#63324](https://github.com/ClickHouse/ClickHouse/pull/63324) ([Duc Canh Le](https://github.com/canhld94)).
|
||||
* Raw as a synonym for TSVRaw. [#63394](https://github.com/ClickHouse/ClickHouse/pull/63394) ([Unalian](https://github.com/Unalian)).
|
||||
* Added possibility to do cross join in temporary file if size exceeds limits. [#63432](https://github.com/ClickHouse/ClickHouse/pull/63432) ([p1rattttt](https://github.com/p1rattttt)).
|
||||
* Added a new table function `loop` to support returning query results in an infinite loop. [#63452](https://github.com/ClickHouse/ClickHouse/pull/63452) ([Sariel](https://github.com/sarielwxm)).
|
||||
* Added new SQL functions `generateSnowflakeID` for generating Twitter-style Snowflake IDs. [#63577](https://github.com/ClickHouse/ClickHouse/pull/63577) ([Danila Puzov](https://github.com/kazalika)).
|
||||
* Add the ability to reshuffle rows during insert to optimize for size without violating the order set by `PRIMARY KEY`. It's controlled by the setting `optimize_row_order` (off by default). [#63578](https://github.com/ClickHouse/ClickHouse/pull/63578) ([Igor Markelov](https://github.com/ElderlyPassionFruit)).
|
||||
* On Linux and MacOS, if the program has STDOUT redirected to a file with a compression extension, use the corresponding compression method instead of nothing (making it behave similarly to `INTO OUTFILE` ). [#63662](https://github.com/ClickHouse/ClickHouse/pull/63662) ([v01dXYZ](https://github.com/v01dXYZ)).
|
||||
* Added `merge_workload` and `mutation_workload` settings to regulate how resources are utilized and shared between merges, mutations and other workloads. [#64061](https://github.com/ClickHouse/ClickHouse/pull/64061) ([Sergei Trifonov](https://github.com/serxa)).
|
||||
* Change warning on high number of attached tables to differentiate tables, views and dictionaries. [#64180](https://github.com/ClickHouse/ClickHouse/pull/64180) ([Francisco J. Jurado Moreno](https://github.com/Beetelbrox)).
|
||||
* Add support for comparing IPv4 and IPv6 types using the `=` operator. [#64292](https://github.com/ClickHouse/ClickHouse/pull/64292) ([Francisco J. Jurado Moreno](https://github.com/Beetelbrox)).
|
||||
* Allow to store named collections in zookeeper. [#64574](https://github.com/ClickHouse/ClickHouse/pull/64574) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
* Support decimal arguments in binary math functions (pow(), atan2(), max2, min2(), hypot(). [#64582](https://github.com/ClickHouse/ClickHouse/pull/64582) ([Mikhail Gorshkov](https://github.com/mgorshkov)).
|
||||
* Add support for index analysis over `hilbertEncode`. [#64662](https://github.com/ClickHouse/ClickHouse/pull/64662) ([Artem Mustafin](https://github.com/Artemmm91)).
|
||||
* Added SQL functions `parseReadableSize` (along with `OrNull` and `OrZero` variants). [#64742](https://github.com/ClickHouse/ClickHouse/pull/64742) ([Francisco J. Jurado Moreno](https://github.com/Beetelbrox)).
|
||||
* Add server settings `max_table_num_to_throw` and `max_database_num_to_throw` to limit the number of databases or tables on `CREATE` queries. [#64781](https://github.com/ClickHouse/ClickHouse/pull/64781) ([Xu Jia](https://github.com/XuJia0210)).
|
||||
* Add _time virtual column to file alike storages (s3/file/hdfs/url/azureBlobStorage). [#64947](https://github.com/ClickHouse/ClickHouse/pull/64947) ([Ilya Golshtein](https://github.com/ilejn)).
|
||||
* Introduced new functions `base64URLEncode`, `base64URLDecode` and `tryBase64URLDecode`. [#64991](https://github.com/ClickHouse/ClickHouse/pull/64991) ([Mikhail Gorshkov](https://github.com/mgorshkov)).
|
||||
* Add new function `editDistanceUTF8`, which calculates the [edit distance](https://en.wikipedia.org/wiki/Edit_distance) between two UTF8 strings. [#65269](https://github.com/ClickHouse/ClickHouse/pull/65269) ([LiuNeng](https://github.com/liuneng1994)).
|
||||
|
||||
#### Performance Improvement
|
||||
* Skip merging of newly created projection blocks during `INSERT`-s. [#59405](https://github.com/ClickHouse/ClickHouse/pull/59405) ([Nikita Taranov](https://github.com/nickitat)).
|
||||
* Add a native parquet reader, which can read parquet binary to ClickHouse Columns directly. It's controlled by the setting `input_format_parquet_use_native_reader` (disabled by default). [#60361](https://github.com/ClickHouse/ClickHouse/pull/60361) ([ZhiHong Zhang](https://github.com/copperybean)).
|
||||
* Reduce the number of virtual function calls in ColumnNullable::size(). [#60556](https://github.com/ClickHouse/ClickHouse/pull/60556) ([HappenLee](https://github.com/HappenLee)).
|
||||
* Process string functions XXXUTF8 'asciily' if input strings are all ascii chars. Inspired by https://github.com/apache/doris/pull/29799. Overall speed up by 1.07x~1.62x. Notice that peak memory usage had been decreased in some cases. [#61632](https://github.com/ClickHouse/ClickHouse/pull/61632) ([李扬](https://github.com/taiyang-li)).
|
||||
* Improved performance of selection (`{}`) globs in StorageS3. [#62120](https://github.com/ClickHouse/ClickHouse/pull/62120) ([Andrey Zvonov](https://github.com/zvonand)).
|
||||
* HostResolver has each IP address several times. If remote host has several IPs and by some reason (firewall rules for example) access on some IPs allowed and on others forbidden, than only first record of forbidden IPs marked as failed, and in each try these IPs have a chance to be chosen (and failed again). Even if fix this, every 120 seconds DNS cache dropped, and IPs can be chosen again. [#62652](https://github.com/ClickHouse/ClickHouse/pull/62652) ([Anton Ivashkin](https://github.com/ianton-ru)).
|
||||
* Speedup `splitByRegexp` when the regular expression argument is a single-character. [#62696](https://github.com/ClickHouse/ClickHouse/pull/62696) ([Robert Schulze](https://github.com/rschu1ze)).
|
||||
* Speed up FixedHashTable by keeping track of the min and max keys used. This allows to reduce the number of cells that need to be verified. [#62746](https://github.com/ClickHouse/ClickHouse/pull/62746) ([Jiebin Sun](https://github.com/jiebinn)).
|
||||
* Add a new configuration`prefer_merge_sort_block_bytes` to control the memory usage and speed up sorting 2 times when merging when there are many columns. [#62904](https://github.com/ClickHouse/ClickHouse/pull/62904) ([LiuNeng](https://github.com/liuneng1994)).
|
||||
* `clickhouse-local` will start faster. In previous versions, it was not deleting temporary directories by mistake. Now it will. This closes [#62941](https://github.com/ClickHouse/ClickHouse/issues/62941). [#63074](https://github.com/ClickHouse/ClickHouse/pull/63074) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Micro-optimizations for the new analyzer. [#63429](https://github.com/ClickHouse/ClickHouse/pull/63429) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Index analysis will work if `DateTime` is compared to `DateTime64`. This closes [#63441](https://github.com/ClickHouse/ClickHouse/issues/63441). [#63443](https://github.com/ClickHouse/ClickHouse/pull/63443) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Index analysis will work if `DateTime` is compared to `DateTime64`. This closes [#63441](https://github.com/ClickHouse/ClickHouse/issues/63441). [#63532](https://github.com/ClickHouse/ClickHouse/pull/63532) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Optimize the resolution of in(LowCardinality, ConstantSet). [#64060](https://github.com/ClickHouse/ClickHouse/pull/64060) ([Zhiguo Zhou](https://github.com/ZhiguoZh)).
|
||||
* Speed up indices of type `set` a little (around 1.5 times) by removing garbage. [#64098](https://github.com/ClickHouse/ClickHouse/pull/64098) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Use a thread pool to initialize and destroy hash tables inside `ConcurrentHashJoin`. [#64241](https://github.com/ClickHouse/ClickHouse/pull/64241) ([Nikita Taranov](https://github.com/nickitat)).
|
||||
* Optimized vertical merges in tables with sparse columns. [#64311](https://github.com/ClickHouse/ClickHouse/pull/64311) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Enabled prefetches of data from remote filesystem during vertical merges. It improves latency of vertical merges in tables with data stored on remote filesystem. [#64314](https://github.com/ClickHouse/ClickHouse/pull/64314) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Reduce redundant calls to `isDefault()` of `ColumnSparse::filter` to improve performance. [#64426](https://github.com/ClickHouse/ClickHouse/pull/64426) ([Jiebin Sun](https://github.com/jiebinn)).
|
||||
* Speedup `find_super_nodes` and `find_big_family` keeper-client commands by making multiple asynchronous getChildren requests. [#64628](https://github.com/ClickHouse/ClickHouse/pull/64628) ([Alexander Gololobov](https://github.com/davenger)).
|
||||
* Improve function least/greatest for nullable numberic type arguments. [#64668](https://github.com/ClickHouse/ClickHouse/pull/64668) ([KevinyhZou](https://github.com/KevinyhZou)).
|
||||
* Allow merging two consequent `FilterSteps` of a query plan. This improves filter-push-down optimization if the filter condition can be pushed down from the parent step. [#64760](https://github.com/ClickHouse/ClickHouse/pull/64760) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Remove bad optimization in vertical final implementation and re-enable vertical final algorithm by default. [#64783](https://github.com/ClickHouse/ClickHouse/pull/64783) ([Duc Canh Le](https://github.com/canhld94)).
|
||||
* Remove ALIAS nodes from the filter expression. This slightly improves performance for queries with `PREWHERE` (with new analyzer). [#64793](https://github.com/ClickHouse/ClickHouse/pull/64793) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Fix performance regression in cross join introduced in [#60459](https://github.com/ClickHouse/ClickHouse/issues/60459) (24.5). [#65243](https://github.com/ClickHouse/ClickHouse/pull/65243) ([Nikita Taranov](https://github.com/nickitat)).
|
||||
|
||||
#### Improvement
|
||||
* Support empty tuples. [#55061](https://github.com/ClickHouse/ClickHouse/pull/55061) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Hot reload storage policy for distributed tables when adding a new disk. [#58285](https://github.com/ClickHouse/ClickHouse/pull/58285) ([Duc Canh Le](https://github.com/canhld94)).
|
||||
* Maps can now have `Float32`, `Float64`, `Array(T)`, `Map(K,V)` and `Tuple(T1, T2, ...)` as keys. Closes [#54537](https://github.com/ClickHouse/ClickHouse/issues/54537). [#59318](https://github.com/ClickHouse/ClickHouse/pull/59318) ([李扬](https://github.com/taiyang-li)).
|
||||
* Avoid possible deadlock during MergeTree index analysis when scheduling threads in a saturated service. [#59427](https://github.com/ClickHouse/ClickHouse/pull/59427) ([Sean Haynes](https://github.com/seandhaynes)).
|
||||
* Multiline strings with border preservation and column width change. [#59940](https://github.com/ClickHouse/ClickHouse/pull/59940) ([Volodyachan](https://github.com/Volodyachan)).
|
||||
* Make rabbitmq nack broken messages. Closes [#45350](https://github.com/ClickHouse/ClickHouse/issues/45350). [#60312](https://github.com/ClickHouse/ClickHouse/pull/60312) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
* Support partial trivial count optimization when the query filter is able to select exact ranges from merge tree tables. [#60463](https://github.com/ClickHouse/ClickHouse/pull/60463) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Fix a crash in asynchronous stack unwinding (such as when using the sampling query profiler) while interpreting debug info. This closes [#60460](https://github.com/ClickHouse/ClickHouse/issues/60460). [#60468](https://github.com/ClickHouse/ClickHouse/pull/60468) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Reduce max memory usage of multithreaded `INSERT`s by collecting chunks of multiple threads in a single transform. [#61047](https://github.com/ClickHouse/ClickHouse/pull/61047) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
|
||||
* Distinct messages for s3 error 'no key' for cases disk and storage. [#61108](https://github.com/ClickHouse/ClickHouse/pull/61108) ([Sema Checherinda](https://github.com/CheSema)).
|
||||
* Less contention in filesystem cache (part 4). Allow to keep filesystem cache not filled to the limit by doing additional eviction in the background (controlled by `keep_free_space_size(elements)_ratio`). This allows to release pressure from space reservation for queries (on `tryReserve` method). Also this is done in a lock free way as much as possible, e.g. should not block normal cache usage. [#61250](https://github.com/ClickHouse/ClickHouse/pull/61250) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
* The progress bar will work for trivial queries with LIMIT from `system.zeros`, `system.zeros_mt` (it already works for `system.numbers` and `system.numbers_mt`), and the `generateRandom` table function. As a bonus, if the total number of records is greater than the `max_rows_to_read` limit, it will throw an exception earlier. This closes [#58183](https://github.com/ClickHouse/ClickHouse/issues/58183). [#61823](https://github.com/ClickHouse/ClickHouse/pull/61823) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* YAML Merge Key support. [#62685](https://github.com/ClickHouse/ClickHouse/pull/62685) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Enhance error message when non-deterministic function is used with Replicated source. [#62896](https://github.com/ClickHouse/ClickHouse/pull/62896) ([Grégoire Pineau](https://github.com/lyrixx)).
|
||||
* Fix interserver secret for Distributed over Distributed from `remote`. [#63013](https://github.com/ClickHouse/ClickHouse/pull/63013) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Allow using `clickhouse-local` and its shortcuts `clickhouse` and `ch` with a query or queries file as a positional argument. Examples: `ch "SELECT 1"`, `ch --param_test Hello "SELECT {test:String}"`, `ch query.sql`. This closes [#62361](https://github.com/ClickHouse/ClickHouse/issues/62361). [#63081](https://github.com/ClickHouse/ClickHouse/pull/63081) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Support configuration substitutions from YAML files. [#63106](https://github.com/ClickHouse/ClickHouse/pull/63106) ([Eduard Karacharov](https://github.com/korowa)).
|
||||
* Reduce the memory usage when using Azure object storage by using fixed memory allocation, avoiding the allocation of an extra buffer. [#63160](https://github.com/ClickHouse/ClickHouse/pull/63160) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
|
||||
* Add TTL information in system parts_columns table. [#63200](https://github.com/ClickHouse/ClickHouse/pull/63200) ([litlig](https://github.com/litlig)).
|
||||
* Keep previous data in terminal after picking from skim suggestions. [#63261](https://github.com/ClickHouse/ClickHouse/pull/63261) ([FlameFactory](https://github.com/FlameFactory)).
|
||||
* Width of fields now correctly calculate, ignoring ANSI escape sequences. [#63270](https://github.com/ClickHouse/ClickHouse/pull/63270) ([Shaun Struwig](https://github.com/Blargian)).
|
||||
* Enable plain_rewritable metadata for local and Azure (azure_blob_storage) object storages. [#63365](https://github.com/ClickHouse/ClickHouse/pull/63365) ([Julia Kartseva](https://github.com/jkartseva)).
|
||||
* Support English-style Unicode quotes, e.g. “Hello”, ‘world’. This is questionable in general but helpful when you type your query in a word processor, such as Google Docs. This closes [#58634](https://github.com/ClickHouse/ClickHouse/issues/58634). [#63381](https://github.com/ClickHouse/ClickHouse/pull/63381) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Allowed to create MaterializedMySQL database without connection to MySQL. [#63397](https://github.com/ClickHouse/ClickHouse/pull/63397) ([Kirill](https://github.com/kirillgarbar)).
|
||||
* Remove copying data when writing to filesystem cache. [#63401](https://github.com/ClickHouse/ClickHouse/pull/63401) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
* Update the usage of error code `NUMBER_OF_ARGUMENTS_DOESNT_MATCH` by more accurate error codes when appropriate. [#63406](https://github.com/ClickHouse/ClickHouse/pull/63406) ([Yohann Jardin](https://github.com/yohannj)).
|
||||
* Several minor corner case fixes to proxy support & tunneling. [#63427](https://github.com/ClickHouse/ClickHouse/pull/63427) ([Arthur Passos](https://github.com/arthurpassos)).
|
||||
* `os_user` and `client_hostname` are now correctly set up for queries for command line suggestions in clickhouse-client. This closes [#63430](https://github.com/ClickHouse/ClickHouse/issues/63430). [#63433](https://github.com/ClickHouse/ClickHouse/pull/63433) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fixed tabulation from line numbering, correct handling of length when moving a line if the value has a tab, added tests. [#63493](https://github.com/ClickHouse/ClickHouse/pull/63493) ([Volodyachan](https://github.com/Volodyachan)).
|
||||
* Add this `aggregate_function_group_array_has_limit_size`setting to support discarding data in some scenarios. [#63516](https://github.com/ClickHouse/ClickHouse/pull/63516) ([zhongyuankai](https://github.com/zhongyuankai)).
|
||||
* Automatically mark a replica of Replicated database as lost and start recovery if some DDL task fails more than `max_retries_before_automatic_recovery` (100 by default) times in a row with the same error. Also, fixed a bug that could cause skipping DDL entries when an exception is thrown during an early stage of entry execution. [#63549](https://github.com/ClickHouse/ClickHouse/pull/63549) ([Alexander Tokmakov](https://github.com/tavplubix)).
|
||||
* Add `http_response_headers` setting to support custom response headers in custom HTTP handlers. [#63562](https://github.com/ClickHouse/ClickHouse/pull/63562) ([Grigorii](https://github.com/GSokol)).
|
||||
* Automatically correct `max_block_size=0` to default value. [#63587](https://github.com/ClickHouse/ClickHouse/pull/63587) ([Antonio Andelic](https://github.com/antonio2368)).
|
||||
* Account failed files in `s3queue_tracked_file_ttl_sec` and `s3queue_traked_files_limit` for `StorageS3Queue`. [#63638](https://github.com/ClickHouse/ClickHouse/pull/63638) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
* Add a build_id ALIAS column to trace_log to facilitate auto renaming upon detecting binary changes. This is to address [#52086](https://github.com/ClickHouse/ClickHouse/issues/52086). [#63656](https://github.com/ClickHouse/ClickHouse/pull/63656) ([Zimu Li](https://github.com/woodlzm)).
|
||||
* Enable truncate operation for object storage disks. [#63693](https://github.com/ClickHouse/ClickHouse/pull/63693) ([MikhailBurdukov](https://github.com/MikhailBurdukov)).
|
||||
* Improve io_uring resubmits visibility. Rename profile event `IOUringSQEsResubmits` -> `IOUringSQEsResubmitsAsync` and add a new one `IOUringSQEsResubmitsSync`. [#63699](https://github.com/ClickHouse/ClickHouse/pull/63699) ([Tomer Shafir](https://github.com/tomershafir)).
|
||||
* Introduce assertions to verify all functions are called with columns of the right size. [#63723](https://github.com/ClickHouse/ClickHouse/pull/63723) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* The loading of the keywords list is now dependent on the server revision and will be disabled for the old versions of ClickHouse server. CC @azat. [#63786](https://github.com/ClickHouse/ClickHouse/pull/63786) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
|
||||
* `SHOW CREATE TABLE` executed on top of system tables will now show the super handy comment unique for each table which will explain why this table is needed. [#63788](https://github.com/ClickHouse/ClickHouse/pull/63788) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
|
||||
* Allow trailing commas in the columns list in the INSERT query. For example, `INSERT INTO test (a, b, c, ) VALUES ...`. [#63803](https://github.com/ClickHouse/ClickHouse/pull/63803) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Better exception messages for the `Regexp` format. [#63804](https://github.com/ClickHouse/ClickHouse/pull/63804) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Allow trailing commas in the `Values` format. For example, this query is allowed: `INSERT INTO test (a, b, c) VALUES (4, 5, 6,);`. [#63810](https://github.com/ClickHouse/ClickHouse/pull/63810) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Clickhouse disks have to read server setting to obtain actual metadata format version. [#63831](https://github.com/ClickHouse/ClickHouse/pull/63831) ([Sema Checherinda](https://github.com/CheSema)).
|
||||
* Disable pretty format restrictions (`output_format_pretty_max_rows`/`output_format_pretty_max_value_width`) when stdout is not TTY. [#63942](https://github.com/ClickHouse/ClickHouse/pull/63942) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Exception handling now works when ClickHouse is used inside AWS Lambda. Author: [Alexey Coolnev](https://github.com/acoolnev). [#64014](https://github.com/ClickHouse/ClickHouse/pull/64014) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Throw `CANNOT_DECOMPRESS` instread of `CORRUPTED_DATA` on invalid compressed data passed via HTTP. [#64036](https://github.com/ClickHouse/ClickHouse/pull/64036) ([vdimir](https://github.com/vdimir)).
|
||||
* A tip for a single large number in Pretty formats now works for Nullable and LowCardinality. This closes [#61993](https://github.com/ClickHouse/ClickHouse/issues/61993). [#64084](https://github.com/ClickHouse/ClickHouse/pull/64084) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Now backups with azure blob storage will use multicopy. [#64116](https://github.com/ClickHouse/ClickHouse/pull/64116) ([alesapin](https://github.com/alesapin)).
|
||||
* Added a new setting, `metadata_keep_free_space_bytes` to keep free space on the metadata storage disk. [#64128](https://github.com/ClickHouse/ClickHouse/pull/64128) ([MikhailBurdukov](https://github.com/MikhailBurdukov)).
|
||||
* Add metrics, logs, and thread names around parts filtering with indices. [#64130](https://github.com/ClickHouse/ClickHouse/pull/64130) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Allow to use native copy for azure even with different containers. [#64154](https://github.com/ClickHouse/ClickHouse/pull/64154) ([alesapin](https://github.com/alesapin)).
|
||||
* Add metrics to track the number of directories created and removed by the plain_rewritable metadata storage, and the number of entries in the local-to-remote in-memory map. [#64175](https://github.com/ClickHouse/ClickHouse/pull/64175) ([Julia Kartseva](https://github.com/jkartseva)).
|
||||
* Finally enable native copy for azure. [#64182](https://github.com/ClickHouse/ClickHouse/pull/64182) ([alesapin](https://github.com/alesapin)).
|
||||
* Ignore `allow_suspicious_primary_key` on `ATTACH` and verify on `ALTER`. [#64202](https://github.com/ClickHouse/ClickHouse/pull/64202) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* The query cache now considers identical queries with different settings as different. This increases robustness in cases where different settings (e.g. `limit` or `additional_table_filters`) would affect the query result. [#64205](https://github.com/ClickHouse/ClickHouse/pull/64205) ([Robert Schulze](https://github.com/rschu1ze)).
|
||||
* Better Exception Message in Delete Table with Projection, users can understand the error and the steps should be taken. [#64212](https://github.com/ClickHouse/ClickHouse/pull/64212) ([jsc0218](https://github.com/jsc0218)).
|
||||
* Support the non standard error code `QpsLimitExceeded` in object storage as a retryable error. [#64225](https://github.com/ClickHouse/ClickHouse/pull/64225) ([Sema Checherinda](https://github.com/CheSema)).
|
||||
* Forbid converting a MergeTree table to replicated if the zookeeper path for this table already exists. [#64244](https://github.com/ClickHouse/ClickHouse/pull/64244) ([Kirill](https://github.com/kirillgarbar)).
|
||||
* If "replica group" is configured for a `Replicated` database, automatically create a cluster that includes replicas from all groups. [#64312](https://github.com/ClickHouse/ClickHouse/pull/64312) ([Alexander Tokmakov](https://github.com/tavplubix)).
|
||||
* Added settings to disable materialization of skip indexes and statistics on inserts (`materialize_skip_indexes_on_insert` and `materialize_statistics_on_insert`). [#64391](https://github.com/ClickHouse/ClickHouse/pull/64391) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Use the allocated memory size to calculate the row group size and reduce the peak memory of the parquet writer in single-threaded mode. [#64424](https://github.com/ClickHouse/ClickHouse/pull/64424) ([LiuNeng](https://github.com/liuneng1994)).
|
||||
* Added new configuration input_format_parquet_prefer_block_bytes to control the average output block bytes, and modified the default value of input_format_parquet_max_block_size to 65409. [#64427](https://github.com/ClickHouse/ClickHouse/pull/64427) ([LiuNeng](https://github.com/liuneng1994)).
|
||||
* Always start Keeper with sufficient amount of threads in global thread pool. [#64444](https://github.com/ClickHouse/ClickHouse/pull/64444) ([Duc Canh Le](https://github.com/canhld94)).
|
||||
* Settings from user config doesn't affect merges and mutations for MergeTree on top of object storage. [#64456](https://github.com/ClickHouse/ClickHouse/pull/64456) ([alesapin](https://github.com/alesapin)).
|
||||
* Setting `replace_long_file_name_to_hash` is enabled by default for `MergeTree` tables. [#64457](https://github.com/ClickHouse/ClickHouse/pull/64457) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Improve the iterator of sparse column to reduce call of size(). [#64497](https://github.com/ClickHouse/ClickHouse/pull/64497) ([Jiebin Sun](https://github.com/jiebinn)).
|
||||
* Update condition to use copy for azure blob storage. [#64518](https://github.com/ClickHouse/ClickHouse/pull/64518) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
|
||||
* Support the non standard error code `TotalQpsLimitExceeded` in object storage as a retryable error. [#64520](https://github.com/ClickHouse/ClickHouse/pull/64520) ([Sema Checherinda](https://github.com/CheSema)).
|
||||
* Optimized memory usage of vertical merges for tables with high number of skip indexes. [#64580](https://github.com/ClickHouse/ClickHouse/pull/64580) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Introduced two additional columns in the `system.query_log`: `used_privileges` and `missing_privileges`. `used_privileges` is populated with the privileges that were checked during query execution, and `missing_privileges` contains required privileges that are missing. [#64597](https://github.com/ClickHouse/ClickHouse/pull/64597) ([Alexey Katsman](https://github.com/alexkats)).
|
||||
* Add settings `parallel_replicas_custom_key_range_lower` and `parallel_replicas_custom_key_range_upper` to control how parallel replicas with dynamic shards parallelizes queries when using a range filter. [#64604](https://github.com/ClickHouse/ClickHouse/pull/64604) ([josh-hildred](https://github.com/josh-hildred)).
|
||||
* Updated Advanced Dashboard for both open-source and ClickHouse Cloud versions to include a chart for 'Maximum concurrent network connections'. [#64610](https://github.com/ClickHouse/ClickHouse/pull/64610) ([Thom O'Connor](https://github.com/thomoco)).
|
||||
* The second argument (scale) of functions `round()`, `roundBankers()`, `floor()`, `ceil()` and `trunc()` can now be non-const. [#64798](https://github.com/ClickHouse/ClickHouse/pull/64798) ([Mikhail Gorshkov](https://github.com/mgorshkov)).
|
||||
* Improve progress report on zeros_mt and generateRandom. [#64804](https://github.com/ClickHouse/ClickHouse/pull/64804) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Add an asynchronous metric jemalloc.profile.active to show whether sampling is currently active. This is an activation mechanism in addition to prof.active; both must be active for the calling thread to sample. [#64842](https://github.com/ClickHouse/ClickHouse/pull/64842) ([Unalian](https://github.com/Unalian)).
|
||||
* Support statistics with ReplicatedMergeTree. [#64934](https://github.com/ClickHouse/ClickHouse/pull/64934) ([Han Fei](https://github.com/hanfei1991)).
|
||||
* Don't mark of `allow_experimental_join_condition` as IMPORTANT. This may have prevented distributed queries in a mixed versions cluster from being executed successfully. [#65008](https://github.com/ClickHouse/ClickHouse/pull/65008) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
|
||||
* Backported in [#65716](https://github.com/ClickHouse/ClickHouse/issues/65716): `StorageS3Queue` related fixes and improvements. Deduce a default value of `s3queue_processing_threads_num` according to the number of physical cpu cores on the server (instead of the previous default value as 1). Set default value of `s3queue_loading_retries` to 10. Fix possible vague "Uncaught exception" in exception column of `system.s3queue`. Do not increment retry count on `MEMORY_LIMIT_EXCEEDED` exception. Move files commit to a stage after insertion into table fully finished to avoid files being commited while not inserted. Add settings `s3queue_max_processed_files_before_commit`, `s3queue_max_processed_rows_before_commit`, `s3queue_max_processed_bytes_before_commit`, `s3queue_max_processing_time_sec_before_commit`, to better control commit and flush time. [#65046](https://github.com/ClickHouse/ClickHouse/pull/65046) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
* Added server Asynchronous metrics `DiskGetObjectThrottler*` and `DiskGetObjectThrottler*` reflecting request per second rate limit defined with `s3_max_get_rps` and `s3_max_put_rps` disk settings and currently available number of requests that could be sent without hitting throttling limit on the disk. Metrics are defined for every disk that has a configured limit. [#65050](https://github.com/ClickHouse/ClickHouse/pull/65050) ([Sergei Trifonov](https://github.com/serxa)).
|
||||
* Added a setting `output_format_pretty_display_footer_column_names` which when enabled displays column names at the end of the table for long tables (50 rows by default), with the threshold value for minimum number of rows controlled by `output_format_pretty_display_footer_column_names_min_rows`. [#65144](https://github.com/ClickHouse/ClickHouse/pull/65144) ([Shaun Struwig](https://github.com/Blargian)).
|
||||
* Returned back the behaviour of how ClickHouse works and interprets Tuples in CSV format. This change effectively reverts https://github.com/ClickHouse/ClickHouse/pull/60994 and makes it available only under a few settings: `output_format_csv_serialize_tuple_into_separate_columns`, `input_format_csv_deserialize_separate_columns_into_tuple` and `input_format_csv_try_infer_strings_from_quoted_tuples`. [#65170](https://github.com/ClickHouse/ClickHouse/pull/65170) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
|
||||
* Initialize global trace collector for Poco::ThreadPool (needed for keeper, etc). [#65239](https://github.com/ClickHouse/ClickHouse/pull/65239) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
* Add validation when creating a user with bcrypt_hash. [#65242](https://github.com/ClickHouse/ClickHouse/pull/65242) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
|
||||
#### Critical Bug Fix (crash, LOGICAL_ERROR, data loss, RBAC)
|
||||
* Fix a permission error where a user in a specific situation can escalate their privileges on the default database without necessary grants. [#64769](https://github.com/ClickHouse/ClickHouse/pull/64769) ([pufit](https://github.com/pufit)).
|
||||
* Fix crash with UniqInjectiveFunctionsEliminationPass and uniqCombined. [#65188](https://github.com/ClickHouse/ClickHouse/pull/65188) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Fix a bug in ClickHouse Keeper that causes digest mismatch during closing session. [#65198](https://github.com/ClickHouse/ClickHouse/pull/65198) ([Aleksei Filatov](https://github.com/aalexfvk)).
|
||||
* Forbid `QUALIFY` clause in the old analyzer. The old analyzer ignored `QUALIFY`, so it could lead to unexpected data removal in mutations. [#65356](https://github.com/ClickHouse/ClickHouse/pull/65356) ([Dmitry Novik](https://github.com/novikd)).
|
||||
* Use correct memory alignment for Distinct combinator. Previously, crash could happen because of invalid memory allocation when the combinator was used. [#65379](https://github.com/ClickHouse/ClickHouse/pull/65379) ([Antonio Andelic](https://github.com/antonio2368)).
|
||||
* Backported in [#65846](https://github.com/ClickHouse/ClickHouse/issues/65846): Check cyclic dependencies on CREATE/REPLACE/RENAME/EXCHANGE queries and throw an exception if there is a cyclic dependency. Previously such cyclic dependencies could lead to a deadlock during server startup. Closes [#65355](https://github.com/ClickHouse/ClickHouse/issues/65355). Also fix some bugs in dependencies creation. [#65405](https://github.com/ClickHouse/ClickHouse/pull/65405) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* Backported in [#65714](https://github.com/ClickHouse/ClickHouse/issues/65714): Fix crash in maxIntersections. [#65689](https://github.com/ClickHouse/ClickHouse/pull/65689) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
|
||||
#### Bug Fix (user-visible misbehavior in an official stable release)
|
||||
* Fix making backup when multiple shards are used. This PR fixes [#56566](https://github.com/ClickHouse/ClickHouse/issues/56566). [#57684](https://github.com/ClickHouse/ClickHouse/pull/57684) ([Vitaly Baranov](https://github.com/vitlibar)).
|
||||
* Fix passing projections/indexes from CREATE query into inner table of MV. [#59183](https://github.com/ClickHouse/ClickHouse/pull/59183) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Fix boundRatio incorrect merge. [#60532](https://github.com/ClickHouse/ClickHouse/pull/60532) ([Tao Wang](https://github.com/wangtZJU)).
|
||||
* Fix crash when using some functions with low-cardinality columns. [#61966](https://github.com/ClickHouse/ClickHouse/pull/61966) ([Michael Kolupaev](https://github.com/al13n321)).
|
||||
* Fixed 'set' skip index not working with IN and indexHint(). [#62083](https://github.com/ClickHouse/ClickHouse/pull/62083) ([Michael Kolupaev](https://github.com/al13n321)).
|
||||
* Fix queries with FINAL give wrong result when table does not use adaptive granularity. [#62432](https://github.com/ClickHouse/ClickHouse/pull/62432) ([Duc Canh Le](https://github.com/canhld94)).
|
||||
* Improve the detection of cgroups v2 memory controller in unusual locations. This fixes a warning that the cgroup memory observer was disabled because no cgroups v1 or v2 current memory file could be found. [#62903](https://github.com/ClickHouse/ClickHouse/pull/62903) ([Robert Schulze](https://github.com/rschu1ze)).
|
||||
* Fix subsequent use of external tables in client. [#62964](https://github.com/ClickHouse/ClickHouse/pull/62964) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Fix crash with untuple and unresolved lambda. [#63131](https://github.com/ClickHouse/ClickHouse/pull/63131) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Fix bug which could lead to server to accept connections before server is actually loaded. [#63181](https://github.com/ClickHouse/ClickHouse/pull/63181) ([alesapin](https://github.com/alesapin)).
|
||||
* Fix intersect parts when restart after drop range. [#63202](https://github.com/ClickHouse/ClickHouse/pull/63202) ([Han Fei](https://github.com/hanfei1991)).
|
||||
* Fix a misbehavior when SQL security defaults don't load for old tables during server startup. [#63209](https://github.com/ClickHouse/ClickHouse/pull/63209) ([pufit](https://github.com/pufit)).
|
||||
* JOIN filter push down filled join fix. Closes [#63228](https://github.com/ClickHouse/ClickHouse/issues/63228). [#63234](https://github.com/ClickHouse/ClickHouse/pull/63234) ([Maksim Kita](https://github.com/kitaisreal)).
|
||||
* Fix infinite loop while listing objects in Azure blob storage. [#63257](https://github.com/ClickHouse/ClickHouse/pull/63257) ([Julia Kartseva](https://github.com/jkartseva)).
|
||||
* CROSS join can be executed with any value `join_algorithm` setting, close [#62431](https://github.com/ClickHouse/ClickHouse/issues/62431). [#63273](https://github.com/ClickHouse/ClickHouse/pull/63273) ([vdimir](https://github.com/vdimir)).
|
||||
* Fixed a potential crash caused by a `no space left` error when temporary data in the cache is used. [#63346](https://github.com/ClickHouse/ClickHouse/pull/63346) ([vdimir](https://github.com/vdimir)).
|
||||
* Fix bug which could potentially lead to rare LOGICAL_ERROR during SELECT query with message: `Unexpected return type from materialize. Expected type_XXX. Got type_YYY.` Introduced in [#59379](https://github.com/ClickHouse/ClickHouse/issues/59379). [#63353](https://github.com/ClickHouse/ClickHouse/pull/63353) ([alesapin](https://github.com/alesapin)).
|
||||
* Fix `X-ClickHouse-Timezone` header returning wrong timezone when using `session_timezone` as query level setting. [#63377](https://github.com/ClickHouse/ClickHouse/pull/63377) ([Andrey Zvonov](https://github.com/zvonand)).
|
||||
* Fix debug assert when using grouping WITH ROLLUP and LowCardinality types. [#63398](https://github.com/ClickHouse/ClickHouse/pull/63398) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Fix logical errors in queries with `GROUPING SETS` and `WHERE` and `group_by_use_nulls = true`, close [#60538](https://github.com/ClickHouse/ClickHouse/issues/60538). [#63405](https://github.com/ClickHouse/ClickHouse/pull/63405) ([vdimir](https://github.com/vdimir)).
|
||||
* Fix backup of projection part in case projection was removed from table metadata, but part still has projection. [#63426](https://github.com/ClickHouse/ClickHouse/pull/63426) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
* Fix 'Every derived table must have its own alias' error for MYSQL dictionary source, close [#63341](https://github.com/ClickHouse/ClickHouse/issues/63341). [#63481](https://github.com/ClickHouse/ClickHouse/pull/63481) ([vdimir](https://github.com/vdimir)).
|
||||
* Insert QueryFinish on AsyncInsertFlush with no data. [#63483](https://github.com/ClickHouse/ClickHouse/pull/63483) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Fix `system.query_log.used_dictionaries` logging. [#63487](https://github.com/ClickHouse/ClickHouse/pull/63487) ([Eduard Karacharov](https://github.com/korowa)).
|
||||
* Support executing function during assignment of parameterized view value. [#63502](https://github.com/ClickHouse/ClickHouse/pull/63502) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
|
||||
* Avoid segafult in `MergeTreePrefetchedReadPool` while fetching projection parts. [#63513](https://github.com/ClickHouse/ClickHouse/pull/63513) ([Antonio Andelic](https://github.com/antonio2368)).
|
||||
* Fix rabbitmq heap-use-after-free found by clang-18, which can happen if an error is thrown from RabbitMQ during initialization of exchange and queues. [#63515](https://github.com/ClickHouse/ClickHouse/pull/63515) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
* Fix crash on exit with sentry enabled (due to openssl destroyed before sentry). [#63548](https://github.com/ClickHouse/ClickHouse/pull/63548) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Fixed parquet memory tracking. [#63584](https://github.com/ClickHouse/ClickHouse/pull/63584) ([Michael Kolupaev](https://github.com/al13n321)).
|
||||
* Fix support for Array and Map with Keyed hashing functions and materialized keys. [#63628](https://github.com/ClickHouse/ClickHouse/pull/63628) ([Salvatore Mesoraca](https://github.com/aiven-sal)).
|
||||
* Fixed Parquet filter pushdown not working with Analyzer. [#63642](https://github.com/ClickHouse/ClickHouse/pull/63642) ([Michael Kolupaev](https://github.com/al13n321)).
|
||||
* It is forbidden to convert MergeTree to replicated if the zookeeper path for this table already exists. [#63670](https://github.com/ClickHouse/ClickHouse/pull/63670) ([Kirill](https://github.com/kirillgarbar)).
|
||||
* Read only the necessary columns from VIEW (new analyzer). Closes [#62594](https://github.com/ClickHouse/ClickHouse/issues/62594). [#63688](https://github.com/ClickHouse/ClickHouse/pull/63688) ([Maksim Kita](https://github.com/kitaisreal)).
|
||||
* Fix rare case with missing data in the result of distributed query. [#63691](https://github.com/ClickHouse/ClickHouse/pull/63691) ([vdimir](https://github.com/vdimir)).
|
||||
* Fix [#63539](https://github.com/ClickHouse/ClickHouse/issues/63539). Forbid WINDOW redefinition in new analyzer. [#63694](https://github.com/ClickHouse/ClickHouse/pull/63694) ([Dmitry Novik](https://github.com/novikd)).
|
||||
* Flatten_nested is broken with replicated database. [#63695](https://github.com/ClickHouse/ClickHouse/pull/63695) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Fix `SIZES_OF_COLUMNS_DOESNT_MATCH` error for queries with `arrayJoin` function in `WHERE`. Fixes [#63653](https://github.com/ClickHouse/ClickHouse/issues/63653). [#63722](https://github.com/ClickHouse/ClickHouse/pull/63722) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Fix `Not found column` and `CAST AS Map from array requires nested tuple of 2 elements` exceptions for distributed queries which use `Map(Nothing, Nothing)` type. Fixes [#63637](https://github.com/ClickHouse/ClickHouse/issues/63637). [#63753](https://github.com/ClickHouse/ClickHouse/pull/63753) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Fix possible `ILLEGAL_COLUMN` error in `partial_merge` join, close [#37928](https://github.com/ClickHouse/ClickHouse/issues/37928). [#63755](https://github.com/ClickHouse/ClickHouse/pull/63755) ([vdimir](https://github.com/vdimir)).
|
||||
* `query_plan_remove_redundant_distinct` can break queries with WINDOW FUNCTIONS (with `allow_experimental_analyzer` is on). Fixes [#62820](https://github.com/ClickHouse/ClickHouse/issues/62820). [#63776](https://github.com/ClickHouse/ClickHouse/pull/63776) ([Igor Nikonov](https://github.com/devcrafter)).
|
||||
* Fix possible crash with SYSTEM UNLOAD PRIMARY KEY. [#63778](https://github.com/ClickHouse/ClickHouse/pull/63778) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Fix a query with a duplicating cycling alias. Fixes [#63320](https://github.com/ClickHouse/ClickHouse/issues/63320). [#63791](https://github.com/ClickHouse/ClickHouse/pull/63791) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Fixed performance degradation of parsing data formats in INSERT query. This closes [#62918](https://github.com/ClickHouse/ClickHouse/issues/62918). This partially reverts [#42284](https://github.com/ClickHouse/ClickHouse/issues/42284), which breaks the original design and introduces more problems. [#63801](https://github.com/ClickHouse/ClickHouse/pull/63801) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Add 'endpoint_subpath' S3 URI setting to allow plain_rewritable disks to share the same endpoint. [#63806](https://github.com/ClickHouse/ClickHouse/pull/63806) ([Julia Kartseva](https://github.com/jkartseva)).
|
||||
* Fix queries using parallel read buffer (e.g. with max_download_thread > 0) getting stuck when threads cannot be allocated. [#63814](https://github.com/ClickHouse/ClickHouse/pull/63814) ([Antonio Andelic](https://github.com/antonio2368)).
|
||||
* Allow JOIN filter push down to both streams if only single equivalent column is used in query. Closes [#63799](https://github.com/ClickHouse/ClickHouse/issues/63799). [#63819](https://github.com/ClickHouse/ClickHouse/pull/63819) ([Maksim Kita](https://github.com/kitaisreal)).
|
||||
* Remove the data from all disks after DROP with the Lazy database engines. Without these changes, orhpaned will remain on the disks. [#63848](https://github.com/ClickHouse/ClickHouse/pull/63848) ([MikhailBurdukov](https://github.com/MikhailBurdukov)).
|
||||
* Fix incorrect select query result when parallel replicas were used to read from a Materialized View. [#63861](https://github.com/ClickHouse/ClickHouse/pull/63861) ([Nikita Taranov](https://github.com/nickitat)).
|
||||
* Fixes in `find_super_nodes` and `find_big_family` command of keeper-client: - do not fail on ZNONODE errors - find super nodes inside super nodes - properly calculate subtree node count. [#63862](https://github.com/ClickHouse/ClickHouse/pull/63862) ([Alexander Gololobov](https://github.com/davenger)).
|
||||
* Fix a error `Database name is empty` for remote queries with lambdas over the cluster with modified default database. Fixes [#63471](https://github.com/ClickHouse/ClickHouse/issues/63471). [#63864](https://github.com/ClickHouse/ClickHouse/pull/63864) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Fix SIGSEGV due to CPU/Real (`query_profiler_real_time_period_ns`/`query_profiler_cpu_time_period_ns`) profiler (has been an issue since 2022, that leads to periodic server crashes, especially if you were using distributed engine). [#63865](https://github.com/ClickHouse/ClickHouse/pull/63865) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Fixed `EXPLAIN CURRENT TRANSACTION` query. [#63926](https://github.com/ClickHouse/ClickHouse/pull/63926) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Fix analyzer - IN function with arbitrary deep sub-selects in materialized view to use insertion block. [#63930](https://github.com/ClickHouse/ClickHouse/pull/63930) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
|
||||
* Allow `ALTER TABLE .. MODIFY|RESET SETTING` and `ALTER TABLE .. MODIFY COMMENT` for plain_rewritable disk. [#63933](https://github.com/ClickHouse/ClickHouse/pull/63933) ([Julia Kartseva](https://github.com/jkartseva)).
|
||||
* Fix Recursive CTE with distributed queries. Closes [#63790](https://github.com/ClickHouse/ClickHouse/issues/63790). [#63939](https://github.com/ClickHouse/ClickHouse/pull/63939) ([Maksim Kita](https://github.com/kitaisreal)).
|
||||
* Fixed reading of columns of type `Tuple(Map(LowCardinality(String), String), ...)`. [#63956](https://github.com/ClickHouse/ClickHouse/pull/63956) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Fix resolve of unqualified COLUMNS matcher. Preserve the input columns order and forbid usage of unknown identifiers. [#63962](https://github.com/ClickHouse/ClickHouse/pull/63962) ([Dmitry Novik](https://github.com/novikd)).
|
||||
* Fix the `Not found column` error for queries with `skip_unused_shards = 1`, `LIMIT BY`, and the new analyzer. Fixes [#63943](https://github.com/ClickHouse/ClickHouse/issues/63943). [#63983](https://github.com/ClickHouse/ClickHouse/pull/63983) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* (Low-quality third-party Kusto Query Language). Resolve Client Abortion Issue When Using KQL Table Function in Interactive Mode. [#63992](https://github.com/ClickHouse/ClickHouse/pull/63992) ([Yong Wang](https://github.com/kashwy)).
|
||||
* Fix an `Cyclic aliases` error for cyclic aliases of different type (expression and function). [#63993](https://github.com/ClickHouse/ClickHouse/pull/63993) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Deserialize untrusted binary inputs in a safer way. [#64024](https://github.com/ClickHouse/ClickHouse/pull/64024) ([Robert Schulze](https://github.com/rschu1ze)).
|
||||
* Do not throw `Storage doesn't support FINAL` error for remote queries over non-MergeTree tables with `final = true` and new analyzer. Fixes [#63960](https://github.com/ClickHouse/ClickHouse/issues/63960). [#64037](https://github.com/ClickHouse/ClickHouse/pull/64037) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Add missing settings to recoverLostReplica. [#64040](https://github.com/ClickHouse/ClickHouse/pull/64040) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Fix unwind on SIGSEGV on aarch64 (due to small stack for signal). [#64058](https://github.com/ClickHouse/ClickHouse/pull/64058) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* This fix will use a proper redefined context with the correct definer for each individual view in the query pipeline. [#64079](https://github.com/ClickHouse/ClickHouse/pull/64079) ([pufit](https://github.com/pufit)).
|
||||
* Fix analyzer: "Not found column" error is fixed when using INTERPOLATE. [#64096](https://github.com/ClickHouse/ClickHouse/pull/64096) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
|
||||
* Fix azure backup writing multipart blocks as 1mb (read buffer size) instead of max_upload_part_size. [#64117](https://github.com/ClickHouse/ClickHouse/pull/64117) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
* Fix creating backups to S3 buckets with different credentials from the disk containing the file. [#64153](https://github.com/ClickHouse/ClickHouse/pull/64153) ([Antonio Andelic](https://github.com/antonio2368)).
|
||||
* Prevent LOGICAL_ERROR on CREATE TABLE as MaterializedView. [#64174](https://github.com/ClickHouse/ClickHouse/pull/64174) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* The query cache now considers two identical queries against different databases as different. The previous behavior could be used to bypass missing privileges to read from a table. [#64199](https://github.com/ClickHouse/ClickHouse/pull/64199) ([Robert Schulze](https://github.com/rschu1ze)).
|
||||
* Fix possible abort on uncaught exception in ~WriteBufferFromFileDescriptor in StatusFile. [#64206](https://github.com/ClickHouse/ClickHouse/pull/64206) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* Ignore `text_log` config when using Keeper. [#64218](https://github.com/ClickHouse/ClickHouse/pull/64218) ([Antonio Andelic](https://github.com/antonio2368)).
|
||||
* Fix `duplicate alias` error for distributed queries with `ARRAY JOIN`. [#64226](https://github.com/ClickHouse/ClickHouse/pull/64226) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Fix unexpected accurateCast from string to integer. [#64255](https://github.com/ClickHouse/ClickHouse/pull/64255) ([wudidapaopao](https://github.com/wudidapaopao)).
|
||||
* Fixed CNF simplification, in case any OR group contains mutually exclusive atoms. [#64256](https://github.com/ClickHouse/ClickHouse/pull/64256) ([Eduard Karacharov](https://github.com/korowa)).
|
||||
* Fix Query Tree size validation. [#64377](https://github.com/ClickHouse/ClickHouse/pull/64377) ([Dmitry Novik](https://github.com/novikd)).
|
||||
* Fix `Logical error: Bad cast` for `Buffer` table with `PREWHERE`. [#64388](https://github.com/ClickHouse/ClickHouse/pull/64388) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Prevent recursive logging in `blob_storage_log` when it's stored on object storage. [#64393](https://github.com/ClickHouse/ClickHouse/pull/64393) ([vdimir](https://github.com/vdimir)).
|
||||
* Fixed `CREATE TABLE AS` queries for tables with default expressions. [#64455](https://github.com/ClickHouse/ClickHouse/pull/64455) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Fixed `optimize_read_in_order` behaviour for ORDER BY ... NULLS FIRST / LAST on tables with nullable keys. [#64483](https://github.com/ClickHouse/ClickHouse/pull/64483) ([Eduard Karacharov](https://github.com/korowa)).
|
||||
* Fix the `Expression nodes list expected 1 projection names` and `Unknown expression or identifier` errors for queries with aliases to `GLOBAL IN.`. [#64517](https://github.com/ClickHouse/ClickHouse/pull/64517) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Fix an error `Cannot find column` in distributed queries with constant CTE in the `GROUP BY` key. [#64519](https://github.com/ClickHouse/ClickHouse/pull/64519) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Fixed ORC statistics calculation, when writing, for unsigned types on all platforms and Int8 on ARM. [#64563](https://github.com/ClickHouse/ClickHouse/pull/64563) ([Michael Kolupaev](https://github.com/al13n321)).
|
||||
* Fix the crash loop when restoring from backup is blocked by creating an MV with a definer that hasn't been restored yet. [#64595](https://github.com/ClickHouse/ClickHouse/pull/64595) ([pufit](https://github.com/pufit)).
|
||||
* Fix the output of function `formatDateTimeInJodaSyntax` when a formatter generates an uneven number of characters and the last character is `0`. For example, `SELECT formatDateTimeInJodaSyntax(toDate('2012-05-29'), 'D')` now correctly returns `150` instead of previously `15`. [#64614](https://github.com/ClickHouse/ClickHouse/pull/64614) ([LiuNeng](https://github.com/liuneng1994)).
|
||||
* Do not rewrite aggregation if `-If` combinator is already used. [#64638](https://github.com/ClickHouse/ClickHouse/pull/64638) ([Dmitry Novik](https://github.com/novikd)).
|
||||
* Fix type inference for float (in case of small buffer, i.e. `--max_read_buffer_size 1`). [#64641](https://github.com/ClickHouse/ClickHouse/pull/64641) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Fix bug which could lead to non-working TTLs with expressions. [#64694](https://github.com/ClickHouse/ClickHouse/pull/64694) ([alesapin](https://github.com/alesapin)).
|
||||
* Fix removing the `WHERE` and `PREWHERE` expressions, which are always true (for the new analyzer). [#64695](https://github.com/ClickHouse/ClickHouse/pull/64695) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Fixed excessive part elimination by token-based text indexes (`ngrambf` , `full_text`) when filtering by result of `startsWith`, `endsWith`, `match`, `multiSearchAny`. [#64720](https://github.com/ClickHouse/ClickHouse/pull/64720) ([Eduard Karacharov](https://github.com/korowa)).
|
||||
* Fixes incorrect behaviour of ANSI CSI escaping in the `UTF8::computeWidth` function. [#64756](https://github.com/ClickHouse/ClickHouse/pull/64756) ([Shaun Struwig](https://github.com/Blargian)).
|
||||
* Fix a case of incorrect removal of `ORDER BY` / `LIMIT BY` across subqueries. [#64766](https://github.com/ClickHouse/ClickHouse/pull/64766) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Fix (experimental) unequal join with subqueries for sets which are in the mixed join conditions. [#64775](https://github.com/ClickHouse/ClickHouse/pull/64775) ([lgbo](https://github.com/lgbo-ustc)).
|
||||
* Fix crash in a local cache over `plain_rewritable` disk. [#64778](https://github.com/ClickHouse/ClickHouse/pull/64778) ([Julia Kartseva](https://github.com/jkartseva)).
|
||||
* Keeper fix: return correct value for `zk_latest_snapshot_size` in `mntr` command. [#64784](https://github.com/ClickHouse/ClickHouse/pull/64784) ([Antonio Andelic](https://github.com/antonio2368)).
|
||||
* Fix `Cannot find column` in distributed query with `ARRAY JOIN` by `Nested` column. Fixes [#64755](https://github.com/ClickHouse/ClickHouse/issues/64755). [#64801](https://github.com/ClickHouse/ClickHouse/pull/64801) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Fix memory leak in slru cache policy. [#64803](https://github.com/ClickHouse/ClickHouse/pull/64803) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
* Fixed possible incorrect memory tracking in several kinds of queries: queries that read any data from S3, queries via http protocol, asynchronous inserts. [#64844](https://github.com/ClickHouse/ClickHouse/pull/64844) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Fix the `Block structure mismatch` error for queries reading with `PREWHERE` from the materialized view when the materialized view has columns of different types than the source table. Fixes [#64611](https://github.com/ClickHouse/ClickHouse/issues/64611). [#64855](https://github.com/ClickHouse/ClickHouse/pull/64855) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Fix rare crash when table has TTL with subquery + database replicated + parallel replicas + analyzer. It's really rare, but please don't use TTLs with subqueries. [#64858](https://github.com/ClickHouse/ClickHouse/pull/64858) ([alesapin](https://github.com/alesapin)).
|
||||
* Fix duplicating `Delete` events in `blob_storage_log` in case of large batch to delete. [#64924](https://github.com/ClickHouse/ClickHouse/pull/64924) ([vdimir](https://github.com/vdimir)).
|
||||
* Backported in [#65544](https://github.com/ClickHouse/ClickHouse/issues/65544): Fix crash for `ALTER TABLE ... ON CLUSTER ... MODIFY SQL SECURITY`. [#64957](https://github.com/ClickHouse/ClickHouse/pull/64957) ([pufit](https://github.com/pufit)).
|
||||
* Fixed `Session moved to another server` error from [Zoo]Keeper that might happen after server startup when the config has includes from [Zoo]Keeper. [#64986](https://github.com/ClickHouse/ClickHouse/pull/64986) ([Alexander Tokmakov](https://github.com/tavplubix)).
|
||||
* Backported in [#65582](https://github.com/ClickHouse/ClickHouse/issues/65582): Fix crash on destroying AccessControl: add explicit shutdown. [#64993](https://github.com/ClickHouse/ClickHouse/pull/64993) ([Vitaly Baranov](https://github.com/vitlibar)).
|
||||
* Fix `ALTER MODIFY COMMENT` query that was broken for parameterized VIEWs in https://github.com/ClickHouse/ClickHouse/pull/54211. [#65031](https://github.com/ClickHouse/ClickHouse/pull/65031) ([Nikolay Degterinsky](https://github.com/evillique)).
|
||||
* Fix `host_id` in DatabaseReplicated when `cluster_secure_connection` parameter is enabled. Previously all the connections within the cluster created by DatabaseReplicated were not secure, even if the parameter was enabled. [#65054](https://github.com/ClickHouse/ClickHouse/pull/65054) ([Nikolay Degterinsky](https://github.com/evillique)).
|
||||
* Fixing the `Not-ready Set` error after the `PREWHERE` optimization for StorageMerge. [#65057](https://github.com/ClickHouse/ClickHouse/pull/65057) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Avoid writing to finalized buffer in File-like storages. [#65063](https://github.com/ClickHouse/ClickHouse/pull/65063) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* Fix possible infinite query duration in case of cyclic aliases. Fixes [#64849](https://github.com/ClickHouse/ClickHouse/issues/64849). [#65081](https://github.com/ClickHouse/ClickHouse/pull/65081) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Fix the `Unknown expression identifier` error for remote queries with `INTERPOLATE (alias)` (new analyzer). Fixes [#64636](https://github.com/ClickHouse/ClickHouse/issues/64636). [#65090](https://github.com/ClickHouse/ClickHouse/pull/65090) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Fix pushing arithmetic operations out of aggregation. In the new analyzer, optimization was applied only once. [#65104](https://github.com/ClickHouse/ClickHouse/pull/65104) ([Dmitry Novik](https://github.com/novikd)).
|
||||
* Fix aggregate function name rewriting in the new analyzer. [#65110](https://github.com/ClickHouse/ClickHouse/pull/65110) ([Dmitry Novik](https://github.com/novikd)).
|
||||
* Respond with 5xx instead of 200 OK in case of receive timeout while reading (parts of) the request body from the client socket. [#65118](https://github.com/ClickHouse/ClickHouse/pull/65118) ([Julian Maicher](https://github.com/jmaicher)).
|
||||
* Backported in [#65734](https://github.com/ClickHouse/ClickHouse/issues/65734): Eliminate injective function in argument of functions `uniq*` recursively. This used to work correctly but was broken in the new analyzer. [#65140](https://github.com/ClickHouse/ClickHouse/pull/65140) ([Duc Canh Le](https://github.com/canhld94)).
|
||||
* Fix possible crash for hedged requests. [#65206](https://github.com/ClickHouse/ClickHouse/pull/65206) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Fix the bug in Hashed and Hashed_Array dictionary short circuit evaluation, which may read uninitialized number, leading to various errors. [#65256](https://github.com/ClickHouse/ClickHouse/pull/65256) ([jsc0218](https://github.com/jsc0218)).
|
||||
* This PR ensures that the type of the constant(IN operator's second parameter) is always visible during the IN operator's type conversion process. Otherwise, losing type information may cause some conversions to fail, such as the conversion from DateTime to Date. fix ([#64487](https://github.com/ClickHouse/ClickHouse/issues/64487)). [#65315](https://github.com/ClickHouse/ClickHouse/pull/65315) ([pn](https://github.com/chloro-pn)).
|
||||
* Backported in [#65665](https://github.com/ClickHouse/ClickHouse/issues/65665): Disable `non-intersecting-parts` optimization for queries with `FINAL` in case of `read-in-order` optimization was enabled. This could lead to an incorrect query result. As a workaround, disable `do_not_merge_across_partitions_select_final` and `split_parts_ranges_into_intersecting_and_non_intersecting_final` before this fix is merged. [#65505](https://github.com/ClickHouse/ClickHouse/pull/65505) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Backported in [#65606](https://github.com/ClickHouse/ClickHouse/issues/65606): Fix getting exception `Index out of bound for blob metadata` in case all files from list batch were filtered out. [#65523](https://github.com/ClickHouse/ClickHouse/pull/65523) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
* Backported in [#65790](https://github.com/ClickHouse/ClickHouse/issues/65790): Fixed bug in MergeJoin. Column in sparse serialisation might be treated as a column of its nested type though the required conversion wasn't performed. [#65632](https://github.com/ClickHouse/ClickHouse/pull/65632) ([Nikita Taranov](https://github.com/nickitat)).
|
||||
* Backported in [#65814](https://github.com/ClickHouse/ClickHouse/issues/65814): Fix invalid exceptions in function `parseDateTime` with `%F` and `%D` placeholders. [#65768](https://github.com/ClickHouse/ClickHouse/pull/65768) ([Antonio Andelic](https://github.com/antonio2368)).
|
||||
* Backported in [#65830](https://github.com/ClickHouse/ClickHouse/issues/65830): Fix a bug in short circuit logic when old analyzer and dictGetOrDefault is used. [#65802](https://github.com/ClickHouse/ClickHouse/pull/65802) ([jsc0218](https://github.com/jsc0218)).
|
||||
|
||||
#### Build/Testing/Packaging Improvement
|
||||
* ClickHouse is built with clang-18. A lot of new checks from clang-tidy-18 have been enabled. [#60469](https://github.com/ClickHouse/ClickHouse/pull/60469) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Make `network` service be required when using the rc init script to start the ClickHouse server daemon. [#60650](https://github.com/ClickHouse/ClickHouse/pull/60650) ([Chun-Sheng, Li](https://github.com/peter279k)).
|
||||
* Re-enable broken s390x build in CI. [#63135](https://github.com/ClickHouse/ClickHouse/pull/63135) ([Harry Lee](https://github.com/HarryLeeIBM)).
|
||||
* The Dockerfile is reviewed by the docker official library in https://github.com/docker-library/official-images/pull/15846. [#63400](https://github.com/ClickHouse/ClickHouse/pull/63400) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Information about every symbol in every translation unit will be collected in the CI database for every build in the CI. This closes [#63494](https://github.com/ClickHouse/ClickHouse/issues/63494). [#63495](https://github.com/ClickHouse/ClickHouse/pull/63495) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Experimentally support loongarch64 as a new platform for ClickHouse. [#63733](https://github.com/ClickHouse/ClickHouse/pull/63733) ([qiangxuhui](https://github.com/qiangxuhui)).
|
||||
* Update Apache Datasketches library. It resolves [#63858](https://github.com/ClickHouse/ClickHouse/issues/63858). [#63923](https://github.com/ClickHouse/ClickHouse/pull/63923) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Enable GRPC support for aarch64 linux while cross-compiling binary. [#64072](https://github.com/ClickHouse/ClickHouse/pull/64072) ([alesapin](https://github.com/alesapin)).
|
||||
* Fix typo in test_hdfsCluster_unset_skip_unavailable_shards. The test writes data to unskip_unavailable_shards, but uses skip_unavailable_shards from the previous test. [#64243](https://github.com/ClickHouse/ClickHouse/pull/64243) ([Mikhail Artemenko](https://github.com/Michicosun)).
|
||||
* Reduce the size of some slow tests. [#64387](https://github.com/ClickHouse/ClickHouse/pull/64387) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Reduce the size of some slow tests. [#64452](https://github.com/ClickHouse/ClickHouse/pull/64452) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Fix test_lost_part_other_replica. [#64512](https://github.com/ClickHouse/ClickHouse/pull/64512) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Add tests for experimental unequal joins and randomize new settings in clickhouse-test. [#64535](https://github.com/ClickHouse/ClickHouse/pull/64535) ([Nikita Fomichev](https://github.com/fm4v)).
|
||||
* Upgrade tests: Update config and work with release candidates. [#64542](https://github.com/ClickHouse/ClickHouse/pull/64542) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Add support for LLVM XRay. [#64592](https://github.com/ClickHouse/ClickHouse/pull/64592) ([Tomer Shafir](https://github.com/tomershafir)).
|
||||
* Speed up 02995_forget_partition. [#64761](https://github.com/ClickHouse/ClickHouse/pull/64761) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Fix 02790_async_queries_in_query_log. [#64764](https://github.com/ClickHouse/ClickHouse/pull/64764) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Support LLVM XRay on Linux amd64 only. [#64837](https://github.com/ClickHouse/ClickHouse/pull/64837) ([Tomer Shafir](https://github.com/tomershafir)).
|
||||
* Get rid of custom code in `tests/ci/download_release_packages.py` and `tests/ci/get_previous_release_tag.py` to avoid issues after the https://github.com/ClickHouse/ClickHouse/pull/64759 is merged. [#64848](https://github.com/ClickHouse/ClickHouse/pull/64848) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Decrease the `unit-test` image a few times. [#65102](https://github.com/ClickHouse/ClickHouse/pull/65102) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
|
||||
#### NO CL CATEGORY
|
||||
|
||||
* Backported in [#65568](https://github.com/ClickHouse/ClickHouse/issues/65568):. [#65498](https://github.com/ClickHouse/ClickHouse/pull/65498) ([Sergei Trifonov](https://github.com/serxa)).
|
||||
* Backported in [#65693](https://github.com/ClickHouse/ClickHouse/issues/65693):. [#65686](https://github.com/ClickHouse/ClickHouse/pull/65686) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
|
||||
#### NO CL ENTRY
|
||||
|
||||
* NO CL ENTRY: 'Revert "Do not remove server constants from GROUP BY key for secondary query."'. [#63297](https://github.com/ClickHouse/ClickHouse/pull/63297) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* NO CL ENTRY: 'Revert "Introduce bulk loading to StorageEmbeddedRocksDB"'. [#63316](https://github.com/ClickHouse/ClickHouse/pull/63316) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* NO CL ENTRY: 'Revert "Revert "Do not remove server constants from GROUP BY key for secondary query.""'. [#63415](https://github.com/ClickHouse/ClickHouse/pull/63415) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* NO CL ENTRY: 'Revert "Fix index analysis for `DateTime64`"'. [#63525](https://github.com/ClickHouse/ClickHouse/pull/63525) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* NO CL ENTRY: 'Revert "Update gui.md - Add ch-ui to open-source available tools."'. [#64064](https://github.com/ClickHouse/ClickHouse/pull/64064) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* NO CL ENTRY: 'Revert "Prevent conversion to Replicated if zookeeper path already exists"'. [#64214](https://github.com/ClickHouse/ClickHouse/pull/64214) ([Sergei Trifonov](https://github.com/serxa)).
|
||||
* NO CL ENTRY: 'Revert "Refactoring of Server.h: Isolate server management from other logic"'. [#64425](https://github.com/ClickHouse/ClickHouse/pull/64425) ([Alexander Tokmakov](https://github.com/tavplubix)).
|
||||
* NO CL ENTRY: 'Revert "Remove some unnecessary `UNREACHABLE`s"'. [#64430](https://github.com/ClickHouse/ClickHouse/pull/64430) ([Alexander Tokmakov](https://github.com/tavplubix)).
|
||||
* NO CL ENTRY: 'Revert "CI: fix build_report selection in case of job reuse"'. [#64516](https://github.com/ClickHouse/ClickHouse/pull/64516) ([Max K.](https://github.com/maxknv)).
|
||||
* NO CL ENTRY: 'Revert "Revert "CI: fix build_report selection in case of job reuse""'. [#64531](https://github.com/ClickHouse/ClickHouse/pull/64531) ([Max K.](https://github.com/maxknv)).
|
||||
* NO CL ENTRY: 'Revert "Add `fromReadableSize` function"'. [#64616](https://github.com/ClickHouse/ClickHouse/pull/64616) ([Robert Schulze](https://github.com/rschu1ze)).
|
||||
* NO CL ENTRY: 'Update CHANGELOG.md'. [#64816](https://github.com/ClickHouse/ClickHouse/pull/64816) ([Paweł Kudzia](https://github.com/pakud)).
|
||||
* NO CL ENTRY: 'Revert "Reduce lock contention for MergeTree tables (by renaming parts without holding lock)"'. [#64899](https://github.com/ClickHouse/ClickHouse/pull/64899) ([alesapin](https://github.com/alesapin)).
|
||||
* NO CL ENTRY: 'Revert "Add dynamic untracked memory limits for more precise memory tracking"'. [#64969](https://github.com/ClickHouse/ClickHouse/pull/64969) ([Sergei Trifonov](https://github.com/serxa)).
|
||||
* NO CL ENTRY: 'Revert "Fix duplicating Delete events in blob_storage_log"'. [#65049](https://github.com/ClickHouse/ClickHouse/pull/65049) ([Alexander Tokmakov](https://github.com/tavplubix)).
|
||||
* NO CL ENTRY: 'Revert "Revert "Fix duplicating Delete events in blob_storage_log""'. [#65053](https://github.com/ClickHouse/ClickHouse/pull/65053) ([vdimir](https://github.com/vdimir)).
|
||||
* NO CL ENTRY: 'Revert "S3: reduce retires time for queries, increase retries count for backups"'. [#65148](https://github.com/ClickHouse/ClickHouse/pull/65148) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* NO CL ENTRY: 'Revert "Small fix for 02340_parts_refcnt_mergetree"'. [#65149](https://github.com/ClickHouse/ClickHouse/pull/65149) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* NO CL ENTRY: 'Revert "Change default s3_throw_on_zero_files_match to true, document that presigned S3 URLs are not supported"'. [#65250](https://github.com/ClickHouse/ClickHouse/pull/65250) ([Max K.](https://github.com/maxknv)).
|
||||
* NO CL ENTRY: 'Revert "Fix AWS ECS"'. [#65361](https://github.com/ClickHouse/ClickHouse/pull/65361) ([Alexander Tokmakov](https://github.com/tavplubix)).
|
||||
|
||||
#### NOT FOR CHANGELOG / INSIGNIFICANT
|
||||
|
||||
* Try abort on current thread join. [#42544](https://github.com/ClickHouse/ClickHouse/pull/42544) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* This change was reverted. [#51008](https://github.com/ClickHouse/ClickHouse/pull/51008) ([Michael Kolupaev](https://github.com/al13n321)).
|
||||
* Analyzer fuzzer 2. [#57098](https://github.com/ClickHouse/ClickHouse/pull/57098) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Analyzer fuzzer 4. [#57101](https://github.com/ClickHouse/ClickHouse/pull/57101) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Check python code with flake8. [#58349](https://github.com/ClickHouse/ClickHouse/pull/58349) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Unite s3/hdfs/azure storage implementations into a single class working with IObjectStorage. Same for *Cluster, data lakes and Queue storages. [#59767](https://github.com/ClickHouse/ClickHouse/pull/59767) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
* Remove http_max_chunk_size setting (too internal). [#60852](https://github.com/ClickHouse/ClickHouse/pull/60852) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Fix race in refreshable materialized views causing SELECT to fail sometimes. [#60883](https://github.com/ClickHouse/ClickHouse/pull/60883) ([Michael Kolupaev](https://github.com/al13n321)).
|
||||
* Refactor KeyCondition and key analysis to improve PartitionPruner and trivial count optimization. This is separated from [#60463](https://github.com/ClickHouse/ClickHouse/issues/60463) . [#61459](https://github.com/ClickHouse/ClickHouse/pull/61459) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Implement cumulative A Sync status. [#61464](https://github.com/ClickHouse/ClickHouse/pull/61464) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Parallel replicas: table check failover. [#61935](https://github.com/ClickHouse/ClickHouse/pull/61935) ([Igor Nikonov](https://github.com/devcrafter)).
|
||||
* This change was reverted. [#61973](https://github.com/ClickHouse/ClickHouse/pull/61973) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Avoid crashing on column type mismatch in a few dozen places. [#62087](https://github.com/ClickHouse/ClickHouse/pull/62087) ([Michael Kolupaev](https://github.com/al13n321)).
|
||||
* Fix optimize_if_chain_to_multiif const NULL handling. [#62104](https://github.com/ClickHouse/ClickHouse/pull/62104) ([Michael Kolupaev](https://github.com/al13n321)).
|
||||
* Use intrusive lists for `ResourceRequest` instead of deque. [#62165](https://github.com/ClickHouse/ClickHouse/pull/62165) ([Sergei Trifonov](https://github.com/serxa)).
|
||||
* Analyzer: Fix validateAggregates for tables with different aliases. [#62346](https://github.com/ClickHouse/ClickHouse/pull/62346) ([vdimir](https://github.com/vdimir)).
|
||||
* Improve code and tests of `DROP` of multiple tables. [#62359](https://github.com/ClickHouse/ClickHouse/pull/62359) ([zhongyuankai](https://github.com/zhongyuankai)).
|
||||
* Fix exception message during writing to partitioned s3/hdfs/azure path with globs. [#62423](https://github.com/ClickHouse/ClickHouse/pull/62423) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* Support UBSan on Clang-19 (master). [#62466](https://github.com/ClickHouse/ClickHouse/pull/62466) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Replay ZK logs using keeper-bench. [#62481](https://github.com/ClickHouse/ClickHouse/pull/62481) ([Antonio Andelic](https://github.com/antonio2368)).
|
||||
* Save the stacktrace of thread waiting on failing AsyncLoader job. [#62719](https://github.com/ClickHouse/ClickHouse/pull/62719) ([Sergei Trifonov](https://github.com/serxa)).
|
||||
* group_by_use_nulls strikes back. [#62922](https://github.com/ClickHouse/ClickHouse/pull/62922) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Analyzer: prefer column name to alias from array join. [#62995](https://github.com/ClickHouse/ClickHouse/pull/62995) ([vdimir](https://github.com/vdimir)).
|
||||
* CI: try separate the workflows file for GitHub's Merge Queue. [#63123](https://github.com/ClickHouse/ClickHouse/pull/63123) ([Max K.](https://github.com/maxknv)).
|
||||
* Try to fix coverage tests. [#63130](https://github.com/ClickHouse/ClickHouse/pull/63130) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Fix azure backup flaky test. [#63158](https://github.com/ClickHouse/ClickHouse/pull/63158) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
|
||||
* Merging [#60920](https://github.com/ClickHouse/ClickHouse/issues/60920). [#63159](https://github.com/ClickHouse/ClickHouse/pull/63159) ([vdimir](https://github.com/vdimir)).
|
||||
* QueryAnalysisPass improve QUALIFY validation. [#63162](https://github.com/ClickHouse/ClickHouse/pull/63162) ([Maksim Kita](https://github.com/kitaisreal)).
|
||||
* Add numpy tests for different endianness. [#63189](https://github.com/ClickHouse/ClickHouse/pull/63189) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
|
||||
* Clean the `_work` directory between runner's launches. Fallback to auto-update actions runner if it fails to start. Make the `init-network.sh` sourceable and executable. [#63195](https://github.com/ClickHouse/ClickHouse/pull/63195) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Add ability to run Azure tests in PR with label. [#63196](https://github.com/ClickHouse/ClickHouse/pull/63196) ([alesapin](https://github.com/alesapin)).
|
||||
* Fix possible endless loop while reading from azure. [#63197](https://github.com/ClickHouse/ClickHouse/pull/63197) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Add information about materialized view security bug fix into the changelog. [#63204](https://github.com/ClickHouse/ClickHouse/pull/63204) ([pufit](https://github.com/pufit)).
|
||||
* Disable one test from 02994_sanity_check_settings. [#63208](https://github.com/ClickHouse/ClickHouse/pull/63208) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Enable custom parquet encoder by default, attempt 2. [#63210](https://github.com/ClickHouse/ClickHouse/pull/63210) ([Michael Kolupaev](https://github.com/al13n321)).
|
||||
* Update version after release. [#63215](https://github.com/ClickHouse/ClickHouse/pull/63215) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Update version_date.tsv and changelogs after v24.4.1.2088-stable. [#63217](https://github.com/ClickHouse/ClickHouse/pull/63217) ([robot-clickhouse](https://github.com/robot-clickhouse)).
|
||||
* Update version_date.tsv and changelogs after v24.3.3.102-lts. [#63226](https://github.com/ClickHouse/ClickHouse/pull/63226) ([robot-clickhouse](https://github.com/robot-clickhouse)).
|
||||
* Update version_date.tsv and changelogs after v24.2.3.70-stable. [#63227](https://github.com/ClickHouse/ClickHouse/pull/63227) ([robot-clickhouse](https://github.com/robot-clickhouse)).
|
||||
* Return back [#61551](https://github.com/ClickHouse/ClickHouse/issues/61551) (More optimal loading of marks). [#63233](https://github.com/ClickHouse/ClickHouse/pull/63233) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Hide CI options under a spoiler. [#63237](https://github.com/ClickHouse/ClickHouse/pull/63237) ([Konstantin Bogdanov](https://github.com/thevar1able)).
|
||||
* Add azure run with msan. [#63238](https://github.com/ClickHouse/ClickHouse/pull/63238) ([alesapin](https://github.com/alesapin)).
|
||||
* Now syntax for this command is following: `TRUNCATE ALL TABLES FROM [IF EXISTS] <database_name>`. [#63241](https://github.com/ClickHouse/ClickHouse/pull/63241) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
|
||||
* Minor follow-up to a renaming PR. [#63260](https://github.com/ClickHouse/ClickHouse/pull/63260) ([Robert Schulze](https://github.com/rschu1ze)).
|
||||
* Followup for [#62613](https://github.com/ClickHouse/ClickHouse/issues/62613) Adding back checks similar to these: https://github.com/ClickHouse/ClickHouse/pull/62613/files#diff-70859078da57ecdfc66d26f732c0d7718d269e82bdc80e62b39f5ffeab36c05bL99 https://github.com/ClickHouse/ClickHouse/pull/62613/files#diff-70859078da57ecdfc66d26f732c0d7718d269e82bdc80e62b39f5ffeab36c05bL144-L149. [#63274](https://github.com/ClickHouse/ClickHouse/pull/63274) ([Alexander Gololobov](https://github.com/davenger)).
|
||||
* This setting was added in 24.5, not 24.4. [#63278](https://github.com/ClickHouse/ClickHouse/pull/63278) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Improve cloud backport script. [#63282](https://github.com/ClickHouse/ClickHouse/pull/63282) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Update version_date.tsv and changelogs after v23.8.14.6-lts. [#63285](https://github.com/ClickHouse/ClickHouse/pull/63285) ([robot-clickhouse](https://github.com/robot-clickhouse)).
|
||||
* Fix azure flaky test. [#63286](https://github.com/ClickHouse/ClickHouse/pull/63286) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
|
||||
* Fix deadlock in `CacheDictionaryUpdateQueue` in case of exception in constructor. [#63287](https://github.com/ClickHouse/ClickHouse/pull/63287) ([Nikita Taranov](https://github.com/nickitat)).
|
||||
* DiskApp: fix 'list --recursive /' and crash on invalid arguments. [#63296](https://github.com/ClickHouse/ClickHouse/pull/63296) ([Michael Kolupaev](https://github.com/al13n321)).
|
||||
* Fix terminate because of unhandled exception in `MergeTreeDeduplicationLog::shutdown`. [#63298](https://github.com/ClickHouse/ClickHouse/pull/63298) ([Nikita Taranov](https://github.com/nickitat)).
|
||||
* Move s3_plain_rewritable unit test to shell. [#63317](https://github.com/ClickHouse/ClickHouse/pull/63317) ([Julia Kartseva](https://github.com/jkartseva)).
|
||||
* Add tests for [#63264](https://github.com/ClickHouse/ClickHouse/issues/63264). [#63321](https://github.com/ClickHouse/ClickHouse/pull/63321) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Try fix segfault in `MergeTreeReadPoolBase::createTask`. [#63323](https://github.com/ClickHouse/ClickHouse/pull/63323) ([Antonio Andelic](https://github.com/antonio2368)).
|
||||
* Reduce time-to-insert profiling data in case of logs cluster issues. [#63325](https://github.com/ClickHouse/ClickHouse/pull/63325) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Update README.md. [#63326](https://github.com/ClickHouse/ClickHouse/pull/63326) ([Tyler Hannan](https://github.com/tylerhannan)).
|
||||
* This should fix failures with error like `Permission denied ["/var/lib/clickhouse/disks/s3/store/364/3643ff83-0996-4a4a-a90b-a96e66a10c74"]` when table dir was chmod-ed by DatabaseCatalog. [#63330](https://github.com/ClickHouse/ClickHouse/pull/63330) ([Alexander Gololobov](https://github.com/davenger)).
|
||||
* Use `/commit/` to have the URLs in [reports](https://play.clickhouse.com/play?user=play#c2VsZWN0IGRpc3RpbmN0IGNvbW1pdF91cmwgZnJvbSBjaGVja3Mgd2hlcmUgY2hlY2tfc3RhcnRfdGltZSA+PSBub3coKSAtIGludGVydmFsIDEgbW9udGggYW5kIHB1bGxfcmVxdWVzdF9udW1iZXI9NjA1MzI=) like https://github.com/ClickHouse/ClickHouse/commit/44f8bc5308b53797bec8cccc3bd29fab8a00235d and not like https://github.com/ClickHouse/ClickHouse/commits/44f8bc5308b53797bec8cccc3bd29fab8a00235d. [#63331](https://github.com/ClickHouse/ClickHouse/pull/63331) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Add test for [#56287](https://github.com/ClickHouse/ClickHouse/issues/56287). [#63340](https://github.com/ClickHouse/ClickHouse/pull/63340) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Update README.md. [#63350](https://github.com/ClickHouse/ClickHouse/pull/63350) ([Tyler Hannan](https://github.com/tylerhannan)).
|
||||
* Add test for [#48049](https://github.com/ClickHouse/ClickHouse/issues/48049). [#63351](https://github.com/ClickHouse/ClickHouse/pull/63351) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Add option `query_id_prefix` to `clickhouse-benchmark`. [#63352](https://github.com/ClickHouse/ClickHouse/pull/63352) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* New version is fantatish (at least with Ubuntu 22.04.4 LTS): ``` azurite --version /usr/local/lib/node_modules/azurite/dist/src/common/persistence/MemoryExtentStore.js:53 return this._chunks.get(categoryName)?.chunks.get(id); ^. [#63354](https://github.com/ClickHouse/ClickHouse/pull/63354) ([alesapin](https://github.com/alesapin)).
|
||||
* Randomize setting `enable_block_offset_column` in stress tests. [#63355](https://github.com/ClickHouse/ClickHouse/pull/63355) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Fix AST parsing of invalid type names. [#63357](https://github.com/ClickHouse/ClickHouse/pull/63357) ([Michael Kolupaev](https://github.com/al13n321)).
|
||||
* Fix some 00002_log_and_exception_messages_formatting flakiness. [#63358](https://github.com/ClickHouse/ClickHouse/pull/63358) ([Michael Kolupaev](https://github.com/al13n321)).
|
||||
* Add tags for the test 03000_traverse_shadow_system_data_paths.sql to make it stable. [#63366](https://github.com/ClickHouse/ClickHouse/pull/63366) ([Aleksei Filatov](https://github.com/aalexfvk)).
|
||||
* Add a test for [#55655](https://github.com/ClickHouse/ClickHouse/issues/55655). [#63380](https://github.com/ClickHouse/ClickHouse/pull/63380) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fix data race in `reportBrokenPart`. [#63396](https://github.com/ClickHouse/ClickHouse/pull/63396) ([Antonio Andelic](https://github.com/antonio2368)).
|
||||
* Workaround for `oklch()` inside canvas bug for firefox. [#63404](https://github.com/ClickHouse/ClickHouse/pull/63404) ([Sergei Trifonov](https://github.com/serxa)).
|
||||
* Add test for issue [#47862](https://github.com/ClickHouse/ClickHouse/issues/47862). [#63424](https://github.com/ClickHouse/ClickHouse/pull/63424) ([Robert Schulze](https://github.com/rschu1ze)).
|
||||
* Fix parsing of `CREATE INDEX` query. [#63425](https://github.com/ClickHouse/ClickHouse/pull/63425) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* We are using Shared Catalog in the CI Logs cluster. [#63442](https://github.com/ClickHouse/ClickHouse/pull/63442) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fix collection of coverage data in the CI Logs cluster. [#63453](https://github.com/ClickHouse/ClickHouse/pull/63453) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fix flaky test for rocksdb bulk sink. [#63457](https://github.com/ClickHouse/ClickHouse/pull/63457) ([Duc Canh Le](https://github.com/canhld94)).
|
||||
* Extra constraints for stress and fuzzer tests. [#63470](https://github.com/ClickHouse/ClickHouse/pull/63470) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* io_uring: refactor get reader from context. [#63475](https://github.com/ClickHouse/ClickHouse/pull/63475) ([Tomer Shafir](https://github.com/tomershafir)).
|
||||
* Analyzer setting max_streams_to_max_threads_ratio overflow fix. [#63478](https://github.com/ClickHouse/ClickHouse/pull/63478) ([Maksim Kita](https://github.com/kitaisreal)).
|
||||
* Provides setting `output_format_pretty_preserve_border_for_multiline_string` which allows to render multiline strings in pretty format better. The default value for this setting is true. [#63479](https://github.com/ClickHouse/ClickHouse/pull/63479) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
|
||||
* Fix logical error when reloading config with customly created web disk broken after [#56367](https://github.com/ClickHouse/ClickHouse/issues/56367). [#63484](https://github.com/ClickHouse/ClickHouse/pull/63484) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
* Add test for [#49307](https://github.com/ClickHouse/ClickHouse/issues/49307). [#63486](https://github.com/ClickHouse/ClickHouse/pull/63486) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Remove leftovers of GCC support in cmake rules. [#63488](https://github.com/ClickHouse/ClickHouse/pull/63488) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Fix ProfileEventTimeIncrement code. [#63489](https://github.com/ClickHouse/ClickHouse/pull/63489) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* MergeTreePrefetchedReadPool: Print parent name when logging projection parts. [#63522](https://github.com/ClickHouse/ClickHouse/pull/63522) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Correctly stop `asyncCopy` tasks in all cases. [#63523](https://github.com/ClickHouse/ClickHouse/pull/63523) ([Antonio Andelic](https://github.com/antonio2368)).
|
||||
* Almost everything should work on AArch64 (Part of [#58061](https://github.com/ClickHouse/ClickHouse/issues/58061)). [#63527](https://github.com/ClickHouse/ClickHouse/pull/63527) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Update randomization of `old_parts_lifetime`. [#63530](https://github.com/ClickHouse/ClickHouse/pull/63530) ([Alexander Tokmakov](https://github.com/tavplubix)).
|
||||
* Update 02240_system_filesystem_cache_table.sh. [#63531](https://github.com/ClickHouse/ClickHouse/pull/63531) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
* Fix data race in `DistributedSink`. [#63538](https://github.com/ClickHouse/ClickHouse/pull/63538) ([Antonio Andelic](https://github.com/antonio2368)).
|
||||
* Fix azure tests run on master. [#63540](https://github.com/ClickHouse/ClickHouse/pull/63540) ([alesapin](https://github.com/alesapin)).
|
||||
* The commit 2b8254f987a65d5c21d74fe67b4ee9757970466e was not synced into the cloud because it was falsely marked as a success by `upstream_pr.head.sha`. Here we'll try our best to find a proper commit, and won't make anything if we can't. [#63543](https://github.com/ClickHouse/ClickHouse/pull/63543) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Add `no-s3-storage` tag to local_plain_rewritable ut. [#63546](https://github.com/ClickHouse/ClickHouse/pull/63546) ([Julia Kartseva](https://github.com/jkartseva)).
|
||||
* Add `jwcrypto` to integration tests runner. [#63551](https://github.com/ClickHouse/ClickHouse/pull/63551) ([Konstantin Bogdanov](https://github.com/thevar1able)).
|
||||
* Go back to upstream lz4. [#63574](https://github.com/ClickHouse/ClickHouse/pull/63574) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Fix logical error in ColumnTuple::tryInsert(). [#63583](https://github.com/ClickHouse/ClickHouse/pull/63583) ([Michael Kolupaev](https://github.com/al13n321)).
|
||||
* harmonize sumMap error messages on ILLEGAL_TYPE_OF_ARGUMENT. [#63619](https://github.com/ClickHouse/ClickHouse/pull/63619) ([Yohann Jardin](https://github.com/yohannj)).
|
||||
* Refactor data part writer to remove dependencies on MergeTreeData and DataPart. [#63620](https://github.com/ClickHouse/ClickHouse/pull/63620) ([Alexander Gololobov](https://github.com/davenger)).
|
||||
* Update README.md. [#63631](https://github.com/ClickHouse/ClickHouse/pull/63631) ([Tyler Hannan](https://github.com/tylerhannan)).
|
||||
* Ignore global profiler if system.trace_log is not enabled and fix really disable it for keeper standalone build. [#63632](https://github.com/ClickHouse/ClickHouse/pull/63632) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Fixes for 00002_log_and_exception_messages_formatting. [#63634](https://github.com/ClickHouse/ClickHouse/pull/63634) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Fix 02362_part_log_merge_algorithm flaky test. [#63635](https://github.com/ClickHouse/ClickHouse/pull/63635) ([Miсhael Stetsyuk](https://github.com/mstetsyuk)).
|
||||
* Fix tests flakiness due to long SYSTEM FLUSH LOGS (explicitly specify old_parts_lifetime). [#63639](https://github.com/ClickHouse/ClickHouse/pull/63639) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Update clickhouse-test help section. [#63663](https://github.com/ClickHouse/ClickHouse/pull/63663) ([Ali](https://github.com/xogoodnow)).
|
||||
* Fix bad test `02950_part_log_bytes_uncompressed`. [#63672](https://github.com/ClickHouse/ClickHouse/pull/63672) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Remove leftovers of `optimize_monotonous_functions_in_order_by`. [#63674](https://github.com/ClickHouse/ClickHouse/pull/63674) ([Nikita Taranov](https://github.com/nickitat)).
|
||||
* tests: attempt to fix 02340_parts_refcnt_mergetree flakiness. [#63684](https://github.com/ClickHouse/ClickHouse/pull/63684) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Parallel replicas: simple cleanup. [#63685](https://github.com/ClickHouse/ClickHouse/pull/63685) ([Igor Nikonov](https://github.com/devcrafter)).
|
||||
* Cancel S3 reads properly when parallel reads are used. [#63687](https://github.com/ClickHouse/ClickHouse/pull/63687) ([Antonio Andelic](https://github.com/antonio2368)).
|
||||
* Explaining insertion order of the Map datatype. [#63690](https://github.com/ClickHouse/ClickHouse/pull/63690) ([Mark Needham](https://github.com/mneedham)).
|
||||
* selectRangesToRead() simple cleanup. [#63692](https://github.com/ClickHouse/ClickHouse/pull/63692) ([Igor Nikonov](https://github.com/devcrafter)).
|
||||
* Fix fuzzed analyzer_join_with_constant query. [#63702](https://github.com/ClickHouse/ClickHouse/pull/63702) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Add missing explicit instantiations of ColumnUnique. [#63718](https://github.com/ClickHouse/ClickHouse/pull/63718) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Better asserts in ColumnString.h. [#63719](https://github.com/ClickHouse/ClickHouse/pull/63719) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Try to fix flaky s3 tests test_seekable_formats and test_seekable_formats_url. [#63720](https://github.com/ClickHouse/ClickHouse/pull/63720) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* Don't randomize some settings in 02941_variant_type_* tests to avoid timeouts. [#63721](https://github.com/ClickHouse/ClickHouse/pull/63721) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* Fix flaky 03145_non_loaded_projection_backup.sh. [#63728](https://github.com/ClickHouse/ClickHouse/pull/63728) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
* Userspace page cache: don't collect stats if cache is unused. [#63730](https://github.com/ClickHouse/ClickHouse/pull/63730) ([Michael Kolupaev](https://github.com/al13n321)).
|
||||
* Fix insignificant UBSAN error in QueryAnalyzer::replaceNodesWithPositionalArguments(). [#63734](https://github.com/ClickHouse/ClickHouse/pull/63734) ([Michael Kolupaev](https://github.com/al13n321)).
|
||||
* Fix a bug in resolving matcher inside lambda inside ARRAY JOIN. [#63744](https://github.com/ClickHouse/ClickHouse/pull/63744) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Self explanatory. [#63754](https://github.com/ClickHouse/ClickHouse/pull/63754) ([Arthur Passos](https://github.com/arthurpassos)).
|
||||
* Do not hide disk name. [#63756](https://github.com/ClickHouse/ClickHouse/pull/63756) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
* CI: remove Cancel and Debug workflows as redundant. [#63757](https://github.com/ClickHouse/ClickHouse/pull/63757) ([Max K.](https://github.com/maxknv)).
|
||||
* Security Policy: Add notification process. [#63773](https://github.com/ClickHouse/ClickHouse/pull/63773) ([Leticia Webb](https://github.com/leticiawebb)).
|
||||
* Fix typo. [#63774](https://github.com/ClickHouse/ClickHouse/pull/63774) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Fix fuzzer when only explicit faults are used. [#63775](https://github.com/ClickHouse/ClickHouse/pull/63775) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Settings typo. [#63782](https://github.com/ClickHouse/ClickHouse/pull/63782) ([Rory Crispin](https://github.com/RoryCrispin)).
|
||||
* Ref. [#63479](https://github.com/ClickHouse/ClickHouse/issues/63479). [#63783](https://github.com/ClickHouse/ClickHouse/pull/63783) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
|
||||
* Fix test_odbc_interaction from aarch64 [#61457](https://github.com/ClickHouse/ClickHouse/issues/61457). [#63787](https://github.com/ClickHouse/ClickHouse/pull/63787) ([alesapin](https://github.com/alesapin)).
|
||||
* Fix test `test_catboost_evaluate` for aarch64. [#61457](https://github.com/ClickHouse/ClickHouse/issues/61457). [#63789](https://github.com/ClickHouse/ClickHouse/pull/63789) ([alesapin](https://github.com/alesapin)).
|
||||
* Rewrite plan for parallel replicas in Planner. [#63796](https://github.com/ClickHouse/ClickHouse/pull/63796) ([Igor Nikonov](https://github.com/devcrafter)).
|
||||
* Follow-up for the `binary_symbols` table in CI. [#63802](https://github.com/ClickHouse/ClickHouse/pull/63802) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Support INSERT with VALUES in the ANTLR syntax file. [#63811](https://github.com/ClickHouse/ClickHouse/pull/63811) ([GG Bond](https://github.com/zzyReal666)).
|
||||
* Fix race in `ReplicatedMergeTreeLogEntryData`. [#63816](https://github.com/ClickHouse/ClickHouse/pull/63816) ([Antonio Andelic](https://github.com/antonio2368)).
|
||||
* Allow allocation during job destructor in `ThreadPool`. [#63829](https://github.com/ClickHouse/ClickHouse/pull/63829) ([Antonio Andelic](https://github.com/antonio2368)).
|
||||
* Remove HDFS from disks config for one integration test for arm. [#61457](https://github.com/ClickHouse/ClickHouse/issues/61457). [#63832](https://github.com/ClickHouse/ClickHouse/pull/63832) ([alesapin](https://github.com/alesapin)).
|
||||
* io_uring: add basic io_uring clickhouse perf test. [#63835](https://github.com/ClickHouse/ClickHouse/pull/63835) ([Tomer Shafir](https://github.com/tomershafir)).
|
||||
* Bump version for old image in test_short_strings_aggregation to make it work on arm. [#61457](https://github.com/ClickHouse/ClickHouse/issues/61457). [#63836](https://github.com/ClickHouse/ClickHouse/pull/63836) ([alesapin](https://github.com/alesapin)).
|
||||
* fix typo. [#63838](https://github.com/ClickHouse/ClickHouse/pull/63838) ([Alexander Gololobov](https://github.com/davenger)).
|
||||
* Disable test `test_non_default_compression/test.py::test_preconfigured_deflateqpl_codec` on arm. [#61457](https://github.com/ClickHouse/ClickHouse/issues/61457). [#63839](https://github.com/ClickHouse/ClickHouse/pull/63839) ([alesapin](https://github.com/alesapin)).
|
||||
* This PR was reverted. [#63857](https://github.com/ClickHouse/ClickHouse/pull/63857) ([Sema Checherinda](https://github.com/CheSema)).
|
||||
* Remove unnecessary logging statements in MergeJoinTransform.cpp. [#63860](https://github.com/ClickHouse/ClickHouse/pull/63860) ([vdimir](https://github.com/vdimir)).
|
||||
* Temporary disables 3 integration tcs on arm until https://github.com/clickhouse/clickhouse/issues/63855 is resolved. [#63867](https://github.com/ClickHouse/ClickHouse/pull/63867) ([Max K.](https://github.com/maxknv)).
|
||||
* Fix some settings values in 02455_one_row_from_csv_memory_usage test to make it less flaky. [#63874](https://github.com/ClickHouse/ClickHouse/pull/63874) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* Randomise `allow_experimental_parallel_reading_from_replicas` in stress tests. [#63899](https://github.com/ClickHouse/ClickHouse/pull/63899) ([Nikita Taranov](https://github.com/nickitat)).
|
||||
* Fix logs test for binary data by converting it to a valid UTF8 string. [#63909](https://github.com/ClickHouse/ClickHouse/pull/63909) ([Alexey Katsman](https://github.com/alexkats)).
|
||||
* More sanity checks for parallel replicas. [#63910](https://github.com/ClickHouse/ClickHouse/pull/63910) ([Nikita Taranov](https://github.com/nickitat)).
|
||||
* Include checks like `Stateless tests (asan, distributed cache, meta storage in keeper, s3 storage) [2/3]` in `Mergeable Check` and `A Sync`. [#63945](https://github.com/ClickHouse/ClickHouse/pull/63945) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Insignificant libunwind build fixes. [#63946](https://github.com/ClickHouse/ClickHouse/pull/63946) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Revert multiline pretty changes due to performance problems. [#63947](https://github.com/ClickHouse/ClickHouse/pull/63947) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Some usability improvements for c++expr script. [#63948](https://github.com/ClickHouse/ClickHouse/pull/63948) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Fix 02124_insert_deduplication_token_multiple_blocks. [#63950](https://github.com/ClickHouse/ClickHouse/pull/63950) ([Han Fei](https://github.com/hanfei1991)).
|
||||
* CI: aarch64: disable arm integration tests with kerberaized kafka. [#63961](https://github.com/ClickHouse/ClickHouse/pull/63961) ([Max K.](https://github.com/maxknv)).
|
||||
* Make events like [timeouts](https://play.clickhouse.com/play?user=play#U0VMRUNUICogRlJPTSBjaGVja3MgV0hFUkUgdGVzdF9uYW1lID09ICdDaGVjayB0aW1lb3V0IGV4cGlyZWQnIEFORCBjaGVja19zdGFydF90aW1lIEJFVFdFRU4gdG9EYXRlKCcyMDI0LTA1LTEwJykgQU5EIHRvRGF0ZSgnMjAyNC0wNS0xNScp) visible in CI DB. [#63982](https://github.com/ClickHouse/ClickHouse/pull/63982) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Slightly better setting `force_optimize_projection_name`. [#63997](https://github.com/ClickHouse/ClickHouse/pull/63997) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* chore(ci-workers): remove reusable from tailscale key. [#63999](https://github.com/ClickHouse/ClickHouse/pull/63999) ([Gabriel Martinez](https://github.com/GMartinez-Sisti)).
|
||||
* Better script to collect symbols statistics. [#64013](https://github.com/ClickHouse/ClickHouse/pull/64013) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fix a typo in Analyzer. [#64022](https://github.com/ClickHouse/ClickHouse/pull/64022) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fix libbcrypt for FreeBSD build. [#64023](https://github.com/ClickHouse/ClickHouse/pull/64023) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Remove some unnecessary `UNREACHABLE`s. [#64035](https://github.com/ClickHouse/ClickHouse/pull/64035) ([Robert Schulze](https://github.com/rschu1ze)).
|
||||
* Add `ClickHouseVersion.copy` method. Create a branch release in advance without spinning out the release to increase the stability. [#64039](https://github.com/ClickHouse/ClickHouse/pull/64039) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Fix searching for libclang_rt.builtins.*.a on FreeBSD. [#64051](https://github.com/ClickHouse/ClickHouse/pull/64051) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* The mime type is not 100% reliable for Python and shell scripts without shebangs; add a check for file extension. [#64062](https://github.com/ClickHouse/ClickHouse/pull/64062) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Fix waiting for mutations with retriable errors. [#64063](https://github.com/ClickHouse/ClickHouse/pull/64063) ([Alexander Tokmakov](https://github.com/tavplubix)).
|
||||
* harmonize h3PointDist* error messages. [#64080](https://github.com/ClickHouse/ClickHouse/pull/64080) ([Yohann Jardin](https://github.com/yohannj)).
|
||||
* This log message is better in Trace. [#64081](https://github.com/ClickHouse/ClickHouse/pull/64081) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Prevent stack overflow in Fuzzer and Stress test. [#64082](https://github.com/ClickHouse/ClickHouse/pull/64082) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* tests: fix expected error for 03036_reading_s3_archives (fixes CI). [#64089](https://github.com/ClickHouse/ClickHouse/pull/64089) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Fix sanitizers. [#64090](https://github.com/ClickHouse/ClickHouse/pull/64090) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Update llvm/clang to 18.1.6. [#64091](https://github.com/ClickHouse/ClickHouse/pull/64091) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Set green Mergeable Check status only after all required checks are passed with success - All non-required checks are started at stage Test_3 when all required checks are passed in Test_1/2. [#64093](https://github.com/ClickHouse/ClickHouse/pull/64093) ([Max K.](https://github.com/maxknv)).
|
||||
* Move `isAllASCII` from UTFHelper to StringUtils. [#64108](https://github.com/ClickHouse/ClickHouse/pull/64108) ([Robert Schulze](https://github.com/rschu1ze)).
|
||||
* Throw out some `inline`s. [#64110](https://github.com/ClickHouse/ClickHouse/pull/64110) ([Robert Schulze](https://github.com/rschu1ze)).
|
||||
* Clean up .clang-tidy after transition to Clang 18. [#64111](https://github.com/ClickHouse/ClickHouse/pull/64111) ([Robert Schulze](https://github.com/rschu1ze)).
|
||||
* Ignore exception when checking for cgroupsv2. [#64118](https://github.com/ClickHouse/ClickHouse/pull/64118) ([Robert Schulze](https://github.com/rschu1ze)).
|
||||
* Add retries in git submodule update. [#64125](https://github.com/ClickHouse/ClickHouse/pull/64125) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* See https://s3.amazonaws.com/clickhouse-test-reports/63946/86cf1e13d866333b8a511badd7f2fe186d810646/ast_fuzzer__ubsan_.html. [#64127](https://github.com/ClickHouse/ClickHouse/pull/64127) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Refactoring of Server.h: Isolate server management from other logic. [#64132](https://github.com/ClickHouse/ClickHouse/pull/64132) ([TTPO100AJIEX](https://github.com/TTPO100AJIEX)).
|
||||
* Syncing code. [#64135](https://github.com/ClickHouse/ClickHouse/pull/64135) ([Antonio Andelic](https://github.com/antonio2368)).
|
||||
* Losen build resource limits for unusual architectures. [#64152](https://github.com/ClickHouse/ClickHouse/pull/64152) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* fix clang tidy. [#64179](https://github.com/ClickHouse/ClickHouse/pull/64179) ([Han Fei](https://github.com/hanfei1991)).
|
||||
* Fix: 02124_insert_deduplication_token_multiple_blocks_replica. [#64181](https://github.com/ClickHouse/ClickHouse/pull/64181) ([Igor Nikonov](https://github.com/devcrafter)).
|
||||
* Fix global query profiler. [#64187](https://github.com/ClickHouse/ClickHouse/pull/64187) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* CI: cancel running PR wf after adding to MQ. [#64188](https://github.com/ClickHouse/ClickHouse/pull/64188) ([Max K.](https://github.com/maxknv)).
|
||||
* Add profile events for number of rows read during/after prewhere. [#64198](https://github.com/ClickHouse/ClickHouse/pull/64198) ([Nikita Taranov](https://github.com/nickitat)).
|
||||
* Add debug logging to EmbeddedRocksDBBulkSink. [#64203](https://github.com/ClickHouse/ClickHouse/pull/64203) ([vdimir](https://github.com/vdimir)).
|
||||
* Fix special builds (due to excessive resource usage - memory/CPU). [#64204](https://github.com/ClickHouse/ClickHouse/pull/64204) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Update InterpreterCreateQuery.cpp. [#64207](https://github.com/ClickHouse/ClickHouse/pull/64207) ([Alexander Tokmakov](https://github.com/tavplubix)).
|
||||
* Remove unused storage_snapshot field from MergeTreeSelectProcessor. [#64217](https://github.com/ClickHouse/ClickHouse/pull/64217) ([Alexander Gololobov](https://github.com/davenger)).
|
||||
* Add test for [#37090](https://github.com/ClickHouse/ClickHouse/issues/37090). [#64220](https://github.com/ClickHouse/ClickHouse/pull/64220) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
|
||||
* Small cli tool. [#64227](https://github.com/ClickHouse/ClickHouse/pull/64227) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
|
||||
* Make `settings_changes_history` const. [#64230](https://github.com/ClickHouse/ClickHouse/pull/64230) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)).
|
||||
* test for [#45804](https://github.com/ClickHouse/ClickHouse/issues/45804). [#64245](https://github.com/ClickHouse/ClickHouse/pull/64245) ([Denny Crane](https://github.com/den-crane)).
|
||||
* Update version after release. [#64283](https://github.com/ClickHouse/ClickHouse/pull/64283) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Followup for [#63691](https://github.com/ClickHouse/ClickHouse/issues/63691). [#64285](https://github.com/ClickHouse/ClickHouse/pull/64285) ([vdimir](https://github.com/vdimir)).
|
||||
* CI: dependency fix for changelog.py. [#64293](https://github.com/ClickHouse/ClickHouse/pull/64293) ([Max K.](https://github.com/maxknv)).
|
||||
* Print query in explain plan with parallel replicas. [#64298](https://github.com/ClickHouse/ClickHouse/pull/64298) ([vdimir](https://github.com/vdimir)).
|
||||
* CI: Cancel sync wf on new push. [#64299](https://github.com/ClickHouse/ClickHouse/pull/64299) ([Max K.](https://github.com/maxknv)).
|
||||
* CI: master workflow with folded jobs. [#64340](https://github.com/ClickHouse/ClickHouse/pull/64340) ([Max K.](https://github.com/maxknv)).
|
||||
* CI: Sync, Merge check, CI gh's statuses fixes. [#64348](https://github.com/ClickHouse/ClickHouse/pull/64348) ([Max K.](https://github.com/maxknv)).
|
||||
* Enable 02494_query_cache_nested_query_bug for Analyzer. [#64357](https://github.com/ClickHouse/ClickHouse/pull/64357) ([Robert Schulze](https://github.com/rschu1ze)).
|
||||
* Rename allow_deprecated_functions to allow_deprecated_error_prone_window_functions. [#64358](https://github.com/ClickHouse/ClickHouse/pull/64358) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Change input_format_parquet_use_native_reader to 24.6. [#64359](https://github.com/ClickHouse/ClickHouse/pull/64359) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Update description for settings `cross_join_min_rows_to_compress` and `cross_join_min_bytes_to_compress`. [#64360](https://github.com/ClickHouse/ClickHouse/pull/64360) ([Nikita Fomichev](https://github.com/fm4v)).
|
||||
* Changed the unreleased setting `aggregate_function_group_array_has_limit_size` to `aggregate_function_group_array_action_when_limit_is_reached`. [#64362](https://github.com/ClickHouse/ClickHouse/pull/64362) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Split tests 03039_dynamic_all_merge_algorithms to avoid timeouts. [#64363](https://github.com/ClickHouse/ClickHouse/pull/64363) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* Try to fix GWPAsan. [#64365](https://github.com/ClickHouse/ClickHouse/pull/64365) ([Antonio Andelic](https://github.com/antonio2368)).
|
||||
* CI: add secrets to reusable stage wf yml. [#64366](https://github.com/ClickHouse/ClickHouse/pull/64366) ([Max K.](https://github.com/maxknv)).
|
||||
* Do not run tests tagged 'no-s3-storage-with-slow-build' with ASan. [#64367](https://github.com/ClickHouse/ClickHouse/pull/64367) ([vdimir](https://github.com/vdimir)).
|
||||
* This change was reverted. [#64386](https://github.com/ClickHouse/ClickHouse/pull/64386) ([Francisco J. Jurado Moreno](https://github.com/Beetelbrox)).
|
||||
* Update s3queue.md. [#64389](https://github.com/ClickHouse/ClickHouse/pull/64389) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
* test for [#64211](https://github.com/ClickHouse/ClickHouse/issues/64211). [#64390](https://github.com/ClickHouse/ClickHouse/pull/64390) ([Denny Crane](https://github.com/den-crane)).
|
||||
* Follow-up to [#59767](https://github.com/ClickHouse/ClickHouse/issues/59767). [#64398](https://github.com/ClickHouse/ClickHouse/pull/64398) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
* Remove wrong comment. [#64403](https://github.com/ClickHouse/ClickHouse/pull/64403) ([Sergei Trifonov](https://github.com/serxa)).
|
||||
* Follow up to [#59767](https://github.com/ClickHouse/ClickHouse/issues/59767). [#64404](https://github.com/ClickHouse/ClickHouse/pull/64404) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
* Refactor s3 settings (move settings parsing into single place). [#64412](https://github.com/ClickHouse/ClickHouse/pull/64412) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
* This PR was reverted. [#64423](https://github.com/ClickHouse/ClickHouse/pull/64423) ([Sergei Trifonov](https://github.com/serxa)).
|
||||
* Fix test after [#64404](https://github.com/ClickHouse/ClickHouse/issues/64404). [#64432](https://github.com/ClickHouse/ClickHouse/pull/64432) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
* Faster TestKeeper shutdown. [#64433](https://github.com/ClickHouse/ClickHouse/pull/64433) ([Alexander Gololobov](https://github.com/davenger)).
|
||||
* Remove some logging. [#64434](https://github.com/ClickHouse/ClickHouse/pull/64434) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
* Revert "Revert "Remove some unnecessary UNREACHABLEs"". [#64435](https://github.com/ClickHouse/ClickHouse/pull/64435) ([Robert Schulze](https://github.com/rschu1ze)).
|
||||
* Clean settings in 02943_variant_read_subcolumns test. [#64437](https://github.com/ClickHouse/ClickHouse/pull/64437) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* Add a comment after [#64226](https://github.com/ClickHouse/ClickHouse/issues/64226). [#64449](https://github.com/ClickHouse/ClickHouse/pull/64449) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* CI: fix build_report selection in case of job reuse. [#64459](https://github.com/ClickHouse/ClickHouse/pull/64459) ([Max K.](https://github.com/maxknv)).
|
||||
* Add Critical bugfix category in PR template. [#64480](https://github.com/ClickHouse/ClickHouse/pull/64480) ([Max K.](https://github.com/maxknv)).
|
||||
* Remove `generateSnowflakeIDThreadMonotonic`. [#64499](https://github.com/ClickHouse/ClickHouse/pull/64499) ([Robert Schulze](https://github.com/rschu1ze)).
|
||||
* Move analyzer attempt 2. [#64500](https://github.com/ClickHouse/ClickHouse/pull/64500) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Sync some code back from internal to public repository. [#64502](https://github.com/ClickHouse/ClickHouse/pull/64502) ([Robert Schulze](https://github.com/rschu1ze)).
|
||||
* Remove `generateUUIDv7(NonMonotonic|ThreadMonotonic)` functions. [#64506](https://github.com/ClickHouse/ClickHouse/pull/64506) ([Robert Schulze](https://github.com/rschu1ze)).
|
||||
* Fix bash completion for settings. [#64521](https://github.com/ClickHouse/ClickHouse/pull/64521) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Use max_read_buffer_size for file descriptors as well in file(). [#64532](https://github.com/ClickHouse/ClickHouse/pull/64532) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Temporarily disable `enable_vertical_final` setting by default. This feature should not be used in older releases because it [might crash](https://github.com/ClickHouse/ClickHouse/issues/64543), but it's already fixed in 24.6 where this setting change has been reverted and `enable_vertical_final` is again enabled by default. [#64544](https://github.com/ClickHouse/ClickHouse/pull/64544) ([Alexander Tokmakov](https://github.com/tavplubix)).
|
||||
* Removed excessive calls to `flush logs` and disabled under sanitizers. [#64550](https://github.com/ClickHouse/ClickHouse/pull/64550) ([Nikita Taranov](https://github.com/nickitat)).
|
||||
* Sync code moved in private repo back back to public repo. [#64551](https://github.com/ClickHouse/ClickHouse/pull/64551) ([Robert Schulze](https://github.com/rschu1ze)).
|
||||
* Add support for custom type to ASTLiteral, or else the type may be lost when parse the ast. E.g. set a ASTLiteral to DataTime32 with value 19870, then it will be parsed to Int16. [#64562](https://github.com/ClickHouse/ClickHouse/pull/64562) ([shuai.xu](https://github.com/shuai-xu)).
|
||||
* Add a temporary known host for git over ssh. [#64569](https://github.com/ClickHouse/ClickHouse/pull/64569) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Cache first analysis result in ReadFromMergeTree. [#64579](https://github.com/ClickHouse/ClickHouse/pull/64579) ([Igor Nikonov](https://github.com/devcrafter)).
|
||||
* Derive script parameters (labels) from the --repo/--from-repo - fix to not create backports for all release branches if backport for specific branch only. [#64603](https://github.com/ClickHouse/ClickHouse/pull/64603) ([Max K.](https://github.com/maxknv)).
|
||||
* CI fixes. [#64605](https://github.com/ClickHouse/ClickHouse/pull/64605) ([Max K.](https://github.com/maxknv)).
|
||||
* Double-checking [#59318](https://github.com/ClickHouse/ClickHouse/issues/59318) and docs for `Map`. [#64606](https://github.com/ClickHouse/ClickHouse/pull/64606) ([Robert Schulze](https://github.com/rschu1ze)).
|
||||
* Update CHANGELOG.md. [#64609](https://github.com/ClickHouse/ClickHouse/pull/64609) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Tests: Convert numeric to symbolic error codes. [#64635](https://github.com/ClickHouse/ClickHouse/pull/64635) ([Robert Schulze](https://github.com/rschu1ze)).
|
||||
* Move NamedCollectionsFactory into a separate file. [#64642](https://github.com/ClickHouse/ClickHouse/pull/64642) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
* Shuffle tests for parallel execution. [#64646](https://github.com/ClickHouse/ClickHouse/pull/64646) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
|
||||
* CI: Do not upload binaries for special builds in PRs. [#64653](https://github.com/ClickHouse/ClickHouse/pull/64653) ([Max K.](https://github.com/maxknv)).
|
||||
* Update changelog. [#64654](https://github.com/ClickHouse/ClickHouse/pull/64654) ([Robert Schulze](https://github.com/rschu1ze)).
|
||||
* Parallel replicas: simple cleanup. [#64655](https://github.com/ClickHouse/ClickHouse/pull/64655) ([Igor Nikonov](https://github.com/devcrafter)).
|
||||
* Be more graceful with existing tables with `inverted` indexes. [#64656](https://github.com/ClickHouse/ClickHouse/pull/64656) ([Robert Schulze](https://github.com/rschu1ze)).
|
||||
* CI: Build Report Check to verify only enabled builds. [#64669](https://github.com/ClickHouse/ClickHouse/pull/64669) ([Max K.](https://github.com/maxknv)).
|
||||
* Tests: Convert error numbers to symbolic error codes, pt. II. [#64670](https://github.com/ClickHouse/ClickHouse/pull/64670) ([Robert Schulze](https://github.com/rschu1ze)).
|
||||
* Split query analyzer. [#64672](https://github.com/ClickHouse/ClickHouse/pull/64672) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* By the end of CI, CI_Running status must be SUCCESS or FAILURE never PENDING. [#64693](https://github.com/ClickHouse/ClickHouse/pull/64693) ([Max K.](https://github.com/maxknv)).
|
||||
* The following list of merged PRs is not present in the release branch and was added to the changelog by mistake:. [#64704](https://github.com/ClickHouse/ClickHouse/pull/64704) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
|
||||
* CI: MergeQueue: add binary_release and unit tests. [#64705](https://github.com/ClickHouse/ClickHouse/pull/64705) ([Max K.](https://github.com/maxknv)).
|
||||
* Fix to get first good enough GH token instead of getting and comparing all of them. [#64709](https://github.com/ClickHouse/ClickHouse/pull/64709) ([Max K.](https://github.com/maxknv)).
|
||||
* Check for missing Upload ID in CreateMultipartUpload reply. [#64714](https://github.com/ClickHouse/ClickHouse/pull/64714) ([Michael Kolupaev](https://github.com/al13n321)).
|
||||
* Update version_date.tsv and changelogs after v24.5.1.1763-stable. [#64715](https://github.com/ClickHouse/ClickHouse/pull/64715) ([robot-clickhouse](https://github.com/robot-clickhouse)).
|
||||
* Fix (unreleased) `loop()` table function crashing on empty table name. [#64716](https://github.com/ClickHouse/ClickHouse/pull/64716) ([Michael Kolupaev](https://github.com/al13n321)).
|
||||
* Update CHANGELOG.md. [#64730](https://github.com/ClickHouse/ClickHouse/pull/64730) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* CI: ci.py refactoring. [#64734](https://github.com/ClickHouse/ClickHouse/pull/64734) ([Max K.](https://github.com/maxknv)).
|
||||
* Return the explanation for session moved error. [#64747](https://github.com/ClickHouse/ClickHouse/pull/64747) ([Antonio Andelic](https://github.com/antonio2368)).
|
||||
* Adjust the version_helper and script to a new release scheme. [#64759](https://github.com/ClickHouse/ClickHouse/pull/64759) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Do not try to write columns.txt if it does not exist for write-once storages. [#64762](https://github.com/ClickHouse/ClickHouse/pull/64762) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Update 02482_load_parts_refcounts.sh. [#64765](https://github.com/ClickHouse/ClickHouse/pull/64765) ([Alexander Tokmakov](https://github.com/tavplubix)).
|
||||
* Fix crash with DISTINCT and window functions. [#64767](https://github.com/ClickHouse/ClickHouse/pull/64767) ([Igor Nikonov](https://github.com/devcrafter)).
|
||||
* Fix assert in IObjectStorageIteratorAsync. [#64770](https://github.com/ClickHouse/ClickHouse/pull/64770) ([Michael Kolupaev](https://github.com/al13n321)).
|
||||
* Make table functions always report engine 'StorageProxy' in system.tables. [#64771](https://github.com/ClickHouse/ClickHouse/pull/64771) ([Michael Kolupaev](https://github.com/al13n321)).
|
||||
* Ask about company name on GitHub. [#64774](https://github.com/ClickHouse/ClickHouse/pull/64774) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fix flaky tests about SQLite. [#64776](https://github.com/ClickHouse/ClickHouse/pull/64776) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Remove iostream debug helpers. [#64777](https://github.com/ClickHouse/ClickHouse/pull/64777) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Remove unnecessary comment. [#64785](https://github.com/ClickHouse/ClickHouse/pull/64785) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Follow-ups to some PRs. [#64787](https://github.com/ClickHouse/ClickHouse/pull/64787) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Attempt to fix 02228_merge_tree_insert_memory_usage.sql flakiness for s3. [#64800](https://github.com/ClickHouse/ClickHouse/pull/64800) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Add regression test for filter propagation through `Merge` engine. [#64806](https://github.com/ClickHouse/ClickHouse/pull/64806) ([Nikita Taranov](https://github.com/nickitat)).
|
||||
* Migrate changelog.py to a descendant of fuzzywuzzy. [#64807](https://github.com/ClickHouse/ClickHouse/pull/64807) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* A follow-up for https://github.com/ClickHouse/ClickHouse/pull/64039 and [#64759](https://github.com/ClickHouse/ClickHouse/issues/64759). [#64813](https://github.com/ClickHouse/ClickHouse/pull/64813) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Make row order optimization non-experimental. [#64814](https://github.com/ClickHouse/ClickHouse/pull/64814) ([Robert Schulze](https://github.com/rschu1ze)).
|
||||
* Didn't catch it at the time when all versions belonged to the current year. [#64817](https://github.com/ClickHouse/ClickHouse/pull/64817) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Fix clang-tidy build. [#64823](https://github.com/ClickHouse/ClickHouse/pull/64823) ([Robert Schulze](https://github.com/rschu1ze)).
|
||||
* Sets all builds that we run tests on to normal build list. [#64824](https://github.com/ClickHouse/ClickHouse/pull/64824) ([Max K.](https://github.com/maxknv)).
|
||||
* CI: fix CI await feature. [#64825](https://github.com/ClickHouse/ClickHouse/pull/64825) ([Max K.](https://github.com/maxknv)).
|
||||
* Fix clang-tidy. [#64827](https://github.com/ClickHouse/ClickHouse/pull/64827) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
* Upload blob_storage_log from stateless tests. [#64843](https://github.com/ClickHouse/ClickHouse/pull/64843) ([alesapin](https://github.com/alesapin)).
|
||||
* Follow-up to [#64349](https://github.com/ClickHouse/ClickHouse/issues/64349). [#64845](https://github.com/ClickHouse/ClickHouse/pull/64845) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
* Simplify handling of old 'inverted' indexes. [#64846](https://github.com/ClickHouse/ClickHouse/pull/64846) ([Robert Schulze](https://github.com/rschu1ze)).
|
||||
* Use issue templates defined in YAML provide more user-friendly experience. [#64850](https://github.com/ClickHouse/ClickHouse/pull/64850) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
|
||||
* Handle logs from rocksdb by ClickHouse internal logging. [#64856](https://github.com/ClickHouse/ClickHouse/pull/64856) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Follow-up for https://github.com/ClickHouse/ClickHouse/pull/59357. [#64860](https://github.com/ClickHouse/ClickHouse/pull/64860) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
|
||||
* added mlock and mlockall to aspell-dict to be ignored. [#64863](https://github.com/ClickHouse/ClickHouse/pull/64863) ([Ali](https://github.com/xogoodnow)).
|
||||
* A tiny fix for fancy quotes. [#64883](https://github.com/ClickHouse/ClickHouse/pull/64883) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fix possible loss of "Query was cancelled" message in client. [#64888](https://github.com/ClickHouse/ClickHouse/pull/64888) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* We accidentally lost the way to set `PR Check` failure at some point. [#64890](https://github.com/ClickHouse/ClickHouse/pull/64890) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Fix global trace collector. [#64896](https://github.com/ClickHouse/ClickHouse/pull/64896) ([Antonio Andelic](https://github.com/antonio2368)).
|
||||
* Fix test_mask_sensitive_info/test.py::test_create_table. [#64901](https://github.com/ClickHouse/ClickHouse/pull/64901) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Update 03165_string_functions_with_token_text_indexes.sql. [#64903](https://github.com/ClickHouse/ClickHouse/pull/64903) ([Alexander Tokmakov](https://github.com/tavplubix)).
|
||||
* When the branch is removed, it's impossible to get the diff by the labels. `print` in imported files spoils the `ipython` output. [#64904](https://github.com/ClickHouse/ClickHouse/pull/64904) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Disable transactions for unsupported storages even for materialized v…. [#64918](https://github.com/ClickHouse/ClickHouse/pull/64918) ([alesapin](https://github.com/alesapin)).
|
||||
* additional log for cleanupDetachedTables. [#64919](https://github.com/ClickHouse/ClickHouse/pull/64919) ([Konstantin Morozov](https://github.com/k-morozov)).
|
||||
* Fix tupleConcat of two empty tuples. This fixes [#64885](https://github.com/ClickHouse/ClickHouse/issues/64885). [#64923](https://github.com/ClickHouse/ClickHouse/pull/64923) ([Amos Bird](https://github.com/amosbird)).
|
||||
* CI: Minor fixes in ci scripts. [#64950](https://github.com/ClickHouse/ClickHouse/pull/64950) ([Max K.](https://github.com/maxknv)).
|
||||
* Fix error message (it was strange). [#64952](https://github.com/ClickHouse/ClickHouse/pull/64952) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Update fmtlib version to 9.1.0. [#64959](https://github.com/ClickHouse/ClickHouse/pull/64959) ([Duc Canh Le](https://github.com/canhld94)).
|
||||
* Test 02908_many_requests_to_system_replicas makes a lot of heavy requests and it overloads server if it's an ASAN build. [#64966](https://github.com/ClickHouse/ClickHouse/pull/64966) ([Alexander Gololobov](https://github.com/davenger)).
|
||||
* Fix (unreleased) bug in short circuit evaluation. [#64967](https://github.com/ClickHouse/ClickHouse/pull/64967) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Update version_date.tsv and changelogs after v24.4.2.141-stable. [#64968](https://github.com/ClickHouse/ClickHouse/pull/64968) ([robot-clickhouse](https://github.com/robot-clickhouse)).
|
||||
* Fix `test_attach_partition_using_copy`. [#64977](https://github.com/ClickHouse/ClickHouse/pull/64977) ([Alexander Tokmakov](https://github.com/tavplubix)).
|
||||
* Faster processing of scheduler queue activations. [#64985](https://github.com/ClickHouse/ClickHouse/pull/64985) ([Sergei Trifonov](https://github.com/serxa)).
|
||||
* CI: Fix nightly workflow. [#64987](https://github.com/ClickHouse/ClickHouse/pull/64987) ([Max K.](https://github.com/maxknv)).
|
||||
* Fix innocuous data race in detectLanguage. [#64988](https://github.com/ClickHouse/ClickHouse/pull/64988) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* CI: Builds in CI settings. [#64994](https://github.com/ClickHouse/ClickHouse/pull/64994) ([Max K.](https://github.com/maxknv)).
|
||||
* REVERTED. [#65009](https://github.com/ClickHouse/ClickHouse/pull/65009) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
|
||||
* CI: Fix backports. [#65010](https://github.com/ClickHouse/ClickHouse/pull/65010) ([Max K.](https://github.com/maxknv)).
|
||||
* Try fix 03143_prewhere_profile_events. [#65014](https://github.com/ClickHouse/ClickHouse/pull/65014) ([Nikita Taranov](https://github.com/nickitat)).
|
||||
* Fix 03165_string_functions_with_token_text_indexes. [#65018](https://github.com/ClickHouse/ClickHouse/pull/65018) ([Julia Kartseva](https://github.com/jkartseva)).
|
||||
* This change was reverted. [#65028](https://github.com/ClickHouse/ClickHouse/pull/65028) ([Sergei Trifonov](https://github.com/serxa)).
|
||||
* Bump googletest to latest HEAD. [#65038](https://github.com/ClickHouse/ClickHouse/pull/65038) ([Robert Schulze](https://github.com/rschu1ze)).
|
||||
* Improve comment about AsynchronousMetrics. [#65040](https://github.com/ClickHouse/ClickHouse/pull/65040) ([Antonio Andelic](https://github.com/antonio2368)).
|
||||
* CI: Remove fuzzer build from normal CI run (bugfix). [#65041](https://github.com/ClickHouse/ClickHouse/pull/65041) ([Max K.](https://github.com/maxknv)).
|
||||
* CI config refactoring. [#65045](https://github.com/ClickHouse/ClickHouse/pull/65045) ([Max K.](https://github.com/maxknv)).
|
||||
* Bump abseil to latest HEAD. [#65048](https://github.com/ClickHouse/ClickHouse/pull/65048) ([Robert Schulze](https://github.com/rschu1ze)).
|
||||
* Capture weak_ptr of ContextAccess for safety. [#65051](https://github.com/ClickHouse/ClickHouse/pull/65051) ([Alexander Gololobov](https://github.com/davenger)).
|
||||
* Stateless tests: add test for SIZES_OF_NESTED_COLUMNS_ARE_INCONSISTENT. [#65056](https://github.com/ClickHouse/ClickHouse/pull/65056) ([Nikita Fomichev](https://github.com/fm4v)).
|
||||
* Increase timeout in wait_for_all_mutations. [#65058](https://github.com/ClickHouse/ClickHouse/pull/65058) ([Alexander Gololobov](https://github.com/davenger)).
|
||||
* Tests for _time virtual column in file alike storages. [#65064](https://github.com/ClickHouse/ClickHouse/pull/65064) ([Ilya Golshtein](https://github.com/ilejn)).
|
||||
* Update odbc-bridge.md. [#65099](https://github.com/ClickHouse/ClickHouse/pull/65099) ([Alexander Gololobov](https://github.com/davenger)).
|
||||
* Small fix for 02340_parts_refcnt_mergetree. [#65105](https://github.com/ClickHouse/ClickHouse/pull/65105) ([Nikita Taranov](https://github.com/nickitat)).
|
||||
* Re-enable OpenSSL session caching. [#65111](https://github.com/ClickHouse/ClickHouse/pull/65111) ([Robert Schulze](https://github.com/rschu1ze)).
|
||||
* Update test_replicated_database/test.py. [#65112](https://github.com/ClickHouse/ClickHouse/pull/65112) ([Alexander Tokmakov](https://github.com/tavplubix)).
|
||||
* Fix false positives leaky memory warnings in OpenSSL. [#65125](https://github.com/ClickHouse/ClickHouse/pull/65125) ([Robert Schulze](https://github.com/rschu1ze)).
|
||||
* Fix `Initiator received more initial requests than there are replicas` with `loop` engine. [#65133](https://github.com/ClickHouse/ClickHouse/pull/65133) ([Nikita Taranov](https://github.com/nickitat)).
|
||||
* Fix 'Tasks in BackgroundSchedulePool cannot throw' caused by MergeTreeData::loadUnexpectedDataParts(). [#65135](https://github.com/ClickHouse/ClickHouse/pull/65135) ([Michael Kolupaev](https://github.com/al13n321)).
|
||||
* Fix bad error message. [#65137](https://github.com/ClickHouse/ClickHouse/pull/65137) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Just fixing flaky unit tests. [#65152](https://github.com/ClickHouse/ClickHouse/pull/65152) ([Sema Checherinda](https://github.com/CheSema)).
|
||||
* This change was reverted. [#65164](https://github.com/ClickHouse/ClickHouse/pull/65164) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Ensure submodules are named consistently. [#65167](https://github.com/ClickHouse/ClickHouse/pull/65167) ([Robert Schulze](https://github.com/rschu1ze)).
|
||||
* Remove obsolete fix from aws submodule. [#65168](https://github.com/ClickHouse/ClickHouse/pull/65168) ([Robert Schulze](https://github.com/rschu1ze)).
|
||||
* CI: Fix not-merged cherry-picks for backports. [#65181](https://github.com/ClickHouse/ClickHouse/pull/65181) ([Max K.](https://github.com/maxknv)).
|
||||
* Add an assertion in ReplicatedMergeTreeQueue. [#65184](https://github.com/ClickHouse/ClickHouse/pull/65184) ([Alexander Tokmakov](https://github.com/tavplubix)).
|
||||
* Fix bug in unreleased code. [#65185](https://github.com/ClickHouse/ClickHouse/pull/65185) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Fix docs for skipping-indexes.md. [#65194](https://github.com/ClickHouse/ClickHouse/pull/65194) ([morning-color](https://github.com/morning-color)).
|
||||
* Fix the descriptions of some server settings. [#65200](https://github.com/ClickHouse/ClickHouse/pull/65200) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Fix issue after [#64813](https://github.com/ClickHouse/ClickHouse/issues/64813) with broken search in the changelog, and missing zstd in a style-check image. [#65202](https://github.com/ClickHouse/ClickHouse/pull/65202) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Fix bug in unreleased code. [#65203](https://github.com/ClickHouse/ClickHouse/pull/65203) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Add test prewhere merge. [#65207](https://github.com/ClickHouse/ClickHouse/pull/65207) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Sync ProfileEvents.h. [#65208](https://github.com/ClickHouse/ClickHouse/pull/65208) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
* FinishCheck to set failure if workflow failed. [#65228](https://github.com/ClickHouse/ClickHouse/pull/65228) ([Max K.](https://github.com/maxknv)).
|
||||
* Update version_date.tsv and changelogs after v24.3.4.147-lts. [#65235](https://github.com/ClickHouse/ClickHouse/pull/65235) ([robot-clickhouse](https://github.com/robot-clickhouse)).
|
||||
* Update version_date.tsv and changelogs after v24.5.3.5-stable. [#65240](https://github.com/ClickHouse/ClickHouse/pull/65240) ([robot-clickhouse](https://github.com/robot-clickhouse)).
|
||||
* Fails sometimes for debug build https://s3.amazonaws.com/clickhouse-test-reports/0/af6afd904316bfb771737faa147ce8aea72dd705/stateless_tests__debug__[4_5].html. [#65245](https://github.com/ClickHouse/ClickHouse/pull/65245) ([Antonio Andelic](https://github.com/antonio2368)).
|
||||
* Fix libunwind in CI. [#65247](https://github.com/ClickHouse/ClickHouse/pull/65247) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* CI: Do not skip FinishCheck in Merge Queue. [#65249](https://github.com/ClickHouse/ClickHouse/pull/65249) ([Max K.](https://github.com/maxknv)).
|
||||
* Add a test just in case. [#65271](https://github.com/ClickHouse/ClickHouse/pull/65271) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Disable 02581_share_big_sets_between_multiple_mutations_tasks_long in coverage run. [#65295](https://github.com/ClickHouse/ClickHouse/pull/65295) ([Alexander Gololobov](https://github.com/davenger)).
|
||||
* Update version_date.tsv and changelogs after v23.8.15.35-lts. [#65300](https://github.com/ClickHouse/ClickHouse/pull/65300) ([robot-clickhouse](https://github.com/robot-clickhouse)).
|
||||
* mute test test_query_is_canceled_with_inf_retries. [#65301](https://github.com/ClickHouse/ClickHouse/pull/65301) ([Sema Checherinda](https://github.com/CheSema)).
|
||||
* Fix silly typo that caused wrong tags messages. [#65307](https://github.com/ClickHouse/ClickHouse/pull/65307) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Save server data for failed stateless tests. [#65309](https://github.com/ClickHouse/ClickHouse/pull/65309) ([Alexander Tokmakov](https://github.com/tavplubix)).
|
||||
* Fix 01246_buffer_flush flakiness (by tuning timeouts). [#65310](https://github.com/ClickHouse/ClickHouse/pull/65310) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Remove outdated override in stress tests. [#65323](https://github.com/ClickHouse/ClickHouse/pull/65323) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fix bad code in `system.session_log`. [#65332](https://github.com/ClickHouse/ClickHouse/pull/65332) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* add tests for 'boom filter index with map'. [#65333](https://github.com/ClickHouse/ClickHouse/pull/65333) ([iceFireser](https://github.com/iceFireser)).
|
||||
* Fix crash in 03036_dynamic_read_subcolumns. [#65341](https://github.com/ClickHouse/ClickHouse/pull/65341) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* Move tests 02942_variant_cast and 02944_variant_as_common_type to analyzer_tech_debt.txt. [#65342](https://github.com/ClickHouse/ClickHouse/pull/65342) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* REVERTED. [#65384](https://github.com/ClickHouse/ClickHouse/pull/65384) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
|
||||
* CI: Add Non-blocking (Woolen wolfdog) CI mode. [#65385](https://github.com/ClickHouse/ClickHouse/pull/65385) ([Max K.](https://github.com/maxknv)).
|
||||
* Fix compatibility release check. [#65394](https://github.com/ClickHouse/ClickHouse/pull/65394) ([Alexey Katsman](https://github.com/alexkats)).
|
||||
* Move a leaksan suppression from Poco into OpenSSL. [#65396](https://github.com/ClickHouse/ClickHouse/pull/65396) ([Robert Schulze](https://github.com/rschu1ze)).
|
||||
* Fix tidy build. [#65415](https://github.com/ClickHouse/ClickHouse/pull/65415) ([Sergei Trifonov](https://github.com/serxa)).
|
||||
* Remove Tests dependency on Builds_2. No tests depend on Builds_2. [#65416](https://github.com/ClickHouse/ClickHouse/pull/65416) ([Max K.](https://github.com/maxknv)).
|
||||
* CI: PR workflow dependencies fix. [#65442](https://github.com/ClickHouse/ClickHouse/pull/65442) ([Max K.](https://github.com/maxknv)).
|
||||
* Fix test_storage_s3_queue/test.py::test_max_set_age. [#65452](https://github.com/ClickHouse/ClickHouse/pull/65452) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
* CI: Rename A Sync status. [#65456](https://github.com/ClickHouse/ClickHouse/pull/65456) ([Max K.](https://github.com/maxknv)).
|
||||
* CI: Rename sync status. [#65464](https://github.com/ClickHouse/ClickHouse/pull/65464) ([Max K.](https://github.com/maxknv)).
|
||||
* This change was reverted. [#65466](https://github.com/ClickHouse/ClickHouse/pull/65466) ([Sergei Trifonov](https://github.com/serxa)).
|
||||
* Remove a feature wasn't part of any release yet. [#65480](https://github.com/ClickHouse/ClickHouse/pull/65480) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Backported in [#65657](https://github.com/ClickHouse/ClickHouse/issues/65657): Fix of `PlanSquashingTransform`: pipeline stuck. [#65487](https://github.com/ClickHouse/ClickHouse/pull/65487) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
|
||||
* Backported in [#65504](https://github.com/ClickHouse/ClickHouse/issues/65504): Fix bad test `02922_deduplication_with_zero_copy`. [#65492](https://github.com/ClickHouse/ClickHouse/pull/65492) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Backported in [#65591](https://github.com/ClickHouse/ClickHouse/issues/65591): Setting `uniform_snowflake_conversion_functions` (not in any release yet) was replaced by setting `allow_deprecated_snowflake_conversion_functions`. The latter controls if the legacy snowflake conversion functions are available (by default, they are not). [#65522](https://github.com/ClickHouse/ClickHouse/pull/65522) ([Robert Schulze](https://github.com/rschu1ze)).
|
||||
* Backported in [#65759](https://github.com/ClickHouse/ClickHouse/issues/65759): Renames Build report jobs. [#65554](https://github.com/ClickHouse/ClickHouse/pull/65554) ([Max K.](https://github.com/maxknv)).
|
||||
* Backported in [#65773](https://github.com/ClickHouse/ClickHouse/issues/65773): `base64En/Decode64Url` --> `base64En/Decode64URL`. [#65760](https://github.com/ClickHouse/ClickHouse/pull/65760) ([Robert Schulze](https://github.com/rschu1ze)).
|
||||
* Backported in [#65805](https://github.com/ClickHouse/ClickHouse/issues/65805): CI: Fix for Builds report job in backports and releases. [#65774](https://github.com/ClickHouse/ClickHouse/pull/65774) ([Max K.](https://github.com/maxknv)).
|
||||
|
@ -267,7 +267,7 @@ A pull request can be created even if the work is not completed yet. In this cas
|
||||
|
||||
Testing will commence as soon as ClickHouse employees label your PR with a tag “can be tested”. The results of some first checks (e.g. code style) will come in within several minutes. Build check results will arrive within half an hour. And the main set of tests will report itself within an hour.
|
||||
|
||||
The system will prepare ClickHouse binary builds for your pull request individually. To retrieve these builds click the “Details” link next to “ClickHouse build check” entry in the list of checks. There you will find direct links to the built .deb packages of ClickHouse which you can deploy even on your production servers (if you have no fear).
|
||||
The system will prepare ClickHouse binary builds for your pull request individually. To retrieve these builds click the “Details” link next to “Builds” entry in the list of checks. There you will find direct links to the built .deb packages of ClickHouse which you can deploy even on your production servers (if you have no fear).
|
||||
|
||||
Most probably some of the builds will fail at first times. This is due to the fact that we check builds both with gcc as well as with clang, with almost all of existing warnings (always with the `-Werror` flag) enabled for clang. On that same page, you can find all of the build logs so that you do not have to build ClickHouse in all of the possible ways.
|
||||
|
||||
|
@ -28,7 +28,7 @@ run, for example, the test `01428_hash_set_nan_key`, change to the repository
|
||||
folder and run the following command:
|
||||
|
||||
```
|
||||
PATH=$PATH:<path to clickhouse-client> tests/clickhouse-test 01428_hash_set_nan_key
|
||||
PATH=<path to clickhouse-client>:$PATH tests/clickhouse-test 01428_hash_set_nan_key
|
||||
```
|
||||
|
||||
Test results (`stderr` and `stdout`) are written to files `01428_hash_set_nan_key.[stderr|stdout]` which
|
||||
|
@ -56,6 +56,15 @@ SELECT * FROM test_table;
|
||||
- `_size` — Size of the file in bytes. Type: `Nullable(UInt64)`. If the size is unknown, the value is `NULL`.
|
||||
- `_time` — Last modified time of the file. Type: `Nullable(DateTime)`. If the time is unknown, the value is `NULL`.
|
||||
|
||||
## Authentication
|
||||
|
||||
Currently there are 3 ways to authenticate:
|
||||
- `Managed Identity` - Can be used by providing an `endpoint`, `connection_string` or `storage_account_url`.
|
||||
- `SAS Token` - Can be used by providing an `endpoint`, `connection_string` or `storage_account_url`. It is identified by presence of '?' in the url.
|
||||
- `Workload Identity` - Can be used by providing an `endpoint` or `storage_account_url`. If `use_workload_identity` parameter is set in config, ([workload identity](https://github.com/Azure/azure-sdk-for-cpp/tree/main/sdk/identity/azure-identity#authenticate-azure-hosted-applications)) is used for authentication.
|
||||
|
||||
|
||||
|
||||
## See also
|
||||
|
||||
[Azure Blob Storage Table Function](/docs/en/sql-reference/table-functions/azureBlobStorage)
|
||||
|
@ -58,7 +58,7 @@ Optional parameters:
|
||||
- `nats_max_reconnect` – Maximum amount of reconnection attempts per try to connect to NATS. Default: `5`.
|
||||
- `nats_reconnect_wait` – Amount of time in milliseconds to sleep between each reconnect attempt. Default: `5000`.
|
||||
- `nats_server_list` - Server list for connection. Can be specified to connect to NATS cluster.
|
||||
- `nats_skip_broken_messages` - NATS message parser tolerance to schema-incompatible messages per block. Default: `0`. If `nats_skip_broken_messages = N` then the engine skips *N* RabbitMQ messages that cannot be parsed (a message equals a row of data).
|
||||
- `nats_skip_broken_messages` - NATS message parser tolerance to schema-incompatible messages per block. Default: `0`. If `nats_skip_broken_messages = N` then the engine skips *N* NATS messages that cannot be parsed (a message equals a row of data).
|
||||
- `nats_max_block_size` - Number of row collected by poll(s) for flushing data from NATS. Default: [max_insert_block_size](../../../operations/settings/settings.md#max_insert_block_size).
|
||||
- `nats_flush_interval_ms` - Timeout for flushing data read from NATS. Default: [stream_flush_interval_ms](../../../operations/settings/settings.md#stream-flush-interval-ms).
|
||||
- `nats_username` - NATS username.
|
||||
@ -67,7 +67,7 @@ Optional parameters:
|
||||
- `nats_credential_file` - Path to a NATS credentials file.
|
||||
- `nats_startup_connect_tries` - Number of connect tries at startup. Default: `5`.
|
||||
- `nats_max_rows_per_message` — The maximum number of rows written in one NATS message for row-based formats. (default : `1`).
|
||||
- `nats_handle_error_mode` — How to handle errors for RabbitMQ engine. Possible values: default (the exception will be thrown if we fail to parse a message), stream (the exception message and raw message will be saved in virtual columns `_error` and `_raw_message`).
|
||||
- `nats_handle_error_mode` — How to handle errors for NATS engine. Possible values: default (the exception will be thrown if we fail to parse a message), stream (the exception message and raw message will be saved in virtual columns `_error` and `_raw_message`).
|
||||
|
||||
SSL connection:
|
||||
|
||||
|
@ -28,6 +28,8 @@ CREATE TABLE s3_queue_engine_table (name String, value UInt32)
|
||||
[s3queue_cleanup_interval_max_ms = 30000,]
|
||||
```
|
||||
|
||||
Starting with `24.7` settings without `s3queue_` prefix are also supported.
|
||||
|
||||
**Engine parameters**
|
||||
|
||||
- `path` — Bucket url with path to file. Supports following wildcards in readonly mode: `*`, `**`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc'`, `'def'` — strings. For more information see [below](#wildcards-in-path).
|
||||
|
@ -991,7 +991,6 @@ They can be used for prewhere optimization only if we enable `set allow_statisti
|
||||
|
||||
#### Available Types of Column Statistics {#available-types-of-column-statistics}
|
||||
|
||||
|
||||
- `TDigest`
|
||||
|
||||
[TDigest](https://github.com/tdunning/t-digest) sketches which allow to compute approximate percentiles (e.g. the 90th percentile) for numeric columns.
|
||||
@ -1000,9 +999,9 @@ They can be used for prewhere optimization only if we enable `set allow_statisti
|
||||
|
||||
[HyperLogLog](https://en.wikipedia.org/wiki/HyperLogLog) sketches which provide an estimation how many distinct values a column contains.
|
||||
|
||||
- `CountMin`
|
||||
- `count_min`
|
||||
|
||||
[Count-min](https://en.wikipedia.org/wiki/Count%E2%80%93min_sketch) sketches which provide an approximate count of the frequency of each value in a column.
|
||||
[count min sketch](https://en.wikipedia.org/wiki/Count%E2%80%93min_sketch) sketches which provide an approximate count of the frequency of each value in a column.
|
||||
|
||||
## Column-level Settings {#column-level-settings}
|
||||
|
||||
|
Binary file not shown.
After Width: | Height: | Size: 162 KiB |
394
docs/en/getting-started/example-datasets/stackoverflow.md
Normal file
394
docs/en/getting-started/example-datasets/stackoverflow.md
Normal file
@ -0,0 +1,394 @@
|
||||
---
|
||||
slug: /en/getting-started/example-datasets/stackoverflow
|
||||
sidebar_label: Stack Overflow
|
||||
sidebar_position: 1
|
||||
description: Analyzing Stack Overflow data with ClickHouse
|
||||
---
|
||||
|
||||
# Analyzing Stack Overflow data with ClickHouse
|
||||
|
||||
This dataset contains every `Post`, `User`, `Vote`, `Comment`, `Badge, `PostHistory`, and `PostLink` that has occurred on Stack Overflow.
|
||||
|
||||
Users can either download pre-prepared Parquet versions of the data, containing every post up to April 2024, or download the latest data in XML format and load this. Stack Overflow provide updates to this data periodically - historically every 3 months.
|
||||
|
||||
The following diagram shows the schema for the available tables assuming Parquet format.
|
||||
|
||||
![Stack Overflow schema](./images/stackoverflow.png)
|
||||
|
||||
A description of the schema of this data can be found [here](https://meta.stackexchange.com/questions/2677/database-schema-documentation-for-the-public-data-dump-and-sede).
|
||||
|
||||
## Pre-prepared data
|
||||
|
||||
We provide a copy of this data in Parquet format, up to date as of April 2024. While small for ClickHouse with respect to the number of rows (60 million posts), this dataset contains significant volumes of text and large String columns.
|
||||
|
||||
```sql
|
||||
CREATE DATABASE stackoverflow
|
||||
```
|
||||
|
||||
The following timings are for a 96 GiB, 24 vCPU ClickHouse Cloud cluster located in `eu-west-2`. The dataset is located in `eu-west-3`.
|
||||
|
||||
### Posts
|
||||
|
||||
```sql
|
||||
CREATE TABLE stackoverflow.posts
|
||||
(
|
||||
`Id` Int32 CODEC(Delta(4), ZSTD(1)),
|
||||
`PostTypeId` Enum8('Question' = 1, 'Answer' = 2, 'Wiki' = 3, 'TagWikiExcerpt' = 4, 'TagWiki' = 5, 'ModeratorNomination' = 6, 'WikiPlaceholder' = 7, 'PrivilegeWiki' = 8),
|
||||
`AcceptedAnswerId` UInt32,
|
||||
`CreationDate` DateTime64(3, 'UTC'),
|
||||
`Score` Int32,
|
||||
`ViewCount` UInt32 CODEC(Delta(4), ZSTD(1)),
|
||||
`Body` String,
|
||||
`OwnerUserId` Int32,
|
||||
`OwnerDisplayName` String,
|
||||
`LastEditorUserId` Int32,
|
||||
`LastEditorDisplayName` String,
|
||||
`LastEditDate` DateTime64(3, 'UTC') CODEC(Delta(8), ZSTD(1)),
|
||||
`LastActivityDate` DateTime64(3, 'UTC'),
|
||||
`Title` String,
|
||||
`Tags` String,
|
||||
`AnswerCount` UInt16 CODEC(Delta(2), ZSTD(1)),
|
||||
`CommentCount` UInt8,
|
||||
`FavoriteCount` UInt8,
|
||||
`ContentLicense` LowCardinality(String),
|
||||
`ParentId` String,
|
||||
`CommunityOwnedDate` DateTime64(3, 'UTC'),
|
||||
`ClosedDate` DateTime64(3, 'UTC')
|
||||
)
|
||||
ENGINE = MergeTree
|
||||
PARTITION BY toYear(CreationDate)
|
||||
ORDER BY (PostTypeId, toDate(CreationDate), CreationDate)
|
||||
|
||||
INSERT INTO stackoverflow.posts SELECT * FROM s3('https://datasets-documentation.s3.eu-west-3.amazonaws.com/stackoverflow/parquet/posts/*.parquet')
|
||||
|
||||
0 rows in set. Elapsed: 265.466 sec. Processed 59.82 million rows, 38.07 GB (225.34 thousand rows/s., 143.42 MB/s.)
|
||||
```
|
||||
|
||||
Posts are also available by year e.g. [https://datasets-documentation.s3.eu-west-3.amazonaws.com/stackoverflow/parquet/posts/2020.parquet](https://datasets-documentation.s3.eu-west-3.amazonaws.com/stackoverflow/parquet/posts/2020.parquet)
|
||||
|
||||
|
||||
### Votes
|
||||
|
||||
```sql
|
||||
CREATE TABLE stackoverflow.votes
|
||||
(
|
||||
`Id` UInt32,
|
||||
`PostId` Int32,
|
||||
`VoteTypeId` UInt8,
|
||||
`CreationDate` DateTime64(3, 'UTC'),
|
||||
`UserId` Int32,
|
||||
`BountyAmount` UInt8
|
||||
)
|
||||
ENGINE = MergeTree
|
||||
ORDER BY (VoteTypeId, CreationDate, PostId, UserId)
|
||||
|
||||
INSERT INTO stackoverflow.votes SELECT * FROM s3('https://datasets-documentation.s3.eu-west-3.amazonaws.com/stackoverflow/parquet/votes/*.parquet')
|
||||
|
||||
0 rows in set. Elapsed: 21.605 sec. Processed 238.98 million rows, 2.13 GB (11.06 million rows/s., 98.46 MB/s.)
|
||||
```
|
||||
|
||||
Votes are also available by year e.g. [https://datasets-documentation.s3.eu-west-3.amazonaws.com/stackoverflow/parquet/posts/2020.parquet](https://datasets-documentation.s3.eu-west-3.amazonaws.com/stackoverflow/parquet/votes/2020.parquet)
|
||||
|
||||
|
||||
### Comments
|
||||
|
||||
```sql
|
||||
CREATE TABLE stackoverflow.comments
|
||||
(
|
||||
`Id` UInt32,
|
||||
`PostId` UInt32,
|
||||
`Score` UInt16,
|
||||
`Text` String,
|
||||
`CreationDate` DateTime64(3, 'UTC'),
|
||||
`UserId` Int32,
|
||||
`UserDisplayName` LowCardinality(String)
|
||||
)
|
||||
ENGINE = MergeTree
|
||||
ORDER BY CreationDate
|
||||
|
||||
INSERT INTO stackoverflow.comments SELECT * FROM s3('https://datasets-documentation.s3.eu-west-3.amazonaws.com/stackoverflow/parquet/comments/*.parquet')
|
||||
|
||||
0 rows in set. Elapsed: 56.593 sec. Processed 90.38 million rows, 11.14 GB (1.60 million rows/s., 196.78 MB/s.)
|
||||
```
|
||||
|
||||
Comments are also available by year e.g. [https://datasets-documentation.s3.eu-west-3.amazonaws.com/stackoverflow/parquet/posts/2020.parquet](https://datasets-documentation.s3.eu-west-3.amazonaws.com/stackoverflow/parquet/comments/2020.parquet)
|
||||
|
||||
### Users
|
||||
|
||||
```sql
|
||||
CREATE TABLE stackoverflow.users
|
||||
(
|
||||
`Id` Int32,
|
||||
`Reputation` LowCardinality(String),
|
||||
`CreationDate` DateTime64(3, 'UTC') CODEC(Delta(8), ZSTD(1)),
|
||||
`DisplayName` String,
|
||||
`LastAccessDate` DateTime64(3, 'UTC'),
|
||||
`AboutMe` String,
|
||||
`Views` UInt32,
|
||||
`UpVotes` UInt32,
|
||||
`DownVotes` UInt32,
|
||||
`WebsiteUrl` String,
|
||||
`Location` LowCardinality(String),
|
||||
`AccountId` Int32
|
||||
)
|
||||
ENGINE = MergeTree
|
||||
ORDER BY (Id, CreationDate)
|
||||
|
||||
INSERT INTO stackoverflow.users SELECT * FROM s3('https://datasets-documentation.s3.eu-west-3.amazonaws.com/stackoverflow/parquet/users.parquet')
|
||||
|
||||
0 rows in set. Elapsed: 10.988 sec. Processed 22.48 million rows, 1.36 GB (2.05 million rows/s., 124.10 MB/s.)
|
||||
```
|
||||
|
||||
### Badges
|
||||
|
||||
```sql
|
||||
CREATE TABLE stackoverflow.badges
|
||||
(
|
||||
`Id` UInt32,
|
||||
`UserId` Int32,
|
||||
`Name` LowCardinality(String),
|
||||
`Date` DateTime64(3, 'UTC'),
|
||||
`Class` Enum8('Gold' = 1, 'Silver' = 2, 'Bronze' = 3),
|
||||
`TagBased` Bool
|
||||
)
|
||||
ENGINE = MergeTree
|
||||
ORDER BY UserId
|
||||
|
||||
INSERT INTO stackoverflow.badges SELECT * FROM s3('https://datasets-documentation.s3.eu-west-3.amazonaws.com/stackoverflow/parquet/badges.parquet')
|
||||
|
||||
0 rows in set. Elapsed: 6.635 sec. Processed 51.29 million rows, 797.05 MB (7.73 million rows/s., 120.13 MB/s.)
|
||||
```
|
||||
|
||||
### `PostLinks`
|
||||
|
||||
```sql
|
||||
CREATE TABLE stackoverflow.postlinks
|
||||
(
|
||||
`Id` UInt64,
|
||||
`CreationDate` DateTime64(3, 'UTC'),
|
||||
`PostId` Int32,
|
||||
`RelatedPostId` Int32,
|
||||
`LinkTypeId` Enum8('Linked' = 1, 'Duplicate' = 3)
|
||||
)
|
||||
ENGINE = MergeTree
|
||||
ORDER BY (PostId, RelatedPostId)
|
||||
|
||||
INSERT INTO stackoverflow.postlinks SELECT * FROM s3('https://datasets-documentation.s3.eu-west-3.amazonaws.com/stackoverflow/parquet/postlinks.parquet')
|
||||
|
||||
0 rows in set. Elapsed: 1.534 sec. Processed 6.55 million rows, 129.70 MB (4.27 million rows/s., 84.57 MB/s.)
|
||||
```
|
||||
|
||||
### `PostHistory`
|
||||
|
||||
```sql
|
||||
CREATE TABLE stackoverflow.posthistory
|
||||
(
|
||||
`Id` UInt64,
|
||||
`PostHistoryTypeId` UInt8,
|
||||
`PostId` Int32,
|
||||
`RevisionGUID` String,
|
||||
`CreationDate` DateTime64(3, 'UTC'),
|
||||
`UserId` Int32,
|
||||
`Text` String,
|
||||
`ContentLicense` LowCardinality(String),
|
||||
`Comment` String,
|
||||
`UserDisplayName` String
|
||||
)
|
||||
ENGINE = MergeTree
|
||||
ORDER BY (CreationDate, PostId)
|
||||
|
||||
INSERT INTO stackoverflow.posthistory SELECT * FROM s3('https://datasets-documentation.s3.eu-west-3.amazonaws.com/stackoverflow/parquet/posthistory/*.parquet')
|
||||
|
||||
0 rows in set. Elapsed: 422.795 sec. Processed 160.79 million rows, 67.08 GB (380.30 thousand rows/s., 158.67 MB/s.)
|
||||
```
|
||||
|
||||
## Original dataset
|
||||
|
||||
The original dataset is available in compressed (7zip) XML format at [https://archive.org/download/stackexchange](https://archive.org/download/stackexchange) - files with prefix `stackoverflow.com*`.
|
||||
|
||||
### Download
|
||||
|
||||
```bash
|
||||
wget https://archive.org/download/stackexchange/stackoverflow.com-Badges.7z
|
||||
wget https://archive.org/download/stackexchange/stackoverflow.com-Comments.7z
|
||||
wget https://archive.org/download/stackexchange/stackoverflow.com-PostHistory.7z
|
||||
wget https://archive.org/download/stackexchange/stackoverflow.com-PostLinks.7z
|
||||
wget https://archive.org/download/stackexchange/stackoverflow.com-Posts.7z
|
||||
wget https://archive.org/download/stackexchange/stackoverflow.com-Users.7z
|
||||
wget https://archive.org/download/stackexchange/stackoverflow.com-Votes.7z
|
||||
```
|
||||
|
||||
These files are up to 35GB and can take around 30 mins to download depending on internet connection - the download server throttles at around 20MB/sec.
|
||||
|
||||
### Convert to JSON
|
||||
|
||||
At the time of writing, ClickHouse does not have native support for XML as an input format. To load the data into ClickHouse we first convert to NDJSON.
|
||||
|
||||
To convert XML to JSON we recommend the [`xq`](https://github.com/kislyuk/yq) linux tool, a simple `jq` wrapper for XML documents.
|
||||
|
||||
Install xq and jq:
|
||||
|
||||
```bash
|
||||
sudo apt install jq
|
||||
pip install yq
|
||||
```
|
||||
|
||||
The following steps apply to any of the above files. We use the `stackoverflow.com-Posts.7z` file as an example. Modify as required.
|
||||
|
||||
Extract the file using [p7zip](https://p7zip.sourceforge.net/). This will produce a single xml file - in this case `Posts.xml`.
|
||||
|
||||
> Files are compressed approximately 4.5x. At 22GB compressed, the posts file requires around 97G uncompressed.
|
||||
|
||||
```bash
|
||||
p7zip -d stackoverflow.com-Posts.7z
|
||||
```
|
||||
|
||||
The following splits the xml file into files, each containing 10000 rows.
|
||||
|
||||
```bash
|
||||
mkdir posts
|
||||
cd posts
|
||||
# the following splits the input xml file into sub files of 10000 rows
|
||||
tail +3 ../Posts.xml | head -n -1 | split -l 10000 --filter='{ printf "<rows>\n"; cat - ; printf "</rows>\n"; } > $FILE' -
|
||||
```
|
||||
|
||||
After running the above users will have a set of files, each with 10000 lines. This ensures the memory overhead of the next command is not excessive (xml to JSON conversion is done in memory).
|
||||
|
||||
```bash
|
||||
find . -maxdepth 1 -type f -exec xq -c '.rows.row[]' {} \; | sed -e 's:"@:":g' > posts_v2.json
|
||||
```
|
||||
|
||||
The above command will produce a single `posts.json` file.
|
||||
|
||||
Load into ClickHouse with the following command. Note the schema is specified for the `posts.json` file. This will need to be adjusted per data type to align with the target table.
|
||||
|
||||
```bash
|
||||
clickhouse local --query "SELECT * FROM file('posts.json', JSONEachRow, 'Id Int32, PostTypeId UInt8, AcceptedAnswerId UInt32, CreationDate DateTime64(3, \'UTC\'), Score Int32, ViewCount UInt32, Body String, OwnerUserId Int32, OwnerDisplayName String, LastEditorUserId Int32, LastEditorDisplayName String, LastEditDate DateTime64(3, \'UTC\'), LastActivityDate DateTime64(3, \'UTC\'), Title String, Tags String, AnswerCount UInt16, CommentCount UInt8, FavoriteCount UInt8, ContentLicense String, ParentId String, CommunityOwnedDate DateTime64(3, \'UTC\'), ClosedDate DateTime64(3, \'UTC\')') FORMAT Native" | clickhouse client --host <host> --secure --password <password> --query "INSERT INTO stackoverflow.posts_v2 FORMAT Native"
|
||||
```
|
||||
|
||||
## Example queries
|
||||
|
||||
A few simple questions to you get started.
|
||||
|
||||
### Most popular tags on Stack Overflow
|
||||
|
||||
```sql
|
||||
|
||||
SELECT
|
||||
arrayJoin(arrayFilter(t -> (t != ''), splitByChar('|', Tags))) AS Tags,
|
||||
count() AS c
|
||||
FROM stackoverflow.posts
|
||||
GROUP BY Tags
|
||||
ORDER BY c DESC
|
||||
LIMIT 10
|
||||
|
||||
┌─Tags───────┬───────c─┐
|
||||
│ javascript │ 2527130 │
|
||||
│ python │ 2189638 │
|
||||
│ java │ 1916156 │
|
||||
│ c# │ 1614236 │
|
||||
│ php │ 1463901 │
|
||||
│ android │ 1416442 │
|
||||
│ html │ 1186567 │
|
||||
│ jquery │ 1034621 │
|
||||
│ c++ │ 806202 │
|
||||
│ css │ 803755 │
|
||||
└────────────┴─────────┘
|
||||
|
||||
10 rows in set. Elapsed: 1.013 sec. Processed 59.82 million rows, 1.21 GB (59.07 million rows/s., 1.19 GB/s.)
|
||||
Peak memory usage: 224.03 MiB.
|
||||
```
|
||||
|
||||
### User with the most answers (active accounts)
|
||||
|
||||
Account requires a `UserId`.
|
||||
|
||||
```sql
|
||||
SELECT
|
||||
any(OwnerUserId) UserId,
|
||||
OwnerDisplayName,
|
||||
count() AS c
|
||||
FROM stackoverflow.posts WHERE OwnerDisplayName != '' AND PostTypeId='Answer' AND OwnerUserId != 0
|
||||
GROUP BY OwnerDisplayName
|
||||
ORDER BY c DESC
|
||||
LIMIT 5
|
||||
|
||||
┌─UserId─┬─OwnerDisplayName─┬────c─┐
|
||||
│ 22656 │ Jon Skeet │ 2727 │
|
||||
│ 23354 │ Marc Gravell │ 2150 │
|
||||
│ 12950 │ tvanfosson │ 1530 │
|
||||
│ 3043 │ Joel Coehoorn │ 1438 │
|
||||
│ 10661 │ S.Lott │ 1087 │
|
||||
└────────┴──────────────────┴──────┘
|
||||
|
||||
5 rows in set. Elapsed: 0.154 sec. Processed 35.83 million rows, 193.39 MB (232.33 million rows/s., 1.25 GB/s.)
|
||||
Peak memory usage: 206.45 MiB.
|
||||
```
|
||||
|
||||
### ClickHouse related posts with the most views
|
||||
|
||||
```sql
|
||||
SELECT
|
||||
Id,
|
||||
Title,
|
||||
ViewCount,
|
||||
AnswerCount
|
||||
FROM stackoverflow.posts
|
||||
WHERE Title ILIKE '%ClickHouse%'
|
||||
ORDER BY ViewCount DESC
|
||||
LIMIT 10
|
||||
|
||||
┌───────Id─┬─Title────────────────────────────────────────────────────────────────────────────┬─ViewCount─┬─AnswerCount─┐
|
||||
│ 52355143 │ Is it possible to delete old records from clickhouse table? │ 41462 │ 3 │
|
||||
│ 37954203 │ Clickhouse Data Import │ 38735 │ 3 │
|
||||
│ 37901642 │ Updating data in Clickhouse │ 36236 │ 6 │
|
||||
│ 58422110 │ Pandas: How to insert dataframe into Clickhouse │ 29731 │ 4 │
|
||||
│ 63621318 │ DBeaver - Clickhouse - SQL Error [159] .. Read timed out │ 27350 │ 1 │
|
||||
│ 47591813 │ How to filter clickhouse table by array column contents? │ 27078 │ 2 │
|
||||
│ 58728436 │ How to search the string in query with case insensitive on Clickhouse database? │ 26567 │ 3 │
|
||||
│ 65316905 │ Clickhouse: DB::Exception: Memory limit (for query) exceeded │ 24899 │ 2 │
|
||||
│ 49944865 │ How to add a column in clickhouse │ 24424 │ 1 │
|
||||
│ 59712399 │ How to cast date Strings to DateTime format with extended parsing in ClickHouse? │ 22620 │ 1 │
|
||||
└──────────┴──────────────────────────────────────────────────────────────────────────────────┴───────────┴─────────────┘
|
||||
|
||||
10 rows in set. Elapsed: 0.472 sec. Processed 59.82 million rows, 1.91 GB (126.63 million rows/s., 4.03 GB/s.)
|
||||
Peak memory usage: 240.01 MiB.
|
||||
```
|
||||
|
||||
### Most controversial posts
|
||||
|
||||
```sql
|
||||
SELECT
|
||||
Id,
|
||||
Title,
|
||||
UpVotes,
|
||||
DownVotes,
|
||||
abs(UpVotes - DownVotes) AS Controversial_ratio
|
||||
FROM stackoverflow.posts
|
||||
INNER JOIN
|
||||
(
|
||||
SELECT
|
||||
PostId,
|
||||
countIf(VoteTypeId = 2) AS UpVotes,
|
||||
countIf(VoteTypeId = 3) AS DownVotes
|
||||
FROM stackoverflow.votes
|
||||
GROUP BY PostId
|
||||
HAVING (UpVotes > 10) AND (DownVotes > 10)
|
||||
) AS votes ON posts.Id = votes.PostId
|
||||
WHERE Title != ''
|
||||
ORDER BY Controversial_ratio ASC
|
||||
LIMIT 3
|
||||
|
||||
┌───────Id─┬─Title─────────────────────────────────────────────┬─UpVotes─┬─DownVotes─┬─Controversial_ratio─┐
|
||||
│ 583177 │ VB.NET Infinite For Loop │ 12 │ 12 │ 0 │
|
||||
│ 9756797 │ Read console input as enumerable - one statement? │ 16 │ 16 │ 0 │
|
||||
│ 13329132 │ What's the point of ARGV in Ruby? │ 22 │ 22 │ 0 │
|
||||
└──────────┴───────────────────────────────────────────────────┴─────────┴───────────┴─────────────────────┘
|
||||
|
||||
3 rows in set. Elapsed: 4.779 sec. Processed 298.80 million rows, 3.16 GB (62.52 million rows/s., 661.05 MB/s.)
|
||||
Peak memory usage: 6.05 GiB.
|
||||
```
|
||||
|
||||
## Attribution
|
||||
|
||||
We thank Stack Overflow for providing this data under the `cc-by-sa 4.0` license, acknowledging their efforts and the original source of the data at [https://archive.org/details/stackexchange](https://archive.org/details/stackexchange).
|
@ -314,7 +314,7 @@ For example, to download a aarch64 binary for ClickHouse v23.4, follow these ste
|
||||
- Find the GitHub pull request for release v23.4: [Release pull request for branch 23.4](https://github.com/ClickHouse/ClickHouse/pull/49238)
|
||||
- Click "Commits", then click a commit similar to "Update autogenerated version to 23.4.2.1 and contributors" for the particular version you like to install.
|
||||
- Click the green check / yellow dot / red cross to open the list of CI checks.
|
||||
- Click "Details" next to "ClickHouse Build Check" in the list, it will open a page similar to [this page](https://s3.amazonaws.com/clickhouse-test-reports/46793/b460eb70bf29b19eadd19a1f959b15d186705394/clickhouse_build_check/report.html)
|
||||
- Click "Details" next to "Builds" in the list, it will open a page similar to [this page](https://s3.amazonaws.com/clickhouse-test-reports/46793/b460eb70bf29b19eadd19a1f959b15d186705394/clickhouse_build_check/report.html)
|
||||
- Find the rows with compiler = "clang-*-aarch64" - there are multiple rows.
|
||||
- Download the artifacts for these builds.
|
||||
|
||||
|
@ -193,6 +193,7 @@ You can pass parameters to `clickhouse-client` (all parameters have a default va
|
||||
- `--hardware-utilization` — Print hardware utilization information in progress bar.
|
||||
- `--print-profile-events` – Print `ProfileEvents` packets.
|
||||
- `--profile-events-delay-ms` – Delay between printing `ProfileEvents` packets (-1 - print only totals, 0 - print every single packet).
|
||||
- `--jwt` – If specified, enables authorization via JSON Web Token. Server JWT authorization is available only in ClickHouse Cloud.
|
||||
|
||||
Instead of `--host`, `--port`, `--user` and `--password` options, ClickHouse client also supports connection strings (see next section).
|
||||
|
||||
|
@ -2169,6 +2169,7 @@ To exchange data with Hadoop, you can use [HDFS table engine](/docs/en/engines/t
|
||||
- [output_format_parquet_compression_method](/docs/en/operations/settings/settings-formats.md/#output_format_parquet_compression_method) - compression method used in output Parquet format. Default value - `lz4`.
|
||||
- [input_format_parquet_max_block_size](/docs/en/operations/settings/settings-formats.md/#input_format_parquet_max_block_size) - Max block row size for parquet reader. Default value - `65409`.
|
||||
- [input_format_parquet_prefer_block_bytes](/docs/en/operations/settings/settings-formats.md/#input_format_parquet_prefer_block_bytes) - Average block bytes output by parquet reader. Default value - `16744704`.
|
||||
- [output_format_parquet_write_page_index](/docs/en/operations/settings/settings-formats.md/#input_format_parquet_max_block_size) - Add a possibility to write page index into parquet files. Need to disable `output_format_parquet_use_custom_encoder` at present. Default value - `true`.
|
||||
|
||||
## ParquetMetadata {data-format-parquet-metadata}
|
||||
|
||||
|
@ -6,23 +6,30 @@ import SelfManaged from '@site/docs/en/_snippets/_self_managed_only_no_roadmap.m
|
||||
|
||||
<SelfManaged />
|
||||
|
||||
[SSL 'strict' option](../server-configuration-parameters/settings.md#server_configuration_parameters-openssl) enables mandatory certificate validation for the incoming connections. In this case, only connections with trusted certificates can be established. Connections with untrusted certificates will be rejected. Thus, certificate validation allows to uniquely authenticate an incoming connection. `Common Name` field of the certificate is used to identify connected user. This allows to associate multiple certificates with the same user. Additionally, reissuing and revoking of the certificates does not affect the ClickHouse configuration.
|
||||
[SSL 'strict' option](../server-configuration-parameters/settings.md#server_configuration_parameters-openssl) enables mandatory certificate validation for the incoming connections. In this case, only connections with trusted certificates can be established. Connections with untrusted certificates will be rejected. Thus, certificate validation allows to uniquely authenticate an incoming connection. `Common Name` or `subjectAltName extension` field of the certificate is used to identify the connected user. This allows to associate multiple certificates with the same user. Additionally, reissuing and revoking of the certificates does not affect the ClickHouse configuration.
|
||||
|
||||
To enable SSL certificate authentication, a list of `Common Name`'s for each ClickHouse user must be specified in the settings file `users.xml `:
|
||||
To enable SSL certificate authentication, a list of `Common Name`'s or `Subject Alt Name`'s for each ClickHouse user must be specified in the settings file `users.xml `:
|
||||
|
||||
**Example**
|
||||
```xml
|
||||
<clickhouse>
|
||||
<!- ... -->
|
||||
<users>
|
||||
<user_name>
|
||||
<user_name_1>
|
||||
<ssl_certificates>
|
||||
<common_name>host.domain.com:example_user</common_name>
|
||||
<common_name>host.domain.com:example_user_dev</common_name>
|
||||
<!-- More names -->
|
||||
</ssl_certificates>
|
||||
<!-- Other settings -->
|
||||
</user_name>
|
||||
</user_name_1>
|
||||
<user_name_2>
|
||||
<ssl_certificates>
|
||||
<subject_alt_name>DNS:host.domain.com</subject_alt_name>
|
||||
<!-- More names -->
|
||||
</ssl_certificates>
|
||||
<!-- Other settings -->
|
||||
</user_name_2>
|
||||
</users>
|
||||
</clickhouse>
|
||||
```
|
||||
|
@ -5,6 +5,10 @@ sidebar_label: "Named collections"
|
||||
title: "Named collections"
|
||||
---
|
||||
|
||||
import CloudNotSupportedBadge from '@theme/badges/CloudNotSupportedBadge';
|
||||
|
||||
<CloudNotSupportedBadge />
|
||||
|
||||
Named collections provide a way to store collections of key-value pairs to be
|
||||
used to configure integrations with external sources. You can use named collections with
|
||||
dictionaries, tables, table functions, and object storage.
|
||||
|
@ -498,6 +498,8 @@ Default: 0.9
|
||||
Interval in seconds during which the server's maximum allowed memory consumption is adjusted by the corresponding threshold in cgroups. (see
|
||||
settings `cgroup_memory_watcher_hard_limit_ratio` and `cgroup_memory_watcher_soft_limit_ratio`).
|
||||
|
||||
To disable the cgroup observer, set this value to `0`.
|
||||
|
||||
Type: UInt64
|
||||
|
||||
Default: 15
|
||||
@ -1463,6 +1465,9 @@ Keys:
|
||||
- `size` – Size of the file. Applies to `log` and `errorlog`. Once the file reaches `size`, ClickHouse archives and renames it, and creates a new log file in its place.
|
||||
- `count` – The number of archived log files that ClickHouse stores.
|
||||
- `console` – Send `log` and `errorlog` to the console instead of file. To enable, set to `1` or `true`.
|
||||
- `console_log_level` – Logging level for console. Default to `level`.
|
||||
- `use_syslog` - Log to syslog as well.
|
||||
- `syslog_level` - Logging level for logging to syslog.
|
||||
- `stream_compress` – Compress `log` and `errorlog` with `lz4` stream compression. To enable, set to `1` or `true`.
|
||||
- `formatting` – Specify log format to be printed in console log (currently only `json` supported).
|
||||
|
||||
|
@ -1428,6 +1428,13 @@ Average block bytes output by parquet reader. Lowering the configuration in the
|
||||
|
||||
Default value: `65409 * 256 = 16744704`
|
||||
|
||||
### output_format_parquet_write_page_index {#input_format_parquet_max_block_size}
|
||||
|
||||
Could add page index into parquet files. To enable this, need set `output_format_parquet_use_custom_encoder`=`false` and
|
||||
`output_format_parquet_write_page_index`=`true`.
|
||||
|
||||
Enable by default.
|
||||
|
||||
## Hive format settings {#hive-format-settings}
|
||||
|
||||
### input_format_hive_text_fields_delimiter {#input_format_hive_text_fields_delimiter}
|
||||
|
@ -2536,7 +2536,7 @@ Possible values:
|
||||
- 0 — Optimization disabled.
|
||||
- 1 — Optimization enabled.
|
||||
|
||||
Default value: `0`.
|
||||
Default value: `1`.
|
||||
|
||||
## optimize_trivial_count_query {#optimize-trivial-count-query}
|
||||
|
||||
|
@ -36,9 +36,24 @@ $ echo 0 | sudo tee /proc/sys/vm/overcommit_memory
|
||||
Use `perf top` to watch the time spent in the kernel for memory management.
|
||||
Permanent huge pages also do not need to be allocated.
|
||||
|
||||
:::warning
|
||||
If your system has less than 16 GB of RAM, you may experience various memory exceptions because default settings do not match this amount of memory. The recommended amount of RAM is 32 GB or more. You can use ClickHouse in a system with a small amount of RAM, even with 2 GB of RAM, but it requires additional tuning and can ingest at a low rate.
|
||||
:::
|
||||
### Using less than 16GB of RAM
|
||||
|
||||
The recommended amount of RAM is 32 GB or more.
|
||||
|
||||
If your system has less than 16 GB of RAM, you may experience various memory exceptions because default settings do not match this amount of memory. You can use ClickHouse in a system with a small amount of RAM (as low as 2 GB), but these setups require additional tuning and can only ingest at a low rate.
|
||||
|
||||
When using ClickHouse with less than 16GB of RAM, we recommend the following:
|
||||
|
||||
- Lower the size of the mark cache in the `config.xml`. It can be set as low as 500 MB, but it cannot be set to zero.
|
||||
- Lower the number of query processing threads down to `1`.
|
||||
- Lower the `max_block_size` to `8192`. Values as low as `1024` can still be practical.
|
||||
- Lower `max_download_threads` to `1`.
|
||||
- Set `input_format_parallel_parsing` and `output_format_parallel_formatting` to `0`.
|
||||
|
||||
Additional notes:
|
||||
- To flush the memory cached by the memory allocator, you can run the `SYSTEM JEMALLOC PURGE`
|
||||
command.
|
||||
- We do not recommend using S3 or Kafka integrations on low-memory machines because they require significant memory for buffers.
|
||||
|
||||
## Storage Subsystem {#storage-subsystem}
|
||||
|
||||
|
@ -236,10 +236,10 @@ Read 2 rows, 32.00 B in 0.000 sec., 5182 rows/sec., 80.97 KiB/sec.
|
||||
Previous example is the same as:
|
||||
|
||||
``` bash
|
||||
$ echo -e "1,2\n3,4" | clickhouse-local --query "
|
||||
$ echo -e "1,2\n3,4" | clickhouse-local -n --query "
|
||||
CREATE TABLE table (a Int64, b Int64) ENGINE = File(CSV, stdin);
|
||||
SELECT a, b FROM table;
|
||||
DROP TABLE table"
|
||||
DROP TABLE table;"
|
||||
Read 2 rows, 32.00 B in 0.000 sec., 4987 rows/sec., 77.93 KiB/sec.
|
||||
1 2
|
||||
3 4
|
||||
|
@ -1,24 +1,20 @@
|
||||
---
|
||||
slug: /en/sql-reference/data-types/json
|
||||
slug: /en/sql-reference/data-types/object-data-type
|
||||
sidebar_position: 26
|
||||
sidebar_label: JSON
|
||||
sidebar_label: Object Data Type
|
||||
keywords: [object, data type]
|
||||
---
|
||||
|
||||
# JSON
|
||||
# Object Data Type
|
||||
|
||||
:::note
|
||||
This feature is experimental and is not production-ready. If you need to work with JSON documents, consider using [this guide](/docs/en/integrations/data-ingestion/data-formats/json.md) instead.
|
||||
This feature is not production-ready and is now deprecated. If you need to work with JSON documents, consider using [this guide](/docs/en/integrations/data-ingestion/data-formats/json) instead. A new implementation to support JSON object is in progress and can be tracked [here](https://github.com/ClickHouse/ClickHouse/issues/54864)
|
||||
:::
|
||||
|
||||
Stores JavaScript Object Notation (JSON) documents in a single column.
|
||||
|
||||
`JSON` is an alias for `Object('json')`.
|
||||
|
||||
:::note
|
||||
The JSON data type is an obsolete feature. Do not use it.
|
||||
If you want to use it, set `allow_experimental_object_type = 1`.
|
||||
:::
|
||||
|
||||
## Example
|
||||
|
||||
**Example 1**
|
||||
@ -49,7 +45,7 @@ SELECT o.a, o.b.c, o.b.d[3] FROM json
|
||||
|
||||
**Example 2**
|
||||
|
||||
To be able to create an ordered `MergeTree` family table the sorting key has to be extracted into its column. For example, to insert a file of compressed HTTP access logs in JSON format:
|
||||
To be able to create an ordered `MergeTree` family table, the sorting key has to be extracted into its column. For example, to insert a file of compressed HTTP access logs in JSON format:
|
||||
|
||||
```sql
|
||||
CREATE TABLE logs
|
||||
@ -69,7 +65,7 @@ FROM file('access.json.gz', JSONAsString)
|
||||
|
||||
## Displaying JSON columns
|
||||
|
||||
When displaying a `JSON` column ClickHouse only shows the field values by default (because internally, it is represented as a tuple). You can display the field names as well by setting `output_format_json_named_tuples_as_objects = 1`:
|
||||
When displaying a `JSON` column, ClickHouse only shows the field values by default (because internally, it is represented as a tuple). You can also display the field names by setting `output_format_json_named_tuples_as_objects = 1`:
|
||||
|
||||
```sql
|
||||
SET output_format_json_named_tuples_as_objects = 1
|
||||
@ -83,4 +79,5 @@ SELECT * FROM json FORMAT JSONEachRow
|
||||
|
||||
## Related Content
|
||||
|
||||
- [Using JSON in ClickHouse](/docs/en/integrations/data-formats/json)
|
||||
- [Getting Data Into ClickHouse - Part 2 - A JSON detour](https://clickhouse.com/blog/getting-data-into-clickhouse-part-2-json)
|
||||
|
@ -173,7 +173,7 @@ See function [substring](string-functions.md#substring).
|
||||
|
||||
## bitTest
|
||||
|
||||
Takes any integer and converts it into [binary form](https://en.wikipedia.org/wiki/Binary_number), returns the value of a bit at specified position. The countdown starts from 0 from the right to the left.
|
||||
Takes any integer and converts it into [binary form](https://en.wikipedia.org/wiki/Binary_number), returns the value of a bit at specified position. Counting is right-to-left, starting at 0.
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -226,7 +226,7 @@ Result:
|
||||
|
||||
## bitTestAll
|
||||
|
||||
Returns result of [logical conjuction](https://en.wikipedia.org/wiki/Logical_conjunction) (AND operator) of all bits at given positions. The countdown starts from 0 from the right to the left.
|
||||
Returns result of [logical conjuction](https://en.wikipedia.org/wiki/Logical_conjunction) (AND operator) of all bits at given positions. Counting is right-to-left, starting at 0.
|
||||
|
||||
The conjuction for bit-wise operations:
|
||||
|
||||
@ -289,7 +289,7 @@ Result:
|
||||
|
||||
## bitTestAny
|
||||
|
||||
Returns result of [logical disjunction](https://en.wikipedia.org/wiki/Logical_disjunction) (OR operator) of all bits at given positions. The countdown starts from 0 from the right to the left.
|
||||
Returns result of [logical disjunction](https://en.wikipedia.org/wiki/Logical_disjunction) (OR operator) of all bits at given positions. Counting is right-to-left, starting at 0.
|
||||
|
||||
The disjunction for bit-wise operations:
|
||||
|
||||
|
@ -6,26 +6,297 @@ sidebar_label: NLP (experimental)
|
||||
|
||||
# Natural Language Processing (NLP) Functions
|
||||
|
||||
:::note
|
||||
:::warning
|
||||
This is an experimental feature that is currently in development and is not ready for general use. It will change in unpredictable backwards-incompatible ways in future releases. Set `allow_experimental_nlp_functions = 1` to enable it.
|
||||
:::
|
||||
|
||||
## detectCharset
|
||||
|
||||
The `detectCharset` function detects the character set of the non-UTF8-encoded input string.
|
||||
|
||||
*Syntax*
|
||||
|
||||
``` sql
|
||||
detectCharset('text_to_be_analyzed')
|
||||
```
|
||||
|
||||
*Arguments*
|
||||
|
||||
- `text_to_be_analyzed` — A collection (or sentences) of strings to analyze. [String](../data-types/string.md#string).
|
||||
|
||||
*Returned value*
|
||||
|
||||
- A `String` containing the code of the detected character set
|
||||
|
||||
*Examples*
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
SELECT detectCharset('Ich bleibe für ein paar Tage.');
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```response
|
||||
┌─detectCharset('Ich bleibe für ein paar Tage.')─┐
|
||||
│ WINDOWS-1252 │
|
||||
└────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## detectLanguage
|
||||
|
||||
Detects the language of the UTF8-encoded input string. The function uses the [CLD2 library](https://github.com/CLD2Owners/cld2) for detection, and it returns the 2-letter ISO language code.
|
||||
|
||||
The `detectLanguage` function works best when providing over 200 characters in the input string.
|
||||
|
||||
*Syntax*
|
||||
|
||||
``` sql
|
||||
detectLanguage('text_to_be_analyzed')
|
||||
```
|
||||
|
||||
*Arguments*
|
||||
|
||||
- `text_to_be_analyzed` — A collection (or sentences) of strings to analyze. [String](../data-types/string.md#string).
|
||||
|
||||
*Returned value*
|
||||
|
||||
- The 2-letter ISO code of the detected language
|
||||
|
||||
Other possible results:
|
||||
|
||||
- `un` = unknown, can not detect any language.
|
||||
- `other` = the detected language does not have 2 letter code.
|
||||
|
||||
*Examples*
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
SELECT detectLanguage('Je pense que je ne parviendrai jamais à parler français comme un natif. Where there’s a will, there’s a way.');
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```response
|
||||
fr
|
||||
```
|
||||
|
||||
## detectLanguageMixed
|
||||
|
||||
Similar to the `detectLanguage` function, but `detectLanguageMixed` returns a `Map` of 2-letter language codes that are mapped to the percentage of the certain language in the text.
|
||||
|
||||
|
||||
*Syntax*
|
||||
|
||||
``` sql
|
||||
detectLanguageMixed('text_to_be_analyzed')
|
||||
```
|
||||
|
||||
*Arguments*
|
||||
|
||||
- `text_to_be_analyzed` — A collection (or sentences) of strings to analyze. [String](../data-types/string.md#string).
|
||||
|
||||
*Returned value*
|
||||
|
||||
- `Map(String, Float32)`: The keys are 2-letter ISO codes and the values are a percentage of text found for that language
|
||||
|
||||
|
||||
*Examples*
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
SELECT detectLanguageMixed('二兎を追う者は一兎をも得ず二兎を追う者は一兎をも得ず A vaincre sans peril, on triomphe sans gloire.');
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```response
|
||||
┌─detectLanguageMixed()─┐
|
||||
│ {'ja':0.62,'fr':0.36 │
|
||||
└───────────────────────┘
|
||||
```
|
||||
|
||||
## detectProgrammingLanguage
|
||||
|
||||
Determines the programming language from the source code. Calculates all the unigrams and bigrams of commands in the source code.
|
||||
Then using a marked-up dictionary with weights of unigrams and bigrams of commands for various programming languages finds the biggest weight of the programming language and returns it.
|
||||
|
||||
*Syntax*
|
||||
|
||||
``` sql
|
||||
detectProgrammingLanguage('source_code')
|
||||
```
|
||||
|
||||
*Arguments*
|
||||
|
||||
- `source_code` — String representation of the source code to analyze. [String](../data-types/string.md#string).
|
||||
|
||||
*Returned value*
|
||||
|
||||
- Programming language. [String](../data-types/string.md).
|
||||
|
||||
*Examples*
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
SELECT detectProgrammingLanguage('#include <iostream>');
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```response
|
||||
┌─detectProgrammingLanguage('#include <iostream>')─┐
|
||||
│ C++ │
|
||||
└──────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## detectLanguageUnknown
|
||||
|
||||
Similar to the `detectLanguage` function, except the `detectLanguageUnknown` function works with non-UTF8-encoded strings. Prefer this version when your character set is UTF-16 or UTF-32.
|
||||
|
||||
|
||||
*Syntax*
|
||||
|
||||
``` sql
|
||||
detectLanguageUnknown('text_to_be_analyzed')
|
||||
```
|
||||
|
||||
*Arguments*
|
||||
|
||||
- `text_to_be_analyzed` — A collection (or sentences) of strings to analyze. [String](../data-types/string.md#string).
|
||||
|
||||
*Returned value*
|
||||
|
||||
- The 2-letter ISO code of the detected language
|
||||
|
||||
Other possible results:
|
||||
|
||||
- `un` = unknown, can not detect any language.
|
||||
- `other` = the detected language does not have 2 letter code.
|
||||
|
||||
*Examples*
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
SELECT detectLanguageUnknown('Ich bleibe für ein paar Tage.');
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```response
|
||||
┌─detectLanguageUnknown('Ich bleibe für ein paar Tage.')─┐
|
||||
│ de │
|
||||
└────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## detectTonality
|
||||
|
||||
Determines the sentiment of text data. Uses a marked-up sentiment dictionary, in which each word has a tonality ranging from `-12` to `6`.
|
||||
For each text, it calculates the average sentiment value of its words and returns it in the range `[-1,1]`.
|
||||
|
||||
:::note
|
||||
This function is limited in its current form. Currently it makes use of the embedded emotional dictionary at `/contrib/nlp-data/tonality_ru.zst` and only works for the Russian language.
|
||||
:::
|
||||
|
||||
*Syntax*
|
||||
|
||||
``` sql
|
||||
detectTonality(text)
|
||||
```
|
||||
|
||||
*Arguments*
|
||||
|
||||
- `text` — The text to be analyzed. [String](../data-types/string.md#string).
|
||||
|
||||
*Returned value*
|
||||
|
||||
- The average sentiment value of the words in `text`. [Float32](../data-types/float.md).
|
||||
|
||||
*Examples*
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
SELECT detectTonality('Шарик - хороший пёс'), -- Sharik is a good dog
|
||||
detectTonality('Шарик - пёс'), -- Sharik is a dog
|
||||
detectTonality('Шарик - плохой пёс'); -- Sharkik is a bad dog
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```response
|
||||
┌─detectTonality('Шарик - хороший пёс')─┬─detectTonality('Шарик - пёс')─┬─detectTonality('Шарик - плохой пёс')─┐
|
||||
│ 0.44445 │ 0 │ -0.3 │
|
||||
└───────────────────────────────────────┴───────────────────────────────┴──────────────────────────────────────┘
|
||||
```
|
||||
## lemmatize
|
||||
|
||||
Performs lemmatization on a given word. Needs dictionaries to operate, which can be obtained [here](https://github.com/vpodpecan/lemmagen3/tree/master/src/lemmagen3/models).
|
||||
|
||||
*Syntax*
|
||||
|
||||
``` sql
|
||||
lemmatize('language', word)
|
||||
```
|
||||
|
||||
*Arguments*
|
||||
|
||||
- `language` — Language which rules will be applied. [String](../data-types/string.md#string).
|
||||
- `word` — Word that needs to be lemmatized. Must be lowercase. [String](../data-types/string.md#string).
|
||||
|
||||
*Examples*
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT lemmatize('en', 'wolves');
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─lemmatize("wolves")─┐
|
||||
│ "wolf" │
|
||||
└─────────────────────┘
|
||||
```
|
||||
|
||||
*Configuration*
|
||||
|
||||
This configuration specifies that the dictionary `en.bin` should be used for lemmatization of English (`en`) words. The `.bin` files can be downloaded from
|
||||
[here](https://github.com/vpodpecan/lemmagen3/tree/master/src/lemmagen3/models).
|
||||
|
||||
``` xml
|
||||
<lemmatizers>
|
||||
<lemmatizer>
|
||||
<!-- highlight-start -->
|
||||
<lang>en</lang>
|
||||
<path>en.bin</path>
|
||||
<!-- highlight-end -->
|
||||
</lemmatizer>
|
||||
</lemmatizers>
|
||||
```
|
||||
|
||||
## stem
|
||||
|
||||
Performs stemming on a given word.
|
||||
|
||||
### Syntax
|
||||
*Syntax*
|
||||
|
||||
``` sql
|
||||
stem('language', word)
|
||||
```
|
||||
|
||||
### Arguments
|
||||
*Arguments*
|
||||
|
||||
- `language` — Language which rules will be applied. Use the two letter [ISO 639-1 code](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes).
|
||||
- `word` — word that needs to be stemmed. Must be in lowercase. [String](../data-types/string.md#string).
|
||||
|
||||
### Examples
|
||||
*Examples*
|
||||
|
||||
Query:
|
||||
|
||||
@ -40,7 +311,7 @@ Result:
|
||||
│ ['I','think','it','is','a','bless','in','disguis'] │
|
||||
└────────────────────────────────────────────────────┘
|
||||
```
|
||||
### Supported languages for stem()
|
||||
*Supported languages for stem()*
|
||||
|
||||
:::note
|
||||
The stem() function uses the [Snowball stemming](https://snowballstem.org/) library, see the Snowball website for updated languages etc.
|
||||
@ -76,53 +347,6 @@ The stem() function uses the [Snowball stemming](https://snowballstem.org/) libr
|
||||
- Turkish
|
||||
- Yiddish
|
||||
|
||||
## lemmatize
|
||||
|
||||
Performs lemmatization on a given word. Needs dictionaries to operate, which can be obtained [here](https://github.com/vpodpecan/lemmagen3/tree/master/src/lemmagen3/models).
|
||||
|
||||
### Syntax
|
||||
|
||||
``` sql
|
||||
lemmatize('language', word)
|
||||
```
|
||||
|
||||
### Arguments
|
||||
|
||||
- `language` — Language which rules will be applied. [String](../data-types/string.md#string).
|
||||
- `word` — Word that needs to be lemmatized. Must be lowercase. [String](../data-types/string.md#string).
|
||||
|
||||
### Examples
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT lemmatize('en', 'wolves');
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─lemmatize("wolves")─┐
|
||||
│ "wolf" │
|
||||
└─────────────────────┘
|
||||
```
|
||||
|
||||
### Configuration
|
||||
|
||||
This configuration specifies that the dictionary `en.bin` should be used for lemmatization of English (`en`) words. The `.bin` files can be downloaded from
|
||||
[here](https://github.com/vpodpecan/lemmagen3/tree/master/src/lemmagen3/models).
|
||||
|
||||
``` xml
|
||||
<lemmatizers>
|
||||
<lemmatizer>
|
||||
<!-- highlight-start -->
|
||||
<lang>en</lang>
|
||||
<path>en.bin</path>
|
||||
<!-- highlight-end -->
|
||||
</lemmatizer>
|
||||
</lemmatizers>
|
||||
```
|
||||
|
||||
## synonyms
|
||||
|
||||
Finds synonyms to a given word. There are two types of synonym extensions: `plain` and `wordnet`.
|
||||
@ -131,18 +355,18 @@ With the `plain` extension type we need to provide a path to a simple text file,
|
||||
|
||||
With the `wordnet` extension type we need to provide a path to a directory with WordNet thesaurus in it. Thesaurus must contain a WordNet sense index.
|
||||
|
||||
### Syntax
|
||||
*Syntax*
|
||||
|
||||
``` sql
|
||||
synonyms('extension_name', word)
|
||||
```
|
||||
|
||||
### Arguments
|
||||
*Arguments*
|
||||
|
||||
- `extension_name` — Name of the extension in which search will be performed. [String](../data-types/string.md#string).
|
||||
- `word` — Word that will be searched in extension. [String](../data-types/string.md#string).
|
||||
|
||||
### Examples
|
||||
*Examples*
|
||||
|
||||
Query:
|
||||
|
||||
@ -158,7 +382,7 @@ Result:
|
||||
└──────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
### Configuration
|
||||
*Configuration*
|
||||
``` xml
|
||||
<synonyms_extensions>
|
||||
<extension>
|
||||
@ -172,154 +396,4 @@ Result:
|
||||
<path>en/</path>
|
||||
</extension>
|
||||
</synonyms_extensions>
|
||||
```
|
||||
|
||||
## detectLanguage
|
||||
|
||||
Detects the language of the UTF8-encoded input string. The function uses the [CLD2 library](https://github.com/CLD2Owners/cld2) for detection, and it returns the 2-letter ISO language code.
|
||||
|
||||
The `detectLanguage` function works best when providing over 200 characters in the input string.
|
||||
|
||||
### Syntax
|
||||
|
||||
``` sql
|
||||
detectLanguage('text_to_be_analyzed')
|
||||
```
|
||||
|
||||
### Arguments
|
||||
|
||||
- `text_to_be_analyzed` — A collection (or sentences) of strings to analyze. [String](../data-types/string.md#string).
|
||||
|
||||
### Returned value
|
||||
|
||||
- The 2-letter ISO code of the detected language
|
||||
|
||||
Other possible results:
|
||||
|
||||
- `un` = unknown, can not detect any language.
|
||||
- `other` = the detected language does not have 2 letter code.
|
||||
|
||||
### Examples
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
SELECT detectLanguage('Je pense que je ne parviendrai jamais à parler français comme un natif. Where there’s a will, there’s a way.');
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```response
|
||||
fr
|
||||
```
|
||||
|
||||
## detectLanguageMixed
|
||||
|
||||
Similar to the `detectLanguage` function, but `detectLanguageMixed` returns a `Map` of 2-letter language codes that are mapped to the percentage of the certain language in the text.
|
||||
|
||||
|
||||
### Syntax
|
||||
|
||||
``` sql
|
||||
detectLanguageMixed('text_to_be_analyzed')
|
||||
```
|
||||
|
||||
### Arguments
|
||||
|
||||
- `text_to_be_analyzed` — A collection (or sentences) of strings to analyze. [String](../data-types/string.md#string).
|
||||
|
||||
### Returned value
|
||||
|
||||
- `Map(String, Float32)`: The keys are 2-letter ISO codes and the values are a percentage of text found for that language
|
||||
|
||||
|
||||
### Examples
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
SELECT detectLanguageMixed('二兎を追う者は一兎をも得ず二兎を追う者は一兎をも得ず A vaincre sans peril, on triomphe sans gloire.');
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```response
|
||||
┌─detectLanguageMixed()─┐
|
||||
│ {'ja':0.62,'fr':0.36 │
|
||||
└───────────────────────┘
|
||||
```
|
||||
|
||||
## detectLanguageUnknown
|
||||
|
||||
Similar to the `detectLanguage` function, except the `detectLanguageUnknown` function works with non-UTF8-encoded strings. Prefer this version when your character set is UTF-16 or UTF-32.
|
||||
|
||||
|
||||
### Syntax
|
||||
|
||||
``` sql
|
||||
detectLanguageUnknown('text_to_be_analyzed')
|
||||
```
|
||||
|
||||
### Arguments
|
||||
|
||||
- `text_to_be_analyzed` — A collection (or sentences) of strings to analyze. [String](../data-types/string.md#string).
|
||||
|
||||
### Returned value
|
||||
|
||||
- The 2-letter ISO code of the detected language
|
||||
|
||||
Other possible results:
|
||||
|
||||
- `un` = unknown, can not detect any language.
|
||||
- `other` = the detected language does not have 2 letter code.
|
||||
|
||||
### Examples
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
SELECT detectLanguageUnknown('Ich bleibe für ein paar Tage.');
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```response
|
||||
┌─detectLanguageUnknown('Ich bleibe für ein paar Tage.')─┐
|
||||
│ de │
|
||||
└────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## detectCharset
|
||||
|
||||
The `detectCharset` function detects the character set of the non-UTF8-encoded input string.
|
||||
|
||||
|
||||
### Syntax
|
||||
|
||||
``` sql
|
||||
detectCharset('text_to_be_analyzed')
|
||||
```
|
||||
|
||||
### Arguments
|
||||
|
||||
- `text_to_be_analyzed` — A collection (or sentences) of strings to analyze. [String](../data-types/string.md#string).
|
||||
|
||||
### Returned value
|
||||
|
||||
- A `String` containing the code of the detected character set
|
||||
|
||||
### Examples
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
SELECT detectCharset('Ich bleibe für ein paar Tage.');
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```response
|
||||
┌─detectCharset('Ich bleibe für ein paar Tage.')─┐
|
||||
│ WINDOWS-1252 │
|
||||
└────────────────────────────────────────────────┘
|
||||
```
|
||||
```
|
@ -3820,3 +3820,178 @@ Result:
|
||||
10. │ df │ │
|
||||
└────┴───────────────────────┘
|
||||
```
|
||||
|
||||
## displayName
|
||||
|
||||
Returns the value of `display_name` from [config](../../operations/configuration-files.md/#configuration-files) or server Fully Qualified Domain Name (FQDN) if not set.
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
displayName()
|
||||
```
|
||||
|
||||
**Returned value**
|
||||
|
||||
- Value of `display_name` from config or server FQDN if not set. [String](../data-types/string.md).
|
||||
|
||||
**Example**
|
||||
|
||||
The `display_name` can be set in `config.xml`. Taking for example a server with `display_name` configured to 'production':
|
||||
|
||||
```xml
|
||||
<!-- It is the name that will be shown in the clickhouse-client.
|
||||
By default, anything with "production" will be highlighted in red in query prompt.
|
||||
-->
|
||||
<display_name>production</display_name>
|
||||
```
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
SELECT displayName();
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```response
|
||||
┌─displayName()─┐
|
||||
│ production │
|
||||
└───────────────┘
|
||||
```
|
||||
|
||||
## transactionID
|
||||
|
||||
Returns the ID of a [transaction](https://clickhouse.com/docs/en/guides/developer/transactional#transactions-commit-and-rollback).
|
||||
|
||||
:::note
|
||||
This function is part of an experimental feature set. Enable experimental transaction support by adding this setting to your configuration:
|
||||
|
||||
```
|
||||
<clickhouse>
|
||||
<allow_experimental_transactions>1</allow_experimental_transactions>
|
||||
</clickhouse>
|
||||
```
|
||||
|
||||
For more information see the page [Transactional (ACID) support](https://clickhouse.com/docs/en/guides/developer/transactional#transactions-commit-and-rollback).
|
||||
:::
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
transactionID()
|
||||
```
|
||||
|
||||
**Returned value**
|
||||
|
||||
- Returns a tuple consisting of `start_csn`, `local_tid` and `host_id`. [Tuple](../data-types/tuple.md).
|
||||
|
||||
- `start_csn`: Global sequential number, the newest commit timestamp that was seen when this transaction began. [UInt64](../data-types/int-uint.md).
|
||||
- `local_tid`: Local sequential number that is unique for each transaction started by this host within a specific start_csn. [UInt64](../data-types/int-uint.md).
|
||||
- `host_id`: UUID of the host that has started this transaction. [UUID](../data-types/uuid.md).
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
BEGIN TRANSACTION;
|
||||
SELECT transactionID();
|
||||
ROLLBACK;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```response
|
||||
┌─transactionID()────────────────────────────────┐
|
||||
│ (32,34,'0ee8b069-f2bb-4748-9eae-069c85b5252b') │
|
||||
└────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## transactionLatestSnapshot
|
||||
|
||||
Returns the newest snapshot (Commit Sequence Number) of a [transaction](https://clickhouse.com/docs/en/guides/developer/transactional#transactions-commit-and-rollback) that is available for reading.
|
||||
|
||||
:::note
|
||||
This function is part of an experimental feature set. Enable experimental transaction support by adding this setting to your configuration:
|
||||
|
||||
```
|
||||
<clickhouse>
|
||||
<allow_experimental_transactions>1</allow_experimental_transactions>
|
||||
</clickhouse>
|
||||
```
|
||||
|
||||
For more information see the page [Transactional (ACID) support](https://clickhouse.com/docs/en/guides/developer/transactional#transactions-commit-and-rollback).
|
||||
:::
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
transactionLatestSnapshot()
|
||||
```
|
||||
|
||||
**Returned value**
|
||||
|
||||
- Returns the latest snapshot (CSN) of a transaction. [UInt64](../data-types/int-uint.md)
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
BEGIN TRANSACTION;
|
||||
SELECT transactionLatestSnapshot();
|
||||
ROLLBACK;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```response
|
||||
┌─transactionLatestSnapshot()─┐
|
||||
│ 32 │
|
||||
└─────────────────────────────┘
|
||||
```
|
||||
|
||||
## transactionOldestSnapshot
|
||||
|
||||
Returns the oldest snapshot (Commit Sequence Number) that is visible for some running [transaction](https://clickhouse.com/docs/en/guides/developer/transactional#transactions-commit-and-rollback).
|
||||
|
||||
:::note
|
||||
This function is part of an experimental feature set. Enable experimental transaction support by adding this setting to your configuration:
|
||||
|
||||
```
|
||||
<clickhouse>
|
||||
<allow_experimental_transactions>1</allow_experimental_transactions>
|
||||
</clickhouse>
|
||||
```
|
||||
|
||||
For more information see the page [Transactional (ACID) support](https://clickhouse.com/docs/en/guides/developer/transactional#transactions-commit-and-rollback).
|
||||
:::
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
transactionOldestSnapshot()
|
||||
```
|
||||
|
||||
**Returned value**
|
||||
|
||||
- Returns the oldest snapshot (CSN) of a transaction. [UInt64](../data-types/int-uint.md)
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
BEGIN TRANSACTION;
|
||||
SELECT transactionLatestSnapshot();
|
||||
ROLLBACK;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```response
|
||||
┌─transactionOldestSnapshot()─┐
|
||||
│ 32 │
|
||||
└─────────────────────────────┘
|
||||
```
|
||||
|
@ -579,7 +579,6 @@ If the length of the UTF-8 byte sequence is different for upper and lower case o
|
||||
|
||||
Converts a string to uppercase, assuming that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined.
|
||||
|
||||
Does not detect the language, e.g. for Turkish the result might not be exactly correct (i/İ vs. i/I).
|
||||
|
||||
If the length of the UTF-8 byte sequence is different for upper and lower case of a code point, the result may be incorrect for this code point.
|
||||
|
||||
@ -736,7 +735,7 @@ concat(s1, s2, ...)
|
||||
|
||||
**Arguments**
|
||||
|
||||
At least one value of arbitrary type.
|
||||
Values of arbitrary type.
|
||||
|
||||
Arguments which are not of types [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md) are converted to strings using their default serialization. As this decreases performance, it is not recommended to use non-String/FixedString arguments.
|
||||
|
||||
@ -1168,14 +1167,14 @@ Result:
|
||||
└────────────────────────────┘
|
||||
```
|
||||
|
||||
## base64UrlEncode
|
||||
## base64URLEncode
|
||||
|
||||
Encodes an URL (String or FixedString) as base64 with URL-specific modifications, according to [RFC 4648](https://datatracker.ietf.org/doc/html/rfc4648#section-5).
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
base64UrlEncode(url)
|
||||
base64URLEncode(url)
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
@ -1189,13 +1188,13 @@ base64UrlEncode(url)
|
||||
**Example**
|
||||
|
||||
``` sql
|
||||
SELECT base64UrlEncode('https://clickhouse.com');
|
||||
SELECT base64URLEncode('https://clickhouse.com');
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```result
|
||||
┌─base64UrlEncode('https://clickhouse.com')─┐
|
||||
┌─base64URLEncode('https://clickhouse.com')─┐
|
||||
│ aHR0cDovL2NsaWNraG91c2UuY29t │
|
||||
└───────────────────────────────────────────┘
|
||||
```
|
||||
@ -1234,19 +1233,19 @@ Result:
|
||||
└──────────────────────────────────┘
|
||||
```
|
||||
|
||||
## base64UrlDecode
|
||||
## base64URLDecode
|
||||
|
||||
Accepts a base64-encoded URL and decodes it from base64 with URL-specific modifications, according to [RFC 4648](https://datatracker.ietf.org/doc/html/rfc4648#section-5). Throws an exception in case of an error.
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
base64UrlDecode(encodedUrl)
|
||||
base64URLDecode(encodedUrl)
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `encodedUrl` — [String](../data-types/string.md) column or constant. If the string is not a valid Base64-encoded value with URL-specific modifications, an exception is thrown.
|
||||
- `encodedURL` — [String](../data-types/string.md) column or constant. If the string is not a valid Base64-encoded value with URL-specific modifications, an exception is thrown.
|
||||
|
||||
**Returned value**
|
||||
|
||||
@ -1255,13 +1254,13 @@ base64UrlDecode(encodedUrl)
|
||||
**Example**
|
||||
|
||||
``` sql
|
||||
SELECT base64UrlDecode('aHR0cDovL2NsaWNraG91c2UuY29t');
|
||||
SELECT base64URLDecode('aHR0cDovL2NsaWNraG91c2UuY29t');
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```result
|
||||
┌─base64UrlDecode('aHR0cDovL2NsaWNraG91c2UuY29t')─┐
|
||||
┌─base64URLDecode('aHR0cDovL2NsaWNraG91c2UuY29t')─┐
|
||||
│ https://clickhouse.com │
|
||||
└─────────────────────────────────────────────────┘
|
||||
```
|
||||
@ -1298,19 +1297,19 @@ SELECT tryBase64Decode('RW5jb2RlZA==') as res, tryBase64Decode('invalid') as res
|
||||
└────────────┴─────────────┘
|
||||
```
|
||||
|
||||
## tryBase64UrlDecode
|
||||
## tryBase64URLDecode
|
||||
|
||||
Like `base64UrlDecode` but returns an empty string in case of error.
|
||||
Like `base64URLDecode` but returns an empty string in case of error.
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
tryBase64UrlDecode(encodedUrl)
|
||||
tryBase64URLDecode(encodedUrl)
|
||||
```
|
||||
|
||||
**Parameters**
|
||||
|
||||
- `encodedUrl`: [String](../data-types/string.md) column or constant. If the string is not a valid Base64-encoded value with URL-specific modifications, returns an empty string.
|
||||
- `encodedURL`: [String](../data-types/string.md) column or constant. If the string is not a valid Base64-encoded value with URL-specific modifications, returns an empty string.
|
||||
|
||||
**Returned value**
|
||||
|
||||
@ -1321,7 +1320,7 @@ tryBase64UrlDecode(encodedUrl)
|
||||
Query:
|
||||
|
||||
```sql
|
||||
SELECT tryBase64UrlDecode('aHR0cDovL2NsaWNraG91c2UuY29t') as res, tryBase64Decode('aHR0cHM6Ly9jbGlja') as res_invalid;
|
||||
SELECT tryBase64URLDecode('aHR0cDovL2NsaWNraG91c2UuY29t') as res, tryBase64Decode('aHR0cHM6Ly9jbGlja') as res_invalid;
|
||||
```
|
||||
|
||||
```response
|
||||
|
@ -818,6 +818,40 @@ The same as above, but including query string and fragment.
|
||||
|
||||
Example: `/top/news.html?page=2#comments`.
|
||||
|
||||
### protocol
|
||||
|
||||
Extracts the protocol from a URL.
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
protocol(url)
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `url` — URL to extract protocol from. [String](../data-types/string.md).
|
||||
|
||||
**Returned value**
|
||||
|
||||
- Protocol, or an empty string if it cannot be determined. [String](../data-types/string.md).
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
SELECT protocol('https://clickhouse.com/');
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```response
|
||||
┌─protocol('https://clickhouse.com/')─┐
|
||||
│ https │
|
||||
└─────────────────────────────────────┘
|
||||
```
|
||||
|
||||
### queryString
|
||||
|
||||
Returns the query string without the initial question mark, `#` and everything after `#`.
|
||||
|
@ -9,8 +9,8 @@ sidebar_label: CONSTRAINT
|
||||
Constraints could be added or deleted using following syntax:
|
||||
|
||||
``` sql
|
||||
ALTER TABLE [db].name [ON CLUSTER cluster] ADD CONSTRAINT constraint_name CHECK expression;
|
||||
ALTER TABLE [db].name [ON CLUSTER cluster] DROP CONSTRAINT constraint_name;
|
||||
ALTER TABLE [db].name [ON CLUSTER cluster] ADD CONSTRAINT [IF NOT EXISTS] constraint_name CHECK expression;
|
||||
ALTER TABLE [db].name [ON CLUSTER cluster] DROP CONSTRAINT [IF EXISTS] constraint_name;
|
||||
```
|
||||
|
||||
See more on [constraints](../../../sql-reference/statements/create/table.md#constraints).
|
||||
|
@ -3,6 +3,10 @@ slug: /en/sql-reference/statements/alter/named-collection
|
||||
sidebar_label: NAMED COLLECTION
|
||||
---
|
||||
|
||||
import CloudNotSupportedBadge from '@theme/badges/CloudNotSupportedBadge';
|
||||
|
||||
<CloudNotSupportedBadge />
|
||||
|
||||
# ALTER NAMED COLLECTION
|
||||
|
||||
This query intends to modify already existing named collections.
|
||||
|
@ -12,7 +12,7 @@ Syntax:
|
||||
``` sql
|
||||
ALTER USER [IF EXISTS] name1 [ON CLUSTER cluster_name1] [RENAME TO new_name1]
|
||||
[, name2 [ON CLUSTER cluster_name2] [RENAME TO new_name2] ...]
|
||||
[NOT IDENTIFIED | IDENTIFIED {[WITH {no_password | plaintext_password | sha256_password | sha256_hash | double_sha1_password | double_sha1_hash}] BY {'password' | 'hash'}} | {WITH ldap SERVER 'server_name'} | {WITH kerberos [REALM 'realm']} | {WITH ssl_certificate CN 'common_name'}]
|
||||
[NOT IDENTIFIED | IDENTIFIED {[WITH {no_password | plaintext_password | sha256_password | sha256_hash | double_sha1_password | double_sha1_hash}] BY {'password' | 'hash'}} | {WITH ldap SERVER 'server_name'} | {WITH kerberos [REALM 'realm']} | {WITH ssl_certificate CN 'common_name' | SAN 'TYPE:subject_alt_name'}]
|
||||
[[ADD | DROP] HOST {LOCAL | NAME 'name' | REGEXP 'name_regexp' | IP 'address' | LIKE 'pattern'} [,...] | ANY | NONE]
|
||||
[VALID UNTIL datetime]
|
||||
[DEFAULT ROLE role [,...] | ALL | ALL EXCEPT role [,...] ]
|
||||
|
@ -134,8 +134,8 @@ PRIMARY KEY (event_type, ts)
|
||||
ORDER BY (event_type, ts, browser)
|
||||
SETTINGS index_granularity = 8192
|
||||
|
||||
-- !!! The columns' definition is unchanged but it does not matter, we are not quering
|
||||
-- MATERIALIZED VIEW, we are quering TO (storage) table.
|
||||
-- !!! The columns' definition is unchanged but it does not matter, we are not querying
|
||||
-- MATERIALIZED VIEW, we are querying TO (storage) table.
|
||||
-- SELECT section is updated.
|
||||
|
||||
SHOW CREATE TABLE mv FORMAT TSVRaw;
|
||||
|
@ -3,6 +3,10 @@ slug: /en/sql-reference/statements/create/named-collection
|
||||
sidebar_label: NAMED COLLECTION
|
||||
---
|
||||
|
||||
import CloudNotSupportedBadge from '@theme/badges/CloudNotSupportedBadge';
|
||||
|
||||
<CloudNotSupportedBadge />
|
||||
|
||||
# CREATE NAMED COLLECTION
|
||||
|
||||
Creates a new named collection.
|
||||
|
@ -152,7 +152,7 @@ SELECT * FROM test;
|
||||
|
||||
`MATERIALIZED expr`
|
||||
|
||||
Materialized expression. Values of such columns are always calculated, they cannot be specified in INSERT queries.
|
||||
Materialized expression. Values of such columns are automatically calculated according to the specified materialized expression when rows are inserted. Values cannot be explicitly specified during `INSERT`s.
|
||||
|
||||
Also, default value columns of this type are not included in the result of `SELECT *`. This is to preserve the invariant that the result of a `SELECT *` can always be inserted back into the table using `INSERT`. This behavior can be disabled with setting `asterisk_include_materialized_columns`.
|
||||
|
||||
|
@ -12,7 +12,7 @@ Syntax:
|
||||
``` sql
|
||||
CREATE USER [IF NOT EXISTS | OR REPLACE] name1 [ON CLUSTER cluster_name1]
|
||||
[, name2 [ON CLUSTER cluster_name2] ...]
|
||||
[NOT IDENTIFIED | IDENTIFIED {[WITH {no_password | plaintext_password | sha256_password | sha256_hash | double_sha1_password | double_sha1_hash}] BY {'password' | 'hash'}} | {WITH ldap SERVER 'server_name'} | {WITH kerberos [REALM 'realm']} | {WITH ssl_certificate CN 'common_name'} | {WITH ssh_key BY KEY 'public_key' TYPE 'ssh-rsa|...'} | {WITH http SERVER 'server_name' [SCHEME 'Basic']}]
|
||||
[NOT IDENTIFIED | IDENTIFIED {[WITH {no_password | plaintext_password | sha256_password | sha256_hash | double_sha1_password | double_sha1_hash}] BY {'password' | 'hash'}} | {WITH ldap SERVER 'server_name'} | {WITH kerberos [REALM 'realm']} | {WITH ssl_certificate CN 'common_name' | SAN 'TYPE:subject_alt_name'} | {WITH ssh_key BY KEY 'public_key' TYPE 'ssh-rsa|...'} | {WITH http SERVER 'server_name' [SCHEME 'Basic']}]
|
||||
[HOST {LOCAL | NAME 'name' | REGEXP 'name_regexp' | IP 'address' | LIKE 'pattern'} [,...] | ANY | NONE]
|
||||
[VALID UNTIL datetime]
|
||||
[IN access_storage_type]
|
||||
|
@ -269,9 +269,9 @@ FROM s3(
|
||||
|
||||
## Virtual Columns {#virtual-columns}
|
||||
|
||||
- `_path` — Path to the file. Type: `LowCardinalty(String)`.
|
||||
- `_file` — Name of the file. Type: `LowCardinalty(String)`.
|
||||
- `_size` — Size of the file in bytes. Type: `Nullable(UInt64)`. If the file size is unknown, the value is `NULL`.
|
||||
- `_path` — Path to the file. Type: `LowCardinalty(String)`. In case of archive, shows path in a format: "{path_to_archive}::{path_to_file_inside_archive}"
|
||||
- `_file` — Name of the file. Type: `LowCardinalty(String)`. In case of archive shows name of the file inside the archive.
|
||||
- `_size` — Size of the file in bytes. Type: `Nullable(UInt64)`. If the file size is unknown, the value is `NULL`. In case of archive shows uncompressed file size of the file inside the archive.
|
||||
- `_time` — Last modified time of the file. Type: `Nullable(DateTime)`. If the time is unknown, the value is `NULL`.
|
||||
|
||||
## Storage Settings {#storage-settings}
|
||||
|
@ -80,8 +80,8 @@ These functions can be used only as a window function.
|
||||
- `nth_value(x, offset)` - Return the first non-NULL value evaluated against the nth row (offset) in its ordered frame.
|
||||
- `rank()` - Rank the current row within its partition with gaps.
|
||||
- `dense_rank()` - Rank the current row within its partition without gaps.
|
||||
- `lagInFrame(x)` - Return a value evaluated at the row that is at a specified physical offset row before the current row within the ordered frame.
|
||||
- `leadInFrame(x)` - Return a value evaluated at the row that is offset rows after the current row within the ordered frame.
|
||||
- `lagInFrame(x[, offset[, default]])` - Return a value evaluated at the row that is at a specified physical offset row before the current row within the ordered frame. The offset parameter, if not specified, defaults to 1, meaning it will fetch the value from the next row. If the calculated row exceeds the boundaries of the window frame, the specified default value is returned.
|
||||
- `leadInFrame(x[, offset[, default]])` - Return a value evaluated at the row that is offset rows after the current row within the ordered frame. If offset is not provided, it defaults to 1. If the offset leads to a position outside the window frame, the specified default value is used.
|
||||
|
||||
## Examples
|
||||
|
||||
|
@ -283,7 +283,7 @@ Pull request можно создать, даже если работа над з
|
||||
|
||||
Тесты будут запущены, как только сотрудники ClickHouse поставят для pull request тег «Can be tested». Результаты первых проверок (стиль кода) появятся уже через несколько минут. Результаты сборки появятся примерно через пол часа. Результаты основного набора тестов будут доступны в пределах часа.
|
||||
|
||||
Система подготовит сборки ClickHouse специально для вашего pull request. Для их получения, нажмите на ссылку «Details» у проверки «Clickhouse build check». Там вы сможете найти прямые ссылки на собранные .deb пакеты ClickHouse, которые, при желании, вы даже сможете установить на свои продакшен серверы (если не страшно).
|
||||
Система подготовит сборки ClickHouse специально для вашего pull request. Для их получения, нажмите на ссылку «Details» у проверки «Builds». Там вы сможете найти прямые ссылки на собранные .deb пакеты ClickHouse, которые, при желании, вы даже сможете установить на свои продакшен серверы (если не страшно).
|
||||
|
||||
Вероятнее всего, часть сборок не будет успешной с первого раза. Ведь мы проверяем сборку кода и gcc и clang, а при сборке с помощью clang включаются почти все существующие в природе warnings (всегда с флагом `-Werror`). На той же странице, вы сможете найти логи сборки - вам не обязательно самому собирать ClickHouse всеми возможными способами.
|
||||
|
||||
|
@ -141,6 +141,7 @@ $ clickhouse-client --param_tbl="numbers" --param_db="system" --param_col="numbe
|
||||
- `--secure` — если указано, будет использован безопасный канал.
|
||||
- `--history_file` - путь к файлу с историей команд.
|
||||
- `--param_<name>` — значение параметра для [запроса с параметрами](#cli-queries-with-parameters).
|
||||
- `--jwt` – авторизация с использованием JSON Web Token. Доступно только в ClickHouse Cloud.
|
||||
|
||||
Вместо параметров `--host`, `--port`, `--user` и `--password` клиент ClickHouse также поддерживает строки подключения (смотри следующий раздел).
|
||||
|
||||
|
@ -3,23 +3,30 @@ slug: /ru/operations/external-authenticators/ssl-x509
|
||||
---
|
||||
# Аутентификация по сертификату SSL X.509 {#ssl-external-authentication}
|
||||
|
||||
[Опция 'strict'](../server-configuration-parameters/settings.md#server_configuration_parameters-openssl) включает обязательную проверку сертификатов входящих соединений в библиотеке `SSL`. В этом случае могут быть установлены только соединения, представившие действительный сертификат. Соединения с недоверенными сертификатами будут отвергнуты. Таким образом, проверка сертификата позволяет однозначно аутентифицировать входящее соединение. Идентификация пользователя осуществляется по полю `Common Name` сертификата. Это позволяет ассоциировать несколько сертификатов с одним и тем же пользователем. Дополнительно, перевыпуск и отзыв сертификата не требуют изменения конфигурации ClickHouse.
|
||||
[Опция 'strict'](../server-configuration-parameters/settings.md#server_configuration_parameters-openssl) включает обязательную проверку сертификатов входящих соединений в библиотеке `SSL`. В этом случае могут быть установлены только соединения, представившие действительный сертификат. Соединения с недоверенными сертификатами будут отвергнуты. Таким образом, проверка сертификата позволяет однозначно аутентифицировать входящее соединение. Идентификация пользователя осуществляется по полю `Common Name` или `subjectAltName` сертификата. Это позволяет ассоциировать несколько сертификатов с одним и тем же пользователем. Дополнительно, перевыпуск и отзыв сертификата не требуют изменения конфигурации ClickHouse.
|
||||
|
||||
Для включения аутентификации по SSL сертификату, необходимо указать список `Common Name` для каждого пользователя ClickHouse в файле настройки `config.xml`:
|
||||
Для включения аутентификации по SSL сертификату, необходимо указать список `Common Name` или `subjectAltName` для каждого пользователя ClickHouse в файле настройки `config.xml`:
|
||||
|
||||
**Example**
|
||||
```xml
|
||||
<clickhouse>
|
||||
<!- ... -->
|
||||
<users>
|
||||
<user_name>
|
||||
<certificates>
|
||||
<user_name_1>
|
||||
<ssl_certificates>
|
||||
<common_name>host.domain.com:example_user</common_name>
|
||||
<common_name>host.domain.com:example_user_dev</common_name>
|
||||
<!-- More names -->
|
||||
</certificates>
|
||||
</ssl_certificates>
|
||||
<!-- Other settings -->
|
||||
</user_name>
|
||||
</user_name_1>
|
||||
<user_name_2>
|
||||
<ssl_certificates>
|
||||
<subject_alt_name>DNS:host.domain.com</subject_alt_name>
|
||||
<!-- More names -->
|
||||
</ssl_certificates>
|
||||
<!-- Other settings -->
|
||||
</user_name_2>
|
||||
</users>
|
||||
</clickhouse>
|
||||
```
|
||||
|
@ -2077,7 +2077,7 @@ SELECT * FROM test_table
|
||||
- 0 — оптимизация отключена.
|
||||
- 1 — оптимизация включена.
|
||||
|
||||
Значение по умолчанию: `0`.
|
||||
Значение по умолчанию: `1`.
|
||||
|
||||
## optimize_trivial_count_query {#optimize-trivial-count-query}
|
||||
|
||||
|
@ -538,7 +538,7 @@ SELECT base58Decode('3dc8KtHrwM');
|
||||
|
||||
Синоним: `TO_BASE64`.
|
||||
|
||||
## base64UrlEncode(s)
|
||||
## base64URLEncode(s)
|
||||
|
||||
Производит кодирование URL (String или FixedString) в base64-представление в соответствии с [RFC 4648](https://tools.ietf.org/html/rfc4648).
|
||||
|
||||
@ -548,7 +548,7 @@ SELECT base58Decode('3dc8KtHrwM');
|
||||
|
||||
Синоним: `FROM_BASE64`.
|
||||
|
||||
## base64UrlDecode(s)
|
||||
## base64URLDecode(s)
|
||||
|
||||
Декодирует base64-представление URL в исходную строку в соответствии с [RFC 4648](https://tools.ietf.org/html/rfc4648). При невозможности декодирования выбрасывает исключение
|
||||
|
||||
@ -556,9 +556,9 @@ SELECT base58Decode('3dc8KtHrwM');
|
||||
|
||||
Функционал аналогичен base64Decode, но при невозможности декодирования возвращает пустую строку.
|
||||
|
||||
## tryBase64UrlDecode(s)
|
||||
## tryBase64URLDecode(s)
|
||||
|
||||
Функционал аналогичен base64UrlDecode, но при невозможности декодирования возвращает пустую строку.
|
||||
Функционал аналогичен base64URLDecode, но при невозможности декодирования возвращает пустую строку.
|
||||
|
||||
## endsWith(s, suffix) {#endswith}
|
||||
|
||||
|
@ -11,8 +11,8 @@ sidebar_label: "Манипуляции с ограничениями"
|
||||
Добавить или удалить ограничение можно с помощью запросов
|
||||
|
||||
``` sql
|
||||
ALTER TABLE [db].name [ON CLUSTER cluster] ADD CONSTRAINT constraint_name CHECK expression;
|
||||
ALTER TABLE [db].name [ON CLUSTER cluster] DROP CONSTRAINT constraint_name;
|
||||
ALTER TABLE [db].name [ON CLUSTER cluster] ADD CONSTRAINT [IF NOT EXISTS] constraint_name CHECK expression;
|
||||
ALTER TABLE [db].name [ON CLUSTER cluster] DROP CONSTRAINT [IF EXISTS] constraint_name;
|
||||
```
|
||||
|
||||
Запросы выполняют добавление или удаление метаданных об ограничениях таблицы `[db].name`, поэтому выполняются мгновенно.
|
||||
|
@ -13,7 +13,7 @@ sidebar_label: "Пользователь"
|
||||
``` sql
|
||||
CREATE USER [IF NOT EXISTS | OR REPLACE] name1 [ON CLUSTER cluster_name1]
|
||||
[, name2 [ON CLUSTER cluster_name2] ...]
|
||||
[NOT IDENTIFIED | IDENTIFIED {[WITH {no_password | plaintext_password | sha256_password | sha256_hash | double_sha1_password | double_sha1_hash}] BY {'password' | 'hash'}} | {WITH ldap SERVER 'server_name'} | {WITH kerberos [REALM 'realm']} | {WITH ssl_certificate CN 'common_name'} | {WITH ssh_key BY KEY 'public_key' TYPE 'ssh-rsa|...'}]
|
||||
[NOT IDENTIFIED | IDENTIFIED {[WITH {no_password | plaintext_password | sha256_password | sha256_hash | double_sha1_password | double_sha1_hash}] BY {'password' | 'hash'}} | {WITH ldap SERVER 'server_name'} | {WITH kerberos [REALM 'realm']} | {WITH ssl_certificate CN 'common_name' | SAN 'TYPE:subject_alt_name'} | {WITH ssh_key BY KEY 'public_key' TYPE 'ssh-rsa|...'}]
|
||||
[HOST {LOCAL | NAME 'name' | REGEXP 'name_regexp' | IP 'address' | LIKE 'pattern'} [,...] | ANY | NONE]
|
||||
[DEFAULT ROLE role [,...]]
|
||||
[DEFAULT DATABASE database | NONE]
|
||||
|
@ -201,18 +201,18 @@ ClickHouse 不要求主键唯一,所以您可以插入多条具有相同主键
|
||||
|
||||
主键中列的数量并没有明确的限制。依据数据结构,您可以在主键包含多些或少些列。这样可以:
|
||||
|
||||
- 改善索引的性能。
|
||||
- 改善索引的性能。
|
||||
|
||||
- 如果当前主键是 `(a, b)` ,在下列情况下添加另一个 `c` 列会提升性能:
|
||||
如果当前主键是 `(a, b)` ,在下列情况下添加另一个 `c` 列会提升性能:
|
||||
|
||||
- 查询会使用 `c` 列作为条件
|
||||
- 很长的数据范围( `index_granularity` 的数倍)里 `(a, b)` 都是相同的值,并且这样的情况很普遍。换言之,就是加入另一列后,可以让您的查询略过很长的数据范围。
|
||||
- 查询会使用 `c` 列作为条件
|
||||
- 很长的数据范围( `index_granularity` 的数倍)里 `(a, b)` 都是相同的值,并且这样的情况很普遍。换言之,就是加入另一列后,可以让您的查询略过很长的数据范围。
|
||||
|
||||
- 改善数据压缩。
|
||||
- 改善数据压缩。
|
||||
|
||||
ClickHouse 以主键排序片段数据,所以,数据的一致性越高,压缩越好。
|
||||
ClickHouse 以主键排序片段数据,所以,数据的一致性越高,压缩越好。
|
||||
|
||||
- 在[CollapsingMergeTree](collapsingmergetree.md#table_engine-collapsingmergetree) 和 [SummingMergeTree](summingmergetree.md) 引擎里进行数据合并时会提供额外的处理逻辑。
|
||||
- 在[CollapsingMergeTree](collapsingmergetree.md#table_engine-collapsingmergetree) 和 [SummingMergeTree](summingmergetree.md) 引擎里进行数据合并时会提供额外的处理逻辑。
|
||||
|
||||
在这种情况下,指定与主键不同的 *排序键* 也是有意义的。
|
||||
|
||||
|
@ -9,8 +9,8 @@ sidebar_label: 约束
|
||||
约束可以使用以下语法添加或删除:
|
||||
|
||||
``` sql
|
||||
ALTER TABLE [db].name ADD CONSTRAINT constraint_name CHECK expression;
|
||||
ALTER TABLE [db].name DROP CONSTRAINT constraint_name;
|
||||
ALTER TABLE [db].name [ON CLUSTER cluster] ADD CONSTRAINT [IF NOT EXISTS] constraint_name CHECK expression;
|
||||
ALTER TABLE [db].name [ON CLUSTER cluster] DROP CONSTRAINT [IF EXISTS] constraint_name;
|
||||
```
|
||||
|
||||
查看[constraints](../../../sql-reference/statements/create/table.mdx#constraints)。
|
||||
|
@ -64,6 +64,7 @@ namespace ErrorCodes
|
||||
extern const int NETWORK_ERROR;
|
||||
extern const int AUTHENTICATION_FAILED;
|
||||
extern const int NO_ELEMENTS_IN_CONFIG;
|
||||
extern const int USER_EXPIRED;
|
||||
}
|
||||
|
||||
|
||||
@ -74,6 +75,12 @@ void Client::processError(const String & query) const
|
||||
fmt::print(stderr, "Received exception from server (version {}):\n{}\n",
|
||||
server_version,
|
||||
getExceptionMessage(*server_exception, print_stack_trace, true));
|
||||
|
||||
if (server_exception->code() == ErrorCodes::USER_EXPIRED)
|
||||
{
|
||||
server_exception->rethrow();
|
||||
}
|
||||
|
||||
if (is_interactive)
|
||||
{
|
||||
fmt::print(stderr, "\n");
|
||||
@ -241,6 +248,10 @@ std::vector<String> Client::loadWarningMessages()
|
||||
}
|
||||
}
|
||||
|
||||
Poco::Util::LayeredConfiguration & Client::getClientConfiguration()
|
||||
{
|
||||
return config();
|
||||
}
|
||||
|
||||
void Client::initialize(Poco::Util::Application & self)
|
||||
{
|
||||
@ -690,9 +701,7 @@ bool Client::processWithFuzzing(const String & full_query)
|
||||
const char * begin = full_query.data();
|
||||
orig_ast = parseQuery(begin, begin + full_query.size(),
|
||||
global_context->getSettingsRef(),
|
||||
/*allow_multi_statements=*/ true,
|
||||
/*is_interactive=*/ is_interactive,
|
||||
/*ignore_error=*/ ignore_error);
|
||||
/*allow_multi_statements=*/ true);
|
||||
}
|
||||
catch (const Exception & e)
|
||||
{
|
||||
@ -944,6 +953,7 @@ void Client::addOptions(OptionsDescription & options_description)
|
||||
("ssh-key-file", po::value<std::string>(), "File containing the SSH private key for authenticate with the server.")
|
||||
("ssh-key-passphrase", po::value<std::string>(), "Passphrase for the SSH private key specified by --ssh-key-file.")
|
||||
("quota_key", po::value<std::string>(), "A string to differentiate quotas when the user have keyed quotas configured on server")
|
||||
("jwt", po::value<std::string>(), "Use JWT for authentication")
|
||||
|
||||
("max_client_network_bandwidth", po::value<int>(), "the maximum speed of data exchange over the network for the client in bytes per second.")
|
||||
("compression", po::value<bool>(), "enable or disable compression (enabled by default for remote communication and disabled for localhost communication).")
|
||||
@ -1102,6 +1112,12 @@ void Client::processOptions(const OptionsDescription & options_description,
|
||||
config().setBool("no-warnings", true);
|
||||
if (options.count("fake-drop"))
|
||||
config().setString("ignore_drop_queries_probability", "1");
|
||||
if (options.count("jwt"))
|
||||
{
|
||||
if (!options["user"].defaulted())
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "User and JWT flags can't be specified together");
|
||||
config().setString("jwt", options["jwt"].as<std::string>());
|
||||
}
|
||||
if (options.count("accept-invalid-certificate"))
|
||||
{
|
||||
config().setString("openSSL.client.invalidCertificateHandler.name", "AcceptCertificateHandler");
|
||||
|
@ -16,6 +16,9 @@ public:
|
||||
int main(const std::vector<String> & /*args*/) override;
|
||||
|
||||
protected:
|
||||
|
||||
Poco::Util::LayeredConfiguration & getClientConfiguration() override;
|
||||
|
||||
bool processWithFuzzing(const String & full_query) override;
|
||||
std::optional<bool> processFuzzingStep(const String & query_to_execute, const ASTPtr & parsed_query);
|
||||
|
||||
|
@ -383,6 +383,9 @@ int KeeperClient::main(const std::vector<String> & /* args */)
|
||||
|
||||
for (const auto & key : keys)
|
||||
{
|
||||
if (key != "node")
|
||||
continue;
|
||||
|
||||
String prefix = "zookeeper." + key;
|
||||
String host = clickhouse_config.configuration->getString(prefix + ".host");
|
||||
String port = clickhouse_config.configuration->getString(prefix + ".port");
|
||||
@ -401,6 +404,7 @@ int KeeperClient::main(const std::vector<String> & /* args */)
|
||||
zk_args.hosts.push_back(host + ":" + port);
|
||||
}
|
||||
|
||||
zk_args.availability_zones.resize(zk_args.hosts.size());
|
||||
zk_args.connection_timeout_ms = config().getInt("connection-timeout", 10) * 1000;
|
||||
zk_args.session_timeout_ms = config().getInt("session-timeout", 10) * 1000;
|
||||
zk_args.operation_timeout_ms = config().getInt("operation-timeout", 10) * 1000;
|
||||
|
@ -355,10 +355,7 @@ try
|
||||
|
||||
std::string include_from_path = config().getString("include_from", "/etc/metrika.xml");
|
||||
|
||||
if (config().has(DB::PlacementInfo::PLACEMENT_CONFIG_PREFIX))
|
||||
{
|
||||
PlacementInfo::PlacementInfo::instance().initialize(config());
|
||||
}
|
||||
PlacementInfo::PlacementInfo::instance().initialize(config());
|
||||
|
||||
GlobalThreadPool::initialize(
|
||||
/// We need to have sufficient amount of threads for connections + nuraft workers + keeper workers, 1000 is an estimation
|
||||
@ -577,8 +574,7 @@ try
|
||||
#if USE_SSL
|
||||
CertificateReloader::instance().tryLoad(*config);
|
||||
#endif
|
||||
},
|
||||
/* already_loaded = */ false); /// Reload it right now (initial loading)
|
||||
});
|
||||
|
||||
SCOPE_EXIT({
|
||||
LOG_INFO(log, "Shutting down.");
|
||||
|
@ -11,7 +11,6 @@ set (CLICKHOUSE_LIBRARY_BRIDGE_SOURCES
|
||||
LibraryBridgeHandlers.cpp
|
||||
SharedLibrary.cpp
|
||||
library-bridge.cpp
|
||||
createFunctionBaseCast.cpp
|
||||
)
|
||||
|
||||
clickhouse_add_executable(clickhouse-library-bridge ${CLICKHOUSE_LIBRARY_BRIDGE_SOURCES})
|
||||
@ -20,6 +19,7 @@ target_link_libraries(clickhouse-library-bridge PRIVATE
|
||||
daemon
|
||||
dbms
|
||||
bridge
|
||||
clickhouse_functions
|
||||
)
|
||||
|
||||
set_target_properties(clickhouse-library-bridge PROPERTIES RUNTIME_OUTPUT_DIRECTORY ..)
|
||||
|
@ -32,6 +32,7 @@
|
||||
#include <Common/quoteString.h>
|
||||
#include <Common/randomSeed.h>
|
||||
#include <Common/ThreadPool.h>
|
||||
#include <Common/CurrentMetrics.h>
|
||||
#include <Loggers/OwnFormattingChannel.h>
|
||||
#include <Loggers/OwnPatternFormatter.h>
|
||||
#include <IO/ReadBufferFromFile.h>
|
||||
@ -59,8 +60,13 @@
|
||||
# include <azure/storage/common/internal/xml_wrapper.hpp>
|
||||
#endif
|
||||
|
||||
|
||||
namespace fs = std::filesystem;
|
||||
|
||||
namespace CurrentMetrics
|
||||
{
|
||||
extern const Metric MemoryTracking;
|
||||
}
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -82,6 +88,11 @@ void applySettingsOverridesForLocal(ContextMutablePtr context)
|
||||
context->setSettings(settings);
|
||||
}
|
||||
|
||||
Poco::Util::LayeredConfiguration & LocalServer::getClientConfiguration()
|
||||
{
|
||||
return config();
|
||||
}
|
||||
|
||||
void LocalServer::processError(const String &) const
|
||||
{
|
||||
if (ignore_error)
|
||||
@ -117,20 +128,21 @@ void LocalServer::initialize(Poco::Util::Application & self)
|
||||
Poco::Util::Application::initialize(self);
|
||||
|
||||
/// Load config files if exists
|
||||
if (config().has("config-file") || fs::exists("config.xml"))
|
||||
if (getClientConfiguration().has("config-file") || fs::exists("config.xml"))
|
||||
{
|
||||
const auto config_path = config().getString("config-file", "config.xml");
|
||||
const auto config_path = getClientConfiguration().getString("config-file", "config.xml");
|
||||
ConfigProcessor config_processor(config_path, false, true);
|
||||
ConfigProcessor::setConfigPath(fs::path(config_path).parent_path());
|
||||
auto loaded_config = config_processor.loadConfig();
|
||||
config().add(loaded_config.configuration.duplicate(), PRIO_DEFAULT, false);
|
||||
getClientConfiguration().add(loaded_config.configuration.duplicate(), PRIO_DEFAULT, false);
|
||||
}
|
||||
|
||||
server_settings.loadSettingsFromConfig(config());
|
||||
|
||||
GlobalThreadPool::initialize(
|
||||
config().getUInt("max_thread_pool_size", 10000),
|
||||
config().getUInt("max_thread_pool_free_size", 1000),
|
||||
config().getUInt("thread_pool_queue_size", 10000)
|
||||
);
|
||||
server_settings.max_thread_pool_size,
|
||||
server_settings.max_thread_pool_free_size,
|
||||
server_settings.thread_pool_queue_size);
|
||||
|
||||
#if USE_AZURE_BLOB_STORAGE
|
||||
/// See the explanation near the same line in Server.cpp
|
||||
@ -141,18 +153,17 @@ void LocalServer::initialize(Poco::Util::Application & self)
|
||||
#endif
|
||||
|
||||
getIOThreadPool().initialize(
|
||||
config().getUInt("max_io_thread_pool_size", 100),
|
||||
config().getUInt("max_io_thread_pool_free_size", 0),
|
||||
config().getUInt("io_thread_pool_queue_size", 10000));
|
||||
server_settings.max_io_thread_pool_size,
|
||||
server_settings.max_io_thread_pool_free_size,
|
||||
server_settings.io_thread_pool_queue_size);
|
||||
|
||||
|
||||
const size_t active_parts_loading_threads = config().getUInt("max_active_parts_loading_thread_pool_size", 64);
|
||||
const size_t active_parts_loading_threads = server_settings.max_active_parts_loading_thread_pool_size;
|
||||
getActivePartsLoadingThreadPool().initialize(
|
||||
active_parts_loading_threads,
|
||||
0, // We don't need any threads one all the parts will be loaded
|
||||
active_parts_loading_threads);
|
||||
|
||||
const size_t outdated_parts_loading_threads = config().getUInt("max_outdated_parts_loading_thread_pool_size", 32);
|
||||
const size_t outdated_parts_loading_threads = server_settings.max_outdated_parts_loading_thread_pool_size;
|
||||
getOutdatedPartsLoadingThreadPool().initialize(
|
||||
outdated_parts_loading_threads,
|
||||
0, // We don't need any threads one all the parts will be loaded
|
||||
@ -160,7 +171,7 @@ void LocalServer::initialize(Poco::Util::Application & self)
|
||||
|
||||
getOutdatedPartsLoadingThreadPool().setMaxTurboThreads(active_parts_loading_threads);
|
||||
|
||||
const size_t unexpected_parts_loading_threads = config().getUInt("max_unexpected_parts_loading_thread_pool_size", 32);
|
||||
const size_t unexpected_parts_loading_threads = server_settings.max_unexpected_parts_loading_thread_pool_size;
|
||||
getUnexpectedPartsLoadingThreadPool().initialize(
|
||||
unexpected_parts_loading_threads,
|
||||
0, // We don't need any threads one all the parts will be loaded
|
||||
@ -168,7 +179,7 @@ void LocalServer::initialize(Poco::Util::Application & self)
|
||||
|
||||
getUnexpectedPartsLoadingThreadPool().setMaxTurboThreads(active_parts_loading_threads);
|
||||
|
||||
const size_t cleanup_threads = config().getUInt("max_parts_cleaning_thread_pool_size", 128);
|
||||
const size_t cleanup_threads = server_settings.max_parts_cleaning_thread_pool_size;
|
||||
getPartsCleaningThreadPool().initialize(
|
||||
cleanup_threads,
|
||||
0, // We don't need any threads one all the parts will be deleted
|
||||
@ -201,10 +212,10 @@ void LocalServer::tryInitPath()
|
||||
{
|
||||
std::string path;
|
||||
|
||||
if (config().has("path"))
|
||||
if (getClientConfiguration().has("path"))
|
||||
{
|
||||
// User-supplied path.
|
||||
path = config().getString("path");
|
||||
path = getClientConfiguration().getString("path");
|
||||
Poco::trimInPlace(path);
|
||||
|
||||
if (path.empty())
|
||||
@ -263,13 +274,13 @@ void LocalServer::tryInitPath()
|
||||
|
||||
global_context->setUserFilesPath(""); /// user's files are everywhere
|
||||
|
||||
std::string user_scripts_path = config().getString("user_scripts_path", fs::path(path) / "user_scripts/");
|
||||
std::string user_scripts_path = getClientConfiguration().getString("user_scripts_path", fs::path(path) / "user_scripts/");
|
||||
global_context->setUserScriptsPath(user_scripts_path);
|
||||
|
||||
/// top_level_domains_lists
|
||||
const std::string & top_level_domains_path = config().getString("top_level_domains_path", fs::path(path) / "top_level_domains/");
|
||||
const std::string & top_level_domains_path = getClientConfiguration().getString("top_level_domains_path", fs::path(path) / "top_level_domains/");
|
||||
if (!top_level_domains_path.empty())
|
||||
TLDListsHolder::getInstance().parseConfig(fs::path(top_level_domains_path) / "", config());
|
||||
TLDListsHolder::getInstance().parseConfig(fs::path(top_level_domains_path) / "", getClientConfiguration());
|
||||
}
|
||||
|
||||
|
||||
@ -311,14 +322,14 @@ void LocalServer::cleanup()
|
||||
|
||||
std::string LocalServer::getInitialCreateTableQuery()
|
||||
{
|
||||
if (!config().has("table-structure") && !config().has("table-file") && !config().has("table-data-format") && (!isRegularFile(STDIN_FILENO) || queries.empty()))
|
||||
if (!getClientConfiguration().has("table-structure") && !getClientConfiguration().has("table-file") && !getClientConfiguration().has("table-data-format") && (!isRegularFile(STDIN_FILENO) || queries.empty()))
|
||||
return {};
|
||||
|
||||
auto table_name = backQuoteIfNeed(config().getString("table-name", "table"));
|
||||
auto table_structure = config().getString("table-structure", "auto");
|
||||
auto table_name = backQuoteIfNeed(getClientConfiguration().getString("table-name", "table"));
|
||||
auto table_structure = getClientConfiguration().getString("table-structure", "auto");
|
||||
|
||||
String table_file;
|
||||
if (!config().has("table-file") || config().getString("table-file") == "-")
|
||||
if (!getClientConfiguration().has("table-file") || getClientConfiguration().getString("table-file") == "-")
|
||||
{
|
||||
/// Use Unix tools stdin naming convention
|
||||
table_file = "stdin";
|
||||
@ -326,7 +337,7 @@ std::string LocalServer::getInitialCreateTableQuery()
|
||||
else
|
||||
{
|
||||
/// Use regular file
|
||||
auto file_name = config().getString("table-file");
|
||||
auto file_name = getClientConfiguration().getString("table-file");
|
||||
table_file = quoteString(file_name);
|
||||
}
|
||||
|
||||
@ -374,18 +385,18 @@ void LocalServer::setupUsers()
|
||||
|
||||
ConfigurationPtr users_config;
|
||||
auto & access_control = global_context->getAccessControl();
|
||||
access_control.setNoPasswordAllowed(config().getBool("allow_no_password", true));
|
||||
access_control.setPlaintextPasswordAllowed(config().getBool("allow_plaintext_password", true));
|
||||
if (config().has("config-file") || fs::exists("config.xml"))
|
||||
access_control.setNoPasswordAllowed(getClientConfiguration().getBool("allow_no_password", true));
|
||||
access_control.setPlaintextPasswordAllowed(getClientConfiguration().getBool("allow_plaintext_password", true));
|
||||
if (getClientConfiguration().has("config-file") || fs::exists("config.xml"))
|
||||
{
|
||||
String config_path = config().getString("config-file", "");
|
||||
bool has_user_directories = config().has("user_directories");
|
||||
String config_path = getClientConfiguration().getString("config-file", "");
|
||||
bool has_user_directories = getClientConfiguration().has("user_directories");
|
||||
const auto config_dir = fs::path{config_path}.remove_filename().string();
|
||||
String users_config_path = config().getString("users_config", "");
|
||||
String users_config_path = getClientConfiguration().getString("users_config", "");
|
||||
|
||||
if (users_config_path.empty() && has_user_directories)
|
||||
{
|
||||
users_config_path = config().getString("user_directories.users_xml.path");
|
||||
users_config_path = getClientConfiguration().getString("user_directories.users_xml.path");
|
||||
if (fs::path(users_config_path).is_relative() && fs::exists(fs::path(config_dir) / users_config_path))
|
||||
users_config_path = fs::path(config_dir) / users_config_path;
|
||||
}
|
||||
@ -409,10 +420,10 @@ void LocalServer::setupUsers()
|
||||
|
||||
void LocalServer::connect()
|
||||
{
|
||||
connection_parameters = ConnectionParameters(config(), "localhost");
|
||||
connection_parameters = ConnectionParameters(getClientConfiguration(), "localhost");
|
||||
|
||||
ReadBuffer * in;
|
||||
auto table_file = config().getString("table-file", "-");
|
||||
auto table_file = getClientConfiguration().getString("table-file", "-");
|
||||
if (table_file == "-" || table_file == "stdin")
|
||||
{
|
||||
in = &std_in;
|
||||
@ -433,7 +444,7 @@ try
|
||||
UseSSL use_ssl;
|
||||
thread_status.emplace();
|
||||
|
||||
StackTrace::setShowAddresses(config().getBool("show_addresses_in_stack_traces", true));
|
||||
StackTrace::setShowAddresses(server_settings.show_addresses_in_stack_traces);
|
||||
|
||||
setupSignalHandler();
|
||||
|
||||
@ -448,7 +459,7 @@ try
|
||||
|
||||
if (rlim.rlim_cur < rlim.rlim_max)
|
||||
{
|
||||
rlim.rlim_cur = config().getUInt("max_open_files", static_cast<unsigned>(rlim.rlim_max));
|
||||
rlim.rlim_cur = getClientConfiguration().getUInt("max_open_files", static_cast<unsigned>(rlim.rlim_max));
|
||||
int rc = setrlimit(RLIMIT_NOFILE, &rlim);
|
||||
if (rc != 0)
|
||||
std::cerr << fmt::format("Cannot set max number of file descriptors to {}. Try to specify max_open_files according to your system limits. error: {}", rlim.rlim_cur, errnoToString()) << '\n';
|
||||
@ -456,8 +467,8 @@ try
|
||||
}
|
||||
|
||||
is_interactive = stdin_is_a_tty
|
||||
&& (config().hasOption("interactive")
|
||||
|| (queries.empty() && !config().has("table-structure") && queries_files.empty() && !config().has("table-file")));
|
||||
&& (getClientConfiguration().hasOption("interactive")
|
||||
|| (queries.empty() && !getClientConfiguration().has("table-structure") && queries_files.empty() && !getClientConfiguration().has("table-file")));
|
||||
|
||||
if (!is_interactive)
|
||||
{
|
||||
@ -481,7 +492,7 @@ try
|
||||
|
||||
SCOPE_EXIT({ cleanup(); });
|
||||
|
||||
initTTYBuffer(toProgressOption(config().getString("progress", "default")));
|
||||
initTTYBuffer(toProgressOption(getClientConfiguration().getString("progress", "default")));
|
||||
ASTAlterCommand::setFormatAlterCommandsWithParentheses(true);
|
||||
|
||||
applyCmdSettings(global_context);
|
||||
@ -489,7 +500,7 @@ try
|
||||
/// try to load user defined executable functions, throw on error and die
|
||||
try
|
||||
{
|
||||
global_context->loadOrReloadUserDefinedExecutableFunctions(config());
|
||||
global_context->loadOrReloadUserDefinedExecutableFunctions(getClientConfiguration());
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
@ -530,7 +541,7 @@ try
|
||||
}
|
||||
catch (const DB::Exception & e)
|
||||
{
|
||||
bool need_print_stack_trace = config().getBool("stacktrace", false);
|
||||
bool need_print_stack_trace = getClientConfiguration().getBool("stacktrace", false);
|
||||
std::cerr << getExceptionMessage(e, need_print_stack_trace, true) << std::endl;
|
||||
return e.code() ? e.code() : -1;
|
||||
}
|
||||
@ -542,42 +553,42 @@ catch (...)
|
||||
|
||||
void LocalServer::updateLoggerLevel(const String & logs_level)
|
||||
{
|
||||
config().setString("logger.level", logs_level);
|
||||
updateLevels(config(), logger());
|
||||
getClientConfiguration().setString("logger.level", logs_level);
|
||||
updateLevels(getClientConfiguration(), logger());
|
||||
}
|
||||
|
||||
void LocalServer::processConfig()
|
||||
{
|
||||
if (!queries.empty() && config().has("queries-file"))
|
||||
if (!queries.empty() && getClientConfiguration().has("queries-file"))
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Options '--query' and '--queries-file' cannot be specified at the same time");
|
||||
|
||||
if (config().has("multiquery"))
|
||||
if (getClientConfiguration().has("multiquery"))
|
||||
is_multiquery = true;
|
||||
|
||||
pager = config().getString("pager", "");
|
||||
pager = getClientConfiguration().getString("pager", "");
|
||||
|
||||
delayed_interactive = config().has("interactive") && (!queries.empty() || config().has("queries-file"));
|
||||
delayed_interactive = getClientConfiguration().has("interactive") && (!queries.empty() || getClientConfiguration().has("queries-file"));
|
||||
if (!is_interactive || delayed_interactive)
|
||||
{
|
||||
echo_queries = config().hasOption("echo") || config().hasOption("verbose");
|
||||
ignore_error = config().getBool("ignore-error", false);
|
||||
echo_queries = getClientConfiguration().hasOption("echo") || getClientConfiguration().hasOption("verbose");
|
||||
ignore_error = getClientConfiguration().getBool("ignore-error", false);
|
||||
}
|
||||
|
||||
print_stack_trace = config().getBool("stacktrace", false);
|
||||
print_stack_trace = getClientConfiguration().getBool("stacktrace", false);
|
||||
const std::string clickhouse_dialect{"clickhouse"};
|
||||
load_suggestions = (is_interactive || delayed_interactive) && !config().getBool("disable_suggestion", false)
|
||||
&& config().getString("dialect", clickhouse_dialect) == clickhouse_dialect;
|
||||
wait_for_suggestions_to_load = config().getBool("wait_for_suggestions_to_load", false);
|
||||
load_suggestions = (is_interactive || delayed_interactive) && !getClientConfiguration().getBool("disable_suggestion", false)
|
||||
&& getClientConfiguration().getString("dialect", clickhouse_dialect) == clickhouse_dialect;
|
||||
wait_for_suggestions_to_load = getClientConfiguration().getBool("wait_for_suggestions_to_load", false);
|
||||
|
||||
auto logging = (config().has("logger.console")
|
||||
|| config().has("logger.level")
|
||||
|| config().has("log-level")
|
||||
|| config().has("send_logs_level")
|
||||
|| config().has("logger.log"));
|
||||
auto logging = (getClientConfiguration().has("logger.console")
|
||||
|| getClientConfiguration().has("logger.level")
|
||||
|| getClientConfiguration().has("log-level")
|
||||
|| getClientConfiguration().has("send_logs_level")
|
||||
|| getClientConfiguration().has("logger.log"));
|
||||
|
||||
auto level = config().getString("log-level", "trace");
|
||||
auto level = getClientConfiguration().getString("log-level", "trace");
|
||||
|
||||
if (config().has("server_logs_file"))
|
||||
if (getClientConfiguration().has("server_logs_file"))
|
||||
{
|
||||
auto poco_logs_level = Poco::Logger::parseLevel(level);
|
||||
Poco::Logger::root().setLevel(poco_logs_level);
|
||||
@ -587,10 +598,10 @@ void LocalServer::processConfig()
|
||||
}
|
||||
else
|
||||
{
|
||||
config().setString("logger", "logger");
|
||||
getClientConfiguration().setString("logger", "logger");
|
||||
auto log_level_default = logging ? level : "fatal";
|
||||
config().setString("logger.level", config().getString("log-level", config().getString("send_logs_level", log_level_default)));
|
||||
buildLoggers(config(), logger(), "clickhouse-local");
|
||||
getClientConfiguration().setString("logger.level", getClientConfiguration().getString("log-level", getClientConfiguration().getString("send_logs_level", log_level_default)));
|
||||
buildLoggers(getClientConfiguration(), logger(), "clickhouse-local");
|
||||
}
|
||||
|
||||
shared_context = Context::createShared();
|
||||
@ -604,13 +615,13 @@ void LocalServer::processConfig()
|
||||
LoggerRawPtr log = &logger();
|
||||
|
||||
/// Maybe useless
|
||||
if (config().has("macros"))
|
||||
global_context->setMacros(std::make_unique<Macros>(config(), "macros", log));
|
||||
if (getClientConfiguration().has("macros"))
|
||||
global_context->setMacros(std::make_unique<Macros>(getClientConfiguration(), "macros", log));
|
||||
|
||||
setDefaultFormatsAndCompressionFromConfiguration();
|
||||
|
||||
/// Sets external authenticators config (LDAP, Kerberos).
|
||||
global_context->setExternalAuthenticatorsConfig(config());
|
||||
global_context->setExternalAuthenticatorsConfig(getClientConfiguration());
|
||||
|
||||
setupUsers();
|
||||
|
||||
@ -619,12 +630,43 @@ void LocalServer::processConfig()
|
||||
global_context->getProcessList().setMaxSize(0);
|
||||
|
||||
const size_t physical_server_memory = getMemoryAmount();
|
||||
const double cache_size_to_ram_max_ratio = config().getDouble("cache_size_to_ram_max_ratio", 0.5);
|
||||
|
||||
size_t max_server_memory_usage = server_settings.max_server_memory_usage;
|
||||
double max_server_memory_usage_to_ram_ratio = server_settings.max_server_memory_usage_to_ram_ratio;
|
||||
|
||||
size_t default_max_server_memory_usage = static_cast<size_t>(physical_server_memory * max_server_memory_usage_to_ram_ratio);
|
||||
|
||||
if (max_server_memory_usage == 0)
|
||||
{
|
||||
max_server_memory_usage = default_max_server_memory_usage;
|
||||
LOG_INFO(log, "Setting max_server_memory_usage was set to {}"
|
||||
" ({} available * {:.2f} max_server_memory_usage_to_ram_ratio)",
|
||||
formatReadableSizeWithBinarySuffix(max_server_memory_usage),
|
||||
formatReadableSizeWithBinarySuffix(physical_server_memory),
|
||||
max_server_memory_usage_to_ram_ratio);
|
||||
}
|
||||
else if (max_server_memory_usage > default_max_server_memory_usage)
|
||||
{
|
||||
max_server_memory_usage = default_max_server_memory_usage;
|
||||
LOG_INFO(log, "Setting max_server_memory_usage was lowered to {}"
|
||||
" because the system has low amount of memory. The amount was"
|
||||
" calculated as {} available"
|
||||
" * {:.2f} max_server_memory_usage_to_ram_ratio",
|
||||
formatReadableSizeWithBinarySuffix(max_server_memory_usage),
|
||||
formatReadableSizeWithBinarySuffix(physical_server_memory),
|
||||
max_server_memory_usage_to_ram_ratio);
|
||||
}
|
||||
|
||||
total_memory_tracker.setHardLimit(max_server_memory_usage);
|
||||
total_memory_tracker.setDescription("(total)");
|
||||
total_memory_tracker.setMetric(CurrentMetrics::MemoryTracking);
|
||||
|
||||
const double cache_size_to_ram_max_ratio = server_settings.cache_size_to_ram_max_ratio;
|
||||
const size_t max_cache_size = static_cast<size_t>(physical_server_memory * cache_size_to_ram_max_ratio);
|
||||
|
||||
String uncompressed_cache_policy = config().getString("uncompressed_cache_policy", DEFAULT_UNCOMPRESSED_CACHE_POLICY);
|
||||
size_t uncompressed_cache_size = config().getUInt64("uncompressed_cache_size", DEFAULT_UNCOMPRESSED_CACHE_MAX_SIZE);
|
||||
double uncompressed_cache_size_ratio = config().getDouble("uncompressed_cache_size_ratio", DEFAULT_UNCOMPRESSED_CACHE_SIZE_RATIO);
|
||||
String uncompressed_cache_policy = server_settings.uncompressed_cache_policy;
|
||||
size_t uncompressed_cache_size = server_settings.uncompressed_cache_size;
|
||||
double uncompressed_cache_size_ratio = server_settings.uncompressed_cache_size_ratio;
|
||||
if (uncompressed_cache_size > max_cache_size)
|
||||
{
|
||||
uncompressed_cache_size = max_cache_size;
|
||||
@ -632,9 +674,9 @@ void LocalServer::processConfig()
|
||||
}
|
||||
global_context->setUncompressedCache(uncompressed_cache_policy, uncompressed_cache_size, uncompressed_cache_size_ratio);
|
||||
|
||||
String mark_cache_policy = config().getString("mark_cache_policy", DEFAULT_MARK_CACHE_POLICY);
|
||||
size_t mark_cache_size = config().getUInt64("mark_cache_size", DEFAULT_MARK_CACHE_MAX_SIZE);
|
||||
double mark_cache_size_ratio = config().getDouble("mark_cache_size_ratio", DEFAULT_MARK_CACHE_SIZE_RATIO);
|
||||
String mark_cache_policy = server_settings.mark_cache_policy;
|
||||
size_t mark_cache_size = server_settings.mark_cache_size;
|
||||
double mark_cache_size_ratio = server_settings.mark_cache_size_ratio;
|
||||
if (!mark_cache_size)
|
||||
LOG_ERROR(log, "Too low mark cache size will lead to severe performance degradation.");
|
||||
if (mark_cache_size > max_cache_size)
|
||||
@ -644,9 +686,9 @@ void LocalServer::processConfig()
|
||||
}
|
||||
global_context->setMarkCache(mark_cache_policy, mark_cache_size, mark_cache_size_ratio);
|
||||
|
||||
String index_uncompressed_cache_policy = config().getString("index_uncompressed_cache_policy", DEFAULT_INDEX_UNCOMPRESSED_CACHE_POLICY);
|
||||
size_t index_uncompressed_cache_size = config().getUInt64("index_uncompressed_cache_size", DEFAULT_INDEX_UNCOMPRESSED_CACHE_MAX_SIZE);
|
||||
double index_uncompressed_cache_size_ratio = config().getDouble("index_uncompressed_cache_size_ratio", DEFAULT_INDEX_UNCOMPRESSED_CACHE_SIZE_RATIO);
|
||||
String index_uncompressed_cache_policy = server_settings.index_uncompressed_cache_policy;
|
||||
size_t index_uncompressed_cache_size = server_settings.index_uncompressed_cache_size;
|
||||
double index_uncompressed_cache_size_ratio = server_settings.index_uncompressed_cache_size_ratio;
|
||||
if (index_uncompressed_cache_size > max_cache_size)
|
||||
{
|
||||
index_uncompressed_cache_size = max_cache_size;
|
||||
@ -654,9 +696,9 @@ void LocalServer::processConfig()
|
||||
}
|
||||
global_context->setIndexUncompressedCache(index_uncompressed_cache_policy, index_uncompressed_cache_size, index_uncompressed_cache_size_ratio);
|
||||
|
||||
String index_mark_cache_policy = config().getString("index_mark_cache_policy", DEFAULT_INDEX_MARK_CACHE_POLICY);
|
||||
size_t index_mark_cache_size = config().getUInt64("index_mark_cache_size", DEFAULT_INDEX_MARK_CACHE_MAX_SIZE);
|
||||
double index_mark_cache_size_ratio = config().getDouble("index_mark_cache_size_ratio", DEFAULT_INDEX_MARK_CACHE_SIZE_RATIO);
|
||||
String index_mark_cache_policy = server_settings.index_mark_cache_policy;
|
||||
size_t index_mark_cache_size = server_settings.index_mark_cache_size;
|
||||
double index_mark_cache_size_ratio = server_settings.index_mark_cache_size_ratio;
|
||||
if (index_mark_cache_size > max_cache_size)
|
||||
{
|
||||
index_mark_cache_size = max_cache_size;
|
||||
@ -664,7 +706,7 @@ void LocalServer::processConfig()
|
||||
}
|
||||
global_context->setIndexMarkCache(index_mark_cache_policy, index_mark_cache_size, index_mark_cache_size_ratio);
|
||||
|
||||
size_t mmap_cache_size = config().getUInt64("mmap_cache_size", DEFAULT_MMAP_CACHE_MAX_SIZE);
|
||||
size_t mmap_cache_size = server_settings.mmap_cache_size;
|
||||
if (mmap_cache_size > max_cache_size)
|
||||
{
|
||||
mmap_cache_size = max_cache_size;
|
||||
@ -676,8 +718,8 @@ void LocalServer::processConfig()
|
||||
global_context->setQueryCache(0, 0, 0, 0);
|
||||
|
||||
#if USE_EMBEDDED_COMPILER
|
||||
size_t compiled_expression_cache_max_size_in_bytes = config().getUInt64("compiled_expression_cache_size", DEFAULT_COMPILED_EXPRESSION_CACHE_MAX_SIZE);
|
||||
size_t compiled_expression_cache_max_elements = config().getUInt64("compiled_expression_cache_elements_size", DEFAULT_COMPILED_EXPRESSION_CACHE_MAX_ENTRIES);
|
||||
size_t compiled_expression_cache_max_size_in_bytes = server_settings.compiled_expression_cache_size;
|
||||
size_t compiled_expression_cache_max_elements = server_settings.compiled_expression_cache_elements_size;
|
||||
CompiledExpressionCacheFactory::instance().init(compiled_expression_cache_max_size_in_bytes, compiled_expression_cache_max_elements);
|
||||
#endif
|
||||
|
||||
@ -689,16 +731,16 @@ void LocalServer::processConfig()
|
||||
applyCmdOptions(global_context);
|
||||
|
||||
/// Load global settings from default_profile and system_profile.
|
||||
global_context->setDefaultProfiles(config());
|
||||
global_context->setDefaultProfiles(getClientConfiguration());
|
||||
|
||||
/// We load temporary database first, because projections need it.
|
||||
DatabaseCatalog::instance().initializeAndLoadTemporaryDatabase();
|
||||
|
||||
std::string default_database = config().getString("default_database", "default");
|
||||
std::string default_database = server_settings.default_database;
|
||||
DatabaseCatalog::instance().attachDatabase(default_database, createClickHouseLocalDatabaseOverlay(default_database, global_context));
|
||||
global_context->setCurrentDatabase(default_database);
|
||||
|
||||
if (config().has("path"))
|
||||
if (getClientConfiguration().has("path"))
|
||||
{
|
||||
String path = global_context->getPath();
|
||||
fs::create_directories(fs::path(path));
|
||||
@ -713,7 +755,7 @@ void LocalServer::processConfig()
|
||||
attachInformationSchema(global_context, *createMemoryDatabaseIfNotExists(global_context, DatabaseCatalog::INFORMATION_SCHEMA_UPPERCASE));
|
||||
waitLoad(TablesLoaderForegroundPoolId, startup_system_tasks);
|
||||
|
||||
if (!config().has("only-system-tables"))
|
||||
if (!getClientConfiguration().has("only-system-tables"))
|
||||
{
|
||||
DatabaseCatalog::instance().createBackgroundTasks();
|
||||
waitLoad(loadMetadata(global_context));
|
||||
@ -725,15 +767,15 @@ void LocalServer::processConfig()
|
||||
|
||||
LOG_DEBUG(log, "Loaded metadata.");
|
||||
}
|
||||
else if (!config().has("no-system-tables"))
|
||||
else if (!getClientConfiguration().has("no-system-tables"))
|
||||
{
|
||||
attachSystemTablesServer(global_context, *createMemoryDatabaseIfNotExists(global_context, DatabaseCatalog::SYSTEM_DATABASE), false);
|
||||
attachInformationSchema(global_context, *createMemoryDatabaseIfNotExists(global_context, DatabaseCatalog::INFORMATION_SCHEMA));
|
||||
attachInformationSchema(global_context, *createMemoryDatabaseIfNotExists(global_context, DatabaseCatalog::INFORMATION_SCHEMA_UPPERCASE));
|
||||
}
|
||||
|
||||
server_display_name = config().getString("display_name", "");
|
||||
prompt_by_server_display_name = config().getRawString("prompt_by_server_display_name.default", ":) ");
|
||||
server_display_name = getClientConfiguration().getString("display_name", "");
|
||||
prompt_by_server_display_name = getClientConfiguration().getRawString("prompt_by_server_display_name.default", ":) ");
|
||||
|
||||
global_context->setQueryKindInitial();
|
||||
global_context->setQueryKind(query_kind);
|
||||
@ -811,7 +853,7 @@ void LocalServer::applyCmdSettings(ContextMutablePtr context)
|
||||
|
||||
void LocalServer::applyCmdOptions(ContextMutablePtr context)
|
||||
{
|
||||
context->setDefaultFormat(config().getString("output-format", config().getString("format", is_interactive ? "PrettyCompact" : "TSV")));
|
||||
context->setDefaultFormat(getClientConfiguration().getString("output-format", getClientConfiguration().getString("format", is_interactive ? "PrettyCompact" : "TSV")));
|
||||
applyCmdSettings(context);
|
||||
}
|
||||
|
||||
@ -819,33 +861,33 @@ void LocalServer::applyCmdOptions(ContextMutablePtr context)
|
||||
void LocalServer::processOptions(const OptionsDescription &, const CommandLineOptions & options, const std::vector<Arguments> &, const std::vector<Arguments> &)
|
||||
{
|
||||
if (options.count("table"))
|
||||
config().setString("table-name", options["table"].as<std::string>());
|
||||
getClientConfiguration().setString("table-name", options["table"].as<std::string>());
|
||||
if (options.count("file"))
|
||||
config().setString("table-file", options["file"].as<std::string>());
|
||||
getClientConfiguration().setString("table-file", options["file"].as<std::string>());
|
||||
if (options.count("structure"))
|
||||
config().setString("table-structure", options["structure"].as<std::string>());
|
||||
getClientConfiguration().setString("table-structure", options["structure"].as<std::string>());
|
||||
if (options.count("no-system-tables"))
|
||||
config().setBool("no-system-tables", true);
|
||||
getClientConfiguration().setBool("no-system-tables", true);
|
||||
if (options.count("only-system-tables"))
|
||||
config().setBool("only-system-tables", true);
|
||||
getClientConfiguration().setBool("only-system-tables", true);
|
||||
if (options.count("database"))
|
||||
config().setString("default_database", options["database"].as<std::string>());
|
||||
getClientConfiguration().setString("default_database", options["database"].as<std::string>());
|
||||
|
||||
if (options.count("input-format"))
|
||||
config().setString("table-data-format", options["input-format"].as<std::string>());
|
||||
getClientConfiguration().setString("table-data-format", options["input-format"].as<std::string>());
|
||||
if (options.count("output-format"))
|
||||
config().setString("output-format", options["output-format"].as<std::string>());
|
||||
getClientConfiguration().setString("output-format", options["output-format"].as<std::string>());
|
||||
|
||||
if (options.count("logger.console"))
|
||||
config().setBool("logger.console", options["logger.console"].as<bool>());
|
||||
getClientConfiguration().setBool("logger.console", options["logger.console"].as<bool>());
|
||||
if (options.count("logger.log"))
|
||||
config().setString("logger.log", options["logger.log"].as<std::string>());
|
||||
getClientConfiguration().setString("logger.log", options["logger.log"].as<std::string>());
|
||||
if (options.count("logger.level"))
|
||||
config().setString("logger.level", options["logger.level"].as<std::string>());
|
||||
getClientConfiguration().setString("logger.level", options["logger.level"].as<std::string>());
|
||||
if (options.count("send_logs_level"))
|
||||
config().setString("send_logs_level", options["send_logs_level"].as<std::string>());
|
||||
getClientConfiguration().setString("send_logs_level", options["send_logs_level"].as<std::string>());
|
||||
if (options.count("wait_for_suggestions_to_load"))
|
||||
config().setBool("wait_for_suggestions_to_load", true);
|
||||
getClientConfiguration().setBool("wait_for_suggestions_to_load", true);
|
||||
}
|
||||
|
||||
void LocalServer::readArguments(int argc, char ** argv, Arguments & common_arguments, std::vector<Arguments> &, std::vector<Arguments> &)
|
||||
|
@ -30,6 +30,9 @@ public:
|
||||
int main(const std::vector<String> & /*args*/) override;
|
||||
|
||||
protected:
|
||||
|
||||
Poco::Util::LayeredConfiguration & getClientConfiguration() override;
|
||||
|
||||
void connect() override;
|
||||
|
||||
void processError(const String & query) const override;
|
||||
@ -63,6 +66,8 @@ private:
|
||||
void applyCmdOptions(ContextMutablePtr context);
|
||||
void applyCmdSettings(ContextMutablePtr context);
|
||||
|
||||
ServerSettings server_settings;
|
||||
|
||||
std::optional<StatusFile> status;
|
||||
std::optional<std::filesystem::path> temporary_directory_to_delete;
|
||||
|
||||
|
@ -13,6 +13,7 @@
|
||||
|
||||
#include <fmt/format.h>
|
||||
|
||||
#include "config.h"
|
||||
#include "config_tools.h"
|
||||
|
||||
#include <Common/StringUtils.h>
|
||||
@ -439,6 +440,14 @@ extern "C"
|
||||
}
|
||||
#endif
|
||||
|
||||
/// Prevent messages from JeMalloc in the release build.
|
||||
/// Some of these messages are non-actionable for the users, such as:
|
||||
/// <jemalloc>: Number of CPUs detected is not deterministic. Per-CPU arena disabled.
|
||||
#if USE_JEMALLOC && defined(NDEBUG) && !defined(SANITIZER)
|
||||
extern "C" void (*malloc_message)(void *, const char *s);
|
||||
__attribute__((constructor(0))) void init_je_malloc_message() { malloc_message = [](void *, const char *){}; }
|
||||
#endif
|
||||
|
||||
/// This allows to implement assert to forbid initialization of a class in static constructors.
|
||||
/// Usage:
|
||||
///
|
||||
|
@ -13,7 +13,6 @@ set (CLICKHOUSE_ODBC_BRIDGE_SOURCES
|
||||
getIdentifierQuote.cpp
|
||||
odbc-bridge.cpp
|
||||
validateODBCConnectionString.cpp
|
||||
createFunctionBaseCast.cpp
|
||||
)
|
||||
|
||||
clickhouse_add_executable(clickhouse-odbc-bridge ${CLICKHOUSE_ODBC_BRIDGE_SOURCES})
|
||||
@ -25,6 +24,7 @@ target_link_libraries(clickhouse-odbc-bridge PRIVATE
|
||||
clickhouse_parsers
|
||||
ch_contrib::nanodbc
|
||||
ch_contrib::unixodbc
|
||||
clickhouse_functions
|
||||
)
|
||||
|
||||
set_target_properties(clickhouse-odbc-bridge PROPERTIES RUNTIME_OUTPUT_DIRECTORY ..)
|
||||
|
@ -3,6 +3,7 @@
|
||||
#include <IO/ReadBufferFromString.h>
|
||||
#include <DataTypes/DataTypeNullable.h>
|
||||
#include <DataTypes/DataTypeDateTime64.h>
|
||||
#include <Columns/ColumnNullable.h>
|
||||
#include <Common/assert_cast.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
|
||||
@ -47,9 +48,17 @@ Chunk ODBCSource::generate()
|
||||
for (int idx = 0; idx < result.columns(); ++idx)
|
||||
{
|
||||
const auto & sample = description.sample_block.getByPosition(idx);
|
||||
|
||||
if (!result.is_null(idx))
|
||||
insertValue(*columns[idx], removeNullable(sample.type), description.types[idx].first, result, idx);
|
||||
{
|
||||
if (columns[idx]->isNullable())
|
||||
{
|
||||
ColumnNullable & column_nullable = assert_cast<ColumnNullable &>(*columns[idx]);
|
||||
insertValue(column_nullable.getNestedColumn(), removeNullable(sample.type), description.types[idx].first, result, idx);
|
||||
column_nullable.getNullMapData().emplace_back(0);
|
||||
}
|
||||
else
|
||||
insertValue(*columns[idx], removeNullable(sample.type), description.types[idx].first, result, idx);
|
||||
}
|
||||
else
|
||||
insertDefaultValue(*columns[idx], *sample.column);
|
||||
}
|
||||
|
@ -133,10 +133,6 @@
|
||||
# include <Server/KeeperTCPHandlerFactory.h>
|
||||
#endif
|
||||
|
||||
#if USE_JEMALLOC
|
||||
# include <jemalloc/jemalloc.h>
|
||||
#endif
|
||||
|
||||
#if USE_AZURE_BLOB_STORAGE
|
||||
# include <azure/storage/common/internal/xml_wrapper.hpp>
|
||||
# include <azure/core/diagnostics/logger.hpp>
|
||||
@ -176,34 +172,10 @@ namespace ProfileEvents
|
||||
|
||||
namespace fs = std::filesystem;
|
||||
|
||||
#if USE_JEMALLOC
|
||||
static bool jemallocOptionEnabled(const char *name)
|
||||
{
|
||||
bool value;
|
||||
size_t size = sizeof(value);
|
||||
|
||||
if (mallctl(name, reinterpret_cast<void *>(&value), &size, /* newp= */ nullptr, /* newlen= */ 0))
|
||||
throw Poco::SystemException("mallctl() failed");
|
||||
|
||||
return value;
|
||||
}
|
||||
#else
|
||||
static bool jemallocOptionEnabled(const char *) { return false; }
|
||||
#endif
|
||||
|
||||
int mainEntryClickHouseServer(int argc, char ** argv)
|
||||
{
|
||||
DB::Server app;
|
||||
|
||||
if (jemallocOptionEnabled("opt.background_thread"))
|
||||
{
|
||||
LOG_ERROR(&app.logger(),
|
||||
"jemalloc.background_thread was requested, "
|
||||
"however ClickHouse uses percpu_arena and background_thread most likely will not give any benefits, "
|
||||
"and also background_thread is not compatible with ClickHouse watchdog "
|
||||
"(that can be disabled with CLICKHOUSE_WATCHDOG_ENABLE=0)");
|
||||
}
|
||||
|
||||
/// Do not fork separate process from watchdog if we attached to terminal.
|
||||
/// Otherwise it breaks gdb usage.
|
||||
/// Can be overridden by environment variable (cannot use server config at this moment).
|
||||
@ -1003,6 +975,8 @@ try
|
||||
|
||||
ServerUUID::load(path / "uuid", log);
|
||||
|
||||
PlacementInfo::PlacementInfo::instance().initialize(config());
|
||||
|
||||
zkutil::validateZooKeeperConfig(config());
|
||||
bool has_zookeeper = zkutil::hasZooKeeperConfig(config());
|
||||
|
||||
@ -1540,6 +1514,8 @@ try
|
||||
global_context->setMaxDictionaryNumToWarn(new_server_settings.max_dictionary_num_to_warn);
|
||||
global_context->setMaxDatabaseNumToWarn(new_server_settings.max_database_num_to_warn);
|
||||
global_context->setMaxPartNumToWarn(new_server_settings.max_part_num_to_warn);
|
||||
/// Only for system.server_settings
|
||||
global_context->setConfigReloaderInterval(new_server_settings.config_reload_interval_ms);
|
||||
|
||||
SlotCount concurrent_threads_soft_limit = UnlimitedSlots;
|
||||
if (new_server_settings.concurrent_threads_soft_limit_num > 0 && new_server_settings.concurrent_threads_soft_limit_num < concurrent_threads_soft_limit)
|
||||
@ -1702,8 +1678,7 @@ try
|
||||
|
||||
/// Must be the last.
|
||||
latest_config = config;
|
||||
},
|
||||
/* already_loaded = */ false); /// Reload it right now (initial loading)
|
||||
});
|
||||
|
||||
const auto listen_hosts = getListenHosts(config());
|
||||
const auto interserver_listen_hosts = getInterserverListenHosts(config());
|
||||
@ -1816,11 +1791,6 @@ try
|
||||
|
||||
}
|
||||
|
||||
if (config().has(DB::PlacementInfo::PLACEMENT_CONFIG_PREFIX))
|
||||
{
|
||||
PlacementInfo::PlacementInfo::instance().initialize(config());
|
||||
}
|
||||
|
||||
{
|
||||
std::lock_guard lock(servers_lock);
|
||||
/// We should start interserver communications before (and more important shutdown after) tables.
|
||||
|
@ -29,7 +29,14 @@
|
||||
-->
|
||||
<size>1000M</size>
|
||||
<count>10</count>
|
||||
|
||||
<!-- <console>1</console> --> <!-- Default behavior is autodetection (log to console if not daemon mode and is tty) -->
|
||||
<!-- <console_log_level>trace</console_log_level> -->
|
||||
|
||||
<!-- <use_syslog>0</use_syslog> -->
|
||||
<!-- <syslog_level>trace</syslog_level> -->
|
||||
|
||||
<!-- <stream_compress>0</stream_compress> -->
|
||||
|
||||
<!-- Per level overrides (legacy):
|
||||
|
||||
@ -408,13 +415,11 @@
|
||||
|
||||
<!-- Approximate size of mark cache, used in tables of MergeTree family.
|
||||
In bytes. Cache is single for server. Memory is allocated only on demand.
|
||||
You should not lower this value.
|
||||
-->
|
||||
<mark_cache_size>5368709120</mark_cache_size>
|
||||
You should not lower this value. -->
|
||||
<!-- <mark_cache_size>5368709120</mark_cache_size> -->
|
||||
|
||||
<!-- For marks of secondary indices.
|
||||
-->
|
||||
<index_mark_cache_size>5368709120</index_mark_cache_size>
|
||||
<!-- For marks of secondary indices. -->
|
||||
<!-- <index_mark_cache_size>5368709120</index_mark_cache_size> -->
|
||||
|
||||
<!-- If you enable the `min_bytes_to_use_mmap_io` setting,
|
||||
the data in MergeTree tables can be read with mmap to avoid copying from kernel to userspace.
|
||||
@ -432,13 +437,23 @@
|
||||
The cache is dropped (the files are closed) automatically on removal of old parts in MergeTree,
|
||||
also it can be dropped manually by the SYSTEM DROP MMAP CACHE query.
|
||||
-->
|
||||
<mmap_cache_size>1000</mmap_cache_size>
|
||||
<!-- <mmap_cache_size>1024</mmap_cache_size> -->
|
||||
|
||||
<!-- Cache size in bytes for compiled expressions.-->
|
||||
<compiled_expression_cache_size>134217728</compiled_expression_cache_size>
|
||||
<!-- <compiled_expression_cache_size>134217728</compiled_expression_cache_size> -->
|
||||
|
||||
<!-- Cache size in elements for compiled expressions.-->
|
||||
<compiled_expression_cache_elements_size>10000</compiled_expression_cache_elements_size>
|
||||
<!-- <compiled_expression_cache_elements_size>10000</compiled_expression_cache_elements_size> -->
|
||||
|
||||
<!-- Configuration for the query cache -->
|
||||
<!--
|
||||
<query_cache>
|
||||
<max_size_in_bytes>1073741824</max_size_in_bytes>
|
||||
<max_entries>1024</max_entries>
|
||||
<max_entry_size_in_bytes>1048576</max_entry_size_in_bytes>
|
||||
<max_entry_size_in_rows>30000000</max_entry_size_in_rows>
|
||||
</query_cache>
|
||||
-->
|
||||
|
||||
<!-- Cache path for custom (created from SQL) cached disks -->
|
||||
<custom_cached_disks_base_directory>/var/lib/clickhouse/caches/</custom_cached_disks_base_directory>
|
||||
@ -1642,14 +1657,6 @@
|
||||
-->
|
||||
<!-- </kafka> -->
|
||||
|
||||
<!-- Configuration for the query cache -->
|
||||
<query_cache>
|
||||
<max_size_in_bytes>1073741824</max_size_in_bytes>
|
||||
<max_entries>1024</max_entries>
|
||||
<max_entry_size_in_bytes>1048576</max_entry_size_in_bytes>
|
||||
<max_entry_size_in_rows>30000000</max_entry_size_in_rows>
|
||||
</query_cache>
|
||||
|
||||
<backups>
|
||||
<allowed_path>backups</allowed_path>
|
||||
|
||||
|
@ -260,7 +260,10 @@ uncompressed_cache_size: 8589934592
|
||||
# Approximate size of mark cache, used in tables of MergeTree family.
|
||||
# In bytes. Cache is single for server. Memory is allocated only on demand.
|
||||
# You should not lower this value.
|
||||
mark_cache_size: 5368709120
|
||||
# mark_cache_size: 5368709120
|
||||
|
||||
# For marks of secondary indices.
|
||||
# index_mark_cache_size: 5368709120
|
||||
|
||||
# If you enable the `min_bytes_to_use_mmap_io` setting,
|
||||
# the data in MergeTree tables can be read with mmap to avoid copying from kernel to userspace.
|
||||
@ -277,13 +280,20 @@ mark_cache_size: 5368709120
|
||||
# in query or server memory usage - because this memory can be discarded similar to OS page cache.
|
||||
# The cache is dropped (the files are closed) automatically on removal of old parts in MergeTree,
|
||||
# also it can be dropped manually by the SYSTEM DROP MMAP CACHE query.
|
||||
mmap_cache_size: 1000
|
||||
# mmap_cache_size: 1024
|
||||
|
||||
# Cache size in bytes for compiled expressions.
|
||||
compiled_expression_cache_size: 134217728
|
||||
# compiled_expression_cache_size: 134217728
|
||||
|
||||
# Cache size in elements for compiled expressions.
|
||||
compiled_expression_cache_elements_size: 10000
|
||||
# compiled_expression_cache_elements_size: 10000
|
||||
|
||||
# Configuration for the query cache
|
||||
# query_cache:
|
||||
# max_size_in_bytes: 1073741824
|
||||
# max_entries: 1024
|
||||
# max_entry_size_in_bytes: 1048576
|
||||
# max_entry_size_in_rows: 30000000
|
||||
|
||||
# Path to data directory, with trailing slash.
|
||||
path: /var/lib/clickhouse/
|
||||
|
@ -506,6 +506,14 @@ let user = 'default';
|
||||
let password = '';
|
||||
let add_http_cors_header = (location.protocol != 'file:');
|
||||
|
||||
const current_url = new URL(window.location);
|
||||
/// Substitute user name if it's specified in the query string
|
||||
const user_from_url = current_url.searchParams.get('user');
|
||||
if (user_from_url) {
|
||||
user = user_from_url;
|
||||
}
|
||||
|
||||
|
||||
const errorCodeMessageMap = {
|
||||
516: 'Error authenticating with database. Please check your connection params and try again.'
|
||||
}
|
||||
|
@ -8,6 +8,7 @@
|
||||
#include <Common/Exception.h>
|
||||
#include <Common/SSHWrapper.h>
|
||||
#include <Common/typeid_cast.h>
|
||||
#include <Access/Common/SSLCertificateSubjects.h>
|
||||
|
||||
#include "config.h"
|
||||
|
||||
@ -108,6 +109,9 @@ bool Authentication::areCredentialsValid(
|
||||
case AuthenticationType::HTTP:
|
||||
throw Authentication::Require<BasicCredentials>("ClickHouse Basic Authentication");
|
||||
|
||||
case AuthenticationType::JWT:
|
||||
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "JWT is available only in ClickHouse Cloud");
|
||||
|
||||
case AuthenticationType::KERBEROS:
|
||||
return external_authenticators.checkKerberosCredentials(auth_data.getKerberosRealm(), *gss_acceptor_context);
|
||||
|
||||
@ -149,6 +153,9 @@ bool Authentication::areCredentialsValid(
|
||||
case AuthenticationType::SSL_CERTIFICATE:
|
||||
throw Authentication::Require<BasicCredentials>("ClickHouse X.509 Authentication");
|
||||
|
||||
case AuthenticationType::JWT:
|
||||
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "JWT is available only in ClickHouse Cloud");
|
||||
|
||||
case AuthenticationType::SSH_KEY:
|
||||
#if USE_SSH
|
||||
throw Authentication::Require<SshCredentials>("SSH Keys Authentication");
|
||||
@ -193,6 +200,9 @@ bool Authentication::areCredentialsValid(
|
||||
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSH is disabled, because ClickHouse is built without libssh");
|
||||
#endif
|
||||
|
||||
case AuthenticationType::JWT:
|
||||
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "JWT is available only in ClickHouse Cloud");
|
||||
|
||||
case AuthenticationType::BCRYPT_PASSWORD:
|
||||
return checkPasswordBcrypt(basic_credentials->getPassword(), auth_data.getPasswordHashBinary());
|
||||
|
||||
@ -222,11 +232,22 @@ bool Authentication::areCredentialsValid(
|
||||
case AuthenticationType::HTTP:
|
||||
throw Authentication::Require<BasicCredentials>("ClickHouse Basic Authentication");
|
||||
|
||||
case AuthenticationType::JWT:
|
||||
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "JWT is available only in ClickHouse Cloud");
|
||||
|
||||
case AuthenticationType::KERBEROS:
|
||||
throw Authentication::Require<GSSAcceptorContext>(auth_data.getKerberosRealm());
|
||||
|
||||
case AuthenticationType::SSL_CERTIFICATE:
|
||||
return auth_data.getSSLCertificateCommonNames().contains(ssl_certificate_credentials->getCommonName());
|
||||
for (SSLCertificateSubjects::Type type : {SSLCertificateSubjects::Type::CN, SSLCertificateSubjects::Type::SAN})
|
||||
{
|
||||
for (const auto & subject : auth_data.getSSLCertificateSubjects().at(type))
|
||||
{
|
||||
if (ssl_certificate_credentials->getSSLCertificateSubjects().at(type).contains(subject))
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
|
||||
case AuthenticationType::SSH_KEY:
|
||||
#if USE_SSH
|
||||
@ -254,6 +275,9 @@ bool Authentication::areCredentialsValid(
|
||||
case AuthenticationType::HTTP:
|
||||
throw Authentication::Require<BasicCredentials>("ClickHouse Basic Authentication");
|
||||
|
||||
case AuthenticationType::JWT:
|
||||
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "JWT is available only in ClickHouse Cloud");
|
||||
|
||||
case AuthenticationType::KERBEROS:
|
||||
throw Authentication::Require<GSSAcceptorContext>(auth_data.getKerberosRealm());
|
||||
|
||||
|
@ -15,6 +15,7 @@
|
||||
#include <boost/algorithm/hex.hpp>
|
||||
#include <boost/algorithm/string/case_conv.hpp>
|
||||
|
||||
#include <Access/Common/SSLCertificateSubjects.h>
|
||||
#include "config.h"
|
||||
|
||||
#if USE_SSL
|
||||
@ -107,7 +108,7 @@ bool operator ==(const AuthenticationData & lhs, const AuthenticationData & rhs)
|
||||
{
|
||||
return (lhs.type == rhs.type) && (lhs.password_hash == rhs.password_hash)
|
||||
&& (lhs.ldap_server_name == rhs.ldap_server_name) && (lhs.kerberos_realm == rhs.kerberos_realm)
|
||||
&& (lhs.ssl_certificate_common_names == rhs.ssl_certificate_common_names)
|
||||
&& (lhs.ssl_certificate_subjects == rhs.ssl_certificate_subjects)
|
||||
#if USE_SSH
|
||||
&& (lhs.ssh_keys == rhs.ssh_keys)
|
||||
#endif
|
||||
@ -135,6 +136,7 @@ void AuthenticationData::setPassword(const String & password_)
|
||||
case AuthenticationType::BCRYPT_PASSWORD:
|
||||
case AuthenticationType::NO_PASSWORD:
|
||||
case AuthenticationType::LDAP:
|
||||
case AuthenticationType::JWT:
|
||||
case AuthenticationType::KERBEROS:
|
||||
case AuthenticationType::SSL_CERTIFICATE:
|
||||
case AuthenticationType::SSH_KEY:
|
||||
@ -251,6 +253,7 @@ void AuthenticationData::setPasswordHashBinary(const Digest & hash)
|
||||
|
||||
case AuthenticationType::NO_PASSWORD:
|
||||
case AuthenticationType::LDAP:
|
||||
case AuthenticationType::JWT:
|
||||
case AuthenticationType::KERBEROS:
|
||||
case AuthenticationType::SSL_CERTIFICATE:
|
||||
case AuthenticationType::SSH_KEY:
|
||||
@ -275,11 +278,16 @@ String AuthenticationData::getSalt() const
|
||||
return salt;
|
||||
}
|
||||
|
||||
void AuthenticationData::setSSLCertificateCommonNames(boost::container::flat_set<String> common_names_)
|
||||
void AuthenticationData::setSSLCertificateSubjects(SSLCertificateSubjects && ssl_certificate_subjects_)
|
||||
{
|
||||
if (common_names_.empty())
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "The 'SSL CERTIFICATE' authentication type requires a non-empty list of common names.");
|
||||
ssl_certificate_common_names = std::move(common_names_);
|
||||
if (ssl_certificate_subjects_.empty())
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "The 'SSL CERTIFICATE' authentication type requires a non-empty list of subjects.");
|
||||
ssl_certificate_subjects = std::move(ssl_certificate_subjects_);
|
||||
}
|
||||
|
||||
void AuthenticationData::addSSLCertificateSubject(SSLCertificateSubjects::Type type_, String && subject_)
|
||||
{
|
||||
ssl_certificate_subjects.insert(type_, std::move(subject_));
|
||||
}
|
||||
|
||||
std::shared_ptr<ASTAuthenticationData> AuthenticationData::toAST() const
|
||||
@ -322,6 +330,10 @@ std::shared_ptr<ASTAuthenticationData> AuthenticationData::toAST() const
|
||||
node->children.push_back(std::make_shared<ASTLiteral>(getLDAPServerName()));
|
||||
break;
|
||||
}
|
||||
case AuthenticationType::JWT:
|
||||
{
|
||||
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "JWT is available only in ClickHouse Cloud");
|
||||
}
|
||||
case AuthenticationType::KERBEROS:
|
||||
{
|
||||
const auto & realm = getKerberosRealm();
|
||||
@ -333,7 +345,14 @@ std::shared_ptr<ASTAuthenticationData> AuthenticationData::toAST() const
|
||||
}
|
||||
case AuthenticationType::SSL_CERTIFICATE:
|
||||
{
|
||||
for (const auto & name : getSSLCertificateCommonNames())
|
||||
using SSLCertificateSubjects::Type::CN;
|
||||
using SSLCertificateSubjects::Type::SAN;
|
||||
|
||||
const auto &subjects = getSSLCertificateSubjects();
|
||||
SSLCertificateSubjects::Type cert_subject_type = !subjects.at(SAN).empty() ? SAN : CN;
|
||||
|
||||
node->ssl_cert_subject_type = toString(cert_subject_type);
|
||||
for (const auto & name : getSSLCertificateSubjects().at(cert_subject_type))
|
||||
node->children.push_back(std::make_shared<ASTLiteral>(name));
|
||||
|
||||
break;
|
||||
@ -507,11 +526,9 @@ AuthenticationData AuthenticationData::fromAST(const ASTAuthenticationData & que
|
||||
}
|
||||
else if (query.type == AuthenticationType::SSL_CERTIFICATE)
|
||||
{
|
||||
boost::container::flat_set<String> common_names;
|
||||
auto ssl_cert_subject_type = parseSSLCertificateSubjectType(*query.ssl_cert_subject_type);
|
||||
for (const auto & arg : args)
|
||||
common_names.insert(checkAndGetLiteralArgument<String>(arg, "common_name"));
|
||||
|
||||
auth_data.setSSLCertificateCommonNames(std::move(common_names));
|
||||
auth_data.addSSLCertificateSubject(ssl_cert_subject_type, checkAndGetLiteralArgument<String>(arg, "ssl_certificate_subject"));
|
||||
}
|
||||
else if (query.type == AuthenticationType::HTTP)
|
||||
{
|
||||
|
@ -2,13 +2,14 @@
|
||||
|
||||
#include <Access/Common/AuthenticationType.h>
|
||||
#include <Access/Common/HTTPAuthenticationScheme.h>
|
||||
#include <Access/Common/SSLCertificateSubjects.h>
|
||||
#include <Common/SSHWrapper.h>
|
||||
#include <Interpreters/Context_fwd.h>
|
||||
#include <Parsers/Access/ASTAuthenticationData.h>
|
||||
|
||||
#include <vector>
|
||||
#include <base/types.h>
|
||||
#include <boost/container/flat_set.hpp>
|
||||
|
||||
|
||||
#include "config.h"
|
||||
|
||||
@ -58,8 +59,9 @@ public:
|
||||
const String & getKerberosRealm() const { return kerberos_realm; }
|
||||
void setKerberosRealm(const String & realm) { kerberos_realm = realm; }
|
||||
|
||||
const boost::container::flat_set<String> & getSSLCertificateCommonNames() const { return ssl_certificate_common_names; }
|
||||
void setSSLCertificateCommonNames(boost::container::flat_set<String> common_names_);
|
||||
const SSLCertificateSubjects & getSSLCertificateSubjects() const { return ssl_certificate_subjects; }
|
||||
void setSSLCertificateSubjects(SSLCertificateSubjects && ssl_certificate_subjects_);
|
||||
void addSSLCertificateSubject(SSLCertificateSubjects::Type type_, String && subject_);
|
||||
|
||||
#if USE_SSH
|
||||
const std::vector<SSHKey> & getSSHKeys() const { return ssh_keys; }
|
||||
@ -96,7 +98,7 @@ private:
|
||||
Digest password_hash;
|
||||
String ldap_server_name;
|
||||
String kerberos_realm;
|
||||
boost::container::flat_set<String> ssl_certificate_common_names;
|
||||
SSLCertificateSubjects ssl_certificate_subjects;
|
||||
String salt;
|
||||
#if USE_SSH
|
||||
std::vector<SSHKey> ssh_keys;
|
||||
|
@ -72,6 +72,11 @@ const AuthenticationTypeInfo & AuthenticationTypeInfo::get(AuthenticationType ty
|
||||
static const auto info = make_info(Keyword::HTTP);
|
||||
return info;
|
||||
}
|
||||
case AuthenticationType::JWT:
|
||||
{
|
||||
static const auto info = make_info(Keyword::JWT);
|
||||
return info;
|
||||
}
|
||||
case AuthenticationType::MAX:
|
||||
break;
|
||||
}
|
||||
|
@ -41,6 +41,9 @@ enum class AuthenticationType : uint8_t
|
||||
/// Authentication through HTTP protocol
|
||||
HTTP,
|
||||
|
||||
/// JSON Web Token
|
||||
JWT,
|
||||
|
||||
MAX,
|
||||
};
|
||||
|
||||
|
95
src/Access/Common/SSLCertificateSubjects.cpp
Normal file
95
src/Access/Common/SSLCertificateSubjects.cpp
Normal file
@ -0,0 +1,95 @@
|
||||
#include <Access/Common/SSLCertificateSubjects.h>
|
||||
#include <Common/Exception.h>
|
||||
|
||||
#if USE_SSL
|
||||
#include <openssl/x509v3.h>
|
||||
#endif
|
||||
|
||||
namespace DB
|
||||
{
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int BAD_ARGUMENTS;
|
||||
}
|
||||
|
||||
#if USE_SSL
|
||||
SSLCertificateSubjects extractSSLCertificateSubjects(const Poco::Net::X509Certificate & certificate)
|
||||
{
|
||||
|
||||
SSLCertificateSubjects subjects;
|
||||
if (!certificate.commonName().empty())
|
||||
{
|
||||
subjects.insert(SSLCertificateSubjects::Type::CN, certificate.commonName());
|
||||
}
|
||||
|
||||
#pragma clang diagnostic push
|
||||
#pragma clang diagnostic ignored "-Wused-but-marked-unused"
|
||||
auto stackof_general_name_deleter = [](void * ptr) { GENERAL_NAMES_free(static_cast<STACK_OF(GENERAL_NAME) *>(ptr)); };
|
||||
std::unique_ptr<void, decltype(stackof_general_name_deleter)> cert_names(
|
||||
X509_get_ext_d2i(const_cast<X509 *>(certificate.certificate()), NID_subject_alt_name, nullptr, nullptr),
|
||||
stackof_general_name_deleter);
|
||||
|
||||
if (STACK_OF(GENERAL_NAME) * names = static_cast<STACK_OF(GENERAL_NAME) *>(cert_names.get()))
|
||||
{
|
||||
for (int i = 0; i < sk_GENERAL_NAME_num(names); ++i)
|
||||
{
|
||||
const GENERAL_NAME * name = sk_GENERAL_NAME_value(names, i);
|
||||
if (name->type == GEN_DNS || name->type == GEN_URI)
|
||||
{
|
||||
const char * data = reinterpret_cast<const char *>(ASN1_STRING_get0_data(name->d.ia5));
|
||||
std::size_t len = ASN1_STRING_length(name->d.ia5);
|
||||
std::string subject = (name->type == GEN_DNS ? "DNS:" : "URI:") + std::string(data, len);
|
||||
subjects.insert(SSLCertificateSubjects::Type::SAN, std::move(subject));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#pragma clang diagnostic pop
|
||||
return subjects;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
void SSLCertificateSubjects::insert(const String & subject_type_, String && subject)
|
||||
{
|
||||
insert(parseSSLCertificateSubjectType(subject_type_), std::move(subject));
|
||||
}
|
||||
|
||||
void SSLCertificateSubjects::insert(Type subject_type_, String && subject)
|
||||
{
|
||||
subjects[static_cast<size_t>(subject_type_)].insert(std::move(subject));
|
||||
}
|
||||
|
||||
SSLCertificateSubjects::Type parseSSLCertificateSubjectType(const String & type_)
|
||||
{
|
||||
if (type_ == "CN")
|
||||
return SSLCertificateSubjects::Type::CN;
|
||||
if (type_ == "SAN")
|
||||
return SSLCertificateSubjects::Type::SAN;
|
||||
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown SSL Certificate Subject Type: {}", type_);
|
||||
}
|
||||
|
||||
String toString(SSLCertificateSubjects::Type type_)
|
||||
{
|
||||
switch (type_)
|
||||
{
|
||||
case SSLCertificateSubjects::Type::CN:
|
||||
return "CN";
|
||||
case SSLCertificateSubjects::Type::SAN:
|
||||
return "SAN";
|
||||
}
|
||||
}
|
||||
|
||||
bool operator==(const SSLCertificateSubjects & lhs, const SSLCertificateSubjects & rhs)
|
||||
{
|
||||
for (SSLCertificateSubjects::Type type : {SSLCertificateSubjects::Type::CN, SSLCertificateSubjects::Type::SAN})
|
||||
{
|
||||
if (lhs.at(type) != rhs.at(type))
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
}
|
||||
|
48
src/Access/Common/SSLCertificateSubjects.h
Normal file
48
src/Access/Common/SSLCertificateSubjects.h
Normal file
@ -0,0 +1,48 @@
|
||||
#pragma once
|
||||
|
||||
#include "config.h"
|
||||
#include <base/types.h>
|
||||
#include <boost/container/flat_set.hpp>
|
||||
|
||||
#if USE_SSL
|
||||
# include <Poco/Net/X509Certificate.h>
|
||||
#endif
|
||||
|
||||
namespace DB
|
||||
{
|
||||
class SSLCertificateSubjects
|
||||
{
|
||||
public:
|
||||
using container = boost::container::flat_set<String>;
|
||||
enum class Type
|
||||
{
|
||||
CN,
|
||||
SAN
|
||||
};
|
||||
|
||||
private:
|
||||
std::array<container, size_t(Type::SAN) + 1> subjects;
|
||||
|
||||
public:
|
||||
inline const container & at(Type type_) const { return subjects[static_cast<size_t>(type_)]; }
|
||||
inline bool empty()
|
||||
{
|
||||
for (auto & subject_list : subjects)
|
||||
{
|
||||
if (!subject_list.empty())
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
void insert(const String & subject_type_, String && subject);
|
||||
void insert(Type type_, String && subject);
|
||||
friend bool operator==(const SSLCertificateSubjects & lhs, const SSLCertificateSubjects & rhs);
|
||||
};
|
||||
|
||||
String toString(SSLCertificateSubjects::Type type_);
|
||||
SSLCertificateSubjects::Type parseSSLCertificateSubjectType(const String & type_);
|
||||
|
||||
#if USE_SSL
|
||||
SSLCertificateSubjects extractSSLCertificateSubjects(const Poco::Net::X509Certificate & certificate);
|
||||
#endif
|
||||
}
|
@ -1,7 +1,7 @@
|
||||
#include <Access/Credentials.h>
|
||||
#include <Access/Common/SSLCertificateSubjects.h>
|
||||
#include <Common/Exception.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
@ -48,18 +48,18 @@ void AlwaysAllowCredentials::setUserName(const String & user_name_)
|
||||
user_name = user_name_;
|
||||
}
|
||||
|
||||
SSLCertificateCredentials::SSLCertificateCredentials(const String & user_name_, const String & common_name_)
|
||||
SSLCertificateCredentials::SSLCertificateCredentials(const String & user_name_, SSLCertificateSubjects && subjects_)
|
||||
: Credentials(user_name_)
|
||||
, common_name(common_name_)
|
||||
, certificate_subjects(subjects_)
|
||||
{
|
||||
is_ready = true;
|
||||
}
|
||||
|
||||
const String & SSLCertificateCredentials::getCommonName() const
|
||||
const SSLCertificateSubjects & SSLCertificateCredentials::getSSLCertificateSubjects() const
|
||||
{
|
||||
if (!isReady())
|
||||
throwNotReady();
|
||||
return common_name;
|
||||
return certificate_subjects;
|
||||
}
|
||||
|
||||
BasicCredentials::BasicCredentials()
|
||||
|
@ -1,6 +1,8 @@
|
||||
#pragma once
|
||||
|
||||
#include <base/types.h>
|
||||
#include <boost/container/flat_set.hpp>
|
||||
#include <Access/Common/SSLCertificateSubjects.h>
|
||||
#include <memory>
|
||||
|
||||
#include "config.h"
|
||||
@ -42,11 +44,11 @@ class SSLCertificateCredentials
|
||||
: public Credentials
|
||||
{
|
||||
public:
|
||||
explicit SSLCertificateCredentials(const String & user_name_, const String & common_name_);
|
||||
const String & getCommonName() const;
|
||||
explicit SSLCertificateCredentials(const String & user_name_, SSLCertificateSubjects && subjects_);
|
||||
const SSLCertificateSubjects & getSSLCertificateSubjects() const;
|
||||
|
||||
private:
|
||||
String common_name;
|
||||
SSLCertificateSubjects certificate_subjects;
|
||||
};
|
||||
|
||||
class BasicCredentials
|
||||
|
@ -33,6 +33,8 @@ void User::setName(const String & name_)
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "User name '{}' is reserved", name_);
|
||||
if (name_.starts_with(EncodedUserInfo::SSH_KEY_AUTHENTICAION_MARKER))
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "User name '{}' is reserved", name_);
|
||||
if (name_.starts_with(EncodedUserInfo::JWT_AUTHENTICAION_MARKER))
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "User name '{}' is reserved", name_);
|
||||
name = name_;
|
||||
}
|
||||
|
||||
|
@ -1,4 +1,5 @@
|
||||
#include <Access/UsersConfigAccessStorage.h>
|
||||
#include <Access/Common/SSLCertificateSubjects.h>
|
||||
#include <Access/Quota.h>
|
||||
#include <Access/RowPolicy.h>
|
||||
#include <Access/User.h>
|
||||
@ -194,18 +195,23 @@ namespace
|
||||
/// Fill list of allowed certificates.
|
||||
Poco::Util::AbstractConfiguration::Keys keys;
|
||||
config.keys(certificates_config, keys);
|
||||
boost::container::flat_set<String> common_names;
|
||||
for (const String & key : keys)
|
||||
{
|
||||
if (key.starts_with("common_name"))
|
||||
{
|
||||
String value = config.getString(certificates_config + "." + key);
|
||||
common_names.insert(std::move(value));
|
||||
user->auth_data.addSSLCertificateSubject(SSLCertificateSubjects::Type::CN, std::move(value));
|
||||
}
|
||||
else if (key.starts_with("subject_alt_name"))
|
||||
{
|
||||
String value = config.getString(certificates_config + "." + key);
|
||||
if (value.empty())
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected ssl_certificates.subject_alt_name to not be empty");
|
||||
user->auth_data.addSSLCertificateSubject(SSLCertificateSubjects::Type::SAN, std::move(value));
|
||||
}
|
||||
else
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown certificate pattern type: {}", key);
|
||||
}
|
||||
user->auth_data.setSSLCertificateCommonNames(std::move(common_names));
|
||||
}
|
||||
else if (has_ssh_keys)
|
||||
{
|
||||
@ -880,8 +886,7 @@ void UsersConfigAccessStorage::load(
|
||||
Settings::checkNoSettingNamesAtTopLevel(*new_config, users_config_path);
|
||||
parseFromConfig(*new_config);
|
||||
access_control.getChangesNotifier().sendNotifications();
|
||||
},
|
||||
/* already_loaded = */ false);
|
||||
});
|
||||
}
|
||||
|
||||
void UsersConfigAccessStorage::startPeriodicReloading()
|
||||
|
283
src/AggregateFunctions/AggregateFunctionGroupConcat.cpp
Normal file
283
src/AggregateFunctions/AggregateFunctionGroupConcat.cpp
Normal file
@ -0,0 +1,283 @@
|
||||
#include <AggregateFunctions/IAggregateFunction.h>
|
||||
#include <AggregateFunctions/AggregateFunctionFactory.h>
|
||||
#include <AggregateFunctions/FactoryHelpers.h>
|
||||
|
||||
#include <Columns/IColumn.h>
|
||||
#include <Columns/ColumnNullable.h>
|
||||
#include <Columns/ColumnString.h>
|
||||
|
||||
#include <Core/ServerSettings.h>
|
||||
#include <Core/ColumnWithTypeAndName.h>
|
||||
|
||||
#include <Common/ArenaAllocator.h>
|
||||
#include <Common/assert_cast.h>
|
||||
#include <Interpreters/castColumn.h>
|
||||
|
||||
#include <DataTypes/IDataType.h>
|
||||
#include <DataTypes/DataTypeArray.h>
|
||||
#include <DataTypes/DataTypeString.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
struct Settings;
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int TOO_MANY_ARGUMENTS_FOR_FUNCTION;
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
extern const int BAD_ARGUMENTS;
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
struct GroupConcatDataBase
|
||||
{
|
||||
UInt64 data_size = 0;
|
||||
UInt64 allocated_size = 0;
|
||||
char * data = nullptr;
|
||||
|
||||
void checkAndUpdateSize(UInt64 add, Arena * arena)
|
||||
{
|
||||
if (data_size + add >= allocated_size)
|
||||
{
|
||||
auto old_size = allocated_size;
|
||||
allocated_size = std::max(2 * allocated_size, data_size + add);
|
||||
data = arena->realloc(data, old_size, allocated_size);
|
||||
}
|
||||
}
|
||||
|
||||
void insertChar(const char * str, UInt64 str_size, Arena * arena)
|
||||
{
|
||||
checkAndUpdateSize(str_size, arena);
|
||||
memcpy(data + data_size, str, str_size);
|
||||
data_size += str_size;
|
||||
}
|
||||
|
||||
void insert(const IColumn * column, const SerializationPtr & serialization, size_t row_num, Arena * arena)
|
||||
{
|
||||
WriteBufferFromOwnString buff;
|
||||
serialization->serializeText(*column, row_num, buff, FormatSettings{});
|
||||
auto string = buff.stringView();
|
||||
insertChar(string.data(), string.size(), arena);
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
template <bool has_limit>
|
||||
struct GroupConcatData;
|
||||
|
||||
template<>
|
||||
struct GroupConcatData<false> final : public GroupConcatDataBase
|
||||
{
|
||||
};
|
||||
|
||||
template<>
|
||||
struct GroupConcatData<true> final : public GroupConcatDataBase
|
||||
{
|
||||
using Offset = UInt64;
|
||||
using Allocator = MixedAlignedArenaAllocator<alignof(Offset), 4096>;
|
||||
using Offsets = PODArray<Offset, 32, Allocator>;
|
||||
|
||||
/// offset[i * 2] - beginning of the i-th row, offset[i * 2 + 1] - end of the i-th row
|
||||
Offsets offsets;
|
||||
UInt64 num_rows = 0;
|
||||
|
||||
UInt64 getSize(size_t i) const { return offsets[i * 2 + 1] - offsets[i * 2]; }
|
||||
|
||||
UInt64 getString(size_t i) const { return offsets[i * 2]; }
|
||||
|
||||
void insert(const IColumn * column, const SerializationPtr & serialization, size_t row_num, Arena * arena)
|
||||
{
|
||||
WriteBufferFromOwnString buff;
|
||||
serialization->serializeText(*column, row_num, buff, {});
|
||||
auto string = buff.stringView();
|
||||
|
||||
checkAndUpdateSize(string.size(), arena);
|
||||
memcpy(data + data_size, string.data(), string.size());
|
||||
offsets.push_back(data_size, arena);
|
||||
data_size += string.size();
|
||||
offsets.push_back(data_size, arena);
|
||||
num_rows++;
|
||||
}
|
||||
};
|
||||
|
||||
template <bool has_limit>
|
||||
class GroupConcatImpl final
|
||||
: public IAggregateFunctionDataHelper<GroupConcatData<has_limit>, GroupConcatImpl<has_limit>>
|
||||
{
|
||||
static constexpr auto name = "groupConcat";
|
||||
|
||||
SerializationPtr serialization;
|
||||
UInt64 limit;
|
||||
const String delimiter;
|
||||
|
||||
public:
|
||||
GroupConcatImpl(const DataTypePtr & data_type_, const Array & parameters_, UInt64 limit_, const String & delimiter_)
|
||||
: IAggregateFunctionDataHelper<GroupConcatData<has_limit>, GroupConcatImpl<has_limit>>(
|
||||
{data_type_}, parameters_, std::make_shared<DataTypeString>())
|
||||
, serialization(this->argument_types[0]->getDefaultSerialization())
|
||||
, limit(limit_)
|
||||
, delimiter(delimiter_)
|
||||
{
|
||||
}
|
||||
|
||||
String getName() const override { return name; }
|
||||
|
||||
void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override
|
||||
{
|
||||
auto & cur_data = this->data(place);
|
||||
|
||||
if constexpr (has_limit)
|
||||
if (cur_data.num_rows >= limit)
|
||||
return;
|
||||
|
||||
if (cur_data.data_size != 0)
|
||||
cur_data.insertChar(delimiter.c_str(), delimiter.size(), arena);
|
||||
|
||||
cur_data.insert(columns[0], serialization, row_num, arena);
|
||||
}
|
||||
|
||||
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override
|
||||
{
|
||||
auto & cur_data = this->data(place);
|
||||
auto & rhs_data = this->data(rhs);
|
||||
|
||||
if (rhs_data.data_size == 0)
|
||||
return;
|
||||
|
||||
if constexpr (has_limit)
|
||||
{
|
||||
UInt64 new_elems_count = std::min(rhs_data.num_rows, limit - cur_data.num_rows);
|
||||
for (UInt64 i = 0; i < new_elems_count; ++i)
|
||||
{
|
||||
if (cur_data.data_size != 0)
|
||||
cur_data.insertChar(delimiter.c_str(), delimiter.size(), arena);
|
||||
|
||||
cur_data.offsets.push_back(cur_data.data_size, arena);
|
||||
cur_data.insertChar(rhs_data.data + rhs_data.getString(i), rhs_data.getSize(i), arena);
|
||||
cur_data.num_rows++;
|
||||
cur_data.offsets.push_back(cur_data.data_size, arena);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (cur_data.data_size != 0)
|
||||
cur_data.insertChar(delimiter.c_str(), delimiter.size(), arena);
|
||||
|
||||
cur_data.insertChar(rhs_data.data, rhs_data.data_size, arena);
|
||||
}
|
||||
}
|
||||
|
||||
void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
|
||||
{
|
||||
auto & cur_data = this->data(place);
|
||||
|
||||
writeVarUInt(cur_data.data_size, buf);
|
||||
|
||||
buf.write(cur_data.data, cur_data.data_size);
|
||||
|
||||
if constexpr (has_limit)
|
||||
{
|
||||
writeVarUInt(cur_data.num_rows, buf);
|
||||
for (const auto & offset : cur_data.offsets)
|
||||
writeVarUInt(offset, buf);
|
||||
}
|
||||
}
|
||||
|
||||
void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena * arena) const override
|
||||
{
|
||||
auto & cur_data = this->data(place);
|
||||
|
||||
UInt64 temp_size = 0;
|
||||
readVarUInt(temp_size, buf);
|
||||
|
||||
cur_data.checkAndUpdateSize(temp_size, arena);
|
||||
|
||||
buf.readStrict(cur_data.data + cur_data.data_size, temp_size);
|
||||
cur_data.data_size = temp_size;
|
||||
|
||||
if constexpr (has_limit)
|
||||
{
|
||||
readVarUInt(cur_data.num_rows, buf);
|
||||
cur_data.offsets.resize_exact(cur_data.num_rows * 2, arena);
|
||||
for (auto & offset : cur_data.offsets)
|
||||
readVarUInt(offset, buf);
|
||||
}
|
||||
}
|
||||
|
||||
void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
|
||||
{
|
||||
auto & cur_data = this->data(place);
|
||||
|
||||
if (cur_data.data_size == 0)
|
||||
{
|
||||
to.insertDefault();
|
||||
return;
|
||||
}
|
||||
|
||||
auto & column_string = assert_cast<ColumnString &>(to);
|
||||
column_string.insertData(cur_data.data, cur_data.data_size);
|
||||
}
|
||||
|
||||
bool allocatesMemoryInArena() const override { return true; }
|
||||
};
|
||||
|
||||
AggregateFunctionPtr createAggregateFunctionGroupConcat(
|
||||
const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings *)
|
||||
{
|
||||
assertUnary(name, argument_types);
|
||||
|
||||
bool has_limit = false;
|
||||
UInt64 limit = 0;
|
||||
String delimiter;
|
||||
|
||||
if (parameters.size() > 2)
|
||||
throw Exception(ErrorCodes::TOO_MANY_ARGUMENTS_FOR_FUNCTION,
|
||||
"Incorrect number of parameters for aggregate function {}, should be 0, 1 or 2, got: {}", name, parameters.size());
|
||||
|
||||
if (!parameters.empty())
|
||||
{
|
||||
auto type = parameters[0].getType();
|
||||
if (type != Field::Types::String)
|
||||
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "First parameter for aggregate function {} should be string", name);
|
||||
|
||||
delimiter = parameters[0].get<String>();
|
||||
}
|
||||
if (parameters.size() == 2)
|
||||
{
|
||||
auto type = parameters[1].getType();
|
||||
|
||||
if (type != Field::Types::Int64 && type != Field::Types::UInt64)
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Second parameter for aggregate function {} should be a positive number", name);
|
||||
|
||||
if ((type == Field::Types::Int64 && parameters[1].get<Int64>() <= 0) ||
|
||||
(type == Field::Types::UInt64 && parameters[1].get<UInt64>() == 0))
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Second parameter for aggregate function {} should be a positive number, got: {}", name, parameters[1].get<Int64>());
|
||||
|
||||
has_limit = true;
|
||||
limit = parameters[1].get<UInt64>();
|
||||
}
|
||||
|
||||
if (has_limit)
|
||||
return std::make_shared<GroupConcatImpl</* has_limit= */ true>>(argument_types[0], parameters, limit, delimiter);
|
||||
else
|
||||
return std::make_shared<GroupConcatImpl</* has_limit= */ false>>(argument_types[0], parameters, limit, delimiter);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void registerAggregateFunctionGroupConcat(AggregateFunctionFactory & factory)
|
||||
{
|
||||
AggregateFunctionProperties properties = { .returns_default_when_only_null = false, .is_order_dependent = true };
|
||||
|
||||
factory.registerFunction("groupConcat", { createAggregateFunctionGroupConcat, properties });
|
||||
factory.registerAlias("group_concat", "groupConcat", AggregateFunctionFactory::CaseInsensitive);
|
||||
}
|
||||
|
||||
}
|
@ -91,7 +91,8 @@ public:
|
||||
return std::make_shared<DataTypeNumber<PointType>>();
|
||||
}
|
||||
|
||||
bool allocatesMemoryInArena() const override { return false; }
|
||||
/// MaxIntersectionsData::Allocator uses the arena
|
||||
bool allocatesMemoryInArena() const override { return true; }
|
||||
|
||||
void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override
|
||||
{
|
||||
|
@ -19,6 +19,7 @@ void registerAggregateFunctionGroupArraySorted(AggregateFunctionFactory & factor
|
||||
void registerAggregateFunctionGroupUniqArray(AggregateFunctionFactory &);
|
||||
void registerAggregateFunctionGroupArrayInsertAt(AggregateFunctionFactory &);
|
||||
void registerAggregateFunctionGroupArrayIntersect(AggregateFunctionFactory &);
|
||||
void registerAggregateFunctionGroupConcat(AggregateFunctionFactory &);
|
||||
void registerAggregateFunctionsQuantile(AggregateFunctionFactory &);
|
||||
void registerAggregateFunctionsQuantileDeterministic(AggregateFunctionFactory &);
|
||||
void registerAggregateFunctionsQuantileExact(AggregateFunctionFactory &);
|
||||
@ -120,6 +121,7 @@ void registerAggregateFunctions()
|
||||
registerAggregateFunctionGroupUniqArray(factory);
|
||||
registerAggregateFunctionGroupArrayInsertAt(factory);
|
||||
registerAggregateFunctionGroupArrayIntersect(factory);
|
||||
registerAggregateFunctionGroupConcat(factory);
|
||||
registerAggregateFunctionsQuantile(factory);
|
||||
registerAggregateFunctionsQuantileDeterministic(factory);
|
||||
registerAggregateFunctionsQuantileExact(factory);
|
||||
|
@ -9,6 +9,7 @@
|
||||
#include <Analyzer/InDepthQueryTreeVisitor.h>
|
||||
#include <Analyzer/ConstantNode.h>
|
||||
#include <Analyzer/FunctionNode.h>
|
||||
#include <Analyzer/Utils.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -164,32 +165,15 @@ private:
|
||||
|
||||
auto aggregate_function_clone = aggregate_function->clone();
|
||||
auto & aggregate_function_clone_typed = aggregate_function_clone->as<FunctionNode &>();
|
||||
|
||||
aggregate_function_clone_typed.getArguments().getNodes() = { arithmetic_function_clone_argument };
|
||||
resolveAggregateFunctionNode(aggregate_function_clone_typed, arithmetic_function_clone_argument, result_aggregate_function_name);
|
||||
resolveAggregateFunctionNodeByName(aggregate_function_clone_typed, result_aggregate_function_name);
|
||||
|
||||
arithmetic_function_clone_arguments_nodes[arithmetic_function_argument_index] = std::move(aggregate_function_clone);
|
||||
resolveOrdinaryFunctionNode(arithmetic_function_clone_typed, arithmetic_function_clone_typed.getFunctionName());
|
||||
resolveOrdinaryFunctionNodeByName(arithmetic_function_clone_typed, arithmetic_function_clone_typed.getFunctionName(), getContext());
|
||||
|
||||
return arithmetic_function_clone;
|
||||
}
|
||||
|
||||
void resolveOrdinaryFunctionNode(FunctionNode & function_node, const String & function_name) const
|
||||
{
|
||||
auto function = FunctionFactory::instance().get(function_name, getContext());
|
||||
function_node.resolveAsFunction(function->build(function_node.getArgumentColumns()));
|
||||
}
|
||||
|
||||
static void resolveAggregateFunctionNode(FunctionNode & function_node, const QueryTreeNodePtr & argument, const String & aggregate_function_name)
|
||||
{
|
||||
auto function_aggregate_function = function_node.getAggregateFunction();
|
||||
|
||||
AggregateFunctionProperties properties;
|
||||
auto action = NullsAction::EMPTY;
|
||||
auto aggregate_function = AggregateFunctionFactory::instance().get(
|
||||
aggregate_function_name, action, {argument->getResultType()}, function_aggregate_function->getParameters(), properties);
|
||||
|
||||
function_node.resolveAsAggregateFunction(std::move(aggregate_function));
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -11,6 +11,7 @@
|
||||
#include <Analyzer/InDepthQueryTreeVisitor.h>
|
||||
#include <Analyzer/ConstantNode.h>
|
||||
#include <Analyzer/FunctionNode.h>
|
||||
#include <Analyzer/Utils.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -18,19 +19,18 @@ namespace DB
|
||||
namespace
|
||||
{
|
||||
|
||||
class ComparisonTupleEliminationPassVisitor : public InDepthQueryTreeVisitor<ComparisonTupleEliminationPassVisitor>
|
||||
class ComparisonTupleEliminationPassVisitor : public InDepthQueryTreeVisitorWithContext<ComparisonTupleEliminationPassVisitor>
|
||||
{
|
||||
public:
|
||||
explicit ComparisonTupleEliminationPassVisitor(ContextPtr context_)
|
||||
: context(std::move(context_))
|
||||
{}
|
||||
using Base = InDepthQueryTreeVisitorWithContext<ComparisonTupleEliminationPassVisitor>;
|
||||
using Base::Base;
|
||||
|
||||
static bool needChildVisit(QueryTreeNodePtr &, QueryTreeNodePtr & child)
|
||||
{
|
||||
return child->getNodeType() != QueryTreeNodeType::TABLE_FUNCTION;
|
||||
}
|
||||
|
||||
void visitImpl(QueryTreeNodePtr & node) const
|
||||
void enterImpl(QueryTreeNodePtr & node) const
|
||||
{
|
||||
auto * function_node = node->as<FunctionNode>();
|
||||
if (!function_node)
|
||||
@ -171,13 +171,13 @@ private:
|
||||
{
|
||||
auto result_function = std::make_shared<FunctionNode>("and");
|
||||
result_function->getArguments().getNodes() = std::move(tuple_arguments_equals_functions);
|
||||
resolveOrdinaryFunctionNode(*result_function, result_function->getFunctionName());
|
||||
resolveOrdinaryFunctionNodeByName(*result_function, result_function->getFunctionName(), getContext());
|
||||
|
||||
if (comparison_function_name == "notEquals")
|
||||
{
|
||||
auto not_function = std::make_shared<FunctionNode>("not");
|
||||
not_function->getArguments().getNodes().push_back(std::move(result_function));
|
||||
resolveOrdinaryFunctionNode(*not_function, not_function->getFunctionName());
|
||||
resolveOrdinaryFunctionNodeByName(*not_function, not_function->getFunctionName(), getContext());
|
||||
result_function = std::move(not_function);
|
||||
}
|
||||
|
||||
@ -197,18 +197,10 @@ private:
|
||||
comparison_function->getArguments().getNodes().push_back(std::move(lhs_argument));
|
||||
comparison_function->getArguments().getNodes().push_back(std::move(rhs_argument));
|
||||
|
||||
resolveOrdinaryFunctionNode(*comparison_function, comparison_function->getFunctionName());
|
||||
resolveOrdinaryFunctionNodeByName(*comparison_function, comparison_function->getFunctionName(), getContext());
|
||||
|
||||
return comparison_function;
|
||||
}
|
||||
|
||||
void resolveOrdinaryFunctionNode(FunctionNode & function_node, const String & function_name) const
|
||||
{
|
||||
auto function = FunctionFactory::instance().get(function_name, context);
|
||||
function_node.resolveAsFunction(function->build(function_node.getArgumentColumns()));
|
||||
}
|
||||
|
||||
ContextPtr context;
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -9,6 +9,7 @@
|
||||
#include <Analyzer/ColumnNode.h>
|
||||
#include <Analyzer/FunctionNode.h>
|
||||
#include <Analyzer/QueryNode.h>
|
||||
#include <Analyzer/Utils.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -77,11 +78,9 @@ public:
|
||||
|
||||
/// Replace `countDistinct` of initial query into `count`
|
||||
auto result_type = function_node->getResultType();
|
||||
AggregateFunctionProperties properties;
|
||||
auto action = NullsAction::EMPTY;
|
||||
auto aggregate_function = AggregateFunctionFactory::instance().get("count", action, {}, {}, properties);
|
||||
function_node->resolveAsAggregateFunction(std::move(aggregate_function));
|
||||
|
||||
function_node->getArguments().getNodes().clear();
|
||||
resolveAggregateFunctionNodeByName(*function_node, "count");
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -4,6 +4,7 @@
|
||||
#include <DataTypes/DataTypeTuple.h>
|
||||
#include <DataTypes/DataTypeArray.h>
|
||||
#include <DataTypes/DataTypeMap.h>
|
||||
#include <DataTypes/DataTypeVariant.h>
|
||||
|
||||
#include <Storages/IStorage.h>
|
||||
|
||||
@ -16,6 +17,9 @@
|
||||
#include <Analyzer/ColumnNode.h>
|
||||
#include <Analyzer/FunctionNode.h>
|
||||
#include <Analyzer/TableNode.h>
|
||||
#include <Analyzer/TableFunctionNode.h>
|
||||
#include <Analyzer/Utils.h>
|
||||
#include <Analyzer/JoinNode.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -23,202 +27,410 @@ namespace DB
|
||||
namespace
|
||||
{
|
||||
|
||||
class FunctionToSubcolumnsVisitor : public InDepthQueryTreeVisitorWithContext<FunctionToSubcolumnsVisitor>
|
||||
struct ColumnContext
|
||||
{
|
||||
NameAndTypePair column;
|
||||
QueryTreeNodePtr column_source;
|
||||
ContextPtr context;
|
||||
};
|
||||
|
||||
using NodeToSubcolumnTransformer = std::function<void(QueryTreeNodePtr &, FunctionNode &, ColumnContext &)>;
|
||||
|
||||
void optimizeFunctionLength(QueryTreeNodePtr & node, FunctionNode &, ColumnContext & ctx)
|
||||
{
|
||||
/// Replace `length(argument)` with `argument.size0`
|
||||
/// `argument` may be Array or Map.
|
||||
|
||||
NameAndTypePair column{ctx.column.name + ".size0", std::make_shared<DataTypeUInt64>()};
|
||||
node = std::make_shared<ColumnNode>(column, ctx.column_source);
|
||||
}
|
||||
|
||||
template <bool positive>
|
||||
void optimizeFunctionEmpty(QueryTreeNodePtr &, FunctionNode & function_node, ColumnContext & ctx)
|
||||
{
|
||||
/// Replace `empty(argument)` with `equals(argument.size0, 0)` if positive
|
||||
/// Replace `notEmpty(argument)` with `notEquals(argument.size0, 0)` if not positive
|
||||
/// `argument` may be Array or Map.
|
||||
|
||||
NameAndTypePair column{ctx.column.name + ".size0", std::make_shared<DataTypeUInt64>()};
|
||||
auto & function_arguments_nodes = function_node.getArguments().getNodes();
|
||||
|
||||
function_arguments_nodes.clear();
|
||||
function_arguments_nodes.push_back(std::make_shared<ColumnNode>(column, ctx.column_source));
|
||||
function_arguments_nodes.push_back(std::make_shared<ConstantNode>(static_cast<UInt64>(0)));
|
||||
|
||||
const auto * function_name = positive ? "equals" : "notEquals";
|
||||
resolveOrdinaryFunctionNodeByName(function_node, function_name, ctx.context);
|
||||
}
|
||||
|
||||
String getSubcolumnNameForElement(const Field & value, const DataTypeTuple & data_type_tuple)
|
||||
{
|
||||
if (value.getType() == Field::Types::String)
|
||||
return value.get<const String &>();
|
||||
|
||||
if (value.getType() == Field::Types::UInt64)
|
||||
return data_type_tuple.getNameByPosition(value.get<UInt64>());
|
||||
|
||||
return "";
|
||||
}
|
||||
|
||||
String getSubcolumnNameForElement(const Field & value, const DataTypeVariant &)
|
||||
{
|
||||
if (value.getType() == Field::Types::String)
|
||||
return value.get<const String &>();
|
||||
|
||||
return "";
|
||||
}
|
||||
|
||||
template <typename DataType>
|
||||
void optimizeTupleOrVariantElement(QueryTreeNodePtr & node, FunctionNode & function_node, ColumnContext & ctx)
|
||||
{
|
||||
/// Replace `tupleElement(tuple_argument, string_literal)`, `tupleElement(tuple_argument, integer_literal)` with `tuple_argument.column_name`.
|
||||
/// Replace `variantElement(variant_argument, string_literal)` with `variant_argument.column_name`.
|
||||
|
||||
auto & function_arguments_nodes = function_node.getArguments().getNodes();
|
||||
if (function_arguments_nodes.size() != 2)
|
||||
return;
|
||||
|
||||
const auto * second_argument_constant_node = function_arguments_nodes[1]->as<ConstantNode>();
|
||||
if (!second_argument_constant_node)
|
||||
return;
|
||||
|
||||
const auto & data_type_concrete = assert_cast<const DataType &>(*ctx.column.type);
|
||||
auto subcolumn_name = getSubcolumnNameForElement(second_argument_constant_node->getValue(), data_type_concrete);
|
||||
|
||||
if (subcolumn_name.empty())
|
||||
return;
|
||||
|
||||
NameAndTypePair column{ctx.column.name + "." + subcolumn_name, function_node.getResultType()};
|
||||
node = std::make_shared<ColumnNode>(column, ctx.column_source);
|
||||
}
|
||||
|
||||
std::map<std::pair<TypeIndex, String>, NodeToSubcolumnTransformer> node_transformers =
|
||||
{
|
||||
{
|
||||
{TypeIndex::Array, "length"}, optimizeFunctionLength,
|
||||
},
|
||||
{
|
||||
{TypeIndex::Array, "empty"}, optimizeFunctionEmpty<true>,
|
||||
},
|
||||
{
|
||||
{TypeIndex::Array, "notEmpty"}, optimizeFunctionEmpty<false>,
|
||||
},
|
||||
{
|
||||
{TypeIndex::Map, "length"}, optimizeFunctionLength,
|
||||
},
|
||||
{
|
||||
{TypeIndex::Map, "empty"}, optimizeFunctionEmpty<true>,
|
||||
},
|
||||
{
|
||||
{TypeIndex::Map, "notEmpty"}, optimizeFunctionEmpty<false>,
|
||||
},
|
||||
{
|
||||
{TypeIndex::Map, "mapKeys"},
|
||||
[](QueryTreeNodePtr & node, FunctionNode & function_node, ColumnContext & ctx)
|
||||
{
|
||||
/// Replace `mapKeys(map_argument)` with `map_argument.keys`
|
||||
NameAndTypePair column{ctx.column.name + ".keys", function_node.getResultType()};
|
||||
node = std::make_shared<ColumnNode>(column, ctx.column_source);
|
||||
},
|
||||
},
|
||||
{
|
||||
{TypeIndex::Map, "mapValues"},
|
||||
[](QueryTreeNodePtr & node, FunctionNode & function_node, ColumnContext & ctx)
|
||||
{
|
||||
/// Replace `mapValues(map_argument)` with `map_argument.values`
|
||||
NameAndTypePair column{ctx.column.name + ".values", function_node.getResultType()};
|
||||
node = std::make_shared<ColumnNode>(column, ctx.column_source);
|
||||
},
|
||||
},
|
||||
{
|
||||
{TypeIndex::Map, "mapContains"},
|
||||
[](QueryTreeNodePtr &, FunctionNode & function_node, ColumnContext & ctx)
|
||||
{
|
||||
/// Replace `mapContains(map_argument, argument)` with `has(map_argument.keys, argument)`
|
||||
const auto & data_type_map = assert_cast<const DataTypeMap &>(*ctx.column.type);
|
||||
|
||||
NameAndTypePair column{ctx.column.name + ".keys", std::make_shared<DataTypeArray>(data_type_map.getKeyType())};
|
||||
auto & function_arguments_nodes = function_node.getArguments().getNodes();
|
||||
|
||||
auto has_function_argument = std::make_shared<ColumnNode>(column, ctx.column_source);
|
||||
function_arguments_nodes[0] = std::move(has_function_argument);
|
||||
|
||||
resolveOrdinaryFunctionNodeByName(function_node, "has", ctx.context);
|
||||
},
|
||||
},
|
||||
{
|
||||
{TypeIndex::Nullable, "count"},
|
||||
[](QueryTreeNodePtr &, FunctionNode & function_node, ColumnContext & ctx)
|
||||
{
|
||||
/// Replace `count(nullable_argument)` with `sum(not(nullable_argument.null))`
|
||||
NameAndTypePair column{ctx.column.name + ".null", std::make_shared<DataTypeUInt8>()};
|
||||
auto & function_arguments_nodes = function_node.getArguments().getNodes();
|
||||
|
||||
auto new_column_node = std::make_shared<ColumnNode>(column, ctx.column_source);
|
||||
auto function_node_not = std::make_shared<FunctionNode>("not");
|
||||
|
||||
function_node_not->getArguments().getNodes().push_back(std::move(new_column_node));
|
||||
resolveOrdinaryFunctionNodeByName(*function_node_not, "not", ctx.context);
|
||||
|
||||
function_arguments_nodes = {std::move(function_node_not)};
|
||||
resolveAggregateFunctionNodeByName(function_node, "sum");
|
||||
},
|
||||
},
|
||||
{
|
||||
{TypeIndex::Nullable, "isNull"},
|
||||
[](QueryTreeNodePtr & node, FunctionNode &, ColumnContext & ctx)
|
||||
{
|
||||
/// Replace `isNull(nullable_argument)` with `nullable_argument.null`
|
||||
NameAndTypePair column{ctx.column.name + ".null", std::make_shared<DataTypeUInt8>()};
|
||||
node = std::make_shared<ColumnNode>(column, ctx.column_source);
|
||||
},
|
||||
},
|
||||
{
|
||||
{TypeIndex::Nullable, "isNotNull"},
|
||||
[](QueryTreeNodePtr &, FunctionNode & function_node, ColumnContext & ctx)
|
||||
{
|
||||
/// Replace `isNotNull(nullable_argument)` with `not(nullable_argument.null)`
|
||||
NameAndTypePair column{ctx.column.name + ".null", std::make_shared<DataTypeUInt8>()};
|
||||
auto & function_arguments_nodes = function_node.getArguments().getNodes();
|
||||
|
||||
function_arguments_nodes = {std::make_shared<ColumnNode>(column, ctx.column_source)};
|
||||
resolveOrdinaryFunctionNodeByName(function_node, "not", ctx.context);
|
||||
},
|
||||
},
|
||||
{
|
||||
{TypeIndex::Tuple, "tupleElement"}, optimizeTupleOrVariantElement<DataTypeTuple>,
|
||||
},
|
||||
{
|
||||
{TypeIndex::Variant, "variantElement"}, optimizeTupleOrVariantElement<DataTypeVariant>,
|
||||
},
|
||||
};
|
||||
|
||||
std::tuple<FunctionNode *, ColumnNode *, TableNode *> getTypedNodesForOptimization(const QueryTreeNodePtr & node)
|
||||
{
|
||||
auto * function_node = node->as<FunctionNode>();
|
||||
if (!function_node)
|
||||
return {};
|
||||
|
||||
auto & function_arguments_nodes = function_node->getArguments().getNodes();
|
||||
if (function_arguments_nodes.empty() || function_arguments_nodes.size() > 2)
|
||||
return {};
|
||||
|
||||
auto * first_argument_column_node = function_arguments_nodes.front()->as<ColumnNode>();
|
||||
if (!first_argument_column_node || first_argument_column_node->getColumnName() == "__grouping_set")
|
||||
return {};
|
||||
|
||||
auto column_source = first_argument_column_node->getColumnSource();
|
||||
auto * table_node = column_source->as<TableNode>();
|
||||
if (!table_node)
|
||||
return {};
|
||||
|
||||
const auto & storage = table_node->getStorage();
|
||||
const auto & storage_snapshot = table_node->getStorageSnapshot();
|
||||
auto column = first_argument_column_node->getColumn();
|
||||
|
||||
if (!storage->supportsOptimizationToSubcolumns() || storage->isVirtualColumn(column.name, storage_snapshot->metadata))
|
||||
return {};
|
||||
|
||||
auto column_in_table = storage_snapshot->tryGetColumn(GetColumnsOptions::All, column.name);
|
||||
if (!column_in_table || !column_in_table->type->equals(*column.type))
|
||||
return {};
|
||||
|
||||
return std::make_tuple(function_node, first_argument_column_node, table_node);
|
||||
}
|
||||
|
||||
/// First pass collects info about identifiers to determine which identifiers are allowed to optimize.
|
||||
class FunctionToSubcolumnsVisitorFirstPass : public InDepthQueryTreeVisitorWithContext<FunctionToSubcolumnsVisitorFirstPass>
|
||||
{
|
||||
public:
|
||||
using Base = InDepthQueryTreeVisitorWithContext<FunctionToSubcolumnsVisitor>;
|
||||
using Base = InDepthQueryTreeVisitorWithContext<FunctionToSubcolumnsVisitorFirstPass>;
|
||||
using Base::Base;
|
||||
|
||||
void enterImpl(const QueryTreeNodePtr & node)
|
||||
{
|
||||
if (!getSettings().optimize_functions_to_subcolumns)
|
||||
return;
|
||||
|
||||
if (auto * table_node = node->as<TableNode>())
|
||||
{
|
||||
enterImpl(*table_node);
|
||||
return;
|
||||
}
|
||||
|
||||
if (auto * column_node = node->as<ColumnNode>())
|
||||
{
|
||||
enterImpl(*column_node);
|
||||
return;
|
||||
}
|
||||
|
||||
auto [function_node, first_argument_node, table_node] = getTypedNodesForOptimization(node);
|
||||
if (function_node && first_argument_node && table_node)
|
||||
{
|
||||
enterImpl(*function_node, *first_argument_node, *table_node);
|
||||
return;
|
||||
}
|
||||
|
||||
if (const auto * join_node = node->as<JoinNode>())
|
||||
{
|
||||
can_wrap_result_columns_with_nullable |= getContext()->getSettingsRef().join_use_nulls;
|
||||
return;
|
||||
}
|
||||
|
||||
if (const auto * query_node = node->as<QueryNode>())
|
||||
{
|
||||
if (query_node->isGroupByWithCube() || query_node->isGroupByWithRollup() || query_node->isGroupByWithGroupingSets())
|
||||
can_wrap_result_columns_with_nullable |= getContext()->getSettingsRef().group_by_use_nulls;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
std::unordered_set<Identifier> getIdentifiersToOptimize() const
|
||||
{
|
||||
if (can_wrap_result_columns_with_nullable)
|
||||
{
|
||||
/// Do not optimize if we have JOIN with setting join_use_null.
|
||||
/// Do not optimize if we have GROUP BY WITH ROLLUP/CUBE/GROUPING SETS with setting group_by_use_nulls.
|
||||
/// It may change the behaviour if subcolumn can be converted
|
||||
/// to Nullable while the original column cannot (e.g. for Array type).
|
||||
return {};
|
||||
}
|
||||
|
||||
/// Do not optimize if full column is requested in other context.
|
||||
/// It doesn't make sense because it doesn't reduce amount of read data
|
||||
/// and optimized functions are not computation heavy. But introducing
|
||||
/// new identifier complicates query analysis and may break it.
|
||||
///
|
||||
/// E.g. query:
|
||||
/// SELECT n FROM table GROUP BY n HAVING isNotNull(n)
|
||||
/// may be optimized to incorrect query:
|
||||
/// SELECT n FROM table GROUP BY n HAVING not(n.null)
|
||||
/// Will produce: `n.null` is not under aggregate function and not in GROUP BY keys)
|
||||
///
|
||||
/// Do not optimize index columns (primary, min-max, secondary),
|
||||
/// because otherwise analysis of indexes may be broken.
|
||||
/// TODO: handle subcolumns in index analysis.
|
||||
|
||||
std::unordered_set<Identifier> identifiers_to_optimize;
|
||||
for (const auto & [identifier, count] : optimized_identifiers_count)
|
||||
{
|
||||
if (all_key_columns.contains(identifier))
|
||||
continue;
|
||||
|
||||
auto it = identifiers_count.find(identifier);
|
||||
if (it != identifiers_count.end() && it->second == count)
|
||||
identifiers_to_optimize.insert(identifier);
|
||||
}
|
||||
|
||||
return identifiers_to_optimize;
|
||||
}
|
||||
|
||||
private:
|
||||
std::unordered_set<Identifier> all_key_columns;
|
||||
std::unordered_map<Identifier, UInt64> identifiers_count;
|
||||
std::unordered_map<Identifier, UInt64> optimized_identifiers_count;
|
||||
|
||||
NameSet processed_tables;
|
||||
bool can_wrap_result_columns_with_nullable = false;
|
||||
|
||||
void enterImpl(const TableNode & table_node)
|
||||
{
|
||||
auto table_name = table_node.getStorage()->getStorageID().getFullTableName();
|
||||
if (processed_tables.emplace(table_name).second)
|
||||
return;
|
||||
|
||||
auto add_key_columns = [&](const auto & key_columns)
|
||||
{
|
||||
for (const auto & column_name : key_columns)
|
||||
{
|
||||
Identifier identifier({table_name, column_name});
|
||||
all_key_columns.insert(identifier);
|
||||
}
|
||||
};
|
||||
|
||||
const auto & metadata_snapshot = table_node.getStorageSnapshot()->metadata;
|
||||
const auto & primary_key_columns = metadata_snapshot->getColumnsRequiredForPrimaryKey();
|
||||
const auto & partition_key_columns = metadata_snapshot->getColumnsRequiredForPartitionKey();
|
||||
|
||||
add_key_columns(primary_key_columns);
|
||||
add_key_columns(partition_key_columns);
|
||||
|
||||
for (const auto & index : metadata_snapshot->getSecondaryIndices())
|
||||
{
|
||||
const auto & index_columns = index.expression->getRequiredColumns();
|
||||
add_key_columns(index_columns);
|
||||
}
|
||||
}
|
||||
|
||||
void enterImpl(const ColumnNode & column_node)
|
||||
{
|
||||
if (column_node.getColumnName() == "__grouping_set")
|
||||
return;
|
||||
|
||||
auto column_source = column_node.getColumnSource();
|
||||
auto * table_node = column_source->as<TableNode>();
|
||||
if (!table_node)
|
||||
return;
|
||||
|
||||
auto table_name = table_node->getStorage()->getStorageID().getFullTableName();
|
||||
Identifier qualified_name({table_name, column_node.getColumnName()});
|
||||
|
||||
++identifiers_count[qualified_name];
|
||||
}
|
||||
|
||||
void enterImpl(const FunctionNode & function_node, const ColumnNode & first_argument_column_node, const TableNode & table_node)
|
||||
{
|
||||
/// For queries with FINAL converting function to subcolumn may alter
|
||||
/// special merging algorithms and produce wrong result of query.
|
||||
if (table_node.hasTableExpressionModifiers() && table_node.getTableExpressionModifiers()->hasFinal())
|
||||
return;
|
||||
|
||||
const auto & column = first_argument_column_node.getColumn();
|
||||
auto table_name = table_node.getStorage()->getStorageID().getFullTableName();
|
||||
Identifier qualified_name({table_name, column.name});
|
||||
|
||||
if (node_transformers.contains({column.type->getTypeId(), function_node.getFunctionName()}))
|
||||
++optimized_identifiers_count[qualified_name];
|
||||
}
|
||||
};
|
||||
|
||||
/// Second pass optimizes functions to subcolumns for allowed identifiers.
|
||||
class FunctionToSubcolumnsVisitorSecondPass : public InDepthQueryTreeVisitorWithContext<FunctionToSubcolumnsVisitorSecondPass>
|
||||
{
|
||||
private:
|
||||
std::unordered_set<Identifier> identifiers_to_optimize;
|
||||
|
||||
public:
|
||||
using Base = InDepthQueryTreeVisitorWithContext<FunctionToSubcolumnsVisitorSecondPass>;
|
||||
using Base::Base;
|
||||
|
||||
FunctionToSubcolumnsVisitorSecondPass(ContextPtr context_, std::unordered_set<Identifier> identifiers_to_optimize_)
|
||||
: Base(std::move(context_)), identifiers_to_optimize(std::move(identifiers_to_optimize_))
|
||||
{
|
||||
}
|
||||
|
||||
void enterImpl(QueryTreeNodePtr & node) const
|
||||
{
|
||||
if (!getSettings().optimize_functions_to_subcolumns)
|
||||
return;
|
||||
|
||||
auto * function_node = node->as<FunctionNode>();
|
||||
if (!function_node)
|
||||
return;
|
||||
|
||||
auto & function_arguments_nodes = function_node->getArguments().getNodes();
|
||||
size_t function_arguments_nodes_size = function_arguments_nodes.size();
|
||||
|
||||
if (function_arguments_nodes.empty() || function_arguments_nodes_size > 2)
|
||||
return;
|
||||
|
||||
auto * first_argument_column_node = function_arguments_nodes.front()->as<ColumnNode>();
|
||||
|
||||
if (!first_argument_column_node)
|
||||
return;
|
||||
|
||||
if (first_argument_column_node->getColumnName() == "__grouping_set")
|
||||
return;
|
||||
|
||||
auto column_source = first_argument_column_node->getColumnSource();
|
||||
auto * table_node = column_source->as<TableNode>();
|
||||
|
||||
if (!table_node)
|
||||
return;
|
||||
|
||||
const auto & storage = table_node->getStorage();
|
||||
if (!storage->supportsSubcolumns())
|
||||
auto [function_node, first_argument_column_node, table_node] = getTypedNodesForOptimization(node);
|
||||
if (!function_node || !first_argument_column_node || !table_node)
|
||||
return;
|
||||
|
||||
auto column = first_argument_column_node->getColumn();
|
||||
WhichDataType column_type(column.type);
|
||||
auto table_name = table_node->getStorage()->getStorageID().getFullTableName();
|
||||
|
||||
const auto & function_name = function_node->getFunctionName();
|
||||
Identifier qualified_name({table_name, column.name});
|
||||
if (!identifiers_to_optimize.contains(qualified_name))
|
||||
return;
|
||||
|
||||
if (function_arguments_nodes_size == 1)
|
||||
auto transformer_it = node_transformers.find({column.type->getTypeId(), function_node->getFunctionName()});
|
||||
if (transformer_it != node_transformers.end())
|
||||
{
|
||||
if (column_type.isArray())
|
||||
{
|
||||
if (function_name == "length")
|
||||
{
|
||||
/// Replace `length(array_argument)` with `array_argument.size0`
|
||||
column.name += ".size0";
|
||||
column.type = std::make_shared<DataTypeUInt64>();
|
||||
|
||||
node = std::make_shared<ColumnNode>(column, column_source);
|
||||
}
|
||||
else if (function_name == "empty")
|
||||
{
|
||||
/// Replace `empty(array_argument)` with `equals(array_argument.size0, 0)`
|
||||
column.name += ".size0";
|
||||
column.type = std::make_shared<DataTypeUInt64>();
|
||||
|
||||
function_arguments_nodes.clear();
|
||||
function_arguments_nodes.push_back(std::make_shared<ColumnNode>(column, column_source));
|
||||
function_arguments_nodes.push_back(std::make_shared<ConstantNode>(static_cast<UInt64>(0)));
|
||||
|
||||
resolveOrdinaryFunctionNode(*function_node, "equals");
|
||||
}
|
||||
else if (function_name == "notEmpty")
|
||||
{
|
||||
/// Replace `notEmpty(array_argument)` with `notEquals(array_argument.size0, 0)`
|
||||
column.name += ".size0";
|
||||
column.type = std::make_shared<DataTypeUInt64>();
|
||||
|
||||
function_arguments_nodes.clear();
|
||||
function_arguments_nodes.push_back(std::make_shared<ColumnNode>(column, column_source));
|
||||
function_arguments_nodes.push_back(std::make_shared<ConstantNode>(static_cast<UInt64>(0)));
|
||||
|
||||
resolveOrdinaryFunctionNode(*function_node, "notEquals");
|
||||
}
|
||||
}
|
||||
else if (column_type.isNullable())
|
||||
{
|
||||
if (function_name == "isNull")
|
||||
{
|
||||
/// Replace `isNull(nullable_argument)` with `nullable_argument.null`
|
||||
column.name += ".null";
|
||||
column.type = std::make_shared<DataTypeUInt8>();
|
||||
|
||||
node = std::make_shared<ColumnNode>(column, column_source);
|
||||
}
|
||||
else if (function_name == "isNotNull")
|
||||
{
|
||||
/// Replace `isNotNull(nullable_argument)` with `not(nullable_argument.null)`
|
||||
column.name += ".null";
|
||||
column.type = std::make_shared<DataTypeUInt8>();
|
||||
|
||||
function_arguments_nodes = {std::make_shared<ColumnNode>(column, column_source)};
|
||||
|
||||
resolveOrdinaryFunctionNode(*function_node, "not");
|
||||
}
|
||||
}
|
||||
else if (column_type.isMap())
|
||||
{
|
||||
if (function_name == "mapKeys")
|
||||
{
|
||||
/// Replace `mapKeys(map_argument)` with `map_argument.keys`
|
||||
column.name += ".keys";
|
||||
column.type = function_node->getResultType();
|
||||
|
||||
node = std::make_shared<ColumnNode>(column, column_source);
|
||||
}
|
||||
else if (function_name == "mapValues")
|
||||
{
|
||||
/// Replace `mapValues(map_argument)` with `map_argument.values`
|
||||
column.name += ".values";
|
||||
column.type = function_node->getResultType();
|
||||
|
||||
node = std::make_shared<ColumnNode>(column, column_source);
|
||||
}
|
||||
}
|
||||
ColumnContext ctx{std::move(column), first_argument_column_node->getColumnSource(), getContext()};
|
||||
transformer_it->second(node, *function_node, ctx);
|
||||
}
|
||||
else
|
||||
{
|
||||
const auto * second_argument_constant_node = function_arguments_nodes[1]->as<ConstantNode>();
|
||||
|
||||
if (function_name == "tupleElement" && column_type.isTuple() && second_argument_constant_node)
|
||||
{
|
||||
/** Replace `tupleElement(tuple_argument, string_literal)`, `tupleElement(tuple_argument, integer_literal)`
|
||||
* with `tuple_argument.column_name`.
|
||||
*/
|
||||
const auto & tuple_element_constant_value = second_argument_constant_node->getValue();
|
||||
const auto & tuple_element_constant_value_type = tuple_element_constant_value.getType();
|
||||
|
||||
const auto & data_type_tuple = assert_cast<const DataTypeTuple &>(*column.type);
|
||||
|
||||
String subcolumn_name;
|
||||
|
||||
if (tuple_element_constant_value_type == Field::Types::String)
|
||||
{
|
||||
subcolumn_name = tuple_element_constant_value.get<const String &>();
|
||||
}
|
||||
else if (tuple_element_constant_value_type == Field::Types::UInt64)
|
||||
{
|
||||
auto tuple_column_index = tuple_element_constant_value.get<UInt64>();
|
||||
subcolumn_name = data_type_tuple.getNameByPosition(tuple_column_index);
|
||||
}
|
||||
else
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
column.name += '.';
|
||||
column.name += subcolumn_name;
|
||||
column.type = function_node->getResultType();
|
||||
|
||||
node = std::make_shared<ColumnNode>(column, column_source);
|
||||
}
|
||||
else if (function_name == "variantElement" && isVariant(column_type) && second_argument_constant_node)
|
||||
{
|
||||
/// Replace `variantElement(variant_argument, type_name)` with `variant_argument.type_name`.
|
||||
const auto & variant_element_constant_value = second_argument_constant_node->getValue();
|
||||
String subcolumn_name;
|
||||
|
||||
if (variant_element_constant_value.getType() != Field::Types::String)
|
||||
return;
|
||||
|
||||
subcolumn_name = variant_element_constant_value.get<const String &>();
|
||||
|
||||
column.name += '.';
|
||||
column.name += subcolumn_name;
|
||||
column.type = function_node->getResultType();
|
||||
|
||||
node = std::make_shared<ColumnNode>(column, column_source);
|
||||
}
|
||||
else if (function_name == "mapContains" && column_type.isMap())
|
||||
{
|
||||
const auto & data_type_map = assert_cast<const DataTypeMap &>(*column.type);
|
||||
|
||||
/// Replace `mapContains(map_argument, argument)` with `has(map_argument.keys, argument)`
|
||||
column.name += ".keys";
|
||||
column.type = std::make_shared<DataTypeArray>(data_type_map.getKeyType());
|
||||
|
||||
auto has_function_argument = std::make_shared<ColumnNode>(column, column_source);
|
||||
function_arguments_nodes[0] = std::move(has_function_argument);
|
||||
|
||||
resolveOrdinaryFunctionNode(*function_node, "has");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
void resolveOrdinaryFunctionNode(FunctionNode & function_node, const String & function_name) const
|
||||
{
|
||||
auto function = FunctionFactory::instance().get(function_name, getContext());
|
||||
function_node.resolveAsFunction(function->build(function_node.getArgumentColumns()));
|
||||
}
|
||||
};
|
||||
|
||||
@ -226,8 +438,15 @@ private:
|
||||
|
||||
void FunctionToSubcolumnsPass::run(QueryTreeNodePtr & query_tree_node, ContextPtr context)
|
||||
{
|
||||
FunctionToSubcolumnsVisitor visitor(context);
|
||||
visitor.visit(query_tree_node);
|
||||
FunctionToSubcolumnsVisitorFirstPass first_visitor(context);
|
||||
first_visitor.visit(query_tree_node);
|
||||
auto identifiers_to_optimize = first_visitor.getIdentifiersToOptimize();
|
||||
|
||||
if (identifiers_to_optimize.empty())
|
||||
return;
|
||||
|
||||
FunctionToSubcolumnsVisitorSecondPass second_visitor(std::move(context), std::move(identifiers_to_optimize));
|
||||
second_visitor.visit(query_tree_node);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -6,6 +6,7 @@
|
||||
#include <Analyzer/InDepthQueryTreeVisitor.h>
|
||||
#include <Analyzer/ConstantNode.h>
|
||||
#include <Analyzer/FunctionNode.h>
|
||||
#include <Analyzer/Utils.h>
|
||||
#include <Interpreters/Context.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
|
||||
@ -47,25 +48,17 @@ public:
|
||||
|
||||
if (function_node->getFunctionName() == "count" && !first_argument_constant_literal.isNull())
|
||||
{
|
||||
resolveAsCountAggregateFunction(*function_node);
|
||||
function_node->getArguments().getNodes().clear();
|
||||
resolveAggregateFunctionNodeByName(*function_node, "count");
|
||||
}
|
||||
else if (function_node->getFunctionName() == "sum" &&
|
||||
first_argument_constant_literal.getType() == Field::Types::UInt64 &&
|
||||
first_argument_constant_literal.get<UInt64>() == 1)
|
||||
{
|
||||
resolveAsCountAggregateFunction(*function_node);
|
||||
function_node->getArguments().getNodes().clear();
|
||||
resolveAggregateFunctionNodeByName(*function_node, "count");
|
||||
}
|
||||
}
|
||||
private:
|
||||
static void resolveAsCountAggregateFunction(FunctionNode & function_node)
|
||||
{
|
||||
AggregateFunctionProperties properties;
|
||||
auto aggregate_function = AggregateFunctionFactory::instance().get("count", NullsAction::EMPTY, {}, {}, properties);
|
||||
|
||||
function_node.resolveAsAggregateFunction(std::move(aggregate_function));
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -5,6 +5,7 @@
|
||||
#include <Analyzer/ColumnNode.h>
|
||||
#include <Analyzer/ConstantNode.h>
|
||||
#include <Analyzer/FunctionNode.h>
|
||||
#include <Analyzer/Utils.h>
|
||||
#include <Analyzer/InDepthQueryTreeVisitor.h>
|
||||
#include <Common/DateLUT.h>
|
||||
#include <Common/DateLUTImpl.h>
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user