diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index d9f9e9d6c8b..e045170561d 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -6,6 +6,7 @@ tests/ci/cancel_and_rerun_workflow_lambda/app.py --> ### Changelog category (leave one): - New Feature +- Experimental Feature - Improvement - Performance Improvement - Backward Incompatible Change diff --git a/.github/workflows/backport_branches.yml b/.github/workflows/backport_branches.yml index e1980ec9ef2..64c3d2f8342 100644 --- a/.github/workflows/backport_branches.yml +++ b/.github/workflows/backport_branches.yml @@ -159,33 +159,24 @@ jobs: ############################################################################################ ##################################### BUILD REPORTER ####################################### ############################################################################################ - BuilderReport: + Builds_Report: # run report check for failed builds to indicate the CI error - if: ${{ !cancelled() }} - needs: - - RunConfig - - BuilderDebAarch64 - - BuilderDebAsan - - BuilderDebDebug - - BuilderDebRelease - - BuilderDebTsan - uses: ./.github/workflows/reusable_test.yml - with: - test_name: ClickHouse build check - runner_type: style-checker-aarch64 - data: ${{ needs.RunConfig.outputs.data }} - BuilderSpecialReport: - # run report check for failed builds to indicate the CI error - if: ${{ !cancelled() }} - needs: - - RunConfig - - BuilderBinDarwin - - BuilderBinDarwinAarch64 - uses: ./.github/workflows/reusable_test.yml - with: - test_name: ClickHouse special build check - runner_type: style-checker-aarch64 - data: ${{ needs.RunConfig.outputs.data }} + if: ${{ !cancelled() && needs.RunConfig.result == 'success' && contains(fromJson(needs.RunConfig.outputs.data).jobs_data.jobs_to_do, 'Builds') }} + needs: [RunConfig, BuilderDebAarch64, BuilderDebAsan, BuilderDebDebug, BuilderDebRelease, BuilderDebTsan, BuilderBinDarwin, BuilderBinDarwinAarch64] + runs-on: [self-hosted, style-checker-aarch64] + steps: + - name: Check out repository code + uses: ClickHouse/checkout@v1 + - name: Download reports + run: | + python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --infile ${{ toJson(needs.RunConfig.outputs.data) }} --pre --job-name Builds + - name: Builds report + run: | + cd "$GITHUB_WORKSPACE/tests/ci" + python3 ./build_report_check.py --reports package_release package_aarch64 package_asan package_tsan package_debug binary_darwin binary_darwin_aarch64 + - name: Set status + run: | + python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --infile ${{ toJson(needs.RunConfig.outputs.data) }} --post --job-name Builds ############################################################################################ #################################### INSTALL PACKAGES ###################################### ############################################################################################ @@ -256,8 +247,7 @@ jobs: FinishCheck: if: ${{ !failure() && !cancelled() }} needs: - - BuilderReport - - BuilderSpecialReport + - Builds_Report - FunctionalStatelessTestAsan - FunctionalStatefulTestDebug - StressTestTsan @@ -273,5 +263,8 @@ jobs: - name: Finish label run: | cd "$GITHUB_WORKSPACE/tests/ci" + # update mergeable check + python3 merge_pr.py --set-ci-status --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }} + # update overall ci report python3 finish_check.py --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }} python3 
merge_pr.py diff --git a/.github/workflows/create_release.yml b/.github/workflows/create_release.yml new file mode 100644 index 00000000000..3988df3b2b1 --- /dev/null +++ b/.github/workflows/create_release.yml @@ -0,0 +1,29 @@ +name: CreateRelease + +concurrency: + group: release + +'on': + workflow_dispatch: + inputs: + sha: + description: 'The SHA hash of the commit from which to create the release' + required: true + type: string + type: + description: 'The type of release: "new" for a new release or "patch" for a patch release' + required: true + type: choice + options: + - new + - patch + +jobs: + Release: + runs-on: [self-hosted, style-checker-aarch64] + steps: + - name: Check out repository code + uses: ClickHouse/checkout@v1 + - name: Print greeting + run: | + python3 ./tests/ci/release.py --commit ${{ inputs.sha }} --type ${{ inputs.type }} --dry-run diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index 384bf6825f9..2a7e6f737ab 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -117,11 +117,11 @@ jobs: # Reports should run even if Builds_1/2 fail - run them separately, not in Tests_1/2/3 Builds_Report: # run report check for failed builds to indicate the CI error - if: ${{ !cancelled() && needs.RunConfig.result == 'success' && contains(fromJson(needs.RunConfig.outputs.data).jobs_data.jobs_to_do, 'ClickHouse build check') }} + if: ${{ !cancelled() && needs.RunConfig.result == 'success' && contains(fromJson(needs.RunConfig.outputs.data).jobs_data.jobs_to_do, 'Builds') }} needs: [RunConfig, Builds_1, Builds_2] uses: ./.github/workflows/reusable_test.yml with: - test_name: ClickHouse build check + test_name: Builds runner_type: style-checker-aarch64 data: ${{ needs.RunConfig.outputs.data }} diff --git a/.github/workflows/merge_queue.yml b/.github/workflows/merge_queue.yml index cfa01b0e8f3..01685ee1f5a 100644 --- a/.github/workflows/merge_queue.yml +++ b/.github/workflows/merge_queue.yml @@ -96,20 +96,15 @@ jobs: stage: Tests_1 data: ${{ needs.RunConfig.outputs.data }} - ################################# Stage Final ################################# - # - FinishCheck: - if: ${{ !cancelled() }} + CheckReadyForMerge: + if: ${{ !cancelled() && needs.StyleCheck.result == 'success' }} + # Test_2 or Test_3 must not have jobs required for Mergeable check needs: [RunConfig, BuildDockers, StyleCheck, FastTest, Builds_1, Tests_1] runs-on: [self-hosted, style-checker-aarch64] steps: - name: Check out repository code uses: ClickHouse/checkout@v1 - - name: Check sync status + - name: Check and set merge status run: | cd "$GITHUB_WORKSPACE/tests/ci" - python3 sync_pr.py --status - - name: Finish label - run: | - cd "$GITHUB_WORKSPACE/tests/ci" - python3 finish_check.py --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }} + python3 merge_pr.py --set-ci-status --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }} diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index 84dd4222e36..4764e6d3c1a 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -146,11 +146,11 @@ jobs: # Reports should run even if Builds_1/2 fail - run them separately (not in Tests_1/2/3) Builds_Report: # run report check for failed builds to indicate the CI error - if: ${{ !cancelled() && needs.RunConfig.result == 'success' && contains(fromJson(needs.RunConfig.outputs.data).jobs_data.jobs_to_do, 'ClickHouse build check') }} + if: ${{ !cancelled() && 
needs.RunConfig.result == 'success' && contains(fromJson(needs.RunConfig.outputs.data).jobs_data.jobs_to_do, 'Builds') }} needs: [RunConfig, StyleCheck, Builds_1, Builds_2] uses: ./.github/workflows/reusable_test.yml with: - test_name: ClickHouse build check + test_name: Builds runner_type: style-checker-aarch64 data: ${{ needs.RunConfig.outputs.data }} diff --git a/.github/workflows/release_branches.yml b/.github/workflows/release_branches.yml index 45eb7431bb4..6bf846d7535 100644 --- a/.github/workflows/release_branches.yml +++ b/.github/workflows/release_branches.yml @@ -176,35 +176,24 @@ jobs: ############################################################################################ ##################################### BUILD REPORTER ####################################### ############################################################################################ - BuilderReport: + Builds_Report: # run report check for failed builds to indicate the CI error - if: ${{ !cancelled() }} - needs: - - RunConfig - - BuilderDebRelease - - BuilderDebAarch64 - - BuilderDebAsan - - BuilderDebTsan - - BuilderDebUBsan - - BuilderDebMsan - - BuilderDebDebug - uses: ./.github/workflows/reusable_test.yml - with: - test_name: ClickHouse build check - runner_type: style-checker-aarch64 - data: ${{ needs.RunConfig.outputs.data }} - BuilderSpecialReport: - # run report check for failed builds to indicate the CI error - if: ${{ !cancelled() }} - needs: - - RunConfig - - BuilderBinDarwin - - BuilderBinDarwinAarch64 - uses: ./.github/workflows/reusable_test.yml - with: - test_name: ClickHouse special build check - runner_type: style-checker-aarch64 - data: ${{ needs.RunConfig.outputs.data }} + if: ${{ !cancelled() && needs.RunConfig.result == 'success' && contains(fromJson(needs.RunConfig.outputs.data).jobs_data.jobs_to_do, 'Builds') }} + needs: [RunConfig, BuilderDebRelease, BuilderDebAarch64, BuilderDebAsan, BuilderDebUBsan, BuilderDebMsan, BuilderDebTsan, BuilderDebDebug, BuilderBinDarwin, BuilderBinDarwinAarch64] + runs-on: [self-hosted, style-checker-aarch64] + steps: + - name: Check out repository code + uses: ClickHouse/checkout@v1 + - name: Download reports + run: | + python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --infile ${{ toJson(needs.RunConfig.outputs.data) }} --pre --job-name Builds + - name: Builds report + run: | + cd "$GITHUB_WORKSPACE/tests/ci" + python3 ./build_report_check.py --reports package_release package_aarch64 package_asan package_msan package_ubsan package_tsan package_debug binary_darwin binary_darwin_aarch64 + - name: Set status + run: | + python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --infile ${{ toJson(needs.RunConfig.outputs.data) }} --post --job-name Builds MarkReleaseReady: if: ${{ !failure() && !cancelled() }} needs: @@ -460,8 +449,7 @@ jobs: needs: - DockerServerImage - DockerKeeperImage - - BuilderReport - - BuilderSpecialReport + - Builds_Report - MarkReleaseReady - FunctionalStatelessTestDebug - FunctionalStatelessTestRelease @@ -496,4 +484,7 @@ jobs: - name: Finish label run: | cd "$GITHUB_WORKSPACE/tests/ci" + # update mergeable check + python3 merge_pr.py --set-ci-status --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }} + # update overall ci report python3 finish_check.py --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }} diff --git a/CHANGELOG.md b/CHANGELOG.md index 4891b79e4c7..c4935f88245 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,5 @@ ### Table of Contents +**[ClickHouse release v24.6, 
2024-06-27](#246)**
**[ClickHouse release v24.5, 2024-05-30](#245)**
**[ClickHouse release v24.4, 2024-04-30](#244)**
**[ClickHouse release v24.3 LTS, 2024-03-26](#243)**
@@ -8,6 +9,179 @@ # 2024 Changelog +### ClickHouse release 24.6, 2024-06-27 + +#### Backward Incompatible Change +* Enable asynchronous load of databases and tables by default. See the `async_load_databases` in config.xml. While this change is fully compatible, it can introduce a difference in behavior. When `async_load_databases` is false, as in the previous versions, the server will not accept connections until all tables are loaded. When `async_load_databases` is true, as in the new version, the server can accept connections before all the tables are loaded. If a query is made to a table that is not yet loaded, it will wait for the table's loading, which can take considerable time. It can change the behavior of the server if it is part of a large distributed system under a load balancer. In the first case, the load balancer can get a connection refusal and quickly failover to another server. In the second case, the load balancer can connect to a server that is still loading the tables, and the query will have a higher latency. Moreover, if many queries accumulate in the waiting state, it can lead to a "thundering herd" problem when they start processing simultaneously. This can make a difference only for highly loaded distributed backends. You can set the value of `async_load_databases` to false to avoid this problem. [#57695](https://github.com/ClickHouse/ClickHouse/pull/57695) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Some invalid queries will fail earlier during parsing. Note: disabled the support for inline KQL expressions (the experimental Kusto language) when they are put into a `kql` table function without a string literal, e.g. `kql(garbage | trash)` instead of `kql('garbage | trash')` or `kql($$garbage | trash$$)`. This feature was introduced unintentionally and should not exist. [#61500](https://github.com/ClickHouse/ClickHouse/pull/61500) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Rework parallel processing in `Ordered` mode of storage `S3Queue`. This PR is backward incompatible for Ordered mode if you used settings `s3queue_processing_threads_num` or `s3queue_total_shards_num`. Setting `s3queue_total_shards_num` is deleted, previously it was allowed to use only under `s3queue_allow_experimental_sharded_mode`, which is now deprecated. A new setting is added - `s3queue_buckets`. [#64349](https://github.com/ClickHouse/ClickHouse/pull/64349) ([Kseniia Sumarokova](https://github.com/kssenii)). +* New functions `snowflakeIDToDateTime`, `snowflakeIDToDateTime64`, `dateTimeToSnowflakeID`, and `dateTime64ToSnowflakeID` were added. Unlike the existing functions `snowflakeToDateTime`, `snowflakeToDateTime64`, `dateTimeToSnowflake`, and `dateTime64ToSnowflake`, the new functions are compatible with function `generateSnowflakeID`, i.e. they accept the snowflake IDs generated by `generateSnowflakeID` and produce snowflake IDs of the same type as `generateSnowflakeID` (i.e. `UInt64`). Furthermore, the new functions default to the UNIX epoch (aka. 1970-01-01), just like `generateSnowflakeID`. If necessary, a different epoch, e.g. Twitter's/X's epoch 2010-11-04 aka. 1288834974657 msec since UNIX epoch, can be passed. The old conversion functions are deprecated and will be removed after a transition period: to use them regardless, enable setting `allow_deprecated_snowflake_conversion_functions`. [#64948](https://github.com/ClickHouse/ClickHouse/pull/64948) ([Robert Schulze](https://github.com/rschu1ze)). 
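For a concrete feel of the snowflake ID entry above, here is a minimal sketch (assuming a 24.6 server; the aliases are illustrative, and the round trip loses sub-second precision because `DateTime` has second granularity):

```sql
-- Generate a snowflake ID, convert it to a timestamp, and convert the timestamp back.
SELECT
    generateSnowflakeID() AS id,
    snowflakeIDToDateTime(id) AS ts,
    dateTimeToSnowflakeID(ts) AS id_from_ts;
```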
+ +#### New Feature +* Introduce statistics of type "number of distinct values". [#59357](https://github.com/ClickHouse/ClickHouse/pull/59357) ([Han Fei](https://github.com/hanfei1991)). +* Add Hilbert Curve encode and decode functions. [#60156](https://github.com/ClickHouse/ClickHouse/pull/60156) ([Artem Mustafin](https://github.com/Artemmm91)). +* Added support for reading LINESTRING geometry in WKT format using function `readWKTLineString`. [#62519](https://github.com/ClickHouse/ClickHouse/pull/62519) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Allow to attach parts from a different disk. [#63087](https://github.com/ClickHouse/ClickHouse/pull/63087) ([Unalian](https://github.com/Unalian)). +* Allow proxy to be bypassed for hosts specified in `no_proxy` env variable and ClickHouse proxy configuration. [#63314](https://github.com/ClickHouse/ClickHouse/pull/63314) ([Arthur Passos](https://github.com/arthurpassos)). +* Added a new table function `loop` to support returning query results in an infinite loop. [#63452](https://github.com/ClickHouse/ClickHouse/pull/63452) ([Sariel](https://github.com/sarielwxm)). +* Added new SQL functions `generateSnowflakeID` for generating Twitter-style Snowflake IDs. [#63577](https://github.com/ClickHouse/ClickHouse/pull/63577) ([Danila Puzov](https://github.com/kazalika)). +* Add the ability to reshuffle rows during insert to optimize for size without violating the order set by `PRIMARY KEY`. It's controlled by the setting `optimize_row_order` (off by default). [#63578](https://github.com/ClickHouse/ClickHouse/pull/63578) ([Igor Markelov](https://github.com/ElderlyPassionFruit)). +* Added `merge_workload` and `mutation_workload` settings to regulate how resources are utilized and shared between merges, mutations and other workloads. [#64061](https://github.com/ClickHouse/ClickHouse/pull/64061) ([Sergei Trifonov](https://github.com/serxa)). +* Add support for comparing IPv4 and IPv6 types using the `=` operator. [#64292](https://github.com/ClickHouse/ClickHouse/pull/64292) ([Francisco J. Jurado Moreno](https://github.com/Beetelbrox)). +* Allow to store named collections in zookeeper. [#64574](https://github.com/ClickHouse/ClickHouse/pull/64574) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Support decimal arguments in binary math functions (pow, atan2, max2, min2, hypot). [#64582](https://github.com/ClickHouse/ClickHouse/pull/64582) ([Mikhail Gorshkov](https://github.com/mgorshkov)). +* Add support for index analysis over `hilbertEncode`. [#64662](https://github.com/ClickHouse/ClickHouse/pull/64662) ([Artem Mustafin](https://github.com/Artemmm91)). +* Added SQL functions `parseReadableSize` (along with `OrNull` and `OrZero` variants). [#64742](https://github.com/ClickHouse/ClickHouse/pull/64742) ([Francisco J. Jurado Moreno](https://github.com/Beetelbrox)). +* Add server settings `max_table_num_to_throw` and `max_database_num_to_throw` to limit the number of databases or tables on `CREATE` queries. [#64781](https://github.com/ClickHouse/ClickHouse/pull/64781) ([Xu Jia](https://github.com/XuJia0210)). +* Add _time virtual column to file alike storages (s3/file/hdfs/url/azureBlobStorage). [#64947](https://github.com/ClickHouse/ClickHouse/pull/64947) ([Ilya Golshtein](https://github.com/ilejn)). +* Introduced new functions `base64URLEncode`, `base64URLDecode` and `tryBase64URLDecode`. [#64991](https://github.com/ClickHouse/ClickHouse/pull/64991) ([Mikhail Gorshkov](https://github.com/mgorshkov)). 
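A quick, hedged sketch of the URL-safe Base64 functions mentioned above (the input string is made up; `tryBase64URLDecode` is expected to return an empty string instead of throwing on invalid input):

```sql
-- URL-safe Base64 uses '-' and '_' in place of '+' and '/'.
SELECT
    base64URLEncode('https://clickhouse.com/docs?query=a+b') AS encoded,
    base64URLDecode(encoded) AS decoded,
    tryBase64URLDecode('definitely not base64!') AS fallback;
```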
+* Add new function `editDistanceUTF8`, which calculates the [edit distance](https://en.wikipedia.org/wiki/Edit_distance) between two UTF8 strings. [#65269](https://github.com/ClickHouse/ClickHouse/pull/65269) ([LiuNeng](https://github.com/liuneng1994)). + +#### Performance Improvement +* Add a native Parquet reader, which can read Parquet binary data into ClickHouse columns directly. It's controlled by the setting `input_format_parquet_use_native_reader` (disabled by default). [#60361](https://github.com/ClickHouse/ClickHouse/pull/60361) ([ZhiHong Zhang](https://github.com/copperybean)). +* Reduce the number of virtual function calls in `ColumnNullable::size()`. [#60556](https://github.com/ClickHouse/ClickHouse/pull/60556) ([HappenLee](https://github.com/HappenLee)). +* Speed up `splitByRegexp` when the regular expression argument is a single character. [#62696](https://github.com/ClickHouse/ClickHouse/pull/62696) ([Robert Schulze](https://github.com/rschu1ze)). +* Speed up FixedHashTable by keeping track of the min and max keys used. This reduces the number of cells that need to be verified. [#62746](https://github.com/ClickHouse/ClickHouse/pull/62746) ([Jiebin Sun](https://github.com/jiebinn)). +* Optimize the resolution of `in(LowCardinality, ConstantSet)`. [#64060](https://github.com/ClickHouse/ClickHouse/pull/64060) ([Zhiguo Zhou](https://github.com/ZhiguoZh)). +* Use a thread pool to initialize and destroy hash tables inside `ConcurrentHashJoin`. [#64241](https://github.com/ClickHouse/ClickHouse/pull/64241) ([Nikita Taranov](https://github.com/nickitat)). +* Optimized vertical merges in tables with sparse columns. [#64311](https://github.com/ClickHouse/ClickHouse/pull/64311) ([Anton Popov](https://github.com/CurtizJ)). +* Enabled prefetches of data from the remote filesystem during vertical merges. This improves the latency of vertical merges in tables with data stored on a remote filesystem. [#64314](https://github.com/ClickHouse/ClickHouse/pull/64314) ([Anton Popov](https://github.com/CurtizJ)). +* Reduce redundant calls to `isDefault()` in `ColumnSparse::filter` to improve performance. [#64426](https://github.com/ClickHouse/ClickHouse/pull/64426) ([Jiebin Sun](https://github.com/jiebinn)). +* Speed up the `find_super_nodes` and `find_big_family` keeper-client commands by making multiple asynchronous `getChildren` requests. [#64628](https://github.com/ClickHouse/ClickHouse/pull/64628) ([Alexander Gololobov](https://github.com/davenger)). +* Improve functions `least`/`greatest` for nullable numeric type arguments. [#64668](https://github.com/ClickHouse/ClickHouse/pull/64668) ([KevinyhZou](https://github.com/KevinyhZou)). +* Allow merging two consecutive `FilterSteps` of a query plan. This improves filter-push-down optimization if the filter condition can be pushed down from the parent step. [#64760](https://github.com/ClickHouse/ClickHouse/pull/64760) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Remove a bad optimization in the vertical final implementation and re-enable the vertical final algorithm by default. [#64783](https://github.com/ClickHouse/ClickHouse/pull/64783) ([Duc Canh Le](https://github.com/canhld94)). +* Remove ALIAS nodes from the filter expression. This slightly improves performance for queries with `PREWHERE` (with the new analyzer). [#64793](https://github.com/ClickHouse/ClickHouse/pull/64793) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix performance regression in cross join introduced in [#60459](https://github.com/ClickHouse/ClickHouse/issues/60459) (24.5). 
[#65243](https://github.com/ClickHouse/ClickHouse/pull/65243) ([Nikita Taranov](https://github.com/nickitat)). + +#### Improvement +* Support empty tuples. [#55061](https://github.com/ClickHouse/ClickHouse/pull/55061) ([Amos Bird](https://github.com/amosbird)). +* Hot reload of the storage policy for distributed tables when adding a new disk. [#58285](https://github.com/ClickHouse/ClickHouse/pull/58285) ([Duc Canh Le](https://github.com/canhld94)). +* Avoid a possible deadlock during MergeTree index analysis when scheduling threads in a saturated service. [#59427](https://github.com/ClickHouse/ClickHouse/pull/59427) ([Sean Haynes](https://github.com/seandhaynes)). +* Support partial trivial count optimization when the query filter is able to select exact ranges from MergeTree tables. [#60463](https://github.com/ClickHouse/ClickHouse/pull/60463) ([Amos Bird](https://github.com/amosbird)). +* Reduce the max memory usage of multithreaded `INSERT`s by collecting chunks from multiple threads in a single transform. [#61047](https://github.com/ClickHouse/ClickHouse/pull/61047) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* Reduce the memory usage when using Azure object storage by using fixed memory allocation, avoiding the allocation of an extra buffer. [#63160](https://github.com/ClickHouse/ClickHouse/pull/63160) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Several minor corner case fixes to proxy support & tunneling. [#63427](https://github.com/ClickHouse/ClickHouse/pull/63427) ([Arthur Passos](https://github.com/arthurpassos)). +* Add `http_response_headers` setting to support custom response headers in custom HTTP handlers. [#63562](https://github.com/ClickHouse/ClickHouse/pull/63562) ([Grigorii](https://github.com/GSokol)). +* Improve io_uring resubmit visibility. Rename profile event `IOUringSQEsResubmits` -> `IOUringSQEsResubmitsAsync` and add a new one `IOUringSQEsResubmitsSync`. [#63699](https://github.com/ClickHouse/ClickHouse/pull/63699) ([Tomer Shafir](https://github.com/tomershafir)). +* Introduce assertions to verify all functions are called with columns of the right size. [#63723](https://github.com/ClickHouse/ClickHouse/pull/63723) ([Raúl Marín](https://github.com/Algunenano)). +* `SHOW CREATE TABLE` executed on top of system tables will now show the super handy comment, unique for each table, which explains why this table is needed. [#63788](https://github.com/ClickHouse/ClickHouse/pull/63788) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Added setting `metadata_storage_type` to keep free space on the metadata storage disk. [#64128](https://github.com/ClickHouse/ClickHouse/pull/64128) ([MikhailBurdukov](https://github.com/MikhailBurdukov)). +* Add metrics to track the number of directories created and removed by the plain_rewritable metadata storage, and the number of entries in the local-to-remote in-memory map. [#64175](https://github.com/ClickHouse/ClickHouse/pull/64175) ([Julia Kartseva](https://github.com/jkartseva)). +* The query cache now considers identical queries with different settings as different. This increases robustness in cases where different settings (e.g. `limit` or `additional_table_filters`) would affect the query result. [#64205](https://github.com/ClickHouse/ClickHouse/pull/64205) ([Robert Schulze](https://github.com/rschu1ze)). +* Better exception message when deleting a table with a projection, so users can understand the error and the steps that should be taken. 
[#64212](https://github.com/ClickHouse/ClickHouse/pull/64212) ([jsc0218](https://github.com/jsc0218)). +* Support the non-standard error code `QpsLimitExceeded` in object storage as a retryable error. [#64225](https://github.com/ClickHouse/ClickHouse/pull/64225) ([Sema Checherinda](https://github.com/CheSema)). +* Forbid converting a MergeTree table to replicated if the ZooKeeper path for this table already exists. [#64244](https://github.com/ClickHouse/ClickHouse/pull/64244) ([Kirill](https://github.com/kirillgarbar)). +* If a "replica group" is configured for a `Replicated` database, automatically create a cluster that includes replicas from all groups. [#64312](https://github.com/ClickHouse/ClickHouse/pull/64312) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Added settings to disable materialization of skip indexes and statistics on inserts (`materialize_skip_indexes_on_insert` and `materialize_statistics_on_insert`). [#64391](https://github.com/ClickHouse/ClickHouse/pull/64391) ([Anton Popov](https://github.com/CurtizJ)). +* Use the allocated memory size to calculate the row group size and reduce the peak memory of the Parquet writer in single-threaded mode. [#64424](https://github.com/ClickHouse/ClickHouse/pull/64424) ([LiuNeng](https://github.com/liuneng1994)). +* Added a new setting `input_format_parquet_prefer_block_bytes` to control the average output block size in bytes, and changed the default value of `input_format_parquet_max_block_size` to 65409. [#64427](https://github.com/ClickHouse/ClickHouse/pull/64427) ([LiuNeng](https://github.com/liuneng1994)). +* Always start Keeper with a sufficient number of threads in the global thread pool. [#64444](https://github.com/ClickHouse/ClickHouse/pull/64444) ([Duc Canh Le](https://github.com/canhld94)). +* Settings from the user config no longer affect merges and mutations for MergeTree on top of object storage. [#64456](https://github.com/ClickHouse/ClickHouse/pull/64456) ([alesapin](https://github.com/alesapin)). +* Setting `replace_long_file_name_to_hash` is enabled by default for `MergeTree` tables. [#64457](https://github.com/ClickHouse/ClickHouse/pull/64457) ([Anton Popov](https://github.com/CurtizJ)). +* Improve the iterator of sparse columns to reduce calls to `size()`. [#64497](https://github.com/ClickHouse/ClickHouse/pull/64497) ([Jiebin Sun](https://github.com/jiebinn)). +* Update the condition for using copy for Azure Blob Storage. [#64518](https://github.com/ClickHouse/ClickHouse/pull/64518) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Support the non-standard error code `TotalQpsLimitExceeded` in object storage as a retryable error. [#64520](https://github.com/ClickHouse/ClickHouse/pull/64520) ([Sema Checherinda](https://github.com/CheSema)). +* Optimized memory usage of vertical merges for tables with a high number of skip indexes. [#64580](https://github.com/ClickHouse/ClickHouse/pull/64580) ([Anton Popov](https://github.com/CurtizJ)). +* Introduced two additional columns in the `system.query_log`: `used_privileges` and `missing_privileges`. `used_privileges` is populated with the privileges that were checked during query execution, and `missing_privileges` contains required privileges that are missing. [#64597](https://github.com/ClickHouse/ClickHouse/pull/64597) ([Alexey Katsman](https://github.com/alexkats)). +* Add settings `parallel_replicas_custom_key_range_lower` and `parallel_replicas_custom_key_range_upper` to control how parallel replicas with dynamic shards parallelize queries when using a range filter. 
[#64604](https://github.com/ClickHouse/ClickHouse/pull/64604) ([josh-hildred](https://github.com/josh-hildred)). +* Updated the Advanced Dashboard for both open-source and ClickHouse Cloud versions to include a chart for 'Maximum concurrent network connections'. [#64610](https://github.com/ClickHouse/ClickHouse/pull/64610) ([Thom O'Connor](https://github.com/thomoco)). +* The second argument (scale) of functions `round()`, `roundBankers()`, `floor()`, `ceil()` and `trunc()` can now be non-const. [#64798](https://github.com/ClickHouse/ClickHouse/pull/64798) ([Mikhail Gorshkov](https://github.com/mgorshkov)). +* Improve the progress report for `zeros_mt` and `generateRandom`. [#64804](https://github.com/ClickHouse/ClickHouse/pull/64804) ([Raúl Marín](https://github.com/Algunenano)). +* Add an asynchronous metric `jemalloc.profile.active` to show whether sampling is currently active. This is an activation mechanism in addition to `prof.active`; both must be active for the calling thread to sample. [#64842](https://github.com/ClickHouse/ClickHouse/pull/64842) ([Unalian](https://github.com/Unalian)). +* Support statistics with ReplicatedMergeTree. [#64934](https://github.com/ClickHouse/ClickHouse/pull/64934) ([Han Fei](https://github.com/hanfei1991)). +* Remove the mark of `allow_experimental_join_condition` as important. This mark may have prevented distributed queries in a mixed-version cluster from being executed successfully. [#65008](https://github.com/ClickHouse/ClickHouse/pull/65008) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Added server asynchronous metrics `DiskGetObjectThrottler*` and `DiskPutObjectThrottler*` reflecting the request-per-second rate limit defined with the `s3_max_get_rps` and `s3_max_put_rps` disk settings and the currently available number of requests that could be sent without hitting the throttling limit on the disk. Metrics are defined for every disk that has a configured limit. [#65050](https://github.com/ClickHouse/ClickHouse/pull/65050) ([Sergei Trifonov](https://github.com/serxa)). +* Added a setting `output_format_pretty_display_footer_column_names` which, when enabled, displays column names at the end of the table for long tables (50 rows by default), with the threshold for the minimum number of rows controlled by `output_format_pretty_display_footer_column_names_min_rows`; see the sketch after this release's changelog below. [#65144](https://github.com/ClickHouse/ClickHouse/pull/65144) ([Shaun Struwig](https://github.com/Blargian)). +* Returned the previous behaviour of how ClickHouse interprets Tuples in CSV format. This change effectively reverts https://github.com/ClickHouse/ClickHouse/pull/60994 and makes it available only under a few settings: `output_format_csv_serialize_tuple_into_separate_columns`, `input_format_csv_deserialize_separate_columns_into_tuple` and `input_format_csv_try_infer_strings_from_quoted_tuples`. [#65170](https://github.com/ClickHouse/ClickHouse/pull/65170) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Initialize the global trace collector for `Poco::ThreadPool` (needed for Keeper, etc.). [#65239](https://github.com/ClickHouse/ClickHouse/pull/65239) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Add validation when creating a user with `bcrypt_hash`. [#65242](https://github.com/ClickHouse/ClickHouse/pull/65242) ([Raúl Marín](https://github.com/Algunenano)). +* Unite the s3/hdfs/azure storage implementations into a single class working with `IObjectStorage`. Same for *Cluster, data lakes and Queue storages. 
[#59767](https://github.com/ClickHouse/ClickHouse/pull/59767) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Refactor data part writer to remove dependencies on MergeTreeData and DataPart. [#63620](https://github.com/ClickHouse/ClickHouse/pull/63620) ([Alexander Gololobov](https://github.com/davenger)). +* Add profile events for number of rows read during/after prewhere. [#64198](https://github.com/ClickHouse/ClickHouse/pull/64198) ([Nikita Taranov](https://github.com/nickitat)). +* Print query in explain plan with parallel replicas. [#64298](https://github.com/ClickHouse/ClickHouse/pull/64298) ([vdimir](https://github.com/vdimir)). +* Rename `allow_deprecated_functions` to `allow_deprecated_error_prone_window_functions`. [#64358](https://github.com/ClickHouse/ClickHouse/pull/64358) ([Raúl Marín](https://github.com/Algunenano)). +* Respect `max_read_buffer_size` setting for file descriptors as well in file() table function. [#64532](https://github.com/ClickHouse/ClickHouse/pull/64532) ([Azat Khuzhin](https://github.com/azat)). +* Disable transactions for unsupported storages even for materialized views. [#64918](https://github.com/ClickHouse/ClickHouse/pull/64918) ([alesapin](https://github.com/alesapin)). +* Refactor `KeyCondition` and key analysis to improve PartitionPruner and trivial count optimization. This is separated from [#60463](https://github.com/ClickHouse/ClickHouse/issues/60463) . [#61459](https://github.com/ClickHouse/ClickHouse/pull/61459) ([Amos Bird](https://github.com/amosbird)). + +#### Bug Fix (user-visible misbehavior in an official stable release) +* Fix a permission error where a user in a specific situation can escalate their privileges on the default database without necessary grants. [#64769](https://github.com/ClickHouse/ClickHouse/pull/64769) ([pufit](https://github.com/pufit)). +* Fix crash with UniqInjectiveFunctionsEliminationPass and uniqCombined. [#65188](https://github.com/ClickHouse/ClickHouse/pull/65188) ([Raúl Marín](https://github.com/Algunenano)). +* Fix a bug in ClickHouse Keeper that causes digest mismatch during closing session. [#65198](https://github.com/ClickHouse/ClickHouse/pull/65198) ([Aleksei Filatov](https://github.com/aalexfvk)). +* Forbid `QUALIFY` clause in the old analyzer. The old analyzer ignored `QUALIFY`, so it could lead to unexpected data removal in mutations. [#65356](https://github.com/ClickHouse/ClickHouse/pull/65356) ([Dmitry Novik](https://github.com/novikd)). +* Use correct memory alignment for Distinct combinator. Previously, crash could happen because of invalid memory allocation when the combinator was used. [#65379](https://github.com/ClickHouse/ClickHouse/pull/65379) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix crash with `DISTINCT` and window functions. [#64767](https://github.com/ClickHouse/ClickHouse/pull/64767) ([Igor Nikonov](https://github.com/devcrafter)). +* Fixed 'set' skip index not working with IN and indexHint(). [#62083](https://github.com/ClickHouse/ClickHouse/pull/62083) ([Michael Kolupaev](https://github.com/al13n321)). +* Support executing function during assignment of parameterized view value. [#63502](https://github.com/ClickHouse/ClickHouse/pull/63502) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Fixed parquet memory tracking. [#63584](https://github.com/ClickHouse/ClickHouse/pull/63584) ([Michael Kolupaev](https://github.com/al13n321)). +* Fixed reading of columns of type `Tuple(Map(LowCardinality(String), String), ...)`. 
[#63956](https://github.com/ClickHouse/ClickHouse/pull/63956) ([Anton Popov](https://github.com/CurtizJ)). +* Fix a `Cyclic aliases` error for cyclic aliases of different types (expression and function). [#63993](https://github.com/ClickHouse/ClickHouse/pull/63993) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Use a properly redefined context with the correct definer for each individual view in the query pipeline. [#64079](https://github.com/ClickHouse/ClickHouse/pull/64079) ([pufit](https://github.com/pufit)). +* Fix the `Not found column` error in the analyzer when using `INTERPOLATE`. [#64096](https://github.com/ClickHouse/ClickHouse/pull/64096) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Fix creating backups to S3 buckets with different credentials from the disk containing the file. [#64153](https://github.com/ClickHouse/ClickHouse/pull/64153) ([Antonio Andelic](https://github.com/antonio2368)). +* The query cache now considers two identical queries against different databases as different. The previous behavior could be used to bypass missing privileges to read from a table. [#64199](https://github.com/ClickHouse/ClickHouse/pull/64199) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix a possible abort on an uncaught exception in `~WriteBufferFromFileDescriptor` in `StatusFile`. [#64206](https://github.com/ClickHouse/ClickHouse/pull/64206) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix the `duplicate alias` error for distributed queries with `ARRAY JOIN`. [#64226](https://github.com/ClickHouse/ClickHouse/pull/64226) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix unexpected `accurateCast` from string to integer. [#64255](https://github.com/ClickHouse/ClickHouse/pull/64255) ([wudidapaopao](https://github.com/wudidapaopao)). +* Fixed CNF simplification when an OR group contains mutually exclusive atoms. [#64256](https://github.com/ClickHouse/ClickHouse/pull/64256) ([Eduard Karacharov](https://github.com/korowa)). +* Fix Query Tree size validation. [#64377](https://github.com/ClickHouse/ClickHouse/pull/64377) ([Dmitry Novik](https://github.com/novikd)). +* Fix `Logical error: Bad cast` for `Buffer` table with `PREWHERE`. [#64388](https://github.com/ClickHouse/ClickHouse/pull/64388) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Prevent recursive logging in `blob_storage_log` when it's stored on object storage. [#64393](https://github.com/ClickHouse/ClickHouse/pull/64393) ([vdimir](https://github.com/vdimir)). +* Fixed `CREATE TABLE AS` queries for tables with default expressions. [#64455](https://github.com/ClickHouse/ClickHouse/pull/64455) ([Anton Popov](https://github.com/CurtizJ)). +* Fixed `optimize_read_in_order` behaviour for `ORDER BY ... NULLS FIRST / LAST` on tables with nullable keys. [#64483](https://github.com/ClickHouse/ClickHouse/pull/64483) ([Eduard Karacharov](https://github.com/korowa)). +* Fix the `Expression nodes list expected 1 projection names` and `Unknown expression or identifier` errors for queries with aliases to `GLOBAL IN`. [#64517](https://github.com/ClickHouse/ClickHouse/pull/64517) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix a `Cannot find column` error in distributed queries with a constant CTE in the `GROUP BY` key. [#64519](https://github.com/ClickHouse/ClickHouse/pull/64519) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fixed ORC statistics calculation, when writing, for unsigned types on all platforms and Int8 on ARM. 
[#64563](https://github.com/ClickHouse/ClickHouse/pull/64563) ([Michael Kolupaev](https://github.com/al13n321)). +* Fix the crash loop when restoring from backup is blocked by creating an MV with a definer that hasn't been restored yet. [#64595](https://github.com/ClickHouse/ClickHouse/pull/64595) ([pufit](https://github.com/pufit)). +* Fix the output of function `formatDateTimeInJodaSyntax` when a formatter generates an uneven number of characters and the last character is `0`. For example, `SELECT formatDateTimeInJodaSyntax(toDate('2012-05-29'), 'D')` now correctly returns `150` instead of previously `15`. [#64614](https://github.com/ClickHouse/ClickHouse/pull/64614) ([LiuNeng](https://github.com/liuneng1994)). +* Do not rewrite aggregation if `-If` combinator is already used. [#64638](https://github.com/ClickHouse/ClickHouse/pull/64638) ([Dmitry Novik](https://github.com/novikd)). +* Fix type inference for float (in case of small buffer, i.e. `--max_read_buffer_size 1`). [#64641](https://github.com/ClickHouse/ClickHouse/pull/64641) ([Azat Khuzhin](https://github.com/azat)). +* Fix bug which could lead to non-working TTLs with expressions. [#64694](https://github.com/ClickHouse/ClickHouse/pull/64694) ([alesapin](https://github.com/alesapin)). +* Fix removing the `WHERE` and `PREWHERE` expressions, which are always true (for the new analyzer). [#64695](https://github.com/ClickHouse/ClickHouse/pull/64695) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fixed excessive part elimination by token-based text indexes (`ngrambf` , `full_text`) when filtering by result of `startsWith`, `endsWith`, `match`, `multiSearchAny`. [#64720](https://github.com/ClickHouse/ClickHouse/pull/64720) ([Eduard Karacharov](https://github.com/korowa)). +* Fixes incorrect behaviour of ANSI CSI escaping in the `UTF8::computeWidth` function. [#64756](https://github.com/ClickHouse/ClickHouse/pull/64756) ([Shaun Struwig](https://github.com/Blargian)). +* Fix a case of incorrect removal of `ORDER BY` / `LIMIT BY` across subqueries. [#64766](https://github.com/ClickHouse/ClickHouse/pull/64766) ([Raúl Marín](https://github.com/Algunenano)). +* Fix (experimental) unequal join with subqueries for sets which are in the mixed join conditions. [#64775](https://github.com/ClickHouse/ClickHouse/pull/64775) ([lgbo](https://github.com/lgbo-ustc)). +* Fix crash in a local cache over `plain_rewritable` disk. [#64778](https://github.com/ClickHouse/ClickHouse/pull/64778) ([Julia Kartseva](https://github.com/jkartseva)). +* Keeper fix: return correct value for `zk_latest_snapshot_size` in `mntr` command. [#64784](https://github.com/ClickHouse/ClickHouse/pull/64784) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix `Cannot find column` in distributed query with `ARRAY JOIN` by `Nested` column. Fixes [#64755](https://github.com/ClickHouse/ClickHouse/issues/64755). [#64801](https://github.com/ClickHouse/ClickHouse/pull/64801) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix memory leak in slru cache policy. [#64803](https://github.com/ClickHouse/ClickHouse/pull/64803) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fixed possible incorrect memory tracking in several kinds of queries: queries that read any data from S3, queries via http protocol, asynchronous inserts. [#64844](https://github.com/ClickHouse/ClickHouse/pull/64844) ([Anton Popov](https://github.com/CurtizJ)). 
+* Fix the `Block structure mismatch` error for queries reading with `PREWHERE` from the materialized view when the materialized view has columns of different types than the source table. Fixes [#64611](https://github.com/ClickHouse/ClickHouse/issues/64611). [#64855](https://github.com/ClickHouse/ClickHouse/pull/64855) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix rare crash when table has TTL with subquery + database replicated + parallel replicas + analyzer. It's really rare, but please don't use TTLs with subqueries. [#64858](https://github.com/ClickHouse/ClickHouse/pull/64858) ([alesapin](https://github.com/alesapin)). +* Fix duplicating `Delete` events in `blob_storage_log` in case of large batch to delete. [#64924](https://github.com/ClickHouse/ClickHouse/pull/64924) ([vdimir](https://github.com/vdimir)). +* Fixed `Session moved to another server` error from [Zoo]Keeper that might happen after server startup when the config has includes from [Zoo]Keeper. [#64986](https://github.com/ClickHouse/ClickHouse/pull/64986) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix `ALTER MODIFY COMMENT` query that was broken for parameterized VIEWs in https://github.com/ClickHouse/ClickHouse/pull/54211. [#65031](https://github.com/ClickHouse/ClickHouse/pull/65031) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fix `host_id` in DatabaseReplicated when `cluster_secure_connection` parameter is enabled. Previously all the connections within the cluster created by DatabaseReplicated were not secure, even if the parameter was enabled. [#65054](https://github.com/ClickHouse/ClickHouse/pull/65054) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fixing the `Not-ready Set` error after the `PREWHERE` optimization for StorageMerge. [#65057](https://github.com/ClickHouse/ClickHouse/pull/65057) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Avoid writing to finalized buffer in File-like storages. [#65063](https://github.com/ClickHouse/ClickHouse/pull/65063) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix possible infinite query duration in case of cyclic aliases. Fixes [#64849](https://github.com/ClickHouse/ClickHouse/issues/64849). [#65081](https://github.com/ClickHouse/ClickHouse/pull/65081) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix the `Unknown expression identifier` error for remote queries with `INTERPOLATE (alias)` (new analyzer). Fixes [#64636](https://github.com/ClickHouse/ClickHouse/issues/64636). [#65090](https://github.com/ClickHouse/ClickHouse/pull/65090) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix pushing arithmetic operations out of aggregation. In the new analyzer, optimization was applied only once. [#65104](https://github.com/ClickHouse/ClickHouse/pull/65104) ([Dmitry Novik](https://github.com/novikd)). +* Fix aggregate function name rewriting in the new analyzer. [#65110](https://github.com/ClickHouse/ClickHouse/pull/65110) ([Dmitry Novik](https://github.com/novikd)). +* Respond with 5xx instead of 200 OK in case of receive timeout while reading (parts of) the request body from the client socket. [#65118](https://github.com/ClickHouse/ClickHouse/pull/65118) ([Julian Maicher](https://github.com/jmaicher)). +* Fix possible crash for hedged requests. [#65206](https://github.com/ClickHouse/ClickHouse/pull/65206) ([Azat Khuzhin](https://github.com/azat)). 
+* Fix a bug in Hashed and Hashed_Array dictionary short-circuit evaluation, which could read an uninitialized number and lead to various errors. [#65256](https://github.com/ClickHouse/ClickHouse/pull/65256) ([jsc0218](https://github.com/jsc0218)). +* Ensure that the type of the constant (the second argument of the `IN` operator) is always visible during the `IN` operator's type conversion. Otherwise, losing the type information could cause some conversions to fail, such as the conversion from `DateTime` to `Date`. This fixes [#64487](https://github.com/ClickHouse/ClickHouse/issues/64487). [#65315](https://github.com/ClickHouse/ClickHouse/pull/65315) ([pn](https://github.com/chloro-pn)). + +#### Build/Testing/Packaging Improvement +* Make the `network` service required when using the rc init script to start the ClickHouse server daemon. [#60650](https://github.com/ClickHouse/ClickHouse/pull/60650) ([Chun-Sheng, Li](https://github.com/peter279k)). +* Fix a typo in `test_hdfsCluster_unset_skip_unavailable_shards`. The test writes data to `unskip_unavailable_shards`, but uses `skip_unavailable_shards` from the previous test. [#64243](https://github.com/ClickHouse/ClickHouse/pull/64243) ([Mikhail Artemenko](https://github.com/Michicosun)). +* Reduce the size of some slow tests. [#64387](https://github.com/ClickHouse/ClickHouse/pull/64387) ([Raúl Marín](https://github.com/Algunenano)). +* Reduce the size of some slow tests. [#64452](https://github.com/ClickHouse/ClickHouse/pull/64452) ([Raúl Marín](https://github.com/Algunenano)). +* Fix `test_lost_part_other_replica`. [#64512](https://github.com/ClickHouse/ClickHouse/pull/64512) ([Raúl Marín](https://github.com/Algunenano)). +* Add tests for experimental unequal joins and randomize new settings in clickhouse-test. [#64535](https://github.com/ClickHouse/ClickHouse/pull/64535) ([Nikita Fomichev](https://github.com/fm4v)). +* Upgrade tests: update the config and work with release candidates. [#64542](https://github.com/ClickHouse/ClickHouse/pull/64542) ([Raúl Marín](https://github.com/Algunenano)). +* Add support for LLVM XRay. [#64592](https://github.com/ClickHouse/ClickHouse/pull/64592) ([Tomer Shafir](https://github.com/tomershafir)). +* Speed up `02995_forget_partition`. [#64761](https://github.com/ClickHouse/ClickHouse/pull/64761) ([Raúl Marín](https://github.com/Algunenano)). +* Fix `02790_async_queries_in_query_log`. [#64764](https://github.com/ClickHouse/ClickHouse/pull/64764) ([Raúl Marín](https://github.com/Algunenano)). +* Support LLVM XRay on Linux amd64 only. [#64837](https://github.com/ClickHouse/ClickHouse/pull/64837) ([Tomer Shafir](https://github.com/tomershafir)). +* Get rid of custom code in `tests/ci/download_release_packages.py` and `tests/ci/get_previous_release_tag.py` to avoid issues after https://github.com/ClickHouse/ClickHouse/pull/64759 is merged. [#64848](https://github.com/ClickHouse/ClickHouse/pull/64848) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Decrease the size of the `unit-test` image several times. [#65102](https://github.com/ClickHouse/ClickHouse/pull/65102) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Replay ZooKeeper logs using keeper-bench. [#62481](https://github.com/ClickHouse/ClickHouse/pull/62481) ([Antonio Andelic](https://github.com/antonio2368)). +* Re-enable OpenSSL session caching. [#65111](https://github.com/ClickHouse/ClickHouse/pull/65111) ([Robert Schulze](https://github.com/rschu1ze)). 
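Picking up the `output_format_pretty_display_footer_column_names` entry from the Improvement section above, a hedged sketch of how the footer setting could be tried (the 50-row threshold is the documented default; the query itself is arbitrary):

```sql
-- With at least 50 result rows, Pretty formats repeat the column names as a footer.
SELECT number, toString(number) AS str
FROM system.numbers
LIMIT 100
SETTINGS
    output_format_pretty_display_footer_column_names = 1,
    output_format_pretty_display_footer_column_names_min_rows = 50
FORMAT PrettyCompact;
```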
+ ### ClickHouse release 24.5, 2024-05-30 #### Backward Incompatible Change diff --git a/README.md b/README.md index 73d989210b5..dc253d4db2d 100644 --- a/README.md +++ b/README.md @@ -34,20 +34,18 @@ curl https://clickhouse.com/ | sh Every month we get together with the community (users, contributors, customers, those interested in learning more about ClickHouse) to discuss what is coming in the latest release. If you are interested in sharing what you've built on ClickHouse, let us know. -* [v24.5 Community Call](https://clickhouse.com/company/events/v24-5-community-release-call) - May 30 +* [v24.6 Community Call](https://clickhouse.com/company/events/v24-6-community-release-call) - Jul 2 ## Upcoming Events Keep an eye out for upcoming meetups and events around the world. Somewhere else you want us to be? Please feel free to reach out to tyler `<at>` clickhouse `<dot>` com. You can also peruse [ClickHouse Events](https://clickhouse.com/company/news-events) for a list of all upcoming trainings, meetups, speaking engagements, etc. -* [ClickHouse Happy Hour @ Tom's Watch Bar - Los Angeles](https://www.meetup.com/clickhouse-los-angeles-user-group/events/300740584/) - May 22 -* [ClickHouse & Confluent Meetup in Dubai](https://www.meetup.com/clickhouse-dubai-meetup-group/events/299629189/) - May 28 -* [ClickHouse Meetup in Stockholm](https://www.meetup.com/clickhouse-stockholm-user-group/events/299752651/) - Jun 3 -* [ClickHouse Meetup @ Cloudflare - San Francisco](https://www.meetup.com/clickhouse-silicon-valley-meetup-group/events/300523061/) - Jun 4 -* [ClickHouse (クリックハウス) Meetup Tokyo](https://www.meetup.com/clickhouse-tokyo-user-group/events/300798053/) - Jun 5 +* [AWS Summit in DC](https://clickhouse.com/company/events/2024-06-aws-summit-dc) - Jun 26 * [ClickHouse Meetup in Amsterdam](https://www.meetup.com/clickhouse-netherlands-user-group/events/300781068/) - Jun 27 * [ClickHouse Meetup in Paris](https://www.meetup.com/clickhouse-france-user-group/events/300783448/) - Jul 9 +* [ClickHouse Cloud - Live Update Call](https://clickhouse.com/company/events/202407-cloud-update-live) - Jul 9 * [ClickHouse Meetup @ Ramp - New York City](https://www.meetup.com/clickhouse-new-york-user-group/events/300595845/) - Jul 9 +* [AWS Summit in New York](https://clickhouse.com/company/events/2024-07-awssummit-nyc) - Jul 10 * [ClickHouse Meetup @ Klaviyo - Boston](https://www.meetup.com/clickhouse-boston-user-group/events/300907870) - Jul 11 ## Recent Recordings diff --git a/base/poco/Net/include/Poco/Net/HTTPBasicStreamBuf.h b/base/poco/Net/include/Poco/Net/HTTPBasicStreamBuf.h index 3178306363c..c87719b63a4 100644 --- a/base/poco/Net/include/Poco/Net/HTTPBasicStreamBuf.h +++ b/base/poco/Net/include/Poco/Net/HTTPBasicStreamBuf.h @@ -26,7 +26,7 @@ namespace Poco { namespace Net { - constexpr size_t HTTP_DEFAULT_BUFFER_SIZE = 1024 * 1024; + constexpr size_t HTTP_DEFAULT_BUFFER_SIZE = 8 * 1024; typedef Poco::BasicBufferedStreamBuf<char, std::char_traits<char>> HTTPBasicStreamBuf; diff --git a/base/poco/NetSSL_OpenSSL/include/Poco/Net/SSLManager.h b/base/poco/NetSSL_OpenSSL/include/Poco/Net/SSLManager.h index e4037c87927..25dc133fb20 100644 --- a/base/poco/NetSSL_OpenSSL/include/Poco/Net/SSLManager.h +++ b/base/poco/NetSSL_OpenSSL/include/Poco/Net/SSLManager.h @@ -17,6 +17,7 @@ #ifndef NetSSL_SSLManager_INCLUDED #define NetSSL_SSLManager_INCLUDED +#include <unordered_map> #include <openssl/ssl.h> #include "Poco/BasicEvent.h" @@ -219,6 +220,13 @@ namespace Net /// Unless initializeClient() has been called, the first call to this method initializes the default Context /// from 
the application configuration. + Context::Ptr getCustomServerContext(const std::string & name); + /// Return custom Context used by the server. + + Context::Ptr setCustomServerContext(const std::string & name, Context::Ptr ctx); + /// Set custom Context used by the server. + /// Return pointer on inserted Context or on old Context if exists. + PrivateKeyPassphraseHandlerPtr serverPassphraseHandler(); /// Returns the configured passphrase handler of the server. If none is set, the method will create a default one /// from an application configuration. @@ -258,6 +266,40 @@ namespace Net static const std::string CFG_SERVER_PREFIX; static const std::string CFG_CLIENT_PREFIX; + static const std::string CFG_PRIV_KEY_FILE; + static const std::string CFG_CERTIFICATE_FILE; + static const std::string CFG_CA_LOCATION; + static const std::string CFG_VER_MODE; + static const Context::VerificationMode VAL_VER_MODE; + static const std::string CFG_VER_DEPTH; + static const int VAL_VER_DEPTH; + static const std::string CFG_ENABLE_DEFAULT_CA; + static const bool VAL_ENABLE_DEFAULT_CA; + static const std::string CFG_CIPHER_LIST; + static const std::string CFG_CYPHER_LIST; // for backwards compatibility + static const std::string VAL_CIPHER_LIST; + static const std::string CFG_PREFER_SERVER_CIPHERS; + static const std::string CFG_DELEGATE_HANDLER; + static const std::string VAL_DELEGATE_HANDLER; + static const std::string CFG_CERTIFICATE_HANDLER; + static const std::string VAL_CERTIFICATE_HANDLER; + static const std::string CFG_CACHE_SESSIONS; + static const std::string CFG_SESSION_ID_CONTEXT; + static const std::string CFG_SESSION_CACHE_SIZE; + static const std::string CFG_SESSION_TIMEOUT; + static const std::string CFG_EXTENDED_VERIFICATION; + static const std::string CFG_REQUIRE_TLSV1; + static const std::string CFG_REQUIRE_TLSV1_1; + static const std::string CFG_REQUIRE_TLSV1_2; + static const std::string CFG_DISABLE_PROTOCOLS; + static const std::string CFG_DH_PARAMS_FILE; + static const std::string CFG_ECDH_CURVE; + +#ifdef OPENSSL_FIPS + static const std::string CFG_FIPS_MODE; + static const bool VAL_FIPS_MODE; +#endif + protected: static int verifyClientCallback(int ok, X509_STORE_CTX * pStore); /// The return value of this method defines how errors in @@ -314,39 +356,7 @@ namespace Net InvalidCertificateHandlerPtr _ptrClientCertificateHandler; Poco::FastMutex _mutex; - static const std::string CFG_PRIV_KEY_FILE; - static const std::string CFG_CERTIFICATE_FILE; - static const std::string CFG_CA_LOCATION; - static const std::string CFG_VER_MODE; - static const Context::VerificationMode VAL_VER_MODE; - static const std::string CFG_VER_DEPTH; - static const int VAL_VER_DEPTH; - static const std::string CFG_ENABLE_DEFAULT_CA; - static const bool VAL_ENABLE_DEFAULT_CA; - static const std::string CFG_CIPHER_LIST; - static const std::string CFG_CYPHER_LIST; // for backwards compatibility - static const std::string VAL_CIPHER_LIST; - static const std::string CFG_PREFER_SERVER_CIPHERS; - static const std::string CFG_DELEGATE_HANDLER; - static const std::string VAL_DELEGATE_HANDLER; - static const std::string CFG_CERTIFICATE_HANDLER; - static const std::string VAL_CERTIFICATE_HANDLER; - static const std::string CFG_CACHE_SESSIONS; - static const std::string CFG_SESSION_ID_CONTEXT; - static const std::string CFG_SESSION_CACHE_SIZE; - static const std::string CFG_SESSION_TIMEOUT; - static const std::string CFG_EXTENDED_VERIFICATION; - static const std::string CFG_REQUIRE_TLSV1; - static const std::string 
CFG_REQUIRE_TLSV1_1; - static const std::string CFG_REQUIRE_TLSV1_2; - static const std::string CFG_DISABLE_PROTOCOLS; - static const std::string CFG_DH_PARAMS_FILE; - static const std::string CFG_ECDH_CURVE; - -#ifdef OPENSSL_FIPS - static const std::string CFG_FIPS_MODE; - static const bool VAL_FIPS_MODE; -#endif + std::unordered_map<std::string, Context::Ptr> _mapPtrServerContexts; friend class Poco::SingletonHolder<SSLManager>; friend class Context; diff --git a/base/poco/NetSSL_OpenSSL/src/SSLManager.cpp b/base/poco/NetSSL_OpenSSL/src/SSLManager.cpp index d404aed4d13..ae04a994786 100644 --- a/base/poco/NetSSL_OpenSSL/src/SSLManager.cpp +++ b/base/poco/NetSSL_OpenSSL/src/SSLManager.cpp @@ -428,6 +428,23 @@ void SSLManager::initCertificateHandler(bool server) } +Context::Ptr SSLManager::getCustomServerContext(const std::string & name) +{ + Poco::FastMutex::ScopedLock lock(_mutex); + auto it = _mapPtrServerContexts.find(name); + if (it != _mapPtrServerContexts.end()) + return it->second; + return nullptr; +} + +Context::Ptr SSLManager::setCustomServerContext(const std::string & name, Context::Ptr ctx) +{ + Poco::FastMutex::ScopedLock lock(_mutex); + ctx = _mapPtrServerContexts.insert({name, ctx}).first->second; + return ctx; +} + + Poco::Util::AbstractConfiguration& SSLManager::appConfig() { try diff --git a/contrib/openssl b/contrib/openssl index 277de2ba202..5d81fa7068f 160000 --- a/contrib/openssl +++ b/contrib/openssl @@ -1 +1 @@ -Subproject commit 277de2ba202af4eb2291b363456d32ff0960e559 +Subproject commit 5d81fa7068fc8c07f4d0997d5b703f3c541a637c diff --git a/contrib/re2 b/contrib/re2 index a807e8a3aac..85dd7ad833a 160000 --- a/contrib/re2 +++ b/contrib/re2 @@ -1 +1 @@ -Subproject commit a807e8a3aac2cc33c77b7071efea54fcabe38e0c +Subproject commit 85dd7ad833a73095ecf3e3baea608ba051bbe2c7 diff --git a/contrib/re2-cmake/CMakeLists.txt b/contrib/re2-cmake/CMakeLists.txt index f773bc65a69..99d61839b30 100644 --- a/contrib/re2-cmake/CMakeLists.txt +++ b/contrib/re2-cmake/CMakeLists.txt @@ -28,16 +28,20 @@ set(RE2_SOURCES add_library(_re2 ${RE2_SOURCES}) target_include_directories(_re2 PUBLIC "${SRC_DIR}") target_link_libraries(_re2 PRIVATE + absl::absl_check + absl::absl_log absl::base absl::core_headers absl::fixed_array + absl::flags absl::flat_hash_map absl::flat_hash_set + absl::hash absl::inlined_vector - absl::strings - absl::str_format - absl::synchronization absl::optional - absl::span + absl::span + absl::str_format + absl::strings + absl::synchronization) add_library(ch_contrib::re2 ALIAS _re2) diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh index 4434a5338a7..3ce489b9e0e 100755 --- a/docker/test/stateless/run.sh +++ b/docker/test/stateless/run.sh @@ -285,7 +285,7 @@ stop_logs_replication # Try to get logs while server is running failed_to_save_logs=0 -for table in query_log zookeeper_log trace_log transactions_info_log metric_log blob_storage_log +for table in query_log zookeeper_log trace_log transactions_info_log metric_log blob_storage_log error_log do err=$(clickhouse-client -q "select * from system.$table into outfile '/test_output/$table.tsv.gz' format TSVWithNamesAndTypes") echo "$err" @@ -339,7 +339,7 @@ if [ $failed_to_save_logs -ne 0 ]; then # directly # - even though ci auto-compress some files (but not *.tsv) it does this only # for files >64MB, we want this files to be compressed explicitly - for table in query_log zookeeper_log trace_log transactions_info_log metric_log blob_storage_log + for table in query_log zookeeper_log trace_log transactions_info_log metric_log 
blob_storage_log error_log do clickhouse-local "$data_path_config" --only-system-tables --stacktrace -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.tsv.zst ||: if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then diff --git a/docker/test/style/Dockerfile b/docker/test/style/Dockerfile index 6ad03852b66..7cd712b73f6 100644 --- a/docker/test/style/Dockerfile +++ b/docker/test/style/Dockerfile @@ -37,6 +37,7 @@ RUN pip3 install \ tqdm==4.66.4 \ types-requests \ unidiff \ + jwt \ && rm -rf /root/.cache/pip RUN echo "en_US.UTF-8 UTF-8" > /etc/locale.gen && locale-gen en_US.UTF-8 diff --git a/docs/changelogs/v24.4.3.25-stable.md b/docs/changelogs/v24.4.3.25-stable.md new file mode 100644 index 00000000000..9582753c731 --- /dev/null +++ b/docs/changelogs/v24.4.3.25-stable.md @@ -0,0 +1,30 @@ +--- +sidebar_position: 1 +sidebar_label: 2024 +--- + +# 2024 Changelog + +### ClickHouse release v24.4.3.25-stable (a915dd4eda4) FIXME as compared to v24.4.2.141-stable (9e23d27bd11) + +#### Build/Testing/Packaging Improvement +* Backported in [#65130](https://github.com/ClickHouse/ClickHouse/issues/65130): Decrease the `unit-test` image a few times. [#65102](https://github.com/ClickHouse/ClickHouse/pull/65102) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + +#### Bug Fix (user-visible misbehavior in an official stable release) + +* Backported in [#64982](https://github.com/ClickHouse/ClickHouse/issues/64982): Fix the `Block structure mismatch` error for queries reading with `PREWHERE` from the materialized view when the materialized view has columns of different types than the source table. Fixes [#64611](https://github.com/ClickHouse/ClickHouse/issues/64611). [#64855](https://github.com/ClickHouse/ClickHouse/pull/64855) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#64974](https://github.com/ClickHouse/ClickHouse/issues/64974): Fix rare crash when table has TTL with subquery + database replicated + parallel replicas + analyzer. It's really rare, but please don't use TTLs with subqueries. [#64858](https://github.com/ClickHouse/ClickHouse/pull/64858) ([alesapin](https://github.com/alesapin)). +* Backported in [#65072](https://github.com/ClickHouse/ClickHouse/issues/65072): Fix `ALTER MODIFY COMMENT` query that was broken for parameterized VIEWs in https://github.com/ClickHouse/ClickHouse/pull/54211. [#65031](https://github.com/ClickHouse/ClickHouse/pull/65031) ([Nikolay Degterinsky](https://github.com/evillique)). +* Backported in [#65177](https://github.com/ClickHouse/ClickHouse/issues/65177): Fix the `Unknown expression identifier` error for remote queries with `INTERPOLATE (alias)` (new analyzer). Fixes [#64636](https://github.com/ClickHouse/ClickHouse/issues/64636). [#65090](https://github.com/ClickHouse/ClickHouse/pull/65090) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#65263](https://github.com/ClickHouse/ClickHouse/issues/65263): Fix the bug in Hashed and Hashed_Array dictionary short circuit evaluation, which may read uninitialized number, leading to various errors. [#65256](https://github.com/ClickHouse/ClickHouse/pull/65256) ([jsc0218](https://github.com/jsc0218)). + +#### Critical Bug Fix (crash, LOGICAL_ERROR, data loss, RBAC) + +* Backported in [#65285](https://github.com/ClickHouse/ClickHouse/issues/65285): Fix crash with UniqInjectiveFunctionsEliminationPass and uniqCombined. 
[#65188](https://github.com/ClickHouse/ClickHouse/pull/65188) ([Raúl Marín](https://github.com/Algunenano)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Backported in [#65114](https://github.com/ClickHouse/ClickHouse/issues/65114): Adjust the `version_helper` and script to a new release scheme. [#64759](https://github.com/ClickHouse/ClickHouse/pull/64759) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Backported in [#65225](https://github.com/ClickHouse/ClickHouse/issues/65225): Capture weak_ptr of ContextAccess for safety. [#65051](https://github.com/ClickHouse/ClickHouse/pull/65051) ([Alexander Gololobov](https://github.com/davenger)). +* Backported in [#65217](https://github.com/ClickHouse/ClickHouse/issues/65217): Fix false positives leaky memory warnings in OpenSSL. [#65125](https://github.com/ClickHouse/ClickHouse/pull/65125) ([Robert Schulze](https://github.com/rschu1ze)). + diff --git a/docs/en/development/developer-instruction.md b/docs/en/development/developer-instruction.md index ec5760541e8..0a1fe58b16f 100644 --- a/docs/en/development/developer-instruction.md +++ b/docs/en/development/developer-instruction.md @@ -267,7 +267,7 @@ A pull request can be created even if the work is not completed yet. In this cas Testing will commence as soon as ClickHouse employees label your PR with a tag “can be tested”. The results of some first checks (e.g. code style) will come in within several minutes. Build check results will arrive within half an hour. And the main set of tests will report itself within an hour. -The system will prepare ClickHouse binary builds for your pull request individually. To retrieve these builds click the “Details” link next to “ClickHouse build check” entry in the list of checks. There you will find direct links to the built .deb packages of ClickHouse which you can deploy even on your production servers (if you have no fear). +The system will prepare ClickHouse binary builds for your pull request individually. To retrieve these builds click the “Details” link next to “Builds” entry in the list of checks. There you will find direct links to the built .deb packages of ClickHouse which you can deploy even on your production servers (if you have no fear). Most probably some of the builds will fail at first times. This is due to the fact that we check builds both with gcc as well as with clang, with almost all of existing warnings (always with the `-Werror` flag) enabled for clang. On that same page, you can find all of the build logs so that you do not have to build ClickHouse in all of the possible ways. 
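For reference, here is a minimal sketch of how the `SSLManager::getCustomServerContext` / `setCustomServerContext` pair introduced in this patch can be used to cache one TLS `Context` per logical server name. The helper name and the key/certificate paths are illustrative assumptions, not something taken from the patch.

```cpp
// Illustrative only: the helper name and file paths below are assumptions.
#include <Poco/Net/Context.h>
#include <Poco/Net/SSLManager.h>

#include <string>

Poco::Net::Context::Ptr getOrCreateServerContext(const std::string & name)
{
    auto & manager = Poco::Net::SSLManager::instance();

    // Reuse a context that was already registered under this name.
    if (Poco::Net::Context::Ptr existing = manager.getCustomServerContext(name))
        return existing;

    // Otherwise create a server-side context (placeholder key/certificate paths).
    Poco::Net::Context::Ptr ctx = new Poco::Net::Context(
        Poco::Net::Context::SERVER_USE,
        "/path/to/server.key",   // private key file (assumed)
        "/path/to/server.crt",   // certificate file (assumed)
        "");                     // CA location, empty here

    // setCustomServerContext() returns the stored context, which is the one
    // registered first if another caller raced us on the same name.
    return manager.setCustomServerContext(name, ctx);
}
```

Because `setCustomServerContext` stores the context with `insert()` under `_mutex`, the first registration for a name wins and concurrent callers end up sharing the same `Context`.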
diff --git a/docs/en/development/tests.md b/docs/en/development/tests.md index 8dff6f0ed1d..269995a1a96 100644 --- a/docs/en/development/tests.md +++ b/docs/en/development/tests.md @@ -28,7 +28,7 @@ run, for example, the test `01428_hash_set_nan_key`, change to the repository folder and run the following command: ``` -PATH=$PATH:<path to clickhouse-client> tests/clickhouse-test 01428_hash_set_nan_key +PATH=<path to clickhouse-client>:$PATH tests/clickhouse-test 01428_hash_set_nan_key ``` Test results (`stderr` and `stdout`) are written to files `01428_hash_set_nan_key.[stderr|stdout]` which diff --git a/docs/en/getting-started/install.md b/docs/en/getting-started/install.md index 67752f223ce..98e73dec451 100644 --- a/docs/en/getting-started/install.md +++ b/docs/en/getting-started/install.md @@ -314,7 +314,7 @@ For example, to download a aarch64 binary for ClickHouse v23.4, follow these ste - Find the GitHub pull request for release v23.4: [Release pull request for branch 23.4](https://github.com/ClickHouse/ClickHouse/pull/49238) - Click "Commits", then click a commit similar to "Update autogenerated version to 23.4.2.1 and contributors" for the particular version you like to install. - Click the green check / yellow dot / red cross to open the list of CI checks. -- Click "Details" next to "ClickHouse Build Check" in the list, it will open a page similar to [this page](https://s3.amazonaws.com/clickhouse-test-reports/46793/b460eb70bf29b19eadd19a1f959b15d186705394/clickhouse_build_check/report.html) +- Click "Details" next to "Builds" in the list, it will open a page similar to [this page](https://s3.amazonaws.com/clickhouse-test-reports/46793/b460eb70bf29b19eadd19a1f959b15d186705394/clickhouse_build_check/report.html) - Find the rows with compiler = "clang-*-aarch64" - there are multiple rows. - Download the artifacts for these builds. diff --git a/docs/en/interfaces/cli.md b/docs/en/interfaces/cli.md index 1eb426af617..e18ff6f1a3f 100644 --- a/docs/en/interfaces/cli.md +++ b/docs/en/interfaces/cli.md @@ -193,6 +193,7 @@ You can pass parameters to `clickhouse-client` (all parameters have a default va - `--hardware-utilization` — Print hardware utilization information in progress bar. - `--print-profile-events` – Print `ProfileEvents` packets. - `--profile-events-delay-ms` – Delay between printing `ProfileEvents` packets (-1 - print only totals, 0 - print every single packet). +- `--jwt` – If specified, enables authorization via JSON Web Token. Server JWT authorization is available only in ClickHouse Cloud. Instead of `--host`, `--port`, `--user` and `--password` options, ClickHouse client also supports connection strings (see next section). diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index ffdd7e2ca25..a81a17e65d6 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -2169,6 +2169,7 @@ To exchange data with Hadoop, you can use [HDFS table engine](/docs/en/engines/t - [output_format_parquet_compression_method](/docs/en/operations/settings/settings-formats.md/#output_format_parquet_compression_method) - compression method used in output Parquet format. Default value - `lz4`. - [input_format_parquet_max_block_size](/docs/en/operations/settings/settings-formats.md/#input_format_parquet_max_block_size) - Max block row size for parquet reader. Default value - `65409`. - [input_format_parquet_prefer_block_bytes](/docs/en/operations/settings/settings-formats.md/#input_format_parquet_prefer_block_bytes) - Average block bytes output by parquet reader. Default value - `16744704`. 
+- [output_format_parquet_write_page_index](/docs/en/operations/settings/settings-formats.md/#output_format_parquet_write_page_index) - Allows writing a page index into Parquet files. Requires `output_format_parquet_use_custom_encoder` to be disabled at present. Default value - `true`. ## ParquetMetadata {data-format-parquet-metadata} diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md index 0c007f1ac79..db8157592db 100644 --- a/docs/en/operations/server-configuration-parameters/settings.md +++ b/docs/en/operations/server-configuration-parameters/settings.md @@ -954,6 +954,38 @@ Or it can be set in hex: Everything mentioned above can be applied for `aes_256_gcm_siv` (but the key must be 32 bytes long). +## error_log {#error_log} + +It is disabled by default. + +**Enabling** + +To manually turn on error history collection in [`system.error_log`](../../operations/system-tables/error_log.md), create `/etc/clickhouse-server/config.d/error_log.xml` with the following content: +
+``` xml
+<clickhouse>
+    <error_log>
+        <database>system</database>
+        <table>error_log</table>
+        <flush_interval_milliseconds>7500</flush_interval_milliseconds>
+        <collect_interval_milliseconds>1000</collect_interval_milliseconds>
+        <max_size_rows>1048576</max_size_rows>
+        <reserved_size_rows>8192</reserved_size_rows>
+        <buffer_size_rows_flush_threshold>524288</buffer_size_rows_flush_threshold>
+        <flush_on_crash>false</flush_on_crash>
+    </error_log>
+</clickhouse>
+```
+
+**Disabling**
+
+To disable the `error_log` setting, create the file `/etc/clickhouse-server/config.d/disable_error_log.xml` with the following content:
+
+``` xml
+<clickhouse>
+    <error_log remove="1" />
+</clickhouse>
+```
## custom_settings_prefixes {#custom_settings_prefixes} @@ -1431,6 +1463,9 @@ Keys: - `size` – Size of the file. Applies to `log` and `errorlog`. Once the file reaches `size`, ClickHouse archives and renames it, and creates a new log file in its place. - `count` – The number of archived log files that ClickHouse stores. - `console` – Send `log` and `errorlog` to the console instead of file. To enable, set to `1` or `true`. +- `console_log_level` – Logging level for console. Defaults to `level`. +- `use_syslog` – Log to syslog as well. +- `syslog_level` – Logging level for logging to syslog. - `stream_compress` – Compress `log` and `errorlog` with `lz4` stream compression. To enable, set to `1` or `true`. - `formatting` – Specify log format to be printed in console log (currently only `json` supported). @@ -1917,7 +1952,7 @@ For more information, see the MergeTreeSettings.h header file. ## metric_log {#metric_log} -It is enabled by default. If it`s not, you can do this manually. +It is disabled by default. **Enabling** diff --git a/docs/en/operations/settings/settings-formats.md b/docs/en/operations/settings/settings-formats.md index 670c9c6cbf1..530023df5b7 100644 --- a/docs/en/operations/settings/settings-formats.md +++ b/docs/en/operations/settings/settings-formats.md @@ -1428,6 +1428,13 @@ Average block bytes output by parquet reader. Lowering the configuration in the Default value: `65409 * 256 = 16744704` +### output_format_parquet_write_page_index {#output_format_parquet_write_page_index} + +Allows writing a page index into Parquet files. To enable this, set `output_format_parquet_use_custom_encoder` = `false` and +`output_format_parquet_write_page_index` = `true`. + +Enabled by default. + ## Hive format settings {#hive-format-settings} ### input_format_hive_text_fields_delimiter {#input_format_hive_text_fields_delimiter} diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 3de823321f2..3d6d776f4da 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -5418,11 +5418,14 @@ When set to `false` than all attempts are made with identical timeouts. Default value: `true`. -## uniform_snowflake_conversion_functions {#uniform_snowflake_conversion_functions} +## allow_deprecated_snowflake_conversion_functions {#allow_deprecated_snowflake_conversion_functions} -If set to `true`, then functions `snowflakeIDToDateTime`, `snowflakeIDToDateTime64`, `dateTimeToSnowflakeID`, and `dateTime64ToSnowflakeID` are enabled, and functions `snowflakeToDateTime`, `snowflakeToDateTime64`, `dateTimeToSnowflake`, and `dateTime64ToSnowflake` are disabled (and vice versa if set to `false`). +Functions `snowflakeToDateTime`, `snowflakeToDateTime64`, `dateTimeToSnowflake`, and `dateTime64ToSnowflake` are deprecated and disabled by default. +Please use functions `snowflakeIDToDateTime`, `snowflakeIDToDateTime64`, `dateTimeToSnowflakeID`, and `dateTime64ToSnowflakeID` instead. -Default value: `true` +To re-enable the deprecated functions (e.g., during a transition period), please set this setting to `true`. 
+ +Default value: `false` ## allow_experimental_variant_type {#allow_experimental_variant_type} diff --git a/docs/en/operations/system-tables/error_log.md b/docs/en/operations/system-tables/error_log.md new file mode 100644 index 00000000000..15edef58662 --- /dev/null +++ b/docs/en/operations/system-tables/error_log.md @@ -0,0 +1,39 @@ +--- +slug: /en/operations/system-tables/error_log +--- +# error_log + +Contains history of error values from table `system.errors`, periodically flushed to disk. + +Columns: +- `hostname` ([LowCardinality(String)](../../sql-reference/data-types/string.md)) — Hostname of the server executing the query. +- `event_date` ([Date](../../sql-reference/data-types/date.md)) — Event date. +- `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Event time. +- `code` ([Int32](../../sql-reference/data-types/int-uint.md)) — Code number of the error. +- `error` ([LowCardinality(String)](../../sql-reference/data-types/string.md)) - Name of the error. +- `value` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The number of times this error happened. +- `remote` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Remote exception (i.e. received during one of the distributed queries). + +**Example** + +``` sql +SELECT * FROM system.error_log LIMIT 1 FORMAT Vertical; +``` + +``` text +Row 1: +────── +hostname: clickhouse.eu-central1.internal +event_date: 2024-06-18 +event_time: 2024-06-18 07:32:39 +code: 999 +error: KEEPER_EXCEPTION +value: 2 +remote: 0 +``` + +**See also** + +- [error_log setting](../../operations/server-configuration-parameters/settings.md#error_log) — Enabling and disabling the setting. +- [system.errors](../../operations/system-tables/errors.md) — Contains error codes with the number of times they have been triggered. +- [Monitoring](../../operations/monitoring.md) — Base concepts of ClickHouse monitoring. diff --git a/docs/en/operations/tips.md b/docs/en/operations/tips.md index ed22679a3e6..df041f5885e 100644 --- a/docs/en/operations/tips.md +++ b/docs/en/operations/tips.md @@ -36,9 +36,24 @@ $ echo 0 | sudo tee /proc/sys/vm/overcommit_memory Use `perf top` to watch the time spent in the kernel for memory management. Permanent huge pages also do not need to be allocated. -:::warning -If your system has less than 16 GB of RAM, you may experience various memory exceptions because default settings do not match this amount of memory. The recommended amount of RAM is 32 GB or more. You can use ClickHouse in a system with a small amount of RAM, even with 2 GB of RAM, but it requires additional tuning and can ingest at a low rate. -::: +### Using less than 16GB of RAM + +The recommended amount of RAM is 32 GB or more. + +If your system has less than 16 GB of RAM, you may experience various memory exceptions because default settings do not match this amount of memory. You can use ClickHouse in a system with a small amount of RAM (as low as 2 GB), but these setups require additional tuning and can only ingest at a low rate. + +When using ClickHouse with less than 16GB of RAM, we recommend the following: + +- Lower the size of the mark cache in the `config.xml`. It can be set as low as 500 MB, but it cannot be set to zero. +- Lower the number of query processing threads down to `1`. +- Lower the `max_block_size` to `8192`. Values as low as `1024` can still be practical. +- Lower `max_download_threads` to `1`. +- Set `input_format_parallel_parsing` and `output_format_parallel_formatting` to `0`. 
+ +Additional notes: +- To flush the memory cached by the memory allocator, you can run the `SYSTEM JEMALLOC PURGE` +command. +- We do not recommend using S3 or Kafka integrations on low-memory machines because they require significant memory for buffers. ## Storage Subsystem {#storage-subsystem} diff --git a/docs/en/sql-reference/aggregate-functions/reference/analysis_of_variance.md b/docs/en/sql-reference/aggregate-functions/reference/analysis_of_variance.md index d9b44b3ff07..56e54d3faf9 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/analysis_of_variance.md +++ b/docs/en/sql-reference/aggregate-functions/reference/analysis_of_variance.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/analysis_of_variance -sidebar_position: 6 +sidebar_position: 101 --- # analysisOfVariance diff --git a/docs/en/sql-reference/aggregate-functions/reference/any.md b/docs/en/sql-reference/aggregate-functions/reference/any.md index f1b5a6683e5..cdff7dde4a9 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/any.md +++ b/docs/en/sql-reference/aggregate-functions/reference/any.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/any -sidebar_position: 6 +sidebar_position: 102 --- # any diff --git a/docs/en/sql-reference/aggregate-functions/reference/anyheavy.md b/docs/en/sql-reference/aggregate-functions/reference/anyheavy.md index 9fbc21910f8..9c6e6b5fead 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/anyheavy.md +++ b/docs/en/sql-reference/aggregate-functions/reference/anyheavy.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/anyheavy -sidebar_position: 103 +sidebar_position: 104 --- # anyHeavy diff --git a/docs/en/sql-reference/aggregate-functions/reference/anylast.md b/docs/en/sql-reference/aggregate-functions/reference/anylast.md index 8fcee2cf8e6..e43bc07fbdc 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/anylast.md +++ b/docs/en/sql-reference/aggregate-functions/reference/anylast.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/anylast -sidebar_position: 104 +sidebar_position: 105 --- # anyLast diff --git a/docs/en/sql-reference/aggregate-functions/reference/anylast_respect_nulls.md b/docs/en/sql-reference/aggregate-functions/reference/anylast_respect_nulls.md index b6d0806f35d..8f093cfdb61 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/anylast_respect_nulls.md +++ b/docs/en/sql-reference/aggregate-functions/reference/anylast_respect_nulls.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/anylast_respect_nulls -sidebar_position: 104 +sidebar_position: 106 --- # anyLast_respect_nulls diff --git a/docs/en/sql-reference/aggregate-functions/reference/approxtopk.md b/docs/en/sql-reference/aggregate-functions/reference/approxtopk.md index 2bb43a9f665..ea2083ebd04 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/approxtopk.md +++ b/docs/en/sql-reference/aggregate-functions/reference/approxtopk.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/approxtopk -sidebar_position: 212 +sidebar_position: 107 --- # approx_top_k diff --git a/docs/en/sql-reference/aggregate-functions/reference/approxtopsum.md b/docs/en/sql-reference/aggregate-functions/reference/approxtopsum.md index aa884b26d8e..639142331f0 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/approxtopsum.md +++ b/docs/en/sql-reference/aggregate-functions/reference/approxtopsum.md @@ -1,6 
+1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/approxtopsum -sidebar_position: 212 +sidebar_position: 108 --- # approx_top_sum diff --git a/docs/en/sql-reference/aggregate-functions/reference/argmax.md b/docs/en/sql-reference/aggregate-functions/reference/argmax.md index 2274dd4a5dc..8c6b2b532e8 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/argmax.md +++ b/docs/en/sql-reference/aggregate-functions/reference/argmax.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/argmax -sidebar_position: 106 +sidebar_position: 109 --- # argMax diff --git a/docs/en/sql-reference/aggregate-functions/reference/argmin.md b/docs/en/sql-reference/aggregate-functions/reference/argmin.md index 297744fb1db..0ab21fe2b52 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/argmin.md +++ b/docs/en/sql-reference/aggregate-functions/reference/argmin.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/argmin -sidebar_position: 105 +sidebar_position: 110 --- # argMin diff --git a/docs/en/sql-reference/aggregate-functions/reference/arrayconcatagg.md b/docs/en/sql-reference/aggregate-functions/reference/arrayconcatagg.md index 3c71129bdb5..c0ac0db33f3 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/arrayconcatagg.md +++ b/docs/en/sql-reference/aggregate-functions/reference/arrayconcatagg.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/array_concat_agg -sidebar_position: 110 +sidebar_position: 111 --- # array_concat_agg diff --git a/docs/en/sql-reference/aggregate-functions/reference/avg.md b/docs/en/sql-reference/aggregate-functions/reference/avg.md index 5463d8a1874..7789c30bfe0 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/avg.md +++ b/docs/en/sql-reference/aggregate-functions/reference/avg.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/avg -sidebar_position: 5 +sidebar_position: 112 --- # avg diff --git a/docs/en/sql-reference/aggregate-functions/reference/avgweighted.md b/docs/en/sql-reference/aggregate-functions/reference/avgweighted.md index 99d3bac763d..304d0407d98 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/avgweighted.md +++ b/docs/en/sql-reference/aggregate-functions/reference/avgweighted.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/avgweighted -sidebar_position: 107 +sidebar_position: 113 --- # avgWeighted diff --git a/docs/en/sql-reference/aggregate-functions/reference/boundrat.md b/docs/en/sql-reference/aggregate-functions/reference/boundrat.md index f3907af8030..d253a250600 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/boundrat.md +++ b/docs/en/sql-reference/aggregate-functions/reference/boundrat.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/boundingRatio -sidebar_position: 2 +sidebar_position: 114 title: boundingRatio --- diff --git a/docs/en/sql-reference/aggregate-functions/reference/categoricalinformationvalue.md b/docs/en/sql-reference/aggregate-functions/reference/categoricalinformationvalue.md index 57edb47950a..7983c3f2e60 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/categoricalinformationvalue.md +++ b/docs/en/sql-reference/aggregate-functions/reference/categoricalinformationvalue.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/categoricalinformationvalue -sidebar_position: 250 +sidebar_position: 115 title: categoricalInformationValue --- diff --git 
a/docs/en/sql-reference/aggregate-functions/reference/contingency.md b/docs/en/sql-reference/aggregate-functions/reference/contingency.md index 902c1f4af80..a49ff22febc 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/contingency.md +++ b/docs/en/sql-reference/aggregate-functions/reference/contingency.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/contingency -sidebar_position: 350 +sidebar_position: 116 --- # contingency diff --git a/docs/en/sql-reference/aggregate-functions/reference/corr.md b/docs/en/sql-reference/aggregate-functions/reference/corr.md index 5681c942169..c43b4d3b25a 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/corr.md +++ b/docs/en/sql-reference/aggregate-functions/reference/corr.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/corr -sidebar_position: 107 +sidebar_position: 117 --- # corr diff --git a/docs/en/sql-reference/aggregate-functions/reference/corrmatrix.md b/docs/en/sql-reference/aggregate-functions/reference/corrmatrix.md index 718477b28dd..96978863646 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/corrmatrix.md +++ b/docs/en/sql-reference/aggregate-functions/reference/corrmatrix.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/corrmatrix -sidebar_position: 108 +sidebar_position: 118 --- # corrMatrix diff --git a/docs/en/sql-reference/aggregate-functions/reference/corrstable.md b/docs/en/sql-reference/aggregate-functions/reference/corrstable.md index b35442a32b6..979cf244245 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/corrstable.md +++ b/docs/en/sql-reference/aggregate-functions/reference/corrstable.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/corrstable -sidebar_position: 107 +sidebar_position: 119 --- # corrStable diff --git a/docs/en/sql-reference/aggregate-functions/reference/count.md b/docs/en/sql-reference/aggregate-functions/reference/count.md index ca4067c8d8c..e6f2cdd6aa9 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/count.md +++ b/docs/en/sql-reference/aggregate-functions/reference/count.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/count -sidebar_position: 1 +sidebar_position: 120 --- # count diff --git a/docs/en/sql-reference/aggregate-functions/reference/covarpop.md b/docs/en/sql-reference/aggregate-functions/reference/covarpop.md index 78b9f4cffea..7231f92b8fa 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/covarpop.md +++ b/docs/en/sql-reference/aggregate-functions/reference/covarpop.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/covarpop -sidebar_position: 37 +sidebar_position: 121 --- # covarPop diff --git a/docs/en/sql-reference/aggregate-functions/reference/covarpopmatrix.md b/docs/en/sql-reference/aggregate-functions/reference/covarpopmatrix.md index d7400599a49..c8811b3811e 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/covarpopmatrix.md +++ b/docs/en/sql-reference/aggregate-functions/reference/covarpopmatrix.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/covarpopmatrix -sidebar_position: 36 +sidebar_position: 122 --- # covarPopMatrix diff --git a/docs/en/sql-reference/aggregate-functions/reference/covarpopstable.md b/docs/en/sql-reference/aggregate-functions/reference/covarpopstable.md index 68e78fc3bd8..48e5368faac 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/covarpopstable.md +++ 
b/docs/en/sql-reference/aggregate-functions/reference/covarpopstable.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/covarpopstable -sidebar_position: 36 +sidebar_position: 123 --- # covarPopStable diff --git a/docs/en/sql-reference/aggregate-functions/reference/covarsamp.md b/docs/en/sql-reference/aggregate-functions/reference/covarsamp.md index 7d5d5d13f35..92fe213b407 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/covarsamp.md +++ b/docs/en/sql-reference/aggregate-functions/reference/covarsamp.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/covarsamp -sidebar_position: 37 +sidebar_position: 124 --- # covarSamp diff --git a/docs/en/sql-reference/aggregate-functions/reference/covarsampmatrix.md b/docs/en/sql-reference/aggregate-functions/reference/covarsampmatrix.md index b71d753f0be..1585c4a9970 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/covarsampmatrix.md +++ b/docs/en/sql-reference/aggregate-functions/reference/covarsampmatrix.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/covarsampmatrix -sidebar_position: 38 +sidebar_position: 125 --- # covarSampMatrix diff --git a/docs/en/sql-reference/aggregate-functions/reference/covarsampstable.md b/docs/en/sql-reference/aggregate-functions/reference/covarsampstable.md index 3e6867b96d6..6764877768e 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/covarsampstable.md +++ b/docs/en/sql-reference/aggregate-functions/reference/covarsampstable.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/covarsampstable -sidebar_position: 37 +sidebar_position: 126 --- # covarSampStable diff --git a/docs/en/sql-reference/aggregate-functions/reference/cramersv.md b/docs/en/sql-reference/aggregate-functions/reference/cramersv.md index 2424ff95237..db0e1c5eb4c 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/cramersv.md +++ b/docs/en/sql-reference/aggregate-functions/reference/cramersv.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/cramersv -sidebar_position: 351 +sidebar_position: 127 --- # cramersV diff --git a/docs/en/sql-reference/aggregate-functions/reference/cramersvbiascorrected.md b/docs/en/sql-reference/aggregate-functions/reference/cramersvbiascorrected.md index 939c04e3fdc..2ff7ce489d3 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/cramersvbiascorrected.md +++ b/docs/en/sql-reference/aggregate-functions/reference/cramersvbiascorrected.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/cramersvbiascorrected -sidebar_position: 352 +sidebar_position: 128 --- # cramersVBiasCorrected diff --git a/docs/en/sql-reference/aggregate-functions/reference/deltasum.md b/docs/en/sql-reference/aggregate-functions/reference/deltasum.md index 37d9d08cbdb..650135ecfeb 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/deltasum.md +++ b/docs/en/sql-reference/aggregate-functions/reference/deltasum.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/deltasum -sidebar_position: 141 +sidebar_position: 129 --- # deltaSum diff --git a/docs/en/sql-reference/aggregate-functions/reference/deltasumtimestamp.md b/docs/en/sql-reference/aggregate-functions/reference/deltasumtimestamp.md index c51d86389b0..ec5cfa5fecc 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/deltasumtimestamp.md +++ b/docs/en/sql-reference/aggregate-functions/reference/deltasumtimestamp.md @@ -1,6 +1,6 @@ 
--- slug: /en/sql-reference/aggregate-functions/reference/deltasumtimestamp -sidebar_position: 141 +sidebar_position: 130 title: deltaSumTimestamp --- diff --git a/docs/en/sql-reference/aggregate-functions/reference/entropy.md b/docs/en/sql-reference/aggregate-functions/reference/entropy.md index fc8d627ecab..7970cdd268b 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/entropy.md +++ b/docs/en/sql-reference/aggregate-functions/reference/entropy.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/entropy -sidebar_position: 302 +sidebar_position: 131 --- # entropy diff --git a/docs/en/sql-reference/aggregate-functions/reference/exponentialmovingaverage.md b/docs/en/sql-reference/aggregate-functions/reference/exponentialmovingaverage.md index 75041ace7a3..3086a48f819 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/exponentialmovingaverage.md +++ b/docs/en/sql-reference/aggregate-functions/reference/exponentialmovingaverage.md @@ -1,7 +1,7 @@ --- -slug: /en/sql-reference/aggregate-functions/reference/exponentialmovingaverage -sidebar_position: 108 -sidebar_title: exponentialMovingAverage +slug: /en/sql-reference/aggregate-functions/reference/exponentialMovingAverage +sidebar_position: 132 +title: exponentialMovingAverage --- ## exponentialMovingAverage @@ -96,56 +96,56 @@ Result: ``` text ┌─value─┬─time─┬─round(exp_smooth, 3)─┬─bar────────────────────────────────────────┐ -│ 1 │ 0 │ 0.067 │ ███▎ │ +│ 1 │ 0 │ 0.067 │ ███▎ │ │ 0 │ 1 │ 0.062 │ ███ │ -│ 0 │ 2 │ 0.058 │ ██▊ │ -│ 0 │ 3 │ 0.054 │ ██▋ │ +│ 0 │ 2 │ 0.058 │ ██▊ │ +│ 0 │ 3 │ 0.054 │ ██▋ │ │ 0 │ 4 │ 0.051 │ ██▌ │ -│ 0 │ 5 │ 0.047 │ ██▎ │ -│ 0 │ 6 │ 0.044 │ ██▏ │ +│ 0 │ 5 │ 0.047 │ ██▎ │ +│ 0 │ 6 │ 0.044 │ ██▏ │ │ 0 │ 7 │ 0.041 │ ██ │ -│ 0 │ 8 │ 0.038 │ █▊ │ -│ 0 │ 9 │ 0.036 │ █▋ │ -│ 0 │ 10 │ 0.033 │ █▋ │ +│ 0 │ 8 │ 0.038 │ █▊ │ +│ 0 │ 9 │ 0.036 │ █▋ │ +│ 0 │ 10 │ 0.033 │ █▋ │ │ 0 │ 11 │ 0.031 │ █▌ │ -│ 0 │ 12 │ 0.029 │ █▍ │ -│ 0 │ 13 │ 0.027 │ █▎ │ -│ 0 │ 14 │ 0.025 │ █▎ │ -│ 0 │ 15 │ 0.024 │ █▏ │ +│ 0 │ 12 │ 0.029 │ █▍ │ +│ 0 │ 13 │ 0.027 │ █▎ │ +│ 0 │ 14 │ 0.025 │ █▎ │ +│ 0 │ 15 │ 0.024 │ █▏ │ │ 0 │ 16 │ 0.022 │ █ │ │ 0 │ 17 │ 0.021 │ █ │ -│ 0 │ 18 │ 0.019 │ ▊ │ -│ 0 │ 19 │ 0.018 │ ▊ │ -│ 0 │ 20 │ 0.017 │ ▋ │ -│ 0 │ 21 │ 0.016 │ ▋ │ -│ 0 │ 22 │ 0.015 │ ▋ │ -│ 0 │ 23 │ 0.014 │ ▋ │ -│ 0 │ 24 │ 0.013 │ ▋ │ -│ 1 │ 25 │ 0.079 │ ███▊ │ +│ 0 │ 18 │ 0.019 │ ▊ │ +│ 0 │ 19 │ 0.018 │ ▊ │ +│ 0 │ 20 │ 0.017 │ ▋ │ +│ 0 │ 21 │ 0.016 │ ▋ │ +│ 0 │ 22 │ 0.015 │ ▋ │ +│ 0 │ 23 │ 0.014 │ ▋ │ +│ 0 │ 24 │ 0.013 │ ▋ │ +│ 1 │ 25 │ 0.079 │ ███▊ │ │ 1 │ 26 │ 0.14 │ ███████ │ -│ 1 │ 27 │ 0.198 │ █████████▊ │ +│ 1 │ 27 │ 0.198 │ █████████▊ │ │ 1 │ 28 │ 0.252 │ ████████████▌ │ │ 1 │ 29 │ 0.302 │ ███████████████ │ -│ 1 │ 30 │ 0.349 │ █████████████████▍ │ +│ 1 │ 30 │ 0.349 │ █████████████████▍ │ │ 1 │ 31 │ 0.392 │ ███████████████████▌ │ -│ 1 │ 32 │ 0.433 │ █████████████████████▋ │ +│ 1 │ 32 │ 0.433 │ █████████████████████▋ │ │ 1 │ 33 │ 0.471 │ ███████████████████████▌ │ -│ 1 │ 34 │ 0.506 │ █████████████████████████▎ │ -│ 1 │ 35 │ 0.539 │ ██████████████████████████▊ │ +│ 1 │ 34 │ 0.506 │ █████████████████████████▎ │ +│ 1 │ 35 │ 0.539 │ ██████████████████████████▊ │ │ 1 │ 36 │ 0.57 │ ████████████████████████████▌ │ -│ 1 │ 37 │ 0.599 │ █████████████████████████████▊ │ -│ 1 │ 38 │ 0.626 │ ███████████████████████████████▎ │ +│ 1 │ 37 │ 0.599 │ █████████████████████████████▊ │ +│ 1 │ 38 │ 0.626 │ ███████████████████████████████▎ │ │ 1 │ 39 │ 0.651 │ ████████████████████████████████▌ │ -│ 1 │ 40 │ 0.674 │ 
█████████████████████████████████▋ │ -│ 1 │ 41 │ 0.696 │ ██████████████████████████████████▋ │ -│ 1 │ 42 │ 0.716 │ ███████████████████████████████████▋ │ -│ 1 │ 43 │ 0.735 │ ████████████████████████████████████▋ │ -│ 1 │ 44 │ 0.753 │ █████████████████████████████████████▋ │ -│ 1 │ 45 │ 0.77 │ ██████████████████████████████████████▍ │ -│ 1 │ 46 │ 0.785 │ ███████████████████████████████████████▎ │ -│ 1 │ 47 │ 0.8 │ ███████████████████████████████████████▊ │ -│ 1 │ 48 │ 0.813 │ ████████████████████████████████████████▋ │ -│ 1 │ 49 │ 0.825 │ █████████████████████████████████████████▎│ +│ 1 │ 40 │ 0.674 │ █████████████████████████████████▋ │ +│ 1 │ 41 │ 0.696 │ ██████████████████████████████████▋ │ +│ 1 │ 42 │ 0.716 │ ███████████████████████████████████▋ │ +│ 1 │ 43 │ 0.735 │ ████████████████████████████████████▋ │ +│ 1 │ 44 │ 0.753 │ █████████████████████████████████████▋ │ +│ 1 │ 45 │ 0.77 │ ██████████████████████████████████████▍ │ +│ 1 │ 46 │ 0.785 │ ███████████████████████████████████████▎ │ +│ 1 │ 47 │ 0.8 │ ███████████████████████████████████████▊ │ +│ 1 │ 48 │ 0.813 │ ████████████████████████████████████████▋ │ +│ 1 │ 49 │ 0.825 │ █████████████████████████████████████████▎ │ └───────┴──────┴──────────────────────┴────────────────────────────────────────────┘ ``` diff --git a/docs/en/sql-reference/aggregate-functions/reference/exponentialtimedecayedavg.md b/docs/en/sql-reference/aggregate-functions/reference/exponentialtimedecayedavg.md new file mode 100644 index 00000000000..c729552749a --- /dev/null +++ b/docs/en/sql-reference/aggregate-functions/reference/exponentialtimedecayedavg.md @@ -0,0 +1,105 @@ +--- +slug: /en/sql-reference/aggregate-functions/reference/exponentialTimeDecayedAvg +sidebar_position: 133 +title: exponentialTimeDecayedAvg +--- + +## exponentialTimeDecayedAvg + +Returns the exponentially smoothed weighted moving average of values of a time series at point `t` in time. + +**Syntax** + +```sql +exponentialTimeDecayedAvg(x)(v, t) +``` + +**Arguments** + +- `v` — Value. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md). +- `t` — Time. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md), [DateTime](../../data-types/datetime.md), [DateTime64](../../data-types/datetime64.md). + +**Parameters** + +- `x` — Half-life period. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md). + +**Returned values** + +- Returns an exponentially smoothed weighted moving average at index `t` in time. [Float64](../../data-types/float.md). + +**Examples** + +Query: + +```sql +SELECT + value, + time, + round(exp_smooth, 3), + bar(exp_smooth, 0, 5, 50) AS bar +FROM + ( + SELECT + (number = 0) OR (number >= 25) AS value, + number AS time, + exponentialTimeDecayedAvg(10)(value, time) OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS exp_smooth + FROM numbers(50) + ); +``` + +Response: + +```sql + ┌─value─┬─time─┬─round(exp_smooth, 3)─┬─bar────────┐ +1. │ 1 │ 0 │ 1 │ ██████████ │ +2. │ 0 │ 1 │ 0.475 │ ████▊ │ +3. │ 0 │ 2 │ 0.301 │ ███ │ +4. │ 0 │ 3 │ 0.214 │ ██▏ │ +5. │ 0 │ 4 │ 0.162 │ █▌ │ +6. │ 0 │ 5 │ 0.128 │ █▎ │ +7. │ 0 │ 6 │ 0.104 │ █ │ +8. │ 0 │ 7 │ 0.086 │ ▊ │ +9. │ 0 │ 8 │ 0.072 │ ▋ │ +0. │ 0 │ 9 │ 0.061 │ ▌ │ +1. 
│ 0 │ 10 │ 0.052 │ ▌ │ +2. │ 0 │ 11 │ 0.045 │ ▍ │ +3. │ 0 │ 12 │ 0.039 │ ▍ │ +4. │ 0 │ 13 │ 0.034 │ ▎ │ +5. │ 0 │ 14 │ 0.03 │ ▎ │ +6. │ 0 │ 15 │ 0.027 │ ▎ │ +7. │ 0 │ 16 │ 0.024 │ ▏ │ +8. │ 0 │ 17 │ 0.021 │ ▏ │ +9. │ 0 │ 18 │ 0.018 │ ▏ │ +0. │ 0 │ 19 │ 0.016 │ ▏ │ +1. │ 0 │ 20 │ 0.015 │ ▏ │ +2. │ 0 │ 21 │ 0.013 │ ▏ │ +3. │ 0 │ 22 │ 0.012 │ │ +4. │ 0 │ 23 │ 0.01 │ │ +5. │ 0 │ 24 │ 0.009 │ │ +6. │ 1 │ 25 │ 0.111 │ █ │ +7. │ 1 │ 26 │ 0.202 │ ██ │ +8. │ 1 │ 27 │ 0.283 │ ██▊ │ +9. │ 1 │ 28 │ 0.355 │ ███▌ │ +0. │ 1 │ 29 │ 0.42 │ ████▏ │ +1. │ 1 │ 30 │ 0.477 │ ████▊ │ +2. │ 1 │ 31 │ 0.529 │ █████▎ │ +3. │ 1 │ 32 │ 0.576 │ █████▊ │ +4. │ 1 │ 33 │ 0.618 │ ██████▏ │ +5. │ 1 │ 34 │ 0.655 │ ██████▌ │ +6. │ 1 │ 35 │ 0.689 │ ██████▉ │ +7. │ 1 │ 36 │ 0.719 │ ███████▏ │ +8. │ 1 │ 37 │ 0.747 │ ███████▍ │ +9. │ 1 │ 38 │ 0.771 │ ███████▋ │ +0. │ 1 │ 39 │ 0.793 │ ███████▉ │ +1. │ 1 │ 40 │ 0.813 │ ████████▏ │ +2. │ 1 │ 41 │ 0.831 │ ████████▎ │ +3. │ 1 │ 42 │ 0.848 │ ████████▍ │ +4. │ 1 │ 43 │ 0.862 │ ████████▌ │ +5. │ 1 │ 44 │ 0.876 │ ████████▊ │ +6. │ 1 │ 45 │ 0.888 │ ████████▉ │ +7. │ 1 │ 46 │ 0.898 │ ████████▉ │ +8. │ 1 │ 47 │ 0.908 │ █████████ │ +9. │ 1 │ 48 │ 0.917 │ █████████▏ │ +0. │ 1 │ 49 │ 0.925 │ █████████▏ │ + └───────┴──────┴──────────────────────┴────────────┘ +``` \ No newline at end of file diff --git a/docs/en/sql-reference/aggregate-functions/reference/exponentialtimedecayedcount.md b/docs/en/sql-reference/aggregate-functions/reference/exponentialtimedecayedcount.md new file mode 100644 index 00000000000..b73d6c2503d --- /dev/null +++ b/docs/en/sql-reference/aggregate-functions/reference/exponentialtimedecayedcount.md @@ -0,0 +1,104 @@ +--- +slug: /en/sql-reference/aggregate-functions/reference/exponentialTimeDecayedCount +sidebar_position: 134 +title: exponentialTimeDecayedCount +--- + +## exponentialTimeDecayedCount + +Returns the cumulative exponential decay over a time series at the index `t` in time. + +**Syntax** + +```sql +exponentialTimeDecayedCount(x)(t) +``` + +**Arguments** + +- `t` — Time. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md), [DateTime](../../data-types/datetime.md), [DateTime64](../../data-types/datetime64.md). + +**Parameters** + +- `x` — Half-life period. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md). + +**Returned values** + +- Returns the cumulative exponential decay at the given point in time. [Float64](../../data-types/float.md). + +**Example** + +Query: + +```sql +SELECT + value, + time, + round(exp_smooth, 3), + bar(exp_smooth, 0, 20, 50) AS bar +FROM +( + SELECT + (number % 5) = 0 AS value, + number AS time, + exponentialTimeDecayedCount(10)(time) OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS exp_smooth + FROM numbers(50) +); +``` + +Result: + +```response + ┌─value─┬─time─┬─round(exp_smooth, 3)─┬─bar────────────────────────┐ + 1. │ 1 │ 0 │ 1 │ ██▌ │ + 2. │ 0 │ 1 │ 1.905 │ ████▊ │ + 3. │ 0 │ 2 │ 2.724 │ ██████▊ │ + 4. │ 0 │ 3 │ 3.464 │ ████████▋ │ + 5. │ 0 │ 4 │ 4.135 │ ██████████▎ │ + 6. │ 1 │ 5 │ 4.741 │ ███████████▊ │ + 7. │ 0 │ 6 │ 5.29 │ █████████████▏ │ + 8. │ 0 │ 7 │ 5.787 │ ██████████████▍ │ + 9. │ 0 │ 8 │ 6.236 │ ███████████████▌ │ +10. │ 0 │ 9 │ 6.643 │ ████████████████▌ │ +11. │ 1 │ 10 │ 7.01 │ █████████████████▌ │ +12. │ 0 │ 11 │ 7.343 │ ██████████████████▎ │ +13. 
│ 0 │ 12 │ 7.644 │ ███████████████████ │ +14. │ 0 │ 13 │ 7.917 │ ███████████████████▊ │ +15. │ 0 │ 14 │ 8.164 │ ████████████████████▍ │ +16. │ 1 │ 15 │ 8.387 │ ████████████████████▉ │ +17. │ 0 │ 16 │ 8.589 │ █████████████████████▍ │ +18. │ 0 │ 17 │ 8.771 │ █████████████████████▉ │ +19. │ 0 │ 18 │ 8.937 │ ██████████████████████▎ │ +20. │ 0 │ 19 │ 9.086 │ ██████████████████████▋ │ +21. │ 1 │ 20 │ 9.222 │ ███████████████████████ │ +22. │ 0 │ 21 │ 9.344 │ ███████████████████████▎ │ +23. │ 0 │ 22 │ 9.455 │ ███████████████████████▋ │ +24. │ 0 │ 23 │ 9.555 │ ███████████████████████▉ │ +25. │ 0 │ 24 │ 9.646 │ ████████████████████████ │ +26. │ 1 │ 25 │ 9.728 │ ████████████████████████▎ │ +27. │ 0 │ 26 │ 9.802 │ ████████████████████████▌ │ +28. │ 0 │ 27 │ 9.869 │ ████████████████████████▋ │ +29. │ 0 │ 28 │ 9.93 │ ████████████████████████▊ │ +30. │ 0 │ 29 │ 9.985 │ ████████████████████████▉ │ +31. │ 1 │ 30 │ 10.035 │ █████████████████████████ │ +32. │ 0 │ 31 │ 10.08 │ █████████████████████████▏ │ +33. │ 0 │ 32 │ 10.121 │ █████████████████████████▎ │ +34. │ 0 │ 33 │ 10.158 │ █████████████████████████▍ │ +35. │ 0 │ 34 │ 10.191 │ █████████████████████████▍ │ +36. │ 1 │ 35 │ 10.221 │ █████████████████████████▌ │ +37. │ 0 │ 36 │ 10.249 │ █████████████████████████▌ │ +38. │ 0 │ 37 │ 10.273 │ █████████████████████████▋ │ +39. │ 0 │ 38 │ 10.296 │ █████████████████████████▋ │ +40. │ 0 │ 39 │ 10.316 │ █████████████████████████▊ │ +41. │ 1 │ 40 │ 10.334 │ █████████████████████████▊ │ +42. │ 0 │ 41 │ 10.351 │ █████████████████████████▉ │ +43. │ 0 │ 42 │ 10.366 │ █████████████████████████▉ │ +44. │ 0 │ 43 │ 10.379 │ █████████████████████████▉ │ +45. │ 0 │ 44 │ 10.392 │ █████████████████████████▉ │ +46. │ 1 │ 45 │ 10.403 │ ██████████████████████████ │ +47. │ 0 │ 46 │ 10.413 │ ██████████████████████████ │ +48. │ 0 │ 47 │ 10.422 │ ██████████████████████████ │ +49. │ 0 │ 48 │ 10.43 │ ██████████████████████████ │ +50. │ 0 │ 49 │ 10.438 │ ██████████████████████████ │ + └───────┴──────┴──────────────────────┴────────────────────────────┘ +``` diff --git a/docs/en/sql-reference/aggregate-functions/reference/exponentialtimedecayedmax.md b/docs/en/sql-reference/aggregate-functions/reference/exponentialtimedecayedmax.md new file mode 100644 index 00000000000..06dc5313904 --- /dev/null +++ b/docs/en/sql-reference/aggregate-functions/reference/exponentialtimedecayedmax.md @@ -0,0 +1,105 @@ +--- +slug: /en/sql-reference/aggregate-functions/reference/exponentialTimeDecayedMax +sidebar_position: 135 +title: exponentialTimeDecayedMax +--- + +## exponentialTimeDecayedMax + +Returns the maximum of the computed exponentially smoothed moving average at index `t` in time with that at `t-1`. + +**Syntax** + +```sql +exponentialTimeDecayedMax(x)(value, timeunit) +``` + +**Arguments** + +- `value` — Value. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md). +- `timeunit` — Timeunit. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md), [DateTime](../../data-types/datetime.md), [DateTime64](../../data-types/datetime64.md). + +**Parameters** + +- `x` — Half-life period. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md). 
+ +**Returned values** + +- Returns the maximum of the exponentially smoothed weighted moving average at `t` and `t-1`. [Float64](../../data-types/float.md). + +**Example** + +Query: + +```sql +SELECT + value, + time, + round(exp_smooth, 3), + bar(exp_smooth, 0, 5, 50) AS bar +FROM + ( + SELECT + (number = 0) OR (number >= 25) AS value, + number AS time, + exponentialTimeDecayedMax(10)(value, time) OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS exp_smooth + FROM numbers(50) + ); +``` + +Result: + +```response + ┌─value─┬─time─┬─round(exp_smooth, 3)─┬─bar────────┐ + 1. │ 1 │ 0 │ 1 │ ██████████ │ + 2. │ 0 │ 1 │ 0.905 │ █████████ │ + 3. │ 0 │ 2 │ 0.819 │ ████████▏ │ + 4. │ 0 │ 3 │ 0.741 │ ███████▍ │ + 5. │ 0 │ 4 │ 0.67 │ ██████▋ │ + 6. │ 0 │ 5 │ 0.607 │ ██████ │ + 7. │ 0 │ 6 │ 0.549 │ █████▍ │ + 8. │ 0 │ 7 │ 0.497 │ ████▉ │ + 9. │ 0 │ 8 │ 0.449 │ ████▍ │ +10. │ 0 │ 9 │ 0.407 │ ████ │ +11. │ 0 │ 10 │ 0.368 │ ███▋ │ +12. │ 0 │ 11 │ 0.333 │ ███▎ │ +13. │ 0 │ 12 │ 0.301 │ ███ │ +14. │ 0 │ 13 │ 0.273 │ ██▋ │ +15. │ 0 │ 14 │ 0.247 │ ██▍ │ +16. │ 0 │ 15 │ 0.223 │ ██▏ │ +17. │ 0 │ 16 │ 0.202 │ ██ │ +18. │ 0 │ 17 │ 0.183 │ █▊ │ +19. │ 0 │ 18 │ 0.165 │ █▋ │ +20. │ 0 │ 19 │ 0.15 │ █▍ │ +21. │ 0 │ 20 │ 0.135 │ █▎ │ +22. │ 0 │ 21 │ 0.122 │ █▏ │ +23. │ 0 │ 22 │ 0.111 │ █ │ +24. │ 0 │ 23 │ 0.1 │ █ │ +25. │ 0 │ 24 │ 0.091 │ ▉ │ +26. │ 1 │ 25 │ 1 │ ██████████ │ +27. │ 1 │ 26 │ 1 │ ██████████ │ +28. │ 1 │ 27 │ 1 │ ██████████ │ +29. │ 1 │ 28 │ 1 │ ██████████ │ +30. │ 1 │ 29 │ 1 │ ██████████ │ +31. │ 1 │ 30 │ 1 │ ██████████ │ +32. │ 1 │ 31 │ 1 │ ██████████ │ +33. │ 1 │ 32 │ 1 │ ██████████ │ +34. │ 1 │ 33 │ 1 │ ██████████ │ +35. │ 1 │ 34 │ 1 │ ██████████ │ +36. │ 1 │ 35 │ 1 │ ██████████ │ +37. │ 1 │ 36 │ 1 │ ██████████ │ +38. │ 1 │ 37 │ 1 │ ██████████ │ +39. │ 1 │ 38 │ 1 │ ██████████ │ +40. │ 1 │ 39 │ 1 │ ██████████ │ +41. │ 1 │ 40 │ 1 │ ██████████ │ +42. │ 1 │ 41 │ 1 │ ██████████ │ +43. │ 1 │ 42 │ 1 │ ██████████ │ +44. │ 1 │ 43 │ 1 │ ██████████ │ +45. │ 1 │ 44 │ 1 │ ██████████ │ +46. │ 1 │ 45 │ 1 │ ██████████ │ +47. │ 1 │ 46 │ 1 │ ██████████ │ +48. │ 1 │ 47 │ 1 │ ██████████ │ +49. │ 1 │ 48 │ 1 │ ██████████ │ +50. │ 1 │ 49 │ 1 │ ██████████ │ + └───────┴──────┴──────────────────────┴────────────┘ +``` \ No newline at end of file diff --git a/docs/en/sql-reference/aggregate-functions/reference/exponentialtimedecayedsum.md b/docs/en/sql-reference/aggregate-functions/reference/exponentialtimedecayedsum.md new file mode 100644 index 00000000000..617cd265dac --- /dev/null +++ b/docs/en/sql-reference/aggregate-functions/reference/exponentialtimedecayedsum.md @@ -0,0 +1,105 @@ +--- +slug: /en/sql-reference/aggregate-functions/reference/exponentialTimeDecayedSum +sidebar_position: 136 +title: exponentialTimeDecayedSum +--- + +## exponentialTimeDecayedSum + +Returns the sum of exponentially smoothed moving average values of a time series at the index `t` in time. + +**Syntax** + +```sql +exponentialTimeDecayedSum(x)(v, t) +``` + +**Arguments** + +- `v` — Value. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md). +- `t` — Time. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md), [DateTime](../../data-types/datetime.md), [DateTime64](../../data-types/datetime64.md). + +**Parameters** + +- `x` — Half-life period. 
[Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md). + +**Returned values** + +- Returns the sum of exponentially smoothed moving average values at the given point in time. [Float64](../../data-types/float.md). + +**Example** + +Query: + +```sql +SELECT + value, + time, + round(exp_smooth, 3), + bar(exp_smooth, 0, 10, 50) AS bar +FROM + ( + SELECT + (number = 0) OR (number >= 25) AS value, + number AS time, + exponentialTimeDecayedSum(10)(value, time) OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS exp_smooth + FROM numbers(50) + ); +``` + +Result: + +```response + ┌─value─┬─time─┬─round(exp_smooth, 3)─┬─bar───────────────────────────────────────────────┐ + 1. │ 1 │ 0 │ 1 │ █████ │ + 2. │ 0 │ 1 │ 0.905 │ ████▌ │ + 3. │ 0 │ 2 │ 0.819 │ ████ │ + 4. │ 0 │ 3 │ 0.741 │ ███▋ │ + 5. │ 0 │ 4 │ 0.67 │ ███▎ │ + 6. │ 0 │ 5 │ 0.607 │ ███ │ + 7. │ 0 │ 6 │ 0.549 │ ██▋ │ + 8. │ 0 │ 7 │ 0.497 │ ██▍ │ + 9. │ 0 │ 8 │ 0.449 │ ██▏ │ +10. │ 0 │ 9 │ 0.407 │ ██ │ +11. │ 0 │ 10 │ 0.368 │ █▊ │ +12. │ 0 │ 11 │ 0.333 │ █▋ │ +13. │ 0 │ 12 │ 0.301 │ █▌ │ +14. │ 0 │ 13 │ 0.273 │ █▎ │ +15. │ 0 │ 14 │ 0.247 │ █▏ │ +16. │ 0 │ 15 │ 0.223 │ █ │ +17. │ 0 │ 16 │ 0.202 │ █ │ +18. │ 0 │ 17 │ 0.183 │ ▉ │ +19. │ 0 │ 18 │ 0.165 │ ▊ │ +20. │ 0 │ 19 │ 0.15 │ ▋ │ +21. │ 0 │ 20 │ 0.135 │ ▋ │ +22. │ 0 │ 21 │ 0.122 │ ▌ │ +23. │ 0 │ 22 │ 0.111 │ ▌ │ +24. │ 0 │ 23 │ 0.1 │ ▌ │ +25. │ 0 │ 24 │ 0.091 │ ▍ │ +26. │ 1 │ 25 │ 1.082 │ █████▍ │ +27. │ 1 │ 26 │ 1.979 │ █████████▉ │ +28. │ 1 │ 27 │ 2.791 │ █████████████▉ │ +29. │ 1 │ 28 │ 3.525 │ █████████████████▋ │ +30. │ 1 │ 29 │ 4.19 │ ████████████████████▉ │ +31. │ 1 │ 30 │ 4.791 │ ███████████████████████▉ │ +32. │ 1 │ 31 │ 5.335 │ ██████████████████████████▋ │ +33. │ 1 │ 32 │ 5.827 │ █████████████████████████████▏ │ +34. │ 1 │ 33 │ 6.273 │ ███████████████████████████████▎ │ +35. │ 1 │ 34 │ 6.676 │ █████████████████████████████████▍ │ +36. │ 1 │ 35 │ 7.041 │ ███████████████████████████████████▏ │ +37. │ 1 │ 36 │ 7.371 │ ████████████████████████████████████▊ │ +38. │ 1 │ 37 │ 7.669 │ ██████████████████████████████████████▎ │ +39. │ 1 │ 38 │ 7.939 │ ███████████████████████████████████████▋ │ +40. │ 1 │ 39 │ 8.184 │ ████████████████████████████████████████▉ │ +41. │ 1 │ 40 │ 8.405 │ ██████████████████████████████████████████ │ +42. │ 1 │ 41 │ 8.605 │ ███████████████████████████████████████████ │ +43. │ 1 │ 42 │ 8.786 │ ███████████████████████████████████████████▉ │ +44. │ 1 │ 43 │ 8.95 │ ████████████████████████████████████████████▊ │ +45. │ 1 │ 44 │ 9.098 │ █████████████████████████████████████████████▍ │ +46. │ 1 │ 45 │ 9.233 │ ██████████████████████████████████████████████▏ │ +47. │ 1 │ 46 │ 9.354 │ ██████████████████████████████████████████████▊ │ +48. │ 1 │ 47 │ 9.464 │ ███████████████████████████████████████████████▎ │ +49. │ 1 │ 48 │ 9.563 │ ███████████████████████████████████████████████▊ │ +50. 
│ 1 │ 49 │ 9.653 │ ████████████████████████████████████████████████▎ │ + └───────┴──────┴──────────────────────┴───────────────────────────────────────────────────┘ +``` \ No newline at end of file diff --git a/docs/en/sql-reference/aggregate-functions/reference/first_value.md b/docs/en/sql-reference/aggregate-functions/reference/first_value.md index 0c26b66c64a..2cd0e1fa16f 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/first_value.md +++ b/docs/en/sql-reference/aggregate-functions/reference/first_value.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/first_value -sidebar_position: 7 +sidebar_position: 137 --- # first_value diff --git a/docs/en/sql-reference/aggregate-functions/reference/flame_graph.md b/docs/en/sql-reference/aggregate-functions/reference/flame_graph.md index ae17153085c..4abb3e03226 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/flame_graph.md +++ b/docs/en/sql-reference/aggregate-functions/reference/flame_graph.md @@ -1,6 +1,6 @@ --- -slug: /en/sql-reference/aggregate-functions/reference/flamegraph -sidebar_position: 110 +slug: /en/sql-reference/aggregate-functions/reference/flame_graph +sidebar_position: 138 --- # flameGraph diff --git a/docs/en/sql-reference/aggregate-functions/reference/grouparray.md b/docs/en/sql-reference/aggregate-functions/reference/grouparray.md index a38e35a72ad..1a87e3aeba9 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/grouparray.md +++ b/docs/en/sql-reference/aggregate-functions/reference/grouparray.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/grouparray -sidebar_position: 110 +sidebar_position: 139 --- # groupArray diff --git a/docs/en/sql-reference/aggregate-functions/reference/grouparrayinsertat.md b/docs/en/sql-reference/aggregate-functions/reference/grouparrayinsertat.md index d745e8a0e7a..c6b23c2f808 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/grouparrayinsertat.md +++ b/docs/en/sql-reference/aggregate-functions/reference/grouparrayinsertat.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/grouparrayinsertat -sidebar_position: 112 +sidebar_position: 140 --- # groupArrayInsertAt diff --git a/docs/en/sql-reference/aggregate-functions/reference/grouparrayintersect.md b/docs/en/sql-reference/aggregate-functions/reference/grouparrayintersect.md index 5cac88be073..a370f595923 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/grouparrayintersect.md +++ b/docs/en/sql-reference/aggregate-functions/reference/grouparrayintersect.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/grouparrayintersect -sidebar_position: 115 +sidebar_position: 141 --- # groupArrayIntersect diff --git a/docs/en/sql-reference/aggregate-functions/reference/grouparraylast.md b/docs/en/sql-reference/aggregate-functions/reference/grouparraylast.md index 9b48ee54ecd..ff62dcdde9b 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/grouparraylast.md +++ b/docs/en/sql-reference/aggregate-functions/reference/grouparraylast.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/grouparraylast -sidebar_position: 110 +sidebar_position: 142 --- # groupArrayLast diff --git a/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingavg.md b/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingavg.md index 32c0608afeb..6b6c4830535 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingavg.md +++ 
b/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingavg.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/grouparraymovingavg -sidebar_position: 114 +sidebar_position: 143 --- # groupArrayMovingAvg diff --git a/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingsum.md b/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingsum.md index 6f2a60dd080..d1fa6fce9b0 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingsum.md +++ b/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingsum.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/grouparraymovingsum -sidebar_position: 113 +sidebar_position: 144 --- # groupArrayMovingSum diff --git a/docs/en/sql-reference/aggregate-functions/reference/grouparraysample.md b/docs/en/sql-reference/aggregate-functions/reference/grouparraysample.md index 393087161df..38ddae48ee7 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/grouparraysample.md +++ b/docs/en/sql-reference/aggregate-functions/reference/grouparraysample.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/grouparraysample -sidebar_position: 114 +sidebar_position: 145 --- # groupArraySample diff --git a/docs/en/sql-reference/aggregate-functions/reference/grouparraysorted.md b/docs/en/sql-reference/aggregate-functions/reference/grouparraysorted.md index 9bee0c29e7a..22a150bb8fb 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/grouparraysorted.md +++ b/docs/en/sql-reference/aggregate-functions/reference/grouparraysorted.md @@ -1,6 +1,7 @@ - --- - toc_priority: 112 - --- +--- +slug: /en/sql-reference/aggregate-functions/reference/grouparraysorted +sidebar_position: 146 +--- # groupArraySorted {#groupArraySorted} diff --git a/docs/en/sql-reference/aggregate-functions/reference/groupbitand.md b/docs/en/sql-reference/aggregate-functions/reference/groupbitand.md index 3d833555a43..eee383d84e9 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/groupbitand.md +++ b/docs/en/sql-reference/aggregate-functions/reference/groupbitand.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/groupbitand -sidebar_position: 125 +sidebar_position: 147 --- # groupBitAnd diff --git a/docs/en/sql-reference/aggregate-functions/reference/groupbitmap.md b/docs/en/sql-reference/aggregate-functions/reference/groupbitmap.md index 02b9e0e8821..23b686e29b2 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/groupbitmap.md +++ b/docs/en/sql-reference/aggregate-functions/reference/groupbitmap.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/groupbitmap -sidebar_position: 128 +sidebar_position: 148 --- # groupBitmap diff --git a/docs/en/sql-reference/aggregate-functions/reference/groupbitmapand.md b/docs/en/sql-reference/aggregate-functions/reference/groupbitmapand.md index 1e649645e75..77bbf7d3d2c 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/groupbitmapand.md +++ b/docs/en/sql-reference/aggregate-functions/reference/groupbitmapand.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/groupbitmapand -sidebar_position: 129 +sidebar_position: 149 title: groupBitmapAnd --- diff --git a/docs/en/sql-reference/aggregate-functions/reference/groupbitmapor.md b/docs/en/sql-reference/aggregate-functions/reference/groupbitmapor.md index c88c80ceff2..7bb3dc689e8 100644 --- 
a/docs/en/sql-reference/aggregate-functions/reference/groupbitmapor.md +++ b/docs/en/sql-reference/aggregate-functions/reference/groupbitmapor.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/groupbitmapor -sidebar_position: 130 +sidebar_position: 150 title: groupBitmapOr --- diff --git a/docs/en/sql-reference/aggregate-functions/reference/groupbitmapxor.md b/docs/en/sql-reference/aggregate-functions/reference/groupbitmapxor.md index aa24b3d2128..3212e94a47b 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/groupbitmapxor.md +++ b/docs/en/sql-reference/aggregate-functions/reference/groupbitmapxor.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/groupbitmapxor -sidebar_position: 131 +sidebar_position: 151 title: groupBitmapXor --- diff --git a/docs/en/sql-reference/aggregate-functions/reference/groupbitor.md b/docs/en/sql-reference/aggregate-functions/reference/groupbitor.md index 138ee998405..802b839d56e 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/groupbitor.md +++ b/docs/en/sql-reference/aggregate-functions/reference/groupbitor.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/groupbitor -sidebar_position: 126 +sidebar_position: 152 --- # groupBitOr diff --git a/docs/en/sql-reference/aggregate-functions/reference/groupbitxor.md b/docs/en/sql-reference/aggregate-functions/reference/groupbitxor.md index 168335a010c..94891891d64 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/groupbitxor.md +++ b/docs/en/sql-reference/aggregate-functions/reference/groupbitxor.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/groupbitxor -sidebar_position: 127 +sidebar_position: 153 --- # groupBitXor diff --git a/docs/en/sql-reference/aggregate-functions/reference/groupuniqarray.md b/docs/en/sql-reference/aggregate-functions/reference/groupuniqarray.md index fe5f714c307..0462f4a4ab2 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/groupuniqarray.md +++ b/docs/en/sql-reference/aggregate-functions/reference/groupuniqarray.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/groupuniqarray -sidebar_position: 111 +sidebar_position: 154 --- # groupUniqArray diff --git a/docs/en/sql-reference/aggregate-functions/reference/intervalLengthSum.md b/docs/en/sql-reference/aggregate-functions/reference/intervalLengthSum.md index 5990345b765..66e23a716ba 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/intervalLengthSum.md +++ b/docs/en/sql-reference/aggregate-functions/reference/intervalLengthSum.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/intervalLengthSum -sidebar_position: 146 +sidebar_position: 155 sidebar_label: intervalLengthSum title: intervalLengthSum --- diff --git a/docs/en/sql-reference/aggregate-functions/reference/kolmogorovsmirnovtest.md b/docs/en/sql-reference/aggregate-functions/reference/kolmogorovsmirnovtest.md index d159eec7ce6..33afcdfbf38 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/kolmogorovsmirnovtest.md +++ b/docs/en/sql-reference/aggregate-functions/reference/kolmogorovsmirnovtest.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/kolmogorovsmirnovtest -sidebar_position: 300 +sidebar_position: 156 sidebar_label: kolmogorovSmirnovTest --- diff --git a/docs/en/sql-reference/aggregate-functions/reference/kurtpop.md b/docs/en/sql-reference/aggregate-functions/reference/kurtpop.md index 
e1a29973fcf..c543831addc 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/kurtpop.md +++ b/docs/en/sql-reference/aggregate-functions/reference/kurtpop.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/kurtpop -sidebar_position: 153 +sidebar_position: 157 --- # kurtPop diff --git a/docs/en/sql-reference/aggregate-functions/reference/kurtsamp.md b/docs/en/sql-reference/aggregate-functions/reference/kurtsamp.md index 911c2bfbe74..57e80729454 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/kurtsamp.md +++ b/docs/en/sql-reference/aggregate-functions/reference/kurtsamp.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/kurtsamp -sidebar_position: 154 +sidebar_position: 158 --- # kurtSamp diff --git a/docs/en/sql-reference/aggregate-functions/reference/largestTriangleThreeBuckets.md b/docs/en/sql-reference/aggregate-functions/reference/largestTriangleThreeBuckets.md index 4f73aadb8da..673f3cb69c7 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/largestTriangleThreeBuckets.md +++ b/docs/en/sql-reference/aggregate-functions/reference/largestTriangleThreeBuckets.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/largestTriangleThreeBuckets -sidebar_position: 312 +sidebar_position: 159 sidebar_label: largestTriangleThreeBuckets --- diff --git a/docs/en/sql-reference/aggregate-functions/reference/last_value.md b/docs/en/sql-reference/aggregate-functions/reference/last_value.md index 21a86a5f130..b2aa5c86d81 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/last_value.md +++ b/docs/en/sql-reference/aggregate-functions/reference/last_value.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/last_value -sidebar_position: 8 +sidebar_position: 160 --- # last_value diff --git a/docs/en/sql-reference/aggregate-functions/reference/mannwhitneyutest.md b/docs/en/sql-reference/aggregate-functions/reference/mannwhitneyutest.md index af744f445d9..17f6afecde2 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/mannwhitneyutest.md +++ b/docs/en/sql-reference/aggregate-functions/reference/mannwhitneyutest.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/mannwhitneyutest -sidebar_position: 310 +sidebar_position: 161 sidebar_label: mannWhitneyUTest --- diff --git a/docs/en/sql-reference/aggregate-functions/reference/max.md b/docs/en/sql-reference/aggregate-functions/reference/max.md index 4bb2145d683..12c8800ef7f 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/max.md +++ b/docs/en/sql-reference/aggregate-functions/reference/max.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/max -sidebar_position: 3 +sidebar_position: 162 title: max --- diff --git a/docs/en/sql-reference/aggregate-functions/reference/maxintersections.md b/docs/en/sql-reference/aggregate-functions/reference/maxintersections.md index db99b900a3e..c65e31114ff 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/maxintersections.md +++ b/docs/en/sql-reference/aggregate-functions/reference/maxintersections.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/maxintersections -sidebar_position: 360 +sidebar_position: 163 title: maxIntersections --- diff --git a/docs/en/sql-reference/aggregate-functions/reference/maxintersectionsposition.md b/docs/en/sql-reference/aggregate-functions/reference/maxintersectionsposition.md index 7dd63f09316..d5c2b0bd3c2 100644 --- 
a/docs/en/sql-reference/aggregate-functions/reference/maxintersectionsposition.md +++ b/docs/en/sql-reference/aggregate-functions/reference/maxintersectionsposition.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/maxintersectionsposition -sidebar_position: 361 +sidebar_position: 164 title: maxIntersectionsPosition --- diff --git a/docs/en/sql-reference/aggregate-functions/reference/maxmap.md b/docs/en/sql-reference/aggregate-functions/reference/maxmap.md index 4d8c67e1b90..c9c6913249c 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/maxmap.md +++ b/docs/en/sql-reference/aggregate-functions/reference/maxmap.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/maxmap -sidebar_position: 143 +sidebar_position: 165 --- # maxMap diff --git a/docs/en/sql-reference/aggregate-functions/reference/meanztest.md b/docs/en/sql-reference/aggregate-functions/reference/meanztest.md index 1cf2bebf26f..19afb5ae742 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/meanztest.md +++ b/docs/en/sql-reference/aggregate-functions/reference/meanztest.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/meanztest -sidebar_position: 303 +sidebar_position: 166 sidebar_label: meanZTest --- diff --git a/docs/en/sql-reference/aggregate-functions/reference/median.md b/docs/en/sql-reference/aggregate-functions/reference/median.md index 2a166c83dad..dcf174254ac 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/median.md +++ b/docs/en/sql-reference/aggregate-functions/reference/median.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/median -sidebar_position: 212 +sidebar_position: 167 --- # median diff --git a/docs/en/sql-reference/aggregate-functions/reference/min.md b/docs/en/sql-reference/aggregate-functions/reference/min.md index cca515b76e8..6bfcaf020c8 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/min.md +++ b/docs/en/sql-reference/aggregate-functions/reference/min.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/min -sidebar_position: 2 +sidebar_position: 168 title: min --- diff --git a/docs/en/sql-reference/aggregate-functions/reference/minmap.md b/docs/en/sql-reference/aggregate-functions/reference/minmap.md index 5436e1fc6a6..b1fbb9e49f3 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/minmap.md +++ b/docs/en/sql-reference/aggregate-functions/reference/minmap.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/minmap -sidebar_position: 142 +sidebar_position: 169 --- # minMap diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantile.md b/docs/en/sql-reference/aggregate-functions/reference/quantile.md index 91b6b1b0d80..d5278125cbc 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/quantile.md +++ b/docs/en/sql-reference/aggregate-functions/reference/quantile.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/quantile -sidebar_position: 200 +sidebar_position: 170 --- # quantile diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantileGK.md b/docs/en/sql-reference/aggregate-functions/reference/quantileGK.md index 7352781d126..9582f264a6f 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/quantileGK.md +++ b/docs/en/sql-reference/aggregate-functions/reference/quantileGK.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/quantileGK -sidebar_position: 204 +sidebar_position: 175 
--- # quantileGK diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantilebfloat16.md b/docs/en/sql-reference/aggregate-functions/reference/quantilebfloat16.md index 4377f2f1b17..4469438db6a 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/quantilebfloat16.md +++ b/docs/en/sql-reference/aggregate-functions/reference/quantilebfloat16.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/quantilebfloat16 -sidebar_position: 209 +sidebar_position: 171 title: quantileBFloat16 --- diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantileddsketch.md b/docs/en/sql-reference/aggregate-functions/reference/quantileddsketch.md index f9acd2e20cb..fc9db7ef08d 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/quantileddsketch.md +++ b/docs/en/sql-reference/aggregate-functions/reference/quantileddsketch.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/quantileddsketch -sidebar_position: 211 +sidebar_position: 171 title: quantileDD --- diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantiledeterministic.md b/docs/en/sql-reference/aggregate-functions/reference/quantiledeterministic.md index 7235c47da70..0ac4b5e3a51 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/quantiledeterministic.md +++ b/docs/en/sql-reference/aggregate-functions/reference/quantiledeterministic.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/quantiledeterministic -sidebar_position: 206 +sidebar_position: 172 --- # quantileDeterministic diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantileexact.md b/docs/en/sql-reference/aggregate-functions/reference/quantileexact.md index d7d7413c283..46873bcd2b6 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/quantileexact.md +++ b/docs/en/sql-reference/aggregate-functions/reference/quantileexact.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/quantileexact -sidebar_position: 202 +sidebar_position: 173 --- # quantileExact Functions diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantileexactweighted.md b/docs/en/sql-reference/aggregate-functions/reference/quantileexactweighted.md index 34def8d7411..4ce212888c4 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/quantileexactweighted.md +++ b/docs/en/sql-reference/aggregate-functions/reference/quantileexactweighted.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/quantileexactweighted -sidebar_position: 203 +sidebar_position: 174 --- # quantileExactWeighted diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantileinterpolatedweighted.md b/docs/en/sql-reference/aggregate-functions/reference/quantileinterpolatedweighted.md index 41d2627fb7b..9eb4fde6102 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/quantileinterpolatedweighted.md +++ b/docs/en/sql-reference/aggregate-functions/reference/quantileinterpolatedweighted.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/quantileInterpolatedWeighted -sidebar_position: 203 +sidebar_position: 176 --- # quantileInterpolatedWeighted diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantiles.md b/docs/en/sql-reference/aggregate-functions/reference/quantiles.md index 856d447ac13..e2c3295221d 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/quantiles.md +++ b/docs/en/sql-reference/aggregate-functions/reference/quantiles.md @@ 
-1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/quantiles -sidebar_position: 201 +sidebar_position: 177 --- # quantiles Functions diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantiletdigest.md b/docs/en/sql-reference/aggregate-functions/reference/quantiletdigest.md index 796e87b02d8..ece54ca24ab 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/quantiletdigest.md +++ b/docs/en/sql-reference/aggregate-functions/reference/quantiletdigest.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/quantiletdigest -sidebar_position: 207 +sidebar_position: 178 --- # quantileTDigest diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantiletdigestweighted.md b/docs/en/sql-reference/aggregate-functions/reference/quantiletdigestweighted.md index b3e21e0e69e..7f8f7f53a97 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/quantiletdigestweighted.md +++ b/docs/en/sql-reference/aggregate-functions/reference/quantiletdigestweighted.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/quantiletdigestweighted -sidebar_position: 208 +sidebar_position: 179 --- # quantileTDigestWeighted diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantiletiming.md b/docs/en/sql-reference/aggregate-functions/reference/quantiletiming.md index b5b1c8a0c01..78050fe5b5e 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/quantiletiming.md +++ b/docs/en/sql-reference/aggregate-functions/reference/quantiletiming.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/quantiletiming -sidebar_position: 204 +sidebar_position: 180 --- # quantileTiming diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantiletimingweighted.md b/docs/en/sql-reference/aggregate-functions/reference/quantiletimingweighted.md index df483aac01e..c5fff0825c3 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/quantiletimingweighted.md +++ b/docs/en/sql-reference/aggregate-functions/reference/quantiletimingweighted.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/quantiletimingweighted -sidebar_position: 205 +sidebar_position: 181 --- # quantileTimingWeighted diff --git a/docs/en/sql-reference/aggregate-functions/reference/rankCorr.md b/docs/en/sql-reference/aggregate-functions/reference/rankCorr.md index 27f2dd124e4..eb995923d97 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/rankCorr.md +++ b/docs/en/sql-reference/aggregate-functions/reference/rankCorr.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/rankCorr -sidebar_position: 145 +sidebar_position: 182 --- # rankCorr diff --git a/docs/en/sql-reference/aggregate-functions/reference/simplelinearregression.md b/docs/en/sql-reference/aggregate-functions/reference/simplelinearregression.md index ea3dbff8691..2aebccfdc53 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/simplelinearregression.md +++ b/docs/en/sql-reference/aggregate-functions/reference/simplelinearregression.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/simplelinearregression -sidebar_position: 220 +sidebar_position: 183 --- # simpleLinearRegression diff --git a/docs/en/sql-reference/aggregate-functions/reference/singlevalueornull.md b/docs/en/sql-reference/aggregate-functions/reference/singlevalueornull.md index e39af77059a..21344b58ba6 100644 --- 
a/docs/en/sql-reference/aggregate-functions/reference/singlevalueornull.md +++ b/docs/en/sql-reference/aggregate-functions/reference/singlevalueornull.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/singlevalueornull -sidebar_position: 220 +sidebar_position: 184 --- # singleValueOrNull diff --git a/docs/en/sql-reference/aggregate-functions/reference/skewpop.md b/docs/en/sql-reference/aggregate-functions/reference/skewpop.md index 379fdcfa7c2..58ea33edb81 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/skewpop.md +++ b/docs/en/sql-reference/aggregate-functions/reference/skewpop.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/skewpop -sidebar_position: 150 +sidebar_position: 185 --- # skewPop diff --git a/docs/en/sql-reference/aggregate-functions/reference/skewsamp.md b/docs/en/sql-reference/aggregate-functions/reference/skewsamp.md index 9e64b186db3..9c32a0183ef 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/skewsamp.md +++ b/docs/en/sql-reference/aggregate-functions/reference/skewsamp.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/skewsamp -sidebar_position: 151 +sidebar_position: 186 --- # skewSamp diff --git a/docs/en/sql-reference/aggregate-functions/reference/sparkbar.md b/docs/en/sql-reference/aggregate-functions/reference/sparkbar.md index 62edc221858..8791847ead0 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/sparkbar.md +++ b/docs/en/sql-reference/aggregate-functions/reference/sparkbar.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/sparkbar -sidebar_position: 311 +sidebar_position: 187 sidebar_label: sparkbar --- diff --git a/docs/en/sql-reference/aggregate-functions/reference/stddevpop.md b/docs/en/sql-reference/aggregate-functions/reference/stddevpop.md index d2406197ecc..e52a442d76a 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/stddevpop.md +++ b/docs/en/sql-reference/aggregate-functions/reference/stddevpop.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/stddevpop -sidebar_position: 30 +sidebar_position: 188 --- # stddevPop @@ -25,7 +25,7 @@ stddevPop(x) **Returned value** -Square root of standard deviation of `x`. [Float64](../../data-types/float.md). +- Square root of the variance of `x`, i.e. its population standard deviation. [Float64](../../data-types/float.md). 
**Example** diff --git a/docs/en/sql-reference/aggregate-functions/reference/stddevpopstable.md b/docs/en/sql-reference/aggregate-functions/reference/stddevpopstable.md index a8ad5956ae8..2051ce7b125 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/stddevpopstable.md +++ b/docs/en/sql-reference/aggregate-functions/reference/stddevpopstable.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/stddevpopstable -sidebar_position: 30 +sidebar_position: 189 --- # stddevPopStable diff --git a/docs/en/sql-reference/aggregate-functions/reference/stddevsamp.md b/docs/en/sql-reference/aggregate-functions/reference/stddevsamp.md index cf8b9b20d63..e2cad40b267 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/stddevsamp.md +++ b/docs/en/sql-reference/aggregate-functions/reference/stddevsamp.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/stddevsamp -sidebar_position: 31 +sidebar_position: 190 --- # stddevSamp diff --git a/docs/en/sql-reference/aggregate-functions/reference/stddevsampstable.md b/docs/en/sql-reference/aggregate-functions/reference/stddevsampstable.md index 9ae1f5f8411..205e10cced5 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/stddevsampstable.md +++ b/docs/en/sql-reference/aggregate-functions/reference/stddevsampstable.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/stddevsampstable -sidebar_position: 31 +sidebar_position: 191 --- # stddevSampStable diff --git a/docs/en/sql-reference/aggregate-functions/reference/stochasticlinearregression.md b/docs/en/sql-reference/aggregate-functions/reference/stochasticlinearregression.md index 7ab9e1d3256..6cc5cbd8fe1 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/stochasticlinearregression.md +++ b/docs/en/sql-reference/aggregate-functions/reference/stochasticlinearregression.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/stochasticlinearregression -sidebar_position: 221 +sidebar_position: 192 --- # stochasticLinearRegression {#agg_functions_stochasticlinearregression_parameters} diff --git a/docs/en/sql-reference/aggregate-functions/reference/stochasticlogisticregression.md b/docs/en/sql-reference/aggregate-functions/reference/stochasticlogisticregression.md index 4bf5529ddcb..dca452a1702 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/stochasticlogisticregression.md +++ b/docs/en/sql-reference/aggregate-functions/reference/stochasticlogisticregression.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/stochasticlogisticregression -sidebar_position: 222 +sidebar_position: 193 --- # stochasticLogisticRegression diff --git a/docs/en/sql-reference/aggregate-functions/reference/studentttest.md b/docs/en/sql-reference/aggregate-functions/reference/studentttest.md index fa320b4e336..1605e8efa13 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/studentttest.md +++ b/docs/en/sql-reference/aggregate-functions/reference/studentttest.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/studentttest -sidebar_position: 300 +sidebar_position: 194 sidebar_label: studentTTest --- diff --git a/docs/en/sql-reference/aggregate-functions/reference/sum.md b/docs/en/sql-reference/aggregate-functions/reference/sum.md index a33a99f63e6..19636f003c7 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/sum.md +++ b/docs/en/sql-reference/aggregate-functions/reference/sum.md @@ -1,6 +1,6 @@ --- slug: 
/en/sql-reference/aggregate-functions/reference/sum -sidebar_position: 4 +sidebar_position: 195 --- # sum diff --git a/docs/en/sql-reference/aggregate-functions/reference/sumcount.md b/docs/en/sql-reference/aggregate-functions/reference/sumcount.md index a59b87022d6..ff4ddcec142 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/sumcount.md +++ b/docs/en/sql-reference/aggregate-functions/reference/sumcount.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/sumcount -sidebar_position: 144 +sidebar_position: 196 title: sumCount --- diff --git a/docs/en/sql-reference/aggregate-functions/reference/sumkahan.md b/docs/en/sql-reference/aggregate-functions/reference/sumkahan.md index 1a729b18b42..ed58b3c3369 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/sumkahan.md +++ b/docs/en/sql-reference/aggregate-functions/reference/sumkahan.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/sumkahan -sidebar_position: 145 +sidebar_position: 197 title: sumKahan --- diff --git a/docs/en/sql-reference/aggregate-functions/reference/summap.md b/docs/en/sql-reference/aggregate-functions/reference/summap.md index fd3f095511b..4ff937f1e4f 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/summap.md +++ b/docs/en/sql-reference/aggregate-functions/reference/summap.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/summap -sidebar_position: 141 +sidebar_position: 198 --- # sumMap diff --git a/docs/en/sql-reference/aggregate-functions/reference/summapwithoverflow.md b/docs/en/sql-reference/aggregate-functions/reference/summapwithoverflow.md index 7c0aa31e459..e36818e2ab8 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/summapwithoverflow.md +++ b/docs/en/sql-reference/aggregate-functions/reference/summapwithoverflow.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/summapwithoverflow -sidebar_position: 141 +sidebar_position: 199 --- # sumMapWithOverflow diff --git a/docs/en/sql-reference/aggregate-functions/reference/sumwithoverflow.md b/docs/en/sql-reference/aggregate-functions/reference/sumwithoverflow.md index a120eafe738..5fe3cb7de8e 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/sumwithoverflow.md +++ b/docs/en/sql-reference/aggregate-functions/reference/sumwithoverflow.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/sumwithoverflow -sidebar_position: 140 +sidebar_position: 200 --- # sumWithOverflow diff --git a/docs/en/sql-reference/aggregate-functions/reference/theilsu.md b/docs/en/sql-reference/aggregate-functions/reference/theilsu.md index ef19438a53a..73b063cf965 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/theilsu.md +++ b/docs/en/sql-reference/aggregate-functions/reference/theilsu.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/theilsu -sidebar_position: 353 +sidebar_position: 201 --- # theilsU diff --git a/docs/en/sql-reference/aggregate-functions/reference/topk.md b/docs/en/sql-reference/aggregate-functions/reference/topk.md index dd4b2251a8a..695e9b1d7d8 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/topk.md +++ b/docs/en/sql-reference/aggregate-functions/reference/topk.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/topk -sidebar_position: 108 +sidebar_position: 202 --- # topK diff --git a/docs/en/sql-reference/aggregate-functions/reference/topkweighted.md 
b/docs/en/sql-reference/aggregate-functions/reference/topkweighted.md index d2a469828fc..148a8b6ea18 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/topkweighted.md +++ b/docs/en/sql-reference/aggregate-functions/reference/topkweighted.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/topkweighted -sidebar_position: 109 +sidebar_position: 203 --- # topKWeighted diff --git a/docs/en/sql-reference/aggregate-functions/reference/uniq.md b/docs/en/sql-reference/aggregate-functions/reference/uniq.md index b1c8336630b..c1dc6a29e58 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/uniq.md +++ b/docs/en/sql-reference/aggregate-functions/reference/uniq.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/uniq -sidebar_position: 190 +sidebar_position: 204 --- # uniq diff --git a/docs/en/sql-reference/aggregate-functions/reference/uniqcombined.md b/docs/en/sql-reference/aggregate-functions/reference/uniqcombined.md index 18f44d2fcc4..70bb4463140 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/uniqcombined.md +++ b/docs/en/sql-reference/aggregate-functions/reference/uniqcombined.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/uniqcombined -sidebar_position: 192 +sidebar_position: 205 --- # uniqCombined diff --git a/docs/en/sql-reference/aggregate-functions/reference/uniqcombined64.md b/docs/en/sql-reference/aggregate-functions/reference/uniqcombined64.md index b6e09bcaae3..014984f6291 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/uniqcombined64.md +++ b/docs/en/sql-reference/aggregate-functions/reference/uniqcombined64.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/uniqcombined64 -sidebar_position: 193 +sidebar_position: 206 --- # uniqCombined64 diff --git a/docs/en/sql-reference/aggregate-functions/reference/uniqexact.md b/docs/en/sql-reference/aggregate-functions/reference/uniqexact.md index fd68a464881..da4d4aa9588 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/uniqexact.md +++ b/docs/en/sql-reference/aggregate-functions/reference/uniqexact.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/uniqexact -sidebar_position: 191 +sidebar_position: 207 --- # uniqExact diff --git a/docs/en/sql-reference/aggregate-functions/reference/uniqhll12.md b/docs/en/sql-reference/aggregate-functions/reference/uniqhll12.md index 8594ebb3782..78d84edf1be 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/uniqhll12.md +++ b/docs/en/sql-reference/aggregate-functions/reference/uniqhll12.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/uniqhll12 -sidebar_position: 194 +sidebar_position: 208 --- # uniqHLL12 diff --git a/docs/en/sql-reference/aggregate-functions/reference/uniqthetasketch.md b/docs/en/sql-reference/aggregate-functions/reference/uniqthetasketch.md index 45970f144cb..fbae42117ee 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/uniqthetasketch.md +++ b/docs/en/sql-reference/aggregate-functions/reference/uniqthetasketch.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/uniqthetasketch -sidebar_position: 195 +sidebar_position: 209 title: uniqTheta --- diff --git a/docs/en/sql-reference/aggregate-functions/reference/varpop.md b/docs/en/sql-reference/aggregate-functions/reference/varpop.md index 4e010248f6e..182e830f19f 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/varpop.md +++ 
b/docs/en/sql-reference/aggregate-functions/reference/varpop.md @@ -1,33 +1,28 @@ --- title: "varPop" -slug: "/en/sql-reference/aggregate-functions/reference/varpop" -sidebar_position: 32 +slug: "/en/sql-reference/aggregate-functions/reference/varPop" +sidebar_position: 210 --- -This page covers the `varPop` and `varPopStable` functions available in ClickHouse. - ## varPop -Calculates the population covariance between two data columns. The population covariance measures the degree to which two variables vary together. Calculates the amount `Σ((x - x̅)^2) / n`, where `n` is the sample size and `x̅`is the average value of `x`. +Calculates the population variance. **Syntax** ```sql -covarPop(x, y) +varPop(x) ``` +Alias: `VAR_POP`. + **Parameters** -- `x`: The first data column. [Numeric](../../../native-protocol/columns.md) -- `y`: The second data column. [Numeric](../../../native-protocol/columns.md) +- `x`: Population of values to find the population variance of. [(U)Int*](../../data-types/int-uint.md), [Float*](../../data-types/float.md), [Decimal*](../../data-types/decimal.md). **Returned value** -Returns an integer of type `Float64`. - -**Implementation details** - -This function uses a numerically unstable algorithm. If you need numerical stability in calculations, use the slower but more stable [`varPopStable`](#varpopstable) function. +- Returns the population variance of `x`. [`Float64`](../../data-types/float.md). **Example** @@ -37,69 +32,21 @@ Query: DROP TABLE IF EXISTS test_data; CREATE TABLE test_data ( - x Int32, - y Int32 + x UInt8, ) ENGINE = Memory; -INSERT INTO test_data VALUES (1, 2), (2, 3), (3, 5), (4, 6), (5, 8); +INSERT INTO test_data VALUES (3), (3), (3), (4), (4), (5), (5), (7), (11), (15); SELECT - covarPop(x, y) AS covar_pop + varPop(x) AS var_pop FROM test_data; ``` Result: ```response -3 -``` - -## varPopStable - -Calculates population covariance between two data columns using a stable, numerically accurate method to calculate the variance. This function is designed to provide reliable results even with large datasets or values that might cause numerical instability in other implementations. - -**Syntax** - -```sql -covarPopStable(x, y) -``` - -**Parameters** - -- `x`: The first data column. [String literal](../../syntax#syntax-string-literal) -- `y`: The second data column. [Expression](../../syntax#syntax-expressions) - -**Returned value** - -Returns an integer of type `Float64`. - -**Implementation details** - -Unlike [`varPop`](#varpop), this function uses a stable, numerically accurate algorithm to calculate the population variance to avoid issues like catastrophic cancellation or loss of precision. This function also handles `NaN` and `Inf` values correctly, excluding them from calculations. 
- -**Example** - -Query: - -```sql -DROP TABLE IF EXISTS test_data; -CREATE TABLE test_data -( - x Int32, - y Int32 -) -ENGINE = Memory; - -INSERT INTO test_data VALUES (1, 2), (2, 9), (9, 5), (4, 6), (5, 8); - -SELECT - covarPopStable(x, y) AS covar_pop_stable -FROM test_data; -``` - -Result: - -```response -0.5999999999999999 +┌─var_pop─┐ +│ 14.4 │ +└─────────┘ ``` diff --git a/docs/en/sql-reference/aggregate-functions/reference/varpopstable.md b/docs/en/sql-reference/aggregate-functions/reference/varpopstable.md new file mode 100644 index 00000000000..68037a5a533 --- /dev/null +++ b/docs/en/sql-reference/aggregate-functions/reference/varpopstable.md @@ -0,0 +1,52 @@ +--- +title: "varPopStable" +slug: "/en/sql-reference/aggregate-functions/reference/varpopstable" +sidebar_position: 211 +--- + +## varPopStable + +Returns the population variance. Unlike [`varPop`](../reference/varpop.md), this function uses a [numerically stable](https://en.wikipedia.org/wiki/Numerical_stability) algorithm. It works slower but provides a lower computational error. + +**Syntax** + +```sql +varPopStable(x) +``` + +Alias: `VAR_POP_STABLE`. + +**Parameters** + +- `x`: Population of values to find the population variance of. [(U)Int*](../../data-types/int-uint.md), [Float*](../../data-types/float.md), [Decimal*](../../data-types/decimal.md). + +**Returned value** + +- Returns the population variance of `x`. [Float64](../../data-types/float.md). + +**Example** + +Query: + +```sql +DROP TABLE IF EXISTS test_data; +CREATE TABLE test_data +( + x UInt8, +) +ENGINE = Memory; + +INSERT INTO test_data VALUES (3),(3),(3),(4),(4),(5),(5),(7),(11),(15); + +SELECT + varPopStable(x) AS var_pop_stable +FROM test_data; +``` + +Result: + +```response +┌─var_pop_stable─┐ +│ 14.4 │ +└────────────────┘ +``` diff --git a/docs/en/sql-reference/aggregate-functions/reference/varsamp.md b/docs/en/sql-reference/aggregate-functions/reference/varsamp.md index bd1cfa5742a..87a97c15dd8 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/varsamp.md +++ b/docs/en/sql-reference/aggregate-functions/reference/varsamp.md @@ -1,11 +1,9 @@ --- title: "varSamp" -slug: /en/sql-reference/aggregate-functions/reference/varsamp -sidebar_position: 33 +slug: /en/sql-reference/aggregate-functions/reference/varSamp +sidebar_position: 212 --- -This page contains information on the `varSamp` and `varSampStable` ClickHouse functions. - ## varSamp Calculate the sample variance of a data set. @@ -13,24 +11,27 @@ Calculate the sample variance of a data set. **Syntax** ```sql -varSamp(expr) +varSamp(x) ``` +Alias: `VAR_SAMP`. + **Parameters** -- `expr`: An expression representing the data set for which you want to calculate the sample variance. [Expression](../../syntax#syntax-expressions) +- `x`: The population for which you want to calculate the sample variance. [(U)Int*](../../data-types/int-uint.md), [Float*](../../data-types/float.md), [Decimal*](../../data-types/decimal.md). **Returned value** -Returns a Float64 value representing the sample variance of the input data set. + +- Returns the sample variance of the input data set `x`. [Float64](../../data-types/float.md). **Implementation details** -The `varSamp()` function calculates the sample variance using the following formula: +The `varSamp` function calculates the sample variance using the following formula: -```plaintext -∑(x - mean(x))^2 / (n - 1) -``` +$$ +\sum\frac{(x - \text{mean}(x))^2}{(n - 1)} +$$ Where: @@ -38,91 +39,29 @@ Where: - `mean(x)` is the arithmetic mean of the data set. 
- `n` is the number of data points in the data set. -The function assumes that the input data set represents a sample from a larger population. If you want to calculate the variance of the entire population (when you have the complete data set), you should use the [`varPop()` function](./varpop#varpop) instead. - -This function uses a numerically unstable algorithm. If you need numerical stability in calculations, use the slower but more stable [`varSampStable`](#varsampstable) function. +The function assumes that the input data set represents a sample from a larger population. If you want to calculate the variance of the entire population (when you have the complete data set), you should use [`varPop`](../reference/varpop.md) instead. **Example** Query: ```sql -CREATE TABLE example_table +DROP TABLE IF EXISTS test_data; +CREATE TABLE test_data ( - id UInt64, - value Float64 + x Float64 ) -ENGINE = MergeTree -ORDER BY id; +ENGINE = Memory; -INSERT INTO example_table VALUES (1, 10.5), (2, 12.3), (3, 9.8), (4, 11.2), (5, 10.7); +INSERT INTO test_data VALUES (10.5), (12.3), (9.8), (11.2), (10.7); -SELECT varSamp(value) FROM example_table; +SELECT round(varSamp(x),3) AS var_samp FROM test_data; ``` Response: ```response -0.8650000000000091 +┌─var_samp─┐ +│ 0.865 │ +└──────────┘ ``` - -## varSampStable - -Calculate the sample variance of a data set using a numerically stable algorithm. - -**Syntax** - -```sql -varSampStable(expr) -``` - -**Parameters** - -- `expr`: An expression representing the data set for which you want to calculate the sample variance. [Expression](../../syntax#syntax-expressions) - -**Returned value** - -The `varSampStable` function returns a Float64 value representing the sample variance of the input data set. - -**Implementation details** - -The `varSampStable` function calculates the sample variance using the same formula as the [`varSamp`](#varsamp) function: - -```plaintext -∑(x - mean(x))^2 / (n - 1) -``` - -Where: -- `x` is each individual data point in the data set. -- `mean(x)` is the arithmetic mean of the data set. -- `n` is the number of data points in the data set. - -The difference between `varSampStable` and `varSamp` is that `varSampStable` is designed to provide a more deterministic and stable result when dealing with floating-point arithmetic. It uses an algorithm that minimizes the accumulation of rounding errors, which can be particularly important when dealing with large data sets or data with a wide range of values. - -Like `varSamp`, the `varSampStable` function assumes that the input data set represents a sample from a larger population. If you want to calculate the variance of the entire population (when you have the complete data set), you should use the [`varPopStable`](./varpop#varpopstable) function instead. - -**Example** - -Query: - -```sql -CREATE TABLE example_table -( - id UInt64, - value Float64 -) -ENGINE = MergeTree -ORDER BY id; - -INSERT INTO example_table VALUES (1, 10.5), (2, 12.3), (3, 9.8), (4, 11.2), (5, 10.7); - -SELECT varSampStable(value) FROM example_table; -``` - -Response: - -```response -0.865 -``` - -This query calculates the sample variance of the `value` column in the `example_table` using the `varSampStable()` function. The result shows that the sample variance of the values `[10.5, 12.3, 9.8, 11.2, 10.7]` is approximately 0.865, which may differ slightly from the result of `varSamp` due to the more precise handling of floating-point arithmetic. 
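The varSamp hunk above spells out the sum of squared deviations divided by (n - 1) and points readers to `varPop` for whole-population data. As a hedged, editorial illustration (not part of the PR), the query below contrasts the two estimators on the same five values used in the varSamp example: the squared deviations total 3.46, so dividing by n = 5 gives 0.692 while dividing by n - 1 = 4 gives the 0.865 shown in the docs.

```sql
-- Illustrative sketch only: same data as the varSamp example above.
-- varPop divides by n, varSamp divides by (n - 1).
SELECT
    round(varPop(x), 3)  AS var_pop,   -- 3.46 / 5 = 0.692
    round(varSamp(x), 3) AS var_samp   -- 3.46 / 4 = 0.865
FROM (SELECT arrayJoin([10.5, 12.3, 9.8, 11.2, 10.7]) AS x);
```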
diff --git a/docs/en/sql-reference/aggregate-functions/reference/varsampstable.md b/docs/en/sql-reference/aggregate-functions/reference/varsampstable.md new file mode 100644 index 00000000000..ebe647e1951 --- /dev/null +++ b/docs/en/sql-reference/aggregate-functions/reference/varsampstable.md @@ -0,0 +1,63 @@ +--- +title: "varSampStable" +slug: /en/sql-reference/aggregate-functions/reference/varsampstable +sidebar_position: 213 +--- + +## varSampStable + +Calculate the sample variance of a data set. Unlike [`varSamp`](../reference/varsamp.md), this function uses a numerically stable algorithm. It works slower but provides a lower computational error. + +**Syntax** + +```sql +varSampStable(x) +``` + +Alias: `VAR_SAMP_STABLE` + +**Parameters** + +- `x`: The population for which you want to calculate the sample variance. [(U)Int*](../../data-types/int-uint.md), [Float*](../../data-types/float.md), [Decimal*](../../data-types/decimal.md). + +**Returned value** + +- Returns the sample variance of the input data set. [Float64](../../data-types/float.md). + +**Implementation details** + +The `varSampStable` function calculates the sample variance using the same formula as the [`varSamp`](../reference/varsamp.md): + +$$ +\sum\frac{(x - \text{mean}(x))^2}{(n - 1)} +$$ + +Where: +- `x` is each individual data point in the data set. +- `mean(x)` is the arithmetic mean of the data set. +- `n` is the number of data points in the data set. + +**Example** + +Query: + +```sql +DROP TABLE IF EXISTS test_data; +CREATE TABLE test_data +( + x Float64 +) +ENGINE = Memory; + +INSERT INTO test_data VALUES (10.5), (12.3), (9.8), (11.2), (10.7); + +SELECT round(varSampStable(x),3) AS var_samp_stable FROM test_data; +``` + +Response: + +```response +┌─var_samp_stable─┐ +│ 0.865 │ +└─────────────────┘ +``` diff --git a/docs/en/sql-reference/aggregate-functions/reference/welchttest.md b/docs/en/sql-reference/aggregate-functions/reference/welchttest.md index 4f1085e65b4..296b70f758e 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/welchttest.md +++ b/docs/en/sql-reference/aggregate-functions/reference/welchttest.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/aggregate-functions/reference/welchttest -sidebar_position: 301 +sidebar_position: 214 sidebar_label: welchTTest --- diff --git a/docs/en/sql-reference/functions/math-functions.md b/docs/en/sql-reference/functions/math-functions.md index 12098efc635..b9b5c6d7a05 100644 --- a/docs/en/sql-reference/functions/math-functions.md +++ b/docs/en/sql-reference/functions/math-functions.md @@ -8,7 +8,7 @@ sidebar_label: Mathematical ## e -Returns e ([Euler's constant](https://en.wikipedia.org/wiki/Euler%27s_constant)). +Returns $e$ ([Euler's constant](https://en.wikipedia.org/wiki/Euler%27s_constant)). **Syntax** @@ -22,7 +22,7 @@ Type: [Float64](../data-types/float.md). ## pi -Returns π ([Pi](https://en.wikipedia.org/wiki/Pi)). +Returns $\pi$ ([Pi](https://en.wikipedia.org/wiki/Pi)). **Syntax** @@ -35,7 +35,7 @@ Type: [Float64](../data-types/float.md). ## exp -Returns e to the power of the given argument. +Returns $e^{x}$, where x is the given argument to the function. **Syntax** @@ -47,6 +47,22 @@ exp(x) - `x` - [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). +**Example** + +Query: + +```sql +SELECT round(exp(-1), 4); +``` + +Result: + +```response +┌─round(exp(-1), 4)─┐ +│ 0.3679 │ +└───────────────────┘ +``` + **Returned value** Type: [Float*](../data-types/float.md). 
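The new varsampstable.md page above says the stable algorithm is slower but has lower computational error. A minimal, hypothetical probe of that claim is sketched below as an editorial addition rather than part of the PR; no output figures are asserted, since they depend on the build, and the only hard number is the true sample variance of the offsets 0..999, which is about 83416.67 regardless of the added constant.

```sql
-- Hypothetical probe: a large common offset leaves the true sample variance of
-- 0..999 unchanged (~83416.67), but a single-pass estimator may lose precision,
-- which is the case varSampStable is meant to handle.
SELECT
    round(varSamp(x), 2)       AS single_pass,
    round(varSampStable(x), 2) AS stable
FROM
(
    SELECT 1e12 + number AS x
    FROM numbers(1000)
);
```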
@@ -91,7 +107,7 @@ Type: [Float*](../data-types/float.md). ## intExp2 -Like `exp` but returns a UInt64. +Like [`exp`](#exp) but returns a UInt64. **Syntax** @@ -137,7 +153,7 @@ Type: [Float*](../data-types/float.md). ## intExp10 -Like `exp10` but returns a UInt64. +Like [`exp10`](#exp10) but returns a UInt64. **Syntax** @@ -197,7 +213,7 @@ Type: [Float*](../data-types/float.md). ## erf -If `x` is non-negative, then `erf(x / σ√2)` is the probability that a random variable having a normal distribution with standard deviation `σ` takes the value that is separated from the expected value by more than `x`. +If `x` is non-negative, then $erf(\frac{x}{\sigma\sqrt{2}})$ is the probability that a random variable having a normal distribution with standard deviation $\sigma$ takes the value that is separated from the expected value by more than `x`. **Syntax** @@ -229,7 +245,7 @@ SELECT erf(3 / sqrt(2)); ## erfc -Returns a number close to `1 - erf(x)` without loss of precision for large ‘x’ values. +Returns a number close to $1-erf(x)$ without loss of precision for large `x` values. **Syntax** @@ -403,7 +419,7 @@ Type: [Float*](../data-types/float.md). ## pow -Returns `x` to the power of `y`. +Returns $x^y$. **Syntax** @@ -434,11 +450,11 @@ cosh(x) **Arguments** -- `x` — The angle, in radians. Values from the interval: `-∞ < x < +∞`. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). +- `x` — The angle, in radians. Values from the interval: $-\infty \lt x \lt +\infty$. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** -- Values from the interval: `1 <= cosh(x) < +∞`. +- Values from the interval: $1 \le cosh(x) \lt +\infty$. Type: [Float64](../data-types/float.md#float32-float64). @@ -468,11 +484,11 @@ acosh(x) **Arguments** -- `x` — Hyperbolic cosine of angle. Values from the interval: `1 <= x < +∞`. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). +- `x` — Hyperbolic cosine of angle. Values from the interval: $1 \le x \lt +\infty$. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** -- The angle, in radians. Values from the interval: `0 <= acosh(x) < +∞`. +- The angle, in radians. Values from the interval: $0 \le acosh(x) \lt +\infty$. Type: [Float64](../data-types/float.md#float32-float64). @@ -502,11 +518,11 @@ sinh(x) **Arguments** -- `x` — The angle, in radians. Values from the interval: `-∞ < x < +∞`. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). +- `x` — The angle, in radians. Values from the interval: $-\infty \lt x \lt +\infty$. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** -- Values from the interval: `-∞ < sinh(x) < +∞`. +- Values from the interval: $-\infty \lt sinh(x) \lt +\infty$. Type: [Float64](../data-types/float.md#float32-float64). @@ -536,11 +552,11 @@ asinh(x) **Arguments** -- `x` — Hyperbolic sine of angle. Values from the interval: `-∞ < x < +∞`. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). +- `x` — Hyperbolic sine of angle. Values from the interval: $-\infty \lt x \lt +\infty$. 
[(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** -- The angle, in radians. Values from the interval: `-∞ < asinh(x) < +∞`. +- The angle, in radians. Values from the interval: $-\infty \lt asinh(x) \lt +\infty$. Type: [Float64](../data-types/float.md#float32-float64). @@ -569,11 +585,11 @@ tanh(x) **Arguments** -- `x` — The angle, in radians. Values from the interval: `-∞ < x < +∞`. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). +- `x` — The angle, in radians. Values from the interval: $-\infty \lt x \lt +\infty$. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** -- Values from the interval: `-1 < tanh(x) < 1`. +- Values from the interval: $-1 \lt tanh(x) \lt 1$. Type: [Float*](../data-types/float.md#float32-float64). @@ -601,11 +617,11 @@ atanh(x) **Arguments** -- `x` — Hyperbolic tangent of angle. Values from the interval: `–1 < x < 1`. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). +- `x` — Hyperbolic tangent of angle. Values from the interval: $-1 \lt x \lt 1$. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** -- The angle, in radians. Values from the interval: `-∞ < atanh(x) < +∞`. +- The angle, in radians. Values from the interval: $-\infty \lt atanh(x) \lt +\infty$. Type: [Float64](../data-types/float.md#float32-float64). @@ -640,7 +656,7 @@ atan2(y, x) **Returned value** -- The angle `θ` such that `−π < θ ≤ π`, in radians. +- The angle `θ` such that $-\pi \lt \theta \le \pi$, in radians. Type: [Float64](../data-types/float.md#float32-float64). @@ -705,11 +721,11 @@ log1p(x) **Arguments** -- `x` — Values from the interval: `-1 < x < +∞`. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). +- `x` — Values from the interval: $-1 \lt x \lt +\infty$. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** -- Values from the interval: `-∞ < log1p(x) < +∞`. +- Values from the interval: $-\infty \lt log1p(x) \lt +\infty$. Type: [Float64](../data-types/float.md#float32-float64). @@ -739,7 +755,7 @@ sign(x) **Arguments** -- `x` — Values from `-∞` to `+∞`. Support all numeric types in ClickHouse. +- `x` — Values from $-\infty$ to $+\infty$. Supports all numeric types in ClickHouse. **Returned value** @@ -804,7 +820,7 @@ sigmoid(x) **Parameters** -- `x` — input value. Values from the interval: `-∞ < x < +∞`. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). +- `x` — input value. Values from the interval: $-\infty \lt x \lt +\infty$. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** diff --git a/docs/en/sql-reference/functions/nlp-functions.md b/docs/en/sql-reference/functions/nlp-functions.md index 4bfa181a35f..7057ebebfe4 100644 --- a/docs/en/sql-reference/functions/nlp-functions.md +++ b/docs/en/sql-reference/functions/nlp-functions.md @@ -6,26 +6,297 @@ sidebar_label: NLP (experimental) # Natural Language Processing (NLP) Functions -:::note +:::warning This is an experimental feature that is currently in development and is not ready for general use. 
It will change in unpredictable backwards-incompatible ways in future releases. Set `allow_experimental_nlp_functions = 1` to enable it. ::: +## detectCharset + +The `detectCharset` function detects the character set of the non-UTF8-encoded input string. + +*Syntax* + +``` sql +detectCharset('text_to_be_analyzed') +``` + +*Arguments* + +- `text_to_be_analyzed` — A collection (or sentences) of strings to analyze. [String](../data-types/string.md#string). + +*Returned value* + +- A `String` containing the code of the detected character set + +*Examples* + +Query: + +```sql +SELECT detectCharset('Ich bleibe für ein paar Tage.'); +``` + +Result: + +```response +┌─detectCharset('Ich bleibe für ein paar Tage.')─┐ +│ WINDOWS-1252 │ +└────────────────────────────────────────────────┘ +``` + +## detectLanguage + +Detects the language of the UTF8-encoded input string. The function uses the [CLD2 library](https://github.com/CLD2Owners/cld2) for detection, and it returns the 2-letter ISO language code. + +The `detectLanguage` function works best when providing over 200 characters in the input string. + +*Syntax* + +``` sql +detectLanguage('text_to_be_analyzed') +``` + +*Arguments* + +- `text_to_be_analyzed` — A collection (or sentences) of strings to analyze. [String](../data-types/string.md#string). + +*Returned value* + +- The 2-letter ISO code of the detected language + +Other possible results: + +- `un` = unknown, can not detect any language. +- `other` = the detected language does not have 2 letter code. + +*Examples* + +Query: + +```sql +SELECT detectLanguage('Je pense que je ne parviendrai jamais à parler français comme un natif. Where there’s a will, there’s a way.'); +``` + +Result: + +```response +fr +``` + +## detectLanguageMixed + +Similar to the `detectLanguage` function, but `detectLanguageMixed` returns a `Map` of 2-letter language codes that are mapped to the percentage of the certain language in the text. + + +*Syntax* + +``` sql +detectLanguageMixed('text_to_be_analyzed') +``` + +*Arguments* + +- `text_to_be_analyzed` — A collection (or sentences) of strings to analyze. [String](../data-types/string.md#string). + +*Returned value* + +- `Map(String, Float32)`: The keys are 2-letter ISO codes and the values are a percentage of text found for that language + + +*Examples* + +Query: + +```sql +SELECT detectLanguageMixed('二兎を追う者は一兎をも得ず二兎を追う者は一兎をも得ず A vaincre sans peril, on triomphe sans gloire.'); +``` + +Result: + +```response +┌─detectLanguageMixed()─┐ +│ {'ja':0.62,'fr':0.36 │ +└───────────────────────┘ +``` + +## detectProgrammingLanguage + +Determines the programming language from the source code. Calculates all the unigrams and bigrams of commands in the source code. +Then using a marked-up dictionary with weights of unigrams and bigrams of commands for various programming languages finds the biggest weight of the programming language and returns it. + +*Syntax* + +``` sql +detectProgrammingLanguage('source_code') +``` + +*Arguments* + +- `source_code` — String representation of the source code to analyze. [String](../data-types/string.md#string). + +*Returned value* + +- Programming language. [String](../data-types/string.md). 
+ +*Examples* + +Query: + +```sql +SELECT detectProgrammingLanguage('#include '); +``` + +Result: + +```response +┌─detectProgrammingLanguage('#include ')─┐ +│ C++ │ +└──────────────────────────────────────────────────┘ +``` + +## detectLanguageUnknown + +Similar to the `detectLanguage` function, except the `detectLanguageUnknown` function works with non-UTF8-encoded strings. Prefer this version when your character set is UTF-16 or UTF-32. + + +*Syntax* + +``` sql +detectLanguageUnknown('text_to_be_analyzed') +``` + +*Arguments* + +- `text_to_be_analyzed` — A collection (or sentences) of strings to analyze. [String](../data-types/string.md#string). + +*Returned value* + +- The 2-letter ISO code of the detected language + +Other possible results: + +- `un` = unknown, can not detect any language. +- `other` = the detected language does not have 2 letter code. + +*Examples* + +Query: + +```sql +SELECT detectLanguageUnknown('Ich bleibe für ein paar Tage.'); +``` + +Result: + +```response +┌─detectLanguageUnknown('Ich bleibe für ein paar Tage.')─┐ +│ de │ +└────────────────────────────────────────────────────────┘ +``` + +## detectTonality + +Determines the sentiment of text data. Uses a marked-up sentiment dictionary, in which each word has a tonality ranging from `-12` to `6`. +For each text, it calculates the average sentiment value of its words and returns it in the range `[-1,1]`. + +:::note +This function is limited in its current form. Currently it makes use of the embedded emotional dictionary at `/contrib/nlp-data/tonality_ru.zst` and only works for the Russian language. +::: + +*Syntax* + +``` sql +detectTonality(text) +``` + +*Arguments* + +- `text` — The text to be analyzed. [String](../data-types/string.md#string). + +*Returned value* + +- The average sentiment value of the words in `text`. [Float32](../data-types/float.md). + +*Examples* + +Query: + +```sql +SELECT detectTonality('Шарик - хороший пёс'), -- Sharik is a good dog + detectTonality('Шарик - пёс'), -- Sharik is a dog + detectTonality('Шарик - плохой пёс'); -- Sharkik is a bad dog +``` + +Result: + +```response +┌─detectTonality('Шарик - хороший пёс')─┬─detectTonality('Шарик - пёс')─┬─detectTonality('Шарик - плохой пёс')─┐ +│ 0.44445 │ 0 │ -0.3 │ +└───────────────────────────────────────┴───────────────────────────────┴──────────────────────────────────────┘ +``` +## lemmatize + +Performs lemmatization on a given word. Needs dictionaries to operate, which can be obtained [here](https://github.com/vpodpecan/lemmagen3/tree/master/src/lemmagen3/models). + +*Syntax* + +``` sql +lemmatize('language', word) +``` + +*Arguments* + +- `language` — Language which rules will be applied. [String](../data-types/string.md#string). +- `word` — Word that needs to be lemmatized. Must be lowercase. [String](../data-types/string.md#string). + +*Examples* + +Query: + +``` sql +SELECT lemmatize('en', 'wolves'); +``` + +Result: + +``` text +┌─lemmatize("wolves")─┐ +│ "wolf" │ +└─────────────────────┘ +``` + +*Configuration* + +This configuration specifies that the dictionary `en.bin` should be used for lemmatization of English (`en`) words. The `.bin` files can be downloaded from +[here](https://github.com/vpodpecan/lemmagen3/tree/master/src/lemmagen3/models). + +``` xml + + + + en + en.bin + + + +``` + ## stem Performs stemming on a given word. -### Syntax +*Syntax* ``` sql stem('language', word) ``` -### Arguments +*Arguments* - `language` — Language which rules will be applied. 
Use the two letter [ISO 639-1 code](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes). - `word` — word that needs to be stemmed. Must be in lowercase. [String](../data-types/string.md#string). -### Examples +*Examples* Query: @@ -40,7 +311,7 @@ Result: │ ['I','think','it','is','a','bless','in','disguis'] │ └────────────────────────────────────────────────────┘ ``` -### Supported languages for stem() +*Supported languages for stem()* :::note The stem() function uses the [Snowball stemming](https://snowballstem.org/) library, see the Snowball website for updated languages etc. @@ -76,53 +347,6 @@ The stem() function uses the [Snowball stemming](https://snowballstem.org/) libr - Turkish - Yiddish -## lemmatize - -Performs lemmatization on a given word. Needs dictionaries to operate, which can be obtained [here](https://github.com/vpodpecan/lemmagen3/tree/master/src/lemmagen3/models). - -### Syntax - -``` sql -lemmatize('language', word) -``` - -### Arguments - -- `language` — Language which rules will be applied. [String](../data-types/string.md#string). -- `word` — Word that needs to be lemmatized. Must be lowercase. [String](../data-types/string.md#string). - -### Examples - -Query: - -``` sql -SELECT lemmatize('en', 'wolves'); -``` - -Result: - -``` text -┌─lemmatize("wolves")─┐ -│ "wolf" │ -└─────────────────────┘ -``` - -### Configuration - -This configuration specifies that the dictionary `en.bin` should be used for lemmatization of English (`en`) words. The `.bin` files can be downloaded from -[here](https://github.com/vpodpecan/lemmagen3/tree/master/src/lemmagen3/models). - -``` xml - - - - en - en.bin - - - -``` - ## synonyms Finds synonyms to a given word. There are two types of synonym extensions: `plain` and `wordnet`. @@ -131,18 +355,18 @@ With the `plain` extension type we need to provide a path to a simple text file, With the `wordnet` extension type we need to provide a path to a directory with WordNet thesaurus in it. Thesaurus must contain a WordNet sense index. -### Syntax +*Syntax* ``` sql synonyms('extension_name', word) ``` -### Arguments +*Arguments* - `extension_name` — Name of the extension in which search will be performed. [String](../data-types/string.md#string). - `word` — Word that will be searched in extension. [String](../data-types/string.md#string). -### Examples +*Examples* Query: @@ -158,7 +382,7 @@ Result: └──────────────────────────────────────────┘ ``` -### Configuration +*Configuration* ``` xml @@ -172,154 +396,4 @@ Result: en/ -``` - -## detectLanguage - -Detects the language of the UTF8-encoded input string. The function uses the [CLD2 library](https://github.com/CLD2Owners/cld2) for detection, and it returns the 2-letter ISO language code. - -The `detectLanguage` function works best when providing over 200 characters in the input string. - -### Syntax - -``` sql -detectLanguage('text_to_be_analyzed') -``` - -### Arguments - -- `text_to_be_analyzed` — A collection (or sentences) of strings to analyze. [String](../data-types/string.md#string). - -### Returned value - -- The 2-letter ISO code of the detected language - -Other possible results: - -- `un` = unknown, can not detect any language. -- `other` = the detected language does not have 2 letter code. - -### Examples - -Query: - -```sql -SELECT detectLanguage('Je pense que je ne parviendrai jamais à parler français comme un natif. 
Where there’s a will, there’s a way.'); -``` - -Result: - -```response -fr -``` - -## detectLanguageMixed - -Similar to the `detectLanguage` function, but `detectLanguageMixed` returns a `Map` of 2-letter language codes that are mapped to the percentage of the certain language in the text. - - -### Syntax - -``` sql -detectLanguageMixed('text_to_be_analyzed') -``` - -### Arguments - -- `text_to_be_analyzed` — A collection (or sentences) of strings to analyze. [String](../data-types/string.md#string). - -### Returned value - -- `Map(String, Float32)`: The keys are 2-letter ISO codes and the values are a percentage of text found for that language - - -### Examples - -Query: - -```sql -SELECT detectLanguageMixed('二兎を追う者は一兎をも得ず二兎を追う者は一兎をも得ず A vaincre sans peril, on triomphe sans gloire.'); -``` - -Result: - -```response -┌─detectLanguageMixed()─┐ -│ {'ja':0.62,'fr':0.36 │ -└───────────────────────┘ -``` - -## detectLanguageUnknown - -Similar to the `detectLanguage` function, except the `detectLanguageUnknown` function works with non-UTF8-encoded strings. Prefer this version when your character set is UTF-16 or UTF-32. - - -### Syntax - -``` sql -detectLanguageUnknown('text_to_be_analyzed') -``` - -### Arguments - -- `text_to_be_analyzed` — A collection (or sentences) of strings to analyze. [String](../data-types/string.md#string). - -### Returned value - -- The 2-letter ISO code of the detected language - -Other possible results: - -- `un` = unknown, can not detect any language. -- `other` = the detected language does not have 2 letter code. - -### Examples - -Query: - -```sql -SELECT detectLanguageUnknown('Ich bleibe für ein paar Tage.'); -``` - -Result: - -```response -┌─detectLanguageUnknown('Ich bleibe für ein paar Tage.')─┐ -│ de │ -└────────────────────────────────────────────────────────┘ -``` - -## detectCharset - -The `detectCharset` function detects the character set of the non-UTF8-encoded input string. - - -### Syntax - -``` sql -detectCharset('text_to_be_analyzed') -``` - -### Arguments - -- `text_to_be_analyzed` — A collection (or sentences) of strings to analyze. [String](../data-types/string.md#string). - -### Returned value - -- A `String` containing the code of the detected character set - -### Examples - -Query: - -```sql -SELECT detectCharset('Ich bleibe für ein paar Tage.'); -``` - -Result: - -```response -┌─detectCharset('Ich bleibe für ein paar Tage.')─┐ -│ WINDOWS-1252 │ -└────────────────────────────────────────────────┘ -``` +``` \ No newline at end of file diff --git a/docs/en/sql-reference/functions/other-functions.md b/docs/en/sql-reference/functions/other-functions.md index e22dd5d827c..58fc1eba02e 100644 --- a/docs/en/sql-reference/functions/other-functions.md +++ b/docs/en/sql-reference/functions/other-functions.md @@ -3820,3 +3820,43 @@ Result: 10. │ df │ │ └────┴───────────────────────┘ ``` + +## displayName + +Returns the value of `display_name` from [config](../../operations/configuration-files.md/#configuration-files) or server Fully Qualified Domain Name (FQDN) if not set. + +**Syntax** + +```sql +displayName() +``` + +**Returned value** + +- Value of `display_name` from config or server FQDN if not set. [String](../data-types/string.md). + +**Example** + +The `display_name` can be set in `config.xml`. 
Taking for example a server with `display_name` configured to 'production': + +```xml + +production +``` + +Query: + +```sql +SELECT displayName(); +``` + +Result: + +```response +┌─displayName()─┐ +│ production │ +└───────────────┘ +``` + diff --git a/docs/en/sql-reference/functions/string-functions.md b/docs/en/sql-reference/functions/string-functions.md index a258456345e..c068b0e9d17 100644 --- a/docs/en/sql-reference/functions/string-functions.md +++ b/docs/en/sql-reference/functions/string-functions.md @@ -1168,14 +1168,14 @@ Result: └────────────────────────────┘ ``` -## base64UrlEncode +## base64URLEncode Encodes an URL (String or FixedString) as base64 with URL-specific modifications, according to [RFC 4648](https://datatracker.ietf.org/doc/html/rfc4648#section-5). **Syntax** ```sql -base64UrlEncode(url) +base64URLEncode(url) ``` **Arguments** @@ -1189,13 +1189,13 @@ base64UrlEncode(url) **Example** ``` sql -SELECT base64UrlEncode('https://clickhouse.com'); +SELECT base64URLEncode('https://clickhouse.com'); ``` Result: ```result -┌─base64UrlEncode('https://clickhouse.com')─┐ +┌─base64URLEncode('https://clickhouse.com')─┐ │ aHR0cDovL2NsaWNraG91c2UuY29t │ └───────────────────────────────────────────┘ ``` @@ -1234,19 +1234,19 @@ Result: └──────────────────────────────────┘ ``` -## base64UrlDecode +## base64URLDecode Accepts a base64-encoded URL and decodes it from base64 with URL-specific modifications, according to [RFC 4648](https://datatracker.ietf.org/doc/html/rfc4648#section-5). Throws an exception in case of an error. **Syntax** ```sql -base64UrlDecode(encodedUrl) +base64URLDecode(encodedUrl) ``` **Arguments** -- `encodedUrl` — [String](../data-types/string.md) column or constant. If the string is not a valid Base64-encoded value with URL-specific modifications, an exception is thrown. +- `encodedURL` — [String](../data-types/string.md) column or constant. If the string is not a valid Base64-encoded value with URL-specific modifications, an exception is thrown. **Returned value** @@ -1255,13 +1255,13 @@ base64UrlDecode(encodedUrl) **Example** ``` sql -SELECT base64UrlDecode('aHR0cDovL2NsaWNraG91c2UuY29t'); +SELECT base64URLDecode('aHR0cDovL2NsaWNraG91c2UuY29t'); ``` Result: ```result -┌─base64UrlDecode('aHR0cDovL2NsaWNraG91c2UuY29t')─┐ +┌─base64URLDecode('aHR0cDovL2NsaWNraG91c2UuY29t')─┐ │ https://clickhouse.com │ └─────────────────────────────────────────────────┘ ``` @@ -1298,19 +1298,19 @@ SELECT tryBase64Decode('RW5jb2RlZA==') as res, tryBase64Decode('invalid') as res └────────────┴─────────────┘ ``` -## tryBase64UrlDecode +## tryBase64URLDecode -Like `base64UrlDecode` but returns an empty string in case of error. +Like `base64URLDecode` but returns an empty string in case of error. **Syntax** ```sql -tryBase64UrlDecode(encodedUrl) +tryBase64URLDecode(encodedUrl) ``` **Parameters** -- `encodedUrl`: [String](../data-types/string.md) column or constant. If the string is not a valid Base64-encoded value with URL-specific modifications, returns an empty string. +- `encodedURL`: [String](../data-types/string.md) column or constant. If the string is not a valid Base64-encoded value with URL-specific modifications, returns an empty string. 
**Returned value** @@ -1321,7 +1321,7 @@ tryBase64UrlDecode(encodedUrl) Query: ```sql -SELECT tryBase64UrlDecode('aHR0cDovL2NsaWNraG91c2UuY29t') as res, tryBase64Decode('aHR0cHM6Ly9jbGlja') as res_invalid; +SELECT tryBase64URLDecode('aHR0cDovL2NsaWNraG91c2UuY29t') as res, tryBase64Decode('aHR0cHM6Ly9jbGlja') as res_invalid; ``` ```response diff --git a/docs/en/sql-reference/functions/url-functions.md b/docs/en/sql-reference/functions/url-functions.md index 8b3e4f44840..76c0141ac8b 100644 --- a/docs/en/sql-reference/functions/url-functions.md +++ b/docs/en/sql-reference/functions/url-functions.md @@ -818,6 +818,40 @@ The same as above, but including query string and fragment. Example: `/top/news.html?page=2#comments`. +### protocol + +Extracts the protocol from a URL. + +**Syntax** + +```sql +protocol(url) +``` + +**Arguments** + +- `url` — URL to extract protocol from. [String](../data-types/string.md). + +**Returned value** + +- Protocol, or an empty string if it cannot be determined. [String](../data-types/string.md). + +**Example** + +Query: + +```sql +SELECT protocol('https://clickhouse.com/'); +``` + +Result: + +```response +┌─protocol('https://clickhouse.com/')─┐ +│ https │ +└─────────────────────────────────────┘ +``` + ### queryString Returns the query string without the initial question mark, `#` and everything after `#`. diff --git a/docs/en/sql-reference/functions/uuid-functions.md b/docs/en/sql-reference/functions/uuid-functions.md index b7d095c796e..e990023efbc 100644 --- a/docs/en/sql-reference/functions/uuid-functions.md +++ b/docs/en/sql-reference/functions/uuid-functions.md @@ -611,7 +611,7 @@ SELECT generateSnowflakeID(1), generateSnowflakeID(2); ## snowflakeToDateTime :::warning -This function is deprecated and can only be used if setting [uniform_snowflake_conversion_functions](../../operations/settings/settings.md#uniform_snowflake_conversion_functions) is disabled. +This function is deprecated and can only be used if setting [allow_deprecated_snowflake_conversion_functions](../../operations/settings/settings.md#allow_deprecated_snowflake_conversion_functions) is enabled. The function will be removed at some point in future. ::: @@ -652,7 +652,7 @@ Result: ## snowflakeToDateTime64 :::warning -This function is deprecated and can only be used if setting [uniform_snowflake_conversion_functions](../../operations/settings/settings.md#uniform_snowflake_conversion_functions) is disabled. +This function is deprecated and can only be used if setting [allow_deprecated_snowflake_conversion_functions](../../operations/settings/settings.md#allow_deprecated_snowflake_conversion_functions) is enabled. The function will be removed at some point in future. ::: @@ -693,7 +693,7 @@ Result: ## dateTimeToSnowflake :::warning -This function is deprecated and can only be used if setting [uniform_snowflake_conversion_functions](../../operations/settings/settings.md#uniform_snowflake_conversion_functions) is disabled. +This function is deprecated and can only be used if setting [allow_deprecated_snowflake_conversion_functions](../../operations/settings/settings.md#allow_deprecated_snowflake_conversion_functions) is enabled. The function will be removed at some point in future. ::: @@ -732,7 +732,7 @@ Result: ## dateTime64ToSnowflake :::warning -This function is deprecated and can only be used if setting [uniform_snowflake_conversion_functions](../../operations/settings/settings.md#uniform_snowflake_conversion_functions) is disabled. 
+This function is deprecated and can only be used if setting [allow_deprecated_snowflake_conversion_functions](../../operations/settings/settings.md#allow_deprecated_snowflake_conversion_functions) is enabled. The function will be removed at some point in future. ::: diff --git a/docs/en/sql-reference/statements/alter/constraint.md b/docs/en/sql-reference/statements/alter/constraint.md index 29675f704b5..54c456f9aa2 100644 --- a/docs/en/sql-reference/statements/alter/constraint.md +++ b/docs/en/sql-reference/statements/alter/constraint.md @@ -9,8 +9,8 @@ sidebar_label: CONSTRAINT Constraints could be added or deleted using following syntax: ``` sql -ALTER TABLE [db].name [ON CLUSTER cluster] ADD CONSTRAINT constraint_name CHECK expression; -ALTER TABLE [db].name [ON CLUSTER cluster] DROP CONSTRAINT constraint_name; +ALTER TABLE [db].name [ON CLUSTER cluster] ADD CONSTRAINT [IF NOT EXISTS] constraint_name CHECK expression; +ALTER TABLE [db].name [ON CLUSTER cluster] DROP CONSTRAINT [IF EXISTS] constraint_name; ``` See more on [constraints](../../../sql-reference/statements/create/table.md#constraints). diff --git a/docs/en/sql-reference/window-functions/index.md b/docs/en/sql-reference/window-functions/index.md index 32ebc6d028f..3a8afd10359 100644 --- a/docs/en/sql-reference/window-functions/index.md +++ b/docs/en/sql-reference/window-functions/index.md @@ -36,7 +36,7 @@ Finds non-negative derivative for given `metric_column` by `timestamp_column`. `INTERVAL` can be omitted, default is `INTERVAL 1 SECOND`. The computed value is the following for each row: - `0` for 1st row, -- ${metric_i - metric_{i-1} \over timestamp_i - timestamp_{i-1}} * interval$ for $i_th$ row. +- ${\text{metric}_i - \text{metric}_{i-1} \over \text{timestamp}_i - \text{timestamp}_{i-1}} * \text{interval}$ for $i_{th}$ row. ## Syntax diff --git a/docs/ru/development/developer-instruction.md b/docs/ru/development/developer-instruction.md index 01ff4dd5f28..bf42edf89ff 100644 --- a/docs/ru/development/developer-instruction.md +++ b/docs/ru/development/developer-instruction.md @@ -283,7 +283,7 @@ Pull request можно создать, даже если работа над з Тесты будут запущены, как только сотрудники ClickHouse поставят для pull request тег «Can be tested». Результаты первых проверок (стиль кода) появятся уже через несколько минут. Результаты сборки появятся примерно через пол часа. Результаты основного набора тестов будут доступны в пределах часа. -Система подготовит сборки ClickHouse специально для вашего pull request. Для их получения, нажмите на ссылку «Details» у проверки «Clickhouse build check». Там вы сможете найти прямые ссылки на собранные .deb пакеты ClickHouse, которые, при желании, вы даже сможете установить на свои продакшен серверы (если не страшно). +Система подготовит сборки ClickHouse специально для вашего pull request. Для их получения, нажмите на ссылку «Details» у проверки «Builds». Там вы сможете найти прямые ссылки на собранные .deb пакеты ClickHouse, которые, при желании, вы даже сможете установить на свои продакшен серверы (если не страшно). Вероятнее всего, часть сборок не будет успешной с первого раза. Ведь мы проверяем сборку кода и gcc и clang, а при сборке с помощью clang включаются почти все существующие в природе warnings (всегда с флагом `-Werror`). На той же странице, вы сможете найти логи сборки - вам не обязательно самому собирать ClickHouse всеми возможными способами. 
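The `base64URLEncode` / `base64URLDecode` / `tryBase64URLDecode` renames documented a few hunks above can be exercised with a minimal round-trip query. This is only an illustrative sketch, not part of the patch; the literal inputs are made up:

```sql
-- Round trip through the renamed URL-safe Base64 functions.
SELECT base64URLDecode(base64URLEncode('https://clickhouse.com/docs?query=1')) AS round_trip;

-- tryBase64URLDecode returns an empty string instead of throwing on invalid input.
SELECT tryBase64URLDecode('not base64!') AS res;
```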
diff --git a/docs/ru/interfaces/cli.md b/docs/ru/interfaces/cli.md index 4d19cf50ae1..86eeaac2da7 100644 --- a/docs/ru/interfaces/cli.md +++ b/docs/ru/interfaces/cli.md @@ -141,6 +141,7 @@ $ clickhouse-client --param_tbl="numbers" --param_db="system" --param_col="numbe - `--secure` — если указано, будет использован безопасный канал. - `--history_file` - путь к файлу с историей команд. - `--param_` — значение параметра для [запроса с параметрами](#cli-queries-with-parameters). +- `--jwt` – авторизация с использованием JSON Web Token. Доступно только в ClickHouse Cloud. Вместо параметров `--host`, `--port`, `--user` и `--password` клиент ClickHouse также поддерживает строки подключения (смотри следующий раздел). diff --git a/docs/ru/sql-reference/functions/string-functions.md b/docs/ru/sql-reference/functions/string-functions.md index fa76e84f130..2436581fc7f 100644 --- a/docs/ru/sql-reference/functions/string-functions.md +++ b/docs/ru/sql-reference/functions/string-functions.md @@ -538,7 +538,7 @@ SELECT base58Decode('3dc8KtHrwM'); Синоним: `TO_BASE64`. -## base64UrlEncode(s) +## base64URLEncode(s) Производит кодирование URL (String или FixedString) в base64-представление в соответствии с [RFC 4648](https://tools.ietf.org/html/rfc4648). @@ -548,7 +548,7 @@ SELECT base58Decode('3dc8KtHrwM'); Синоним: `FROM_BASE64`. -## base64UrlDecode(s) +## base64URLDecode(s) Декодирует base64-представление URL в исходную строку в соответствии с [RFC 4648](https://tools.ietf.org/html/rfc4648). При невозможности декодирования выбрасывает исключение @@ -556,9 +556,9 @@ SELECT base58Decode('3dc8KtHrwM'); Функционал аналогичен base64Decode, но при невозможности декодирования возвращает пустую строку. -## tryBase64UrlDecode(s) +## tryBase64URLDecode(s) -Функционал аналогичен base64UrlDecode, но при невозможности декодирования возвращает пустую строку. +Функционал аналогичен base64URLDecode, но при невозможности декодирования возвращает пустую строку. ## endsWith(s, suffix) {#endswith} diff --git a/docs/ru/sql-reference/statements/alter/constraint.md b/docs/ru/sql-reference/statements/alter/constraint.md index ad5f23e5fdc..45b0f5f6350 100644 --- a/docs/ru/sql-reference/statements/alter/constraint.md +++ b/docs/ru/sql-reference/statements/alter/constraint.md @@ -11,8 +11,8 @@ sidebar_label: "Манипуляции с ограничениями" Добавить или удалить ограничение можно с помощью запросов ``` sql -ALTER TABLE [db].name [ON CLUSTER cluster] ADD CONSTRAINT constraint_name CHECK expression; -ALTER TABLE [db].name [ON CLUSTER cluster] DROP CONSTRAINT constraint_name; +ALTER TABLE [db].name [ON CLUSTER cluster] ADD CONSTRAINT [IF NOT EXISTS] constraint_name CHECK expression; +ALTER TABLE [db].name [ON CLUSTER cluster] DROP CONSTRAINT [IF EXISTS] constraint_name; ``` Запросы выполняют добавление или удаление метаданных об ограничениях таблицы `[db].name`, поэтому выполняются мгновенно. 
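The constraint hunks above add `IF NOT EXISTS` / `IF EXISTS` to `ALTER TABLE ... CONSTRAINT`. A minimal sketch of the now-idempotent form, assuming a hypothetical table `events` with a column `duration_ms`:

```sql
-- Hypothetical table and constraint names, used only to illustrate the new clauses.
ALTER TABLE events ADD CONSTRAINT IF NOT EXISTS duration_non_negative CHECK duration_ms >= 0;
-- Safe to repeat: the constraint already exists, so this becomes a no-op instead of an error.
ALTER TABLE events ADD CONSTRAINT IF NOT EXISTS duration_non_negative CHECK duration_ms >= 0;
-- Likewise, dropping a missing constraint no longer fails.
ALTER TABLE events DROP CONSTRAINT IF EXISTS duration_non_negative;
```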
diff --git a/docs/zh/sql-reference/statements/alter/constraint.md b/docs/zh/sql-reference/statements/alter/constraint.md index 86ffcf09d65..59edcf10645 100644 --- a/docs/zh/sql-reference/statements/alter/constraint.md +++ b/docs/zh/sql-reference/statements/alter/constraint.md @@ -9,8 +9,8 @@ sidebar_label: 约束 约束可以使用以下语法添加或删除: ``` sql -ALTER TABLE [db].name ADD CONSTRAINT constraint_name CHECK expression; -ALTER TABLE [db].name DROP CONSTRAINT constraint_name; +ALTER TABLE [db].name [ON CLUSTER cluster] ADD CONSTRAINT [IF NOT EXISTS] constraint_name CHECK expression; +ALTER TABLE [db].name [ON CLUSTER cluster] DROP CONSTRAINT [IF EXISTS] constraint_name; ``` 查看[constraints](../../../sql-reference/statements/create/table.mdx#constraints)。 diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index efe23d57478..c4878b18f00 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -64,6 +64,7 @@ namespace ErrorCodes extern const int NETWORK_ERROR; extern const int AUTHENTICATION_FAILED; extern const int NO_ELEMENTS_IN_CONFIG; + extern const int USER_EXPIRED; } @@ -74,6 +75,12 @@ void Client::processError(const String & query) const fmt::print(stderr, "Received exception from server (version {}):\n{}\n", server_version, getExceptionMessage(*server_exception, print_stack_trace, true)); + + if (server_exception->code() == ErrorCodes::USER_EXPIRED) + { + server_exception->rethrow(); + } + if (is_interactive) { fmt::print(stderr, "\n"); @@ -944,6 +951,7 @@ void Client::addOptions(OptionsDescription & options_description) ("ssh-key-file", po::value(), "File containing the SSH private key for authenticate with the server.") ("ssh-key-passphrase", po::value(), "Passphrase for the SSH private key specified by --ssh-key-file.") ("quota_key", po::value(), "A string to differentiate quotas when the user have keyed quotas configured on server") + ("jwt", po::value(), "Use JWT for authentication") ("max_client_network_bandwidth", po::value(), "the maximum speed of data exchange over the network for the client in bytes per second.") ("compression", po::value(), "enable or disable compression (enabled by default for remote communication and disabled for localhost communication).") @@ -1102,6 +1110,12 @@ void Client::processOptions(const OptionsDescription & options_description, config().setBool("no-warnings", true); if (options.count("fake-drop")) config().setString("ignore_drop_queries_probability", "1"); + if (options.count("jwt")) + { + if (!options["user"].defaulted()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "User and JWT flags can't be specified together"); + config().setString("jwt", options["jwt"].as()); + } if (options.count("accept-invalid-certificate")) { config().setString("openSSL.client.invalidCertificateHandler.name", "AcceptCertificateHandler"); diff --git a/programs/keeper-client/KeeperClient.cpp b/programs/keeper-client/KeeperClient.cpp index ebec337060c..68adc2c2aac 100644 --- a/programs/keeper-client/KeeperClient.cpp +++ b/programs/keeper-client/KeeperClient.cpp @@ -368,7 +368,7 @@ int KeeperClient::main(const std::vector & /* args */) return 0; } - DB::ConfigProcessor config_processor(config().getString("config-file", "config.xml")); + ConfigProcessor config_processor(config().getString("config-file", "config.xml")); /// This will handle a situation when clickhouse is running on the embedded config, but config.d folder is also present. 
ConfigProcessor::registerEmbeddedConfig("config.xml", ""); diff --git a/programs/keeper-client/Parser.cpp b/programs/keeper-client/Parser.cpp index 5b16e6d2c23..51f85cf4a69 100644 --- a/programs/keeper-client/Parser.cpp +++ b/programs/keeper-client/Parser.cpp @@ -12,8 +12,7 @@ bool parseKeeperArg(IParser::Pos & pos, Expected & expected, String & result) if (!parseIdentifierOrStringLiteral(pos, expected, result)) return false; } - - while (pos->type != TokenType::Whitespace && pos->type != TokenType::EndOfStream && pos->type != TokenType::Semicolon) + else if (pos->type == TokenType::Number) { result.append(pos->begin, pos->end); ++pos; @@ -40,8 +39,8 @@ bool KeeperParser::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) for (const auto & pair : KeeperClient::commands) expected.add(pos, pair.first.data()); - for (const auto & flwc : four_letter_word_commands) - expected.add(pos, flwc.data()); + for (const auto & four_letter_word_command : four_letter_word_commands) + expected.add(pos, four_letter_word_command.data()); if (pos->type != TokenType::BareWord) return false; diff --git a/programs/keeper-client/Parser.h b/programs/keeper-client/Parser.h index 57ee6ce4a18..503edfa4f73 100644 --- a/programs/keeper-client/Parser.h +++ b/programs/keeper-client/Parser.h @@ -11,7 +11,6 @@ namespace DB { bool parseKeeperArg(IParser::Pos & pos, Expected & expected, String & result); - bool parseKeeperPath(IParser::Pos & pos, Expected & expected, String & path); diff --git a/programs/keeper/Keeper.cpp b/programs/keeper/Keeper.cpp index 0d3c1f10894..bb04ff88936 100644 --- a/programs/keeper/Keeper.cpp +++ b/programs/keeper/Keeper.cpp @@ -577,8 +577,7 @@ try #if USE_SSL CertificateReloader::instance().tryLoad(*config); #endif - }, - /* already_loaded = */ false); /// Reload it right now (initial loading) + }); SCOPE_EXIT({ LOG_INFO(log, "Shutting down."); diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index 4d5cfb09e6a..cb1c35743b2 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -732,11 +732,8 @@ void LocalServer::processConfig() attachInformationSchema(global_context, *createMemoryDatabaseIfNotExists(global_context, DatabaseCatalog::INFORMATION_SCHEMA_UPPERCASE)); } - server_display_name = config().getString("display_name", getFQDNOrHostName()); - prompt_by_server_display_name = config().getRawString("prompt_by_server_display_name.default", "{display_name} :) "); - std::map prompt_substitutions{{"display_name", server_display_name}}; - for (const auto & [key, value] : prompt_substitutions) - boost::replace_all(prompt_by_server_display_name, "{" + key + "}", value); + server_display_name = config().getString("display_name", ""); + prompt_by_server_display_name = config().getRawString("prompt_by_server_display_name.default", ":) "); global_context->setQueryKindInitial(); global_context->setQueryKind(query_kind); diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 7bc2be806f7..e2554a6ff03 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -1380,8 +1380,8 @@ try global_context->setQueryCache(query_cache_max_size_in_bytes, query_cache_max_entries, query_cache_query_cache_max_entry_size_in_bytes, query_cache_max_entry_size_in_rows); #if USE_EMBEDDED_COMPILER - size_t compiled_expression_cache_max_size_in_bytes = config().getUInt64("compiled_expression_cache_size", DEFAULT_COMPILED_EXPRESSION_CACHE_MAX_SIZE); - size_t compiled_expression_cache_max_elements = 
config().getUInt64("compiled_expression_cache_elements_size", DEFAULT_COMPILED_EXPRESSION_CACHE_MAX_ENTRIES); + size_t compiled_expression_cache_max_size_in_bytes = server_settings.compiled_expression_cache_size; + size_t compiled_expression_cache_max_elements = server_settings.compiled_expression_cache_elements_size; CompiledExpressionCacheFactory::instance().init(compiled_expression_cache_max_size_in_bytes, compiled_expression_cache_max_elements); #endif @@ -1407,8 +1407,8 @@ try tryLogCurrentException(log, "Disabling cgroup memory observer because of an error during initialization"); } - const std::string cert_path = config().getString("openSSL.server.certificateFile", ""); - const std::string key_path = config().getString("openSSL.server.privateKeyFile", ""); + std::string cert_path = config().getString("openSSL.server.certificateFile", ""); + std::string key_path = config().getString("openSSL.server.privateKeyFile", ""); std::vector extra_paths = {include_from_path}; if (!cert_path.empty()) @@ -1416,6 +1416,18 @@ try if (!key_path.empty()) extra_paths.emplace_back(key_path); + Poco::Util::AbstractConfiguration::Keys protocols; + config().keys("protocols", protocols); + for (const auto & protocol : protocols) + { + cert_path = config().getString("protocols." + protocol + ".certificateFile", ""); + key_path = config().getString("protocols." + protocol + ".privateKeyFile", ""); + if (!cert_path.empty()) + extra_paths.emplace_back(cert_path); + if (!key_path.empty()) + extra_paths.emplace_back(key_path); + } + auto main_config_reloader = std::make_unique( config_path, extra_paths, @@ -1528,6 +1540,8 @@ try global_context->setMaxDictionaryNumToWarn(new_server_settings.max_dictionary_num_to_warn); global_context->setMaxDatabaseNumToWarn(new_server_settings.max_database_num_to_warn); global_context->setMaxPartNumToWarn(new_server_settings.max_part_num_to_warn); + /// Only for system.server_settings + global_context->setConfigReloaderInterval(new_server_settings.config_reload_interval_ms); SlotCount concurrent_threads_soft_limit = UnlimitedSlots; if (new_server_settings.concurrent_threads_soft_limit_num > 0 && new_server_settings.concurrent_threads_soft_limit_num < concurrent_threads_soft_limit) @@ -1656,7 +1670,7 @@ try CompressionCodecEncrypted::Configuration::instance().tryLoad(*config, "encryption_codecs"); #if USE_SSL - CertificateReloader::instance().tryLoad(*config); + CertificateReloader::instance().tryReloadAll(*config); #endif NamedCollectionFactory::instance().reloadFromConfig(*config); @@ -1690,8 +1704,7 @@ try /// Must be the last. latest_config = config; - }, - /* already_loaded = */ false); /// Reload it right now (initial loading) + }); const auto listen_hosts = getListenHosts(config()); const auto interserver_listen_hosts = getInterserverListenHosts(config()); diff --git a/programs/server/config.xml b/programs/server/config.xml index a0cb5b14007..94825a55f67 100644 --- a/programs/server/config.xml +++ b/programs/server/config.xml @@ -29,7 +29,14 @@ --> 1000M 10 + + + + + + + - 5368709120 + You should not lower this value. --> + - - 5368709120 + + - 1000 + - 134217728 + - 10000 + + + + /var/lib/clickhouse/caches/ @@ -1155,6 +1170,18 @@ false + + + system + error_log
+ 7500 + 1048576 + 8192 + 524288 + 1000 + false +
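With the `error_log` entries added to `config.xml` above (database `system`, table `error_log`), the collected error counters become queryable as `system.error_log`. A bare query sketch, since the table's column layout is not shown in this patch:

```sql
-- Inspect a sample of collected error-log rows; schema details are not part of this patch.
SELECT *
FROM system.error_log
LIMIT 10;
```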
+ - - - 1073741824 - 1024 - 1048576 - 30000000 - - backups diff --git a/programs/server/config.yaml.example b/programs/server/config.yaml.example index 9fc188e97aa..5d5499f876c 100644 --- a/programs/server/config.yaml.example +++ b/programs/server/config.yaml.example @@ -260,7 +260,10 @@ uncompressed_cache_size: 8589934592 # Approximate size of mark cache, used in tables of MergeTree family. # In bytes. Cache is single for server. Memory is allocated only on demand. # You should not lower this value. -mark_cache_size: 5368709120 +# mark_cache_size: 5368709120 + +# For marks of secondary indices. +# index_mark_cache_size: 5368709120 # If you enable the `min_bytes_to_use_mmap_io` setting, # the data in MergeTree tables can be read with mmap to avoid copying from kernel to userspace. @@ -277,13 +280,20 @@ mark_cache_size: 5368709120 # in query or server memory usage - because this memory can be discarded similar to OS page cache. # The cache is dropped (the files are closed) automatically on removal of old parts in MergeTree, # also it can be dropped manually by the SYSTEM DROP MMAP CACHE query. -mmap_cache_size: 1000 +# mmap_cache_size: 1024 # Cache size in bytes for compiled expressions. -compiled_expression_cache_size: 134217728 +# compiled_expression_cache_size: 134217728 # Cache size in elements for compiled expressions. -compiled_expression_cache_elements_size: 10000 +# compiled_expression_cache_elements_size: 10000 + +# Configuration for the query cache +# query_cache: +# max_size_in_bytes: 1073741824 +# max_entries: 1024 +# max_entry_size_in_bytes: 1048576 +# max_entry_size_in_rows: 30000000 # Path to data directory, with trailing slash. path: /var/lib/clickhouse/ @@ -726,6 +736,13 @@ metric_log: flush_interval_milliseconds: 7500 collect_interval_milliseconds: 1000 +# Error log contains rows with current values of errors collected with "collect_interval_milliseconds" interval. +error_log: + database: system + table: error_log + flush_interval_milliseconds: 7500 + collect_interval_milliseconds: 1000 + # Asynchronous metric log contains values of metrics from # system.asynchronous_metrics. asynchronous_metric_log: diff --git a/src/Access/AccessControl.cpp b/src/Access/AccessControl.cpp index c3bb42160ad..353358fac65 100644 --- a/src/Access/AccessControl.cpp +++ b/src/Access/AccessControl.cpp @@ -261,7 +261,24 @@ AccessControl::AccessControl() } -AccessControl::~AccessControl() = default; +AccessControl::~AccessControl() +{ + try + { + AccessControl::shutdown(); + } + catch (...) + { + tryLogCurrentException(__PRETTY_FUNCTION__); + } +} + + +void AccessControl::shutdown() +{ + MultipleAccessStorage::shutdown(); + removeAllStorages(); +} void AccessControl::setUpFromMainConfig(const Poco::Util::AbstractConfiguration & config_, const String & config_path_, diff --git a/src/Access/AccessControl.h b/src/Access/AccessControl.h index d1537219a06..bfaf256ad48 100644 --- a/src/Access/AccessControl.h +++ b/src/Access/AccessControl.h @@ -53,6 +53,9 @@ public: AccessControl(); ~AccessControl() override; + /// Shutdown the access control and stops all background activity. + void shutdown() override; + /// Initializes access storage (user directories). 
void setUpFromMainConfig(const Poco::Util::AbstractConfiguration & config_, const String & config_path_, const zkutil::GetZooKeeper & get_zookeeper_function_); diff --git a/src/Access/Authentication.cpp b/src/Access/Authentication.cpp index bf1fe3feec3..f8df56516ec 100644 --- a/src/Access/Authentication.cpp +++ b/src/Access/Authentication.cpp @@ -108,6 +108,9 @@ bool Authentication::areCredentialsValid( case AuthenticationType::HTTP: throw Authentication::Require("ClickHouse Basic Authentication"); + case AuthenticationType::JWT: + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "JWT is available only in ClickHouse Cloud"); + case AuthenticationType::KERBEROS: return external_authenticators.checkKerberosCredentials(auth_data.getKerberosRealm(), *gss_acceptor_context); @@ -149,6 +152,9 @@ bool Authentication::areCredentialsValid( case AuthenticationType::SSL_CERTIFICATE: throw Authentication::Require("ClickHouse X.509 Authentication"); + case AuthenticationType::JWT: + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "JWT is available only in ClickHouse Cloud"); + case AuthenticationType::SSH_KEY: #if USE_SSH throw Authentication::Require("SSH Keys Authentication"); @@ -193,6 +199,9 @@ bool Authentication::areCredentialsValid( throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSH is disabled, because ClickHouse is built without libssh"); #endif + case AuthenticationType::JWT: + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "JWT is available only in ClickHouse Cloud"); + case AuthenticationType::BCRYPT_PASSWORD: return checkPasswordBcrypt(basic_credentials->getPassword(), auth_data.getPasswordHashBinary()); @@ -222,6 +231,9 @@ bool Authentication::areCredentialsValid( case AuthenticationType::HTTP: throw Authentication::Require("ClickHouse Basic Authentication"); + case AuthenticationType::JWT: + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "JWT is available only in ClickHouse Cloud"); + case AuthenticationType::KERBEROS: throw Authentication::Require(auth_data.getKerberosRealm()); @@ -254,6 +266,9 @@ bool Authentication::areCredentialsValid( case AuthenticationType::HTTP: throw Authentication::Require("ClickHouse Basic Authentication"); + case AuthenticationType::JWT: + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "JWT is available only in ClickHouse Cloud"); + case AuthenticationType::KERBEROS: throw Authentication::Require(auth_data.getKerberosRealm()); diff --git a/src/Access/AuthenticationData.cpp b/src/Access/AuthenticationData.cpp index 70355fadfbd..e9bc111e18a 100644 --- a/src/Access/AuthenticationData.cpp +++ b/src/Access/AuthenticationData.cpp @@ -135,6 +135,7 @@ void AuthenticationData::setPassword(const String & password_) case AuthenticationType::BCRYPT_PASSWORD: case AuthenticationType::NO_PASSWORD: case AuthenticationType::LDAP: + case AuthenticationType::JWT: case AuthenticationType::KERBEROS: case AuthenticationType::SSL_CERTIFICATE: case AuthenticationType::SSH_KEY: @@ -251,6 +252,7 @@ void AuthenticationData::setPasswordHashBinary(const Digest & hash) case AuthenticationType::NO_PASSWORD: case AuthenticationType::LDAP: + case AuthenticationType::JWT: case AuthenticationType::KERBEROS: case AuthenticationType::SSL_CERTIFICATE: case AuthenticationType::SSH_KEY: @@ -322,6 +324,10 @@ std::shared_ptr AuthenticationData::toAST() const node->children.push_back(std::make_shared(getLDAPServerName())); break; } + case AuthenticationType::JWT: + { + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "JWT is available only in ClickHouse Cloud"); + } case 
AuthenticationType::KERBEROS: { const auto & realm = getKerberosRealm(); diff --git a/src/Access/Common/AuthenticationType.cpp b/src/Access/Common/AuthenticationType.cpp index 2cc126ad9b7..427765b8a79 100644 --- a/src/Access/Common/AuthenticationType.cpp +++ b/src/Access/Common/AuthenticationType.cpp @@ -72,6 +72,11 @@ const AuthenticationTypeInfo & AuthenticationTypeInfo::get(AuthenticationType ty static const auto info = make_info(Keyword::HTTP); return info; } + case AuthenticationType::JWT: + { + static const auto info = make_info(Keyword::JWT); + return info; + } case AuthenticationType::MAX: break; } diff --git a/src/Access/Common/AuthenticationType.h b/src/Access/Common/AuthenticationType.h index a68549aff4c..16f4388bbff 100644 --- a/src/Access/Common/AuthenticationType.h +++ b/src/Access/Common/AuthenticationType.h @@ -41,6 +41,9 @@ enum class AuthenticationType : uint8_t /// Authentication through HTTP protocol HTTP, + /// JSON Web Token + JWT, + MAX, }; diff --git a/src/Access/DiskAccessStorage.cpp b/src/Access/DiskAccessStorage.cpp index fe698b32816..ee422f7d8ff 100644 --- a/src/Access/DiskAccessStorage.cpp +++ b/src/Access/DiskAccessStorage.cpp @@ -194,11 +194,9 @@ DiskAccessStorage::DiskAccessStorage(const String & storage_name_, const String DiskAccessStorage::~DiskAccessStorage() { - stopListsWritingThread(); - try { - writeLists(); + DiskAccessStorage::shutdown(); } catch (...) { @@ -207,6 +205,17 @@ DiskAccessStorage::~DiskAccessStorage() } +void DiskAccessStorage::shutdown() +{ + stopListsWritingThread(); + + { + std::lock_guard lock{mutex}; + writeLists(); + } +} + + String DiskAccessStorage::getStorageParamsJSON() const { std::lock_guard lock{mutex}; diff --git a/src/Access/DiskAccessStorage.h b/src/Access/DiskAccessStorage.h index 5d94008b34f..38172b26970 100644 --- a/src/Access/DiskAccessStorage.h +++ b/src/Access/DiskAccessStorage.h @@ -18,6 +18,8 @@ public: DiskAccessStorage(const String & storage_name_, const String & directory_path_, AccessChangesNotifier & changes_notifier_, bool readonly_, bool allow_backup_); ~DiskAccessStorage() override; + void shutdown() override; + const char * getStorageType() const override { return STORAGE_TYPE; } String getStorageParamsJSON() const override; diff --git a/src/Access/IAccessStorage.h b/src/Access/IAccessStorage.h index 4f980bf9212..e88b1601f32 100644 --- a/src/Access/IAccessStorage.h +++ b/src/Access/IAccessStorage.h @@ -44,6 +44,11 @@ public: explicit IAccessStorage(const String & storage_name_) : storage_name(storage_name_) {} virtual ~IAccessStorage() = default; + /// If the AccessStorage has to do some complicated work when destroying - do it in advance. + /// For example, if the AccessStorage contains any threads for background work - ask them to complete and wait for completion. + /// By default, does nothing. + virtual void shutdown() {} + /// Returns the name of this storage. const String & getStorageName() const { return storage_name; } virtual const char * getStorageType() const = 0; diff --git a/src/Access/MultipleAccessStorage.cpp b/src/Access/MultipleAccessStorage.cpp index a8b508202b5..fda6601e4c6 100644 --- a/src/Access/MultipleAccessStorage.cpp +++ b/src/Access/MultipleAccessStorage.cpp @@ -34,11 +34,23 @@ MultipleAccessStorage::MultipleAccessStorage(const String & storage_name_) MultipleAccessStorage::~MultipleAccessStorage() { - /// It's better to remove the storages in the reverse order because they could depend on each other somehow. + try + { + MultipleAccessStorage::shutdown(); + } + catch (...) 
+ { + tryLogCurrentException(__PRETTY_FUNCTION__); + } +} + +void MultipleAccessStorage::shutdown() +{ + /// It's better to shutdown the storages in the reverse order because they could depend on each other somehow. const auto storages = getStoragesPtr(); for (const auto & storage : *storages | boost::adaptors::reversed) { - removeStorage(storage); + storage->shutdown(); } } @@ -72,6 +84,16 @@ void MultipleAccessStorage::removeStorage(const StoragePtr & storage_to_remove) ids_cache.clear(); } +void MultipleAccessStorage::removeAllStorages() +{ + /// It's better to remove the storages in the reverse order because they could depend on each other somehow. + const auto storages = getStoragesPtr(); + for (const auto & storage : *storages | boost::adaptors::reversed) + { + removeStorage(storage); + } +} + std::vector MultipleAccessStorage::getStorages() { return *getStoragesPtr(); diff --git a/src/Access/MultipleAccessStorage.h b/src/Access/MultipleAccessStorage.h index 005e6e2b9cd..e1543c59b67 100644 --- a/src/Access/MultipleAccessStorage.h +++ b/src/Access/MultipleAccessStorage.h @@ -21,6 +21,8 @@ public: explicit MultipleAccessStorage(const String & storage_name_ = STORAGE_TYPE); ~MultipleAccessStorage() override; + void shutdown() override; + const char * getStorageType() const override { return STORAGE_TYPE; } bool isReadOnly() const override; bool isReadOnly(const UUID & id) const override; @@ -32,6 +34,7 @@ public: void setStorages(const std::vector & storages); void addStorage(const StoragePtr & new_storage); void removeStorage(const StoragePtr & storage_to_remove); + void removeAllStorages(); std::vector getStorages(); std::vector getStorages() const; std::shared_ptr> getStoragesPtr(); diff --git a/src/Access/ReplicatedAccessStorage.cpp b/src/Access/ReplicatedAccessStorage.cpp index cd9a86a1bd2..ed114327041 100644 --- a/src/Access/ReplicatedAccessStorage.cpp +++ b/src/Access/ReplicatedAccessStorage.cpp @@ -66,6 +66,18 @@ ReplicatedAccessStorage::ReplicatedAccessStorage( } ReplicatedAccessStorage::~ReplicatedAccessStorage() +{ + try + { + ReplicatedAccessStorage::shutdown(); + } + catch (...) 
+ { + tryLogCurrentException(__PRETTY_FUNCTION__); + } +} + +void ReplicatedAccessStorage::shutdown() { stopWatchingThread(); } diff --git a/src/Access/ReplicatedAccessStorage.h b/src/Access/ReplicatedAccessStorage.h index cddb20860f7..f8518226997 100644 --- a/src/Access/ReplicatedAccessStorage.h +++ b/src/Access/ReplicatedAccessStorage.h @@ -23,6 +23,8 @@ public: ReplicatedAccessStorage(const String & storage_name, const String & zookeeper_path, zkutil::GetZooKeeper get_zookeeper, AccessChangesNotifier & changes_notifier_, bool allow_backup); ~ReplicatedAccessStorage() override; + void shutdown() override; + const char * getStorageType() const override { return STORAGE_TYPE; } void startPeriodicReloading() override { startWatchingThread(); } diff --git a/src/Access/User.cpp b/src/Access/User.cpp index 6a296706baf..c02c598ee40 100644 --- a/src/Access/User.cpp +++ b/src/Access/User.cpp @@ -33,6 +33,8 @@ void User::setName(const String & name_) throw Exception(ErrorCodes::BAD_ARGUMENTS, "User name '{}' is reserved", name_); if (name_.starts_with(EncodedUserInfo::SSH_KEY_AUTHENTICAION_MARKER)) throw Exception(ErrorCodes::BAD_ARGUMENTS, "User name '{}' is reserved", name_); + if (name_.starts_with(EncodedUserInfo::JWT_AUTHENTICAION_MARKER)) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "User name '{}' is reserved", name_); name = name_; } diff --git a/src/Access/UsersConfigAccessStorage.cpp b/src/Access/UsersConfigAccessStorage.cpp index 1f9a977bab6..5e36fe1ad84 100644 --- a/src/Access/UsersConfigAccessStorage.cpp +++ b/src/Access/UsersConfigAccessStorage.cpp @@ -880,8 +880,7 @@ void UsersConfigAccessStorage::load( Settings::checkNoSettingNamesAtTopLevel(*new_config, users_config_path); parseFromConfig(*new_config); access_control.getChangesNotifier().sendNotifications(); - }, - /* already_loaded = */ false); + }); } void UsersConfigAccessStorage::startPeriodicReloading() diff --git a/src/AggregateFunctions/AggregateFunctionGroupConcat.cpp b/src/AggregateFunctions/AggregateFunctionGroupConcat.cpp new file mode 100644 index 00000000000..1c059dc52aa --- /dev/null +++ b/src/AggregateFunctions/AggregateFunctionGroupConcat.cpp @@ -0,0 +1,283 @@ +#include +#include +#include + +#include +#include +#include + +#include +#include + +#include +#include +#include + +#include +#include +#include +#include + +#include +#include + + +namespace DB +{ +struct Settings; + +namespace ErrorCodes +{ + extern const int TOO_MANY_ARGUMENTS_FOR_FUNCTION; + extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int BAD_ARGUMENTS; +} + +namespace +{ + +struct GroupConcatDataBase +{ + UInt64 data_size = 0; + UInt64 allocated_size = 0; + char * data = nullptr; + + void checkAndUpdateSize(UInt64 add, Arena * arena) + { + if (data_size + add >= allocated_size) + { + auto old_size = allocated_size; + allocated_size = std::max(2 * allocated_size, data_size + add); + data = arena->realloc(data, old_size, allocated_size); + } + } + + void insertChar(const char * str, UInt64 str_size, Arena * arena) + { + checkAndUpdateSize(str_size, arena); + memcpy(data + data_size, str, str_size); + data_size += str_size; + } + + void insert(const IColumn * column, const SerializationPtr & serialization, size_t row_num, Arena * arena) + { + WriteBufferFromOwnString buff; + serialization->serializeText(*column, row_num, buff, FormatSettings{}); + auto string = buff.stringView(); + insertChar(string.data(), string.size(), arena); + } + +}; + +template +struct GroupConcatData; + +template<> +struct GroupConcatData final : public 
GroupConcatDataBase +{ +}; + +template<> +struct GroupConcatData final : public GroupConcatDataBase +{ + using Offset = UInt64; + using Allocator = MixedAlignedArenaAllocator; + using Offsets = PODArray; + + /// offset[i * 2] - beginning of the i-th row, offset[i * 2 + 1] - end of the i-th row + Offsets offsets; + UInt64 num_rows = 0; + + UInt64 getSize(size_t i) const { return offsets[i * 2 + 1] - offsets[i * 2]; } + + UInt64 getString(size_t i) const { return offsets[i * 2]; } + + void insert(const IColumn * column, const SerializationPtr & serialization, size_t row_num, Arena * arena) + { + WriteBufferFromOwnString buff; + serialization->serializeText(*column, row_num, buff, {}); + auto string = buff.stringView(); + + checkAndUpdateSize(string.size(), arena); + memcpy(data + data_size, string.data(), string.size()); + offsets.push_back(data_size, arena); + data_size += string.size(); + offsets.push_back(data_size, arena); + num_rows++; + } +}; + +template +class GroupConcatImpl final + : public IAggregateFunctionDataHelper, GroupConcatImpl> +{ + static constexpr auto name = "groupConcat"; + + SerializationPtr serialization; + UInt64 limit; + const String delimiter; + +public: + GroupConcatImpl(const DataTypePtr & data_type_, const Array & parameters_, UInt64 limit_, const String & delimiter_) + : IAggregateFunctionDataHelper, GroupConcatImpl>( + {data_type_}, parameters_, std::make_shared()) + , serialization(this->argument_types[0]->getDefaultSerialization()) + , limit(limit_) + , delimiter(delimiter_) + { + } + + String getName() const override { return name; } + + void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override + { + auto & cur_data = this->data(place); + + if constexpr (has_limit) + if (cur_data.num_rows >= limit) + return; + + if (cur_data.data_size != 0) + cur_data.insertChar(delimiter.c_str(), delimiter.size(), arena); + + cur_data.insert(columns[0], serialization, row_num, arena); + } + + void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override + { + auto & cur_data = this->data(place); + auto & rhs_data = this->data(rhs); + + if (rhs_data.data_size == 0) + return; + + if constexpr (has_limit) + { + UInt64 new_elems_count = std::min(rhs_data.num_rows, limit - cur_data.num_rows); + for (UInt64 i = 0; i < new_elems_count; ++i) + { + if (cur_data.data_size != 0) + cur_data.insertChar(delimiter.c_str(), delimiter.size(), arena); + + cur_data.offsets.push_back(cur_data.data_size, arena); + cur_data.insertChar(rhs_data.data + rhs_data.getString(i), rhs_data.getSize(i), arena); + cur_data.num_rows++; + cur_data.offsets.push_back(cur_data.data_size, arena); + } + } + else + { + if (cur_data.data_size != 0) + cur_data.insertChar(delimiter.c_str(), delimiter.size(), arena); + + cur_data.insertChar(rhs_data.data, rhs_data.data_size, arena); + } + } + + void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional /* version */) const override + { + auto & cur_data = this->data(place); + + writeVarUInt(cur_data.data_size, buf); + + buf.write(cur_data.data, cur_data.data_size); + + if constexpr (has_limit) + { + writeVarUInt(cur_data.num_rows, buf); + for (const auto & offset : cur_data.offsets) + writeVarUInt(offset, buf); + } + } + + void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional /* version */, Arena * arena) const override + { + auto & cur_data = this->data(place); + + UInt64 temp_size = 0; + readVarUInt(temp_size, 
buf); + + cur_data.checkAndUpdateSize(temp_size, arena); + + buf.readStrict(cur_data.data + cur_data.data_size, temp_size); + cur_data.data_size = temp_size; + + if constexpr (has_limit) + { + readVarUInt(cur_data.num_rows, buf); + cur_data.offsets.resize_exact(cur_data.num_rows * 2, arena); + for (auto & offset : cur_data.offsets) + readVarUInt(offset, buf); + } + } + + void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override + { + auto & cur_data = this->data(place); + + if (cur_data.data_size == 0) + { + to.insertDefault(); + return; + } + + auto & column_string = assert_cast(to); + column_string.insertData(cur_data.data, cur_data.data_size); + } + + bool allocatesMemoryInArena() const override { return true; } +}; + +AggregateFunctionPtr createAggregateFunctionGroupConcat( + const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings *) +{ + assertUnary(name, argument_types); + + bool has_limit = false; + UInt64 limit = 0; + String delimiter; + + if (parameters.size() > 2) + throw Exception(ErrorCodes::TOO_MANY_ARGUMENTS_FOR_FUNCTION, + "Incorrect number of parameters for aggregate function {}, should be 0, 1 or 2, got: {}", name, parameters.size()); + + if (!parameters.empty()) + { + auto type = parameters[0].getType(); + if (type != Field::Types::String) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "First parameter for aggregate function {} should be string", name); + + delimiter = parameters[0].get(); + } + if (parameters.size() == 2) + { + auto type = parameters[1].getType(); + + if (type != Field::Types::Int64 && type != Field::Types::UInt64) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Second parameter for aggregate function {} should be a positive number", name); + + if ((type == Field::Types::Int64 && parameters[1].get() <= 0) || + (type == Field::Types::UInt64 && parameters[1].get() == 0)) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Second parameter for aggregate function {} should be a positive number, got: {}", name, parameters[1].get()); + + has_limit = true; + limit = parameters[1].get(); + } + + if (has_limit) + return std::make_shared>(argument_types[0], parameters, limit, delimiter); + else + return std::make_shared>(argument_types[0], parameters, limit, delimiter); +} + +} + +void registerAggregateFunctionGroupConcat(AggregateFunctionFactory & factory) +{ + AggregateFunctionProperties properties = { .returns_default_when_only_null = false, .is_order_dependent = true }; + + factory.registerFunction("groupConcat", { createAggregateFunctionGroupConcat, properties }); + factory.registerAlias("group_concat", "groupConcat", AggregateFunctionFactory::CaseInsensitive); +} + +} diff --git a/src/AggregateFunctions/AggregateFunctionMaxIntersections.cpp b/src/AggregateFunctions/AggregateFunctionMaxIntersections.cpp index 05ed85a9004..6c26065a918 100644 --- a/src/AggregateFunctions/AggregateFunctionMaxIntersections.cpp +++ b/src/AggregateFunctions/AggregateFunctionMaxIntersections.cpp @@ -91,7 +91,8 @@ public: return std::make_shared>(); } - bool allocatesMemoryInArena() const override { return false; } + /// MaxIntersectionsData::Allocator uses the arena + bool allocatesMemoryInArena() const override { return true; } void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override { diff --git a/src/AggregateFunctions/registerAggregateFunctions.cpp b/src/AggregateFunctions/registerAggregateFunctions.cpp index 58e657d3723..4ac25e14ee6 
100644 --- a/src/AggregateFunctions/registerAggregateFunctions.cpp +++ b/src/AggregateFunctions/registerAggregateFunctions.cpp @@ -19,6 +19,7 @@ void registerAggregateFunctionGroupArraySorted(AggregateFunctionFactory & factor void registerAggregateFunctionGroupUniqArray(AggregateFunctionFactory &); void registerAggregateFunctionGroupArrayInsertAt(AggregateFunctionFactory &); void registerAggregateFunctionGroupArrayIntersect(AggregateFunctionFactory &); +void registerAggregateFunctionGroupConcat(AggregateFunctionFactory &); void registerAggregateFunctionsQuantile(AggregateFunctionFactory &); void registerAggregateFunctionsQuantileDeterministic(AggregateFunctionFactory &); void registerAggregateFunctionsQuantileExact(AggregateFunctionFactory &); @@ -120,6 +121,7 @@ void registerAggregateFunctions() registerAggregateFunctionGroupUniqArray(factory); registerAggregateFunctionGroupArrayInsertAt(factory); registerAggregateFunctionGroupArrayIntersect(factory); + registerAggregateFunctionGroupConcat(factory); registerAggregateFunctionsQuantile(factory); registerAggregateFunctionsQuantileDeterministic(factory); registerAggregateFunctionsQuantileExact(factory); diff --git a/src/Analyzer/Passes/UniqInjectiveFunctionsEliminationPass.cpp b/src/Analyzer/Passes/UniqInjectiveFunctionsEliminationPass.cpp index 8a6276008d8..91186db0e0c 100644 --- a/src/Analyzer/Passes/UniqInjectiveFunctionsEliminationPass.cpp +++ b/src/Analyzer/Passes/UniqInjectiveFunctionsEliminationPass.cpp @@ -43,50 +43,56 @@ public: bool replaced_argument = false; auto replaced_uniq_function_arguments_nodes = function_node->getArguments().getNodes(); - for (auto & uniq_function_argument_node : replaced_uniq_function_arguments_nodes) + /// Replace injective function with its single argument + auto remove_injective_function = [&replaced_argument](QueryTreeNodePtr & arg) -> bool { - auto * uniq_function_argument_node_typed = uniq_function_argument_node->as(); - if (!uniq_function_argument_node_typed || !uniq_function_argument_node_typed->isOrdinaryFunction()) - continue; - - auto & uniq_function_argument_node_argument_nodes = uniq_function_argument_node_typed->getArguments().getNodes(); + auto * arg_typed = arg->as(); + if (!arg_typed || !arg_typed->isOrdinaryFunction()) + return false; /// Do not apply optimization if injective function contains multiple arguments - if (uniq_function_argument_node_argument_nodes.size() != 1) - continue; + auto & arg_arguments_nodes = arg_typed->getArguments().getNodes(); + if (arg_arguments_nodes.size() != 1) + return false; - const auto & uniq_function_argument_node_function = uniq_function_argument_node_typed->getFunction(); - if (!uniq_function_argument_node_function->isInjective({})) - continue; + const auto & arg_function = arg_typed->getFunction(); + if (!arg_function->isInjective({})) + return false; - /// Replace injective function with its single argument - uniq_function_argument_node = uniq_function_argument_node_argument_nodes[0]; - replaced_argument = true; + arg = arg_arguments_nodes[0]; + return replaced_argument = true; + }; + + for (auto & uniq_function_argument_node : replaced_uniq_function_arguments_nodes) + { + while (remove_injective_function(uniq_function_argument_node)) + ; } if (!replaced_argument) return; - DataTypes argument_types; - argument_types.reserve(replaced_uniq_function_arguments_nodes.size()); + DataTypes replaced_argument_types; + replaced_argument_types.reserve(replaced_uniq_function_arguments_nodes.size()); for (const auto & function_node_argument : 
replaced_uniq_function_arguments_nodes) - argument_types.emplace_back(function_node_argument->getResultType()); + replaced_argument_types.emplace_back(function_node_argument->getResultType()); + auto current_aggregate_function = function_node->getAggregateFunction(); AggregateFunctionProperties properties; - auto aggregate_function = AggregateFunctionFactory::instance().get( + auto replaced_aggregate_function = AggregateFunctionFactory::instance().get( function_node->getFunctionName(), NullsAction::EMPTY, - argument_types, - function_node->getAggregateFunction()->getParameters(), + replaced_argument_types, + current_aggregate_function->getParameters(), properties); /// uniqCombined returns nullable with nullable arguments so the result type might change which breaks the pass - if (!aggregate_function->getResultType()->equals(*function_node->getAggregateFunction()->getResultType())) + if (!replaced_aggregate_function->getResultType()->equals(*current_aggregate_function->getResultType())) return; - function_node->getArguments().getNodes() = replaced_uniq_function_arguments_nodes; - function_node->resolveAsAggregateFunction(std::move(aggregate_function)); + function_node->getArguments().getNodes() = std::move(replaced_uniq_function_arguments_nodes); + function_node->resolveAsAggregateFunction(std::move(replaced_aggregate_function)); } }; diff --git a/src/Analyzer/Resolve/QueryAnalyzer.cpp b/src/Analyzer/Resolve/QueryAnalyzer.cpp index 8860050c5b9..576c4943ccb 100644 --- a/src/Analyzer/Resolve/QueryAnalyzer.cpp +++ b/src/Analyzer/Resolve/QueryAnalyzer.cpp @@ -1,3 +1,5 @@ +#include + #include #include #include @@ -3495,7 +3497,8 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi * * 4. If node has alias, update its value in scope alias map. Deregister alias from expression_aliases_in_resolve_process. 
*/ -ProjectionNames QueryAnalyzer::resolveExpressionNode(QueryTreeNodePtr & node, IdentifierResolveScope & scope, bool allow_lambda_expression, bool allow_table_expression, bool ignore_alias) +ProjectionNames QueryAnalyzer::resolveExpressionNode( + QueryTreeNodePtr & node, IdentifierResolveScope & scope, bool allow_lambda_expression, bool allow_table_expression, bool ignore_alias) { checkStackSize(); @@ -4505,7 +4508,36 @@ void QueryAnalyzer::resolveTableFunction(QueryTreeNodePtr & table_function_node, table_name = table_identifier[1]; } - auto parametrized_view_storage = scope_context->getQueryContext()->buildParametrizedViewStorage(function_ast, database_name, table_name); + /// Collect parametrized view arguments + NameToNameMap view_params; + for (const auto & argument : table_function_node_typed.getArguments()) + { + if (auto * arg_func = argument->as()) + { + if (arg_func->getFunctionName() != "equals") + continue; + + auto nodes = arg_func->getArguments().getNodes(); + if (nodes.size() != 2) + continue; + + if (auto * identifier_node = nodes[0]->as()) + { + resolveExpressionNode(nodes[1], scope, /* allow_lambda_expression */false, /* allow_table_function */false); + if (auto * constant = nodes[1]->as()) + { + view_params[identifier_node->getIdentifier().getFullName()] = convertFieldToString(constant->getValue()); + } + } + } + } + + auto context = scope_context->getQueryContext(); + auto parametrized_view_storage = context->buildParametrizedViewStorage( + database_name, + table_name, + view_params); + if (parametrized_view_storage) { auto fake_table_node = std::make_shared(parametrized_view_storage, scope_context); diff --git a/src/Backups/BackupImpl.cpp b/src/Backups/BackupImpl.cpp index 8f32c918c61..3f972c36e47 100644 --- a/src/Backups/BackupImpl.cpp +++ b/src/Backups/BackupImpl.cpp @@ -24,8 +24,6 @@ #include #include -#include - namespace ProfileEvents { @@ -93,6 +91,7 @@ BackupImpl::BackupImpl( const std::optional & base_backup_info_, std::shared_ptr reader_, const ContextPtr & context_, + bool is_internal_backup_, bool use_same_s3_credentials_for_base_backup_) : backup_info(backup_info_) , backup_name_for_logging(backup_info.toStringForLogging()) @@ -101,7 +100,7 @@ BackupImpl::BackupImpl( , open_mode(OpenMode::READ) , reader(std::move(reader_)) , context(context_) - , is_internal_backup(false) + , is_internal_backup(is_internal_backup_) , version(INITIAL_BACKUP_VERSION) , base_backup_info(base_backup_info_) , use_same_s3_credentials_for_base_backup(use_same_s3_credentials_for_base_backup_) @@ -256,6 +255,7 @@ std::shared_ptr BackupImpl::getBaseBackupUnlocked() const params.backup_info = *base_backup_info; params.open_mode = OpenMode::READ; params.context = context; + params.is_internal_backup = is_internal_backup; /// use_same_s3_credentials_for_base_backup should be inherited for base backups params.use_same_s3_credentials_for_base_backup = use_same_s3_credentials_for_base_backup; diff --git a/src/Backups/BackupImpl.h b/src/Backups/BackupImpl.h index 6fed5fe758b..2b27e2ab090 100644 --- a/src/Backups/BackupImpl.h +++ b/src/Backups/BackupImpl.h @@ -40,6 +40,7 @@ public: const std::optional & base_backup_info_, std::shared_ptr reader_, const ContextPtr & context_, + bool is_internal_backup_, bool use_same_s3_credentials_for_base_backup_); BackupImpl( diff --git a/src/Backups/registerBackupEngineAzureBlobStorage.cpp b/src/Backups/registerBackupEngineAzureBlobStorage.cpp index 81e3c104da1..03d156d1009 100644 --- a/src/Backups/registerBackupEngineAzureBlobStorage.cpp +++ 
b/src/Backups/registerBackupEngineAzureBlobStorage.cpp @@ -153,6 +153,7 @@ void registerBackupEngineAzureBlobStorage(BackupFactory & factory) params.base_backup_info, reader, params.context, + params.is_internal_backup, /* use_same_s3_credentials_for_base_backup*/ false); } else diff --git a/src/Backups/registerBackupEngineS3.cpp b/src/Backups/registerBackupEngineS3.cpp index c34dbe273f5..59ed9506af0 100644 --- a/src/Backups/registerBackupEngineS3.cpp +++ b/src/Backups/registerBackupEngineS3.cpp @@ -119,6 +119,7 @@ void registerBackupEngineS3(BackupFactory & factory) params.base_backup_info, reader, params.context, + params.is_internal_backup, params.use_same_s3_credentials_for_base_backup); } else diff --git a/src/Backups/registerBackupEnginesFileAndDisk.cpp b/src/Backups/registerBackupEnginesFileAndDisk.cpp index c633ebb6a5a..35263d39cba 100644 --- a/src/Backups/registerBackupEnginesFileAndDisk.cpp +++ b/src/Backups/registerBackupEnginesFileAndDisk.cpp @@ -177,6 +177,7 @@ void registerBackupEnginesFileAndDisk(BackupFactory & factory) params.base_backup_info, reader, params.context, + params.is_internal_backup, params.use_same_s3_credentials_for_base_backup); } else diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 15a4836ef7a..c97837b685d 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -109,6 +109,7 @@ namespace ErrorCodes extern const int USER_SESSION_LIMIT_EXCEEDED; extern const int NOT_IMPLEMENTED; extern const int CANNOT_READ_FROM_FILE_DESCRIPTOR; + extern const int USER_EXPIRED; } } @@ -2270,7 +2271,7 @@ bool ClientBase::executeMultiQuery(const String & all_queries_text) catch (...) { // Surprisingly, this is a client error. A server error would - // have been reported without throwing (see onReceiveSeverException()). + // have been reported without throwing (see onReceiveExceptionFromServer()). client_exception = std::make_unique(getCurrentExceptionMessageAndPattern(print_stack_trace), getCurrentExceptionCode()); have_error = true; } @@ -2643,6 +2644,9 @@ void ClientBase::runInteractive() } catch (const Exception & e) { + if (e.code() == ErrorCodes::USER_EXPIRED) + break; + /// We don't need to handle the test hints in the interactive mode. std::cerr << "Exception on client:" << std::endl << getExceptionMessage(e, print_stack_trace, true) << std::endl << std::endl; client_exception.reset(e.clone()); diff --git a/src/Client/ClientBase.h b/src/Client/ClientBase.h index 220fcddc038..5202b57040f 100644 --- a/src/Client/ClientBase.h +++ b/src/Client/ClientBase.h @@ -129,6 +129,7 @@ protected: const std::vector & hosts_and_ports_arguments) = 0; virtual void processConfig() = 0; + /// Returns true if query processing was successful. bool processQueryText(const String & text); virtual void readArguments( diff --git a/src/Client/Connection.cpp b/src/Client/Connection.cpp index 19cd8cc4ee5..799c7511982 100644 --- a/src/Client/Connection.cpp +++ b/src/Client/Connection.cpp @@ -1,3 +1,4 @@ +#include #include #include #include @@ -37,6 +38,7 @@ #include #include +#include #include "config.h" #if USE_SSL @@ -68,12 +70,23 @@ namespace ErrorCodes extern const int EMPTY_DATA_PASSED; } -Connection::~Connection() = default; +Connection::~Connection() +{ + try{ + if (connected) + Connection::disconnect(); + } + catch (...) 
+ { + tryLogCurrentException(__PRETTY_FUNCTION__); + } +} Connection::Connection(const String & host_, UInt16 port_, const String & default_database_, const String & user_, const String & password_, [[maybe_unused]] const SSHKey & ssh_private_key_, + const String & jwt_, const String & quota_key_, const String & cluster_, const String & cluster_secret_, @@ -86,6 +99,7 @@ Connection::Connection(const String & host_, UInt16 port_, , ssh_private_key(ssh_private_key_) #endif , quota_key(quota_key_) + , jwt(jwt_) , cluster(cluster_) , cluster_secret(cluster_secret_) , client_name(client_name_) @@ -257,13 +271,31 @@ void Connection::connect(const ConnectionTimeouts & timeouts) void Connection::disconnect() { - maybe_compressed_out = nullptr; in = nullptr; last_input_packet_type.reset(); std::exception_ptr finalize_exception; + + try + { + // finalize() can write and throw an exception. + if (maybe_compressed_out) + maybe_compressed_out->finalize(); + } + catch (...) + { + /// Don't throw an exception here, it will leave Connection in invalid state. + finalize_exception = std::current_exception(); + + if (out) + { + out->cancel(); + out = nullptr; + } + } + maybe_compressed_out = nullptr; + try { - // finalize() can write to socket and throw an exception. if (out) out->finalize(); } @@ -276,6 +308,7 @@ void Connection::disconnect() if (socket) socket->close(); + socket = nullptr; connected = false; nonce.reset(); @@ -341,6 +374,11 @@ void Connection::sendHello() performHandshakeForSSHAuth(); } #endif + else if (!jwt.empty()) + { + writeStringBinary(EncodedUserInfo::JWT_AUTHENTICAION_MARKER, *out); + writeStringBinary(jwt, *out); + } else { writeStringBinary(user, *out); @@ -767,6 +805,8 @@ void Connection::sendQuery( } maybe_compressed_in.reset(); + if (maybe_compressed_out && maybe_compressed_out != out) + maybe_compressed_out->cancel(); maybe_compressed_out.reset(); block_in.reset(); block_logs_in.reset(); @@ -1310,6 +1350,7 @@ ServerConnectionPtr Connection::createConnection(const ConnectionParameters & pa parameters.user, parameters.password, parameters.ssh_private_key, + parameters.jwt, parameters.quota_key, "", /* cluster */ "", /* cluster_secret */ diff --git a/src/Client/Connection.h b/src/Client/Connection.h index 9632eb9d948..0f4b3e436df 100644 --- a/src/Client/Connection.h +++ b/src/Client/Connection.h @@ -53,6 +53,7 @@ public: const String & default_database_, const String & user_, const String & password_, const SSHKey & ssh_private_key_, + const String & jwt_, const String & quota_key_, const String & cluster_, const String & cluster_secret_, @@ -173,6 +174,7 @@ private: SSHKey ssh_private_key; #endif String quota_key; + String jwt; /// For inter-server authorization String cluster; diff --git a/src/Client/ConnectionParameters.cpp b/src/Client/ConnectionParameters.cpp index 774f3375f63..303bebc30d2 100644 --- a/src/Client/ConnectionParameters.cpp +++ b/src/Client/ConnectionParameters.cpp @@ -52,31 +52,11 @@ ConnectionParameters::ConnectionParameters(const Poco::Util::AbstractConfigurati /// changed the default value to "default" to fix the issue when the user in the prompt is blank user = config.getString("user", "default"); - if (!config.has("ssh-key-file")) + if (config.has("jwt")) { - bool password_prompt = false; - if (config.getBool("ask-password", false)) - { - if (config.has("password")) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Specified both --password and --ask-password. 
Remove one of them"); - password_prompt = true; - } - else - { - password = config.getString("password", ""); - /// if the value of --password is omitted, the password will be set implicitly to "\n" - if (password == ASK_PASSWORD) - password_prompt = true; - } - if (password_prompt) - { - std::string prompt{"Password for user (" + user + "): "}; - char buf[1000] = {}; - if (auto * result = readpassphrase(prompt.c_str(), buf, sizeof(buf), 0)) - password = result; - } + jwt = config.getString("jwt"); } - else + else if (config.has("ssh-key-file")) { #if USE_SSH std::string filename = config.getString("ssh-key-file"); @@ -102,6 +82,30 @@ ConnectionParameters::ConnectionParameters(const Poco::Util::AbstractConfigurati throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSH is disabled, because ClickHouse is built without libssh"); #endif } + else + { + bool password_prompt = false; + if (config.getBool("ask-password", false)) + { + if (config.has("password")) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Specified both --password and --ask-password. Remove one of them"); + password_prompt = true; + } + else + { + password = config.getString("password", ""); + /// if the value of --password is omitted, the password will be set implicitly to "\n" + if (password == ASK_PASSWORD) + password_prompt = true; + } + if (password_prompt) + { + std::string prompt{"Password for user (" + user + "): "}; + char buf[1000] = {}; + if (auto * result = readpassphrase(prompt.c_str(), buf, sizeof(buf), 0)) + password = result; + } + } quota_key = config.getString("quota_key", ""); @@ -139,7 +143,7 @@ ConnectionParameters::ConnectionParameters(const Poco::Util::AbstractConfigurati } UInt16 ConnectionParameters::getPortFromConfig(const Poco::Util::AbstractConfiguration & config, - std::string connection_host) + const std::string & connection_host) { bool is_secure = enableSecureConnection(config, connection_host); return config.getInt("port", diff --git a/src/Client/ConnectionParameters.h b/src/Client/ConnectionParameters.h index f23522d48b3..c305c7813f2 100644 --- a/src/Client/ConnectionParameters.h +++ b/src/Client/ConnectionParameters.h @@ -22,6 +22,7 @@ struct ConnectionParameters std::string password; std::string quota_key; SSHKey ssh_private_key; + std::string jwt; Protocol::Secure security = Protocol::Secure::Disable; Protocol::Compression compression = Protocol::Compression::Enable; ConnectionTimeouts timeouts; @@ -30,7 +31,7 @@ struct ConnectionParameters ConnectionParameters(const Poco::Util::AbstractConfiguration & config, std::string host); ConnectionParameters(const Poco::Util::AbstractConfiguration & config, std::string host, std::optional port); - static UInt16 getPortFromConfig(const Poco::Util::AbstractConfiguration & config, std::string connection_host); + static UInt16 getPortFromConfig(const Poco::Util::AbstractConfiguration & config, const std::string & connection_host); /// Ask to enter the user's password if password option contains this value. /// "\n" is used because there is hardly a chance that a user would use '\n' as password. 
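Editor's note: a minimal sketch of the credential precedence that the ConnectionParameters branching above establishes -- a JWT wins, then an SSH key file, then an explicit password, and only otherwise an interactive prompt. The std::map stands in for Poco::Util::AbstractConfiguration, and all names below are illustrative rather than the real API:

    #include <map>
    #include <string>

    struct Credentials
    {
        std::string kind;    // "jwt", "ssh-key", "password" or "password-prompt"
        std::string value;
    };

    Credentials pickCredentials(const std::map<std::string, std::string> & config)
    {
        if (auto it = config.find("jwt"); it != config.end())
            return {"jwt", it->second};              // --jwt takes precedence over everything else
        if (auto it = config.find("ssh-key-file"); it != config.end())
            return {"ssh-key", it->second};          // then an SSH private key
        if (auto it = config.find("password"); it != config.end())
            return {"password", it->second};         // then an explicit password
        return {"password-prompt", ""};              // otherwise the client asks interactively
    }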
diff --git a/src/Client/ConnectionPool.h b/src/Client/ConnectionPool.h index d35c2552461..725a5e91ac0 100644 --- a/src/Client/ConnectionPool.h +++ b/src/Client/ConnectionPool.h @@ -123,7 +123,7 @@ protected: { return std::make_shared( host, port, - default_database, user, password, SSHKey(), quota_key, + default_database, user, password, SSHKey(), /*jwt*/ "", quota_key, cluster, cluster_secret, client_name, compression, secure); } diff --git a/src/Common/Config/ConfigReloader.cpp b/src/Common/Config/ConfigReloader.cpp index b2c07dacf07..769a63c036b 100644 --- a/src/Common/Config/ConfigReloader.cpp +++ b/src/Common/Config/ConfigReloader.cpp @@ -19,8 +19,7 @@ ConfigReloader::ConfigReloader( const std::string & preprocessed_dir_, zkutil::ZooKeeperNodeCache && zk_node_cache_, const zkutil::EventPtr & zk_changed_event_, - Updater && updater_, - bool already_loaded) + Updater && updater_) : config_path(config_path_) , extra_paths(extra_paths_) , preprocessed_dir(preprocessed_dir_) @@ -28,10 +27,15 @@ ConfigReloader::ConfigReloader( , zk_changed_event(zk_changed_event_) , updater(std::move(updater_)) { - if (!already_loaded) - reloadIfNewer(/* force = */ true, /* throw_on_error = */ true, /* fallback_to_preprocessed = */ true, /* initial_loading = */ true); -} + auto config = reloadIfNewer(/* force = */ true, /* throw_on_error = */ true, /* fallback_to_preprocessed = */ true, /* initial_loading = */ true); + if (config.has_value()) + reload_interval = std::chrono::milliseconds(config->configuration->getInt64("config_reload_interval_ms", DEFAULT_RELOAD_INTERVAL.count())); + else + reload_interval = DEFAULT_RELOAD_INTERVAL; + + LOG_TRACE(log, "Config reload interval set to {}ms", reload_interval.count()); +} void ConfigReloader::start() { @@ -82,7 +86,17 @@ void ConfigReloader::run() if (quit) return; - reloadIfNewer(zk_changed, /* throw_on_error = */ false, /* fallback_to_preprocessed = */ false, /* initial_loading = */ false); + auto config = reloadIfNewer(zk_changed, /* throw_on_error = */ false, /* fallback_to_preprocessed = */ false, /* initial_loading = */ false); + if (config.has_value()) + { + auto new_reload_interval = std::chrono::milliseconds(config->configuration->getInt64("config_reload_interval_ms", DEFAULT_RELOAD_INTERVAL.count())); + if (new_reload_interval != reload_interval) + { + reload_interval = new_reload_interval; + LOG_TRACE(log, "Config reload interval changed to {}ms", reload_interval.count()); + } + } + } catch (...) { @@ -92,7 +106,7 @@ void ConfigReloader::run() } } -void ConfigReloader::reloadIfNewer(bool force, bool throw_on_error, bool fallback_to_preprocessed, bool initial_loading) +std::optional ConfigReloader::reloadIfNewer(bool force, bool throw_on_error, bool fallback_to_preprocessed, bool initial_loading) { std::lock_guard lock(reload_mutex); @@ -120,7 +134,7 @@ void ConfigReloader::reloadIfNewer(bool force, bool throw_on_error, bool fallbac throw; tryLogCurrentException(log, "ZooKeeper error when loading config from '" + config_path + "'"); - return; + return std::nullopt; } catch (...) 
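Editor's note: the ConfigReloader hunks above make reloadIfNewer() hand back the loaded configuration (or nothing on failure) so the polling interval can track config_reload_interval_ms. A small sketch of that decision, with hypothetical stand-in types instead of the real LoadedConfig:

    #include <chrono>
    #include <optional>

    struct LoadedConfig
    {
        long long reload_interval_ms = 2000;   // stand-in for the config_reload_interval_ms key
    };

    constexpr std::chrono::milliseconds kDefaultReloadInterval{2000};

    /// On a failed reload (nullopt) the previous interval is kept;
    /// otherwise the interval comes from the freshly loaded configuration.
    std::chrono::milliseconds nextReloadInterval(const std::optional<LoadedConfig> & config,
                                                 std::chrono::milliseconds current)
    {
        if (!config)
            return current;
        return std::chrono::milliseconds(config->reload_interval_ms);
    }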
{ @@ -128,7 +142,7 @@ void ConfigReloader::reloadIfNewer(bool force, bool throw_on_error, bool fallbac throw; tryLogCurrentException(log, "Error loading config from '" + config_path + "'"); - return; + return std::nullopt; } config_processor.savePreprocessedConfig(loaded_config, preprocessed_dir); @@ -154,11 +168,13 @@ void ConfigReloader::reloadIfNewer(bool force, bool throw_on_error, bool fallbac if (throw_on_error) throw; tryLogCurrentException(log, "Error updating configuration from '" + config_path + "' config."); - return; + return std::nullopt; } LOG_DEBUG(log, "Loaded config '{}', performed update on configuration", config_path); + return loaded_config; } + return std::nullopt; } struct ConfigReloader::FileWithTimestamp diff --git a/src/Common/Config/ConfigReloader.h b/src/Common/Config/ConfigReloader.h index 13a797bad08..89ef0fd8a0b 100644 --- a/src/Common/Config/ConfigReloader.h +++ b/src/Common/Config/ConfigReloader.h @@ -17,8 +17,6 @@ namespace Poco { class Logger; } namespace DB { -class Context; - /** Every two seconds checks configuration files for update. * If configuration is changed, then config will be reloaded by ConfigProcessor * and the reloaded config will be applied via Updater functor. @@ -27,6 +25,8 @@ class Context; class ConfigReloader { public: + static constexpr auto DEFAULT_RELOAD_INTERVAL = std::chrono::milliseconds(2000); + using Updater = std::function; ConfigReloader( @@ -35,8 +35,7 @@ public: const std::string & preprocessed_dir, zkutil::ZooKeeperNodeCache && zk_node_cache, const zkutil::EventPtr & zk_changed_event, - Updater && updater, - bool already_loaded); + Updater && updater); ~ConfigReloader(); @@ -53,7 +52,7 @@ public: private: void run(); - void reloadIfNewer(bool force, bool throw_on_error, bool fallback_to_preprocessed, bool initial_loading); + std::optional reloadIfNewer(bool force, bool throw_on_error, bool fallback_to_preprocessed, bool initial_loading); struct FileWithTimestamp; @@ -67,8 +66,6 @@ private: FilesChangesTracker getNewFileList() const; - static constexpr auto reload_interval = std::chrono::seconds(2); - LoggerPtr log = getLogger("ConfigReloader"); std::string config_path; @@ -85,6 +82,8 @@ private: std::atomic quit{false}; ThreadFromGlobalPool thread; + std::chrono::milliseconds reload_interval = DEFAULT_RELOAD_INTERVAL; + /// Locked inside reloadIfNewer. std::mutex reload_mutex; }; diff --git a/src/Common/CurrentMetrics.cpp b/src/Common/CurrentMetrics.cpp index 731c72d65f2..8516a88c7af 100644 --- a/src/Common/CurrentMetrics.cpp +++ b/src/Common/CurrentMetrics.cpp @@ -1,6 +1,7 @@ #include +// clang-format off /// Available metrics. Add something here as you wish. /// If the metric is generic (i.e. 
not server specific) /// it should be also added to src/Coordination/KeeperConstant.cpp diff --git a/src/Common/Dwarf.cpp b/src/Common/Dwarf.cpp index 99da3b75429..8439c01b22c 100644 --- a/src/Common/Dwarf.cpp +++ b/src/Common/Dwarf.cpp @@ -202,7 +202,10 @@ uint64_t readU64(std::string_view & sp) { SAFE_CHECK(sp.size() >= N, "underflow"); uint64_t x = 0; - memcpy(&x, sp.data(), N); + if constexpr (std::endian::native == std::endian::little) + memcpy(&x, sp.data(), N); + else + memcpy(reinterpret_cast(&x) + sizeof(uint64_t) - N, sp.data(), N); sp.remove_prefix(N); return x; } diff --git a/src/Common/ErrorCodes.h b/src/Common/ErrorCodes.h index 8879779a5e2..11a163becbe 100644 --- a/src/Common/ErrorCodes.h +++ b/src/Common/ErrorCodes.h @@ -1,8 +1,6 @@ #pragma once #include -#include -#include #include #include #include @@ -35,7 +33,7 @@ namespace ErrorCodes struct Error { - /// Number of times Exception with this ErrorCode had been throw. + /// Number of times Exception with this ErrorCode has been thrown. Value count = 0; /// Time of the last error. UInt64 error_time_ms = 0; diff --git a/src/Common/ICachePolicy.h b/src/Common/ICachePolicy.h index 8aa75d1d81f..301a5c6cbbd 100644 --- a/src/Common/ICachePolicy.h +++ b/src/Common/ICachePolicy.h @@ -48,7 +48,7 @@ public: /// HashFunction usually hashes the entire key and the found key will be equal the provided key. In such cases, use get(). It is also /// possible to store other, non-hashed data in the key. In that case, the found key is potentially different from the provided key. - /// Then use getWithKey() to also return the found key including it's non-hashed data. + /// Then use getWithKey() to also return the found key including its non-hashed data. virtual MappedPtr get(const Key & key) = 0; virtual std::optional getWithKey(const Key &) = 0; diff --git a/src/Common/ProfileEvents.cpp b/src/Common/ProfileEvents.cpp index fef1c4a2b75..8f181d55042 100644 --- a/src/Common/ProfileEvents.cpp +++ b/src/Common/ProfileEvents.cpp @@ -3,6 +3,7 @@ #include +// clang-format off /// Available events. Add something here as you wish. /// If the event is generic (i.e. 
not server specific) /// it should be also added to src/Coordination/KeeperConstant.cpp @@ -14,6 +15,7 @@ M(QueriesWithSubqueries, "Count queries with all subqueries") \ M(SelectQueriesWithSubqueries, "Count SELECT queries with all subqueries") \ M(InsertQueriesWithSubqueries, "Count INSERT queries with all subqueries") \ + M(SelectQueriesWithPrimaryKeyUsage, "Count SELECT queries which use the primary key to evaluate the WHERE condition") \ M(AsyncInsertQuery, "Same as InsertQuery, but only for asynchronous INSERT queries.") \ M(AsyncInsertBytes, "Data size in bytes of asynchronous INSERT queries.") \ M(AsyncInsertRows, "Number of rows inserted by asynchronous INSERT queries.") \ diff --git a/src/Common/SystemLogBase.cpp b/src/Common/SystemLogBase.cpp index 15803db4929..17fa8c90a95 100644 --- a/src/Common/SystemLogBase.cpp +++ b/src/Common/SystemLogBase.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include #include diff --git a/src/Common/SystemLogBase.h b/src/Common/SystemLogBase.h index 95906c63349..616f7c42bb9 100644 --- a/src/Common/SystemLogBase.h +++ b/src/Common/SystemLogBase.h @@ -1,9 +1,7 @@ #pragma once -#include #include #include -#include #include #include @@ -32,7 +30,8 @@ M(FilesystemReadPrefetchesLogElement) \ M(AsynchronousInsertLogElement) \ M(BackupLogElement) \ - M(BlobStorageLogElement) + M(BlobStorageLogElement) \ + M(ErrorLogElement) namespace Poco { diff --git a/src/Compression/CompressedWriteBuffer.cpp b/src/Compression/CompressedWriteBuffer.cpp index f16330332ab..83c9fbc9573 100644 --- a/src/Compression/CompressedWriteBuffer.cpp +++ b/src/Compression/CompressedWriteBuffer.cpp @@ -57,14 +57,16 @@ void CompressedWriteBuffer::nextImpl() } } -CompressedWriteBuffer::~CompressedWriteBuffer() -{ - finalize(); -} - CompressedWriteBuffer::CompressedWriteBuffer(WriteBuffer & out_, CompressionCodecPtr codec_, size_t buf_size) : BufferWithOwnMemory(buf_size), out(out_), codec(std::move(codec_)) { } +CompressedWriteBuffer::~CompressedWriteBuffer() +{ + if (!canceled) + finalize(); +} + + } diff --git a/src/Core/Defines.h b/src/Core/Defines.h index b7675b55b87..6df335a9c8f 100644 --- a/src/Core/Defines.h +++ b/src/Core/Defines.h @@ -90,13 +90,13 @@ static constexpr auto DEFAULT_UNCOMPRESSED_CACHE_POLICY = "SLRU"; static constexpr auto DEFAULT_UNCOMPRESSED_CACHE_MAX_SIZE = 0_MiB; static constexpr auto DEFAULT_UNCOMPRESSED_CACHE_SIZE_RATIO = 0.5l; static constexpr auto DEFAULT_MARK_CACHE_POLICY = "SLRU"; -static constexpr auto DEFAULT_MARK_CACHE_MAX_SIZE = 5368_MiB; +static constexpr auto DEFAULT_MARK_CACHE_MAX_SIZE = 5_GiB; static constexpr auto DEFAULT_MARK_CACHE_SIZE_RATIO = 0.5l; static constexpr auto DEFAULT_INDEX_UNCOMPRESSED_CACHE_POLICY = "SLRU"; static constexpr auto DEFAULT_INDEX_UNCOMPRESSED_CACHE_MAX_SIZE = 0; static constexpr auto DEFAULT_INDEX_UNCOMPRESSED_CACHE_SIZE_RATIO = 0.5; static constexpr auto DEFAULT_INDEX_MARK_CACHE_POLICY = "SLRU"; -static constexpr auto DEFAULT_INDEX_MARK_CACHE_MAX_SIZE = 5368_MiB; +static constexpr auto DEFAULT_INDEX_MARK_CACHE_MAX_SIZE = 5_GiB; static constexpr auto DEFAULT_INDEX_MARK_CACHE_SIZE_RATIO = 0.3; static constexpr auto DEFAULT_MMAP_CACHE_MAX_SIZE = 1_KiB; /// chosen by rolling dice static constexpr auto DEFAULT_COMPILED_EXPRESSION_CACHE_MAX_SIZE = 128_MiB; diff --git a/src/Core/Protocol.h b/src/Core/Protocol.h index 3fc9e089451..4c0848c0706 100644 --- a/src/Core/Protocol.h +++ b/src/Core/Protocol.h @@ -63,6 +63,9 @@ const char USER_INTERSERVER_MARKER[] = " INTERSERVER SECRET "; /// Marker for SSH-keys-based 
authentication (passed as the user name) const char SSH_KEY_AUTHENTICAION_MARKER[] = " SSH KEY AUTHENTICATION "; +/// Marker for JSON Web Token authentication +const char JWT_AUTHENTICAION_MARKER[] = " JWT AUTHENTICATION "; + }; namespace Protocol diff --git a/src/Core/ServerSettings.h b/src/Core/ServerSettings.h index 1fde8d58c7b..68ac45fa24f 100644 --- a/src/Core/ServerSettings.h +++ b/src/Core/ServerSettings.h @@ -14,6 +14,7 @@ class AbstractConfiguration; namespace DB { +// clang-format off #define SERVER_SETTINGS(M, ALIAS) \ M(Bool, show_addresses_in_stack_traces, true, "If it is set true will show addresses in stack traces", 0) \ M(Bool, shutdown_wait_unfinished_queries, false, "If set true ClickHouse will wait for running queries finish before shutdown.", 0) \ @@ -85,10 +86,12 @@ namespace DB M(Double, index_mark_cache_size_ratio, DEFAULT_INDEX_MARK_CACHE_SIZE_RATIO, "The size of the protected queue in the secondary index mark cache relative to the cache's total size.", 0) \ M(UInt64, page_cache_chunk_size, 2 << 20, "Bytes per chunk in userspace page cache. Rounded up to a multiple of page size (typically 4 KiB) or huge page size (typically 2 MiB, only if page_cache_use_thp is enabled).", 0) \ M(UInt64, page_cache_mmap_size, 1 << 30, "Bytes per memory mapping in userspace page cache. Not important.", 0) \ - M(UInt64, page_cache_size, 10ul << 30, "Amount of virtual memory to map for userspace page cache. If page_cache_use_madv_free is enabled, it's recommended to set this higher than the machine's RAM size. Use 0 to disable userspace page cache.", 0) \ + M(UInt64, page_cache_size, 0, "Amount of virtual memory to map for userspace page cache. If page_cache_use_madv_free is enabled, it's recommended to set this higher than the machine's RAM size. Use 0 to disable userspace page cache.", 0) \ M(Bool, page_cache_use_madv_free, DBMS_DEFAULT_PAGE_CACHE_USE_MADV_FREE, "If true, the userspace page cache will allow the OS to automatically reclaim memory from the cache on memory pressure (using MADV_FREE).", 0) \ M(Bool, page_cache_use_transparent_huge_pages, true, "Userspace will attempt to use transparent huge pages on Linux. This is best-effort.", 0) \ M(UInt64, mmap_cache_size, DEFAULT_MMAP_CACHE_MAX_SIZE, "A cache for mmapped files.", 0) \ + M(UInt64, compiled_expression_cache_size, DEFAULT_COMPILED_EXPRESSION_CACHE_MAX_SIZE, "Byte size of compiled expressions cache.", 0) \ + M(UInt64, compiled_expression_cache_elements_size, DEFAULT_COMPILED_EXPRESSION_CACHE_MAX_ENTRIES, "Maximum entries in compiled expressions cache.", 0) \ \ M(Bool, disable_internal_dns_cache, false, "Disable internal DNS caching at all.", 0) \ M(UInt64, dns_cache_max_entries, 10000, "Internal DNS cache max entries.", 0) \ @@ -151,6 +154,7 @@ namespace DB M(String, merge_workload, "default", "Name of workload to be used to access resources for all merges (may be overridden by a merge tree setting)", 0) \ M(String, mutation_workload, "default", "Name of workload to be used to access resources for all mutations (may be overridden by a merge tree setting)", 0) \ M(Double, gwp_asan_force_sample_probability, 0, "Probability that an allocation from specific places will be sampled by GWP Asan (i.e.
PODArray allocations)", 0) \ + M(UInt64, config_reload_interval_ms, 2000, "How often clickhouse will reload config and check for new changes", 0) \ /// If you add a setting which can be updated at runtime, please update 'changeable_settings' map in StorageSystemServerSettings.cpp diff --git a/src/Core/Settings.h b/src/Core/Settings.h index e3c122467bd..41878142bdc 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -31,6 +31,7 @@ class IColumn; * for tracking settings changes in different versions and for special `compatibility` setting to work correctly. */ +// clang-format off #define COMMON_SETTINGS(M, ALIAS) \ M(Dialect, dialect, Dialect::clickhouse, "Which dialect will be used to parse query", 0)\ M(UInt64, min_compress_block_size, 65536, "The actual size of the block to compress, if the uncompressed data less than max_compress_block_size is no less than this value and no less than the volume of data for one mark.", 0) \ @@ -933,7 +934,7 @@ class IColumn; M(Int64, prefer_warmed_unmerged_parts_seconds, 0, "Only available in ClickHouse Cloud. If a merged part is less than this many seconds old and is not pre-warmed (see cache_populated_by_fetch), but all its source parts are available and pre-warmed, SELECT queries will read from those parts instead. Only for ReplicatedMergeTree. Note that this only checks whether CacheWarmer processed the part; if the part was fetched into cache by something else, it'll still be considered cold until CacheWarmer gets to it; if it was warmed, then evicted from cache, it'll still be considered warm.", 0) \ M(Bool, iceberg_engine_ignore_schema_evolution, false, "Ignore schema evolution in Iceberg table engine and read all data using latest schema saved on table creation. Note that it can lead to incorrect result", 0) \ M(Bool, allow_deprecated_error_prone_window_functions, false, "Allow usage of deprecated error prone window functions (neighbor, runningAccumulate, runningDifferenceStartingWithFirstValue, runningDifference)", 0) \ - M(Bool, uniform_snowflake_conversion_functions, true, "Enables functions snowflakeIDToDateTime[64] and dateTime[64]ToSnowflakeID while disabling functions snowflakeToDateTime[64] and dateTime[64]ToSnowflake.", 0) \ + M(Bool, allow_deprecated_snowflake_conversion_functions, false, "Enables deprecated functions snowflakeToDateTime[64] and dateTime[64]ToSnowflake.", 0) \ // End of COMMON_SETTINGS // Please add settings related to formats into the FORMAT_FACTORY_SETTINGS, move obsolete settings to OBSOLETE_SETTINGS and obsolete format settings to OBSOLETE_FORMAT_SETTINGS. @@ -1159,6 +1160,7 @@ class IColumn; M(Bool, output_format_parquet_parallel_encoding, true, "Do Parquet encoding in multiple threads. Requires output_format_parquet_use_custom_encoder.", 0) \ M(UInt64, output_format_parquet_data_page_size, 1024 * 1024, "Target page size in bytes, before compression.", 0) \ M(UInt64, output_format_parquet_batch_size, 1024, "Check page size every this many rows. Consider decreasing if you have columns with average values size above a few KBs.", 0) \ + M(Bool, output_format_parquet_write_page_index, true, "Add a possibility to write page index into parquet files.", 0) \ M(String, output_format_avro_codec, "", "Compression codec used for output. 
Possible values: 'null', 'deflate', 'snappy', 'zstd'.", 0) \ M(UInt64, output_format_avro_sync_interval, 16 * 1024, "Sync interval in bytes.", 0) \ M(String, output_format_avro_string_column_pattern, "", "For Avro format: regexp of String columns to select as AVRO string.", 0) \ diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index cdc955b38bc..fba6386b9bd 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -75,6 +75,7 @@ namespace SettingsChangesHistory using SettingsChanges = std::vector; } +// clang-format off /// History of settings changes that controls some backward incompatible changes /// across all ClickHouse versions. It maps ClickHouse version to settings changes that were done /// in this version. This history contains both changes to existing settings and newly added settings. @@ -85,6 +86,8 @@ namespace SettingsChangesHistory /// It's used to implement `compatibility` setting (see https://github.com/ClickHouse/ClickHouse/issues/35972) static const std::map settings_changes_history = { + {"24.7", {{"output_format_parquet_write_page_index", false, true, "Add a possibility to write page index into parquet files."}, + }}, {"24.6", {{"materialize_skip_indexes_on_insert", true, true, "Added new setting to allow to disable materialization of skip indexes on insert"}, {"materialize_statistics_on_insert", true, true, "Added new setting to allow to disable materialization of statistics on insert"}, {"input_format_parquet_use_native_reader", false, false, "When reading Parquet files, to use native reader instead of arrow reader."}, @@ -101,7 +104,7 @@ static const std::maptrySet(db_name_path, getDatabaseName()); + if (error_code == Coordination::Error::ZNONODE) + current_zookeeper->tryCreate(db_name_path, getDatabaseName(), zkutil::CreateMode::Persistent); + is_readonly = false; } catch (...) 
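Editor's note: the DatabaseReplicated hunk above uses a "set, then create on ZNONODE" upsert for the db_name_path node, since metadata written by older versions may not have it yet. A generic sketch of that pattern against a toy key-value store (all names hypothetical, not the ZooKeeper client API):

    #include <map>
    #include <string>

    enum class KvError { Ok, NoNode };

    struct KvStore
    {
        std::map<std::string, std::string> data;

        KvError set(const std::string & path, const std::string & value)
        {
            auto it = data.find(path);
            if (it == data.end())
                return KvError::NoNode;   // plays the role of Coordination::Error::ZNONODE
            it->second = value;
            return KvError::Ok;
        }

        void create(const std::string & path, const std::string & value) { data.emplace(path, value); }
    };

    void upsert(KvStore & store, const std::string & path, const std::string & value)
    {
        if (store.set(path, value) == KvError::NoNode)
            store.create(path, value);    // node is missing on older metadata, create it lazily
    }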
@@ -1382,6 +1390,13 @@ void DatabaseReplicated::drop(ContextPtr context_) } } +void DatabaseReplicated::renameDatabase(ContextPtr query_context, const String & new_name) +{ + DatabaseAtomic::renameDatabase(query_context, new_name); + auto db_name_path = fs::path(zookeeper_path) / FIRST_REPLICA_DATABASE_NAME; + getZooKeeper()->set(db_name_path, getDatabaseName()); +} + void DatabaseReplicated::stopReplication() { if (ddl_worker) diff --git a/src/Databases/DatabaseReplicated.h b/src/Databases/DatabaseReplicated.h index 761d6b4b503..eab5b2ff931 100644 --- a/src/Databases/DatabaseReplicated.h +++ b/src/Databases/DatabaseReplicated.h @@ -86,6 +86,8 @@ public: std::vector tryGetAreReplicasActive(const ClusterPtr & cluster_) const; + void renameDatabase(ContextPtr query_context, const String & new_name) override; + friend struct DatabaseReplicatedTask; friend class DatabaseReplicatedDDLWorker; private: diff --git a/src/Disks/DiskEncrypted.h b/src/Disks/DiskEncrypted.h index 9b575c65bce..f06f5ba8e17 100644 --- a/src/Disks/DiskEncrypted.h +++ b/src/Disks/DiskEncrypted.h @@ -355,6 +355,8 @@ public: { return delegate->getS3StorageClient(); } + + std::shared_ptr tryGetS3StorageClient() const override { return delegate->tryGetS3StorageClient(); } #endif private: diff --git a/src/Disks/IDisk.h b/src/Disks/IDisk.h index 658acb01c74..4781839cb01 100644 --- a/src/Disks/IDisk.h +++ b/src/Disks/IDisk.h @@ -478,6 +478,8 @@ public: "Method getS3StorageClient() is not implemented for disk type: {}", getDataSourceDescription().toString()); } + + virtual std::shared_ptr tryGetS3StorageClient() const { return nullptr; } #endif diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageAuth.cpp b/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageAuth.cpp index bae58f0b9c6..1a5388349f8 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageAuth.cpp +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageAuth.cpp @@ -181,7 +181,7 @@ std::unique_ptr getAzureBlobStorageClientWithAuth( if (config.getBool(config_prefix + ".use_workload_identity", false)) { auto workload_identity_credential = std::make_shared(); - return std::make_unique(url, workload_identity_credential); + return std::make_unique(url, workload_identity_credential, client_options); } auto managed_identity_credential = std::make_shared(); diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp index e7ecf7cd515..86a035f3be7 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp @@ -127,25 +127,22 @@ bool AzureObjectStorage::exists(const StoredObject & object) const { auto client_ptr = client.get(); - /// What a shame, no Exists method... 
- Azure::Storage::Blobs::ListBlobsOptions options; - options.Prefix = object.remote_path; - options.PageSizeHint = 1; - - ProfileEvents::increment(ProfileEvents::AzureListObjects); + ProfileEvents::increment(ProfileEvents::AzureGetProperties); if (client_ptr->GetClickhouseOptions().IsClientForDisk) - ProfileEvents::increment(ProfileEvents::DiskAzureListObjects); + ProfileEvents::increment(ProfileEvents::DiskAzureGetProperties); - auto blobs_list_response = client_ptr->ListBlobs(options); - auto blobs_list = blobs_list_response.Blobs; - - for (const auto & blob : blobs_list) + try { - if (object.remote_path == blob.Name) - return true; + auto blob_client = client_ptr->GetBlobClient(object.remote_path); + blob_client.GetProperties(); + return true; + } + catch (const Azure::Storage::StorageException & e) + { + if (e.StatusCode == Azure::Core::Http::HttpStatusCode::NotFound) + return false; + throw; } - - return false; } ObjectStorageIteratorPtr AzureObjectStorage::iterate(const std::string & path_prefix, size_t max_keys) const @@ -160,7 +157,9 @@ void AzureObjectStorage::listObjects(const std::string & path, RelativePathsWith { auto client_ptr = client.get(); - /// What a shame, no Exists method... + /// NOTE: list doesn't work if endpoint contains non-empty prefix for blobs. + /// See AzureBlobStorageEndpoint and processAzureBlobStorageEndpoint for details. + Azure::Storage::Blobs::ListBlobsOptions options; options.Prefix = path; if (max_keys) diff --git a/src/Disks/ObjectStorages/Cached/CachedObjectStorage.h b/src/Disks/ObjectStorages/Cached/CachedObjectStorage.h index 6a5a75c08f0..727dbeed853 100644 --- a/src/Disks/ObjectStorages/Cached/CachedObjectStorage.h +++ b/src/Disks/ObjectStorages/Cached/CachedObjectStorage.h @@ -138,6 +138,11 @@ public: { return object_storage->getS3StorageClient(); } + + std::shared_ptr tryGetS3StorageClient() override + { + return object_storage->tryGetS3StorageClient(); + } #endif private: diff --git a/src/Disks/ObjectStorages/DiskObjectStorage.cpp b/src/Disks/ObjectStorages/DiskObjectStorage.cpp index 5803a985000..4de6d78e952 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorage.cpp +++ b/src/Disks/ObjectStorages/DiskObjectStorage.cpp @@ -587,6 +587,11 @@ std::shared_ptr DiskObjectStorage::getS3StorageClient() const { return object_storage->getS3StorageClient(); } + +std::shared_ptr DiskObjectStorage::tryGetS3StorageClient() const +{ + return object_storage->tryGetS3StorageClient(); +} #endif DiskPtr DiskObjectStorageReservation::getDisk(size_t i) const diff --git a/src/Disks/ObjectStorages/DiskObjectStorage.h b/src/Disks/ObjectStorages/DiskObjectStorage.h index ffef0a007da..59cc82d8c81 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorage.h +++ b/src/Disks/ObjectStorages/DiskObjectStorage.h @@ -214,6 +214,7 @@ public: #if USE_AWS_S3 std::shared_ptr getS3StorageClient() const override; + std::shared_ptr tryGetS3StorageClient() const override; #endif private: diff --git a/src/Disks/ObjectStorages/IObjectStorage.h b/src/Disks/ObjectStorages/IObjectStorage.h index 7bc9e4073db..9f5c14fdb7c 100644 --- a/src/Disks/ObjectStorages/IObjectStorage.h +++ b/src/Disks/ObjectStorages/IObjectStorage.h @@ -127,8 +127,10 @@ public: /// /, /a, /a/b, /a/b/c, /a/b/c/d while exists will return true only for /a/b/c/d virtual bool existsOrHasAnyChild(const std::string & path) const; + /// List objects recursively by certain prefix. 
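Editor's note: the AzureObjectStorage::exists change above swaps a one-item List request for a point lookup -- fetch the blob's properties and treat "not found" as a negative answer, rethrowing everything else. A generic sketch of that shape (HttpError and the callback are stand-ins, not the Azure SDK API):

    #include <stdexcept>
    #include <string>

    struct HttpError : std::runtime_error
    {
        int status;
        explicit HttpError(int status_) : std::runtime_error("http error"), status(status_) {}
    };

    /// `get_properties` stands in for a HEAD-style metadata request (e.g. fetching blob properties);
    /// it is expected to throw HttpError on failure.
    bool existsByProperties(const std::string & path, void (*get_properties)(const std::string &))
    {
        try
        {
            get_properties(path);
            return true;
        }
        catch (const HttpError & e)
        {
            if (e.status == 404)
                return false;   // NotFound simply means the object is absent
            throw;              // anything else is a genuine error and must propagate
        }
    }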
virtual void listObjects(const std::string & path, RelativePathsWithMetadata & children, size_t max_keys) const; + /// List objects recursively by certain prefix. Use it instead of listObjects, if you want to list objects lazily. virtual ObjectStorageIteratorPtr iterate(const std::string & path_prefix, size_t max_keys) const; /// Get object metadata if supported. It should be possible to receive @@ -269,6 +271,7 @@ public: { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "This function is only implemented for S3ObjectStorage"); } + virtual std::shared_ptr tryGetS3StorageClient() { return nullptr; } #endif diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp index 63e11dcd8c8..0f7024196ea 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp @@ -634,6 +634,10 @@ std::shared_ptr S3ObjectStorage::getS3StorageClient() return client.get(); } +std::shared_ptr S3ObjectStorage::tryGetS3StorageClient() +{ + return client.get(); +} } #endif diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.h b/src/Disks/ObjectStorages/S3/S3ObjectStorage.h index 7446a1f6fc8..4170cea22a0 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.h +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.h @@ -169,6 +169,7 @@ public: bool isReadOnly() const override { return s3_settings.get()->read_only; } std::shared_ptr getS3StorageClient() override; + std::shared_ptr tryGetS3StorageClient() override; private: void setNewSettings(std::unique_ptr && s3_settings_); diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index aa9600875db..51b18b2093d 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -171,6 +171,7 @@ FormatSettings getFormatSettings(const ContextPtr & context, const Settings & se format_settings.parquet.parallel_encoding = settings.output_format_parquet_parallel_encoding; format_settings.parquet.data_page_size = settings.output_format_parquet_data_page_size; format_settings.parquet.write_batch_size = settings.output_format_parquet_batch_size; + format_settings.parquet.write_page_index = settings.output_format_parquet_write_page_index; format_settings.parquet.local_read_min_bytes_for_seek = settings.input_format_parquet_local_file_min_bytes_for_seek; format_settings.pretty.charset = settings.output_format_pretty_grid_charset.toString() == "ASCII" ? 
FormatSettings::Pretty::Charset::ASCII : FormatSettings::Pretty::Charset::UTF8; format_settings.pretty.color = settings.output_format_pretty_color; diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h index 18e7df8f24e..466ce0fe3c9 100644 --- a/src/Formats/FormatSettings.h +++ b/src/Formats/FormatSettings.h @@ -275,6 +275,7 @@ struct FormatSettings bool output_compliant_nested_types = true; size_t data_page_size = 1024 * 1024; size_t write_batch_size = 1024; + bool write_page_index = false; size_t local_read_min_bytes_for_seek = 8192; } parquet{}; diff --git a/src/Functions/FunctionBase64Conversion.h b/src/Functions/FunctionBase64Conversion.h index 05914be3837..083179c3ca8 100644 --- a/src/Functions/FunctionBase64Conversion.h +++ b/src/Functions/FunctionBase64Conversion.h @@ -25,10 +25,10 @@ namespace ErrorCodes enum class Base64Variant : uint8_t { Normal, - Url + URL }; -inline std::string preprocessBase64Url(std::string_view src) +inline std::string preprocessBase64URL(std::string_view src) { std::string padded_src; padded_src.reserve(src.size() + 3); @@ -70,7 +70,7 @@ inline std::string preprocessBase64Url(std::string_view src) return padded_src; } -inline size_t postprocessBase64Url(UInt8 * dst, size_t out_len) +inline size_t postprocessBase64URL(UInt8 * dst, size_t out_len) { // Do symbol substitution as described in https://datatracker.ietf.org/doc/html/rfc4648#section-5 for (size_t i = 0; i < out_len; ++i) @@ -95,7 +95,7 @@ inline size_t postprocessBase64Url(UInt8 * dst, size_t out_len) template struct Base64Encode { - static constexpr auto name = (variant == Base64Variant::Normal) ? "base64Encode" : "base64UrlEncode"; + static constexpr auto name = (variant == Base64Variant::Normal) ? "base64Encode" : "base64URLEncode"; static size_t getBufferSize(size_t string_length, size_t string_count) { @@ -111,8 +111,8 @@ struct Base64Encode /// Memory sanitizer doesn't understand if there was uninitialized memory in SIMD register but it was not used in the result of shuffle. __msan_unpoison(dst, outlen); - if constexpr (variant == Base64Variant::Url) - outlen = postprocessBase64Url(dst, outlen); + if constexpr (variant == Base64Variant::URL) + outlen = postprocessBase64URL(dst, outlen); return outlen; } @@ -121,7 +121,7 @@ struct Base64Encode template struct Base64Decode { - static constexpr auto name = (variant == Base64Variant::Normal) ? "base64Decode" : "base64UrlDecode"; + static constexpr auto name = (variant == Base64Variant::Normal) ? "base64Decode" : "base64URLDecode"; static size_t getBufferSize(size_t string_length, size_t string_count) { @@ -132,9 +132,9 @@ struct Base64Decode { int rc; size_t outlen = 0; - if constexpr (variant == Base64Variant::Url) + if constexpr (variant == Base64Variant::URL) { - std::string src_padded = preprocessBase64Url(src); + std::string src_padded = preprocessBase64URL(src); rc = base64_decode(src_padded.data(), src_padded.size(), reinterpret_cast(dst), &outlen, 0); } else @@ -156,7 +156,7 @@ struct Base64Decode template struct TryBase64Decode { - static constexpr auto name = (variant == Base64Variant::Normal) ? "tryBase64Decode" : "tryBase64UrlDecode"; + static constexpr auto name = (variant == Base64Variant::Normal) ? 
"tryBase64Decode" : "tryBase64URLDecode"; static size_t getBufferSize(size_t string_length, size_t string_count) { @@ -167,9 +167,9 @@ struct TryBase64Decode { int rc; size_t outlen = 0; - if constexpr (variant == Base64Variant::Url) + if constexpr (variant == Base64Variant::URL) { - std::string src_padded = preprocessBase64Url(src); + std::string src_padded = preprocessBase64URL(src); rc = base64_decode(src_padded.data(), src_padded.size(), reinterpret_cast(dst), &outlen, 0); } else diff --git a/src/Functions/array/arrayAggregation.cpp b/src/Functions/array/arrayAggregation.cpp index 03aa5fb9086..adb1bb707d8 100644 --- a/src/Functions/array/arrayAggregation.cpp +++ b/src/Functions/array/arrayAggregation.cpp @@ -1,5 +1,7 @@ #include +#include +#include #include #include #include @@ -102,6 +104,11 @@ struct ArrayAggregateImpl static DataTypePtr getReturnType(const DataTypePtr & expression_return, const DataTypePtr & /*array_element*/) { + if (aggregate_operation == AggregateOperation::max || aggregate_operation == AggregateOperation::min) + { + return expression_return; + } + DataTypePtr result; auto call = [&](const auto & types) @@ -133,31 +140,6 @@ struct ArrayAggregateImpl return true; } } - else if constexpr (aggregate_operation == AggregateOperation::max || aggregate_operation == AggregateOperation::min) - { - if constexpr (IsDataTypeDate) - { - result = std::make_shared(); - - return true; - } - else if constexpr (!IsDataTypeDecimal) - { - std::string timezone = getDateTimeTimezone(*expression_return); - result = std::make_shared(timezone); - - return true; - } - else - { - std::string timezone = getDateTimeTimezone(*expression_return); - UInt32 scale = getDecimalScale(*expression_return); - result = std::make_shared(scale, timezone); - - return true; - } - } - return false; }; @@ -378,6 +360,47 @@ struct ArrayAggregateImpl static ColumnPtr execute(const ColumnArray & array, ColumnPtr mapped) { + if constexpr (aggregate_operation == AggregateOperation::max || aggregate_operation == AggregateOperation::min) + { + MutableColumnPtr res; + const auto & column = array.getDataPtr(); + const ColumnConst * const_column = checkAndGetColumn(&*column); + if (const_column) + { + res = const_column->getDataColumn().cloneEmpty(); + } + else + { + res = column->cloneEmpty(); + } + const IColumn::Offsets & offsets = array.getOffsets(); + size_t pos = 0; + for (const auto & offset : offsets) + { + if (offset == pos) + { + res->insertDefault(); + continue; + } + size_t current_max_or_min_index = pos; + ++pos; + for (; pos < offset; ++pos) + { + int compare_result = column->compareAt(pos, current_max_or_min_index, *column, 1); + if (aggregate_operation == AggregateOperation::max && compare_result > 0) + { + current_max_or_min_index = pos; + } + else if (aggregate_operation == AggregateOperation::min && compare_result < 0) + { + current_max_or_min_index = pos; + } + } + res->insert((*column)[current_max_or_min_index]); + } + return res; + } + const IColumn::Offsets & offsets = array.getOffsets(); ColumnPtr res; diff --git a/src/Functions/base64UrlDecode.cpp b/src/Functions/base64URLDecode.cpp similarity index 73% rename from src/Functions/base64UrlDecode.cpp rename to src/Functions/base64URLDecode.cpp index 59975d8f9d1..f5766dc60bd 100644 --- a/src/Functions/base64UrlDecode.cpp +++ b/src/Functions/base64URLDecode.cpp @@ -5,16 +5,16 @@ namespace DB { -REGISTER_FUNCTION(Base64UrlDecode) +REGISTER_FUNCTION(Base64URLDecode) { FunctionDocumentation::Description description = R"(Accepts a base64-encoded URL 
and decodes it from base64 with URL-specific modifications, according to RFC 4648 (https://datatracker.ietf.org/doc/html/rfc4648#section-5).)"; - FunctionDocumentation::Syntax syntax = "base64UrlDecode(encodedUrl)"; - FunctionDocumentation::Arguments arguments = {{"encodedUrl", "String column or constant. If the string is not a valid Base64-encoded value, an exception is thrown."}}; + FunctionDocumentation::Syntax syntax = "base64URLDecode(encodedURL)"; + FunctionDocumentation::Arguments arguments = {{"encodedURL", "String column or constant. If the string is not a valid Base64-encoded value, an exception is thrown."}}; FunctionDocumentation::ReturnedValue returned_value = "A string containing the decoded value of the argument."; - FunctionDocumentation::Examples examples = {{"Example", "SELECT base64UrlDecode('aHR0cDovL2NsaWNraG91c2UuY29t')", "https://clickhouse.com"}}; + FunctionDocumentation::Examples examples = {{"Example", "SELECT base64URLDecode('aHR0cDovL2NsaWNraG91c2UuY29t')", "https://clickhouse.com"}}; FunctionDocumentation::Categories categories = {"String encoding"}; - factory.registerFunction>>({description, syntax, arguments, returned_value, examples, categories}); + factory.registerFunction>>({description, syntax, arguments, returned_value, examples, categories}); } } diff --git a/src/Functions/base64UrlEncode.cpp b/src/Functions/base64URLEncode.cpp similarity index 78% rename from src/Functions/base64UrlEncode.cpp rename to src/Functions/base64URLEncode.cpp index 05d50170c14..73a465a30c5 100644 --- a/src/Functions/base64UrlEncode.cpp +++ b/src/Functions/base64URLEncode.cpp @@ -5,16 +5,16 @@ namespace DB { -REGISTER_FUNCTION(Base64UrlEncode) +REGISTER_FUNCTION(Base64URLEncode) { FunctionDocumentation::Description description = R"(Encodes an URL (String or FixedString) as base64 with URL-specific modifications, according to RFC 4648 (https://datatracker.ietf.org/doc/html/rfc4648#section-5).)"; - FunctionDocumentation::Syntax syntax = "base64UrlEncode(url)"; + FunctionDocumentation::Syntax syntax = "base64URLEncode(url)"; FunctionDocumentation::Arguments arguments = {{"url", "String column or constant."}}; FunctionDocumentation::ReturnedValue returned_value = "A string containing the encoded value of the argument."; - FunctionDocumentation::Examples examples = {{"Example", "SELECT base64UrlEncode('https://clickhouse.com')", "aHR0cHM6Ly9jbGlja2hvdXNlLmNvbQ"}}; + FunctionDocumentation::Examples examples = {{"Example", "SELECT base64URLEncode('https://clickhouse.com')", "aHR0cHM6Ly9jbGlja2hvdXNlLmNvbQ"}}; FunctionDocumentation::Categories categories = {"String encoding"}; - factory.registerFunction>>({description, syntax, arguments, returned_value, examples, categories}); + factory.registerFunction>>({description, syntax, arguments, returned_value, examples, categories}); } } diff --git a/src/Functions/dateTimeToSnowflakeID.cpp b/src/Functions/dateTimeToSnowflakeID.cpp index c37a238e201..968a7628ca5 100644 --- a/src/Functions/dateTimeToSnowflakeID.cpp +++ b/src/Functions/dateTimeToSnowflakeID.cpp @@ -14,31 +14,20 @@ namespace DB { -namespace ErrorCodes -{ - extern const int UNKNOWN_FUNCTION; -} - namespace { /// See generateSnowflakeID.cpp -constexpr int time_shift = 22; +constexpr size_t time_shift = 22; } class FunctionDateTimeToSnowflakeID : public IFunction { -private: - const bool uniform_snowflake_conversion_functions; - public: static constexpr auto name = "dateTimeToSnowflakeID"; - static FunctionPtr create(ContextPtr context) { return std::make_shared(context); } - 
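Editor's note: the snowflake conversion functions reworked here all rely on the same layout -- the millisecond timestamp sits in the upper bits, above 22 low bits reserved for machine id and sequence, which is what the time_shift constant encodes. A minimal sketch of the two directions, assuming an optional custom epoch given in milliseconds:

    #include <cstdint>

    constexpr std::uint64_t kTimeShift = 22;   // low bits hold machine id and sequence counter

    std::uint64_t snowflakeFromMillis(std::uint64_t millis_since_unix_epoch, std::uint64_t custom_epoch_ms = 0)
    {
        return (millis_since_unix_epoch - custom_epoch_ms) << kTimeShift;   // low 22 bits stay zero
    }

    std::uint64_t millisFromSnowflake(std::uint64_t snowflake, std::uint64_t custom_epoch_ms = 0)
    {
        return (snowflake >> kTimeShift) + custom_epoch_ms;
    }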
explicit FunctionDateTimeToSnowflakeID(ContextPtr context) - : uniform_snowflake_conversion_functions(context->getSettingsRef().uniform_snowflake_conversion_functions) - {} + static FunctionPtr create(ContextPtr /*context*/) { return std::make_shared(); } String getName() const override { return name; } size_t getNumberOfArguments() const override { return 0; } @@ -52,7 +41,7 @@ public: {"value", static_cast(&isDateTime), nullptr, "DateTime"} }; FunctionArgumentDescriptors optional_args{ - {"epoch", static_cast(&isNativeUInt), isColumnConst, "UInt*"} + {"epoch", static_cast(&isNativeUInt), isColumnConst, "const UInt*"} }; validateFunctionArgumentTypes(*this, arguments, args, optional_args); @@ -61,12 +50,9 @@ public: ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { - if (!uniform_snowflake_conversion_functions) - throw Exception(ErrorCodes::UNKNOWN_FUNCTION, "To use function {}, setting 'uniform_snowflake_conversion_functions' must be enabled", getName()); - const auto & col_src = *arguments[0].column; - size_t epoch = 0; + UInt64 epoch = 0; if (arguments.size() == 2 && input_rows_count != 0) { const auto & col_epoch = *arguments[1].column; @@ -86,16 +72,10 @@ public: class FunctionDateTime64ToSnowflakeID : public IFunction { -private: - const bool uniform_snowflake_conversion_functions; - public: static constexpr auto name = "dateTime64ToSnowflakeID"; - static FunctionPtr create(ContextPtr context) { return std::make_shared(context); } - explicit FunctionDateTime64ToSnowflakeID(ContextPtr context) - : uniform_snowflake_conversion_functions(context->getSettingsRef().uniform_snowflake_conversion_functions) - {} + static FunctionPtr create(ContextPtr /*context*/) { return std::make_shared(); } String getName() const override { return name; } size_t getNumberOfArguments() const override { return 0; } @@ -109,7 +89,7 @@ public: {"value", static_cast(&isDateTime64), nullptr, "DateTime64"} }; FunctionArgumentDescriptors optional_args{ - {"epoch", static_cast(&isNativeUInt), isColumnConst, "UInt*"} + {"epoch", static_cast(&isNativeUInt), isColumnConst, "const UInt*"} }; validateFunctionArgumentTypes(*this, arguments, args, optional_args); @@ -118,13 +98,10 @@ public: ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { - if (!uniform_snowflake_conversion_functions) - throw Exception(ErrorCodes::UNKNOWN_FUNCTION, "To use function {}, setting 'uniform_snowflake_conversion_functions' must be enabled", getName()); - const auto & col_src = *arguments[0].column; const auto & src_data = typeid_cast(col_src).getData(); - size_t epoch = 0; + UInt64 epoch = 0; if (arguments.size() == 2 && input_rows_count != 0) { const auto & col_epoch = *arguments[1].column; @@ -141,7 +118,7 @@ public: auto factor = multiplier_msec / static_cast(multiplier_src); for (size_t i = 0; i < input_rows_count; ++i) - res_data[i] = static_cast(src_data[i] * factor - epoch) << time_shift; + res_data[i] = std::llround(src_data[i] * factor - epoch) << time_shift; return col_res; } diff --git a/src/Functions/snowflake.cpp b/src/Functions/snowflake.cpp index 801727e9eb9..5ff8a636058 100644 --- a/src/Functions/snowflake.cpp +++ b/src/Functions/snowflake.cpp @@ -13,7 +13,7 @@ /// ------------------------------------------------------------------------------------------------------------------------------ /// The functions in this file are deprecated and should be removed in favor of 
functions 'snowflakeIDToDateTime[64]' and -/// 'dateTime[64]ToSnowflakeID' by summer 2025. Please also mark setting `uniform_snowflake_conversion_functions` as obsolete then. +/// 'dateTime[64]ToSnowflakeID' by summer 2025. Please also mark setting `allow_deprecated_snowflake_conversion_functions` as obsolete then. /// ------------------------------------------------------------------------------------------------------------------------------ namespace DB @@ -40,7 +40,7 @@ constexpr int time_shift = 22; class FunctionDateTimeToSnowflake : public IFunction { private: - const bool uniform_snowflake_conversion_functions; + const bool allow_deprecated_snowflake_conversion_functions; public: static constexpr auto name = "dateTimeToSnowflake"; @@ -51,7 +51,7 @@ public: } explicit FunctionDateTimeToSnowflake(ContextPtr context) - : uniform_snowflake_conversion_functions(context->getSettingsRef().uniform_snowflake_conversion_functions) + : allow_deprecated_snowflake_conversion_functions(context->getSettingsRef().allow_deprecated_snowflake_conversion_functions) {} String getName() const override { return name; } @@ -71,8 +71,8 @@ public: ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { - if (uniform_snowflake_conversion_functions) - throw Exception(ErrorCodes::DEPRECATED_FUNCTION, "Function {} is deprecated, to enable it disable setting 'uniform_snowflake_conversion_functions'", getName()); + if (!allow_deprecated_snowflake_conversion_functions) + throw Exception(ErrorCodes::DEPRECATED_FUNCTION, "Function {} is deprecated, to enable it set setting 'allow_deprecated_snowflake_conversion_functions' to 'true'", getName()); const auto & src = arguments[0]; const auto & src_column = *src.column; @@ -92,7 +92,7 @@ class FunctionSnowflakeToDateTime : public IFunction { private: const bool allow_nonconst_timezone_arguments; - const bool uniform_snowflake_conversion_functions; + const bool allow_deprecated_snowflake_conversion_functions; public: static constexpr auto name = "snowflakeToDateTime"; @@ -104,7 +104,7 @@ public: explicit FunctionSnowflakeToDateTime(ContextPtr context) : allow_nonconst_timezone_arguments(context->getSettingsRef().allow_nonconst_timezone_arguments) - , uniform_snowflake_conversion_functions(context->getSettingsRef().uniform_snowflake_conversion_functions) + , allow_deprecated_snowflake_conversion_functions(context->getSettingsRef().allow_deprecated_snowflake_conversion_functions) {} String getName() const override { return name; } @@ -132,8 +132,8 @@ public: ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { - if (uniform_snowflake_conversion_functions) - throw Exception(ErrorCodes::DEPRECATED_FUNCTION, "Function {} is deprecated, to enable it disable setting 'uniform_snowflake_conversion_functions'", getName()); + if (!allow_deprecated_snowflake_conversion_functions) + throw Exception(ErrorCodes::DEPRECATED_FUNCTION, "Function {} is deprecated, to enable it set setting 'allow_deprecated_snowflake_conversion_functions' to 'true'", getName()); const auto & src = arguments[0]; const auto & src_column = *src.column; @@ -166,7 +166,7 @@ public: class FunctionDateTime64ToSnowflake : public IFunction { private: - const bool uniform_snowflake_conversion_functions; + const bool allow_deprecated_snowflake_conversion_functions; public: static constexpr auto name = "dateTime64ToSnowflake"; @@ -177,7 +177,7 @@ public: } explicit 
FunctionDateTime64ToSnowflake(ContextPtr context) - : uniform_snowflake_conversion_functions(context->getSettingsRef().uniform_snowflake_conversion_functions) + : allow_deprecated_snowflake_conversion_functions(context->getSettingsRef().allow_deprecated_snowflake_conversion_functions) {} String getName() const override { return name; } @@ -197,8 +197,8 @@ public: ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { - if (uniform_snowflake_conversion_functions) - throw Exception(ErrorCodes::DEPRECATED_FUNCTION, "Function {} is deprecated, to enable it disable setting 'uniform_snowflake_conversion_functions'", getName()); + if (!allow_deprecated_snowflake_conversion_functions) + throw Exception(ErrorCodes::DEPRECATED_FUNCTION, "Function {} is deprecated, to enable it set setting 'allow_deprecated_snowflake_conversion_functions' to true", getName()); const auto & src = arguments[0]; @@ -226,7 +226,7 @@ class FunctionSnowflakeToDateTime64 : public IFunction { private: const bool allow_nonconst_timezone_arguments; - const bool uniform_snowflake_conversion_functions; + const bool allow_deprecated_snowflake_conversion_functions; public: static constexpr auto name = "snowflakeToDateTime64"; @@ -238,7 +238,7 @@ public: explicit FunctionSnowflakeToDateTime64(ContextPtr context) : allow_nonconst_timezone_arguments(context->getSettingsRef().allow_nonconst_timezone_arguments) - , uniform_snowflake_conversion_functions(context->getSettingsRef().uniform_snowflake_conversion_functions) + , allow_deprecated_snowflake_conversion_functions(context->getSettingsRef().allow_deprecated_snowflake_conversion_functions) {} String getName() const override { return name; } @@ -266,8 +266,8 @@ public: ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { - if (uniform_snowflake_conversion_functions) - throw Exception(ErrorCodes::DEPRECATED_FUNCTION, "Function {} is deprecated, to enable it disable setting 'uniform_snowflake_conversion_functions'", getName()); + if (!allow_deprecated_snowflake_conversion_functions) + throw Exception(ErrorCodes::DEPRECATED_FUNCTION, "Function {} is deprecated, to enable it set setting 'allow_deprecated_snowflake_conversion_functions' to true", getName()); const auto & src = arguments[0]; const auto & src_column = *src.column; diff --git a/src/Functions/snowflakeIDToDateTime.cpp b/src/Functions/snowflakeIDToDateTime.cpp index abaf09b165b..b799792a56f 100644 --- a/src/Functions/snowflakeIDToDateTime.cpp +++ b/src/Functions/snowflakeIDToDateTime.cpp @@ -18,21 +18,19 @@ namespace DB namespace ErrorCodes { extern const int ILLEGAL_TYPE_OF_ARGUMENT; - extern const int UNKNOWN_FUNCTION; } namespace { /// See generateSnowflakeID.cpp -constexpr int time_shift = 22; +constexpr size_t time_shift = 22; } class FunctionSnowflakeIDToDateTime : public IFunction { private: - const bool uniform_snowflake_conversion_functions; const bool allow_nonconst_timezone_arguments; public: @@ -40,8 +38,7 @@ public: static FunctionPtr create(ContextPtr context) { return std::make_shared(context); } explicit FunctionSnowflakeIDToDateTime(ContextPtr context) - : uniform_snowflake_conversion_functions(context->getSettingsRef().uniform_snowflake_conversion_functions) - , allow_nonconst_timezone_arguments(context->getSettings().allow_nonconst_timezone_arguments) + : allow_nonconst_timezone_arguments(context->getSettings().allow_nonconst_timezone_arguments) {} String getName() 
const override { return name; } @@ -56,7 +53,7 @@ public: {"value", static_cast(&isUInt64), nullptr, "UInt64"} }; FunctionArgumentDescriptors optional_args{ - {"epoch", static_cast(&isNativeUInt), isColumnConst, "UInt*"}, + {"epoch", static_cast(&isNativeUInt), isColumnConst, "const UInt*"}, {"time_zone", static_cast(&isString), nullptr, "String"} }; validateFunctionArgumentTypes(*this, arguments, args, optional_args); @@ -70,12 +67,9 @@ public: ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { - if (!uniform_snowflake_conversion_functions) - throw Exception(ErrorCodes::UNKNOWN_FUNCTION, "To use function {}, setting 'uniform_snowflake_conversion_functions' must be enabled", getName()); - const auto & col_src = *arguments[0].column; - size_t epoch = 0; + UInt64 epoch = 0; if (arguments.size() >= 2 && input_rows_count != 0) { const auto & col_epoch = *arguments[1].column; @@ -108,7 +102,6 @@ public: class FunctionSnowflakeIDToDateTime64 : public IFunction { private: - const bool uniform_snowflake_conversion_functions; const bool allow_nonconst_timezone_arguments; public: @@ -116,8 +109,7 @@ public: static FunctionPtr create(ContextPtr context) { return std::make_shared(context); } explicit FunctionSnowflakeIDToDateTime64(ContextPtr context) - : uniform_snowflake_conversion_functions(context->getSettingsRef().uniform_snowflake_conversion_functions) - , allow_nonconst_timezone_arguments(context->getSettings().allow_nonconst_timezone_arguments) + : allow_nonconst_timezone_arguments(context->getSettings().allow_nonconst_timezone_arguments) {} String getName() const override { return name; } @@ -132,7 +124,7 @@ public: {"value", static_cast(&isUInt64), nullptr, "UInt64"} }; FunctionArgumentDescriptors optional_args{ - {"epoch", static_cast(&isNativeUInt), isColumnConst, "UInt*"}, + {"epoch", static_cast(&isNativeUInt), isColumnConst, "const UInt*"}, {"time_zone", static_cast(&isString), nullptr, "String"} }; validateFunctionArgumentTypes(*this, arguments, args, optional_args); @@ -146,12 +138,9 @@ public: ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { - if (!uniform_snowflake_conversion_functions) - throw Exception(ErrorCodes::UNKNOWN_FUNCTION, "To use function {}, setting 'uniform_snowflake_conversion_functions' must be enabled", getName()); - const auto & col_src = *arguments[0].column; - size_t epoch = 0; + UInt64 epoch = 0; if (arguments.size() >= 2 && input_rows_count != 0) { const auto & col_epoch = *arguments[1].column; diff --git a/src/Functions/tryBase64UrlDecode.cpp b/src/Functions/tryBase64URLDecode.cpp similarity index 69% rename from src/Functions/tryBase64UrlDecode.cpp rename to src/Functions/tryBase64URLDecode.cpp index b9aaf4f9273..b44bc7538ee 100644 --- a/src/Functions/tryBase64UrlDecode.cpp +++ b/src/Functions/tryBase64URLDecode.cpp @@ -5,16 +5,16 @@ namespace DB { -REGISTER_FUNCTION(TryBase64UrlDecode) +REGISTER_FUNCTION(TryBase64URLDecode) { - FunctionDocumentation::Description description = R"(Decodes an URL from base64, like base64UrlDecode but returns an empty string in case of an error.)"; - FunctionDocumentation::Syntax syntax = "tryBase64UrlDecode(encodedUrl)"; - FunctionDocumentation::Arguments arguments = {{"encodedUrl", "String column or constant. 
If the string is not a valid Base64-encoded value with URL-specific modifications, returns an empty string."}}; + FunctionDocumentation::Description description = R"(Decodes an URL from base64, like base64URLDecode but returns an empty string in case of an error.)"; + FunctionDocumentation::Syntax syntax = "tryBase64URLDecode(encodedUrl)"; + FunctionDocumentation::Arguments arguments = {{"encodedURL", "String column or constant. If the string is not a valid Base64-encoded value with URL-specific modifications, returns an empty string."}}; FunctionDocumentation::ReturnedValue returned_value = "A string containing the decoded value of the argument."; - FunctionDocumentation::Examples examples = {{"valid", "SELECT tryBase64UrlDecode('aHR0cHM6Ly9jbGlja2hvdXNlLmNvbQ')", "https://clickhouse.com"}, {"invalid", "SELECT tryBase64UrlDecode('aHR0cHM6Ly9jbGlja')", ""}}; + FunctionDocumentation::Examples examples = {{"valid", "SELECT tryBase64URLDecode('aHR0cHM6Ly9jbGlja2hvdXNlLmNvbQ')", "https://clickhouse.com"}, {"invalid", "SELECT tryBase64UrlDecode('aHR0cHM6Ly9jbGlja')", ""}}; FunctionDocumentation::Categories categories = {"String encoding"}; - factory.registerFunction>>({description, syntax, arguments, returned_value, examples, categories}); + factory.registerFunction>>({description, syntax, arguments, returned_value, examples, categories}); } } diff --git a/src/IO/CascadeWriteBuffer.cpp b/src/IO/CascadeWriteBuffer.cpp index 91a42e77fdb..8b863cb253c 100644 --- a/src/IO/CascadeWriteBuffer.cpp +++ b/src/IO/CascadeWriteBuffer.cpp @@ -83,6 +83,20 @@ void CascadeWriteBuffer::finalizeImpl() } } +void CascadeWriteBuffer::cancelImpl() noexcept +{ + if (curr_buffer) + curr_buffer->position() = position(); + + for (auto & buf : prepared_sources) + { + if (buf) + { + buf->cancel(); + } + } +} + WriteBuffer * CascadeWriteBuffer::setNextBuffer() { if (first_lazy_source_num <= curr_buffer_num && curr_buffer_num < num_sources) diff --git a/src/IO/CascadeWriteBuffer.h b/src/IO/CascadeWriteBuffer.h index a003d11bd8a..7a8b11c6a87 100644 --- a/src/IO/CascadeWriteBuffer.h +++ b/src/IO/CascadeWriteBuffer.h @@ -16,7 +16,7 @@ namespace ErrorCodes * (lazy_sources contains not pointers themself, but their delayed constructors) * * Firtly, CascadeWriteBuffer redirects data to first buffer of the sequence - * If current WriteBuffer cannot receive data anymore, it throws special exception MemoryWriteBuffer::CurrentBufferExhausted in nextImpl() body, + * If current WriteBuffer cannot receive data anymore, it throws special exception WriteBuffer::CurrentBufferExhausted in nextImpl() body, * CascadeWriteBuffer prepare next buffer and continuously redirects data to it. * If there are no buffers anymore CascadeWriteBuffer throws an exception. 
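As a rough illustration of the cascading behaviour described in the comment above, and of why the new cancelImpl() has to walk the already-prepared sources, here is a simplified, self-contained sketch. The class and member names are invented for the example and do not mirror ClickHouse's actual buffer interfaces.

#include <cstddef>
#include <iostream>
#include <memory>
#include <stdexcept>
#include <string>
#include <vector>

// Toy stand-in for a single write target; names are illustrative only.
struct ToySink
{
    explicit ToySink(size_t limit_) : limit(limit_) {}

    void write(const std::string & data)
    {
        if (stored.size() + data.size() > limit)
            throw std::runtime_error("buffer exhausted");  // plays the role of CurrentBufferExhausted
        stored += data;
    }

    void cancel() noexcept { canceled = true; }  // best-effort cleanup, must not throw

    size_t limit;
    std::string stored;
    bool canceled = false;
};

// Redirects writes to the first sink; when it is exhausted, falls through to the next one.
class ToyCascade
{
public:
    explicit ToyCascade(std::vector<std::shared_ptr<ToySink>> sinks_) : sinks(std::move(sinks_)) {}

    void write(const std::string & data)
    {
        while (current < sinks.size())
        {
            try
            {
                sinks[current]->write(data);
                return;
            }
            catch (const std::runtime_error &)
            {
                ++current;  // current sink is full, switch to the next prepared one
            }
        }
        throw std::runtime_error("no sinks left");
    }

    // Mirrors the idea of CascadeWriteBuffer::cancelImpl(): propagate cancellation
    // to every sink that has been prepared so far.
    void cancel() noexcept
    {
        for (auto & sink : sinks)
            if (sink)
                sink->cancel();
    }

private:
    std::vector<std::shared_ptr<ToySink>> sinks;
    size_t current = 0;
};

int main()
{
    ToyCascade cascade({std::make_shared<ToySink>(4), std::make_shared<ToySink>(64)});
    cascade.write("abcdef");   // does not fit into the first sink, lands in the second
    cascade.cancel();          // marks every prepared sink as canceled
    std::cout << "ok\n";
}

The real cancelImpl() additionally copies the current write position back into the active buffer before cancelling the prepared sources, a detail the toy version skips.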
* @@ -48,6 +48,7 @@ public: private: void finalizeImpl() override; + void cancelImpl() noexcept override; WriteBuffer * setNextBuffer(); diff --git a/src/IO/MemoryReadWriteBuffer.h b/src/IO/MemoryReadWriteBuffer.h index d7ca992aa44..a7d3e388cb3 100644 --- a/src/IO/MemoryReadWriteBuffer.h +++ b/src/IO/MemoryReadWriteBuffer.h @@ -16,11 +16,11 @@ namespace DB class MemoryWriteBuffer : public WriteBuffer, public IReadableWriteBuffer, boost::noncopyable, private Allocator { public: - /// Special exception to throw when the current WriteBuffer cannot receive data + /// Special exception to throw when the current MemoryWriteBuffer cannot receive data class CurrentBufferExhausted : public std::exception { public: - const char * what() const noexcept override { return "MemoryWriteBuffer limit is exhausted"; } + const char * what() const noexcept override { return "WriteBuffer limit is exhausted"; } }; /// Use max_total_size_ = 0 for unlimited storage diff --git a/src/IO/WriteBuffer.cpp b/src/IO/WriteBuffer.cpp index bcc7445486e..a86eb4ccea2 100644 --- a/src/IO/WriteBuffer.cpp +++ b/src/IO/WriteBuffer.cpp @@ -11,7 +11,7 @@ namespace DB WriteBuffer::~WriteBuffer() { // That destructor could be call with finalized=false in case of exceptions - if (count() > 0 && !finalized) + if (count() > 0 && !finalized && !canceled) { /// It is totally OK to destroy instance without finalization when an exception occurs /// However it is suspicious to destroy instance without finalization at the green path @@ -20,7 +20,7 @@ WriteBuffer::~WriteBuffer() LoggerPtr log = getLogger("WriteBuffer"); LOG_ERROR( log, - "WriteBuffer is not finalized when destructor is called. " + "WriteBuffer is neither finalized nor canceled when destructor is called. " "No exceptions in flight are detected. " "The file might not be written at all or might be truncated. " "Stack trace: {}", @@ -30,4 +30,13 @@ WriteBuffer::~WriteBuffer() } } +void WriteBuffer::cancel() noexcept +{ + if (canceled || finalized) + return; + + LockMemoryExceptionInThread lock(VariableContext::Global); + cancelImpl(); + canceled = true; +} } diff --git a/src/IO/WriteBuffer.h b/src/IO/WriteBuffer.h index ef4e0058ec3..4759f96a235 100644 --- a/src/IO/WriteBuffer.h +++ b/src/IO/WriteBuffer.h @@ -59,6 +59,7 @@ public: */ pos = working_buffer.begin(); bytes += bytes_in_buffer; + throw; } @@ -75,7 +76,6 @@ public: next(); } - void write(const char * from, size_t n) { if (finalized) @@ -121,6 +121,9 @@ public: if (finalized) return; + if (canceled) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot finalize buffer after cancellation."); + LockMemoryExceptionInThread lock(VariableContext::Global); try { @@ -130,11 +133,15 @@ public: catch (...) { pos = working_buffer.begin(); - finalized = true; + + cancel(); + throw; } } + void cancel() noexcept; + /// Wait for data to be reliably written. Mainly, call fsync for fd. /// May be called after finalize() if needed. virtual void sync() @@ -150,7 +157,12 @@ protected: next(); } + virtual void cancelImpl() noexcept + { + } + bool finalized = false; + bool canceled = false; private: /** Write the data in the buffer (from the beginning of the buffer to the current position). 
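The WriteBuffer hunks above introduce a canceled state next to finalized: finalize() now refuses to run after cancel(), cancels itself if the implementation throws, and the destructor only complains when the buffer is neither finalized nor canceled. A compact sketch of that state machine, with invented names rather than the real WriteBuffer API, might look like this:

#include <iostream>
#include <stdexcept>

// Minimal stand-in for a write buffer with a finalize/cancel life cycle.
// Names and behaviour are illustrative only.
class ToyWriteBuffer
{
public:
    void finalize()
    {
        if (finalized)
            return;
        if (canceled)
            throw std::logic_error("Cannot finalize buffer after cancellation.");

        try
        {
            flushPendingData();   // stands in for finalizeImpl()
            finalized = true;
        }
        catch (...)
        {
            cancel();             // a failed finalize leaves the buffer canceled
            throw;
        }
    }

    void cancel() noexcept
    {
        if (canceled || finalized)
            return;
        // best-effort cleanup, e.g. dropping partially written data; must not throw
        canceled = true;
    }

    ~ToyWriteBuffer()
    {
        if (!finalized && !canceled)
            std::cerr << "neither finalized nor canceled before destruction\n";
    }

private:
    void flushPendingData() { /* write the remaining bytes somewhere */ }

    bool finalized = false;
    bool canceled = false;
};

int main()
{
    {
        ToyWriteBuffer buf;
        buf.finalize();           // normal path
    }
    {
        ToyWriteBuffer buf;
        buf.cancel();             // error path: destructor stays silent
    }
}

The derived buffers in the hunks that follow (file, file descriptor, socket, S3 and vector buffers) then skip finalize() in their destructors once the buffer has been canceled, and WriteBufferFromS3 uses its cancelImpl() to abort the in-flight multipart upload.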
diff --git a/src/IO/WriteBufferDecorator.h b/src/IO/WriteBufferDecorator.h index 88161f8d232..109c2bd24e4 100644 --- a/src/IO/WriteBufferDecorator.h +++ b/src/IO/WriteBufferDecorator.h @@ -47,6 +47,11 @@ public: } } + void cancelImpl() noexcept override + { + out->cancel(); + } + WriteBuffer * getNestedBuffer() { return out; } protected: diff --git a/src/IO/WriteBufferFromFile.cpp b/src/IO/WriteBufferFromFile.cpp index d641e553671..37b1161356f 100644 --- a/src/IO/WriteBufferFromFile.cpp +++ b/src/IO/WriteBufferFromFile.cpp @@ -79,7 +79,8 @@ WriteBufferFromFile::~WriteBufferFromFile() try { - finalize(); + if (!canceled) + finalize(); } catch (...) { @@ -111,7 +112,8 @@ void WriteBufferFromFile::close() if (fd < 0) return; - finalize(); + if (!canceled) + finalize(); if (0 != ::close(fd)) throw Exception(ErrorCodes::CANNOT_CLOSE_FILE, "Cannot close file"); diff --git a/src/IO/WriteBufferFromFileDecorator.cpp b/src/IO/WriteBufferFromFileDecorator.cpp index 0e4e5e13a86..b1e7d843d92 100644 --- a/src/IO/WriteBufferFromFileDecorator.cpp +++ b/src/IO/WriteBufferFromFileDecorator.cpp @@ -28,6 +28,12 @@ void WriteBufferFromFileDecorator::finalizeImpl() } } +void WriteBufferFromFileDecorator::cancelImpl() noexcept +{ + SwapHelper swap(*this, *impl); + impl->cancel(); +} + WriteBufferFromFileDecorator::~WriteBufferFromFileDecorator() { /// It is not a mistake that swap is called here diff --git a/src/IO/WriteBufferFromFileDecorator.h b/src/IO/WriteBufferFromFileDecorator.h index 5344bb1425c..07f843986bb 100644 --- a/src/IO/WriteBufferFromFileDecorator.h +++ b/src/IO/WriteBufferFromFileDecorator.h @@ -24,6 +24,8 @@ public: protected: void finalizeImpl() override; + void cancelImpl() noexcept override; + std::unique_ptr impl; private: diff --git a/src/IO/WriteBufferFromFileDescriptor.cpp b/src/IO/WriteBufferFromFileDescriptor.cpp index a758f99458d..f1207edc55b 100644 --- a/src/IO/WriteBufferFromFileDescriptor.cpp +++ b/src/IO/WriteBufferFromFileDescriptor.cpp @@ -107,7 +107,8 @@ WriteBufferFromFileDescriptor::~WriteBufferFromFileDescriptor() { try { - finalize(); + if (!canceled) + finalize(); } catch (...) { diff --git a/src/IO/WriteBufferFromPocoSocket.cpp b/src/IO/WriteBufferFromPocoSocket.cpp index 10d9fd131cd..5ed4dbdc787 100644 --- a/src/IO/WriteBufferFromPocoSocket.cpp +++ b/src/IO/WriteBufferFromPocoSocket.cpp @@ -197,7 +197,8 @@ WriteBufferFromPocoSocket::~WriteBufferFromPocoSocket() { try { - finalize(); + if (!canceled) + finalize(); } catch (...) { diff --git a/src/IO/WriteBufferFromS3.cpp b/src/IO/WriteBufferFromS3.cpp index cd9949862ca..3682e49b018 100644 --- a/src/IO/WriteBufferFromS3.cpp +++ b/src/IO/WriteBufferFromS3.cpp @@ -224,6 +224,11 @@ void WriteBufferFromS3::finalizeImpl() } } +void WriteBufferFromS3::cancelImpl() noexcept +{ + tryToAbortMultipartUpload(); +} + String WriteBufferFromS3::getVerboseLogDetails() const { String multipart_upload_details; @@ -246,7 +251,7 @@ String WriteBufferFromS3::getShortLogDetails() const bucket, key, multipart_upload_details); } -void WriteBufferFromS3::tryToAbortMultipartUpload() +void WriteBufferFromS3::tryToAbortMultipartUpload() noexcept { try { @@ -264,8 +269,19 @@ WriteBufferFromS3::~WriteBufferFromS3() { LOG_TRACE(limitedLog, "Close WriteBufferFromS3. {}.", getShortLogDetails()); + if (canceled) + { + LOG_INFO( + log, + "WriteBufferFromS3 was canceled." + "The file might not be written to S3. 
" + "{}.", + getVerboseLogDetails()); + return; + } + /// That destructor could be call with finalized=false in case of exceptions - if (!finalized) + if (!finalized && !canceled) { LOG_INFO( log, diff --git a/src/IO/WriteBufferFromS3.h b/src/IO/WriteBufferFromS3.h index 973ca4c7526..b026da607c5 100644 --- a/src/IO/WriteBufferFromS3.h +++ b/src/IO/WriteBufferFromS3.h @@ -54,6 +54,8 @@ private: /// Receives response from the server after sending all data. void finalizeImpl() override; + void cancelImpl() noexcept override; + String getVerboseLogDetails() const; String getShortLogDetails() const; @@ -71,7 +73,7 @@ private: void createMultipartUpload(); void completeMultipartUpload(); void abortMultipartUpload(); - void tryToAbortMultipartUpload(); + void tryToAbortMultipartUpload() noexcept; S3::PutObjectRequest getPutRequest(PartData & data); void makeSinglepartUpload(PartData && data); diff --git a/src/IO/WriteBufferFromVector.h b/src/IO/WriteBufferFromVector.h index 1ea32af2968..17a329d401d 100644 --- a/src/IO/WriteBufferFromVector.h +++ b/src/IO/WriteBufferFromVector.h @@ -63,7 +63,8 @@ public: ~WriteBufferFromVector() override { - finalize(); + if (!canceled) + finalize(); } private: diff --git a/src/Interpreters/AggregationCommon.h b/src/Interpreters/AggregationCommon.h index ab078d1c5e5..43c80d361d1 100644 --- a/src/Interpreters/AggregationCommon.h +++ b/src/Interpreters/AggregationCommon.h @@ -90,10 +90,7 @@ void fillFixedBatch(size_t keys_size, const ColumnRawPtrs & key_columns, const S /// Note: here we violate strict aliasing. /// It should be ok as log as we do not reffer to any value from `out` before filling. const char * source = static_cast(column)->getRawDataBegin(); - size_t offset_to = offset; - if constexpr (std::endian::native == std::endian::big) - offset_to = sizeof(Key) - sizeof(T) - offset; - T * dest = reinterpret_cast(reinterpret_cast(out.data()) + offset_to); + T * dest = reinterpret_cast(reinterpret_cast(out.data()) + offset); fillFixedBatch(num_rows, reinterpret_cast(source), dest); /// NOLINT(bugprone-sizeof-expression) offset += sizeof(T); } diff --git a/src/Interpreters/AggregationMethod.cpp b/src/Interpreters/AggregationMethod.cpp index 3ff4f0cae43..0fc789528b8 100644 --- a/src/Interpreters/AggregationMethod.cpp +++ b/src/Interpreters/AggregationMethod.cpp @@ -160,10 +160,7 @@ void AggregationMethodKeysFixedinsertData(reinterpret_cast(&key) + offset_to, size); + observed_column->insertData(reinterpret_cast(&key) + pos, size); pos += size; } } diff --git a/src/Interpreters/AsynchronousMetricLog.h b/src/Interpreters/AsynchronousMetricLog.h index 739b2aa5b56..2ce1d929592 100644 --- a/src/Interpreters/AsynchronousMetricLog.h +++ b/src/Interpreters/AsynchronousMetricLog.h @@ -8,8 +8,6 @@ #include #include -#include -#include #include diff --git a/src/Interpreters/Cache/FileCache_fwd.h b/src/Interpreters/Cache/FileCache_fwd.h index 55453b78ead..8d2a9d0a2da 100644 --- a/src/Interpreters/Cache/FileCache_fwd.h +++ b/src/Interpreters/Cache/FileCache_fwd.h @@ -6,7 +6,7 @@ namespace DB static constexpr int FILECACHE_DEFAULT_MAX_FILE_SEGMENT_SIZE = 32 * 1024 * 1024; /// 32Mi static constexpr int FILECACHE_DEFAULT_FILE_SEGMENT_ALIGNMENT = 4 * 1024 * 1024; /// 4Mi -static constexpr int FILECACHE_DEFAULT_BACKGROUND_DOWNLOAD_THREADS = 5; +static constexpr int FILECACHE_DEFAULT_BACKGROUND_DOWNLOAD_THREADS = 0; static constexpr int FILECACHE_DEFAULT_BACKGROUND_DOWNLOAD_QUEUE_SIZE_LIMIT = 5000; static constexpr int FILECACHE_DEFAULT_LOAD_METADATA_THREADS = 16; static constexpr 
int FILECACHE_DEFAULT_MAX_ELEMENTS = 10000000; diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 2807807b294..ff8e2cb2adc 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -91,6 +91,7 @@ #include #include #include +#include #include #include #include @@ -367,6 +368,9 @@ struct ContextSharedPart : boost::noncopyable std::atomic_size_t max_view_num_to_warn = 10000lu; std::atomic_size_t max_dictionary_num_to_warn = 1000lu; std::atomic_size_t max_part_num_to_warn = 100000lu; + /// Only for system.server_settings, actually value stored in reloader itself + std::atomic_size_t config_reload_interval_ms = ConfigReloader::DEFAULT_RELOAD_INTERVAL.count(); + String format_schema_path; /// Path to a directory that contains schema files used by input formats. String google_protos_path; /// Path to a directory that contains the proto files for the well-known Protobuf types. mutable OnceFlag action_locks_manager_initialized; @@ -679,6 +683,9 @@ struct ContextSharedPart : boost::noncopyable } } + LOG_TRACE(log, "Shutting down AccessControl"); + access_control->shutdown(); + { std::lock_guard lock(mutex); @@ -2109,7 +2116,7 @@ StoragePtr Context::executeTableFunction(const ASTPtr & table_expression, const } -StoragePtr Context::buildParametrizedViewStorage(const ASTPtr & table_expression, const String & database_name, const String & table_name) +StoragePtr Context::buildParametrizedViewStorage(const String & database_name, const String & table_name, const NameToNameMap & param_values) { if (table_name.empty()) return nullptr; @@ -2122,8 +2129,7 @@ StoragePtr Context::buildParametrizedViewStorage(const ASTPtr & table_expression return nullptr; auto query = original_view->getInMemoryMetadataPtr()->getSelectQuery().inner_query->clone(); - NameToNameMap parameterized_view_values = analyzeFunctionParamValues(table_expression, getQueryContext()); - StorageView::replaceQueryParametersIfParametrizedView(query, parameterized_view_values); + StorageView::replaceQueryParametersIfParametrizedView(query, param_values); ASTCreateQuery create; create.select = query->as(); @@ -4500,6 +4506,16 @@ void Context::checkPartitionCanBeDropped(const String & database, const String & checkCanBeDropped(database, table, partition_size, max_partition_size_to_drop); } +void Context::setConfigReloaderInterval(size_t value_ms) +{ + shared->config_reload_interval_ms.store(value_ms, std::memory_order_relaxed); +} + +size_t Context::getConfigReloaderInterval() const +{ + return shared->config_reload_interval_ms.load(std::memory_order_relaxed); +} + InputFormatPtr Context::getInputFormat(const String & name, ReadBuffer & buf, const Block & sample, UInt64 max_block_size, const std::optional & format_settings, std::optional max_parsing_threads) const { return FormatFactory::instance().getInput(name, buf, sample, shared_from_this(), max_block_size, format_settings, max_parsing_threads); diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index b3ade94ccdc..d1874b4902f 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -774,7 +774,7 @@ public: /// Overload for the new analyzer. Structure inference is performed in QueryAnalysisPass. 
StoragePtr executeTableFunction(const ASTPtr & table_expression, const TableFunctionPtr & table_function_ptr); - StoragePtr buildParametrizedViewStorage(const ASTPtr & table_expression, const String & database_name, const String & table_name); + StoragePtr buildParametrizedViewStorage(const String & database_name, const String & table_name, const NameToNameMap & param_values); void addViewSource(const StoragePtr & storage); StoragePtr getViewSource() const; @@ -1161,6 +1161,9 @@ public: size_t getMaxPartitionSizeToDrop() const; void checkPartitionCanBeDropped(const String & database, const String & table, const size_t & partition_size) const; void checkPartitionCanBeDropped(const String & database, const String & table, const size_t & partition_size, const size_t & max_partition_size_to_drop) const; + /// Only for system.server_settings, actual value is stored in ConfigReloader + void setConfigReloaderInterval(size_t value_ms); + size_t getConfigReloaderInterval() const; /// Lets you select the compression codec according to the conditions described in the configuration file. std::shared_ptr chooseCompressionCodec(size_t part_size, double part_size_ratio) const; diff --git a/src/Interpreters/ErrorLog.cpp b/src/Interpreters/ErrorLog.cpp new file mode 100644 index 00000000000..42616f13e24 --- /dev/null +++ b/src/Interpreters/ErrorLog.cpp @@ -0,0 +1,123 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +namespace DB +{ + +ColumnsDescription ErrorLogElement::getColumnsDescription() +{ + ParserCodec codec_parser; + return ColumnsDescription { + { + "hostname", + std::make_shared(std::make_shared()), + parseQuery(codec_parser, "(ZSTD(1))", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS), + "Hostname of the server executing the query." + }, + { + "event_date", + std::make_shared(), + parseQuery(codec_parser, "(Delta(2), ZSTD(1))", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS), + "Event date." + }, + { + "event_time", + std::make_shared(), + parseQuery(codec_parser, "(Delta(4), ZSTD(1))", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS), + "Event time." + }, + { + "code", + std::make_shared(), + parseQuery(codec_parser, "(ZSTD(1))", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS), + "Error code." + }, + { + "error", + std::make_shared(std::make_shared()), + parseQuery(codec_parser, "(ZSTD(1))", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS), + "Error name." + }, + { + "value", + std::make_shared(), + parseQuery(codec_parser, "(ZSTD(3))", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS), + "Number of errors happened in time interval." + }, + { + "remote", + std::make_shared(), + parseQuery(codec_parser, "(ZSTD(1))", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS), + "Remote exception (i.e. received during one of the distributed queries)." 
+ } + }; +} + +void ErrorLogElement::appendToBlock(MutableColumns & columns) const +{ + size_t column_idx = 0; + + columns[column_idx++]->insert(getFQDNOrHostName()); + columns[column_idx++]->insert(DateLUT::instance().toDayNum(event_time).toUnderType()); + columns[column_idx++]->insert(event_time); + columns[column_idx++]->insert(code); + columns[column_idx++]->insert(ErrorCodes::getName(code)); + columns[column_idx++]->insert(value); + columns[column_idx++]->insert(remote); +} + +struct ValuePair +{ + UInt64 local = 0; + UInt64 remote = 0; +}; + +void ErrorLog::stepFunction(TimePoint current_time) +{ + /// Static lazy initialization to avoid polluting the header with implementation details + static std::vector previous_values(ErrorCodes::end()); + + auto event_time = std::chrono::system_clock::to_time_t(current_time); + + for (ErrorCodes::ErrorCode code = 0, end = ErrorCodes::end(); code < end; ++code) + { + const auto & error = ErrorCodes::values[code].get(); + if (error.local.count != previous_values.at(code).local) + { + ErrorLogElement local_elem { + .event_time=event_time, + .code=code, + .value=error.local.count - previous_values.at(code).local, + .remote=false + }; + this->add(std::move(local_elem)); + previous_values[code].local = error.local.count; + } + if (error.remote.count != previous_values.at(code).remote) + { + ErrorLogElement remote_elem { + .event_time=event_time, + .code=code, + .value=error.remote.count - previous_values.at(code).remote, + .remote=true + }; + this->add(std::move(remote_elem)); + previous_values[code].remote = error.remote.count; + } + } +} + +} diff --git a/src/Interpreters/ErrorLog.h b/src/Interpreters/ErrorLog.h new file mode 100644 index 00000000000..4afe334d4de --- /dev/null +++ b/src/Interpreters/ErrorLog.h @@ -0,0 +1,39 @@ +#pragma once + +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +/** ErrorLog is a log of error values measured at regular time interval. + */ + +struct ErrorLogElement +{ + time_t event_time{}; + ErrorCodes::ErrorCode code{}; + ErrorCodes::Value value{}; + bool remote{}; + + static std::string name() { return "ErrorLog"; } + static ColumnsDescription getColumnsDescription(); + static NamesAndAliases getNamesAndAliases() { return {}; } + void appendToBlock(MutableColumns & columns) const; +}; + + +class ErrorLog : public PeriodicLog +{ + using PeriodicLog::PeriodicLog; + +protected: + void stepFunction(TimePoint current_time) override; +}; + +} diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index 72840d64eeb..f396db70d21 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -626,11 +626,16 @@ BlockIO InterpreterInsertQuery::execute() { bool table_prefers_large_blocks = table->prefersLargeBlocks(); + size_t threads = presink_chains.size(); + + pipeline.resize(1); + pipeline.addTransform(std::make_shared( header, table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, - table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL, - presink_chains.size())); + table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL)); + + pipeline.resize(threads); pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr { @@ -700,8 +705,7 @@ BlockIO InterpreterInsertQuery::execute() auto balancing = std::make_shared( chain.getInputHeader(), table_prefers_large_blocks ? 
settings.min_insert_block_size_rows : settings.max_block_size, - table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL, - presink_chains.size()); + table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL); chain.addSource(std::move(balancing)); } diff --git a/src/Interpreters/MetricLog.cpp b/src/Interpreters/MetricLog.cpp index 6ed29cfadcb..596b0e4f96c 100644 --- a/src/Interpreters/MetricLog.cpp +++ b/src/Interpreters/MetricLog.cpp @@ -56,78 +56,32 @@ void MetricLogElement::appendToBlock(MutableColumns & columns) const columns[column_idx++]->insert(current_metrics[i].toUnderType()); } - -void MetricLog::startCollectMetric(size_t collect_interval_milliseconds_) +void MetricLog::stepFunction(const std::chrono::system_clock::time_point current_time) { - collect_interval_milliseconds = collect_interval_milliseconds_; - is_shutdown_metric_thread = false; - metric_flush_thread = std::make_unique([this] { metricThreadFunction(); }); -} - - -void MetricLog::stopCollectMetric() -{ - bool old_val = false; - if (!is_shutdown_metric_thread.compare_exchange_strong(old_val, true)) - return; - if (metric_flush_thread) - metric_flush_thread->join(); -} - - -void MetricLog::shutdown() -{ - stopCollectMetric(); - stopFlushThread(); -} - - -void MetricLog::metricThreadFunction() -{ - auto desired_timepoint = std::chrono::system_clock::now(); - + /// Static lazy initialization to avoid polluting the header with implementation details /// For differentiation of ProfileEvents counters. - std::vector prev_profile_events(ProfileEvents::end()); + static std::vector prev_profile_events(ProfileEvents::end()); - while (!is_shutdown_metric_thread) + MetricLogElement elem; + elem.event_time = std::chrono::system_clock::to_time_t(current_time); + elem.event_time_microseconds = timeInMicroseconds(current_time); + + elem.profile_events.resize(ProfileEvents::end()); + for (ProfileEvents::Event i = ProfileEvents::Event(0), end = ProfileEvents::end(); i < end; ++i) { - try - { - const auto current_time = std::chrono::system_clock::now(); - - MetricLogElement elem; - elem.event_time = std::chrono::system_clock::to_time_t(current_time); - elem.event_time_microseconds = timeInMicroseconds(current_time); - - elem.profile_events.resize(ProfileEvents::end()); - for (ProfileEvents::Event i = ProfileEvents::Event(0), end = ProfileEvents::end(); i < end; ++i) - { - const ProfileEvents::Count new_value = ProfileEvents::global_counters[i].load(std::memory_order_relaxed); - auto & old_value = prev_profile_events[i]; - elem.profile_events[i] = new_value - old_value; - old_value = new_value; - } - - elem.current_metrics.resize(CurrentMetrics::end()); - for (size_t i = 0, end = CurrentMetrics::end(); i < end; ++i) - { - elem.current_metrics[i] = CurrentMetrics::values[i]; - } - - this->add(std::move(elem)); - - /// We will record current time into table but align it to regular time intervals to avoid time drift. - /// We may drop some time points if the server is overloaded and recording took too much time. - while (desired_timepoint <= current_time) - desired_timepoint += std::chrono::milliseconds(collect_interval_milliseconds); - - std::this_thread::sleep_until(desired_timepoint); - } - catch (...) 
- { - tryLogCurrentException(__PRETTY_FUNCTION__); - } + const ProfileEvents::Count new_value = ProfileEvents::global_counters[i].load(std::memory_order_relaxed); + auto & old_value = prev_profile_events[i]; + elem.profile_events[i] = new_value - old_value; + old_value = new_value; } + + elem.current_metrics.resize(CurrentMetrics::end()); + for (size_t i = 0, end = CurrentMetrics::end(); i < end; ++i) + { + elem.current_metrics[i] = CurrentMetrics::values[i]; + } + + this->add(std::move(elem)); } } diff --git a/src/Interpreters/MetricLog.h b/src/Interpreters/MetricLog.h index 4f1e8fafc11..a6fd3ecfcd3 100644 --- a/src/Interpreters/MetricLog.h +++ b/src/Interpreters/MetricLog.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include #include @@ -9,7 +10,6 @@ #include #include -#include #include @@ -33,26 +33,12 @@ struct MetricLogElement void appendToBlock(MutableColumns & columns) const; }; - -class MetricLog : public SystemLog +class MetricLog : public PeriodicLog { - using SystemLog::SystemLog; + using PeriodicLog::PeriodicLog; -public: - void shutdown() override; - - /// Launches a background thread to collect metrics with interval - void startCollectMetric(size_t collect_interval_milliseconds_); - - /// Stop background thread. Call before shutdown. - void stopCollectMetric(); - -private: - void metricThreadFunction(); - - std::unique_ptr metric_flush_thread; - size_t collect_interval_milliseconds; - std::atomic is_shutdown_metric_thread{false}; +protected: + void stepFunction(TimePoint current_time) override; }; } diff --git a/src/Interpreters/PeriodicLog.cpp b/src/Interpreters/PeriodicLog.cpp new file mode 100644 index 00000000000..9d2891e11eb --- /dev/null +++ b/src/Interpreters/PeriodicLog.cpp @@ -0,0 +1,62 @@ +#include +#include +#include + +namespace DB +{ + +template +void PeriodicLog::startCollect(size_t collect_interval_milliseconds_) +{ + collect_interval_milliseconds = collect_interval_milliseconds_; + is_shutdown_metric_thread = false; + flush_thread = std::make_unique([this] { threadFunction(); }); +} + +template +void PeriodicLog::stopCollect() +{ + bool old_val = false; + if (!is_shutdown_metric_thread.compare_exchange_strong(old_val, true)) + return; + if (flush_thread) + flush_thread->join(); +} + +template +void PeriodicLog::shutdown() +{ + stopCollect(); + this->stopFlushThread(); +} + +template +void PeriodicLog::threadFunction() +{ + auto desired_timepoint = std::chrono::system_clock::now(); + while (!is_shutdown_metric_thread) + { + try + { + const auto current_time = std::chrono::system_clock::now(); + + stepFunction(current_time); + + /// We will record current time into table but align it to regular time intervals to avoid time drift. + /// We may drop some time points if the server is overloaded and recording took too much time. + while (desired_timepoint <= current_time) + desired_timepoint += std::chrono::milliseconds(collect_interval_milliseconds); + + std::this_thread::sleep_until(desired_timepoint); + } + catch (...) 
+ { + tryLogCurrentException(__PRETTY_FUNCTION__); + } + } +} + +#define INSTANTIATE_SYSTEM_LOG(ELEMENT) template class PeriodicLog; +SYSTEM_PERIODIC_LOG_ELEMENTS(INSTANTIATE_SYSTEM_LOG) + +} diff --git a/src/Interpreters/PeriodicLog.h b/src/Interpreters/PeriodicLog.h new file mode 100644 index 00000000000..08c3f7eb23f --- /dev/null +++ b/src/Interpreters/PeriodicLog.h @@ -0,0 +1,43 @@ +#pragma once + +#include +#include + +#include +#include + +#define SYSTEM_PERIODIC_LOG_ELEMENTS(M) \ + M(ErrorLogElement) \ + M(MetricLogElement) + +namespace DB +{ + +template +class PeriodicLog : public SystemLog +{ + using SystemLog::SystemLog; + +public: + using TimePoint = std::chrono::system_clock::time_point; + + /// Launches a background thread to collect metrics with interval + void startCollect(size_t collect_interval_milliseconds_); + + /// Stop background thread + void stopCollect(); + + void shutdown() final; + +protected: + virtual void stepFunction(TimePoint current_time) = 0; + +private: + void threadFunction(); + + std::unique_ptr flush_thread; + size_t collect_interval_milliseconds; + std::atomic is_shutdown_metric_thread{false}; +}; + +} diff --git a/src/Interpreters/ServerAsynchronousMetrics.cpp b/src/Interpreters/ServerAsynchronousMetrics.cpp index 83ff025d2a6..872a9f864df 100644 --- a/src/Interpreters/ServerAsynchronousMetrics.cpp +++ b/src/Interpreters/ServerAsynchronousMetrics.cpp @@ -233,29 +233,22 @@ void ServerAsynchronousMetrics::updateImpl(TimePoint update_time, TimePoint curr } #if USE_AWS_S3 - try + if (auto s3_client = disk->tryGetS3StorageClient()) { - if (auto s3_client = disk->getS3StorageClient()) + if (auto put_throttler = s3_client->getPutRequestThrottler()) { - if (auto put_throttler = s3_client->getPutRequestThrottler()) - { - new_values[fmt::format("DiskPutObjectThrottlerRPS_{}", name)] = { put_throttler->getMaxSpeed(), - "PutObject Request throttling limit on the disk in requests per second (virtual filesystem). Local filesystems may not provide this information." }; - new_values[fmt::format("DiskPutObjectThrottlerAvailable_{}", name)] = { put_throttler->getAvailable(), - "Number of PutObject requests that can be currently issued without hitting throttling limit on the disk (virtual filesystem). Local filesystems may not provide this information." }; - } - if (auto get_throttler = s3_client->getGetRequestThrottler()) - { - new_values[fmt::format("DiskGetObjectThrottlerRPS_{}", name)] = { get_throttler->getMaxSpeed(), - "GetObject Request throttling limit on the disk in requests per second (virtual filesystem). Local filesystems may not provide this information." }; - new_values[fmt::format("DiskGetObjectThrottlerAvailable_{}", name)] = { get_throttler->getAvailable(), - "Number of GetObject requests that can be currently issued without hitting throttling limit on the disk (virtual filesystem). Local filesystems may not provide this information." }; - } + new_values[fmt::format("DiskPutObjectThrottlerRPS_{}", name)] = { put_throttler->getMaxSpeed(), + "PutObject Request throttling limit on the disk in requests per second (virtual filesystem). Local filesystems may not provide this information." }; + new_values[fmt::format("DiskPutObjectThrottlerAvailable_{}", name)] = { put_throttler->getAvailable(), + "Number of PutObject requests that can be currently issued without hitting throttling limit on the disk (virtual filesystem). Local filesystems may not provide this information." 
}; + } + if (auto get_throttler = s3_client->getGetRequestThrottler()) + { + new_values[fmt::format("DiskGetObjectThrottlerRPS_{}", name)] = { get_throttler->getMaxSpeed(), + "GetObject Request throttling limit on the disk in requests per second (virtual filesystem). Local filesystems may not provide this information." }; + new_values[fmt::format("DiskGetObjectThrottlerAvailable_{}", name)] = { get_throttler->getAvailable(), + "Number of GetObject requests that can be currently issued without hitting throttling limit on the disk (virtual filesystem). Local filesystems may not provide this information." }; } - } - catch (...) // NOLINT(bugprone-empty-catch) - { - // Skip disk that do not have s3 throttlers } #endif } diff --git a/src/Interpreters/SessionLog.cpp b/src/Interpreters/SessionLog.cpp index dd6af8b2a19..0615a2a1d62 100644 --- a/src/Interpreters/SessionLog.cpp +++ b/src/Interpreters/SessionLog.cpp @@ -86,6 +86,7 @@ ColumnsDescription SessionLogElement::getColumnsDescription() AUTH_TYPE_NAME_AND_VALUE(AuthType::SHA256_PASSWORD), AUTH_TYPE_NAME_AND_VALUE(AuthType::DOUBLE_SHA1_PASSWORD), AUTH_TYPE_NAME_AND_VALUE(AuthType::LDAP), + AUTH_TYPE_NAME_AND_VALUE(AuthType::JWT), AUTH_TYPE_NAME_AND_VALUE(AuthType::KERBEROS), AUTH_TYPE_NAME_AND_VALUE(AuthType::SSH_KEY), AUTH_TYPE_NAME_AND_VALUE(AuthType::SSL_CERTIFICATE), @@ -93,7 +94,7 @@ ColumnsDescription SessionLogElement::getColumnsDescription() AUTH_TYPE_NAME_AND_VALUE(AuthType::HTTP), }); #undef AUTH_TYPE_NAME_AND_VALUE - static_assert(static_cast(AuthenticationType::MAX) == 10); + static_assert(static_cast(AuthenticationType::MAX) == 11); auto interface_type_column = std::make_shared( DataTypeEnum8::Values diff --git a/src/Interpreters/StorageID.h b/src/Interpreters/StorageID.h index 69dac8ea32d..f9afbc7b98d 100644 --- a/src/Interpreters/StorageID.h +++ b/src/Interpreters/StorageID.h @@ -1,7 +1,6 @@ #pragma once #include #include -#include #include #include #include diff --git a/src/Interpreters/SystemLog.cpp b/src/Interpreters/SystemLog.cpp index 3b25deeb59d..7508d2a7e34 100644 --- a/src/Interpreters/SystemLog.cpp +++ b/src/Interpreters/SystemLog.cpp @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -116,6 +117,7 @@ namespace { constexpr size_t DEFAULT_METRIC_LOG_COLLECT_INTERVAL_MILLISECONDS = 1000; +constexpr size_t DEFAULT_ERROR_LOG_COLLECT_INTERVAL_MILLISECONDS = 1000; /// Creates a system log with MergeTree engine using parameters from config template @@ -286,6 +288,7 @@ SystemLogs::SystemLogs(ContextPtr global_context, const Poco::Util::AbstractConf crash_log = createSystemLog(global_context, "system", "crash_log", config, "crash_log", "Contains information about stack traces for fatal errors. 
The table does not exist in the database by default, it is created only when fatal errors occur."); text_log = createSystemLog(global_context, "system", "text_log", config, "text_log", "Contains logging entries which are normally written to a log file or to stdout."); metric_log = createSystemLog(global_context, "system", "metric_log", config, "metric_log", "Contains history of metrics values from tables system.metrics and system.events, periodically flushed to disk."); + error_log = createSystemLog(global_context, "system", "error_log", config, "error_log", "Contains history of error values from table system.errors, periodically flushed to disk."); filesystem_cache_log = createSystemLog(global_context, "system", "filesystem_cache_log", config, "filesystem_cache_log", "Contains a history of all events occurred with filesystem cache for objects on a remote filesystem."); filesystem_read_prefetches_log = createSystemLog( global_context, "system", "filesystem_read_prefetches_log", config, "filesystem_read_prefetches_log", "Contains a history of all prefetches done during reading from MergeTables backed by a remote filesystem."); @@ -320,6 +323,8 @@ SystemLogs::SystemLogs(ContextPtr global_context, const Poco::Util::AbstractConf logs.emplace_back(text_log.get()); if (metric_log) logs.emplace_back(metric_log.get()); + if (error_log) + logs.emplace_back(error_log.get()); if (asynchronous_metric_log) logs.emplace_back(asynchronous_metric_log.get()); if (opentelemetry_span_log) @@ -366,7 +371,14 @@ SystemLogs::SystemLogs(ContextPtr global_context, const Poco::Util::AbstractConf { size_t collect_interval_milliseconds = config.getUInt64("metric_log.collect_interval_milliseconds", DEFAULT_METRIC_LOG_COLLECT_INTERVAL_MILLISECONDS); - metric_log->startCollectMetric(collect_interval_milliseconds); + metric_log->startCollect(collect_interval_milliseconds); + } + + if (error_log) + { + size_t collect_interval_milliseconds = config.getUInt64("error_log.collect_interval_milliseconds", + DEFAULT_ERROR_LOG_COLLECT_INTERVAL_MILLISECONDS); + error_log->startCollect(collect_interval_milliseconds); } if (crash_log) diff --git a/src/Interpreters/SystemLog.h b/src/Interpreters/SystemLog.h index af635ca1bdb..c6f4a8ea9a0 100644 --- a/src/Interpreters/SystemLog.h +++ b/src/Interpreters/SystemLog.h @@ -40,6 +40,7 @@ class PartLog; class TextLog; class TraceLog; class CrashLog; +class ErrorLog; class MetricLog; class AsynchronousMetricLog; class OpenTelemetrySpanLog; @@ -72,6 +73,7 @@ struct SystemLogs std::shared_ptr crash_log; /// Used to log server crashes. std::shared_ptr text_log; /// Used to log all text messages. std::shared_ptr metric_log; /// Used to log all metrics. + std::shared_ptr error_log; /// Used to log errors. 
std::shared_ptr filesystem_cache_log; std::shared_ptr filesystem_read_prefetches_log; std::shared_ptr s3_queue_log; diff --git a/src/Loggers/Loggers.cpp b/src/Loggers/Loggers.cpp index 0bd4b94d999..35b96bce42a 100644 --- a/src/Loggers/Loggers.cpp +++ b/src/Loggers/Loggers.cpp @@ -321,7 +321,12 @@ void Loggers::updateLevels(Poco::Util::AbstractConfiguration & config, Poco::Log bool should_log_to_console = isatty(STDIN_FILENO) || isatty(STDERR_FILENO); if (config.getBool("logger.console", false) || (!config.hasProperty("logger.console") && !is_daemon && should_log_to_console)) - split->setLevel("console", log_level); + { + auto console_log_level_string = config.getString("logger.console_log_level", log_level_string); + auto console_log_level = Poco::Logger::parseLevel(console_log_level_string); + max_log_level = std::max(console_log_level, max_log_level); + split->setLevel("console", console_log_level); + } else split->setLevel("console", 0); diff --git a/src/Parsers/ASTAlterQuery.cpp b/src/Parsers/ASTAlterQuery.cpp index 90b63d2ce6f..58eeb7c4cbf 100644 --- a/src/Parsers/ASTAlterQuery.cpp +++ b/src/Parsers/ASTAlterQuery.cpp @@ -60,6 +60,8 @@ ASTPtr ASTAlterCommand::clone() const res->settings_resets = res->children.emplace_back(settings_resets->clone()).get(); if (select) res->select = res->children.emplace_back(select->clone()).get(); + if (sql_security) + res->sql_security = res->children.emplace_back(sql_security->clone()).get(); if (rename_to) res->rename_to = res->children.emplace_back(rename_to->clone()).get(); @@ -522,6 +524,7 @@ void ASTAlterCommand::forEachPointerToChild(std::function f) f(reinterpret_cast(&settings_changes)); f(reinterpret_cast(&settings_resets)); f(reinterpret_cast(&select)); + f(reinterpret_cast(&sql_security)); f(reinterpret_cast(&rename_to)); } diff --git a/src/Parsers/ASTSQLSecurity.cpp b/src/Parsers/ASTSQLSecurity.cpp index d6f1c21d035..74408747290 100644 --- a/src/Parsers/ASTSQLSecurity.cpp +++ b/src/Parsers/ASTSQLSecurity.cpp @@ -7,7 +7,7 @@ namespace DB void ASTSQLSecurity::formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const { - if (!type.has_value()) + if (!type) return; if (definer || is_definer_current_user) diff --git a/src/Parsers/Access/ASTAuthenticationData.cpp b/src/Parsers/Access/ASTAuthenticationData.cpp index 3a62480dc0c..dab93ba3de5 100644 --- a/src/Parsers/Access/ASTAuthenticationData.cpp +++ b/src/Parsers/Access/ASTAuthenticationData.cpp @@ -89,6 +89,12 @@ void ASTAuthenticationData::formatImpl(const FormatSettings & settings, FormatSt password = true; break; } + case AuthenticationType::JWT: + { + prefix = "CLAIMS"; + parameter = true; + break; + } case AuthenticationType::LDAP: { prefix = "SERVER"; diff --git a/src/Parsers/CommonParsers.h b/src/Parsers/CommonParsers.h index f0cbe42da80..93642d94880 100644 --- a/src/Parsers/CommonParsers.h +++ b/src/Parsers/CommonParsers.h @@ -250,6 +250,7 @@ namespace DB MR_MACROS(IS_NOT_NULL, "IS NOT NULL") \ MR_MACROS(IS_NULL, "IS NULL") \ MR_MACROS(JOIN, "JOIN") \ + MR_MACROS(JWT, "JWT") \ MR_MACROS(KERBEROS, "KERBEROS") \ MR_MACROS(KEY_BY, "KEY BY") \ MR_MACROS(KEY, "KEY") \ diff --git a/src/Parsers/FunctionSecretArgumentsFinderAST.h b/src/Parsers/FunctionSecretArgumentsFinderAST.h index 348b2ca9e3a..5b77485afb0 100644 --- a/src/Parsers/FunctionSecretArgumentsFinderAST.h +++ b/src/Parsers/FunctionSecretArgumentsFinderAST.h @@ -82,6 +82,16 @@ private: /// s3Cluster('cluster_name', 'url', 'aws_access_key_id', 'aws_secret_access_key', ...) 
findS3FunctionSecretArguments(/* is_cluster_function= */ true); } + else if (function.name == "azureBlobStorage") + { + /// azureBlobStorage(connection_string|storage_account_url, container_name, blobpath, account_name, account_key, format, compression, structure) + findAzureBlobStorageFunctionSecretArguments(/* is_cluster_function= */ false); + } + else if (function.name == "azureBlobStorageCluster") + { + /// azureBlobStorageCluster(cluster, connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure]) + findAzureBlobStorageFunctionSecretArguments(/* is_cluster_function= */ true); + } else if ((function.name == "remote") || (function.name == "remoteSecure")) { /// remote('addresses_expr', 'db', 'table', 'user', 'password', ...) @@ -169,6 +179,43 @@ private: markSecretArgument(url_arg_idx + 2); } + void findAzureBlobStorageFunctionSecretArguments(bool is_cluster_function) + { + /// azureBlobStorage('cluster_name', 'conn_string/storage_account_url', ...) has 'conn_string/storage_account_url' as its second argument. + size_t url_arg_idx = is_cluster_function ? 1 : 0; + + if (!is_cluster_function && isNamedCollectionName(0)) + { + /// azureBlobStorage(named_collection, ..., account_key = 'account_key', ...) + findSecretNamedArgument("account_key", 1); + return; + } + else if (is_cluster_function && isNamedCollectionName(1)) + { + /// azureBlobStorageCluster(cluster, named_collection, ..., account_key = 'account_key', ...) + findSecretNamedArgument("account_key", 2); + return; + } + + /// We should check other arguments first because we don't need to do any replacement in case storage_account_url is not used + /// azureBlobStorage(connection_string|storage_account_url, container_name, blobpath, account_name, account_key, format, compression, structure) + /// azureBlobStorageCluster(cluster, connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure]) + size_t count = arguments->size(); + if ((url_arg_idx + 4 <= count) && (count <= url_arg_idx + 7)) + { + String second_arg; + if (tryGetStringFromArgument(url_arg_idx + 3, &second_arg)) + { + if (second_arg == "auto" || KnownFormatNames::instance().exists(second_arg)) + return; /// The argument after 'url' is a format: s3('url', 'format', ...) + } + } + + /// We're going to replace 'account_key' with '[HIDDEN]' if account_key is used in the signature + if (url_arg_idx + 4 < count) + markSecretArgument(url_arg_idx + 4); + } + void findURLSecretArguments() { if (!isNamedCollectionName(0)) diff --git a/src/Parsers/ParserCreateQuery.h b/src/Parsers/ParserCreateQuery.h index 5f6df33176f..bb37491a366 100644 --- a/src/Parsers/ParserCreateQuery.h +++ b/src/Parsers/ParserCreateQuery.h @@ -213,6 +213,7 @@ bool IParserColumnDeclaration::parseImpl(Pos & pos, ASTPtr & node, E return res; }; + /// Keep this list of keywords in sync with ParserDataType::parseImpl(). 
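The new findAzureBlobStorageFunctionSecretArguments() above follows the same idea as the existing S3 handling: work out which positional argument can hold account_key for the plain and the Cluster signatures, skip the replacement when the candidate slot actually holds a format name, and otherwise mark the argument as secret. A stripped-down, standalone sketch of that positional logic is shown below; the helper names are invented for the example and the named-collection branches of the real function are omitted.

#include <cstddef>
#include <iostream>
#include <optional>
#include <set>
#include <string>
#include <vector>

// Illustrative only: decide which positional argument of
// azureBlobStorage(...) / azureBlobStorageCluster(...) should be hidden.
std::optional<size_t> accountKeyArgumentToHide(
    const std::vector<std::string> & args,
    bool is_cluster_function,
    const std::set<std::string> & known_formats)
{
    // 'connection_string|storage_account_url' is the first argument for the plain
    // function and the second one for the *Cluster variant.
    const size_t url_arg_idx = is_cluster_function ? 1 : 0;
    const size_t count = args.size();

    // Signatures carrying a key look like
    //   url, container, blobpath, account_name, account_key, [format, compression, structure]
    if (url_arg_idx + 4 >= count)
        return std::nullopt;

    // If the slot three positions after the url already holds a format name,
    // the short signature without credentials is being used, so there is nothing to hide.
    if (url_arg_idx + 4 <= count && count <= url_arg_idx + 7)
    {
        const std::string & candidate = args[url_arg_idx + 3];
        if (candidate == "auto" || known_formats.count(candidate))
            return std::nullopt;
    }

    return url_arg_idx + 4;  // position of account_key
}

int main()
{
    std::set<std::string> formats{"CSV", "Parquet", "JSONEachRow"};
    std::vector<std::string> args{"https://acc.blob.core.windows.net", "cont", "b.csv",
                                  "account", "secret-key", "CSV"};

    if (auto idx = accountKeyArgumentToHide(args, /*is_cluster_function=*/ false, formats))
        std::cout << "hide argument #" << *idx << '\n';   // prints: hide argument #4
}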
if (!null_check_without_moving() && !s_default.checkWithoutMoving(pos, expected) && !s_materialized.checkWithoutMoving(pos, expected) diff --git a/src/Parsers/ParserDataType.cpp b/src/Parsers/ParserDataType.cpp index b5bc9f89990..ad33c7e4558 100644 --- a/src/Parsers/ParserDataType.cpp +++ b/src/Parsers/ParserDataType.cpp @@ -1,5 +1,6 @@ #include +#include #include #include #include @@ -103,12 +104,28 @@ bool ParserDataType::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) return false; tryGetIdentifierNameInto(identifier, type_name); - /// Don't accept things like Array(`x.y`). + /// When parsing we accept quoted type names (e.g. `UInt64`), but when formatting we print them + /// unquoted (e.g. UInt64). This introduces problems when the string in the quotes is garbage: + /// * Array(`x.y`) -> Array(x.y) -> fails to parse + /// * `Null` -> Null -> parses as keyword instead of type name + /// Here we check for these cases and reject. if (!std::all_of(type_name.begin(), type_name.end(), [](char c) { return isWordCharASCII(c) || c == '$'; })) { expected.add(pos, "type name"); return false; } + /// Keywords that IParserColumnDeclaration recognizes before the type name. + /// E.g. reject CREATE TABLE a (x `Null`) because in "x Null" the Null would be parsed as + /// column attribute rather than type name. + { + String n = type_name; + boost::to_upper(n); + if (n == "NOT" || n == "NULL" || n == "DEFAULT" || n == "MATERIALIZED" || n == "EPHEMERAL" || n == "ALIAS" || n == "AUTO" || n == "PRIMARY" || n == "COMMENT" || n == "CODEC") + { + expected.add(pos, "type name"); + return false; + } + } String type_name_upper = Poco::toUpper(type_name); String type_name_suffix; diff --git a/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp b/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp index 2662232a048..a5d334f4f1d 100644 --- a/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp @@ -321,6 +321,9 @@ void ParquetBlockOutputFormat::writeUsingArrow(std::vector chunks) parquet::WriterProperties::Builder builder; builder.version(getParquetVersion(format_settings)); builder.compression(getParquetCompression(format_settings.parquet.output_compression_method)); + // write page index is disable at default. 
+ if (format_settings.parquet.write_page_index) + builder.enable_write_page_index(); parquet::ArrowWriterProperties::Builder writer_props_builder; if (format_settings.parquet.output_compliant_nested_types) diff --git a/src/Processors/IProcessor.cpp b/src/Processors/IProcessor.cpp index 5ab5e5277aa..f403aca2280 100644 --- a/src/Processors/IProcessor.cpp +++ b/src/Processors/IProcessor.cpp @@ -1,21 +1,57 @@ #include #include +#include +#include +#include + namespace DB { +void IProcessor::cancel() +{ + + bool already_cancelled = is_cancelled.exchange(true, std::memory_order_acq_rel); + if (already_cancelled) + return; + + onCancel(); +} + +String IProcessor::debug() const +{ + WriteBufferFromOwnString buf; + writeString(getName(), buf); + buf.write('\n'); + + writeString("inputs (hasData, isFinished):\n", buf); + for (const auto & port : inputs) + { + buf.write('\t'); + writeBoolText(port.hasData(), buf); + buf.write(' '); + writeBoolText(port.isFinished(), buf); + buf.write('\n'); + } + + writeString("outputs (hasData, isNeeded):\n", buf); + for (const auto & port : outputs) + { + buf.write('\t'); + writeBoolText(port.hasData(), buf); + buf.write(' '); + writeBoolText(port.isNeeded(), buf); + buf.write('\n'); + } + + buf.finalize(); + return buf.str(); +} + void IProcessor::dump() const { - std::cerr << getName() << "\n"; - - std::cerr << "inputs:\n"; - for (const auto & port : inputs) - std::cerr << "\t" << port.hasData() << " " << port.isFinished() << "\n"; - - std::cerr << "outputs:\n"; - for (const auto & port : outputs) - std::cerr << "\t" << port.hasData() << " " << port.isNeeded() << "\n"; + std::cerr << debug(); } @@ -39,4 +75,3 @@ std::string IProcessor::statusToName(Status status) } } - diff --git a/src/Processors/IProcessor.h b/src/Processors/IProcessor.h index 63f32d8deb7..6f779e7a8d4 100644 --- a/src/Processors/IProcessor.h +++ b/src/Processors/IProcessor.h @@ -238,12 +238,7 @@ public: /// In case if query was cancelled executor will wait till all processors finish their jobs. /// Generally, there is no reason to check this flag. However, it may be reasonable for long operations (e.g. i/o). bool isCancelled() const { return is_cancelled.load(std::memory_order_acquire); } - void cancel() - { - bool already_cancelled = is_cancelled.exchange(true, std::memory_order_acq_rel); - if (!already_cancelled) - onCancel(); - } + void cancel(); /// Additional method which is called in case if ports were updated while work() method. /// May be used to stop execution in rare cases. @@ -286,6 +281,7 @@ public: const auto & getOutputs() const { return outputs; } /// Debug output. + String debug() const; void dump() const; /// Used to print pipeline. 
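The IProcessor::cancel() body above is a classic flag-plus-hook pattern: an atomic exchange makes sure the cancellation work runs exactly once even if several threads race on cancel(), and onCancel() gives derived processors a place to release resources. A minimal illustration of the same pattern outside of ClickHouse's class hierarchy, with invented names, is:

#include <atomic>
#include <iostream>
#include <thread>
#include <vector>

// Illustrative base class: cancellation flag plus a one-shot hook.
class Cancellable
{
public:
    virtual ~Cancellable() = default;

    bool isCancelled() const { return is_cancelled.load(std::memory_order_acquire); }

    void cancel()
    {
        // exchange() returns the previous value, so only the first caller
        // observes 'false' and runs the hook; later callers are no-ops.
        if (!is_cancelled.exchange(true, std::memory_order_acq_rel))
            onCancel();
    }

protected:
    virtual void onCancel() {}

private:
    std::atomic<bool> is_cancelled{false};
};

class ToySource : public Cancellable
{
public:
    void run()
    {
        while (!isCancelled())   // long-running work checks the flag periodically
            std::this_thread::yield();
    }

protected:
    void onCancel() override { std::cout << "onCancel ran once\n"; }
};

int main()
{
    ToySource source;
    std::thread worker([&] { source.run(); });

    // Several racing cancel() calls still trigger onCancel() exactly once.
    std::vector<std::thread> cancellers;
    for (int i = 0; i < 4; ++i)
        cancellers.emplace_back([&] { source.cancel(); });

    for (auto & t : cancellers)
        t.join();
    worker.join();
}

The PostgreSQLSource hunk further down relies on the same flag: its row-reading loop now checks isCancelled() instead of looping unconditionally.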
diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index 8a095348232..aba3f6ff2da 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -120,6 +120,7 @@ namespace ProfileEvents extern const Event SelectedParts; extern const Event SelectedRanges; extern const Event SelectedMarks; + extern const Event SelectQueriesWithPrimaryKeyUsage; } namespace DB @@ -1229,7 +1230,7 @@ Pipe ReadFromMergeTree::spreadMarkRangesAmongStreamsFinal( bool no_merging_final = do_not_merge_across_partitions_select_final && std::distance(parts_to_merge_ranges[range_index], parts_to_merge_ranges[range_index + 1]) == 1 && parts_to_merge_ranges[range_index]->data_part->info.level > 0 && - data.merging_params.is_deleted_column.empty(); + data.merging_params.is_deleted_column.empty() && !reader_settings.read_in_order; if (no_merging_final) { @@ -1264,7 +1265,7 @@ Pipe ReadFromMergeTree::spreadMarkRangesAmongStreamsFinal( /// Parts of non-zero level still may contain duplicate PK values to merge on FINAL if there's is_deleted column, /// so we have to process all ranges. It would be more optimal to remove this flag and add an extra filtering step. bool split_parts_ranges_into_intersecting_and_non_intersecting_final = settings.split_parts_ranges_into_intersecting_and_non_intersecting_final && - data.merging_params.is_deleted_column.empty(); + data.merging_params.is_deleted_column.empty() && !reader_settings.read_in_order; SplitPartsWithRangesByPrimaryKeyResult split_ranges_result = splitPartsWithRangesByPrimaryKey( metadata_for_reading->getPrimaryKey(), @@ -1569,11 +1570,17 @@ ReadFromMergeTree::AnalysisResultPtr ReadFromMergeTree::selectRangesToRead( if (indexes->part_values && indexes->part_values->empty()) return std::make_shared(std::move(result)); - if (settings.force_primary_key && indexes->key_condition.alwaysUnknownOrTrue()) + if (indexes->key_condition.alwaysUnknownOrTrue()) { - throw Exception(ErrorCodes::INDEX_NOT_USED, - "Primary key ({}) is not used and setting 'force_primary_key' is set", - fmt::join(primary_key_column_names, ", ")); + if (settings.force_primary_key) + { + throw Exception(ErrorCodes::INDEX_NOT_USED, + "Primary key ({}) is not used and setting 'force_primary_key' is set", + fmt::join(primary_key_column_names, ", ")); + } + } else + { + ProfileEvents::increment(ProfileEvents::SelectQueriesWithPrimaryKeyUsage); } LOG_DEBUG(log, "Key condition: {}", indexes->key_condition.toString()); diff --git a/src/Processors/Sources/PostgreSQLSource.cpp b/src/Processors/Sources/PostgreSQLSource.cpp index 4b828d6699c..f18c63ed385 100644 --- a/src/Processors/Sources/PostgreSQLSource.cpp +++ b/src/Processors/Sources/PostgreSQLSource.cpp @@ -120,7 +120,7 @@ Chunk PostgreSQLSource::generate() MutableColumns columns = description.sample_block.cloneEmptyColumns(); size_t num_rows = 0; - while (true) + while (!isCancelled()) { const std::vector * row{stream->read_row()}; diff --git a/src/Processors/Transforms/MergeJoinTransform.cpp b/src/Processors/Transforms/MergeJoinTransform.cpp index 159a3244fe9..fb3b2faa9c5 100644 --- a/src/Processors/Transforms/MergeJoinTransform.cpp +++ b/src/Processors/Transforms/MergeJoinTransform.cpp @@ -9,7 +9,6 @@ #include #include -#include #include #include #include @@ -19,6 +18,7 @@ #include #include #include +#include #include @@ -40,7 +40,7 @@ FullMergeJoinCursorPtr createCursor(const Block & block, const Names & columns) desc.reserve(columns.size()); for (const auto 
& name : columns) desc.emplace_back(name); - return std::make_unique(materializeBlock(block), desc); + return std::make_unique(block, desc); } template @@ -234,9 +234,14 @@ void inline addMany(PaddedPODArray & left_or_right_map, size_t idx, size for (size_t i = 0; i < num; ++i) left_or_right_map.push_back(idx); } - } +FullMergeJoinCursor::FullMergeJoinCursor(const Block & sample_block_, const SortDescription & description_) + : sample_block(materializeBlock(sample_block_).cloneEmpty()), desc(description_) +{ +} + + const Chunk & FullMergeJoinCursor::getCurrent() const { return current_chunk; @@ -260,6 +265,10 @@ void FullMergeJoinCursor::setChunk(Chunk && chunk) return; } + // should match the structure of sample_block (after materialization) + convertToFullIfConst(chunk); + convertToFullIfSparse(chunk); + current_chunk = std::move(chunk); cursor = SortCursorImpl(sample_block, current_chunk.getColumns(), desc); } diff --git a/src/Processors/Transforms/MergeJoinTransform.h b/src/Processors/Transforms/MergeJoinTransform.h index cf9331abd59..5ca6b076544 100644 --- a/src/Processors/Transforms/MergeJoinTransform.h +++ b/src/Processors/Transforms/MergeJoinTransform.h @@ -193,11 +193,7 @@ private: class FullMergeJoinCursor : boost::noncopyable { public: - explicit FullMergeJoinCursor(const Block & sample_block_, const SortDescription & description_) - : sample_block(sample_block_.cloneEmpty()) - , desc(description_) - { - } + explicit FullMergeJoinCursor(const Block & sample_block_, const SortDescription & description_); bool fullyCompleted() const; void setChunk(Chunk && chunk); diff --git a/src/Processors/Transforms/PlanSquashingTransform.cpp b/src/Processors/Transforms/PlanSquashingTransform.cpp index 8598a29278a..0f433165f14 100644 --- a/src/Processors/Transforms/PlanSquashingTransform.cpp +++ b/src/Processors/Transforms/PlanSquashingTransform.cpp @@ -1,5 +1,4 @@ #include -#include #include namespace DB @@ -10,136 +9,36 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } -PlanSquashingTransform::PlanSquashingTransform(const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes, size_t num_ports) - : IProcessor(InputPorts(num_ports, header), OutputPorts(num_ports, header)), squashing(header, min_block_size_rows, min_block_size_bytes) +PlanSquashingTransform::PlanSquashingTransform( + const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes) + : IInflatingTransform(header, header), squashing(header, min_block_size_rows, min_block_size_bytes) { } -IProcessor::Status PlanSquashingTransform::prepare() +void PlanSquashingTransform::consume(Chunk chunk) { - Status status = Status::Ready; - - while (planning_status != PlanningStatus::FINISH) - { - switch (planning_status) - { - case INIT: - init(); - break; - case READ_IF_CAN: - return prepareConsume(); - case PUSH: - return sendOrFlush(); - case FLUSH: - return sendOrFlush(); - case FINISH: - break; /// never reached - } - } - if (status == Status::Ready) - status = finish(); - else - throw Exception(ErrorCodes::LOGICAL_ERROR, "There should be a Ready status to finish the PlanSquashing"); - - return status; + if (Chunk current_chunk = squashing.add(std::move(chunk)); current_chunk.hasChunkInfo()) + squashed_chunk.swap(current_chunk); } -void PlanSquashingTransform::work() +Chunk PlanSquashingTransform::generate() { - prepare(); + if (!squashed_chunk.hasChunkInfo()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Can't generate chunk in SimpleSquashingChunksTransform"); + + Chunk result_chunk; + 
result_chunk.swap(squashed_chunk); + return result_chunk; } -void PlanSquashingTransform::init() +bool PlanSquashingTransform::canGenerate() { - for (auto input: inputs) - if (!input.isFinished()) - input.setNeeded(); - - planning_status = PlanningStatus::READ_IF_CAN; + return squashed_chunk.hasChunkInfo(); } -IProcessor::Status PlanSquashingTransform::prepareConsume() +Chunk PlanSquashingTransform::getRemaining() { - bool all_finished = true; - for (auto & input : inputs) - { - if (!input.isFinished()) - { - all_finished = false; - input.setNeeded(); - } - else - continue; - - if (input.hasData()) - { - chunk = input.pull(); - chunk = transform(std::move(chunk)); - - if (chunk.hasChunkInfo()) - { - planning_status = PlanningStatus::PUSH; - return Status::Ready; - } - } - } - - if (all_finished) /// If all inputs are closed, we check if we have data in balancing - { - if (squashing.isDataLeft()) /// If we have data in balancing, we process this data - { - planning_status = PlanningStatus::FLUSH; - chunk = flushChunk(); - return Status::Ready; - } - planning_status = PlanningStatus::FINISH; - return Status::Ready; - } - - return Status::NeedData; -} - -Chunk PlanSquashingTransform::transform(Chunk && chunk_) -{ - return squashing.add(std::move(chunk_)); -} - -Chunk PlanSquashingTransform::flushChunk() -{ - return squashing.flush(); -} - -IProcessor::Status PlanSquashingTransform::sendOrFlush() -{ - if (!chunk) - { - planning_status = PlanningStatus::FINISH; - return Status::Ready; - } - - for (auto &output : outputs) - { - if (output.canPush()) - { - if (planning_status == PlanningStatus::PUSH) - planning_status = PlanningStatus::READ_IF_CAN; - else - planning_status = PlanningStatus::FINISH; - - output.push(std::move(chunk)); - return Status::Ready; - } - } - return Status::PortFull; -} - -IProcessor::Status PlanSquashingTransform::finish() -{ - for (auto & in : inputs) - in.close(); - for (auto & output : outputs) - output.finish(); - - return Status::Finished; + Chunk current_chunk = squashing.flush(); + return current_chunk; } } diff --git a/src/Processors/Transforms/PlanSquashingTransform.h b/src/Processors/Transforms/PlanSquashingTransform.h index a9152d9dbe9..4ad2ec2d089 100644 --- a/src/Processors/Transforms/PlanSquashingTransform.h +++ b/src/Processors/Transforms/PlanSquashingTransform.h @@ -1,47 +1,29 @@ #pragma once -#include -#include #include - -enum PlanningStatus -{ - INIT, - READ_IF_CAN, - PUSH, - FLUSH, - FINISH -}; +#include namespace DB { -class PlanSquashingTransform : public IProcessor +class PlanSquashingTransform : public IInflatingTransform { public: PlanSquashingTransform( - const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes, size_t num_ports); + const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes); String getName() const override { return "PlanSquashingTransform"; } - InputPorts & getInputPorts() { return inputs; } - OutputPorts & getOutputPorts() { return outputs; } - - Status prepare() override; - void work() override; - void init(); - Status prepareConsume(); - Status sendOrFlush(); - Status waitForDataIn(); - Status finish(); - - Chunk transform(Chunk && chunk); - Chunk flushChunk(); +protected: + void consume(Chunk chunk) override; + bool canGenerate() override; + Chunk generate() override; + Chunk getRemaining() override; private: - Chunk chunk; Squashing squashing; - PlanningStatus planning_status = PlanningStatus::INIT; + Chunk squashed_chunk; + Chunk finish_chunk; }; } diff --git 
a/src/Server/CertificateReloader.cpp b/src/Server/CertificateReloader.cpp index 98d7a362bd7..df7b6e7fbd7 100644 --- a/src/Server/CertificateReloader.cpp +++ b/src/Server/CertificateReloader.cpp @@ -15,18 +15,23 @@ namespace DB namespace { + /// Call set process for certificate. -int callSetCertificate(SSL * ssl, [[maybe_unused]] void * arg) +int callSetCertificate(SSL * ssl, void * arg) { - return CertificateReloader::instance().setCertificate(ssl); + if (!arg) + return -1; + + const CertificateReloader::MultiData * pdata = reinterpret_cast(arg); + return CertificateReloader::instance().setCertificate(ssl, pdata); } } /// This is callback for OpenSSL. It will be called on every connection to obtain a certificate and private key. -int CertificateReloader::setCertificate(SSL * ssl) +int CertificateReloader::setCertificate(SSL * ssl, const CertificateReloader::MultiData * pdata) { - auto current = data.get(); + auto current = pdata->data.get(); if (!current) return -1; @@ -65,24 +70,54 @@ int CertificateReloader::setCertificate(SSL * ssl) } -void CertificateReloader::init() +void CertificateReloader::init(MultiData * pdata) { LOG_DEBUG(log, "Initializing certificate reloader."); /// Set a callback for OpenSSL to allow get the updated cert and key. - auto* ctx = Poco::Net::SSLManager::instance().defaultServerContext()->sslContext(); - SSL_CTX_set_cert_cb(ctx, callSetCertificate, nullptr); - init_was_not_made = false; + SSL_CTX_set_cert_cb(pdata->ctx, callSetCertificate, reinterpret_cast(pdata)); + pdata->init_was_not_made = false; } void CertificateReloader::tryLoad(const Poco::Util::AbstractConfiguration & config) +{ + tryLoad(config, nullptr, Poco::Net::SSLManager::CFG_SERVER_PREFIX); +} + + +void CertificateReloader::tryLoad(const Poco::Util::AbstractConfiguration & config, SSL_CTX * ctx, const std::string & prefix) +{ + std::lock_guard lock{data_mutex}; + tryLoadImpl(config, ctx, prefix); +} + + +std::list::iterator CertificateReloader::findOrInsert(SSL_CTX * ctx, const std::string & prefix) +{ + auto it = data.end(); + auto i = data_index.find(prefix); + if (i != data_index.end()) + it = i->second; + else + { + if (!ctx) + ctx = Poco::Net::SSLManager::instance().defaultServerContext()->sslContext(); + data.push_back(MultiData(ctx)); + --it; + data_index[prefix] = it; + } + return it; +} + + +void CertificateReloader::tryLoadImpl(const Poco::Util::AbstractConfiguration & config, SSL_CTX * ctx, const std::string & prefix) { /// If at least one of the files is modified - recreate - std::string new_cert_path = config.getString("openSSL.server.certificateFile", ""); - std::string new_key_path = config.getString("openSSL.server.privateKeyFile", ""); + std::string new_cert_path = config.getString(prefix + "certificateFile", ""); + std::string new_key_path = config.getString(prefix + "privateKeyFile", ""); /// For empty paths (that means, that user doesn't want to use certificates) /// no processing required @@ -93,32 +128,41 @@ void CertificateReloader::tryLoad(const Poco::Util::AbstractConfiguration & conf } else { - bool cert_file_changed = cert_file.changeIfModified(std::move(new_cert_path), log); - bool key_file_changed = key_file.changeIfModified(std::move(new_key_path), log); - std::string pass_phrase = config.getString("openSSL.server.privateKeyPassphraseHandler.options.password", ""); - - if (cert_file_changed || key_file_changed) - { - LOG_DEBUG(log, "Reloading certificate ({}) and key ({}).", cert_file.path, key_file.path); - data.set(std::make_unique(cert_file.path, key_file.path, 
pass_phrase)); - LOG_INFO(log, "Reloaded certificate ({}) and key ({}).", cert_file.path, key_file.path); - } - - /// If callback is not set yet try { - if (init_was_not_made) - init(); + auto it = findOrInsert(ctx, prefix); + + bool cert_file_changed = it->cert_file.changeIfModified(std::move(new_cert_path), log); + bool key_file_changed = it->key_file.changeIfModified(std::move(new_key_path), log); + + if (cert_file_changed || key_file_changed) + { + LOG_DEBUG(log, "Reloading certificate ({}) and key ({}).", it->cert_file.path, it->key_file.path); + std::string pass_phrase = config.getString(prefix + "privateKeyPassphraseHandler.options.password", ""); + it->data.set(std::make_unique(it->cert_file.path, it->key_file.path, pass_phrase)); + LOG_INFO(log, "Reloaded certificate ({}) and key ({}).", it->cert_file.path, it->key_file.path); + } + + /// If callback is not set yet + if (it->init_was_not_made) + init(&*it); } catch (...) { - init_was_not_made = true; LOG_ERROR(log, getCurrentExceptionMessageAndPattern(/* with_stacktrace */ false)); } } } +void CertificateReloader::tryReloadAll(const Poco::Util::AbstractConfiguration & config) +{ + std::lock_guard lock{data_mutex}; + for (auto & item : data_index) + tryLoadImpl(config, item.second->ctx, item.first); +} + + CertificateReloader::Data::Data(std::string cert_path, std::string key_path, std::string pass_phrase) : certs_chain(Poco::Crypto::X509Certificate::readPEM(cert_path)), key(/* public key */ "", /* private key */ key_path, pass_phrase) { diff --git a/src/Server/CertificateReloader.h b/src/Server/CertificateReloader.h index 5ab799037d5..7472d2f6baa 100644 --- a/src/Server/CertificateReloader.h +++ b/src/Server/CertificateReloader.h @@ -6,6 +6,9 @@ #include #include +#include +#include +#include #include #include @@ -31,28 +34,13 @@ class CertificateReloader public: using stat_t = struct stat; - /// Singleton - CertificateReloader(CertificateReloader const &) = delete; - void operator=(CertificateReloader const &) = delete; - static CertificateReloader & instance() + struct Data { - static CertificateReloader instance; - return instance; - } + Poco::Crypto::X509Certificate::List certs_chain; + Poco::Crypto::EVPPKey key; - /// Initialize the callback and perform the initial cert loading - void init(); - - /// Handle configuration reload - void tryLoad(const Poco::Util::AbstractConfiguration & config); - - /// A callback for OpenSSL - int setCertificate(SSL * ssl); - -private: - CertificateReloader() = default; - - LoggerPtr log = getLogger("CertificateReloader"); + Data(std::string cert_path, std::string key_path, std::string pass_phrase); + }; struct File { @@ -65,19 +53,55 @@ private: bool changeIfModified(std::string new_path, LoggerPtr logger); }; - File cert_file{"certificate"}; - File key_file{"key"}; - - struct Data + struct MultiData { - Poco::Crypto::X509Certificate::List certs_chain; - Poco::Crypto::EVPPKey key; + SSL_CTX * ctx = nullptr; + MultiVersion data; + bool init_was_not_made = true; - Data(std::string cert_path, std::string key_path, std::string pass_phrase); + File cert_file{"certificate"}; + File key_file{"key"}; + + explicit MultiData(SSL_CTX * ctx_) : ctx(ctx_) {} }; - MultiVersion data; - bool init_was_not_made = true; + /// Singleton + CertificateReloader(CertificateReloader const &) = delete; + void operator=(CertificateReloader const &) = delete; + static CertificateReloader & instance() + { + static CertificateReloader instance; + return instance; + } + + /// Handle configuration reload for default path + 
void tryLoad(const Poco::Util::AbstractConfiguration & config); + + /// Handle configuration reload + void tryLoad(const Poco::Util::AbstractConfiguration & config, SSL_CTX * ctx, const std::string & prefix); + + /// Handle configuration reload for all contexts + void tryReloadAll(const Poco::Util::AbstractConfiguration & config); + + /// A callback for OpenSSL + int setCertificate(SSL * ssl, const MultiData * pdata); + +private: + CertificateReloader() = default; + + /// Initialize the callback and perform the initial cert loading + void init(MultiData * pdata) TSA_REQUIRES(data_mutex); + + /// Unsafe implementation + void tryLoadImpl(const Poco::Util::AbstractConfiguration & config, SSL_CTX * ctx, const std::string & prefix) TSA_REQUIRES(data_mutex); + + std::list::iterator findOrInsert(SSL_CTX * ctx, const std::string & prefix) TSA_REQUIRES(data_mutex); + + LoggerPtr log = getLogger("CertificateReloader"); + + std::list data TSA_GUARDED_BY(data_mutex); + std::unordered_map::iterator> data_index TSA_GUARDED_BY(data_mutex); + mutable std::mutex data_mutex; }; } diff --git a/src/Server/HTTP/WriteBufferFromHTTPServerResponse.cpp b/src/Server/HTTP/WriteBufferFromHTTPServerResponse.cpp index 8098671a903..e2098b284bf 100644 --- a/src/Server/HTTP/WriteBufferFromHTTPServerResponse.cpp +++ b/src/Server/HTTP/WriteBufferFromHTTPServerResponse.cpp @@ -162,7 +162,8 @@ WriteBufferFromHTTPServerResponse::~WriteBufferFromHTTPServerResponse() { try { - finalize(); + if (!canceled) + finalize(); } catch (...) { diff --git a/src/Server/HTTPHandler.cpp b/src/Server/HTTPHandler.cpp index 26ec185f5ad..3241e22fa35 100644 --- a/src/Server/HTTPHandler.cpp +++ b/src/Server/HTTPHandler.cpp @@ -1027,14 +1027,7 @@ catch (...) { tryLogCurrentException(log, "Cannot send exception to client"); - try - { - used_output.finalize(); - } - catch (...) - { - tryLogCurrentException(log, "Cannot flush data to client (after sending exception)"); - } + used_output.cancel(); } void HTTPHandler::formatExceptionForClient(int exception_code, HTTPServerRequest & request, HTTPServerResponse & response, Output & used_output) @@ -1172,7 +1165,7 @@ void HTTPHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse /// Check if exception was thrown in used_output.finalize(). /// In this case used_output can be in invalid state and we /// cannot write in it anymore. So, just log this exception. 
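// Illustrative sketch (not part of the patch): the finalize-or-cancel contract
// that WriteBufferFromHTTPServerResponse and HTTPHandler::Output follow after
// this change. The class below is a hypothetical stand-in; the point is that a
// canceled output must never be flushed again, not even from the destructor.
class SketchOutput
{
public:
    void cancel() noexcept { canceled = true; }   // give up: no further flushes

    void finalize()                               // flush pending data, may throw
    {
        if (finalized)
            return;
        // ... write the remaining data to the peer ...
        finalized = true;
    }

    ~SketchOutput()
    {
        try
        {
            if (!canceled)
                finalize();
        }
        catch (...)   // destructors must not throw
        {
        }
    }

private:
    bool finalized = false;
    bool canceled = false;
};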
- if (used_output.isFinalized()) + if (used_output.isFinalized() || used_output.isCanceled()) { if (thread_trace_context) thread_trace_context->root_span.addAttribute("clickhouse.exception", "Cannot flush data to client"); @@ -1191,6 +1184,8 @@ void HTTPHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse if (thread_trace_context) thread_trace_context->root_span.addAttribute(status); + + return; } used_output.finalize(); diff --git a/src/Server/HTTPHandler.h b/src/Server/HTTPHandler.h index c5551102f7a..c78c45826f0 100644 --- a/src/Server/HTTPHandler.h +++ b/src/Server/HTTPHandler.h @@ -78,6 +78,7 @@ private: WriteBuffer * out_maybe_delayed_and_compressed = nullptr; bool finalized = false; + bool canceled = false; bool exception_is_written = false; std::function exception_writer; @@ -99,6 +100,24 @@ private: out->finalize(); } + void cancel() + { + if (canceled) + return; + canceled = true; + + if (out_compressed_holder) + out_compressed_holder->cancel(); + if (out) + out->cancel(); + } + + + bool isCanceled() const + { + return canceled; + } + bool isFinalized() const { return finalized; diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index 14a2bceebf1..b59fe2c1849 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -387,7 +387,7 @@ void TCPHandler::runImpl() query_scope.emplace(query_context, /* fatal_error_callback */ [this] { - std::lock_guard lock(fatal_error_mutex); + std::lock_guard lock(out_mutex); sendLogs(); }); @@ -475,7 +475,7 @@ void TCPHandler::runImpl() Stopwatch watch; CurrentMetrics::Increment callback_metric_increment(CurrentMetrics::ReadTaskRequestsSent); - std::lock_guard lock(task_callback_mutex); + std::scoped_lock lock(out_mutex, task_callback_mutex); if (state.cancellation_status == CancellationStatus::FULLY_CANCELLED) return {}; @@ -491,7 +491,7 @@ void TCPHandler::runImpl() { Stopwatch watch; CurrentMetrics::Increment callback_metric_increment(CurrentMetrics::MergeTreeAllRangesAnnouncementsSent); - std::lock_guard lock(task_callback_mutex); + std::scoped_lock lock(out_mutex, task_callback_mutex); if (state.cancellation_status == CancellationStatus::FULLY_CANCELLED) return; @@ -505,7 +505,7 @@ void TCPHandler::runImpl() { Stopwatch watch; CurrentMetrics::Increment callback_metric_increment(CurrentMetrics::MergeTreeReadTaskRequestsSent); - std::lock_guard lock(task_callback_mutex); + std::scoped_lock lock(out_mutex, task_callback_mutex); if (state.cancellation_status == CancellationStatus::FULLY_CANCELLED) return std::nullopt; @@ -553,7 +553,7 @@ void TCPHandler::runImpl() { auto callback = [this]() { - std::scoped_lock lock(task_callback_mutex, fatal_error_mutex); + std::scoped_lock lock(out_mutex, task_callback_mutex); if (getQueryCancellationStatus() == CancellationStatus::FULLY_CANCELLED) return true; @@ -572,7 +572,7 @@ void TCPHandler::runImpl() finish_or_cancel(); - std::lock_guard lock(task_callback_mutex); + std::lock_guard lock(out_mutex); /// Send final progress after calling onFinish(), since it will update the progress. 
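// Illustrative sketch (not part of the patch): why std::scoped_lock is used for
// the pair (out_mutex, task_callback_mutex). scoped_lock acquires all of its
// mutexes with a deadlock-avoidance algorithm, so a callback that both writes
// to the connection and consults the cancellation status cannot deadlock
// against code that takes the mutexes elsewhere. Mutex names are as in the
// patch; the function is hypothetical.
#include <mutex>

static std::mutex out_mutex;            // protects the output buffer (sendData, sendLogs, ...)
static std::mutex task_callback_mutex;  // protects task callbacks / cancellation checks

static void exampleCallback()
{
    std::scoped_lock lock(out_mutex, task_callback_mutex);  // both or neither
    // ... check the cancellation status, then write a packet to the client ...
}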
/// @@ -595,7 +595,7 @@ void TCPHandler::runImpl() break; { - std::lock_guard lock(task_callback_mutex); + std::lock_guard lock(out_mutex); sendLogs(); sendEndOfStream(); } @@ -1014,7 +1014,7 @@ void TCPHandler::processOrdinaryQuery() if (query_context->getSettingsRef().allow_experimental_query_deduplication) { - std::lock_guard lock(task_callback_mutex); + std::lock_guard lock(out_mutex); sendPartUUIDs(); } @@ -1024,13 +1024,13 @@ void TCPHandler::processOrdinaryQuery() if (header) { - std::lock_guard lock(task_callback_mutex); + std::lock_guard lock(out_mutex); sendData(header); } } /// Defer locking to cover a part of the scope below and everything after it - std::unique_lock progress_lock(task_callback_mutex, std::defer_lock); + std::unique_lock out_lock(out_mutex, std::defer_lock); { PullingAsyncPipelineExecutor executor(pipeline); @@ -1056,6 +1056,9 @@ void TCPHandler::processOrdinaryQuery() executor.cancelReading(); } + lock.unlock(); + out_lock.lock(); + if (after_send_progress.elapsed() / 1000 >= interactive_delay) { /// Some time passed and there is a progress. @@ -1071,12 +1074,14 @@ void TCPHandler::processOrdinaryQuery() if (!state.io.null_format) sendData(block); } + + out_lock.unlock(); } /// This lock wasn't acquired before and we make .lock() call here /// so everything under this line is covered even together /// with sendProgress() out of the scope - progress_lock.lock(); + out_lock.lock(); /** If data has run out, we will send the profiling data and total values to * the last zero block to be able to use diff --git a/src/Server/TCPHandler.h b/src/Server/TCPHandler.h index 75e36836b63..74afb5a14a5 100644 --- a/src/Server/TCPHandler.h +++ b/src/Server/TCPHandler.h @@ -19,6 +19,7 @@ #include #include +#include "Core/Types.h" #include "IServer.h" #include "Interpreters/AsynchronousInsertQueue.h" #include "Server/TCPProtocolStackData.h" @@ -225,8 +226,13 @@ private: std::optional nonce; String cluster; + /// `out_mutex` protects `out` (WriteBuffer). + /// So it is used for method sendData(), sendProgress(), sendLogs(), etc. + std::mutex out_mutex; + /// `task_callback_mutex` protects tasks callbacks. + /// Inside these callbacks we might also change cancellation status, + /// so it also protects cancellation status checks. std::mutex task_callback_mutex; - std::mutex fatal_error_mutex; /// At the moment, only one ongoing query in the connection is supported at a time. 
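// Illustrative sketch (not part of the patch): the deferred locking pattern
// used in processOrdinaryQuery(). The unique_lock is created unlocked, taken
// only around writes to the connection while blocks are pulled, and finally
// kept locked so the trailing totals / end-of-stream packets are covered too.
// The function is hypothetical; out_mutex is the member introduced above.
#include <mutex>

static void pullAndSendBlocks(std::mutex & out_mutex)
{
    std::unique_lock out_lock(out_mutex, std::defer_lock);  // not locked yet

    while (/* executor.pull(block) */ false)
    {
        out_lock.lock();
        // ... sendProgress(); sendData(block); ...
        out_lock.unlock();                                  // let log/progress callbacks in
    }

    out_lock.lock();  // everything below (totals, extremes, end of stream) stays protected
    // ... sendProfileEvents(); sendEndOfStream(); ...
}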
QueryState state; diff --git a/src/Server/TLSHandler.cpp b/src/Server/TLSHandler.cpp new file mode 100644 index 00000000000..b0ed342c251 --- /dev/null +++ b/src/Server/TLSHandler.cpp @@ -0,0 +1,118 @@ +#include + +#include +#include + + +#if USE_SSL +# include +# include +# include +#endif + +#if !defined(USE_SSL) || USE_SSL == 0 +namespace ErrorCodes +{ + extern const int SUPPORT_IS_DISABLED; +} +#endif + +DB::TLSHandler::TLSHandler( + const StreamSocket & socket, + [[maybe_unused]] const LayeredConfiguration & config_, + [[maybe_unused]] const std::string & prefix_, + TCPProtocolStackData & stack_data_) + : Poco::Net::TCPServerConnection(socket) +#if USE_SSL + , config(config_) + , prefix(prefix_) +#endif + , stack_data(stack_data_) +{ +#if USE_SSL + params.privateKeyFile = config.getString(prefix + SSLManager::CFG_PRIV_KEY_FILE, ""); + params.certificateFile = config.getString(prefix + SSLManager::CFG_CERTIFICATE_FILE, params.privateKeyFile); + if (!params.privateKeyFile.empty() && !params.certificateFile.empty()) + { + // for backwards compatibility + auto ctx = SSLManager::instance().defaultServerContext(); + params.caLocation = config.getString(prefix + SSLManager::CFG_CA_LOCATION, ctx->getCAPaths().caLocation); + + // optional options for which we have defaults defined + params.verificationMode = SSLManager::VAL_VER_MODE; + if (config.hasProperty(prefix + SSLManager::CFG_VER_MODE)) + { + // either: none, relaxed, strict, once + std::string mode = config.getString(prefix + SSLManager::CFG_VER_MODE); + params.verificationMode = Poco::Net::Utility::convertVerificationMode(mode); + } + + params.verificationDepth = config.getInt(prefix + SSLManager::CFG_VER_DEPTH, SSLManager::VAL_VER_DEPTH); + params.loadDefaultCAs = config.getBool(prefix + SSLManager::CFG_ENABLE_DEFAULT_CA, SSLManager::VAL_ENABLE_DEFAULT_CA); + params.cipherList = config.getString(prefix + SSLManager::CFG_CIPHER_LIST, SSLManager::VAL_CIPHER_LIST); + params.cipherList = config.getString(prefix + SSLManager::CFG_CYPHER_LIST, params.cipherList); // for backwards compatibility + + bool require_tlsv1 = config.getBool(prefix + SSLManager::CFG_REQUIRE_TLSV1, false); + bool require_tlsv1_1 = config.getBool(prefix + SSLManager::CFG_REQUIRE_TLSV1_1, false); + bool require_tlsv1_2 = config.getBool(prefix + SSLManager::CFG_REQUIRE_TLSV1_2, false); + if (require_tlsv1_2) + usage = Context::TLSV1_2_SERVER_USE; + else if (require_tlsv1_1) + usage = Context::TLSV1_1_SERVER_USE; + else if (require_tlsv1) + usage = Context::TLSV1_SERVER_USE; + else + usage = Context::SERVER_USE; + + params.dhParamsFile = config.getString(prefix + SSLManager::CFG_DH_PARAMS_FILE, ""); + params.ecdhCurve = config.getString(prefix + SSLManager::CFG_ECDH_CURVE, ""); + + std::string disabled_protocols_list = config.getString(prefix + SSLManager::CFG_DISABLE_PROTOCOLS, ""); + Poco::StringTokenizer dp_tok(disabled_protocols_list, ";,", Poco::StringTokenizer::TOK_TRIM | Poco::StringTokenizer::TOK_IGNORE_EMPTY); + disabled_protocols = 0; + for (const auto & token : dp_tok) + { + if (token == "sslv2") + disabled_protocols |= Context::PROTO_SSLV2; + else if (token == "sslv3") + disabled_protocols |= Context::PROTO_SSLV3; + else if (token == "tlsv1") + disabled_protocols |= Context::PROTO_TLSV1; + else if (token == "tlsv1_1") + disabled_protocols |= Context::PROTO_TLSV1_1; + else if (token == "tlsv1_2") + disabled_protocols |= Context::PROTO_TLSV1_2; + } + + extended_verification = config.getBool(prefix + SSLManager::CFG_EXTENDED_VERIFICATION, false); + 
prefer_server_ciphers = config.getBool(prefix + SSLManager::CFG_PREFER_SERVER_CIPHERS, false); + } +#endif +} + + +void DB::TLSHandler::run() +{ +#if USE_SSL + auto ctx = SSLManager::instance().defaultServerContext(); + if (!params.privateKeyFile.empty() && !params.certificateFile.empty()) + { + ctx = SSLManager::instance().getCustomServerContext(prefix); + if (!ctx) + { + ctx = new Context(usage, params); + ctx->disableProtocols(disabled_protocols); + ctx->enableExtendedCertificateVerification(extended_verification); + if (prefer_server_ciphers) + ctx->preferServerCiphers(); + CertificateReloader::instance().tryLoad(config, ctx->sslContext(), prefix); + ctx = SSLManager::instance().setCustomServerContext(prefix, ctx); + } + } + socket() = SecureStreamSocket::attach(socket(), ctx); + stack_data.socket = socket(); + stack_data.certificate = params.certificateFile; +#else + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSL support for TCP protocol is disabled because Poco library was built without NetSSL support."); +#endif +} diff --git a/src/Server/TLSHandler.h b/src/Server/TLSHandler.h index dd025e3e165..2bec7380b08 100644 --- a/src/Server/TLSHandler.h +++ b/src/Server/TLSHandler.h @@ -1,9 +1,10 @@ #pragma once #include -#include -#include #include +#include + +#include "config.h" #if USE_SSL # include @@ -14,11 +15,6 @@ namespace DB { -namespace ErrorCodes -{ - extern const int SUPPORT_IS_DISABLED; -} - class TLSHandler : public Poco::Net::TCPServerConnection { #if USE_SSL @@ -27,30 +23,22 @@ class TLSHandler : public Poco::Net::TCPServerConnection using Context = Poco::Net::Context; #endif using StreamSocket = Poco::Net::StreamSocket; + using LayeredConfiguration = Poco::Util::LayeredConfiguration; public: - explicit TLSHandler(const StreamSocket & socket, const std::string & key_, const std::string & certificate_, TCPProtocolStackData & stack_data_) - : Poco::Net::TCPServerConnection(socket) - , key(key_) - , certificate(certificate_) - , stack_data(stack_data_) - {} + explicit TLSHandler(const StreamSocket & socket, const LayeredConfiguration & config_, const std::string & prefix_, TCPProtocolStackData & stack_data_); + + void run() override; - void run() override - { -#if USE_SSL - auto ctx = SSLManager::instance().defaultServerContext(); - if (!key.empty() && !certificate.empty()) - ctx = new Context(Context::Usage::SERVER_USE, key, certificate, ctx->getCAPaths().caLocation); - socket() = SecureStreamSocket::attach(socket(), ctx); - stack_data.socket = socket(); - stack_data.certificate = certificate; -#else - throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSL support for TCP protocol is disabled because Poco library was built without NetSSL support."); -#endif - } private: - std::string key [[maybe_unused]]; - std::string certificate [[maybe_unused]]; +#if USE_SSL + Context::Params params [[maybe_unused]]; + Context::Usage usage [[maybe_unused]]; + int disabled_protocols = 0; + bool extended_verification = false; + bool prefer_server_ciphers = false; + const LayeredConfiguration & config [[maybe_unused]]; + std::string prefix [[maybe_unused]]; +#endif TCPProtocolStackData & stack_data [[maybe_unused]]; }; diff --git a/src/Server/TLSHandlerFactory.h b/src/Server/TLSHandlerFactory.h index 19602c7d25e..e8f3a1b7853 100644 --- a/src/Server/TLSHandlerFactory.h +++ b/src/Server/TLSHandlerFactory.h @@ -48,8 +48,8 @@ public: LOG_TRACE(log, "TCP Request. 
Address: {}", socket.peerAddress().toString()); return new TLSHandler( socket, - server.config().getString(conf_name + ".privateKeyFile", ""), - server.config().getString(conf_name + ".certificateFile", ""), + server.config(), + conf_name + ".", stack_data); } catch (const Poco::Net::NetException &) diff --git a/src/Server/grpc_protos/clickhouse_grpc.proto b/src/Server/grpc_protos/clickhouse_grpc.proto index c9ba6f28506..dc17570f833 100644 --- a/src/Server/grpc_protos/clickhouse_grpc.proto +++ b/src/Server/grpc_protos/clickhouse_grpc.proto @@ -90,6 +90,7 @@ message QueryInfo { string user_name = 9; string password = 10; string quota = 11; + string jwt = 25; // Works exactly like sessions in the HTTP protocol. string session_id = 12; diff --git a/src/Storages/AlterCommands.cpp b/src/Storages/AlterCommands.cpp index 0d491067afc..35a5e95e643 100644 --- a/src/Storages/AlterCommands.cpp +++ b/src/Storages/AlterCommands.cpp @@ -1057,7 +1057,7 @@ bool AlterCommand::isRemovingProperty() const bool AlterCommand::isDropSomething() const { - return type == Type::DROP_COLUMN || type == Type::DROP_INDEX + return type == Type::DROP_COLUMN || type == Type::DROP_INDEX || type == Type::DROP_STATISTICS || type == Type::DROP_CONSTRAINT || type == Type::DROP_PROJECTION; } diff --git a/src/Storages/MergeTree/MergeTask.cpp b/src/Storages/MergeTree/MergeTask.cpp index 56bd1181fef..7ab8fa2430a 100644 --- a/src/Storages/MergeTree/MergeTask.cpp +++ b/src/Storages/MergeTree/MergeTask.cpp @@ -531,9 +531,9 @@ bool MergeTask::VerticalMergeStage::prepareVerticalMergeForAllColumns() const global_ctx->merge_list_element_ptr->columns_written = global_ctx->merging_columns.size(); global_ctx->merge_list_element_ptr->progress.store(ctx->column_sizes->keyColumnsWeight(), std::memory_order_relaxed); - ctx->rows_sources_write_buf->next(); - ctx->rows_sources_uncompressed_write_buf->next(); /// Ensure data has written to disk. 
+ ctx->rows_sources_write_buf->finalize(); + ctx->rows_sources_uncompressed_write_buf->finalize(); ctx->rows_sources_uncompressed_write_buf->finalize(); size_t rows_sources_count = ctx->rows_sources_write_buf->count(); diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 2e0ea4cdbcd..f9cc65871fe 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -8083,6 +8083,13 @@ void MergeTreeData::checkDropCommandDoesntAffectInProgressMutations(const AlterC throw_exception(mutation_name, "column", command.column_name); } } + else if (command.type == AlterCommand::DROP_STATISTICS) + { + for (const auto & stats_col1 : command.statistics_columns) + for (const auto & stats_col2 : mutation_command.statistics_columns) + if (stats_col1 == stats_col2) + throw_exception(mutation_name, "statistics", stats_col1); + } } } } diff --git a/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp b/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp index f33f4293023..b327480fa92 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp @@ -245,6 +245,8 @@ void MergeTreeDataPartChecksums::write(WriteBuffer & to) const writeBinaryLittleEndian(sum.uncompressed_hash, out); } } + + out.finalize(); } void MergeTreeDataPartChecksums::addFile(const String & file_name, UInt64 file_size, MergeTreeDataPartChecksum::uint128 file_hash) diff --git a/src/Storages/MergeTree/MergeTreeSink.cpp b/src/Storages/MergeTree/MergeTreeSink.cpp index b7dede3cb00..05751e0fa6f 100644 --- a/src/Storages/MergeTree/MergeTreeSink.cpp +++ b/src/Storages/MergeTree/MergeTreeSink.cpp @@ -54,6 +54,10 @@ void MergeTreeSink::onFinish() finishDelayedChunk(); } +void MergeTreeSink::onCancel() +{ +} + void MergeTreeSink::consume(Chunk chunk) { if (num_blocks_processed > 0) diff --git a/src/Storages/MergeTree/MergeTreeSink.h b/src/Storages/MergeTree/MergeTreeSink.h index 07ab3850df2..cf6715a3415 100644 --- a/src/Storages/MergeTree/MergeTreeSink.h +++ b/src/Storages/MergeTree/MergeTreeSink.h @@ -28,6 +28,7 @@ public: void consume(Chunk chunk) override; void onStart() override; void onFinish() override; + void onCancel() override; private: StorageMergeTree & storage; diff --git a/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp b/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp index 79c0e6ad262..5a84c6fd684 100644 --- a/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp +++ b/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp @@ -125,6 +125,7 @@ namespace ErrorCodes { extern const int BAD_ARGUMENTS; extern const int LOGICAL_ERROR; +extern const int ALL_CONNECTION_TRIES_FAILED; } class ParallelReplicasReadingCoordinator::ImplInterface @@ -1025,7 +1026,11 @@ void ParallelReplicasReadingCoordinator::markReplicaAsUnavailable(size_t replica std::lock_guard lock(mutex); if (!pimpl) + { unavailable_nodes_registered_before_initialization.push_back(replica_number); + if (unavailable_nodes_registered_before_initialization.size() == replicas_count) + throw Exception(ErrorCodes::ALL_CONNECTION_TRIES_FAILED, "Can't connect to any replica chosen for query execution"); + } else pimpl->markReplicaAsUnavailable(replica_number); } diff --git a/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.h b/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.h index 60343988f03..8b463fda395 100644 --- a/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.h 
+++ b/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.h @@ -34,7 +34,7 @@ private: void initialize(CoordinationMode mode); std::mutex mutex; - size_t replicas_count{0}; + const size_t replicas_count{0}; size_t mark_segment_size{0}; std::unique_ptr pimpl; ProgressCallback progress_callback; // store the callback only to bypass it to coordinator implementation diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp index e30d63c343a..30ba95c46f0 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp @@ -2004,7 +2004,9 @@ MutationCommands ReplicatedMergeTreeQueue::getMutationCommands( MutationCommands commands; for (auto it = begin; it != end; ++it) { - chassert(mutation_pointer < it->second->entry->znode_name); + /// FIXME : This was supposed to be fixed after releasing 23.5 (it fails in Upgrade check) + /// but it's still present https://github.com/ClickHouse/ClickHouse/issues/65275 + /// chassert(mutation_pointer < it->second->entry->znode_name); mutation_ids.push_back(it->second->entry->znode_name); const auto & commands_from_entry = it->second->entry->commands; commands.insert(commands.end(), commands_from_entry.begin(), commands_from_entry.end()); diff --git a/src/Storages/ObjectStorage/HDFS/WriteBufferFromHDFS.cpp b/src/Storages/ObjectStorage/HDFS/WriteBufferFromHDFS.cpp index 8277a769a11..e2e7f238a5e 100644 --- a/src/Storages/ObjectStorage/HDFS/WriteBufferFromHDFS.cpp +++ b/src/Storages/ObjectStorage/HDFS/WriteBufferFromHDFS.cpp @@ -136,7 +136,8 @@ WriteBufferFromHDFS::~WriteBufferFromHDFS() { try { - finalize(); + if (!canceled) + finalize(); } catch (...) { diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSink.cpp b/src/Storages/ObjectStorage/StorageObjectStorageSink.cpp index d13aec4a4f6..f2f6eac333c 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSink.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageSink.cpp @@ -50,56 +50,58 @@ void StorageObjectStorageSink::consume(Chunk chunk) void StorageObjectStorageSink::onCancel() { std::lock_guard lock(cancel_mutex); - finalize(); + cancelBuffers(); + releaseBuffers(); cancelled = true; } -void StorageObjectStorageSink::onException(std::exception_ptr exception) +void StorageObjectStorageSink::onException(std::exception_ptr) { std::lock_guard lock(cancel_mutex); - try - { - std::rethrow_exception(exception); - } - catch (...) - { - /// An exception context is needed to proper delete write buffers without finalization. - release(); - } + cancelBuffers(); + releaseBuffers(); } void StorageObjectStorageSink::onFinish() { std::lock_guard lock(cancel_mutex); - finalize(); + finalizeBuffers(); } -void StorageObjectStorageSink::finalize() +void StorageObjectStorageSink::finalizeBuffers() { if (!writer) return; try { - writer->finalize(); writer->flush(); + writer->finalize(); } catch (...) { /// Stop ParallelFormattingOutputFormat correctly. 
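// Illustrative sketch (not part of the patch): how the sink's three buffer
// helpers are combined after splitting the old finalize()/release() pair.
// SketchSink is hypothetical; writer/write_buf stand for the format writer and
// the underlying write buffer of StorageObjectStorageSink.
struct SketchSink
{
    void onFinish()    { finalizeBuffers(); }                   // happy path, may throw
    void onCancel()    { cancelBuffers(); releaseBuffers(); }   // stop writing, drop buffers
    void onException() { cancelBuffers(); releaseBuffers(); }   // same as cancel, never throws

    void finalizeBuffers() { /* writer->flush(); writer->finalize(); write_buf->finalize(); */ }
    void cancelBuffers()   { /* writer->cancel(); write_buf->cancel(); destructors won't flush */ }
    void releaseBuffers()  { /* writer.reset();   write_buf.reset(); */ }
};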
- release(); + releaseBuffers(); throw; } write_buf->finalize(); } -void StorageObjectStorageSink::release() +void StorageObjectStorageSink::releaseBuffers() { writer.reset(); write_buf.reset(); } +void StorageObjectStorageSink::cancelBuffers() +{ + if (writer) + writer->cancel(); + if (write_buf) + write_buf->cancel(); +} + PartitionedStorageObjectStorageSink::PartitionedStorageObjectStorageSink( ObjectStoragePtr object_storage_, ConfigurationPtr configuration_, diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSink.h b/src/Storages/ObjectStorage/StorageObjectStorageSink.h index 45cf83d606f..e0081193686 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSink.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageSink.h @@ -35,8 +35,9 @@ private: bool cancelled = false; std::mutex cancel_mutex; - void finalize(); - void release(); + void finalizeBuffers(); + void releaseBuffers(); + void cancelBuffers(); }; class PartitionedStorageObjectStorageSink : public PartitionedSink diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp index 2fc6993369d..aef783fc3c4 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp @@ -193,21 +193,21 @@ Chunk StorageObjectStorageSource::generate() progress(num_rows, chunk_size ? chunk_size : chunk.bytes()); const auto & object_info = reader.getObjectInfo(); - const auto & filename = object_info.getFileName(); - chassert(object_info.metadata); + const auto & filename = object_info->getFileName(); + chassert(object_info->metadata); VirtualColumnUtils::addRequestedFileLikeStorageVirtualsToChunk( chunk, read_from_format_info.requested_virtual_columns, { - .path = getUniqueStoragePathIdentifier(*configuration, reader.getObjectInfo(), false), - .size = object_info.metadata->size_bytes, + .path = getUniqueStoragePathIdentifier(*configuration, *object_info, false), + .size = object_info->metadata->size_bytes, .filename = &filename, - .last_modified = object_info.metadata->last_modified + .last_modified = object_info->metadata->last_modified }); return chunk; } if (reader.getInputFormat() && getContext()->getSettingsRef().use_cache_for_count_from_files) - addNumRowsToCache(reader.getObjectInfo(), total_rows_in_file); + addNumRowsToCache(*reader.getObjectInfo(), total_rows_in_file); total_rows_in_file = 0; @@ -517,24 +517,22 @@ StorageObjectStorage::ObjectInfoPtr StorageObjectStorageSource::GlobIterator::ne else ++it; } + + if (filter_dag) + { + std::vector paths; + paths.reserve(new_batch.size()); + for (const auto & object_info : new_batch) + paths.push_back(getUniqueStoragePathIdentifier(*configuration, *object_info, false)); + + VirtualColumnUtils::filterByPathOrFile(new_batch, paths, filter_dag, virtual_columns, getContext()); + + LOG_TEST(logger, "Filtered files: {} -> {}", paths.size(), new_batch.size()); + } } index = 0; - if (filter_dag) - { - std::vector paths; - paths.reserve(new_batch.size()); - for (const auto & object_info : new_batch) - { - chassert(object_info); - paths.push_back(getUniqueStoragePathIdentifier(*configuration, *object_info, false)); - } - - VirtualColumnUtils::filterByPathOrFile(new_batch, paths, filter_dag, virtual_columns, getContext()); - LOG_TEST(logger, "Filtered files: {} -> {}", paths.size(), new_batch.size()); - } - if (read_keys) read_keys->insert(read_keys->end(), new_batch.begin(), new_batch.end()); @@ -551,7 +549,12 @@ StorageObjectStorage::ObjectInfoPtr 
StorageObjectStorageSource::GlobIterator::ne } if (index >= object_infos.size()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Index out of bound for blob metadata"); + { + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "Index out of bound for blob metadata. Index: {}, size: {}", + index, object_infos.size()); + } return object_infos[index++]; } diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.h b/src/Storages/ObjectStorage/StorageObjectStorageSource.h index fd7c7aa7102..1b7c3b9be3f 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.h @@ -100,7 +100,7 @@ protected: PullingPipelineExecutor * operator->() { return reader.get(); } const PullingPipelineExecutor * operator->() const { return reader.get(); } - const ObjectInfo & getObjectInfo() const { return *object_info; } + ObjectInfoPtr getObjectInfo() const { return object_info; } const IInputFormat * getInputFormat() const { return dynamic_cast(source.get()); } private: diff --git a/src/Storages/ProjectionsDescription.cpp b/src/Storages/ProjectionsDescription.cpp index ea87d97d81f..9654b4ef37a 100644 --- a/src/Storages/ProjectionsDescription.cpp +++ b/src/Storages/ProjectionsDescription.cpp @@ -308,7 +308,7 @@ Block ProjectionDescription::calculate(const Block & block, ContextPtr context) // Generate aggregated blocks with rows less or equal than the original block. // There should be only one output block after this transformation. - builder.addTransform(std::make_shared(builder.getHeader(), block.rows(), 0, 1)); + builder.addTransform(std::make_shared(builder.getHeader(), block.rows(), 0)); builder.addTransform(std::make_shared(builder.getHeader(), block.rows(), 0)); auto pipeline = QueryPipelineBuilder::getPipeline(std::move(builder)); diff --git a/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.cpp b/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.cpp index 0baa234e7a3..31812406d34 100644 --- a/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.cpp +++ b/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.cpp @@ -26,6 +26,7 @@ #include #include #include +#include namespace DB @@ -167,7 +168,7 @@ std::pair EmbeddedRocksDBBulkSink::seriali auto & serialized_value_offsets = serialized_value_column->getOffsets(); WriteBufferFromVector writer_key(serialized_key_data); WriteBufferFromVector writer_value(serialized_value_data); - + FormatSettings format_settings; /// Format settings is 1.5KB, so it's not wise to create it for each row for (auto && chunk : input_chunks) { const auto & columns = chunk.getColumns(); @@ -175,7 +176,7 @@ std::pair EmbeddedRocksDBBulkSink::seriali for (size_t i = 0; i < rows; ++i) { for (size_t idx = 0; idx < columns.size(); ++idx) - serializations[idx]->serializeBinary(*columns[idx], i, idx == primary_key_pos ? writer_key : writer_value, {}); + serializations[idx]->serializeBinary(*columns[idx], i, idx == primary_key_pos ? 
writer_key : writer_value, format_settings); /// String in ColumnString must be null-terminated writeChar('\0', writer_key); writeChar('\0', writer_value); diff --git a/src/Storages/S3Queue/S3QueueIFileMetadata.cpp b/src/Storages/S3Queue/S3QueueIFileMetadata.cpp index 6c4089115d4..6d550571f22 100644 --- a/src/Storages/S3Queue/S3QueueIFileMetadata.cpp +++ b/src/Storages/S3Queue/S3QueueIFileMetadata.cpp @@ -35,6 +35,11 @@ namespace } } +void S3QueueIFileMetadata::FileStatus::setProcessingEndTime() +{ + processing_end_time = now(); +} + void S3QueueIFileMetadata::FileStatus::onProcessing() { state = FileStatus::State::Processing; @@ -44,13 +49,15 @@ void S3QueueIFileMetadata::FileStatus::onProcessing() void S3QueueIFileMetadata::FileStatus::onProcessed() { state = FileStatus::State::Processed; - processing_end_time = now(); + if (!processing_end_time) + setProcessingEndTime(); } void S3QueueIFileMetadata::FileStatus::onFailed(const std::string & exception) { state = FileStatus::State::Failed; - processing_end_time = now(); + if (!processing_end_time) + setProcessingEndTime(); std::lock_guard lock(last_exception_mutex); last_exception = exception; } @@ -120,7 +127,14 @@ S3QueueIFileMetadata::~S3QueueIFileMetadata() { if (processing_id_version.has_value()) { - file_status->onFailed("Uncaught exception"); + if (file_status->getException().empty()) + { + if (std::current_exception()) + file_status->onFailed(getCurrentExceptionMessage(true)); + else + file_status->onFailed("Unprocessed exception"); + } + LOG_TEST(log, "Removing processing node in destructor for file: {}", path); try { @@ -227,7 +241,16 @@ void S3QueueIFileMetadata::setProcessed() ProfileEvents::increment(ProfileEvents::S3QueueProcessedFiles); file_status->onProcessed(); - setProcessedImpl(); + + try + { + setProcessedImpl(); + } + catch (...) + { + file_status->onFailed(getCurrentExceptionMessage(true)); + throw; + } processing_id.reset(); processing_id_version.reset(); @@ -235,18 +258,36 @@ void S3QueueIFileMetadata::setProcessed() LOG_TRACE(log, "Set file {} as processed (rows: {})", path, file_status->processed_rows); } -void S3QueueIFileMetadata::setFailed(const std::string & exception) +void S3QueueIFileMetadata::setFailed(const std::string & exception_message, bool reduce_retry_count, bool overwrite_status) { - LOG_TRACE(log, "Setting file {} as failed (exception: {}, path: {})", path, exception, failed_node_path); + LOG_TRACE(log, "Setting file {} as failed (path: {}, reduce retry count: {}, exception: {})", + path, failed_node_path, reduce_retry_count, exception_message); ProfileEvents::increment(ProfileEvents::S3QueueFailedFiles); - file_status->onFailed(exception); - node_metadata.last_exception = exception; + if (overwrite_status || file_status->state != FileStatus::State::Failed) + file_status->onFailed(exception_message); - if (max_loading_retries == 0) - setFailedNonRetriable(); - else - setFailedRetriable(); + node_metadata.last_exception = exception_message; + + if (reduce_retry_count) + { + try + { + if (max_loading_retries == 0) + setFailedNonRetriable(); + else + setFailedRetriable(); + } + catch (...) 
+ { + auto full_exception = fmt::format( + "First exception: {}, exception while setting file as failed: {}", + exception_message, getCurrentExceptionMessage(true)); + + file_status->onFailed(full_exception); + throw; + } + } processing_id.reset(); processing_id_version.reset(); @@ -296,19 +337,20 @@ void S3QueueIFileMetadata::setFailedRetriable() auto zk_client = getZooKeeper(); /// Extract the number of already done retries from node_hash.retriable node if it exists. + Coordination::Requests requests; Coordination::Stat stat; std::string res; - if (zk_client->tryGet(retrieable_failed_node_path, res, &stat)) + bool has_failed_before = zk_client->tryGet(retrieable_failed_node_path, res, &stat); + if (has_failed_before) { auto failed_node_metadata = NodeMetadata::fromString(res); node_metadata.retries = failed_node_metadata.retries + 1; file_status->retries = node_metadata.retries; } - LOG_TRACE(log, "File `{}` failed to process, try {}/{}", - path, node_metadata.retries, max_loading_retries); + LOG_TRACE(log, "File `{}` failed to process, try {}/{}, retries node exists: {} (failed node path: {})", + path, node_metadata.retries, max_loading_retries, has_failed_before, failed_node_path); - Coordination::Requests requests; if (node_metadata.retries >= max_loading_retries) { /// File is no longer retriable. diff --git a/src/Storages/S3Queue/S3QueueIFileMetadata.h b/src/Storages/S3Queue/S3QueueIFileMetadata.h index e0b0d16cbcc..6f40338a421 100644 --- a/src/Storages/S3Queue/S3QueueIFileMetadata.h +++ b/src/Storages/S3Queue/S3QueueIFileMetadata.h @@ -19,6 +19,7 @@ public: None }; + void setProcessingEndTime(); void onProcessing(); void onProcessed(); void onFailed(const std::string & exception); @@ -54,13 +55,15 @@ public: bool setProcessing(); void setProcessed(); - void setFailed(const std::string & exception); + void setFailed(const std::string & exception_message, bool reduce_retry_count, bool overwrite_status); virtual void setProcessedAtStartRequests( Coordination::Requests & requests, const zkutil::ZooKeeperPtr & zk_client) = 0; FileStatusPtr getFileStatus() { return file_status; } + const std::string & getPath() const { return path; } + size_t getMaxTries() const { return max_loading_retries; } struct NodeMetadata { diff --git a/src/Storages/S3Queue/S3QueueMetadata.cpp b/src/Storages/S3Queue/S3QueueMetadata.cpp index e828e9f0716..734d0eed625 100644 --- a/src/Storages/S3Queue/S3QueueMetadata.cpp +++ b/src/Storages/S3Queue/S3QueueMetadata.cpp @@ -133,6 +133,9 @@ S3QueueMetadata::S3QueueMetadata(const fs::path & zookeeper_path_, const S3Queue generateRescheduleInterval( settings.s3queue_cleanup_interval_min_ms, settings.s3queue_cleanup_interval_max_ms)); } + LOG_TRACE(log, "Mode: {}, buckets: {}, processing threads: {}, result buckets num: {}", + settings.mode.toString(), settings.s3queue_buckets, settings.s3queue_processing_threads_num, buckets_num); + } S3QueueMetadata::~S3QueueMetadata() @@ -219,7 +222,7 @@ S3QueueMetadata::Bucket S3QueueMetadata::getBucketForPath(const std::string & pa S3QueueOrderedFileMetadata::BucketHolderPtr S3QueueMetadata::tryAcquireBucket(const Bucket & bucket, const Processor & processor) { - return S3QueueOrderedFileMetadata::tryAcquireBucket(zookeeper_path, bucket, processor); + return S3QueueOrderedFileMetadata::tryAcquireBucket(zookeeper_path, bucket, processor, log); } void S3QueueMetadata::initialize( diff --git a/src/Storages/S3Queue/S3QueueOrderedFileMetadata.cpp b/src/Storages/S3Queue/S3QueueOrderedFileMetadata.cpp index bac87c95cc9..da1e394ef82 100644 
--- a/src/Storages/S3Queue/S3QueueOrderedFileMetadata.cpp +++ b/src/Storages/S3Queue/S3QueueOrderedFileMetadata.cpp @@ -45,13 +45,15 @@ S3QueueOrderedFileMetadata::BucketHolder::BucketHolder( int bucket_version_, const std::string & bucket_lock_path_, const std::string & bucket_lock_id_path_, - zkutil::ZooKeeperPtr zk_client_) + zkutil::ZooKeeperPtr zk_client_, + LoggerPtr log_) : bucket_info(std::make_shared(BucketInfo{ .bucket = bucket_, .bucket_version = bucket_version_, .bucket_lock_path = bucket_lock_path_, .bucket_lock_id_path = bucket_lock_id_path_})) , zk_client(zk_client_) + , log(log_) { } @@ -61,7 +63,9 @@ void S3QueueOrderedFileMetadata::BucketHolder::release() return; released = true; - LOG_TEST(getLogger("S3QueueBucketHolder"), "Releasing bucket {}", bucket_info->bucket); + + LOG_TEST(log, "Releasing bucket {}, version {}", + bucket_info->bucket, bucket_info->bucket_version); Coordination::Requests requests; /// Check that bucket lock version has not changed @@ -72,11 +76,24 @@ void S3QueueOrderedFileMetadata::BucketHolder::release() Coordination::Responses responses; const auto code = zk_client->tryMulti(requests, responses); + + if (code == Coordination::Error::ZOK) + LOG_TEST(log, "Released bucket {}, version {}", + bucket_info->bucket, bucket_info->bucket_version); + else + LOG_TRACE(log, + "Failed to release bucket {}, version {}: {}. " + "This is normal if keeper session expired.", + bucket_info->bucket, bucket_info->bucket_version, code); + zkutil::KeeperMultiException::check(code, requests, responses); } S3QueueOrderedFileMetadata::BucketHolder::~BucketHolder() { + if (!released) + LOG_TEST(log, "Releasing bucket ({}) holder in destructor", bucket_info->bucket); + try { release(); @@ -154,7 +171,8 @@ S3QueueOrderedFileMetadata::Bucket S3QueueOrderedFileMetadata::getBucketForPath( S3QueueOrderedFileMetadata::BucketHolderPtr S3QueueOrderedFileMetadata::tryAcquireBucket( const std::filesystem::path & zk_path, const Bucket & bucket, - const Processor & processor) + const Processor & processor, + LoggerPtr log_) { const auto zk_client = getZooKeeper(); const auto bucket_lock_path = zk_path / "buckets" / toString(bucket) / "lock"; @@ -183,7 +201,7 @@ S3QueueOrderedFileMetadata::BucketHolderPtr S3QueueOrderedFileMetadata::tryAcqui const auto bucket_lock_version = set_response->stat.version; LOG_TEST( - getLogger("S3QueueOrderedFileMetadata"), + log_, "Processor {} acquired bucket {} for processing (bucket lock version: {})", processor, bucket, bucket_lock_version); @@ -192,7 +210,8 @@ S3QueueOrderedFileMetadata::BucketHolderPtr S3QueueOrderedFileMetadata::tryAcqui bucket_lock_version, bucket_lock_path, bucket_lock_id_path, - zk_client); + zk_client, + log_); } if (code == Coordination::Error::ZNODEEXISTS) @@ -384,8 +403,11 @@ void S3QueueOrderedFileMetadata::setProcessedImpl() auto code = zk_client->tryMulti(requests, responses); if (code == Coordination::Error::ZOK) { - if (max_loading_retries) - zk_client->tryRemove(failed_node_path + ".retriable", -1); + if (max_loading_retries + && zk_client->tryRemove(failed_node_path + ".retriable", -1) == Coordination::Error::ZOK) + { + LOG_TEST(log, "Removed node {}.retriable", failed_node_path); + } return; } diff --git a/src/Storages/S3Queue/S3QueueOrderedFileMetadata.h b/src/Storages/S3Queue/S3QueueOrderedFileMetadata.h index 698ec0f54cc..82ca87e3251 100644 --- a/src/Storages/S3Queue/S3QueueOrderedFileMetadata.h +++ b/src/Storages/S3Queue/S3QueueOrderedFileMetadata.h @@ -36,7 +36,8 @@ public: static BucketHolderPtr 
tryAcquireBucket( const std::filesystem::path & zk_path, const Bucket & bucket, - const Processor & processor); + const Processor & processor, + LoggerPtr log_); static S3QueueOrderedFileMetadata::Bucket getBucketForPath(const std::string & path, size_t buckets_num); @@ -72,26 +73,32 @@ private: bool ignore_if_exists); }; -struct S3QueueOrderedFileMetadata::BucketHolder +struct S3QueueOrderedFileMetadata::BucketHolder : private boost::noncopyable { BucketHolder( const Bucket & bucket_, int bucket_version_, const std::string & bucket_lock_path_, const std::string & bucket_lock_id_path_, - zkutil::ZooKeeperPtr zk_client_); + zkutil::ZooKeeperPtr zk_client_, + LoggerPtr log_); ~BucketHolder(); Bucket getBucket() const { return bucket_info->bucket; } BucketInfoPtr getBucketInfo() const { return bucket_info; } + void setFinished() { finished = true; } + bool isFinished() const { return finished; } + void release(); private: BucketInfoPtr bucket_info; const zkutil::ZooKeeperPtr zk_client; bool released = false; + bool finished = false; + LoggerPtr log; }; } diff --git a/src/Storages/S3Queue/S3QueueSettings.h b/src/Storages/S3Queue/S3QueueSettings.h index 4a92d99c411..681713e8378 100644 --- a/src/Storages/S3Queue/S3QueueSettings.h +++ b/src/Storages/S3Queue/S3QueueSettings.h @@ -19,7 +19,7 @@ class ASTStorage; 0) \ M(S3QueueAction, after_processing, S3QueueAction::KEEP, "Delete or keep file in S3 after successful processing", 0) \ M(String, keeper_path, "", "Zookeeper node path", 0) \ - M(UInt32, s3queue_loading_retries, 0, "Retry loading up to specified number of times", 0) \ + M(UInt32, s3queue_loading_retries, 10, "Retry loading up to specified number of times", 0) \ M(UInt32, s3queue_processing_threads_num, 1, "Number of processing threads", 0) \ M(UInt32, s3queue_enable_logging_to_s3queue_log, 1, "Enable logging to system table system.s3queue_log", 0) \ M(String, s3queue_last_processed_path, "", "For Ordered mode. Files that have lexicographically smaller file name are considered already processed", 0) \ @@ -31,6 +31,10 @@ class ASTStorage; M(UInt32, s3queue_cleanup_interval_min_ms, 60000, "For unordered mode. Polling backoff min for cleanup", 0) \ M(UInt32, s3queue_cleanup_interval_max_ms, 60000, "For unordered mode. 
Polling backoff max for cleanup", 0) \ M(UInt32, s3queue_buckets, 0, "Number of buckets for Ordered mode parallel processing", 0) \ + M(UInt32, s3queue_max_processed_files_before_commit, 100, "Number of files which can be processed before being committed to keeper", 0) \ + M(UInt32, s3queue_max_processed_rows_before_commit, 0, "Number of rows which can be processed before being committed to keeper", 0) \ + M(UInt32, s3queue_max_processed_bytes_before_commit, 0, "Number of bytes which can be processed before being committed to keeper", 0) \ + M(UInt32, s3queue_max_processing_time_sec_before_commit, 0, "Timeout in seconds after which to commit files committed to keeper", 0) \ #define LIST_OF_S3QUEUE_SETTINGS(M, ALIAS) \ S3QUEUE_RELATED_SETTINGS(M, ALIAS) \ diff --git a/src/Storages/S3Queue/S3QueueSource.cpp b/src/Storages/S3Queue/S3QueueSource.cpp index b5b1a8dd992..3a611ece51b 100644 --- a/src/Storages/S3Queue/S3QueueSource.cpp +++ b/src/Storages/S3Queue/S3QueueSource.cpp @@ -32,16 +32,16 @@ namespace ErrorCodes } StorageS3QueueSource::S3QueueObjectInfo::S3QueueObjectInfo( - const ObjectInfo & object_info, - Metadata::FileMetadataPtr processing_holder_) - : ObjectInfo(object_info.relative_path, object_info.metadata) - , processing_holder(processing_holder_) + const Source::ObjectInfo & object_info, + S3QueueMetadata::FileMetadataPtr file_metadata_) + : Source::ObjectInfo(object_info.relative_path, object_info.metadata) + , file_metadata(file_metadata_) { } StorageS3QueueSource::FileIterator::FileIterator( std::shared_ptr metadata_, - std::unique_ptr glob_iterator_, + std::unique_ptr glob_iterator_, std::atomic & shutdown_called_, LoggerPtr logger_) : StorageObjectStorageSource::IIterator("S3QueueIterator") @@ -52,25 +52,52 @@ StorageS3QueueSource::FileIterator::FileIterator( { } +bool StorageS3QueueSource::FileIterator::isFinished() const +{ + LOG_TEST(log, "Iterator finished: {}, objects to retry: {}", iterator_finished, objects_to_retry.size()); + return iterator_finished + && std::all_of(listed_keys_cache.begin(), listed_keys_cache.end(), [](const auto & v) { return v.second.keys.empty(); }) + && objects_to_retry.empty(); +} + size_t StorageS3QueueSource::FileIterator::estimatedKeysCount() { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method estimateKeysCount is not implemented"); } -StorageS3QueueSource::ObjectInfoPtr StorageS3QueueSource::FileIterator::nextImpl(size_t processor) +StorageS3QueueSource::Source::ObjectInfoPtr StorageS3QueueSource::FileIterator::nextImpl(size_t processor) { - ObjectInfoPtr object_info; + Source::ObjectInfoPtr object_info; S3QueueOrderedFileMetadata::BucketInfoPtr bucket_info; while (!shutdown_called) { if (metadata->useBucketsForProcessing()) + { + std::lock_guard lock(mutex); std::tie(object_info, bucket_info) = getNextKeyFromAcquiredBucket(processor); + } else - object_info = glob_iterator->next(processor); + { + std::lock_guard lock(mutex); + if (objects_to_retry.empty()) + { + object_info = glob_iterator->next(processor); + if (!object_info) + iterator_finished = true; + } + else + { + object_info = objects_to_retry.front(); + objects_to_retry.pop_front(); + } + } if (!object_info) + { + LOG_TEST(log, "No object left"); return {}; + } if (shutdown_called) { @@ -85,19 +112,64 @@ StorageS3QueueSource::ObjectInfoPtr StorageS3QueueSource::FileIterator::nextImpl return {}; } -std::pair +void StorageS3QueueSource::FileIterator::returnForRetry(Source::ObjectInfoPtr object_info) +{ + chassert(object_info); + if (metadata->useBucketsForProcessing()) + { + 
const auto bucket = metadata->getBucketForPath(object_info->relative_path); + listed_keys_cache[bucket].keys.emplace_front(object_info); + } + else + { + objects_to_retry.push_back(object_info); + } +} + +void StorageS3QueueSource::FileIterator::releaseFinishedBuckets() +{ + for (const auto & [processor, holders] : bucket_holders) + { + LOG_TEST(log, "Releasing {} bucket holders for processor {}", holders.size(), processor); + + for (auto it = holders.begin(); it != holders.end(); ++it) + { + const auto & holder = *it; + const auto bucket = holder->getBucketInfo()->bucket; + if (!holder->isFinished()) + { + /// Only the last holder in the list of holders can be non-finished. + chassert(std::next(it) == holders.end()); + + /// Do not release non-finished bucket holder. We will continue processing it. + LOG_TEST(log, "Bucket {} is not finished yet, will not release it", bucket); + break; + } + + /// Release bucket lock. + holder->release(); + + /// Reset bucket processor in cached state. + auto cached_info = listed_keys_cache.find(bucket); + if (cached_info != listed_keys_cache.end()) + cached_info->second.processor.reset(); + } + } +} + +std::pair StorageS3QueueSource::FileIterator::getNextKeyFromAcquiredBucket(size_t processor) { - /// We need this lock to maintain consistency between listing s3 directory - /// and getting/putting result into listed_keys_cache. - std::lock_guard lock(buckets_mutex); + auto bucket_holder_it = bucket_holders.emplace(processor, std::vector{}).first; + BucketHolder * current_bucket_holder = bucket_holder_it->second.empty() || bucket_holder_it->second.back()->isFinished() + ? nullptr + : bucket_holder_it->second.back().get(); - auto bucket_holder_it = bucket_holders.emplace(processor, nullptr).first; auto current_processor = toString(processor); LOG_TEST( log, "Current processor: {}, acquired bucket: {}", - processor, bucket_holder_it->second ? toString(bucket_holder_it->second->getBucket()) : "None"); + processor, current_bucket_holder ? toString(current_bucket_holder->getBucket()) : "None"); while (true) { @@ -106,9 +178,9 @@ StorageS3QueueSource::FileIterator::getNextKeyFromAcquiredBucket(size_t processo /// In case it is already acquired, they put the key into listed_keys_cache, /// so that the thread who acquired the bucket will be able to see /// those keys without the need to list s3 directory once again. - if (bucket_holder_it->second) + if (current_bucket_holder) { - const auto bucket = bucket_holder_it->second->getBucket(); + const auto bucket = current_bucket_holder->getBucket(); auto it = listed_keys_cache.find(bucket); if (it != listed_keys_cache.end()) { @@ -141,7 +213,7 @@ StorageS3QueueSource::FileIterator::getNextKeyFromAcquiredBucket(size_t processo LOG_TEST(log, "Current bucket: {}, will process file: {}", bucket, object_info->getFileName()); - return std::pair{object_info, bucket_holder_it->second->getBucketInfo()}; + return std::pair{object_info, current_bucket_holder->getBucketInfo()}; } LOG_TEST(log, "Cache of bucket {} is empty", bucket); @@ -156,9 +228,9 @@ StorageS3QueueSource::FileIterator::getNextKeyFromAcquiredBucket(size_t processo if (iterator_finished) { - /// Bucket is fully processed - release the bucket. - bucket_holder_it->second->release(); - bucket_holder_it->second.reset(); + /// Bucket is fully processed, but we will release it later + /// - once we write and commit files via commit() method. 
+ current_bucket_holder->setFinished(); } } /// If processing thread has already acquired some bucket @@ -167,8 +239,10 @@ StorageS3QueueSource::FileIterator::getNextKeyFromAcquiredBucket(size_t processo /// because one processing thread can acquire only one bucket at a time. /// Once a thread is finished with its acquired bucket, it checks listed_keys_cache /// to see if there are keys from buckets not acquired by anyone. - if (!bucket_holder_it->second) + if (!current_bucket_holder) { + LOG_TEST(log, "Checking caches keys: {}", listed_keys_cache.size()); + for (auto it = listed_keys_cache.begin(); it != listed_keys_cache.end();) { auto & [bucket, bucket_info] = *it; @@ -193,8 +267,8 @@ StorageS3QueueSource::FileIterator::getNextKeyFromAcquiredBucket(size_t processo continue; } - bucket_holder_it->second = metadata->tryAcquireBucket(bucket, current_processor); - if (!bucket_holder_it->second) + auto acquired_bucket = metadata->tryAcquireBucket(bucket, current_processor); + if (!acquired_bucket) { LOG_TEST(log, "Bucket {} is already locked for processing (keys: {})", bucket, bucket_keys.size()); @@ -202,6 +276,9 @@ StorageS3QueueSource::FileIterator::getNextKeyFromAcquiredBucket(size_t processo continue; } + bucket_holder_it->second.push_back(acquired_bucket); + current_bucket_holder = bucket_holder_it->second.back().get(); + bucket_processor = current_processor; /// Take the key from the front, the order is important. @@ -211,7 +288,7 @@ StorageS3QueueSource::FileIterator::getNextKeyFromAcquiredBucket(size_t processo LOG_TEST(log, "Acquired bucket: {}, will process file: {}", bucket, object_info->getFileName()); - return std::pair{object_info, bucket_holder_it->second->getBucketInfo()}; + return std::pair{object_info, current_bucket_holder->getBucketInfo()}; } } @@ -229,12 +306,12 @@ StorageS3QueueSource::FileIterator::getNextKeyFromAcquiredBucket(size_t processo LOG_TEST(log, "Found next file: {}, bucket: {}, current bucket: {}, cached_keys: {}", object_info->getFileName(), bucket, - bucket_holder_it->second ? toString(bucket_holder_it->second->getBucket()) : "None", + current_bucket_holder ? toString(current_bucket_holder->getBucket()) : "None", bucket_cache.keys.size()); - if (bucket_holder_it->second) + if (current_bucket_holder) { - if (bucket_holder_it->second->getBucket() != bucket) + if (current_bucket_holder->getBucket() != bucket) { /// Acquired bucket differs from object's bucket, /// put it into bucket's cache and continue. @@ -242,13 +319,16 @@ StorageS3QueueSource::FileIterator::getNextKeyFromAcquiredBucket(size_t processo continue; } /// Bucket is already acquired, process the file. 
- return std::pair{object_info, bucket_holder_it->second->getBucketInfo()}; + return std::pair{object_info, current_bucket_holder->getBucketInfo()}; } else { - bucket_holder_it->second = metadata->tryAcquireBucket(bucket, current_processor); - if (bucket_holder_it->second) + auto acquired_bucket = metadata->tryAcquireBucket(bucket, current_processor); + if (acquired_bucket) { + bucket_holder_it->second.push_back(acquired_bucket); + current_bucket_holder = bucket_holder_it->second.back().get(); + bucket_cache.processor = current_processor; if (!bucket_cache.keys.empty()) { @@ -258,7 +338,7 @@ StorageS3QueueSource::FileIterator::getNextKeyFromAcquiredBucket(size_t processo object_info = bucket_cache.keys.front(); bucket_cache.keys.pop_front(); } - return std::pair{object_info, bucket_holder_it->second->getBucketInfo()}; + return std::pair{object_info, current_bucket_holder->getBucketInfo()}; } else { @@ -270,12 +350,6 @@ StorageS3QueueSource::FileIterator::getNextKeyFromAcquiredBucket(size_t processo } else { - if (bucket_holder_it->second) - { - bucket_holder_it->second->release(); - bucket_holder_it->second.reset(); - } - LOG_TEST(log, "Reached the end of file iterator"); iterator_finished = true; @@ -301,7 +375,12 @@ StorageS3QueueSource::StorageS3QueueSource( const std::atomic & table_is_being_dropped_, std::shared_ptr s3_queue_log_, const StorageID & storage_id_, - LoggerPtr log_) + LoggerPtr log_, + size_t max_processed_files_before_commit_, + size_t max_processed_rows_before_commit_, + size_t max_processed_bytes_before_commit_, + size_t max_processing_time_sec_before_commit_, + bool commit_once_processed_) : ISource(header_) , WithContext(context_) , name(std::move(name_)) @@ -314,6 +393,11 @@ StorageS3QueueSource::StorageS3QueueSource( , table_is_being_dropped(table_is_being_dropped_) , s3_queue_log(s3_queue_log_) , storage_id(storage_id_) + , max_processed_files_before_commit(max_processed_files_before_commit_) + , max_processed_rows_before_commit(max_processed_rows_before_commit_) + , max_processed_bytes_before_commit(max_processed_bytes_before_commit_) + , max_processing_time_sec_before_commit(max_processing_time_sec_before_commit_) + , commit_once_processed(commit_once_processed_) , remove_file_func(remove_file_func_) , log(log_) { @@ -329,24 +413,52 @@ void StorageS3QueueSource::lazyInitialize(size_t processor) if (initialized) return; + LOG_TEST(log, "Initializing a new reader"); + internal_source->lazyInitialize(processor); reader = std::move(internal_source->reader); if (reader) reader_future = std::move(internal_source->reader_future); + initialized = true; } Chunk StorageS3QueueSource::generate() +{ + Chunk chunk; + try + { + chunk = generateImpl(); + } + catch (...) 
+ { + if (commit_once_processed) + commit(false, getCurrentExceptionMessage(true)); + + throw; + } + + if (!chunk && commit_once_processed) + { + commit(true); + } + return chunk; +} + +Chunk StorageS3QueueSource::generateImpl() { lazyInitialize(processor_id); while (true) { if (!reader) + { + LOG_TEST(log, "No reader"); break; + } - const auto * object_info = dynamic_cast(&reader.getObjectInfo()); - auto file_metadata = object_info->processing_holder; + const auto * object_info = dynamic_cast(reader.getObjectInfo().get()); + auto file_metadata = object_info->file_metadata; auto file_status = file_metadata->getFileStatus(); if (isCancelled()) @@ -357,7 +469,7 @@ Chunk StorageS3QueueSource::generate() { try { - file_metadata->setFailed("Cancelled"); + file_metadata->setFailed("Cancelled", /* reduce_retry_count */true, /* overwrite_status */false); } catch (...) { @@ -365,16 +477,19 @@ Chunk StorageS3QueueSource::generate() object_info->relative_path, getCurrentExceptionMessage(true)); } - appendLogElement(reader.getObjectInfo().getPath(), *file_status, processed_rows_from_file, false); + appendLogElement(reader.getObjectInfo()->getPath(), *file_status, processed_rows_from_file, false); } + LOG_TEST(log, "Query is cancelled"); break; } - const auto & path = reader.getObjectInfo().getPath(); + const auto & path = reader.getObjectInfo()->getPath(); if (shutdown_called) { + LOG_TEST(log, "Shutdown called"); + if (processed_rows_from_file == 0) break; @@ -386,7 +501,7 @@ Chunk StorageS3QueueSource::generate() try { - file_metadata->setFailed("Table is dropped"); + file_metadata->setFailed("Table is dropped", /* reduce_retry_count */true, /* overwrite_status */false); } catch (...) { @@ -420,15 +535,16 @@ Chunk StorageS3QueueSource::generate() file_status->processed_rows += chunk.getNumRows(); processed_rows_from_file += chunk.getNumRows(); + total_processed_rows += chunk.getNumRows(); + total_processed_bytes += chunk.bytes(); VirtualColumnUtils::addRequestedFileLikeStorageVirtualsToChunk( chunk, requested_virtual_columns, { .path = path, - .size = reader.getObjectInfo().metadata->size_bytes + .size = reader.getObjectInfo()->metadata->size_bytes }); - return chunk; } } @@ -437,22 +553,84 @@ Chunk StorageS3QueueSource::generate() const auto message = getCurrentExceptionMessage(true); LOG_ERROR(log, "Got an error while pulling chunk. Will set file {} as failed. Error: {} ", path, message); - file_metadata->setFailed(message); - + failed_during_read_files.push_back(file_metadata); + file_status->onFailed(getCurrentExceptionMessage(true)); appendLogElement(path, *file_status, processed_rows_from_file, false); + + if (processed_rows_from_file == 0) + { + auto * file_iterator = dynamic_cast(internal_source->file_iterator.get()); + chassert(file_iterator); + + if (file_status->retries < file_metadata->getMaxTries()) + file_iterator->returnForRetry(reader.getObjectInfo()); + + /// If we did not process any rows from the failed file, + /// commit all previously processed files, + /// not to lose the work already done. 
+ return {}; + } + throw; } - file_metadata->setProcessed(); - applyActionAfterProcessing(reader.getObjectInfo().relative_path); - appendLogElement(path, *file_status, processed_rows_from_file, true); + + file_status->setProcessingEndTime(); file_status.reset(); + processed_rows_from_file = 0; + processed_files.push_back(file_metadata); + + if (processed_files.size() == max_processed_files_before_commit) + { + LOG_TRACE(log, "Number of max processed files before commit reached " + "(rows: {}, bytes: {}, files: {})", + total_processed_rows, total_processed_bytes, processed_files.size()); + break; + } + + bool rows_or_bytes_or_time_limit_reached = false; + if (max_processed_rows_before_commit + && total_processed_rows == max_processed_rows_before_commit) + { + LOG_TRACE(log, "Number of max processed rows before commit reached " + "(rows: {}, bytes: {}, files: {})", + total_processed_rows, total_processed_bytes, processed_files.size()); + + rows_or_bytes_or_time_limit_reached = true; + } + else if (max_processed_bytes_before_commit + && total_processed_bytes == max_processed_bytes_before_commit) + { + LOG_TRACE(log, "Number of max processed bytes before commit reached " + "(rows: {}, bytes: {}, files: {})", + total_processed_rows, total_processed_bytes, processed_files.size()); + + rows_or_bytes_or_time_limit_reached = true; + } + else if (max_processing_time_sec_before_commit + && total_stopwatch.elapsedSeconds() >= max_processing_time_sec_before_commit) + { + LOG_TRACE(log, "Max processing time before commit reached " + "(rows: {}, bytes: {}, files: {})", + total_processed_rows, total_processed_bytes, processed_files.size()); + + rows_or_bytes_or_time_limit_reached = true; + } + + if (rows_or_bytes_or_time_limit_reached) + { + if (!reader_future.valid()) + break; + + LOG_TRACE(log, "Rows or bytes limit reached, but we have one more file scheduled already, " + "will process it despite the limit"); + } if (shutdown_called) { - LOG_INFO(log, "Shutdown was called, stopping sync"); + LOG_TRACE(log, "Shutdown was called, stopping sync"); break; } @@ -460,19 +638,55 @@ Chunk StorageS3QueueSource::generate() reader = reader_future.get(); if (!reader) + { + LOG_TEST(log, "Reader finished"); break; + } - file_status = files_metadata->getFileStatus(reader.getObjectInfo().getPath()); + file_status = files_metadata->getFileStatus(reader.getObjectInfo()->getPath()); - /// Even if task is finished the thread may be not freed in pool. - /// So wait until it will be freed before scheduling a new task. - internal_source->create_reader_pool->wait(); - reader_future = internal_source->createReaderAsync(processor_id); + if (!rows_or_bytes_or_time_limit_reached && processed_files.size() + 1 < max_processed_files_before_commit) + { + /// Even if task is finished the thread may be not freed in pool. + /// So wait until it will be freed before scheduling a new task. + internal_source->create_reader_pool->wait(); + reader_future = internal_source->createReaderAsync(processor_id); + } } return {}; } +void StorageS3QueueSource::commit(bool success, const std::string & exception_message) +{ + LOG_TEST(log, "Having {} files to set as {}, failed files: {}", + processed_files.size(), success ? 
"Processed" : "Failed", failed_during_read_files.size()); + + for (const auto & file_metadata : processed_files) + { + if (success) + { + file_metadata->setProcessed(); + applyActionAfterProcessing(file_metadata->getPath()); + } + else + file_metadata->setFailed( + exception_message, + /* reduce_retry_count */false, + /* overwrite_status */true); + } + + for (const auto & file_metadata : failed_during_read_files) + { + /// `exception` from commit args is from insertion to storage. + /// Here we do not used it as failed_during_read_files were not inserted into storage, but skipped. + file_metadata->setFailed( + file_metadata->getFileStatus()->getException(), + /* reduce_retry_count */true, + /* overwrite_status */false); + } +} + void StorageS3QueueSource::applyActionAfterProcessing(const String & path) { switch (action) diff --git a/src/Storages/S3Queue/S3QueueSource.h b/src/Storages/S3Queue/S3QueueSource.h index 6e098f8cb63..bfa1c358fa9 100644 --- a/src/Storages/S3Queue/S3QueueSource.h +++ b/src/Storages/S3Queue/S3QueueSource.h @@ -20,24 +20,18 @@ class StorageS3QueueSource : public ISource, WithContext { public: using Storage = StorageObjectStorage; - using ConfigurationPtr = Storage::ConfigurationPtr; - using GlobIterator = StorageObjectStorageSource::GlobIterator; - using ZooKeeperGetter = std::function; + using Source = StorageObjectStorageSource; using RemoveFileFunc = std::function; - using FileStatusPtr = S3QueueMetadata::FileStatusPtr; - using ReaderHolder = StorageObjectStorageSource::ReaderHolder; - using Metadata = S3QueueMetadata; - using ObjectInfo = StorageObjectStorageSource::ObjectInfo; - using ObjectInfoPtr = std::shared_ptr; - using ObjectInfos = std::vector; + using BucketHolderPtr = S3QueueOrderedFileMetadata::BucketHolderPtr; + using BucketHolder = S3QueueOrderedFileMetadata::BucketHolder; - struct S3QueueObjectInfo : public ObjectInfo + struct S3QueueObjectInfo : public Source::ObjectInfo { S3QueueObjectInfo( - const ObjectInfo & object_info, - Metadata::FileMetadataPtr processing_holder_); + const Source::ObjectInfo & object_info, + S3QueueMetadata::FileMetadataPtr file_metadata_); - Metadata::FileMetadataPtr processing_holder; + S3QueueMetadata::FileMetadataPtr file_metadata; }; class FileIterator : public StorageObjectStorageSource::IIterator @@ -45,39 +39,59 @@ public: public: FileIterator( std::shared_ptr metadata_, - std::unique_ptr glob_iterator_, + std::unique_ptr glob_iterator_, std::atomic & shutdown_called_, LoggerPtr logger_); + bool isFinished() const; + /// Note: /// List results in s3 are always returned in UTF-8 binary order. /// (https://docs.aws.amazon.com/AmazonS3/latest/userguide/ListingKeysUsingAPIs.html) - ObjectInfoPtr nextImpl(size_t processor) override; + Source::ObjectInfoPtr nextImpl(size_t processor) override; size_t estimatedKeysCount() override; + /// If the key was taken from iterator via next() call, + /// we might later want to return it back for retrying. + void returnForRetry(Source::ObjectInfoPtr object_info); + + /// Release hold buckets. + /// In fact, they could be released in destructors of BucketHolder, + /// but we anyway try to release them explicitly, + /// because we want to be able to rethrow exceptions if they might happen. 
+ void releaseFinishedBuckets(); + private: using Bucket = S3QueueMetadata::Bucket; using Processor = S3QueueMetadata::Processor; const std::shared_ptr metadata; - const std::unique_ptr glob_iterator; + const std::unique_ptr glob_iterator; std::atomic & shutdown_called; std::mutex mutex; LoggerPtr log; - std::mutex buckets_mutex; struct ListedKeys { - std::deque keys; + std::deque keys; std::optional processor; }; + /// A cache of keys which were iterated via glob_iterator, but not taken for processing. std::unordered_map listed_keys_cache; - bool iterator_finished = false; - std::unordered_map bucket_holders; - std::pair getNextKeyFromAcquiredBucket(size_t processor); + /// We store a vector of holders, because we cannot release them until processed files are committed. + std::unordered_map> bucket_holders; + + /// Is glob_iterator finished? + std::atomic_bool iterator_finished = false; + + /// Only for processing without buckets. + std::deque objects_to_retry; + + std::pair getNextKeyFromAcquiredBucket(size_t processor); + bool hasKeysForProcessor(const Processor & processor) const; }; StorageS3QueueSource( @@ -94,7 +108,12 @@ public: const std::atomic & table_is_being_dropped_, std::shared_ptr s3_queue_log_, const StorageID & storage_id_, - LoggerPtr log_); + LoggerPtr log_, + size_t max_processed_files_before_commit_, + size_t max_processed_rows_before_commit_, + size_t max_processed_bytes_before_commit_, + size_t max_processing_time_sec_before_commit_, + bool commit_once_processed_); static Block getHeader(Block sample_block, const std::vector & requested_virtual_columns); @@ -102,6 +121,10 @@ public: Chunk generate() override; + /// Commit files after insertion into storage finished. + /// `success` defines whether insertion was successful or not. 
+ void commit(bool success, const std::string & exception_message = {}); + private: const String name; const size_t processor_id; @@ -113,17 +136,29 @@ private: const std::atomic & table_is_being_dropped; const std::shared_ptr s3_queue_log; const StorageID storage_id; + const size_t max_processed_files_before_commit; + const size_t max_processed_rows_before_commit; + const size_t max_processed_bytes_before_commit; + const size_t max_processing_time_sec_before_commit; + const bool commit_once_processed; RemoveFileFunc remove_file_func; LoggerPtr log; - ReaderHolder reader; - std::future reader_future; + std::vector processed_files; + std::vector failed_during_read_files; + + Source::ReaderHolder reader; + std::future reader_future; std::atomic initialized{false}; + size_t processed_rows_from_file = 0; + size_t total_processed_rows = 0; + size_t total_processed_bytes = 0; - S3QueueOrderedFileMetadata::BucketHolderPtr current_bucket_holder; + Stopwatch total_stopwatch {CLOCK_MONOTONIC_COARSE}; + Chunk generateImpl(); void applyActionAfterProcessing(const String & path); void appendLogElement(const std::string & filename, S3QueueMetadata::FileStatus & file_status_, size_t processed_rows, bool processed); void lazyInitialize(size_t processor); diff --git a/src/Storages/S3Queue/S3QueueUnorderedFileMetadata.cpp b/src/Storages/S3Queue/S3QueueUnorderedFileMetadata.cpp index c61e9557fc2..2f7c238cd4d 100644 --- a/src/Storages/S3Queue/S3QueueUnorderedFileMetadata.cpp +++ b/src/Storages/S3Queue/S3QueueUnorderedFileMetadata.cpp @@ -130,8 +130,11 @@ void S3QueueUnorderedFileMetadata::setProcessedImpl() const auto code = zk_client->tryMulti(requests, responses); if (code == Coordination::Error::ZOK) { - if (max_loading_retries) - zk_client->tryRemove(failed_node_path + ".retriable", -1); + if (max_loading_retries + && zk_client->tryRemove(failed_node_path + ".retriable", -1) == Coordination::Error::ZOK) + { + LOG_TEST(log, "Removed node {}.retriable", failed_node_path); + } LOG_TRACE(log, "Moved file `{}` to processed (node path: {})", path, processed_node_path); return; diff --git a/src/Storages/S3Queue/StorageS3Queue.cpp b/src/Storages/S3Queue/StorageS3Queue.cpp index afb75a21b21..b1253516f17 100644 --- a/src/Storages/S3Queue/StorageS3Queue.cpp +++ b/src/Storages/S3Queue/StorageS3Queue.cpp @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -71,7 +72,12 @@ namespace return zkutil::extractZooKeeperPath(result_zk_path, true); } - void checkAndAdjustSettings(S3QueueSettings & s3queue_settings, const Settings & settings, bool is_attach) + void checkAndAdjustSettings( + S3QueueSettings & s3queue_settings, + const Settings & settings, + bool is_attach, + const LoggerPtr & log, + ASTStorage * engine_args) { if (!is_attach && !s3queue_settings.mode.changed) { @@ -79,11 +85,6 @@ namespace } /// In case !is_attach, we leave Ordered mode as default for compatibility. 
- if (!s3queue_settings.s3queue_processing_threads_num) - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Setting `s3queue_processing_threads_num` cannot be set to zero"); - } - if (!s3queue_settings.s3queue_enable_logging_to_s3queue_log.changed) { s3queue_settings.s3queue_enable_logging_to_s3queue_log = settings.s3queue_enable_logging_to_s3queue_log; @@ -95,6 +96,21 @@ namespace "Setting `s3queue_cleanup_interval_min_ms` ({}) must be less or equal to `s3queue_cleanup_interval_max_ms` ({})", s3queue_settings.s3queue_cleanup_interval_min_ms, s3queue_settings.s3queue_cleanup_interval_max_ms); } + + if (!s3queue_settings.s3queue_processing_threads_num) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Setting `s3queue_processing_threads_num` cannot be set to zero"); + } + + if (!is_attach && !s3queue_settings.s3queue_processing_threads_num.changed) + { + s3queue_settings.s3queue_processing_threads_num = std::max(getNumberOfPhysicalCPUCores(), 16); + engine_args->settings->as()->changes.insertSetting( + "s3queue_processing_threads_num", + s3queue_settings.s3queue_processing_threads_num.value); + + LOG_TRACE(log, "Set `processing_threads_num` to {}", s3queue_settings.s3queue_processing_threads_num); + } } } @@ -107,7 +123,7 @@ StorageS3Queue::StorageS3Queue( const String & comment, ContextPtr context_, std::optional format_settings_, - ASTStorage * /* engine_args */, + ASTStorage * engine_args, LoadingStrictnessLevel mode) : IStorage(table_id_) , WithContext(context_) @@ -131,7 +147,7 @@ StorageS3Queue::StorageS3Queue( throw Exception(ErrorCodes::QUERY_NOT_ALLOWED, "S3Queue url must either end with '/' or contain globs"); } - checkAndAdjustSettings(*s3queue_settings, context_->getSettingsRef(), mode > LoadingStrictnessLevel::CREATE); + checkAndAdjustSettings(*s3queue_settings, context_->getSettingsRef(), mode > LoadingStrictnessLevel::CREATE, log, engine_args); object_storage = configuration->createObjectStorage(context_, /* is_readonly */true); FormatFactory::instance().checkFormatName(configuration->format); @@ -305,10 +321,12 @@ void ReadFromS3Queue::initializePipeline(QueryPipelineBuilder & pipeline, const createIterator(nullptr); for (size_t i = 0; i < adjusted_num_streams; ++i) pipes.emplace_back(storage->createSource( - i, + i/* processor_id */, info, iterator, - max_block_size, context)); + max_block_size, + context, + true/* commit_once_processed */)); auto pipe = Pipe::unitePipes(std::move(pipes)); if (pipe.empty()) @@ -325,7 +343,8 @@ std::shared_ptr StorageS3Queue::createSource( const ReadFromFormatInfo & info, std::shared_ptr file_iterator, size_t max_block_size, - ContextPtr local_context) + ContextPtr local_context, + bool commit_once_processed) { auto internal_source = std::make_unique( getName(), @@ -358,7 +377,12 @@ std::shared_ptr StorageS3Queue::createSource( table_is_being_dropped, s3_queue_log, getStorageID(), - log); + log, + s3queue_settings->s3queue_max_processed_files_before_commit, + s3queue_settings->s3queue_max_processed_rows_before_commit, + s3queue_settings->s3queue_max_processed_bytes_before_commit, + s3queue_settings->s3queue_max_processing_time_sec_before_commit, + commit_once_processed); } bool StorageS3Queue::hasDependencies(const StorageID & table_id) @@ -433,48 +457,83 @@ void StorageS3Queue::threadFunc() bool StorageS3Queue::streamToViews() { + // Create a stream for each consumer and join them in a union stream + // Only insert into dependent views and expect that input blocks contain virtual columns + auto table_id = getStorageID(); auto table = 
DatabaseCatalog::instance().getTable(table_id, getContext()); if (!table) throw Exception(ErrorCodes::LOGICAL_ERROR, "Engine table {} doesn't exist.", table_id.getNameForLogs()); - auto storage_snapshot = getStorageSnapshot(getInMemoryMetadataPtr(), getContext()); - - // Create an INSERT query for streaming data auto insert = std::make_shared(); insert->table_id = table_id; + auto storage_snapshot = getStorageSnapshot(getInMemoryMetadataPtr(), getContext()); auto s3queue_context = Context::createCopy(getContext()); s3queue_context->makeQueryContext(); - // Create a stream for each consumer and join them in a union stream - // Only insert into dependent views and expect that input blocks contain virtual columns - InterpreterInsertQuery interpreter(insert, s3queue_context, false, true, true); - auto block_io = interpreter.execute(); auto file_iterator = createFileIterator(s3queue_context, nullptr); + size_t total_rows = 0; - auto read_from_format_info = prepareReadingFromFormat(block_io.pipeline.getHeader().getNames(), storage_snapshot, supportsSubsetOfColumns(s3queue_context)); - - Pipes pipes; - pipes.reserve(s3queue_settings->s3queue_processing_threads_num); - for (size_t i = 0; i < s3queue_settings->s3queue_processing_threads_num; ++i) + while (!shutdown_called && !file_iterator->isFinished()) { - auto source = createSource(i, read_from_format_info, file_iterator, DBMS_DEFAULT_BUFFER_SIZE, s3queue_context); - pipes.emplace_back(std::move(source)); + InterpreterInsertQuery interpreter(insert, s3queue_context, false, true, true); + auto block_io = interpreter.execute(); + auto read_from_format_info = prepareReadingFromFormat( + block_io.pipeline.getHeader().getNames(), + storage_snapshot, + supportsSubsetOfColumns(s3queue_context)); + + Pipes pipes; + std::vector> sources; + + pipes.reserve(s3queue_settings->s3queue_processing_threads_num); + sources.reserve(s3queue_settings->s3queue_processing_threads_num); + + for (size_t i = 0; i < s3queue_settings->s3queue_processing_threads_num; ++i) + { + auto source = createSource( + i/* processor_id */, + read_from_format_info, + file_iterator, + DBMS_DEFAULT_BUFFER_SIZE, + s3queue_context, + false/* commit_once_processed */); + + pipes.emplace_back(source); + sources.emplace_back(source); + } + auto pipe = Pipe::unitePipes(std::move(pipes)); + + block_io.pipeline.complete(std::move(pipe)); + block_io.pipeline.setNumThreads(s3queue_settings->s3queue_processing_threads_num); + block_io.pipeline.setConcurrencyControl(s3queue_context->getSettingsRef().use_concurrency_control); + + std::atomic_size_t rows = 0; + block_io.pipeline.setProgressCallback([&](const Progress & progress) { rows += progress.read_rows.load(); }); + + try + { + CompletedPipelineExecutor executor(block_io.pipeline); + executor.execute(); + } + catch (...) 
+ { + for (auto & source : sources) + source->commit(/* success */false, getCurrentExceptionMessage(true)); + + file_iterator->releaseFinishedBuckets(); + throw; + } + + for (auto & source : sources) + source->commit(/* success */true); + + file_iterator->releaseFinishedBuckets(); + total_rows += rows; } - auto pipe = Pipe::unitePipes(std::move(pipes)); - block_io.pipeline.complete(std::move(pipe)); - block_io.pipeline.setNumThreads(s3queue_settings->s3queue_processing_threads_num); - block_io.pipeline.setConcurrencyControl(s3queue_context->getSettingsRef().use_concurrency_control); - - std::atomic_size_t rows = 0; - block_io.pipeline.setProgressCallback([&](const Progress & progress) { rows += progress.read_rows.load(); }); - - CompletedPipelineExecutor executor(block_io.pipeline); - executor.execute(); - - return rows > 0; + return total_rows > 0; } zkutil::ZooKeeperPtr StorageS3Queue::getZooKeeper() const diff --git a/src/Storages/S3Queue/StorageS3Queue.h b/src/Storages/S3Queue/StorageS3Queue.h index ef83a1ccc25..f465fa92d1a 100644 --- a/src/Storages/S3Queue/StorageS3Queue.h +++ b/src/Storages/S3Queue/StorageS3Queue.h @@ -88,7 +88,8 @@ private: const ReadFromFormatInfo & info, std::shared_ptr file_iterator, size_t max_block_size, - ContextPtr local_context); + ContextPtr local_context, + bool commit_once_processed); bool hasDependencies(const StorageID & table_id); bool streamToViews(); diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 16c248f1b7b..7f39ff615f0 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -1789,7 +1789,8 @@ public: void onCancel() override { std::lock_guard cancel_lock(cancel_mutex); - finalize(); + cancelBuffers(); + releaseBuffers(); cancelled = true; } @@ -1803,18 +1804,18 @@ public: catch (...) { /// An exception context is needed to proper delete write buffers without finalization - release(); + releaseBuffers(); } } void onFinish() override { std::lock_guard cancel_lock(cancel_mutex); - finalize(); + finalizeBuffers(); } private: - void finalize() + void finalizeBuffers() { if (!writer) return; @@ -1827,19 +1828,27 @@ private: catch (...) { /// Stop ParallelFormattingOutputFormat correctly. - release(); + releaseBuffers(); throw; } write_buf->finalize(); } - void release() + void releaseBuffers() { writer.reset(); write_buf.reset(); } + void cancelBuffers() + { + if (writer) + writer->cancel(); + if (write_buf) + write_buf->cancel(); + } + StorageMetadataPtr metadata_snapshot; String table_name_for_log; diff --git a/src/Storages/StorageLog.cpp b/src/Storages/StorageLog.cpp index 08e0526550d..de0324d7998 100644 --- a/src/Storages/StorageLog.cpp +++ b/src/Storages/StorageLog.cpp @@ -322,6 +322,10 @@ public: /// Rollback partial writes. /// No more writing. + for (auto & [_, stream] : streams) + { + stream.cancel(); + } streams.clear(); /// Truncate files to the older sizes. 
@@ -373,6 +377,12 @@ private: plain->next(); plain->finalize(); } + + void cancel() + { + compressed.cancel(); + plain->cancel(); + } }; using FileStreams = std::map; diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 9352f772ce1..9255ee00340 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -1269,6 +1269,7 @@ MergeMutateSelectedEntryPtr StorageMergeTree::selectPartsToMutate( if (command.type != MutationCommand::Type::DROP_COLUMN && command.type != MutationCommand::Type::DROP_INDEX && command.type != MutationCommand::Type::DROP_PROJECTION + && command.type != MutationCommand::Type::DROP_STATISTICS && command.type != MutationCommand::Type::RENAME_COLUMN) { commands_for_size_validation.push_back(command); diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index db58d0081c6..a127384c03c 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -5656,7 +5656,7 @@ std::optional StorageReplicatedMergeTree::distributedWriteFromClu { auto connection = std::make_shared( node.host_name, node.port, query_context->getGlobalContext()->getCurrentDatabase(), - node.user, node.password, SSHKey(), node.quota_key, node.cluster, node.cluster_secret, + node.user, node.password, SSHKey(), /*jwt*/"", node.quota_key, node.cluster, node.cluster_secret, "ParallelInsertSelectInititiator", node.compression, node.secure diff --git a/src/Storages/StorageSet.cpp b/src/Storages/StorageSet.cpp index a8c8e81e23d..5b7f9fc0ac2 100644 --- a/src/Storages/StorageSet.cpp +++ b/src/Storages/StorageSet.cpp @@ -97,8 +97,7 @@ void SetOrJoinSink::onFinish() if (persistent) { backup_stream.flush(); - compressed_backup_buf.next(); - backup_buf->next(); + compressed_backup_buf.finalize(); backup_buf->finalize(); table.disk->replaceFile(fs::path(backup_tmp_path) / backup_file_name, fs::path(backup_path) / backup_file_name); diff --git a/src/Storages/StorageStripeLog.cpp b/src/Storages/StorageStripeLog.cpp index f0c5103d657..8df87d6290f 100644 --- a/src/Storages/StorageStripeLog.cpp +++ b/src/Storages/StorageStripeLog.cpp @@ -207,7 +207,10 @@ public: /// Rollback partial writes. /// No more writing. + data_out->cancel(); data_out.reset(); + + data_out_compressed->cancel(); data_out_compressed.reset(); /// Truncate files to the older sizes. @@ -233,8 +236,7 @@ public: if (done) return; - data_out->next(); - data_out_compressed->next(); + data_out->finalize(); data_out_compressed->finalize(); /// Save the new indices. @@ -494,8 +496,7 @@ void StorageStripeLog::saveIndices(const WriteLock & /* already locked for writi for (size_t i = start; i != num_indices; ++i) indices.blocks[i].write(*index_out); - index_out->next(); - index_out_compressed->next(); + index_out->finalize(); index_out_compressed->finalize(); num_indices_saved = num_indices; diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index f8424bc3d1b..895da028fc2 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -576,31 +576,25 @@ void StorageURLSink::consume(Chunk chunk) void StorageURLSink::onCancel() { std::lock_guard lock(cancel_mutex); - finalize(); + cancelBuffers(); + releaseBuffers(); cancelled = true; } -void StorageURLSink::onException(std::exception_ptr exception) +void StorageURLSink::onException(std::exception_ptr) { std::lock_guard lock(cancel_mutex); - try - { - std::rethrow_exception(exception); - } - catch (...) 
- { - /// An exception context is needed to proper delete write buffers without finalization - release(); - } + cancelBuffers(); + releaseBuffers(); } void StorageURLSink::onFinish() { std::lock_guard lock(cancel_mutex); - finalize(); + finalizeBuffers(); } -void StorageURLSink::finalize() +void StorageURLSink::finalizeBuffers() { if (!writer) return; @@ -613,19 +607,27 @@ void StorageURLSink::finalize() catch (...) { /// Stop ParallelFormattingOutputFormat correctly. - release(); + releaseBuffers(); throw; } write_buf->finalize(); } -void StorageURLSink::release() +void StorageURLSink::releaseBuffers() { writer.reset(); write_buf.reset(); } +void StorageURLSink::cancelBuffers() +{ + if (writer) + writer->cancel(); + if (write_buf) + write_buf->cancel(); +} + class PartitionedStorageURLSink : public PartitionedSink { public: diff --git a/src/Storages/StorageURL.h b/src/Storages/StorageURL.h index f550ccb2bc4..3090f8db12e 100644 --- a/src/Storages/StorageURL.h +++ b/src/Storages/StorageURL.h @@ -257,8 +257,10 @@ public: void onFinish() override; private: - void finalize(); - void release(); + void finalizeBuffers(); + void releaseBuffers(); + void cancelBuffers(); + std::unique_ptr write_buf; OutputFormatPtr writer; std::mutex cancel_mutex; diff --git a/src/Storages/System/StorageSystemS3Queue.cpp b/src/Storages/System/StorageSystemS3Queue.cpp index 637182067f2..131570709d0 100644 --- a/src/Storages/System/StorageSystemS3Queue.cpp +++ b/src/Storages/System/StorageSystemS3Queue.cpp @@ -26,6 +26,7 @@ ColumnsDescription StorageSystemS3Queue::getColumnsDescription() return ColumnsDescription { {"zookeeper_path", std::make_shared(), "Path in zookeeper to S3Queue metadata"}, + {"file_path", std::make_shared(), "File path of a file which is being processed by S3Queue"}, {"file_name", std::make_shared(), "File name of a file which is being processed by S3Queue"}, {"rows_processed", std::make_shared(), "Currently processed number of rows"}, {"status", std::make_shared(), "Status of processing: Processed, Processing, Failed"}, @@ -45,11 +46,12 @@ void StorageSystemS3Queue::fillData(MutableColumns & res_columns, ContextPtr, co { for (const auto & [zookeeper_path, metadata] : S3QueueMetadataFactory::instance().getAll()) { - for (const auto & [file_name, file_status] : metadata->getFileStatuses()) + for (const auto & [file_path, file_status] : metadata->getFileStatuses()) { size_t i = 0; res_columns[i++]->insert(zookeeper_path); - res_columns[i++]->insert(file_name); + res_columns[i++]->insert(file_path); + res_columns[i++]->insert(std::filesystem::path(file_path).filename().string()); res_columns[i++]->insert(file_status->processed_rows.load()); res_columns[i++]->insert(magic_enum::enum_name(file_status->state.load())); diff --git a/src/Storages/System/StorageSystemServerSettings.cpp b/src/Storages/System/StorageSystemServerSettings.cpp index ef10b2f45da..d242b6de4ec 100644 --- a/src/Storages/System/StorageSystemServerSettings.cpp +++ b/src/Storages/System/StorageSystemServerSettings.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -84,7 +85,8 @@ void StorageSystemServerSettings::fillData(MutableColumns & res_columns, Context {"mmap_cache_size", {std::to_string(context->getMMappedFileCache()->maxSizeInBytes()), ChangeableWithoutRestart::Yes}}, {"merge_workload", {context->getMergeWorkload(), ChangeableWithoutRestart::Yes}}, - {"mutation_workload", {context->getMutationWorkload(), ChangeableWithoutRestart::Yes}} + {"mutation_workload", 
{context->getMutationWorkload(), ChangeableWithoutRestart::Yes}}, + {"config_reload_interval_ms", {std::to_string(context->getConfigReloaderInterval()), ChangeableWithoutRestart::Yes}} }; if (context->areBackgroundExecutorsInitialized()) diff --git a/src/Storages/WindowView/StorageWindowView.cpp b/src/Storages/WindowView/StorageWindowView.cpp index a6e7ce301a0..77e6ee9cb24 100644 --- a/src/Storages/WindowView/StorageWindowView.cpp +++ b/src/Storages/WindowView/StorageWindowView.cpp @@ -1068,9 +1068,10 @@ void StorageWindowView::threadFuncFireProc() if (max_watermark >= timestamp_now) clean_cache_task->schedule(); + UInt64 next_fire_ms = static_cast(next_fire_signal) * 1000; UInt64 timestamp_ms = static_cast(Poco::Timestamp().epochMicroseconds()) / 1000; if (!shutdown_called) - fire_task->scheduleAfter(std::max(UInt64(0), static_cast(next_fire_signal) * 1000 - timestamp_ms)); + fire_task->scheduleAfter(next_fire_ms - std::min(next_fire_ms, timestamp_ms)); } void StorageWindowView::threadFuncFireEvent() diff --git a/tests/analyzer_tech_debt.txt b/tests/analyzer_tech_debt.txt index 1f7357b6494..bd92465e1aa 100644 --- a/tests/analyzer_tech_debt.txt +++ b/tests/analyzer_tech_debt.txt @@ -1,13 +1,4 @@ -00725_memory_tracking 01624_soft_constraints 02354_vector_search_queries -02901_parallel_replicas_rollup -02999_scalar_subqueries_bug_2 -# Flaky list -01825_type_json_in_array -01414_mutations_and_errors_zookeeper -01287_max_execution_speed # Check after ConstantNode refactoring -02154_parser_backtracking 02944_variant_as_common_type -02942_variant_cast diff --git a/tests/ci/build_report_check.py b/tests/ci/build_report_check.py index 664f6a7cbb9..04c8d12fc30 100644 --- a/tests/ci/build_report_check.py +++ b/tests/ci/build_report_check.py @@ -64,11 +64,14 @@ def main(): + ci_config["jobs_data"]["jobs_to_do"] ) builds_for_check = [job for job in CI.BuildNames if job in all_ci_jobs] - print(f"NOTE: following build reports will be checked: [{builds_for_check}]") + print("NOTE: builds for check taken from ci configuration") else: builds_for_check = parse_args().reports for job in builds_for_check: assert job in CI.BuildNames, "Builds must be known build job names" + print("NOTE: builds for check taken from input arguments") + + print(f"NOTE: following build reports will be checked: [{builds_for_check}]") required_builds = len(builds_for_check) missing_builds = 0 diff --git a/tests/ci/changelog.py b/tests/ci/changelog.py index fcb61d3f605..3ba618f3ae5 100755 --- a/tests/ci/changelog.py +++ b/tests/ci/changelog.py @@ -33,10 +33,11 @@ from version_helper import ( categories_preferred_order = ( "Backward Incompatible Change", "New Feature", + "Experimental Feature", "Performance Improvement", "Improvement", - "Critical Bug Fix", - "Bug Fix", + "Critical Bug Fix (crash, LOGICAL_ERROR, data loss, RBAC)", + "Bug Fix (user-visible misbehavior in an official stable release)", "Build/Testing/Packaging Improvement", "Other", ) @@ -205,7 +206,7 @@ def generate_description(item: PullRequest, repo: Repository) -> Optional[Descri try: item = gh.get_pull_cached(repo, int(branch_parts[-1])) except Exception as e: - logging.warning("unable to get backpoted PR, exception: %s", e) + logging.warning("unable to get backported PR, exception: %s", e) else: logging.warning( "The branch %s doesn't match backport template, using PR %s as is", @@ -280,12 +281,17 @@ def generate_description(item: PullRequest, repo: Repository) -> Optional[Descri category, ): category = "NOT FOR CHANGELOG / INSIGNIFICANT" - entry = item.title + # 
Sometimes we declare not for changelog but still write a description. Keep it + if len(entry) <= 4 or "Documentation entry" in entry: + entry = item.title # Normalize bug fixes - if re.match( - r"(?i)bug\Wfix", - category, + if ( + re.match( + r"(?i)bug\Wfix", + category, + ) + and "Critical Bug Fix" not in category ): category = "Bug Fix (user-visible misbehavior in an official stable release)" diff --git a/tests/ci/ci.py b/tests/ci/ci.py index 4e34e6b6135..af2f4c0a1fc 100644 --- a/tests/ci/ci.py +++ b/tests/ci/ci.py @@ -1065,18 +1065,20 @@ def main() -> int: ) # rerun helper check - # FIXME: remove rerun_helper check and rely on ci cache only + # FIXME: Find a way to identify if job restarted manually (by developer) or by automatic workflow restart (died spot-instance) + # disable rerun check for the former if check_name not in ( CI.JobNames.BUILD_CHECK, ): # we might want to rerun build report job rerun_helper = RerunHelper(commit, check_name_with_group) if rerun_helper.is_already_finished_by_status(): + print("WARNING: Rerunning job with GH status ") status = rerun_helper.get_finished_status() assert status - previous_status = status.state print("::group::Commit Status") print(status) print("::endgroup::") + previous_status = status.state # ci cache check if not previous_status and not ci_settings.no_ci_cache: diff --git a/tests/ci/ci_definitions.py b/tests/ci/ci_definitions.py index d60c67d318d..48e1280d939 100644 --- a/tests/ci/ci_definitions.py +++ b/tests/ci/ci_definitions.py @@ -185,8 +185,7 @@ class JobNames(metaclass=WithIter): LIBFUZZER_TEST = "libFuzzer tests" - BUILD_CHECK = "ClickHouse build check" - # BUILD_CHECK_SPECIAL = "ClickHouse special build check" + BUILD_CHECK = "Builds" DOCS_CHECK = "Docs check" BUGFIX_VALIDATE = "Bugfix validation" @@ -214,8 +213,12 @@ class StatusNames(metaclass=WithIter): class SyncState(metaclass=WithIter): - PENDING = "awaiting merge" - MERGE_FAILED = "merge failed" + PENDING = "awaiting sync" + # temporary state if GH does not know mergeable state + MERGE_UNKNOWN = "unknown state (might be auto recoverable)" + # changes cannot be pushed/merged to a sync branch + PUSH_FAILED = "push failed" + MERGE_CONFLICTS = "merge conflicts" TESTING = "awaiting test results" TESTS_FAILED = "tests failed" COMPLETED = "completed" @@ -331,7 +334,7 @@ class CommonJobConfigs: """ BUILD_REPORT = JobConfig( - job_name_keyword="build_check", + job_name_keyword="builds", run_command="build_report_check.py", digest=DigestConfig( include_paths=[ @@ -638,7 +641,7 @@ CHECK_DESCRIPTIONS = [ lambda x: x == "CI running", ), CheckDescription( - "ClickHouse build check", + "Builds", "Builds ClickHouse in various configurations for use in further steps. " "You have to fix the builds that fail. 
Build logs often has enough " "information to fix the error, but you might have to reproduce the failure " diff --git a/tests/ci/commit_status_helper.py b/tests/ci/commit_status_helper.py index 96cf700ed77..fdc9c002b66 100644 --- a/tests/ci/commit_status_helper.py +++ b/tests/ci/commit_status_helper.py @@ -18,8 +18,7 @@ from github.IssueComment import IssueComment from github.Repository import Repository from ci_config import CI -from env_helper import GITHUB_REPOSITORY, GITHUB_UPSTREAM_REPOSITORY, TEMP_PATH -from lambda_shared_package.lambda_shared.pr import Labels +from env_helper import GITHUB_REPOSITORY, TEMP_PATH from pr_info import PRInfo from report import ( ERROR, @@ -29,7 +28,6 @@ from report import ( StatusType, TestResult, TestResults, - get_status, get_worst_status, ) from s3_helper import S3Helper @@ -103,7 +101,12 @@ def post_commit_status( if i == RETRY - 1: raise ex time.sleep(i) - if pr_info: + if pr_info and check_name not in ( + CI.StatusNames.MERGEABLE, + CI.StatusNames.CI, + CI.StatusNames.PR_CHECK, + CI.StatusNames.SYNC, + ): status_updated = False for i in range(RETRY): try: @@ -157,6 +160,17 @@ def set_status_comment(commit: Commit, pr_info: PRInfo) -> None: gh.__requester = commit._requester # type:ignore #pylint:disable=protected-access repo = get_repo(gh) statuses = sorted(get_commit_filtered_statuses(commit), key=lambda x: x.context) + statuses = [ + status + for status in statuses + if status.context + not in ( + CI.StatusNames.MERGEABLE, + CI.StatusNames.CI, + CI.StatusNames.PR_CHECK, + CI.StatusNames.SYNC, + ) + ] if not statuses: return @@ -439,29 +453,10 @@ def set_mergeable_check( ) -def update_mergeable_check(commit: Commit, pr_info: PRInfo, check_name: str) -> None: - "check if the check_name in REQUIRED_CHECKS and then trigger update" - not_run = ( - pr_info.labels.intersection({Labels.SKIP_MERGEABLE_CHECK, Labels.RELEASE}) - or not CI.is_required(check_name) - or pr_info.release_pr - or pr_info.number == 0 - ) - - if not_run: - # Let's avoid unnecessary work - return - - logging.info("Update Mergeable Check by %s", check_name) - - statuses = get_commit_filtered_statuses(commit) - trigger_mergeable_check(commit, statuses) - - def trigger_mergeable_check( commit: Commit, statuses: CommitStatuses, - set_if_green: bool = False, + set_from_sync: bool = False, workflow_failed: bool = False, ) -> StatusType: """calculate and update StatusNames.MERGEABLE""" @@ -501,63 +496,43 @@ def trigger_mergeable_check( description = format_description(description) - if not set_if_green and state == SUCCESS: - # do not set green Mergeable Check status - pass - else: - if mergeable_status is None or mergeable_status.description != description: + if set_from_sync: + # update Mergeable Check from sync WF only if its status already present or its new status is not SUCCESS + # to avoid false-positives + if mergeable_status or state != SUCCESS: set_mergeable_check(commit, description, state) + elif mergeable_status is None or mergeable_status.description != description: + set_mergeable_check(commit, description, state) return state def update_upstream_sync_status( - upstream_pr_number: int, - sync_pr_number: int, - gh: Github, + pr_info: PRInfo, state: StatusType, - can_set_green_mergeable_status: bool = False, ) -> None: - upstream_repo = gh.get_repo(GITHUB_UPSTREAM_REPOSITORY) - upstream_pr = upstream_repo.get_pull(upstream_pr_number) - sync_repo = gh.get_repo(GITHUB_REPOSITORY) - sync_pr = sync_repo.get_pull(sync_pr_number) - # Find the commit that is in both repos, upstream 
and cloud - sync_commits = sync_pr.get_commits().reversed - upstream_commits = upstream_pr.get_commits().reversed - # Github objects are compared by _url attribute. We can't compare them directly and - # should compare commits by SHA1 - upstream_shas = [c.sha for c in upstream_commits] - logging.info("Commits in upstream PR:\n %s", ", ".join(upstream_shas)) - sync_shas = [c.sha for c in sync_commits] - logging.info("Commits in sync PR:\n %s", ", ".join(reversed(sync_shas))) + last_synced_upstream_commit = pr_info.get_latest_sync_commit() - # find latest synced commit - last_synced_upstream_commit = None - for commit in upstream_commits: - if commit.sha in sync_shas: - last_synced_upstream_commit = commit - break - - assert last_synced_upstream_commit - - sync_status = get_status(state) logging.info( - "Using commit %s to post the %s status `%s`: [%s]", + "Using commit [%s] to post the [%s] status [%s]", last_synced_upstream_commit.sha, - sync_status, + state, CI.StatusNames.SYNC, - "", ) + if state == SUCCESS: + description = CI.SyncState.COMPLETED + else: + description = CI.SyncState.TESTS_FAILED + post_commit_status( last_synced_upstream_commit, - sync_status, - "", + state, "", + description, CI.StatusNames.SYNC, ) trigger_mergeable_check( last_synced_upstream_commit, get_commit_filtered_statuses(last_synced_upstream_commit), - set_if_green=can_set_green_mergeable_status, + set_from_sync=True, ) diff --git a/tests/ci/finish_check.py b/tests/ci/finish_check.py index bd83f875790..385caccc8cd 100644 --- a/tests/ci/finish_check.py +++ b/tests/ci/finish_check.py @@ -9,15 +9,10 @@ from commit_status_helper import ( get_commit, get_commit_filtered_statuses, post_commit_status, - set_mergeable_check, - trigger_mergeable_check, - update_upstream_sync_status, ) -from env_helper import GITHUB_REPOSITORY, GITHUB_UPSTREAM_REPOSITORY from get_robot_token import get_best_robot_token from pr_info import PRInfo from report import FAILURE, PENDING, SUCCESS, StatusType -from synchronizer_utils import SYNC_BRANCH_PREFIX def parse_args() -> argparse.Namespace: @@ -45,31 +40,7 @@ def main(): gh = Github(get_best_robot_token(), per_page=100) commit = get_commit(gh, pr_info.sha) - if pr_info.is_merge_queue: - # in MQ Mergeable check status must never be green if any failures in the workflow - if has_workflow_failures: - set_mergeable_check(commit, "workflow failed", FAILURE) - else: - # This must be the only place where green MCheck is set in the MQ (in the end of CI) to avoid early merge - set_mergeable_check(commit, "workflow passed", SUCCESS) - return - statuses = get_commit_filtered_statuses(commit) - state = trigger_mergeable_check(commit, statuses, set_if_green=True) - - # Process upstream StatusNames.SYNC - if ( - pr_info.head_ref.startswith(f"{SYNC_BRANCH_PREFIX}/pr/") - and GITHUB_REPOSITORY != GITHUB_UPSTREAM_REPOSITORY - ): - upstream_pr_number = int(pr_info.head_ref.split("/pr/", maxsplit=1)[1]) - update_upstream_sync_status( - upstream_pr_number, - pr_info.number, - gh, - state, - can_set_green_mergeable_status=True, - ) ci_running_statuses = [s for s in statuses if s.context == CI.StatusNames.CI] if not ci_running_statuses: diff --git a/tests/ci/functional_test_check.py b/tests/ci/functional_test_check.py index 9678efd8631..ee459ce35a0 100644 --- a/tests/ci/functional_test_check.py +++ b/tests/ci/functional_test_check.py @@ -122,6 +122,10 @@ def _get_statless_tests_to_run(pr_info: PRInfo) -> List[str]: for fpath in pr_info.changed_files: if re.match(r"tests/queries/0_stateless/[0-9]{5}", fpath): 
+ path_ = Path(REPO_COPY + "/" + fpath) + if not path_.exists(): + logging.info("File '%s' is removed - skip", fpath) + continue logging.info("File '%s' is changed and seems like a test", fpath) fname = fpath.split("/")[3] fname_without_ext = os.path.splitext(fname)[0] diff --git a/tests/ci/lambda_shared_package/lambda_shared/token.py b/tests/ci/lambda_shared_package/lambda_shared/token.py index f9860f6ad2a..9749122bd39 100644 --- a/tests/ci/lambda_shared_package/lambda_shared/token.py +++ b/tests/ci/lambda_shared_package/lambda_shared/token.py @@ -63,7 +63,10 @@ def get_access_token_by_key_app(private_key: str, app_id: int) -> str: "iss": app_id, } - encoded_jwt = jwt.encode(payload, private_key, algorithm="RS256") + # FIXME: apparently should be switched to this so that mypy is happy + # jwt_instance = JWT() + # encoded_jwt = jwt_instance.encode(payload, private_key, algorithm="RS256") + encoded_jwt = jwt.encode(payload, private_key, algorithm="RS256") # type: ignore installation_id = get_installation_id(encoded_jwt) return get_access_token_by_jwt(encoded_jwt, installation_id) diff --git a/tests/ci/merge_pr.py b/tests/ci/merge_pr.py index e1c7bf94ff5..37c08fc4efe 100644 --- a/tests/ci/merge_pr.py +++ b/tests/ci/merge_pr.py @@ -4,6 +4,7 @@ import argparse import logging +import sys from datetime import datetime from os import getenv from pprint import pformat @@ -17,11 +18,14 @@ from commit_status_helper import ( get_commit_filtered_statuses, get_commit, trigger_mergeable_check, + update_upstream_sync_status, ) from get_robot_token import get_best_robot_token from github_helper import GitHub, NamedUser, PullRequest, Repository from pr_info import PRInfo -from report import SUCCESS +from report import SUCCESS, FAILURE +from env_helper import GITHUB_UPSTREAM_REPOSITORY, GITHUB_REPOSITORY +from synchronizer_utils import SYNC_BRANCH_PREFIX # The team name for accepted approvals TEAM_NAME = getenv("GITHUB_TEAM_NAME", "core") @@ -243,17 +247,29 @@ def main(): repo = gh.get_repo(args.repo) if args.set_ci_status: - assert args.wf_status in ("failure", "success") + assert args.wf_status in (FAILURE, SUCCESS) # set mergeable check status and exit commit = get_commit(gh, args.pr_info.sha) statuses = get_commit_filtered_statuses(commit) - trigger_mergeable_check( + state = trigger_mergeable_check( commit, statuses, - set_if_green=True, workflow_failed=(args.wf_status != "success"), ) - return + + # Process upstream StatusNames.SYNC + pr_info = PRInfo() + if ( + pr_info.head_ref.startswith(f"{SYNC_BRANCH_PREFIX}/pr/") + and GITHUB_REPOSITORY != GITHUB_UPSTREAM_REPOSITORY + ): + print("Updating upstream statuses") + update_upstream_sync_status(pr_info, state) + + if args.wf_status != "success": + # exit with 1 to rerun on workflow failed job restart + sys.exit(1) + sys.exit(0) # An ugly and not nice fix to patch the wrong organization URL, # see https://github.com/PyGithub/PyGithub/issues/2395#issuecomment-1378629710 diff --git a/tests/ci/pr_info.py b/tests/ci/pr_info.py index fb25a29cc57..a411fc4e8f6 100644 --- a/tests/ci/pr_info.py +++ b/tests/ci/pr_info.py @@ -13,8 +13,11 @@ from env_helper import ( GITHUB_REPOSITORY, GITHUB_RUN_URL, GITHUB_SERVER_URL, + GITHUB_UPSTREAM_REPOSITORY, ) from lambda_shared_package.lambda_shared.pr import Labels +from get_robot_token import get_best_robot_token +from github_helper import GitHub NeedsDataType = Dict[str, Dict[str, Union[str, Dict[str, str]]]] @@ -432,6 +435,34 @@ class PRInfo: return True return False + def get_latest_sync_commit(self): + gh = 
GitHub(get_best_robot_token(), per_page=100) + assert self.head_ref.startswith("sync-upstream/pr/") + assert self.repo_full_name != GITHUB_UPSTREAM_REPOSITORY + upstream_repo = gh.get_repo(GITHUB_UPSTREAM_REPOSITORY) + upstream_pr_number = int(self.head_ref.split("/pr/", maxsplit=1)[1]) + upstream_pr = upstream_repo.get_pull(upstream_pr_number) + sync_repo = gh.get_repo(GITHUB_REPOSITORY) + sync_pr = sync_repo.get_pull(self.number) + # Find the commit that is in both repos, upstream and cloud + sync_commits = sync_pr.get_commits().reversed + upstream_commits = upstream_pr.get_commits().reversed + # Github objects are compared by _url attribute. We can't compare them directly and + # should compare commits by SHA1 + upstream_shas = [c.sha for c in upstream_commits] + logging.info("Commits in upstream PR:\n %s", ", ".join(upstream_shas)) + sync_shas = [c.sha for c in sync_commits] + logging.info("Commits in sync PR:\n %s", ", ".join(reversed(sync_shas))) + + # find latest synced commit + last_synced_upstream_commit = None + for commit in upstream_commits: + if commit.sha in sync_shas: + last_synced_upstream_commit = commit + break + assert last_synced_upstream_commit + return last_synced_upstream_commit + class FakePRInfo: def __init__(self): diff --git a/tests/ci/s3_helper.py b/tests/ci/s3_helper.py index da0ccb2b74d..86656e6e7c0 100644 --- a/tests/ci/s3_helper.py +++ b/tests/ci/s3_helper.py @@ -104,7 +104,7 @@ class S3Helper: self.client.upload_file(file_path, bucket_name, s3_path, ExtraArgs=metadata) url = self.s3_url(bucket_name, s3_path) - logging.info("Upload %s to %s. Meta: %s", file_path, url, metadata) + logging.info("Upload %s to %s Meta: %s", file_path, url, metadata) return url def delete_file_from_s3(self, bucket_name: str, s3_path: str) -> None: diff --git a/tests/ci/style_check.py b/tests/ci/style_check.py index 35e48246be9..9deae06d9f4 100644 --- a/tests/ci/style_check.py +++ b/tests/ci/style_check.py @@ -13,7 +13,7 @@ from typing import List, Tuple, Union import magic from docker_images_helper import get_docker_image, pull_image -from env_helper import IS_CI, REPO_COPY, TEMP_PATH +from env_helper import IS_CI, REPO_COPY, TEMP_PATH, GITHUB_EVENT_PATH from git_helper import GIT_PREFIX, git_runner from pr_info import PRInfo from report import ERROR, FAILURE, SUCCESS, JobReport, TestResults, read_test_results @@ -216,7 +216,8 @@ def main(): status=state, start_time=stopwatch.start_time_str, duration=stopwatch.duration_seconds, - additional_files=additional_files, + # add GITHUB_EVENT_PATH json file to have it in style check report. sometimes it's needed for debugging. 
diff --git a/tests/ci/s3_helper.py b/tests/ci/s3_helper.py
index da0ccb2b74d..86656e6e7c0 100644
--- a/tests/ci/s3_helper.py
+++ b/tests/ci/s3_helper.py
@@ -104,7 +104,7 @@ class S3Helper:
             self.client.upload_file(file_path, bucket_name, s3_path, ExtraArgs=metadata)
         url = self.s3_url(bucket_name, s3_path)
-        logging.info("Upload %s to %s. Meta: %s", file_path, url, metadata)
+        logging.info("Upload %s to %s Meta: %s", file_path, url, metadata)
         return url
 
     def delete_file_from_s3(self, bucket_name: str, s3_path: str) -> None:
diff --git a/tests/ci/style_check.py b/tests/ci/style_check.py
index 35e48246be9..9deae06d9f4 100644
--- a/tests/ci/style_check.py
+++ b/tests/ci/style_check.py
@@ -13,7 +13,7 @@ from typing import List, Tuple, Union
 import magic
 
 from docker_images_helper import get_docker_image, pull_image
-from env_helper import IS_CI, REPO_COPY, TEMP_PATH
+from env_helper import IS_CI, REPO_COPY, TEMP_PATH, GITHUB_EVENT_PATH
 from git_helper import GIT_PREFIX, git_runner
 from pr_info import PRInfo
 from report import ERROR, FAILURE, SUCCESS, JobReport, TestResults, read_test_results
@@ -216,7 +216,8 @@ def main():
         status=state,
         start_time=stopwatch.start_time_str,
         duration=stopwatch.duration_seconds,
-        additional_files=additional_files,
+        # add the GITHUB_EVENT_PATH json file to the style check report; it is sometimes needed for debugging
+        additional_files=additional_files + [Path(GITHUB_EVENT_PATH)],
     ).dump()
 
     if state in [ERROR, FAILURE]:
diff --git a/tests/ci/test_ci_options.py b/tests/ci/test_ci_options.py
index ee256f73abc..3f158e79f30 100644
--- a/tests/ci/test_ci_options.py
+++ b/tests/ci/test_ci_options.py
@@ -132,8 +132,7 @@ _TEST_JOB_LIST = [
     "ClickBench (release)",
     "ClickBench (aarch64)",
     "libFuzzer tests",
-    "ClickHouse build check",
-    "ClickHouse special build check",
+    "Builds",
     "Docs check",
     "Bugfix validation",
 ]
diff --git a/tests/integration/helpers/keeper_utils.py b/tests/integration/helpers/keeper_utils.py
index 39fa0d0f074..be710db37d1 100644
--- a/tests/integration/helpers/keeper_utils.py
+++ b/tests/integration/helpers/keeper_utils.py
@@ -124,27 +124,27 @@ class KeeperClient(object):
         return data
 
     def cd(self, path: str, timeout: float = 60.0):
-        self.execute_query(f"cd {path}", timeout)
+        self.execute_query(f"cd '{path}'", timeout)
 
     def ls(self, path: str, timeout: float = 60.0) -> list[str]:
-        return self.execute_query(f"ls {path}", timeout).split(" ")
+        return self.execute_query(f"ls '{path}'", timeout).split(" ")
 
     def create(self, path: str, value: str, timeout: float = 60.0):
-        self.execute_query(f"create {path} {value}", timeout)
+        self.execute_query(f"create '{path}' '{value}'", timeout)
 
     def get(self, path: str, timeout: float = 60.0) -> str:
-        return self.execute_query(f"get {path}", timeout)
+        return self.execute_query(f"get '{path}'", timeout)
 
     def set(self, path: str, value: str, version: tp.Optional[int] = None) -> None:
         self.execute_query(
-            f"set {path} {value} {version if version is not None else ''}"
+            f"set '{path}' '{value}' {version if version is not None else ''}"
         )
 
     def rm(self, path: str, version: tp.Optional[int] = None) -> None:
-        self.execute_query(f"rm {path} {version if version is not None else ''}")
+        self.execute_query(f"rm '{path}' {version if version is not None else ''}")
 
     def exists(self, path: str, timeout: float = 60.0) -> bool:
-        return bool(int(self.execute_query(f"exists {path}", timeout)))
+        return bool(int(self.execute_query(f"exists '{path}'", timeout)))
 
     def stop(self):
         if not self.stopped:
@@ -152,22 +152,22 @@ class KeeperClient(object):
             self.proc.communicate(b"exit\n", timeout=10.0)
 
     def sync(self, path: str, timeout: float = 60.0):
-        self.execute_query(f"sync {path}", timeout)
+        self.execute_query(f"sync '{path}'", timeout)
 
     def touch(self, path: str, timeout: float = 60.0):
-        self.execute_query(f"touch {path}", timeout)
+        self.execute_query(f"touch '{path}'", timeout)
 
     def find_big_family(self, path: str, n: int = 10, timeout: float = 60.0) -> str:
-        return self.execute_query(f"find_big_family {path} {n}", timeout)
+        return self.execute_query(f"find_big_family '{path}' {n}", timeout)
 
     def find_super_nodes(self, threshold: int, timeout: float = 60.0) -> str:
         return self.execute_query(f"find_super_nodes {threshold}", timeout)
 
     def get_direct_children_number(self, path: str, timeout: float = 60.0) -> str:
-        return self.execute_query(f"get_direct_children_number {path}", timeout)
+        return self.execute_query(f"get_direct_children_number '{path}'", timeout)
 
     def get_all_children_number(self, path: str, timeout: float = 60.0) -> str:
-        return self.execute_query(f"get_all_children_number {path}", timeout)
+        return self.execute_query(f"get_all_children_number '{path}'", timeout)
 
     def delete_stale_backups(self, timeout: float = 60.0) -> str:
         return self.execute_query("delete_stale_backups", timeout)
@@ -196,7 +196,7 @@ class KeeperClient(object):
         )
 
         return self.execute_query(
-            f"reconfig {operation} {joining or leaving or new_members}", timeout
+            f"reconfig {operation} '{joining or leaving or new_members}'", timeout
         )
 
     @classmethod
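A note on the quoting change above: wrapping each argument in single quotes protects paths and values that contain spaces, but a value that itself contains a single quote would still break the command line. A stricter, hypothetical alternative is shlex.quote, sketched here; the patch's simpler quoting is enough for the paths these tests actually use.

import shlex

path = "/clickhouse/node with spaces"
print(f"get '{path}'")             # what the patched helpers produce
print(f"get {shlex.quote(path)}")  # equivalent here, but also safe for embedded quotes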
f"reconfig {operation} {joining or leaving or new_members}", timeout + f"reconfig {operation} '{joining or leaving or new_members}'", timeout ) @classmethod diff --git a/tests/integration/test_MemoryTracking/configs/no_system_log.xml b/tests/integration/test_MemoryTracking/configs/no_system_log.xml index 3218dae4dc7..7d80c7fbf78 100644 --- a/tests/integration/test_MemoryTracking/configs/no_system_log.xml +++ b/tests/integration/test_MemoryTracking/configs/no_system_log.xml @@ -5,6 +5,7 @@ + diff --git a/tests/integration/test_backup_restore_new/test.py b/tests/integration/test_backup_restore_new/test.py index c67f63e3f6b..d8662fad011 100644 --- a/tests/integration/test_backup_restore_new/test.py +++ b/tests/integration/test_backup_restore_new/test.py @@ -1,5 +1,4 @@ import pytest -import asyncio import glob import re import random @@ -1486,6 +1485,7 @@ def test_backup_all(exclude_system_log_tables): "processors_profile_log", "asynchronous_insert_log", "backup_log", + "error_log", ] exclude_from_backup += ["system." + table_name for table_name in log_tables] diff --git a/tests/integration/test_backup_restore_s3/test.py b/tests/integration/test_backup_restore_s3/test.py index 967ed6a221c..d53335000a6 100644 --- a/tests/integration/test_backup_restore_s3/test.py +++ b/tests/integration/test_backup_restore_s3/test.py @@ -627,67 +627,126 @@ def test_user_specific_auth(start_cluster): create_user("superuser2") create_user("regularuser") - node.query("CREATE TABLE specific_auth (col UInt64) ENGINE=Memory") + node.query("CREATE TABLE specific_auth (col UInt64) ENGINE=MergeTree ORDER BY col") + node.query("INSERT INTO specific_auth VALUES (1)") - assert "Access" in node.query_and_get_error( - "BACKUP TABLE specific_auth TO S3('http://minio1:9001/root/data/backups/limited/backup1.zip')" + def backup_restore(backup, user, should_fail, on_cluster=False, base_backup=None): + on_cluster_clause = "ON CLUSTER 'cluster'" if on_cluster else "" + base_backup = ( + f" SETTINGS base_backup = {base_backup}" if base_backup is not None else "" + ) + backup_query = ( + f"BACKUP TABLE specific_auth {on_cluster_clause} TO {backup} {base_backup}" + ) + restore_query = f"RESTORE TABLE specific_auth {on_cluster_clause} FROM {backup}" + + if should_fail: + assert "Access" in node.query_and_get_error(backup_query, user=user) + else: + node.query(backup_query, user=user) + node.query("DROP TABLE specific_auth SYNC") + node.query(restore_query, user=user) + + backup_restore( + "S3('http://minio1:9001/root/data/backups/limited/backup1/')", + user=None, + should_fail=True, ) - assert "Access" in node.query_and_get_error( - "BACKUP TABLE specific_auth TO S3('http://minio1:9001/root/data/backups/limited/backup1.zip')", + + backup_restore( + "S3('http://minio1:9001/root/data/backups/limited/backup1/')", user="regularuser", + should_fail=True, ) - node.query( - "BACKUP TABLE specific_auth TO S3('http://minio1:9001/root/data/backups/limited/backup1.zip')", - user="superuser1", - ) - node.query( - "RESTORE TABLE specific_auth FROM S3('http://minio1:9001/root/data/backups/limited/backup1.zip')", + backup_restore( + "S3('http://minio1:9001/root/data/backups/limited/backup1/')", user="superuser1", + should_fail=False, ) - node.query( - "BACKUP TABLE specific_auth TO S3('http://minio1:9001/root/data/backups/limited/backup2.zip')", - user="superuser2", - ) - node.query( - "RESTORE TABLE specific_auth FROM S3('http://minio1:9001/root/data/backups/limited/backup2.zip')", + backup_restore( + 
"S3('http://minio1:9001/root/data/backups/limited/backup2/')", user="superuser2", + should_fail=False, ) assert "Access" in node.query_and_get_error( - "RESTORE TABLE specific_auth FROM S3('http://minio1:9001/root/data/backups/limited/backup1.zip')", + "RESTORE TABLE specific_auth FROM S3('http://minio1:9001/root/data/backups/limited/backup1/')", user="regularuser", ) - assert "HTTP response code: 403" in node.query_and_get_error( - "SELECT * FROM s3('http://minio1:9001/root/data/backups/limited/backup1.zip', 'RawBLOB')", + node.query("INSERT INTO specific_auth VALUES (2)") + + backup_restore( + "S3('http://minio1:9001/root/data/backups/limited/backup1_inc/')", user="regularuser", + should_fail=True, + base_backup="S3('http://minio1:9001/root/data/backups/limited/backup1/')", ) - node.query( - "SELECT * FROM s3('http://minio1:9001/root/data/backups/limited/backup1.zip', 'RawBLOB')", + backup_restore( + "S3('http://minio1:9001/root/data/backups/limited/backup1_inc/')", user="superuser1", + should_fail=False, + base_backup="S3('http://minio1:9001/root/data/backups/limited/backup1/')", + ) + + assert "Access" in node.query_and_get_error( + "RESTORE TABLE specific_auth FROM S3('http://minio1:9001/root/data/backups/limited/backup1_inc/')", + user="regularuser", ) assert "Access Denied" in node.query_and_get_error( - "BACKUP TABLE specific_auth ON CLUSTER 'cluster' TO S3('http://minio1:9001/root/data/backups/limited/backup3/')", + "SELECT * FROM s3('http://minio1:9001/root/data/backups/limited/backup1/*', 'RawBLOB')", user="regularuser", ) node.query( - "BACKUP TABLE specific_auth ON CLUSTER 'cluster' TO S3('http://minio1:9001/root/data/backups/limited/backup3/')", + "SELECT * FROM s3('http://minio1:9001/root/data/backups/limited/backup1/*', 'RawBLOB')", user="superuser1", ) + backup_restore( + "S3('http://minio1:9001/root/data/backups/limited/backup3/')", + user="regularuser", + should_fail=True, + on_cluster=True, + ) + + backup_restore( + "S3('http://minio1:9001/root/data/backups/limited/backup3/')", + user="superuser1", + should_fail=False, + on_cluster=True, + ) + assert "Access Denied" in node.query_and_get_error( "RESTORE TABLE specific_auth ON CLUSTER 'cluster' FROM S3('http://minio1:9001/root/data/backups/limited/backup3/')", user="regularuser", ) - node.query( - "RESTORE TABLE specific_auth ON CLUSTER 'cluster' FROM S3('http://minio1:9001/root/data/backups/limited/backup3/')", + node.query("INSERT INTO specific_auth VALUES (3)") + + backup_restore( + "S3('http://minio1:9001/root/data/backups/limited/backup3_inc/')", + user="regularuser", + should_fail=True, + on_cluster=True, + base_backup="S3('http://minio1:9001/root/data/backups/limited/backup3/')", + ) + + backup_restore( + "S3('http://minio1:9001/root/data/backups/limited/backup3_inc/')", user="superuser1", + should_fail=False, + on_cluster=True, + base_backup="S3('http://minio1:9001/root/data/backups/limited/backup3/')", + ) + + assert "Access Denied" in node.query_and_get_error( + "RESTORE TABLE specific_auth ON CLUSTER 'cluster' FROM S3('http://minio1:9001/root/data/backups/limited/backup3_inc/')", + user="regularuser", ) assert "Access Denied" in node.query_and_get_error( diff --git a/tests/integration/test_checking_s3_blobs_paranoid/test.py b/tests/integration/test_checking_s3_blobs_paranoid/test.py index 912fd3cc163..1ed70e20b79 100644 --- a/tests/integration/test_checking_s3_blobs_paranoid/test.py +++ b/tests/integration/test_checking_s3_blobs_paranoid/test.py @@ -300,7 +300,7 @@ def 
diff --git a/tests/integration/test_checking_s3_blobs_paranoid/test.py b/tests/integration/test_checking_s3_blobs_paranoid/test.py
index 912fd3cc163..1ed70e20b79 100644
--- a/tests/integration/test_checking_s3_blobs_paranoid/test.py
+++ b/tests/integration/test_checking_s3_blobs_paranoid/test.py
@@ -300,7 +300,7 @@ def test_when_s3_broken_pipe_at_upload_is_retried(cluster, broken_s3):
             LIMIT 1000000
         SETTINGS
             s3_max_single_part_upload_size=100,
-            s3_min_upload_part_size=100000,
+            s3_min_upload_part_size=1000000,
             s3_check_objects_after_upload=0
         """,
         query_id=insert_query_id,
@@ -311,7 +311,7 @@ def test_when_s3_broken_pipe_at_upload_is_retried(cluster, broken_s3):
     )
 
     assert create_multipart == 1
-    assert upload_parts == 69
+    assert upload_parts == 7
     assert s3_errors == 3
 
     broken_s3.setup_at_part_upload(
@@ -512,7 +512,6 @@ def test_when_s3_connection_reset_by_peer_at_create_mpu_retried(
     ), error
 
 
-@pytest.mark.skip(reason="test is flaky, waiting ClickHouse/issues/64451")
 def test_query_is_canceled_with_inf_retries(cluster, broken_s3):
     node = cluster.instances["node_with_inf_s3_retries"]
 
@@ -534,11 +533,12 @@ def test_query_is_canceled_with_inf_retries(cluster, broken_s3):
         SELECT
             *
         FROM system.numbers
-        LIMIT 1000000
+        LIMIT 1000000000
         SETTINGS
             s3_max_single_part_upload_size=100,
             s3_min_upload_part_size=10000,
-            s3_check_objects_after_upload=0
+            s3_check_objects_after_upload=0,
+            s3_max_inflight_parts_for_one_file=1000
         """,
         query_id=insert_query_id,
     )
diff --git a/tests/integration/test_composable_protocols/configs/config.xml b/tests/integration/test_composable_protocols/configs/config.xml
index 09a512eb5a4..7d39363bc62 100644
--- a/tests/integration/test_composable_protocols/configs/config.xml
+++ b/tests/integration/test_composable_protocols/configs/config.xml
@@ -58,6 +58,26 @@
         8444
         https protocol endpoint
+
+            tls
+            http
+            0.0.0.0
+            8445
+            https protocol with TLSv1_2 minimum version
+            sslv2,sslv3,tlsv1,tlsv1_1
+            /etc/clickhouse-server/config.d/server.crt
+            /etc/clickhouse-server/config.d/server.key
+
+
+            tls
+            http
+            0.0.0.0
+            8446
+            https protocol with TLSv1_3 minimum version
+            sslv2,sslv3,tlsv1,tlsv1_1,tlsv1_2
+            /etc/clickhouse-server/config.d/server.crt
+            /etc/clickhouse-server/config.d/server.key
+
diff --git a/tests/integration/test_composable_protocols/test.py b/tests/integration/test_composable_protocols/test.py
index aa5a1e766e6..241d1505433 100644
--- a/tests/integration/test_composable_protocols/test.py
+++ b/tests/integration/test_composable_protocols/test.py
@@ -7,6 +7,7 @@ from helpers.client import Client
 import urllib.request, urllib.parse
 import subprocess
 import socket
+import warnings
 
 SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
 
@@ -27,18 +28,34 @@ def setup_nodes():
         cluster.shutdown()
 
 
-def execute_query_https(host, port, query):
+def execute_query_https(host, port, query, version=None):
     url = f"https://{host}:{port}/?query={urllib.parse.quote(query)}"
 
     ctx = ssl.create_default_context()
     ctx.check_hostname = False
     ctx.verify_mode = ssl.CERT_NONE
+    if version:
+        ctx.minimum_version = version
+        ctx.maximum_version = version
 
     request = urllib.request.Request(url)
     response = urllib.request.urlopen(request, context=ctx).read()
 
     return response.decode("utf-8")
 
 
+def execute_query_https_unsupported(host, port, query, version=None):
+    try:
+        execute_query_https(host, port, query, version)
+    except Exception as e:
+        e_text = str(e)
+        if "NO_PROTOCOLS_AVAILABLE" in e_text:
+            return True
+        if "TLSV1_ALERT_PROTOCOL_VERSION" in e_text:
+            return True
+        raise
+    return False
+
+
 def execute_query_http(host, port, query):
     url = f"http://{host}:{port}/?query={urllib.parse.quote(query)}"
 
@@ -84,6 +101,49 @@ def test_connections():
 
     assert execute_query_https(server.ip_address, 8444, "SELECT 1") == "1\n"
 
+    warnings.filterwarnings("ignore", category=DeprecationWarning)
+
+    assert execute_query_https_unsupported(
+        server.ip_address, 8445, "SELECT 1", version=ssl.TLSVersion.SSLv3
+    )
+    assert execute_query_https_unsupported(
+        server.ip_address, 8445, "SELECT 1", version=ssl.TLSVersion.TLSv1
+    )
+    assert execute_query_https_unsupported(
+        server.ip_address, 8445, "SELECT 1", version=ssl.TLSVersion.TLSv1_1
+    )
+    assert (
+        execute_query_https(
+            server.ip_address, 8445, "SELECT 1", version=ssl.TLSVersion.TLSv1_2
+        )
+        == "1\n"
+    )
+    assert (
+        execute_query_https(
+            server.ip_address, 8445, "SELECT 1", version=ssl.TLSVersion.TLSv1_3
+        )
+        == "1\n"
+    )
+
+    assert execute_query_https_unsupported(
+        server.ip_address, 8446, "SELECT 1", version=ssl.TLSVersion.SSLv3
+    )
+    assert execute_query_https_unsupported(
+        server.ip_address, 8446, "SELECT 1", version=ssl.TLSVersion.TLSv1
+    )
+    assert execute_query_https_unsupported(
+        server.ip_address, 8446, "SELECT 1", version=ssl.TLSVersion.TLSv1_1
+    )
+    assert execute_query_https_unsupported(
+        server.ip_address, 8446, "SELECT 1", version=ssl.TLSVersion.TLSv1_2
+    )
+    assert (
+        execute_query_https(
+            server.ip_address, 8446, "SELECT 1", version=ssl.TLSVersion.TLSv1_3
+        )
+        == "1\n"
+    )
+
     data = "PROXY TCP4 255.255.255.255 255.255.255.255 65535 65535\r\n\0\021ClickHouse client\024\r\253\251\003\0\007default\0\004\001\0\001\0\0\t0.0.0.0:0\001\tmilovidov\021milovidov-desktop\21ClickHouse client\024\r\253\251\003\0\001\0\0\0\002\001\025SELECT 'Hello, world'\002\0\247\203\254l\325\\z|\265\254F\275\333\206\342\024\202\024\0\0\0\n\0\0\0\240\01\0\02\377\377\377\377\0\0\0"
     assert (
         netcat(server.ip_address, 9100, bytearray(data, "latin-1")).find(
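The execute_query_https() change above pins the client to a single TLS version by setting both bounds on the SSLContext; the filterwarnings call is presumably there because selecting legacy versions such as TLSv1 raises a DeprecationWarning on recent Python. A minimal standalone sketch (not part of the patch; certificate checks are disabled only because the test certificates are self-signed):

import ssl

ctx = ssl.create_default_context()
ctx.check_hostname = False
ctx.verify_mode = ssl.CERT_NONE
ctx.minimum_version = ssl.TLSVersion.TLSv1_2
ctx.maximum_version = ssl.TLSVersion.TLSv1_2  # handshake fails unless the server allows exactly TLS 1.2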
diff --git a/tests/integration/test_config_reloader_interval/__init__.py b/tests/integration/test_config_reloader_interval/__init__.py
new file mode 100644
index 00000000000..e5a0d9b4834
--- /dev/null
+++ b/tests/integration/test_config_reloader_interval/__init__.py
@@ -0,0 +1 @@
+#!/usr/bin/env python3
diff --git a/tests/integration/test_config_reloader_interval/configs/config_reloader.xml b/tests/integration/test_config_reloader_interval/configs/config_reloader.xml
new file mode 100644
index 00000000000..1dc9a59bd9d
--- /dev/null
+++ b/tests/integration/test_config_reloader_interval/configs/config_reloader.xml
@@ -0,0 +1,4 @@
+
+    1000
+
diff --git a/tests/integration/test_config_reloader_interval/test.py b/tests/integration/test_config_reloader_interval/test.py
new file mode 100644
index 00000000000..22b66ecac30
--- /dev/null
+++ b/tests/integration/test_config_reloader_interval/test.py
@@ -0,0 +1,52 @@
+#!/usr/bin/env python3
+
+import pytest
+import fnmatch
+
+from helpers.cluster import ClickHouseCluster
+from helpers.client import QueryRuntimeException
+
+cluster = ClickHouseCluster(__file__)
+
+node = cluster.add_instance(
+    "node",
+    main_configs=["configs/config_reloader.xml"],
+)
+
+
+@pytest.fixture(scope="module")
+def start_cluster():
+    try:
+        cluster.start()
+        yield cluster
+    finally:
+        cluster.shutdown()
+
+
+def test_reload_config(start_cluster):
+    assert node.wait_for_log_line(
+        f"Config reload interval set to 1000ms", look_behind_lines=2000
+    )
+
+    assert (
+        node.query(
+            "SELECT value from system.server_settings where name = 'config_reload_interval_ms'"
+        )
+        == "1000\n"
+    )
+    node.replace_in_config(
+        "/etc/clickhouse-server/config.d/config_reloader.xml",
+        "1000",
+        "7777",
+    )
+
+    assert node.wait_for_log_line(
+        f"Config reload interval changed to 7777ms", look_behind_lines=2000
+    )
+
+    assert (
+        node.query(
+            "SELECT value from system.server_settings where name = 'config_reload_interval_ms'"
+        )
+        == "7777\n"
+    )
diff --git a/tests/integration/test_config_xml_full/configs/config.d/error_log.xml b/tests/integration/test_config_xml_full/configs/config.d/error_log.xml
new file mode 100644
index 00000000000..903d8699f5c
--- /dev/null
+++ b/tests/integration/test_config_xml_full/configs/config.d/error_log.xml
@@ -0,0 +1,8 @@
+
+
+        system
+        error_log
+        7500
+        1000
+
+
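A hedged usage sketch, not part of the patch: with an error_log section like the one above enabled, a test could check that the table is present and queryable, assuming the usual integration-test node fixture and that logs have been flushed.

node.query("SYSTEM FLUSH LOGS")
# the real check is simply that this query does not throw
count = int(node.query("SELECT count() FROM system.error_log"))
assert count >= 0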
diff --git a/tests/integration/test_config_xml_full/configs/config.xml b/tests/integration/test_config_xml_full/configs/config.xml
index 628e1432350..61aa0a5c724 100644
--- a/tests/integration/test_config_xml_full/configs/config.xml
+++ b/tests/integration/test_config_xml_full/configs/config.xml
@@ -756,6 +756,14 @@
         1000
+
+
+            system
+            error_log
+            7500
+            1000
+
+