mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-21 23:21:59 +00:00
Merge branch 'master' into fix-for-local-object-storage
This commit is contained in:
commit
3938d7156a
1
.github/PULL_REQUEST_TEMPLATE.md
vendored
1
.github/PULL_REQUEST_TEMPLATE.md
vendored
@ -12,6 +12,7 @@ tests/ci/cancel_and_rerun_workflow_lambda/app.py
|
||||
- Build/Testing/Packaging Improvement
|
||||
- Documentation (changelog entry is not required)
|
||||
- Bug Fix (user-visible misbehavior in an official stable release)
|
||||
- CI Fix or Improvement (changelog entry is not required)
|
||||
- Not for changelog (changelog entry is not required)
|
||||
|
||||
|
||||
|
4
.github/workflows/backport_branches.yml
vendored
4
.github/workflows/backport_branches.yml
vendored
@ -67,8 +67,6 @@ jobs:
|
||||
test_name: Compatibility check (amd64)
|
||||
runner_type: style-checker
|
||||
data: ${{ needs.RunConfig.outputs.data }}
|
||||
run_command: |
|
||||
python3 compatibility_check.py --check-name "Compatibility check (amd64)" --check-glibc --check-distributions
|
||||
CompatibilityCheckAarch64:
|
||||
needs: [RunConfig, BuilderDebAarch64]
|
||||
if: ${{ !failure() && !cancelled() }}
|
||||
@ -77,8 +75,6 @@ jobs:
|
||||
test_name: Compatibility check (aarch64)
|
||||
runner_type: style-checker
|
||||
data: ${{ needs.RunConfig.outputs.data }}
|
||||
run_command: |
|
||||
python3 compatibility_check.py --check-name "Compatibility check (aarch64)" --check-glibc
|
||||
#########################################################################################
|
||||
#################################### ORDINARY BUILDS ####################################
|
||||
#########################################################################################
|
||||
|
28
.github/workflows/master.yml
vendored
28
.github/workflows/master.yml
vendored
@ -73,8 +73,6 @@ jobs:
|
||||
test_name: Compatibility check (amd64)
|
||||
runner_type: style-checker
|
||||
data: ${{ needs.RunConfig.outputs.data }}
|
||||
run_command: |
|
||||
python3 compatibility_check.py --check-name "Compatibility check (amd64)" --check-glibc --check-distributions
|
||||
CompatibilityCheckAarch64:
|
||||
needs: [RunConfig, BuilderDebAarch64]
|
||||
if: ${{ !failure() && !cancelled() }}
|
||||
@ -83,8 +81,6 @@ jobs:
|
||||
test_name: Compatibility check (aarch64)
|
||||
runner_type: style-checker
|
||||
data: ${{ needs.RunConfig.outputs.data }}
|
||||
run_command: |
|
||||
python3 compatibility_check.py --check-name "Compatibility check (aarch64)" --check-glibc
|
||||
#########################################################################################
|
||||
#################################### ORDINARY BUILDS ####################################
|
||||
#########################################################################################
|
||||
@ -375,28 +371,12 @@ jobs:
|
||||
test_name: Stateless tests (release)
|
||||
runner_type: func-tester
|
||||
data: ${{ needs.RunConfig.outputs.data }}
|
||||
FunctionalStatelessTestReleaseDatabaseReplicated:
|
||||
FunctionalStatelessTestReleaseAnalyzerS3Replicated:
|
||||
needs: [RunConfig, BuilderDebRelease]
|
||||
if: ${{ !failure() && !cancelled() }}
|
||||
uses: ./.github/workflows/reusable_test.yml
|
||||
with:
|
||||
test_name: Stateless tests (release, DatabaseReplicated)
|
||||
runner_type: func-tester
|
||||
data: ${{ needs.RunConfig.outputs.data }}
|
||||
FunctionalStatelessTestReleaseAnalyzer:
|
||||
needs: [RunConfig, BuilderDebRelease]
|
||||
if: ${{ !failure() && !cancelled() }}
|
||||
uses: ./.github/workflows/reusable_test.yml
|
||||
with:
|
||||
test_name: Stateless tests (release, analyzer)
|
||||
runner_type: func-tester
|
||||
data: ${{ needs.RunConfig.outputs.data }}
|
||||
FunctionalStatelessTestReleaseS3:
|
||||
needs: [RunConfig, BuilderDebRelease]
|
||||
if: ${{ !failure() && !cancelled() }}
|
||||
uses: ./.github/workflows/reusable_test.yml
|
||||
with:
|
||||
test_name: Stateless tests (release, s3 storage)
|
||||
test_name: Stateless tests (release, analyzer, s3, DatabaseReplicated)
|
||||
runner_type: func-tester
|
||||
data: ${{ needs.RunConfig.outputs.data }}
|
||||
FunctionalStatelessTestS3Debug:
|
||||
@ -825,9 +805,7 @@ jobs:
|
||||
- MarkReleaseReady
|
||||
- FunctionalStatelessTestDebug
|
||||
- FunctionalStatelessTestRelease
|
||||
- FunctionalStatelessTestReleaseDatabaseReplicated
|
||||
- FunctionalStatelessTestReleaseAnalyzer
|
||||
- FunctionalStatelessTestReleaseS3
|
||||
- FunctionalStatelessTestReleaseAnalyzerS3Replicated
|
||||
- FunctionalStatelessTestAarch64
|
||||
- FunctionalStatelessTestAsan
|
||||
- FunctionalStatelessTestTsan
|
||||
|
46
.github/workflows/pull_request.yml
vendored
46
.github/workflows/pull_request.yml
vendored
@ -117,8 +117,6 @@ jobs:
|
||||
test_name: Compatibility check (amd64)
|
||||
runner_type: style-checker
|
||||
data: ${{ needs.RunConfig.outputs.data }}
|
||||
run_command: |
|
||||
python3 compatibility_check.py --check-name "Compatibility check (amd64)" --check-glibc --check-distributions
|
||||
CompatibilityCheckAarch64:
|
||||
needs: [RunConfig, BuilderDebAarch64]
|
||||
if: ${{ !failure() && !cancelled() }}
|
||||
@ -127,8 +125,6 @@ jobs:
|
||||
test_name: Compatibility check (aarch64)
|
||||
runner_type: style-checker
|
||||
data: ${{ needs.RunConfig.outputs.data }}
|
||||
run_command: |
|
||||
python3 compatibility_check.py --check-name "Compatibility check (aarch64)" --check-glibc
|
||||
#########################################################################################
|
||||
#################################### ORDINARY BUILDS ####################################
|
||||
#########################################################################################
|
||||
@ -391,28 +387,12 @@ jobs:
|
||||
test_name: Stateless tests (release)
|
||||
runner_type: func-tester
|
||||
data: ${{ needs.RunConfig.outputs.data }}
|
||||
FunctionalStatelessTestReleaseDatabaseReplicated:
|
||||
FunctionalStatelessTestReleaseAnalyzerS3Replicated:
|
||||
needs: [RunConfig, BuilderDebRelease]
|
||||
if: ${{ !failure() && !cancelled() }}
|
||||
uses: ./.github/workflows/reusable_test.yml
|
||||
with:
|
||||
test_name: Stateless tests (release, DatabaseReplicated)
|
||||
runner_type: func-tester
|
||||
data: ${{ needs.RunConfig.outputs.data }}
|
||||
FunctionalStatelessTestReleaseAnalyzer:
|
||||
needs: [RunConfig, BuilderDebRelease]
|
||||
if: ${{ !failure() && !cancelled() }}
|
||||
uses: ./.github/workflows/reusable_test.yml
|
||||
with:
|
||||
test_name: Stateless tests (release, analyzer)
|
||||
runner_type: func-tester
|
||||
data: ${{ needs.RunConfig.outputs.data }}
|
||||
FunctionalStatelessTestReleaseS3:
|
||||
needs: [RunConfig, BuilderDebRelease]
|
||||
if: ${{ !failure() && !cancelled() }}
|
||||
uses: ./.github/workflows/reusable_test.yml
|
||||
with:
|
||||
test_name: Stateless tests (release, s3 storage)
|
||||
test_name: Stateless tests (release, analyzer, s3, DatabaseReplicated)
|
||||
runner_type: func-tester
|
||||
data: ${{ needs.RunConfig.outputs.data }}
|
||||
FunctionalStatelessTestS3Debug:
|
||||
@ -750,14 +730,6 @@ jobs:
|
||||
#############################################################################################
|
||||
############################# INTEGRATION TESTS #############################################
|
||||
#############################################################################################
|
||||
IntegrationTestsAsan:
|
||||
needs: [RunConfig, BuilderDebAsan]
|
||||
if: ${{ !failure() && !cancelled() }}
|
||||
uses: ./.github/workflows/reusable_test.yml
|
||||
with:
|
||||
test_name: Integration tests (asan)
|
||||
runner_type: stress-tester
|
||||
data: ${{ needs.RunConfig.outputs.data }}
|
||||
IntegrationTestsAnalyzerAsan:
|
||||
needs: [RunConfig, BuilderDebAsan]
|
||||
if: ${{ !failure() && !cancelled() }}
|
||||
@ -774,14 +746,6 @@ jobs:
|
||||
test_name: Integration tests (tsan)
|
||||
runner_type: stress-tester
|
||||
data: ${{ needs.RunConfig.outputs.data }}
|
||||
IntegrationTestsRelease:
|
||||
needs: [RunConfig, BuilderDebRelease]
|
||||
if: ${{ !failure() && !cancelled() }}
|
||||
uses: ./.github/workflows/reusable_test.yml
|
||||
with:
|
||||
test_name: Integration tests (release)
|
||||
runner_type: stress-tester
|
||||
data: ${{ needs.RunConfig.outputs.data }}
|
||||
IntegrationTestsAarch64:
|
||||
needs: [RunConfig, BuilderDebAarch64]
|
||||
if: ${{ !failure() && !cancelled() }}
|
||||
@ -890,8 +854,6 @@ jobs:
|
||||
- TestsBugfixCheck
|
||||
- FunctionalStatelessTestDebug
|
||||
- FunctionalStatelessTestRelease
|
||||
- FunctionalStatelessTestReleaseDatabaseReplicated
|
||||
- FunctionalStatelessTestReleaseAnalyzer
|
||||
- FunctionalStatelessTestAarch64
|
||||
- FunctionalStatelessTestAsan
|
||||
- FunctionalStatelessTestTsan
|
||||
@ -904,9 +866,9 @@ jobs:
|
||||
- FunctionalStatefulTestTsan
|
||||
- FunctionalStatefulTestMsan
|
||||
- FunctionalStatefulTestUBsan
|
||||
- FunctionalStatelessTestReleaseS3
|
||||
- FunctionalStatelessTestS3Debug
|
||||
- FunctionalStatelessTestS3Tsan
|
||||
- FunctionalStatelessTestReleaseAnalyzerS3Replicated
|
||||
- FunctionalStatefulTestReleaseParallelReplicas
|
||||
- FunctionalStatefulTestAsanParallelReplicas
|
||||
- FunctionalStatefulTestTsanParallelReplicas
|
||||
@ -927,10 +889,8 @@ jobs:
|
||||
- ASTFuzzerTestTsan
|
||||
- ASTFuzzerTestMSan
|
||||
- ASTFuzzerTestUBSan
|
||||
- IntegrationTestsAsan
|
||||
- IntegrationTestsAnalyzerAsan
|
||||
- IntegrationTestsTsan
|
||||
- IntegrationTestsRelease
|
||||
- IntegrationTestsAarch64
|
||||
- IntegrationTestsFlakyCheck
|
||||
- PerformanceComparisonX86
|
||||
|
4
.github/workflows/release_branches.yml
vendored
4
.github/workflows/release_branches.yml
vendored
@ -68,8 +68,6 @@ jobs:
|
||||
test_name: Compatibility check (amd64)
|
||||
runner_type: style-checker
|
||||
data: ${{ needs.RunConfig.outputs.data }}
|
||||
run_command: |
|
||||
python3 compatibility_check.py --check-name "Compatibility check (amd64)" --check-glibc --check-distributions
|
||||
CompatibilityCheckAarch64:
|
||||
needs: [RunConfig, BuilderDebAarch64]
|
||||
if: ${{ !failure() && !cancelled() }}
|
||||
@ -78,8 +76,6 @@ jobs:
|
||||
test_name: Compatibility check (aarch64)
|
||||
runner_type: style-checker
|
||||
data: ${{ needs.RunConfig.outputs.data }}
|
||||
run_command: |
|
||||
python3 compatibility_check.py --check-name "Compatibility check (aarch64)" --check-glibc
|
||||
#########################################################################################
|
||||
#################################### ORDINARY BUILDS ####################################
|
||||
#########################################################################################
|
||||
|
@ -16,6 +16,7 @@
|
||||
#ci_set_reduced
|
||||
#ci_set_arm
|
||||
#ci_set_integration
|
||||
#ci_set_analyzer
|
||||
|
||||
## To run specified job in CI:
|
||||
#job_<JOB NAME>
|
||||
|
156
CHANGELOG.md
156
CHANGELOG.md
@ -1,9 +1,165 @@
|
||||
### Table of Contents
|
||||
**[ClickHouse release v24.2, 2024-02-29](#242)**<br/>
|
||||
**[ClickHouse release v24.1, 2024-01-30](#241)**<br/>
|
||||
**[Changelog for 2023](https://clickhouse.com/docs/en/whats-new/changelog/2023/)**<br/>
|
||||
|
||||
# 2024 Changelog
|
||||
|
||||
### <a id="242"></a> ClickHouse release 24.2, 2024-02-29
|
||||
|
||||
#### Backward Incompatible Change
|
||||
* Validate suspicious/experimental types in nested types. Previously we didn't validate such types (except JSON) in nested types like Array/Tuple/Map. [#59385](https://github.com/ClickHouse/ClickHouse/pull/59385) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* Add sanity check for number of threads and block sizes. [#60138](https://github.com/ClickHouse/ClickHouse/pull/60138) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Don't infer floats in exponential notation by default. Add a setting `input_format_try_infer_exponent_floats` that will restore previous behaviour (disabled by default). Closes [#59476](https://github.com/ClickHouse/ClickHouse/issues/59476). [#59500](https://github.com/ClickHouse/ClickHouse/pull/59500) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* Allow alter operations to be surrounded by parentheses. The emission of parentheses can be controlled by the `format_alter_operations_with_parentheses` config. By default, in formatted queries the parentheses are emitted as we store the formatted alter operations in some places as metadata (e.g.: mutations). The new syntax clarifies some of the queries where alter operations end in a list. E.g.: `ALTER TABLE x MODIFY TTL date GROUP BY a, b, DROP COLUMN c` cannot be parsed properly with the old syntax. In the new syntax the query `ALTER TABLE x (MODIFY TTL date GROUP BY a, b), (DROP COLUMN c)` is obvious. Older versions are not able to read the new syntax, therefore using the new syntax might cause issues if newer and older versions of ClickHouse are mixed in a single cluster. [#59532](https://github.com/ClickHouse/ClickHouse/pull/59532) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)).
|
||||
|
||||
#### New Feature
|
||||
* Added new syntax which allows to specify definer user in View/Materialized View. This allows to execute selects/inserts from views without explicit grants for underlying tables. So, a View will encapsulate the grants. [#54901](https://github.com/ClickHouse/ClickHouse/pull/54901) [#60439](https://github.com/ClickHouse/ClickHouse/pull/60439) ([pufit](https://github.com/pufit)).
|
||||
* Try to detect file format automatically during schema inference if it's unknown in `file/s3/hdfs/url/azureBlobStorage` engines. Closes [#50576](https://github.com/ClickHouse/ClickHouse/issues/50576). [#59092](https://github.com/ClickHouse/ClickHouse/pull/59092) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* Implement auto-adjustment for asynchronous insert timeouts. The following settings are introduced: async_insert_poll_timeout_ms, async_insert_use_adaptive_busy_timeout, async_insert_busy_timeout_min_ms, async_insert_busy_timeout_max_ms, async_insert_busy_timeout_increase_rate, async_insert_busy_timeout_decrease_rate. [#58486](https://github.com/ClickHouse/ClickHouse/pull/58486) ([Julia Kartseva](https://github.com/jkartseva)).
|
||||
* Allow to set up a quota for maximum sequential login failures. [#54737](https://github.com/ClickHouse/ClickHouse/pull/54737) ([Alexey Gerasimchuck](https://github.com/Demilivor)).
|
||||
* A new aggregate function `groupArrayIntersect`. Follows up: [#49862](https://github.com/ClickHouse/ClickHouse/issues/49862). [#59598](https://github.com/ClickHouse/ClickHouse/pull/59598) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
|
||||
* Backup & Restore support for `AzureBlobStorage`. Resolves [#50747](https://github.com/ClickHouse/ClickHouse/issues/50747). [#56988](https://github.com/ClickHouse/ClickHouse/pull/56988) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
|
||||
* The user can now specify the template string directly in the query using `format_schema_rows_template` as an alternative to `format_template_row`. Closes [#31363](https://github.com/ClickHouse/ClickHouse/issues/31363). [#59088](https://github.com/ClickHouse/ClickHouse/pull/59088) ([Shaun Struwig](https://github.com/Blargian)).
|
||||
* Implemented automatic conversion of merge tree tables of different kinds to replicated engine. Create empty `convert_to_replicated` file in table's data directory (`/clickhouse/store/xxx/xxxyyyyy-yyyy-yyyy-yyyy-yyyyyyyyyyyy/`) and that table will be converted automatically on next server start. [#57798](https://github.com/ClickHouse/ClickHouse/pull/57798) ([Kirill](https://github.com/kirillgarbar)).
|
||||
* Added function `seriesOutliersTukey` to detect outliers in series data using Tukey's fences algorithm. [#58632](https://github.com/ClickHouse/ClickHouse/pull/58632) ([Bhavna Jindal](https://github.com/bhavnajindal)).
|
||||
* Added query `ALTER TABLE table FORGET PARTITION partition` that removes ZooKeeper nodes, related to an empty partition. [#59507](https://github.com/ClickHouse/ClickHouse/pull/59507) ([Sergei Trifonov](https://github.com/serxa)). This is an expert-level feature.
|
||||
* Support JWT credentials file for the NATS table engine. [#59543](https://github.com/ClickHouse/ClickHouse/pull/59543) ([Nickolaj Jepsen](https://github.com/nickolaj-jepsen)).
|
||||
* Implemented system.dns_cache table, which can be useful for debugging DNS issues. [#59856](https://github.com/ClickHouse/ClickHouse/pull/59856) ([Kirill Nikiforov](https://github.com/allmazz)).
|
||||
* The codec `LZ4HC` will accept a new level 2, which is faster than the previous minimum level 3, at the expense of less compression. In previous versions, `LZ4HC(2)` and less was the same as `LZ4HC(3)`. Author: [Cyan4973](https://github.com/Cyan4973). [#60090](https://github.com/ClickHouse/ClickHouse/pull/60090) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Implemented `system.dns_cache` table, which can be useful for debugging DNS issues. New server setting dns_cache_max_size. [#60257](https://github.com/ClickHouse/ClickHouse/pull/60257) ([Kirill Nikiforov](https://github.com/allmazz)).
|
||||
* Support single-argument version for the `merge` table function, as `merge(['db_name', ] 'tables_regexp')`. [#60372](https://github.com/ClickHouse/ClickHouse/pull/60372) ([豪肥肥](https://github.com/HowePa)).
|
||||
* Support negative positional arguments. Closes [#57736](https://github.com/ClickHouse/ClickHouse/issues/57736). [#58292](https://github.com/ClickHouse/ClickHouse/pull/58292) ([flynn](https://github.com/ucasfl)).
|
||||
* Support specifying a set of permitted users for specific S3 settings in config using `user` key. [#60144](https://github.com/ClickHouse/ClickHouse/pull/60144) ([Antonio Andelic](https://github.com/antonio2368)).
|
||||
* Added table function `mergeTreeIndex`. It represents the contents of index and marks files of `MergeTree` tables. It can be used for introspection. Syntax: `mergeTreeIndex(database, table, [with_marks = true])` where `database.table` is an existing table with `MergeTree` engine. [#58140](https://github.com/ClickHouse/ClickHouse/pull/58140) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
|
||||
#### Experimental Feature
|
||||
* Add function `variantType` that returns Enum with variant type name for each row. [#59398](https://github.com/ClickHouse/ClickHouse/pull/59398) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* Support `LEFT JOIN`, `ALL INNER JOIN`, and simple subqueries for parallel replicas (only with analyzer). New setting `parallel_replicas_prefer_local_join` chooses local `JOIN` execution (by default) vs `GLOBAL JOIN`. All tables should exist on every replica from `cluster_for_parallel_replicas`. New settings `min_external_table_block_size_rows` and `min_external_table_block_size_bytes` are used to squash small blocks that are sent for temporary tables (only with analyzer). [#58916](https://github.com/ClickHouse/ClickHouse/pull/58916) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Allow concurrent table creation in the `Replicated` database during adding or recovering a new replica. [#59277](https://github.com/ClickHouse/ClickHouse/pull/59277) ([Konstantin Bogdanov](https://github.com/thevar1able)).
|
||||
* Implement comparison operator for `Variant` values and proper Field inserting into `Variant` column. Don't allow creating `Variant` type with similar variant types by default (allow under a setting `allow_suspicious_variant_types`). Closes [#59996](https://github.com/ClickHouse/ClickHouse/issues/59996). Closes [#59850](https://github.com/ClickHouse/ClickHouse/issues/59850). [#60198](https://github.com/ClickHouse/ClickHouse/pull/60198) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* Disable parallel replicas JOIN with CTE (not analyzer) [#59239](https://github.com/ClickHouse/ClickHouse/pull/59239) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
|
||||
#### Performance Improvement
|
||||
* The primary key will use less memory. [#60049](https://github.com/ClickHouse/ClickHouse/pull/60049) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Improve memory usage for primary key and some other operations. [#60050](https://github.com/ClickHouse/ClickHouse/pull/60050) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* The tables' primary keys will be loaded in memory lazily on first access. This is controlled by the new MergeTree setting `primary_key_lazy_load`, which is on by default. This provides several advantages: - it will not be loaded for tables that are not used; - if there is not enough memory, an exception will be thrown on first use instead of at server startup. This provides several disadvantages: - the latency of loading the primary key will be paid on the first query rather than before accepting connections; this theoretically may introduce a thundering-herd problem. This closes [#11188](https://github.com/ClickHouse/ClickHouse/issues/11188). [#60093](https://github.com/ClickHouse/ClickHouse/pull/60093) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Vectorized distance functions used in vector search. [#58866](https://github.com/ClickHouse/ClickHouse/pull/58866) ([Robert Schulze](https://github.com/rschu1ze)).
|
||||
* Vectorized function `dotProduct` which is useful for vector search. [#60202](https://github.com/ClickHouse/ClickHouse/pull/60202) ([Robert Schulze](https://github.com/rschu1ze)).
|
||||
* Add short-circuit ability for `dictGetOrDefault` function. Closes [#52098](https://github.com/ClickHouse/ClickHouse/issues/52098). [#57767](https://github.com/ClickHouse/ClickHouse/pull/57767) ([jsc0218](https://github.com/jsc0218)).
|
||||
* Keeper improvement: cache only a certain amount of logs in-memory controlled by `latest_logs_cache_size_threshold` and `commit_logs_cache_size_threshold`. [#59460](https://github.com/ClickHouse/ClickHouse/pull/59460) ([Antonio Andelic](https://github.com/antonio2368)).
|
||||
* Keeper improvement: reduce size of data node even more. [#59592](https://github.com/ClickHouse/ClickHouse/pull/59592) ([Antonio Andelic](https://github.com/antonio2368)).
|
||||
* Continue optimizing branch miss of `if` function when result type is `Float*/Decimal*/*Int*`, follow up of https://github.com/ClickHouse/ClickHouse/pull/57885. [#59148](https://github.com/ClickHouse/ClickHouse/pull/59148) ([李扬](https://github.com/taiyang-li)).
|
||||
* Optimize `if` function when the input type is `Map`, the speed-up is up to ~10x. [#59413](https://github.com/ClickHouse/ClickHouse/pull/59413) ([李扬](https://github.com/taiyang-li)).
|
||||
* Improve performance of the `Int8` type by implementing strict aliasing (we already have it for `UInt8` and all other integer types). [#59485](https://github.com/ClickHouse/ClickHouse/pull/59485) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Optimize performance of sum/avg conditionally for bigint and big decimal types by reducing branch miss. [#59504](https://github.com/ClickHouse/ClickHouse/pull/59504) ([李扬](https://github.com/taiyang-li)).
|
||||
* Improve performance of SELECTs with active mutations. [#59531](https://github.com/ClickHouse/ClickHouse/pull/59531) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Optimized function `isNotNull` with AVX2. [#59621](https://github.com/ClickHouse/ClickHouse/pull/59621) ([李扬](https://github.com/taiyang-li)).
|
||||
* Improve ASOF JOIN performance for sorted or almost sorted data. [#59731](https://github.com/ClickHouse/ClickHouse/pull/59731) ([Maksim Kita](https://github.com/kitaisreal)).
|
||||
* The previous default value of 1 MB for `async_insert_max_data_size` appeared to be too small. The new default is 10 MiB. [#59536](https://github.com/ClickHouse/ClickHouse/pull/59536) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
|
||||
* Use multiple threads while reading the metadata of tables from a backup while executing the RESTORE command. [#60040](https://github.com/ClickHouse/ClickHouse/pull/60040) ([Vitaly Baranov](https://github.com/vitlibar)).
|
||||
* Now if `StorageBuffer` has more than 1 shard (`num_layers` > 1) background flush will happen simultaneously for all shards in multiple threads. [#60111](https://github.com/ClickHouse/ClickHouse/pull/60111) ([alesapin](https://github.com/alesapin)).
|
||||
|
||||
#### Improvement
|
||||
* When the output format is a Pretty format and a block consists of a single numeric value which exceeds one million, a readable number will be printed to the right of the table. [#60379](https://github.com/ClickHouse/ClickHouse/pull/60379) ([rogeryk](https://github.com/rogeryk)).
|
||||
* Added settings `split_parts_ranges_into_intersecting_and_non_intersecting_final` and `split_intersecting_parts_ranges_into_layers_final`. These settings are needed to disable optimizations for queries with `FINAL` and needed for debug only. [#59705](https://github.com/ClickHouse/ClickHouse/pull/59705) ([Maksim Kita](https://github.com/kitaisreal)). Actually not only for that - they can also lower memory usage at the expense of performance.
|
||||
* Rename the setting `extract_kvp_max_pairs_per_row` to `extract_key_value_pairs_max_pairs_per_row`. The issue (unnecessary abbreviation in the setting name) was introduced in https://github.com/ClickHouse/ClickHouse/pull/43606. Fix the documentation of this setting. [#59683](https://github.com/ClickHouse/ClickHouse/pull/59683) ([Alexey Milovidov](https://github.com/alexey-milovidov)). [#59960](https://github.com/ClickHouse/ClickHouse/pull/59960) ([jsc0218](https://github.com/jsc0218)).
|
||||
* Running `ALTER COLUMN MATERIALIZE` on a column with `DEFAULT` or `MATERIALIZED` expression now precisely follows the semantics. [#58023](https://github.com/ClickHouse/ClickHouse/pull/58023) ([Duc Canh Le](https://github.com/canhld94)).
|
||||
* Enabled an exponential backoff logic for errors during mutations. It will reduce the CPU usage, memory usage and log file sizes. [#58036](https://github.com/ClickHouse/ClickHouse/pull/58036) ([MikhailBurdukov](https://github.com/MikhailBurdukov)).
|
||||
* Add improvement to count the `InitialQuery` Profile Event. [#58195](https://github.com/ClickHouse/ClickHouse/pull/58195) ([Unalian](https://github.com/Unalian)).
|
||||
* Allow to define `volume_priority` in `storage_configuration`. [#58533](https://github.com/ClickHouse/ClickHouse/pull/58533) ([Andrey Zvonov](https://github.com/zvonand)).
|
||||
* Add support for the `Date32` type in the `T64` codec. [#58738](https://github.com/ClickHouse/ClickHouse/pull/58738) ([Hongbin Ma](https://github.com/binmahone)).
|
||||
* Allow trailing commas in types with several items. [#59119](https://github.com/ClickHouse/ClickHouse/pull/59119) ([Aleksandr Musorin](https://github.com/AVMusorin)).
|
||||
* Settings for the Distributed table engine can now be specified in the server configuration file (similar to MergeTree settings), e.g. `<distributed> <flush_on_detach>false</flush_on_detach> </distributed>`. [#59291](https://github.com/ClickHouse/ClickHouse/pull/59291) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Retry disconnects and expired sessions when reading `system.zookeeper`. This is helpful when reading many rows from `system.zookeeper` table especially in the presence of fault-injected disconnects. [#59388](https://github.com/ClickHouse/ClickHouse/pull/59388) ([Alexander Gololobov](https://github.com/davenger)).
|
||||
* Do not interpret numbers with leading zeroes as octals when `input_format_values_interpret_expressions=0`. [#59403](https://github.com/ClickHouse/ClickHouse/pull/59403) ([Joanna Hulboj](https://github.com/jh0x)).
|
||||
* At startup and whenever config files are changed, ClickHouse updates the hard memory limits of its total memory tracker. These limits are computed based on various server settings and cgroups limits (on Linux). Previously, setting `/sys/fs/cgroup/memory.max` (for cgroups v2) was hard-coded. As a result, cgroup v2 memory limits configured for nested groups (hierarchies), e.g. `/sys/fs/cgroup/my/nested/group/memory.max` were ignored. This is now fixed. The behavior of v1 memory limits remains unchanged. [#59435](https://github.com/ClickHouse/ClickHouse/pull/59435) ([Robert Schulze](https://github.com/rschu1ze)).
|
||||
* New profile events added to observe the time spent on calculating PK/projections/secondary indices during `INSERT`-s. [#59436](https://github.com/ClickHouse/ClickHouse/pull/59436) ([Nikita Taranov](https://github.com/nickitat)).
|
||||
* Allow to define a starting point for S3Queue with Ordered mode at the creation using a setting `s3queue_last_processed_path`. [#59446](https://github.com/ClickHouse/ClickHouse/pull/59446) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
* Made comments for system tables also available in `system.tables` in `clickhouse-local`. [#59493](https://github.com/ClickHouse/ClickHouse/pull/59493) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
|
||||
* `system.zookeeper` table: previously the whole result was accumulated in memory and returned as one big chunk. This change should help to reduce memory consumption when reading many rows from `system.zookeeper`, allow showing intermediate progress (how many rows have been read so far) and avoid hitting connection timeout when result set is big. [#59545](https://github.com/ClickHouse/ClickHouse/pull/59545) ([Alexander Gololobov](https://github.com/davenger)).
|
||||
* Now the dashboard understands both compressed and uncompressed state of URL's #hash (backward compatibility). Continuation of [#59124](https://github.com/ClickHouse/ClickHouse/issues/59124). [#59548](https://github.com/ClickHouse/ClickHouse/pull/59548) ([Amos Bird](https://github.com/amosbird)).
|
||||
* Bumped Intel QPL (used by codec `DEFLATE_QPL`) from v1.3.1 to v1.4.0. Also fixed a bug in the polling timeout mechanism: as we observed, in some cases the timeout won't work properly, and if a timeout happens, IAA and CPU may process the buffer concurrently. So far, we'd better make sure the IAA codec status is not QPL_STS_BEING_PROCESSED, then fall back to the SW codec. [#59551](https://github.com/ClickHouse/ClickHouse/pull/59551) ([jasperzhu](https://github.com/jinjunzh)).
|
||||
* Do not show a warning about the server version in ClickHouse Cloud because ClickHouse Cloud handles seamless upgrades automatically. [#59657](https://github.com/ClickHouse/ClickHouse/pull/59657) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* After self-extraction, the temporary binary is moved instead of being copied. [#59661](https://github.com/ClickHouse/ClickHouse/pull/59661) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
|
||||
* Fix stack unwinding on Apple macOS. This closes [#53653](https://github.com/ClickHouse/ClickHouse/issues/53653). [#59690](https://github.com/ClickHouse/ClickHouse/pull/59690) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
|
||||
* Check for stack overflow in parsers even if the user misconfigured the `max_parser_depth` setting to a very high value. This closes [#59622](https://github.com/ClickHouse/ClickHouse/issues/59622). [#59697](https://github.com/ClickHouse/ClickHouse/pull/59697) ([Alexey Milovidov](https://github.com/alexey-milovidov)). [#60434](https://github.com/ClickHouse/ClickHouse/pull/60434)
|
||||
* Unify XML and SQL created named collection behaviour in Kafka storage. [#59710](https://github.com/ClickHouse/ClickHouse/pull/59710) ([Pervakov Grigorii](https://github.com/GrigoryPervakov)).
|
||||
* In case when `merge_max_block_size_bytes` is small enough and tables contain wide rows (strings or tuples), background merges may get stuck in an endless loop. This behaviour is fixed. Follow-up for https://github.com/ClickHouse/ClickHouse/pull/59340. [#59812](https://github.com/ClickHouse/ClickHouse/pull/59812) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
|
||||
* Allow uuid in replica_path if CREATE TABLE explicitly has it. [#59908](https://github.com/ClickHouse/ClickHouse/pull/59908) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Add column `metadata_version` of ReplicatedMergeTree table in `system.tables` system table. [#59942](https://github.com/ClickHouse/ClickHouse/pull/59942) ([Maksim Kita](https://github.com/kitaisreal)).
|
||||
* Keeper improvement: send only Keeper related metrics/events for Prometheus. [#59945](https://github.com/ClickHouse/ClickHouse/pull/59945) ([Antonio Andelic](https://github.com/antonio2368)).
|
||||
* The dashboard will display metrics across different ClickHouse versions even if the structure of system tables has changed after the upgrade. [#59967](https://github.com/ClickHouse/ClickHouse/pull/59967) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Allow loading AZ info from a file. [#59976](https://github.com/ClickHouse/ClickHouse/pull/59976) ([Konstantin Bogdanov](https://github.com/thevar1able)).
|
||||
* Keeper improvement: add retries on failures for Disk related operations. [#59980](https://github.com/ClickHouse/ClickHouse/pull/59980) ([Antonio Andelic](https://github.com/antonio2368)).
|
||||
* Add new config setting `backups.remove_backup_files_after_failure`: `<clickhouse> <backups> <remove_backup_files_after_failure>true</remove_backup_files_after_failure> </backups> </clickhouse>`. [#60002](https://github.com/ClickHouse/ClickHouse/pull/60002) ([Vitaly Baranov](https://github.com/vitlibar)).
|
||||
* Copy S3 file GCP fallback to buffer copy in case GCP returned `Internal Error` with `GATEWAY_TIMEOUT` HTTP error code. [#60164](https://github.com/ClickHouse/ClickHouse/pull/60164) ([Maksim Kita](https://github.com/kitaisreal)).
|
||||
* Short circuit execution for `ULIDStringToDateTime`. [#60211](https://github.com/ClickHouse/ClickHouse/pull/60211) ([Juan Madurga](https://github.com/jlmadurga)).
|
||||
* Added `query_id` column for tables `system.backups` and `system.backup_log`. Added error stacktrace to `error` column. [#60220](https://github.com/ClickHouse/ClickHouse/pull/60220) ([Maksim Kita](https://github.com/kitaisreal)).
|
||||
* Connections through the MySQL port now automatically run with setting `prefer_column_name_to_alias = 1` to support QuickSight out-of-the-box. Also, settings `mysql_map_string_to_text_in_show_columns` and `mysql_map_fixed_string_to_text_in_show_columns` are now enabled by default, affecting also only MySQL connections. This increases compatibility with more BI tools. [#60365](https://github.com/ClickHouse/ClickHouse/pull/60365) ([Robert Schulze](https://github.com/rschu1ze)).
|
||||
* Fix a race condition in JavaScript code leading to duplicate charts on top of each other. [#60392](https://github.com/ClickHouse/ClickHouse/pull/60392) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
|
||||
#### Build/Testing/Packaging Improvement
|
||||
* Added builds and tests with coverage collection with introspection. Continuation of [#56102](https://github.com/ClickHouse/ClickHouse/issues/56102). [#58792](https://github.com/ClickHouse/ClickHouse/pull/58792) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Update the Rust toolchain in `corrosion-cmake` when the CMake cross-compilation toolchain variable is set. [#59309](https://github.com/ClickHouse/ClickHouse/pull/59309) ([Aris Tritas](https://github.com/aris-aiven)).
|
||||
* Add some fuzzing to ASTLiterals. [#59383](https://github.com/ClickHouse/ClickHouse/pull/59383) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* If you want to run initdb scripts every time the ClickHouse container starts, you should set the environment variable CLICKHOUSE_ALWAYS_RUN_INITDB_SCRIPTS. [#59808](https://github.com/ClickHouse/ClickHouse/pull/59808) ([Alexander Nikolaev](https://github.com/AlexNik)).
|
||||
* Remove the ability to disable generic clickhouse components (like server/client/...), but keep it for some that require extra libraries (like ODBC or keeper). [#59857](https://github.com/ClickHouse/ClickHouse/pull/59857) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Query fuzzer will fuzz SETTINGS inside queries. [#60087](https://github.com/ClickHouse/ClickHouse/pull/60087) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Add support for building ClickHouse with clang-19 (master). [#60448](https://github.com/ClickHouse/ClickHouse/pull/60448) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
|
||||
#### Bug Fix (user-visible misbehavior in an official stable release)
|
||||
* Fix a "Non-ready set" error in TTL WHERE. [#57430](https://github.com/ClickHouse/ClickHouse/pull/57430) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Fix a bug in the `quantilesGK` function [#58216](https://github.com/ClickHouse/ClickHouse/pull/58216) ([李扬](https://github.com/taiyang-li)).
|
||||
* Fix a wrong behavior with `intDiv` for Decimal arguments [#59243](https://github.com/ClickHouse/ClickHouse/pull/59243) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
|
||||
* Fix `translate` with FixedString input [#59356](https://github.com/ClickHouse/ClickHouse/pull/59356) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Fix digest calculation in Keeper [#59439](https://github.com/ClickHouse/ClickHouse/pull/59439) ([Antonio Andelic](https://github.com/antonio2368)).
|
||||
* Fix stacktraces for binaries without debug symbols [#59444](https://github.com/ClickHouse/ClickHouse/pull/59444) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Fix `ASTAlterCommand::formatImpl` in case of column specific settings… [#59445](https://github.com/ClickHouse/ClickHouse/pull/59445) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)).
|
||||
* Fix `SELECT * FROM [...] ORDER BY ALL` with Analyzer [#59462](https://github.com/ClickHouse/ClickHouse/pull/59462) ([zhongyuankai](https://github.com/zhongyuankai)).
|
||||
* Fix possible uncaught exception during distributed query cancellation [#59487](https://github.com/ClickHouse/ClickHouse/pull/59487) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Make MAX use the same rules as permutation for complex types [#59498](https://github.com/ClickHouse/ClickHouse/pull/59498) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Fix corner case when passing `update_insert_deduplication_token_in_dependent_materialized_views` [#59544](https://github.com/ClickHouse/ClickHouse/pull/59544) ([Jordi Villar](https://github.com/jrdi)).
|
||||
* Fix incorrect result of arrayElement / map on empty value [#59594](https://github.com/ClickHouse/ClickHouse/pull/59594) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Fix crash in topK when merging empty states [#59603](https://github.com/ClickHouse/ClickHouse/pull/59603) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Fix distributed table with a constant sharding key [#59606](https://github.com/ClickHouse/ClickHouse/pull/59606) ([Vitaly Baranov](https://github.com/vitlibar)).
|
||||
* Fix KQL issue found by WingFuzz [#59626](https://github.com/ClickHouse/ClickHouse/pull/59626) ([Yong Wang](https://github.com/kashwy)).
|
||||
* Fix error "Read beyond last offset" for AsynchronousBoundedReadBuffer [#59630](https://github.com/ClickHouse/ClickHouse/pull/59630) ([Vitaly Baranov](https://github.com/vitlibar)).
|
||||
* Maintain function alias in RewriteSumFunctionWithSumAndCountVisitor [#59658](https://github.com/ClickHouse/ClickHouse/pull/59658) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Fix query start time on non initial queries [#59662](https://github.com/ClickHouse/ClickHouse/pull/59662) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Validate types of arguments for `minmax` skipping index [#59733](https://github.com/ClickHouse/ClickHouse/pull/59733) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
* Fix leftPad / rightPad function with FixedString input [#59739](https://github.com/ClickHouse/ClickHouse/pull/59739) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Fix AST fuzzer issue in function `countMatches` [#59752](https://github.com/ClickHouse/ClickHouse/pull/59752) ([Robert Schulze](https://github.com/rschu1ze)).
|
||||
* RabbitMQ: fix having neither acked nor nacked messages [#59775](https://github.com/ClickHouse/ClickHouse/pull/59775) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
* Fix StorageURL doing some of the query execution in single thread [#59833](https://github.com/ClickHouse/ClickHouse/pull/59833) ([Michael Kolupaev](https://github.com/al13n321)).
|
||||
* S3Queue: fix uninitialized value [#59897](https://github.com/ClickHouse/ClickHouse/pull/59897) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
* Fix parsing of partition expressions surrounded by parens [#59901](https://github.com/ClickHouse/ClickHouse/pull/59901) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)).
|
||||
* Fix crash in JSONColumnsWithMetadata format over HTTP [#59925](https://github.com/ClickHouse/ClickHouse/pull/59925) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* Do not rewrite sum to count if the return value differs in Analyzer [#59926](https://github.com/ClickHouse/ClickHouse/pull/59926) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* UniqExactSet read crash fix [#59928](https://github.com/ClickHouse/ClickHouse/pull/59928) ([Maksim Kita](https://github.com/kitaisreal)).
|
||||
* ReplicatedMergeTree invalid metadata_version fix [#59946](https://github.com/ClickHouse/ClickHouse/pull/59946) ([Maksim Kita](https://github.com/kitaisreal)).
|
||||
* Fix data race in `StorageDistributed` [#59987](https://github.com/ClickHouse/ClickHouse/pull/59987) ([Nikita Taranov](https://github.com/nickitat)).
|
||||
* Docker: run init scripts when option is enabled rather than disabled [#59991](https://github.com/ClickHouse/ClickHouse/pull/59991) ([jktng](https://github.com/jktng)).
|
||||
* Fix INSERT into `SQLite` with single quote (by escaping single quotes with a quote instead of backslash) [#60015](https://github.com/ClickHouse/ClickHouse/pull/60015) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Fix several logical errors in `arrayFold` [#60022](https://github.com/ClickHouse/ClickHouse/pull/60022) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Fix optimize_uniq_to_count removing the column alias [#60026](https://github.com/ClickHouse/ClickHouse/pull/60026) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Fix possible exception from S3Queue table on drop [#60036](https://github.com/ClickHouse/ClickHouse/pull/60036) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
* Fix formatting of NOT with single literals [#60042](https://github.com/ClickHouse/ClickHouse/pull/60042) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Use max_query_size from context in DDLLogEntry instead of hardcoded 4096 [#60083](https://github.com/ClickHouse/ClickHouse/pull/60083) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* Fix inconsistent formatting of queries containing tables named `table`. Fix wrong formatting of queries with `UNION ALL`, `INTERSECT`, and `EXCEPT` when their structure wasn't linear. This closes #52349. Fix wrong formatting of `SYSTEM` queries, including `SYSTEM ... DROP FILESYSTEM CACHE`, `SYSTEM ... REFRESH/START/STOP/CANCEL/TEST VIEW`, `SYSTEM ENABLE/DISABLE FAILPOINT`. Fix formatting of parameterized DDL queries. Fix the formatting of the `DESCRIBE FILESYSTEM CACHE` query. Fix incorrect formatting of the `SET param_...` (a query setting a parameter). Fix incorrect formatting of `CREATE INDEX` queries. Fix inconsistent formatting of `CREATE USER` and similar queries. Fix inconsistent formatting of `CREATE SETTINGS PROFILE`. Fix incorrect formatting of `ALTER ... MODIFY REFRESH`. Fix inconsistent formatting of window functions if frame offsets were expressions. Fix inconsistent formatting of `RESPECT NULLS` and `IGNORE NULLS` if they were used after a function that implements an operator (such as `plus`). Fix idiotic formatting of `SYSTEM SYNC REPLICA ... LIGHTWEIGHT FROM ...`. Fix inconsistent formatting of invalid queries with `GROUP BY GROUPING SETS ... WITH ROLLUP/CUBE/TOTALS`. Fix inconsistent formatting of `GRANT CURRENT GRANTS`. Fix inconsistent formatting of `CREATE TABLE (... COLLATE)`. Additionally, I fixed the incorrect formatting of `EXPLAIN` in subqueries (#60102). Fixed incorrect formatting of lambda functions (#60012). Added a check so there is no way to miss these abominations in the future. [#60095](https://github.com/ClickHouse/ClickHouse/pull/60095) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fix inconsistent formatting of explain in subqueries [#60102](https://github.com/ClickHouse/ClickHouse/pull/60102) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Fix cosineDistance crash with Nullable [#60150](https://github.com/ClickHouse/ClickHouse/pull/60150) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Allow casting of bools in string representation to true bools [#60160](https://github.com/ClickHouse/ClickHouse/pull/60160) ([Robert Schulze](https://github.com/rschu1ze)).
|
||||
* Fix `system.s3queue_log` [#60166](https://github.com/ClickHouse/ClickHouse/pull/60166) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
* Fix arrayReduce with nullable aggregate function name [#60188](https://github.com/ClickHouse/ClickHouse/pull/60188) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Hide sensitive info for `S3Queue` [#60233](https://github.com/ClickHouse/ClickHouse/pull/60233) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
* Fix http exception codes. [#60252](https://github.com/ClickHouse/ClickHouse/pull/60252) ([Austin Kothig](https://github.com/kothiga)).
|
||||
* S3Queue: fix a bug (also fixes flaky test_storage_s3_queue/test.py::test_shards_distributed) [#60282](https://github.com/ClickHouse/ClickHouse/pull/60282) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
* Fix use-of-uninitialized-value and invalid result in hashing functions with IPv6 [#60359](https://github.com/ClickHouse/ClickHouse/pull/60359) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* Fix OptimizeDateOrDateTimeConverterWithPreimageVisitor with null arguments [#60453](https://github.com/ClickHouse/ClickHouse/pull/60453) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Fixed a minor bug that prevented distributed table queries sent from either KQL or PRQL dialect clients from being executed on replicas. [#59674](https://github.com/ClickHouse/ClickHouse/issues/59674). [#60470](https://github.com/ClickHouse/ClickHouse/pull/60470) ([Alexey Milovidov](https://github.com/alexey-milovidov)) [#59674](https://github.com/ClickHouse/ClickHouse/pull/59674) ([Austin Kothig](https://github.com/kothiga)).
|
||||
|
||||
|
||||
### <a id="241"></a> ClickHouse release 24.1, 2024-01-30
|
||||
|
||||
#### Backward Incompatible Change
|
||||
|
@ -10,6 +10,7 @@ set (CMAKE_CXX_STANDARD 20)
|
||||
|
||||
set (SRCS
|
||||
argsToConfig.cpp
|
||||
cgroupsv2.cpp
|
||||
coverage.cpp
|
||||
demangle.cpp
|
||||
getAvailableMemoryAmount.cpp
|
||||
|
@ -185,7 +185,8 @@ inline bool memequalWide(const char * p1, const char * p2, size_t size)
|
||||
{
|
||||
case 3: if (!compare8(p1 + 32, p2 + 32)) return false; [[fallthrough]];
|
||||
case 2: if (!compare8(p1 + 16, p2 + 16)) return false; [[fallthrough]];
|
||||
case 1: if (!compare8(p1, p2)) return false;
|
||||
case 1: if (!compare8(p1, p2)) return false; [[fallthrough]];
|
||||
default: ;
|
||||
}
|
||||
|
||||
return compare8(p1 + size - 16, p2 + size - 16);
|
||||
|
64
base/base/cgroupsv2.cpp
Normal file
64
base/base/cgroupsv2.cpp
Normal file
@ -0,0 +1,64 @@
|
||||
#include <base/cgroupsv2.h>
|
||||
|
||||
#include <base/defines.h>
|
||||
|
||||
#include <fstream>
|
||||
#include <sstream>
|
||||
|
||||
|
||||
/// Reports whether cgroups v2 is enabled on the host, judged by the presence of
/// 'cgroup.controllers' under the default cgroups mount point.
/// Always returns false on non-Linux builds.
bool cgroupsV2Enabled()
{
#if defined(OS_LINUX)
    /// This file exists iff the host has cgroups v2 enabled.
    /// The non-throwing overload is used on purpose: if the file status cannot be queried,
    /// the answer is "not enabled" rather than a std::filesystem::filesystem_error.
    std::error_code ec;
    return std::filesystem::exists(default_cgroups_mount / "cgroup.controllers", ec);
#else
    return false;
#endif
}
|
||||
|
||||
/// Tells whether the cgroups v2 memory controller is enabled in the top-level group.
/// Expects cgroupsV2Enabled() to hold (verified with chassert).
/// Always returns false on non-Linux builds.
bool cgroupsV2MemoryControllerEnabled()
{
#if defined(OS_LINUX)
    chassert(cgroupsV2Enabled());
    /// Per https://docs.kernel.org/admin-guide/cgroup-v2.html:
    /// - 'cgroup.controllers' lists the controllers that *can* be enabled
    /// - 'cgroup.subtree_control' lists the controllers that *are* enabled
    /// Caveat: nested groups may disable controllers. For simplicity, only the top-level group is checked.
    std::ifstream subtree_control(default_cgroups_mount / "cgroup.subtree_control");
    if (!subtree_control.is_open())
        return false;

    /// Only the first line of the file is inspected.
    std::string enabled_controllers;
    std::getline(subtree_control, enabled_controllers);
    return enabled_controllers.find("memory") != std::string::npos;
#else
    return false;
#endif
}
|
||||
|
||||
/// Returns the name of the cgroup (v2) the current process belongs to, without the "0::/" prefix.
/// Returns an empty string if /proc/self/cgroup cannot be opened or its first line lacks that prefix.
/// Expects cgroupsV2Enabled() to hold (verified with chassert).
/// Always returns an empty string on non-Linux builds.
std::string cgroupV2OfProcess()
{
#if defined(OS_LINUX)
    chassert(cgroupsV2Enabled());
    /// Every PID assigned to a cgroup is listed in /sys/fs/cgroup/{cgroup_name}/cgroup.procs,
    /// but reading the membership from /proc/self/cgroup is simpler.
    std::ifstream membership_file("/proc/self/cgroup");
    if (!membership_file.is_open())
        return "";

    /// With cgroups v2, there will be a *single* line with prefix "0::/"
    /// (see https://docs.kernel.org/admin-guide/cgroup-v2.html)
    std::string line;
    std::getline(membership_file, line);

    static const std::string v2_prefix = "0::/";
    if (line.starts_with(v2_prefix))
        return line.substr(v2_prefix.length());

    return "";
#else
    return "";
#endif
}
|
22
base/base/cgroupsv2.h
Normal file
22
base/base/cgroupsv2.h
Normal file
@ -0,0 +1,22 @@
|
||||
#pragma once
|
||||
|
||||
#include <filesystem>
|
||||
#include <string>
|
||||
|
||||
#if defined(OS_LINUX)
|
||||
/// I think it is possible to mount the cgroups hierarchy somewhere else (e.g. when in containers).
|
||||
/// /sys/fs/cgroup was still symlinked to the actual mount in the cases that I have seen.
|
||||
static inline const std::filesystem::path default_cgroups_mount = "/sys/fs/cgroup";
|
||||
#endif
|
||||
|
||||
/// Is cgroups v2 enabled on the system?
|
||||
bool cgroupsV2Enabled();
|
||||
|
||||
/// Is the memory controller of cgroups v2 enabled on the system?
|
||||
/// Assumes that cgroupsV2Enabled() returns true.
|
||||
bool cgroupsV2MemoryControllerEnabled();
|
||||
|
||||
/// Which cgroup does the process belong to?
|
||||
/// Returns an empty string if the cgroup cannot be determined.
|
||||
/// Assumes that cgroupsV2Enabled() returns true.
|
||||
std::string cgroupV2OfProcess();
|
@ -1,17 +1,14 @@
|
||||
#include <base/getMemoryAmount.h>
|
||||
|
||||
#include <base/cgroupsv2.h>
|
||||
#include <base/getPageSize.h>
|
||||
|
||||
#include <fstream>
|
||||
#include <sstream>
|
||||
#include <stdexcept>
|
||||
|
||||
#include <unistd.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/param.h>
|
||||
#if defined(BSD)
|
||||
#include <sys/sysctl.h>
|
||||
#endif
|
||||
|
||||
|
||||
namespace
|
||||
@ -20,49 +17,14 @@ namespace
|
||||
std::optional<uint64_t> getCgroupsV2MemoryLimit()
|
||||
{
|
||||
#if defined(OS_LINUX)
|
||||
const std::filesystem::path default_cgroups_mount = "/sys/fs/cgroup";
|
||||
|
||||
/// This file exists iff the host has cgroups v2 enabled.
|
||||
std::ifstream controllers_file(default_cgroups_mount / "cgroup.controllers");
|
||||
if (!controllers_file.is_open())
|
||||
if (!cgroupsV2Enabled())
|
||||
return {};
|
||||
|
||||
/// Make sure that the memory controller is enabled.
|
||||
/// - cgroup.controllers defines which controllers *can* be enabled.
|
||||
/// - cgroup.subtree_control defines which controllers *are* enabled.
|
||||
/// (see https://docs.kernel.org/admin-guide/cgroup-v2.html)
|
||||
/// Caveat: nested groups may disable controllers. For simplicity, check only the top-level group.
|
||||
/// ReadBufferFromFile subtree_control_file(default_cgroups_mount / "cgroup.subtree_control");
|
||||
/// std::string subtree_control;
|
||||
/// readString(subtree_control, subtree_control_file);
|
||||
/// if (subtree_control.find("memory") == std::string::npos)
|
||||
/// return {};
|
||||
std::ifstream subtree_control_file(default_cgroups_mount / "cgroup.subtree_control");
|
||||
std::stringstream subtree_control_buf;
|
||||
subtree_control_buf << subtree_control_file.rdbuf();
|
||||
std::string subtree_control = subtree_control_buf.str();
|
||||
if (subtree_control.find("memory") == std::string::npos)
|
||||
if (!cgroupsV2MemoryControllerEnabled())
|
||||
return {};
|
||||
|
||||
/// Identify the cgroup the process belongs to
|
||||
/// All PIDs assigned to a cgroup are in /sys/fs/cgroups/{cgroup_name}/cgroup.procs
|
||||
/// A simpler way to get the membership is:
|
||||
std::ifstream cgroup_name_file("/proc/self/cgroup");
|
||||
if (!cgroup_name_file.is_open())
|
||||
return {};
|
||||
|
||||
std::stringstream cgroup_name_buf;
|
||||
cgroup_name_buf << cgroup_name_file.rdbuf();
|
||||
std::string cgroup_name = cgroup_name_buf.str();
|
||||
if (!cgroup_name.empty() && cgroup_name.back() == '\n')
|
||||
cgroup_name.pop_back(); /// remove trailing newline, if any
|
||||
/// With cgroups v2, there will be a *single* line with prefix "0::/"
|
||||
const std::string v2_prefix = "0::/";
|
||||
if (!cgroup_name.starts_with(v2_prefix))
|
||||
return {};
|
||||
cgroup_name = cgroup_name.substr(v2_prefix.length());
|
||||
|
||||
std::filesystem::path current_cgroup = cgroup_name.empty() ? default_cgroups_mount : (default_cgroups_mount / cgroup_name);
|
||||
std::string cgroup = cgroupV2OfProcess();
|
||||
auto current_cgroup = cgroup.empty() ? default_cgroups_mount : (default_cgroups_mount / cgroup);
|
||||
|
||||
/// Open the bottom-most nested memory limit setting file. If there is no such file at the current
|
||||
/// level, try again at the parent level as memory settings are inherited.
|
||||
|
@ -68,7 +68,7 @@ public:
|
||||
typedef typename Bucket::iterator BucketIterator;
|
||||
typedef typename BucketVec::iterator BucketVecIterator;
|
||||
|
||||
class ConstIterator : public std::iterator<std::forward_iterator_tag, Value>
|
||||
class ConstIterator
|
||||
{
|
||||
public:
|
||||
ConstIterator() : _initialized(false) { }
|
||||
|
@ -46,5 +46,6 @@ if (COMPILER_CLANG)
|
||||
no_warning(thread-safety-negative) # experimental flag, too many false positives
|
||||
no_warning(enum-constexpr-conversion) # breaks magic-enum library in clang-16
|
||||
no_warning(unsafe-buffer-usage) # too aggressive
|
||||
no_warning(switch-default) # conflicts with "defaults in a switch covering all enum values"
|
||||
# TODO Enable conversion, sign-conversion, double-promotion warnings.
|
||||
endif ()
|
||||
|
2
contrib/NuRaft
vendored
2
contrib/NuRaft
vendored
@ -1 +1 @@
|
||||
Subproject commit 5bb3a0e8257bacd65b099cb1b7239bd6b9a2c477
|
||||
Subproject commit 4a12f99dfc9d47c687ff7700b927cc76856225d1
|
2
contrib/aws
vendored
2
contrib/aws
vendored
@ -1 +1 @@
|
||||
Subproject commit 9eb5097a0abfa837722cca7a5114a25837817bf2
|
||||
Subproject commit 5f0542b3ad7eef25b0540d37d778207e0345ea8f
|
@ -157,7 +157,7 @@ if (TARGET ch_contrib::zlib)
|
||||
endif()
|
||||
|
||||
if (TARGET ch_contrib::zstd)
|
||||
target_compile_definitions(_libarchive PUBLIC HAVE_ZSTD_H=1 HAVE_LIBZSTD=1)
|
||||
target_compile_definitions(_libarchive PUBLIC HAVE_ZSTD_H=1 HAVE_LIBZSTD=1 HAVE_LIBZSTD_COMPRESSOR=1)
|
||||
target_link_libraries(_libarchive PRIVATE ch_contrib::zstd)
|
||||
endif()
|
||||
|
||||
|
@ -25,21 +25,21 @@ public:
|
||||
static const uint32_t bits = 128;
|
||||
|
||||
// Constructor initializes the same as Initialize()
|
||||
MetroHash128(const uint64_t seed=0);
|
||||
|
||||
explicit MetroHash128(const uint64_t seed=0);
|
||||
|
||||
// Initializes internal state for new hash with optional seed
|
||||
void Initialize(const uint64_t seed=0);
|
||||
|
||||
|
||||
// Update the hash state with a string of bytes. If the length
|
||||
// is sufficiently long, the implementation switches to a bulk
|
||||
// hashing algorithm directly on the argument buffer for speed.
|
||||
void Update(const uint8_t * buffer, const uint64_t length);
|
||||
|
||||
|
||||
// Constructs the final hash and writes it to the argument buffer.
|
||||
// After a hash is finalized, this instance must be Initialized()-ed
|
||||
// again or the behavior of Update() and Finalize() is undefined.
|
||||
void Finalize(uint8_t * const hash);
|
||||
|
||||
|
||||
// A non-incremental function implementation. This can be significantly
|
||||
// faster than the incremental implementation for some usage patterns.
|
||||
static void Hash(const uint8_t * buffer, const uint64_t length, uint8_t * const hash, const uint64_t seed=0);
|
||||
@ -57,7 +57,7 @@ private:
|
||||
static const uint64_t k1 = 0x8648DBDB;
|
||||
static const uint64_t k2 = 0x7BDEC03B;
|
||||
static const uint64_t k3 = 0x2F5870A5;
|
||||
|
||||
|
||||
struct { uint64_t v[4]; } state;
|
||||
struct { uint8_t b[32]; } input;
|
||||
uint64_t bytes;
|
||||
|
@ -25,21 +25,21 @@ public:
|
||||
static const uint32_t bits = 64;
|
||||
|
||||
// Constructor initializes the same as Initialize()
|
||||
MetroHash64(const uint64_t seed=0);
|
||||
|
||||
explicit MetroHash64(const uint64_t seed=0);
|
||||
|
||||
// Initializes internal state for new hash with optional seed
|
||||
void Initialize(const uint64_t seed=0);
|
||||
|
||||
|
||||
// Update the hash state with a string of bytes. If the length
|
||||
// is sufficiently long, the implementation switches to a bulk
|
||||
// hashing algorithm directly on the argument buffer for speed.
|
||||
void Update(const uint8_t * buffer, const uint64_t length);
|
||||
|
||||
|
||||
// Constructs the final hash and writes it to the argument buffer.
|
||||
// After a hash is finalized, this instance must be Initialized()-ed
|
||||
// again or the behavior of Update() and Finalize() is undefined.
|
||||
void Finalize(uint8_t * const hash);
|
||||
|
||||
|
||||
// A non-incremental function implementation. This can be significantly
|
||||
// faster than the incremental implementation for some usage patterns.
|
||||
static void Hash(const uint8_t * buffer, const uint64_t length, uint8_t * const hash, const uint64_t seed=0);
|
||||
@ -57,7 +57,7 @@ private:
|
||||
static const uint64_t k1 = 0xA2AA033B;
|
||||
static const uint64_t k2 = 0x62992FC1;
|
||||
static const uint64_t k3 = 0x30BC5B29;
|
||||
|
||||
|
||||
struct { uint64_t v[4]; } state;
|
||||
struct { uint8_t b[32]; } input;
|
||||
uint64_t bytes;
|
||||
|
2
contrib/liburing
vendored
2
contrib/liburing
vendored
@ -1 +1 @@
|
||||
Subproject commit f5a48392c4ea33f222cbebeb2e2fc31620162949
|
||||
Subproject commit f4e42a515cd78c8c9cac2be14222834be5f8df2b
|
2
contrib/qpl
vendored
2
contrib/qpl
vendored
@ -1 +1 @@
|
||||
Subproject commit a61bdd845fd7ca363b2bcc55454aa520dfcd8298
|
||||
Subproject commit d4715e0e79896b85612158e135ee1a85f3b3e04d
|
2
contrib/rapidjson
vendored
2
contrib/rapidjson
vendored
@ -1 +1 @@
|
||||
Subproject commit c4ef90ccdbc21d5d5a628d08316bfd301e32d6fa
|
||||
Subproject commit 800ca2f38fc3b387271d9e1926fcfc9070222104
|
@ -86,7 +86,7 @@ function download
|
||||
|
||||
chmod +x clickhouse
|
||||
# clickhouse may be compressed - run once to decompress
|
||||
./clickhouse ||:
|
||||
./clickhouse --query "SELECT 1" ||:
|
||||
ln -s ./clickhouse ./clickhouse-server
|
||||
ln -s ./clickhouse ./clickhouse-client
|
||||
ln -s ./clickhouse ./clickhouse-local
|
||||
@ -387,6 +387,11 @@ if [ -f core.zst ]; then
|
||||
fi
|
||||
|
||||
rg --text -F '<Fatal>' server.log > fatal.log ||:
|
||||
FATAL_LINK=''
|
||||
if [ -s fatal.log ]; then
|
||||
FATAL_LINK='<a href="fatal.log">fatal.log</a>'
|
||||
fi
|
||||
|
||||
dmesg -T > dmesg.log ||:
|
||||
|
||||
zstd --threads=0 --rm server.log
|
||||
@ -419,6 +424,7 @@ p.links a { padding: 5px; margin: 3px; background: #FFF; line-height: 2; white-s
|
||||
<a href="main.log">main.log</a>
|
||||
<a href="dmesg.log">dmesg.log</a>
|
||||
${CORE_LINK}
|
||||
${FATAL_LINK}
|
||||
</p>
|
||||
<table>
|
||||
<tr>
|
||||
|
@ -214,8 +214,7 @@ function check_server_start()
|
||||
function check_logs_for_critical_errors()
|
||||
{
|
||||
# Sanitizer asserts
|
||||
rg -Fa "==================" /var/log/clickhouse-server/stderr.log | rg -v "in query:" >> /test_output/tmp
|
||||
rg -Fa "WARNING" /var/log/clickhouse-server/stderr.log >> /test_output/tmp
|
||||
sed -n '/WARNING:.*anitizer/,/^$/p' >> /test_output/tmp
|
||||
rg -Fav -e "ASan doesn't fully support makecontext/swapcontext functions" -e "DB::Exception" /test_output/tmp > /dev/null \
|
||||
&& echo -e "Sanitizer assert (in stderr.log)$FAIL$(head_escaped /test_output/tmp)" >> /test_output/test_results.tsv \
|
||||
|| echo -e "No sanitizer asserts$OK" >> /test_output/test_results.tsv
|
||||
@ -233,8 +232,8 @@ function check_logs_for_critical_errors()
|
||||
# Remove file logical_errors.txt if it's empty
|
||||
[ -s /test_output/logical_errors.txt ] || rm /test_output/logical_errors.txt
|
||||
|
||||
# No such key errors
|
||||
rg --text "Code: 499.*The specified key does not exist" /var/log/clickhouse-server/clickhouse-server*.log > /test_output/no_such_key_errors.txt \
|
||||
# No such key errors (ignore a.myext which is used in 02724_database_s3.sh and does not exist)
|
||||
rg --text "Code: 499.*The specified key does not exist" /var/log/clickhouse-server/clickhouse-server*.log | grep -v "a.myext" > /test_output/no_such_key_errors.txt \
|
||||
&& echo -e "S3_ERROR No such key thrown (see clickhouse-server.log or no_such_key_errors.txt)$FAIL$(trim_server_logs no_such_key_errors.txt)" >> /test_output/test_results.tsv \
|
||||
|| echo -e "No lost s3 keys$OK" >> /test_output/test_results.tsv
|
||||
|
||||
|
@ -100,6 +100,7 @@ rm /etc/clickhouse-server/config.d/merge_tree.xml
|
||||
rm /etc/clickhouse-server/config.d/enable_wait_for_shutdown_replicated_tables.xml
|
||||
rm /etc/clickhouse-server/config.d/zero_copy_destructive_operations.xml
|
||||
rm /etc/clickhouse-server/config.d/storage_conf_02963.xml
|
||||
rm /etc/clickhouse-server/config.d/backoff_failed_mutation.xml
|
||||
rm /etc/clickhouse-server/users.d/nonconst_timezone.xml
|
||||
rm /etc/clickhouse-server/users.d/s3_cache_new.xml
|
||||
rm /etc/clickhouse-server/users.d/replicated_ddl_entry.xml
|
||||
@ -160,6 +161,7 @@ rm /etc/clickhouse-server/config.d/merge_tree.xml
|
||||
rm /etc/clickhouse-server/config.d/enable_wait_for_shutdown_replicated_tables.xml
|
||||
rm /etc/clickhouse-server/config.d/zero_copy_destructive_operations.xml
|
||||
rm /etc/clickhouse-server/config.d/storage_conf_02963.xml
|
||||
rm /etc/clickhouse-server/config.d/backoff_failed_mutation.xml
|
||||
rm /etc/clickhouse-server/config.d/block_number.xml
|
||||
rm /etc/clickhouse-server/users.d/nonconst_timezone.xml
|
||||
rm /etc/clickhouse-server/users.d/s3_cache_new.xml
|
||||
|
@ -403,4 +403,3 @@ sidebar_label: 2023
|
||||
* Do not remove part if `Too many open files` is thrown [#56238](https://github.com/ClickHouse/ClickHouse/pull/56238) ([Nikolay Degterinsky](https://github.com/evillique)).
|
||||
* Fix ORC commit [#56261](https://github.com/ClickHouse/ClickHouse/pull/56261) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Fix typo in largestTriangleThreeBuckets.md [#56263](https://github.com/ClickHouse/ClickHouse/pull/56263) ([Nikita Taranov](https://github.com/nickitat)).
|
||||
|
||||
|
@ -596,4 +596,3 @@ sidebar_label: 2023
|
||||
* Fix assertion from stress test [#50718](https://github.com/ClickHouse/ClickHouse/pull/50718) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
* Fix flaky unit test [#50719](https://github.com/ClickHouse/ClickHouse/pull/50719) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
* Show correct sharing state in system.query_cache [#50728](https://github.com/ClickHouse/ClickHouse/pull/50728) ([Robert Schulze](https://github.com/rschu1ze)).
|
||||
|
||||
|
@ -298,4 +298,3 @@ sidebar_label: 2023
|
||||
* Update version_date.tsv and changelogs after v23.4.5.22-stable [#51638](https://github.com/ClickHouse/ClickHouse/pull/51638) ([robot-clickhouse](https://github.com/robot-clickhouse)).
|
||||
* Update version_date.tsv and changelogs after v23.3.7.5-lts [#51639](https://github.com/ClickHouse/ClickHouse/pull/51639) ([robot-clickhouse](https://github.com/robot-clickhouse)).
|
||||
* Update parts.md [#51643](https://github.com/ClickHouse/ClickHouse/pull/51643) ([Ramazan Polat](https://github.com/ramazanpolat)).
|
||||
|
||||
|
@ -588,4 +588,3 @@ sidebar_label: 2023
|
||||
* tests: mark 02152_http_external_tables_memory_tracking as no-parallel [#54155](https://github.com/ClickHouse/ClickHouse/pull/54155) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* The external logs have had colliding arguments [#54165](https://github.com/ClickHouse/ClickHouse/pull/54165) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Rename macro [#54169](https://github.com/ClickHouse/ClickHouse/pull/54169) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
|
||||
|
@ -379,4 +379,3 @@ sidebar_label: 2023
|
||||
* Fix typo in packager when ccache is used [#55104](https://github.com/ClickHouse/ClickHouse/pull/55104) ([Ilya Yatsishin](https://github.com/qoega)).
|
||||
* Reduce flakiness of 01455_opentelemetry_distributed [#55111](https://github.com/ClickHouse/ClickHouse/pull/55111) ([Michael Kolupaev](https://github.com/al13n321)).
|
||||
* Fix build [#55113](https://github.com/ClickHouse/ClickHouse/pull/55113) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
|
||||
|
@ -37,7 +37,7 @@ sudo xcode-select --install
|
||||
|
||||
``` bash
|
||||
brew update
|
||||
brew install ccache cmake ninja libtool gettext llvm gcc binutils grep findutils
|
||||
brew install ccache cmake ninja libtool gettext llvm gcc binutils grep findutils nasm
|
||||
```
|
||||
|
||||
## Checkout ClickHouse Sources {#checkout-clickhouse-sources}
|
||||
|
@ -10,7 +10,7 @@ Allows to connect to databases on a remote [PostgreSQL](https://www.postgresql.o
|
||||
|
||||
Gives the real-time access to table list and table structure from remote PostgreSQL with the help of `SHOW TABLES` and `DESCRIBE TABLE` queries.
|
||||
|
||||
Supports table structure modifications (`ALTER TABLE ... ADD|DROP COLUMN`). If `use_table_cache` parameter (see the Engine Parameters below) it set to `1`, the table structure is cached and not checked for being modified, but can be updated with `DETACH` and `ATTACH` queries.
|
||||
Supports table structure modifications (`ALTER TABLE ... ADD|DROP COLUMN`). If `use_table_cache` parameter (see the Engine Parameters below) is set to `1`, the table structure is cached and not checked for being modified, but can be updated with `DETACH` and `ATTACH` queries.
|
||||
|
||||
## Creating a Database {#creating-a-database}
|
||||
|
||||
|
@ -19,6 +19,8 @@ CREATE TABLE azure_blob_storage_table (name String, value UInt32)
|
||||
|
||||
### Engine parameters
|
||||
|
||||
- `endpoint` — AzureBlobStorage endpoint URL with container & prefix. Optionally can contain account_name if the authentication method used needs it. (http://azurite1:{port}/[account_name]{container_name}/{data_prefix}) or these parameters can be provided separately using storage_account_url, account_name & container. For specifying prefix, endpoint should be used.
|
||||
- `endpoint_contains_account_name` - This flag is used to specify if endpoint contains account_name as it is only needed for certain authentication methods. (Default : true)
|
||||
- `connection_string|storage_account_url` — connection_string includes account name & key ([Create connection string](https://learn.microsoft.com/en-us/azure/storage/common/storage-configure-connection-string?toc=%2Fazure%2Fstorage%2Fblobs%2Ftoc.json&bc=%2Fazure%2Fstorage%2Fblobs%2Fbreadcrumb%2Ftoc.json#configure-a-connection-string-for-an-azure-storage-account)) or you could also provide the storage account url here and account name & account key as separate parameters (see parameters account_name & account_key)
|
||||
- `container_name` - Container name
|
||||
- `blobpath` - file path. Supports following wildcards in readonly mode: `*`, `**`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc'`, `'def'` — strings.
|
||||
|
@ -870,6 +870,11 @@ Tags:
|
||||
- `load_balancing` - Policy for disk balancing, `round_robin` or `least_used`.
|
||||
- `least_used_ttl_ms` - Configure timeout (in milliseconds) for updating the available space on all disks (`0` - update always, `-1` - never update, default is `60000`). Note, if the disk can be used by ClickHouse only and is not subject to an online filesystem resize/shrink, you can use `-1`; in all other cases it is not recommended, since eventually it will lead to incorrect space distribution.
|
||||
- `prefer_not_to_merge` — You should not use this setting. Disables merging of data parts on this volume (this is harmful and leads to performance degradation). When this setting is enabled (don't do it), merging data on this volume is not allowed (which is bad). This allows (but you don't need it) controlling (if you want to control something, you're making a mistake) how ClickHouse works with slow disks (but ClickHouse knows better, so please don't use this setting).
|
||||
- `volume_priority` — Defines the priority (order) in which volumes are filled. Lower value means higher priority. The parameter values should be natural numbers and collectively cover the range from 1 to N (lowest priority given) without skipping any numbers.
|
||||
* If _all_ volumes are tagged, they are prioritized in given order.
|
||||
* If only _some_ volumes are tagged, those without the tag have the lowest priority, and they are prioritized in the order they are defined in config.
|
||||
* If _no_ volumes are tagged, their priority is set according to the order in which they are declared in the configuration.
|
||||
* Two volumes cannot have the same priority value.
|
||||
|
||||
Configuration examples:
|
||||
|
||||
@ -919,7 +924,8 @@ In given example, the `hdd_in_order` policy implements the [round-robin](https:/
|
||||
If there are different kinds of disks available in the system, `moving_from_ssd_to_hdd` policy can be used instead. The volume `hot` consists of an SSD disk (`fast_ssd`), and the maximum size of a part that can be stored on this volume is 1GB. All the parts with the size larger than 1GB will be stored directly on the `cold` volume, which contains an HDD disk `disk1`.
|
||||
Also, once the disk `fast_ssd` gets filled by more than 80%, data will be transferred to the `disk1` by a background process.
|
||||
|
||||
The order of volume enumeration within a storage policy is important. Once a volume is overfilled, data are moved to the next one. The order of disk enumeration is important as well because data are stored on them in turns.
|
||||
The order of volume enumeration within a storage policy is important in case at least one of the volumes listed has no explicit `volume_priority` parameter.
|
||||
Once a volume is overfilled, data are moved to the next one. The order of disk enumeration is important as well because data are stored on them in turns.
|
||||
|
||||
When creating a table, one can apply one of the configured storage policies to it:
|
||||
|
||||
@ -1236,7 +1242,9 @@ Configuration markup:
|
||||
```
|
||||
|
||||
Connection parameters:
|
||||
* `storage_account_url` - **Required**, Azure Blob Storage account URL, like `http://account.blob.core.windows.net` or `http://azurite1:10000/devstoreaccount1`.
|
||||
* `endpoint` — AzureBlobStorage endpoint URL with container & prefix. Optionally can contain account_name if the authentication method used needs it. (`http://account.blob.core.windows.net:{port}/[account_name]{container_name}/{data_prefix}`) or these parameters can be provided separately using storage_account_url, account_name & container. For specifying prefix, endpoint should be used.
|
||||
* `endpoint_contains_account_name` - This flag is used to specify if endpoint contains account_name as it is only needed for certain authentication methods. (Default : true)
|
||||
* `storage_account_url` - Required if endpoint is not specified, Azure Blob Storage account URL, like `http://account.blob.core.windows.net` or `http://azurite1:10000/devstoreaccount1`.
|
||||
* `container_name` - Target container name, defaults to `default-container`.
|
||||
* `container_already_exists` - If set to `false`, a new container `container_name` is created in the storage account, if set to `true`, disk connects to the container directly, and if left unset, disk connects to the account, checks if the container `container_name` exists, and creates it if it doesn't exist yet.
|
||||
|
||||
|
@ -304,6 +304,24 @@ We use the term `MergeTree` to refer to all table engines in the `MergeTree fami
|
||||
|
||||
If you had a `MergeTree` table that was manually replicated, you can convert it to a replicated table. You might need to do this if you have already collected a large amount of data in a `MergeTree` table and now you want to enable replication.
|
||||
|
||||
A `MergeTree` table can be automatically converted on server restart if the `convert_to_replicated` flag is set in the table's data directory (`/var/lib/clickhouse/store/xxx/xxxyyyyy-yyyy-yyyy-yyyy-yyyyyyyyyyyy/` for `Atomic` database).
|
||||
Create an empty `convert_to_replicated` file and the table will be loaded as replicated on the next server restart.
|
||||
|
||||
This query can be used to get the table's data path.
|
||||
|
||||
```sql
|
||||
SELECT data_paths FROM system.tables WHERE table = 'table_name' AND database = 'database_name';
|
||||
```
|
||||
|
||||
Note that ReplicatedMergeTree table will be created with values of `default_replica_path` and `default_replica_name` settings.
|
||||
To create a converted table on other replicas, you will need to explicitly specify its path in the first argument of the `ReplicatedMergeTree` engine. The following query can be used to get its path.
|
||||
|
||||
```sql
|
||||
SELECT zookeeper_path FROM system.replicas WHERE table = 'table_name';
|
||||
```
|
||||
|
||||
There is also a manual way to do this without server restart.
|
||||
|
||||
If the data differs on various replicas, first sync it, or delete this data on all the replicas except one.
|
||||
|
||||
Rename the existing MergeTree table, then create a `ReplicatedMergeTree` table with the old name.
|
||||
|
@ -74,6 +74,10 @@ Specifying the `sharding_key` is necessary for the following:
|
||||
|
||||
`fsync_directories` - do the `fsync` for directories. Guarantees that the OS refreshed directory metadata after operations related to background inserts on Distributed table (after insert, after sending the data to shard, etc.).
|
||||
|
||||
#### skip_unavailable_shards
|
||||
|
||||
`skip_unavailable_shards` - If true, ClickHouse silently skips unavailable shards. Shard is marked as unavailable when: 1) The shard cannot be reached due to a connection failure. 2) Shard is unresolvable through DNS. 3) Table does not exist on the shard. Default false.
|
||||
|
||||
#### bytes_to_throw_insert
|
||||
|
||||
`bytes_to_throw_insert` - if more than this number of compressed bytes will be pending for background INSERT, an exception will be thrown. 0 - do not throw. Default 0.
|
||||
@ -102,6 +106,10 @@ Specifying the `sharding_key` is necessary for the following:
|
||||
|
||||
`background_insert_max_sleep_time_ms` - same as [distributed_background_insert_max_sleep_time_ms](../../../operations/settings/settings.md#distributed_background_insert_max_sleep_time_ms)
|
||||
|
||||
#### flush_on_detach
|
||||
|
||||
`flush_on_detach` - Flush data to remote nodes on DETACH/DROP/server shutdown. Default true.
|
||||
|
||||
:::note
|
||||
**Durability settings** (`fsync_...`):
|
||||
|
||||
|
@ -23,7 +23,6 @@ As of November 8th, 2022, each TSV is approximately the following size and numbe
|
||||
|
||||
# Table of Contents
|
||||
|
||||
- [ClickHouse GitHub data](#clickhouse-github-data)
|
||||
- [Table of Contents](#table-of-contents)
|
||||
- [Generating the data](#generating-the-data)
|
||||
- [Downloading and inserting the data](#downloading-and-inserting-the-data)
|
||||
|
@ -79,10 +79,7 @@ It is recommended to use official pre-compiled `deb` packages for Debian or Ubun
|
||||
#### Setup the Debian repository
|
||||
``` bash
|
||||
sudo apt-get install -y apt-transport-https ca-certificates dirmngr
|
||||
GNUPGHOME=$(mktemp -d)
|
||||
sudo GNUPGHOME="$GNUPGHOME" gpg --no-default-keyring --keyring /usr/share/keyrings/clickhouse-keyring.gpg --keyserver hkp://keyserver.ubuntu.com:80 --recv-keys 8919F6BD2B48D754
|
||||
sudo rm -rf "$GNUPGHOME"
|
||||
sudo chmod +r /usr/share/keyrings/clickhouse-keyring.gpg
|
||||
sudo gpg --no-default-keyring --keyring /usr/share/keyrings/clickhouse-keyring.gpg --keyserver hkp://keyserver.ubuntu.com:80 --recv-keys 8919F6BD2B48D754
|
||||
|
||||
echo "deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb stable main" | sudo tee \
|
||||
/etc/apt/sources.list.d/clickhouse.list
|
||||
|
15
docs/en/interfaces/third-party/gui.md
vendored
15
docs/en/interfaces/third-party/gui.md
vendored
@ -306,3 +306,18 @@ License: [commercial](https://tablum.io/pricing) product with 3-month free perio
|
||||
|
||||
Try it out for free [in the cloud](https://tablum.io/try).
|
||||
Learn more about the product at [TABLUM.IO](https://tablum.io/)
|
||||
|
||||
### CKMAN {#ckman}
|
||||
|
||||
[CKMAN](https://www.github.com/housepower/ckman) is a tool for managing and monitoring ClickHouse clusters!
|
||||
|
||||
Features:
|
||||
|
||||
- Rapid and convenient automated deployment of clusters through a browser interface
|
||||
- Clusters can be scaled up or down
|
||||
- Load balance the data of the cluster
|
||||
- Upgrade the cluster online
|
||||
- Modify the cluster configuration on the page
|
||||
- Provides cluster node monitoring and zookeeper monitoring
|
||||
- Monitor the status of tables and partitions, and monitor slow SQL statements
|
||||
- Provides an easy-to-use SQL execution page
|
||||
|
@ -80,6 +80,7 @@ The BACKUP and RESTORE statements take a list of DATABASE and TABLE names, a des
|
||||
- ASYNC: backup or restore asynchronously
|
||||
- PARTITIONS: a list of partitions to restore
|
||||
- SETTINGS:
|
||||
- `id`: id of the backup or restore operation; a randomly generated UUID is used if not specified manually. If there is already a running operation with the same `id`, an exception is thrown.
|
||||
- [`compression_method`](/docs/en/sql-reference/statements/create/table.md/#column-compression-codecs) and compression_level
|
||||
- `password` for the file on disk
|
||||
- `base_backup`: the destination of the previous backup of this source. For example, `Disk('backups', '1.zip')`
|
||||
@ -167,6 +168,28 @@ RESTORE TABLE test.table PARTITIONS '2', '3'
|
||||
FROM Disk('backups', 'filename.zip')
|
||||
```
|
||||
|
||||
### Backups as tar archives
|
||||
|
||||
Backups can also be stored as tar archives. The functionality is the same as for zip, except that a password is not supported.
|
||||
|
||||
Write a backup as a tar:
|
||||
```
|
||||
BACKUP TABLE test.table TO Disk('backups', '1.tar')
|
||||
```
|
||||
|
||||
Corresponding restore:
|
||||
```
|
||||
RESTORE TABLE test.table FROM Disk('backups', '1.tar')
|
||||
```
|
||||
|
||||
To change the compression method, the correct file suffix should be appended to the backup name. For example, to compress the tar archive using gzip:
|
||||
```
|
||||
BACKUP TABLE test.table TO Disk('backups', '1.tar.gz')
|
||||
```
|
||||
|
||||
The supported compression file suffixes are `.tar.gz`, `.tgz`, `.tar.bz2`, `.tar.lzma`, `.tar.zst`, `.tzst` and `.tar.xz`.
|
||||
|
||||
|
||||
### Check the status of backups
|
||||
|
||||
The backup command returns an `id` and `status`, and that `id` can be used to get the status of the backup. This is very useful to check the progress of long ASYNC backups. The example below shows a failure that happened when trying to overwrite an existing backup file:
|
||||
@ -206,7 +229,7 @@ end_time: 2022-08-30 09:21:46
|
||||
1 row in set. Elapsed: 0.002 sec.
|
||||
```
|
||||
|
||||
Along with `system.backups` table, all backup and restore operations are also tracked in the system log table [backup_log](../operations/system-tables/backup_log.md):
|
||||
Along with `system.backups` table, all backup and restore operations are also tracked in the system log table [backup_log](../operations/system-tables/backup_log.md):
|
||||
```
|
||||
SELECT *
|
||||
FROM system.backup_log
|
||||
@ -222,7 +245,7 @@ event_time_microseconds: 2023-08-18 11:13:43.097414
|
||||
id: 7678b0b3-f519-4e6e-811f-5a0781a4eb52
|
||||
name: Disk('backups', '1.zip')
|
||||
status: CREATING_BACKUP
|
||||
error:
|
||||
error:
|
||||
start_time: 2023-08-18 11:13:43
|
||||
end_time: 1970-01-01 03:00:00
|
||||
num_files: 0
|
||||
@ -252,7 +275,7 @@ compressed_size: 0
|
||||
files_read: 0
|
||||
bytes_read: 0
|
||||
|
||||
2 rows in set. Elapsed: 0.075 sec.
|
||||
2 rows in set. Elapsed: 0.075 sec.
|
||||
```
|
||||
|
||||
## Configuring BACKUP/RESTORE to use an S3 Endpoint
|
||||
@ -271,7 +294,7 @@ Creating an S3 bucket is covered in [Use S3 Object Storage as a ClickHouse disk]
|
||||
|
||||
The destination for a backup will be specified like this:
|
||||
```
|
||||
S3('<S3 endpoint>/<directory>', '<Access key ID>', '<Secret access key>)
|
||||
S3('<S3 endpoint>/<directory>', '<Access key ID>', '<Secret access key>')
|
||||
```
|
||||
|
||||
```sql
|
||||
|
@ -199,6 +199,16 @@ Type: Bool
|
||||
|
||||
Default: 0
|
||||
|
||||
|
||||
## dns_cache_max_entries
|
||||
|
||||
Internal DNS cache max entries.
|
||||
|
||||
Type: UInt64
|
||||
|
||||
Default: 10000
|
||||
|
||||
|
||||
## dns_cache_update_period
|
||||
|
||||
Internal DNS cache update period in seconds.
|
||||
@ -458,6 +468,38 @@ Type: Double
|
||||
|
||||
Default: 0.9
|
||||
|
||||
## cgroups_memory_usage_observer_wait_time
|
||||
|
||||
Interval in seconds during which the server's maximum allowed memory consumption is adjusted by the corresponding threshold in cgroups. (see
|
||||
settings `cgroup_memory_watcher_hard_limit_ratio` and `cgroup_memory_watcher_soft_limit_ratio`).
|
||||
|
||||
Type: UInt64
|
||||
|
||||
Default: 15
|
||||
|
||||
## cgroup_memory_watcher_hard_limit_ratio
|
||||
|
||||
Specifies the "hard" threshold with regards to the memory consumption of the server process according to cgroups after which the server's
|
||||
maximum memory consumption is adjusted to the threshold value.
|
||||
|
||||
See settings `cgroups_memory_usage_observer_wait_time` and `cgroup_memory_watcher_soft_limit_ratio`
|
||||
|
||||
Type: Double
|
||||
|
||||
Default: 0.95
|
||||
|
||||
## cgroup_memory_watcher_soft_limit_ratio
|
||||
|
||||
Specifies the "soft" threshold with regards to the memory consumption of the server process according to cgroups after which arenas in
|
||||
jemalloc are purged.
|
||||
|
||||
|
||||
See settings `cgroups_memory_usage_observer_wait_time` and `cgroup_memory_watcher_hard_limit_ratio`
|
||||
|
||||
Type: Double
|
||||
|
||||
Default: 0.95
|
||||
|
||||
## max_table_size_to_drop
|
||||
|
||||
Restriction on deleting tables.
|
||||
@ -472,10 +514,10 @@ The value 0 means that you can delete all tables without any restrictions.
|
||||
``` xml
|
||||
<max_table_size_to_drop>0</max_table_size_to_drop>
|
||||
```
|
||||
|
||||
|
||||
## max\_database\_num\_to\_warn {#max-database-num-to-warn}
|
||||
If the number of attached databases exceeds the specified value, clickhouse server will add warning messages to `system.warnings` table.
|
||||
|
||||
## max\_database\_num\_to\_warn {#max-database-num-to-warn}
|
||||
If the number of attached databases exceeds the specified value, clickhouse server will add warning messages to `system.warnings` table.
|
||||
Default value: 1000
|
||||
|
||||
**Example**
|
||||
@ -483,10 +525,10 @@ Default value: 1000
|
||||
``` xml
|
||||
<max_database_num_to_warn>50</max_database_num_to_warn>
|
||||
```
|
||||
|
||||
## max\_table\_num\_to\_warn {#max-table-num-to-warn}
|
||||
If the number of attached tables exceeds the specified value, clickhouse server will add warning messages to `system.warnings` table.
|
||||
Default value: 5000
|
||||
|
||||
## max\_table\_num\_to\_warn {#max-table-num-to-warn}
|
||||
If the number of attached tables exceeds the specified value, clickhouse server will add warning messages to `system.warnings` table.
|
||||
Default value: 5000
|
||||
|
||||
**Example**
|
||||
|
||||
@ -495,9 +537,9 @@ Default value: 5000
|
||||
```
|
||||
|
||||
|
||||
## max\_part\_num\_to\_warn {#max-part-num-to-warn}
|
||||
If the number of active parts exceeds the specified value, clickhouse server will add warning messages to `system.warnings` table.
|
||||
Default value: 100000
|
||||
## max\_part\_num\_to\_warn {#max-part-num-to-warn}
|
||||
If the number of active parts exceeds the specified value, clickhouse server will add warning messages to `system.warnings` table.
|
||||
Default value: 100000
|
||||
|
||||
**Example**
|
||||
|
||||
@ -2873,3 +2915,23 @@ A limit on the number of materialized views attached to a table.
|
||||
Note that only directly dependent views are considered here, and the creation of one view on top of another view is not considered.
|
||||
|
||||
Default value: `0`.
|
||||
|
||||
## format_alter_operations_with_parentheses {#format_alter_operations_with_parentheses}
|
||||
|
||||
If set to true, then alter operations will be surrounded by parentheses in formatted queries. This makes the parsing of formatted alter queries less ambiguous.
|
||||
|
||||
Type: Bool
|
||||
|
||||
Default: 0
|
||||
|
||||
## ignore_empty_sql_security_in_create_view_query {#ignore_empty_sql_security_in_create_view_query}
|
||||
|
||||
If true, ClickHouse doesn't write defaults for empty SQL security statement in CREATE VIEW queries.
|
||||
|
||||
:::note
|
||||
This setting is only necessary for the migration period and will become obsolete in 24.4
|
||||
:::
|
||||
|
||||
Type: Bool
|
||||
|
||||
Default: 1
|
||||
|
@ -1656,6 +1656,33 @@ Result:
|
||||
└─────────────────────────┴─────────┘
|
||||
```
|
||||
|
||||
### output_format_pretty_single_large_number_tip_threshold {#output_format_pretty_single_large_number_tip_threshold}
|
||||
|
||||
Print a readable number tip on the right side of the table if the block consists of a single number which exceeds
|
||||
this value (except 0).
|
||||
|
||||
Possible values:
|
||||
|
||||
- 0 — The readable number tip will not be printed.
|
||||
- Positive integer — The readable number tip will be printed if the single number exceeds this value.
|
||||
|
||||
Default value: `1000000`.
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
SELECT 1000000000 as a;
|
||||
```
|
||||
|
||||
Result:
|
||||
```text
|
||||
┌──────────a─┐
|
||||
│ 1000000000 │ -- 1.00 billion
|
||||
└────────────┘
|
||||
```
|
||||
|
||||
## Template format settings {#template-format-settings}
|
||||
|
||||
### format_template_resultset {#format_template_resultset}
|
||||
|
@ -755,7 +755,7 @@ By default: 1,000,000. It only works when reading from MergeTree engines.
|
||||
|
||||
## max_concurrent_queries_for_user {#max-concurrent-queries-for-user}
|
||||
|
||||
The maximum number of simultaneously processed queries related to MergeTree table per user.
|
||||
The maximum number of simultaneously processed queries per user.
|
||||
|
||||
Possible values:
|
||||
|
||||
@ -1776,7 +1776,7 @@ Default value: 0 (no restriction).
|
||||
## insert_quorum {#insert_quorum}
|
||||
|
||||
:::note
|
||||
`insert_quorum` does not apply when using the [`SharedMergeTree` table engine](/en/cloud/reference/shared-merge-tree) in ClickHouse Cloud as all inserts are quorum inserted.
|
||||
This setting is not applicable to SharedMergeTree, see [SharedMergeTree consistency](/docs/en/cloud/reference/shared-merge-tree/#consistency) for more information.
|
||||
:::
|
||||
|
||||
Enables the quorum writes.
|
||||
@ -1819,7 +1819,7 @@ See also:
|
||||
## insert_quorum_parallel {#insert_quorum_parallel}
|
||||
|
||||
:::note
|
||||
`insert_quorum_parallel` does not apply when using the [`SharedMergeTree` table engine](/en/cloud/reference/shared-merge-tree) in ClickHouse Cloud as all inserts are quorum inserted.
|
||||
This setting is not applicable to SharedMergeTree, see [SharedMergeTree consistency](/docs/en/cloud/reference/shared-merge-tree/#consistency) for more information.
|
||||
:::
|
||||
|
||||
Enables or disables parallelism for quorum `INSERT` queries. If enabled, additional `INSERT` queries can be sent while previous queries have not yet finished. If disabled, additional writes to the same table will be rejected.
|
||||
@ -1839,6 +1839,10 @@ See also:
|
||||
|
||||
## select_sequential_consistency {#select_sequential_consistency}
|
||||
|
||||
:::note
|
||||
This setting differs in behavior between SharedMergeTree and ReplicatedMergeTree, see [SharedMergeTree consistency](/docs/en/cloud/reference/shared-merge-tree/#consistency) for more information about the behavior of `select_sequential_consistency` in SharedMergeTree.
|
||||
:::
|
||||
|
||||
Enables or disables sequential consistency for `SELECT` queries. Requires `insert_quorum_parallel` to be disabled (enabled by default).
|
||||
|
||||
Possible values:
|
||||
@ -2037,7 +2041,7 @@ Possible values:
|
||||
- 0 — Disabled.
|
||||
- 1 — Enabled.
|
||||
|
||||
Default value: 1.
|
||||
Default value: 0.
|
||||
|
||||
By default, async inserts are inserted into replicated tables by the `INSERT` statement enabling [async_insert](#async-insert) are deduplicated (see [Data Replication](../../engines/table-engines/mergetree-family/replication.md)).
|
||||
For the replicated tables, by default, only 10000 of the most recent inserts for each partition are deduplicated (see [replicated_deduplication_window_for_async_inserts](merge-tree-settings.md/#replicated-deduplication-window-async-inserts), [replicated_deduplication_window_seconds_for_async_inserts](merge-tree-settings.md/#replicated-deduplication-window-seconds-async-inserts)).
|
||||
@ -3445,7 +3449,7 @@ Has an effect only when the connection is made through the MySQL wire protocol.
|
||||
- 0 - Use `BLOB`.
|
||||
- 1 - Use `TEXT`.
|
||||
|
||||
Default value: `0`.
|
||||
Default value: `1`.
|
||||
|
||||
## mysql_map_fixed_string_to_text_in_show_columns {#mysql_map_fixed_string_to_text_in_show_columns}
|
||||
|
||||
@ -3456,7 +3460,7 @@ Has an effect only when the connection is made through the MySQL wire protocol.
|
||||
- 0 - Use `BLOB`.
|
||||
- 1 - Use `TEXT`.
|
||||
|
||||
Default value: `0`.
|
||||
Default value: `1`.
|
||||
|
||||
## execute_merges_on_single_replica_time_threshold {#execute-merges-on-single-replica-time-threshold}
|
||||
|
||||
@ -3706,7 +3710,7 @@ Default value: `0`.
|
||||
|
||||
## allow_experimental_live_view {#allow-experimental-live-view}
|
||||
|
||||
Allows creation of experimental [live views](../../sql-reference/statements/create/view.md/#live-view).
|
||||
Allows creation of a deprecated LIVE VIEW.
|
||||
|
||||
Possible values:
|
||||
|
||||
@ -3717,21 +3721,15 @@ Default value: `0`.
|
||||
|
||||
## live_view_heartbeat_interval {#live-view-heartbeat-interval}
|
||||
|
||||
Sets the heartbeat interval in seconds to indicate [live view](../../sql-reference/statements/create/view.md/#live-view) is alive .
|
||||
|
||||
Default value: `15`.
|
||||
Deprecated.
|
||||
|
||||
## max_live_view_insert_blocks_before_refresh {#max-live-view-insert-blocks-before-refresh}
|
||||
|
||||
Sets the maximum number of inserted blocks after which mergeable blocks are dropped and query for [live view](../../sql-reference/statements/create/view.md/#live-view) is re-executed.
|
||||
|
||||
Default value: `64`.
|
||||
Deprecated.
|
||||
|
||||
## periodic_live_view_refresh {#periodic-live-view-refresh}
|
||||
|
||||
Sets the interval in seconds after which periodically refreshed [live view](../../sql-reference/statements/create/view.md/#live-view) is forced to refresh.
|
||||
|
||||
Default value: `60`.
|
||||
Deprecated.
|
||||
|
||||
## http_connection_timeout {#http_connection_timeout}
|
||||
|
||||
@ -4279,6 +4277,41 @@ Result:
|
||||
└─────┴─────┴───────┘
|
||||
```
|
||||
|
||||
## enable_order_by_all {#enable-order-by-all}
|
||||
|
||||
Enables or disables sorting with `ORDER BY ALL` syntax, see [ORDER BY](../../sql-reference/statements/select/order-by.md).
|
||||
|
||||
Possible values:
|
||||
|
||||
- 0 — Disable ORDER BY ALL.
|
||||
- 1 — Enable ORDER BY ALL.
|
||||
|
||||
Default value: `1`.
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
CREATE TABLE TAB(C1 Int, C2 Int, ALL Int) ENGINE=Memory();
|
||||
|
||||
INSERT INTO TAB VALUES (10, 20, 30), (20, 20, 10), (30, 10, 20);
|
||||
|
||||
SELECT * FROM TAB ORDER BY ALL; -- returns an error that ALL is ambiguous
|
||||
|
||||
SELECT * FROM TAB ORDER BY ALL SETTINGS enable_order_by_all = 0;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```text
|
||||
┌─C1─┬─C2─┬─ALL─┐
|
||||
│ 20 │ 20 │ 10 │
|
||||
│ 30 │ 10 │ 20 │
|
||||
│ 10 │ 20 │ 30 │
|
||||
└────┴────┴─────┘
|
||||
```
|
||||
|
||||
## splitby_max_substrings_includes_remaining_string {#splitby_max_substrings_includes_remaining_string}
|
||||
|
||||
Controls whether function [splitBy*()](../../sql-reference/functions/splitting-merging-functions.md) with argument `max_substrings` > 0 will include the remaining string in the last element of the result array.
|
||||
@ -5345,6 +5378,24 @@ SELECT map('a', range(number), 'b', number, 'c', 'str_' || toString(number)) as
|
||||
|
||||
Default value: `false`.
|
||||
|
||||
## default_normal_view_sql_security {#default_normal_view_sql_security}
|
||||
|
||||
Allows to set default `SQL SECURITY` option while creating a normal view. [More about SQL security](../../sql-reference/statements/create/view.md#sql_security).
|
||||
|
||||
The default value is `INVOKER`.
|
||||
|
||||
## default_materialized_view_sql_security {#default_materialized_view_sql_security}
|
||||
|
||||
Allows to set a default value for SQL SECURITY option when creating a materialized view. [More about SQL security](../../sql-reference/statements/create/view.md#sql_security).
|
||||
|
||||
The default value is `DEFINER`.
|
||||
|
||||
## default_view_definer {#default_view_definer}
|
||||
|
||||
Allows to set default `DEFINER` option while creating a view. [More about SQL security](../../sql-reference/statements/create/view.md#sql_security).
|
||||
|
||||
The default value is `CURRENT_USER`.
|
||||
|
||||
## max_partition_size_to_drop
|
||||
|
||||
Restriction on dropping partitions in query time. The value 0 means that you can drop partitions without any restrictions.
|
||||
|
38
docs/en/operations/system-tables/dns_cache.md
Normal file
38
docs/en/operations/system-tables/dns_cache.md
Normal file
@ -0,0 +1,38 @@
|
||||
---
|
||||
slug: /en/operations/system-tables/dns_cache
|
||||
---
|
||||
# dns_cache
|
||||
|
||||
Contains information about cached DNS records.
|
||||
|
||||
Columns:
|
||||
|
||||
- `hostname` ([String](../../sql-reference/data-types/string.md)) — cached hostname
|
||||
- `ip_address` ([String](../../sql-reference/data-types/string.md)) — ip address for the hostname
|
||||
- `ip_family` ([Enum](../../sql-reference/data-types/enum.md)) — family of the ip address, possible values:
|
||||
- 'IPv4'
|
||||
- 'IPv6'
|
||||
- 'UNIX_LOCAL'
|
||||
- `cached_at` ([DateTime](../../sql-reference/data-types/datetime.md)) — when the record was cached
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
SELECT * FROM system.dns_cache;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
| hostname | ip\_address | ip\_family | cached\_at |
|
||||
| :--- | :--- | :--- | :--- |
|
||||
| localhost | ::1 | IPv6 | 2024-02-11 17:04:40 |
|
||||
| localhost | 127.0.0.1 | IPv4 | 2024-02-11 17:04:40 |
|
||||
|
||||
**See also**
|
||||
|
||||
- [disable_internal_dns_cache setting](../../operations/server-configuration-parameters/settings.md#disable_internal_dns_cache)
|
||||
- [dns_cache_max_entries setting](../../operations/server-configuration-parameters/settings.md#dns_cache_max_entries)
|
||||
- [dns_cache_update_period setting](../../operations/server-configuration-parameters/settings.md#dns_cache_update_period)
|
||||
- [dns_max_consecutive_failures setting](../../operations/server-configuration-parameters/settings.md#dns_max_consecutive_failures)
|
@ -21,7 +21,7 @@ Columns:
|
||||
- `hostname` ([LowCardinality(String)](../../sql-reference/data-types/string.md)) — Hostname of the server executing the query.
|
||||
- `event_date` ([Date](../../sql-reference/data-types/date.md)) — The date when the thread has finished execution of the query.
|
||||
- `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — The date and time when the thread has finished execution of the query.
|
||||
- `event_time_microsecinds` ([DateTime](../../sql-reference/data-types/datetime.md)) — The date and time when the thread has finished execution of the query with microseconds precision.
|
||||
- `event_time_microseconds` ([DateTime](../../sql-reference/data-types/datetime.md)) — The date and time when the thread has finished execution of the query with microseconds precision.
|
||||
- `query_start_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Start time of query execution.
|
||||
- `query_start_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — Start time of query execution with microsecond precision.
|
||||
- `query_duration_ms` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Duration of query execution.
|
||||
@ -32,8 +32,7 @@ Columns:
|
||||
- `memory_usage` ([Int64](../../sql-reference/data-types/int-uint.md)) — The difference between the amount of allocated and freed memory in context of this thread.
|
||||
- `peak_memory_usage` ([Int64](../../sql-reference/data-types/int-uint.md)) — The maximum difference between the amount of allocated and freed memory in context of this thread.
|
||||
- `thread_name` ([String](../../sql-reference/data-types/string.md)) — Name of the thread.
|
||||
- `thread_number` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Internal thread ID.
|
||||
- `thread_id` ([Int32](../../sql-reference/data-types/int-uint.md)) — thread ID.
|
||||
- `thread_id` ([UInt64](../../sql-reference/data-types/int-uint.md)) — OS thread ID.
|
||||
- `master_thread_id` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — OS ID of the initial thread.
|
||||
- `query` ([String](../../sql-reference/data-types/string.md)) — Query string.
|
||||
- `is_initial_query` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Query type. Possible values:
|
||||
|
32
docs/en/operations/system-tables/settings_changes.md
Normal file
32
docs/en/operations/system-tables/settings_changes.md
Normal file
@ -0,0 +1,32 @@
|
||||
---
|
||||
slug: /en/operations/system-tables/settings_changes
|
||||
---
|
||||
# settings_changes
|
||||
|
||||
Contains information about setting changes in previous ClickHouse versions.
|
||||
|
||||
Columns:
|
||||
|
||||
- `version` ([String](../../sql-reference/data-types/string.md)) — The ClickHouse version in which settings were changed
|
||||
- `changes` ([Array](../../sql-reference/data-types/array.md) of [Tuple](../../sql-reference/data-types/tuple.md)) — A description of the setting changes: (setting name, previous value, new value, reason for the change)
|
||||
|
||||
**Example**
|
||||
|
||||
``` sql
|
||||
SELECT *
|
||||
FROM system.settings_changes
|
||||
WHERE version = '23.5'
|
||||
FORMAT Vertical
|
||||
```
|
||||
|
||||
``` text
|
||||
Row 1:
|
||||
──────
|
||||
version: 23.5
|
||||
changes: [('input_format_parquet_preserve_order','1','0','Allow Parquet reader to reorder rows for better parallelism.'),('parallelize_output_from_storages','0','1','Allow parallelism when executing queries that read from file/url/s3/etc. This may reorder rows.'),('use_with_fill_by_sorting_prefix','0','1','Columns preceding WITH FILL columns in ORDER BY clause form sorting prefix. Rows with different values in sorting prefix are filled independently'),('output_format_parquet_compliant_nested_types','0','1','Change an internal field name in output Parquet file schema.')]
|
||||
```
|
||||
|
||||
**See also**
|
||||
|
||||
- [Settings](../../operations/settings/index.md#session-settings-intro)
|
||||
- [system.settings](settings.md)
|
@ -26,6 +26,6 @@ Columns:
|
||||
|
||||
- `max` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — The maximum value of the setting. NULL if not set.
|
||||
|
||||
- `readonly` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges))) — Profile that allows only read queries.
|
||||
- `writability` ([Nullable](../../sql-reference/data-types/nullable.md)([Enum8](../../sql-reference/data-types/enum.md)('WRITABLE' = 0, 'CONST' = 1, 'CHANGEABLE_IN_READONLY' = 2))) — Sets the settings constraint writability kind.
|
||||
|
||||
- `inherit_profile` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — A parent profile for this setting profile. `NULL` if not set. Setting profile will inherit all the settings' values and constraints (`min`, `max`, `readonly`) from its parent profiles.
|
||||
|
@ -111,6 +111,14 @@ On newer Linux kernels transparent huge pages are alright.
|
||||
$ echo 'madvise' | sudo tee /sys/kernel/mm/transparent_hugepage/enabled
|
||||
```
|
||||
|
||||
If you want to modify the transparent huge pages setting permanently, edit `/etc/default/grub` and add `transparent_hugepage=madvise` to the `GRUB_CMDLINE_LINUX_DEFAULT` option:
|
||||
|
||||
```bash
|
||||
$ GRUB_CMDLINE_LINUX_DEFAULT="transparent_hugepage=madvise ..."
|
||||
```
|
||||
|
||||
After that, run the `sudo update-grub` command and then reboot for the change to take effect.
|
||||
|
||||
## Hypervisor configuration
|
||||
|
||||
If you are using OpenStack, set
|
||||
|
@ -0,0 +1,50 @@
|
||||
---
|
||||
slug: /en/sql-reference/aggregate-functions/reference/grouparrayintersect
|
||||
sidebar_position: 115
|
||||
---
|
||||
|
||||
# groupArrayIntersect
|
||||
|
||||
Returns the intersection of the given arrays (i.e. all items that are present in every given array).
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
groupArrayIntersect(x)
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `x` — Argument (column name or expression).
|
||||
|
||||
**Returned values**
|
||||
|
||||
- Array that contains elements that are in all arrays.
|
||||
|
||||
Type: [Array](../../data-types/array.md).
|
||||
|
||||
**Examples**
|
||||
|
||||
Consider table `numbers`:
|
||||
|
||||
``` text
|
||||
┌─a──────────────┐
|
||||
│ [1,2,4] │
|
||||
│ [1,5,2,8,-1,0] │
|
||||
│ [1,5,7,5,8,2] │
|
||||
└────────────────┘
|
||||
```
|
||||
|
||||
Query with column name as argument:
|
||||
|
||||
``` sql
|
||||
SELECT groupArrayIntersect(a) as intersection FROM numbers;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```text
|
||||
┌─intersection──────┐
|
||||
│ [1, 2] │
|
||||
└───────────────────┘
|
||||
```
|
@ -55,6 +55,7 @@ ClickHouse-specific aggregate functions:
|
||||
- [groupArrayMovingSum](/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingsum.md)
|
||||
- [groupArraySample](./grouparraysample.md)
|
||||
- [groupArraySorted](/docs/en/sql-reference/aggregate-functions/reference/grouparraysorted.md)
|
||||
- [groupArrayIntersect](./grouparrayintersect.md)
|
||||
- [groupBitAnd](/docs/en/sql-reference/aggregate-functions/reference/groupbitand.md)
|
||||
- [groupBitOr](/docs/en/sql-reference/aggregate-functions/reference/groupbitor.md)
|
||||
- [groupBitXor](/docs/en/sql-reference/aggregate-functions/reference/groupbitxor.md)
|
||||
|
@ -5,25 +5,25 @@ sidebar_position: 221
|
||||
|
||||
# stochasticLinearRegression
|
||||
|
||||
This function implements stochastic linear regression. It supports custom parameters for learning rate, L2 regularization coefficient, mini-batch size and has few methods for updating weights ([Adam](https://en.wikipedia.org/wiki/Stochastic_gradient_descent#Adam) (used by default), [simple SGD](https://en.wikipedia.org/wiki/Stochastic_gradient_descent), [Momentum](https://en.wikipedia.org/wiki/Stochastic_gradient_descent#Momentum), [Nesterov](https://mipt.ru/upload/medialibrary/d7e/41-91.pdf)).
|
||||
This function implements stochastic linear regression. It supports custom parameters for learning rate, L2 regularization coefficient, mini-batch size, and has a few methods for updating weights ([Adam](https://en.wikipedia.org/wiki/Stochastic_gradient_descent#Adam) (used by default), [simple SGD](https://en.wikipedia.org/wiki/Stochastic_gradient_descent), [Momentum](https://en.wikipedia.org/wiki/Stochastic_gradient_descent#Momentum), and [Nesterov](https://mipt.ru/upload/medialibrary/d7e/41-91.pdf)).
|
||||
|
||||
### Parameters
|
||||
|
||||
There are 4 customizable parameters. They are passed to the function sequentially, but there is no need to pass all four: default values will be used for the rest. However, a good model requires some parameter tuning.
|
||||
|
||||
``` text
|
||||
stochasticLinearRegression(1.0, 1.0, 10, 'SGD')
|
||||
stochasticLinearRegression(0.00001, 0.1, 15, 'Adam')
|
||||
```
|
||||
|
||||
1. `learning rate` is the coefficient on step length, when gradient descent step is performed. Too big learning rate may cause infinite weights of the model. Default is `0.00001`.
|
||||
1. `learning rate` is the coefficient on step length, when the gradient descent step is performed. A learning rate that is too big may cause infinite weights of the model. Default is `0.00001`.
|
||||
2. `l2 regularization coefficient` which may help to prevent overfitting. Default is `0.1`.
|
||||
3. `mini-batch size` sets the number of elements, which gradients will be computed and summed to perform one step of gradient descent. Pure stochastic descent uses one element, however having small batches(about 10 elements) make gradient steps more stable. Default is `15`.
|
||||
4. `method for updating weights`, they are: `Adam` (by default), `SGD`, `Momentum`, `Nesterov`. `Momentum` and `Nesterov` require little bit more computations and memory, however they happen to be useful in terms of speed of convergence and stability of stochastic gradient methods.
|
||||
3. `mini-batch size` sets the number of elements whose gradients will be computed and summed to perform one step of gradient descent. Pure stochastic descent uses one element; however, having small batches (about 10 elements) makes gradient steps more stable. Default is `15`.
|
||||
4. `method for updating weights`, they are: `Adam` (by default), `SGD`, `Momentum`, and `Nesterov`. `Momentum` and `Nesterov` require a little bit more computations and memory, however, they happen to be useful in terms of speed of convergence and stability of stochastic gradient methods.
|
||||
|
||||
### Usage
|
||||
|
||||
`stochasticLinearRegression` is used in two steps: fitting the model and predicting on new data. In order to fit the model and save its state for later usage we use `-State` combinator, which basically saves the state (model weights, etc).
|
||||
To predict we use function [evalMLMethod](../../../sql-reference/functions/machine-learning-functions.md#machine_learning_methods-evalmlmethod), which takes a state as an argument as well as features to predict on.
|
||||
`stochasticLinearRegression` is used in two steps: fitting the model and predicting on new data. In order to fit the model and save its state for later usage, we use the `-State` combinator, which saves the state (e.g. model weights).
|
||||
To predict, we use the function [evalMLMethod](../../../sql-reference/functions/machine-learning-functions.md#machine_learning_methods-evalmlmethod), which takes a state as an argument as well as features to predict on.
|
||||
|
||||
<a name="stochasticlinearregression-usage-fitting"></a>
|
||||
|
||||
@ -44,12 +44,12 @@ stochasticLinearRegressionState(0.1, 0.0, 5, 'SGD')(target, param1, param2)
|
||||
AS state FROM train_data;
|
||||
```
|
||||
|
||||
Here we also need to insert data into `train_data` table. The number of parameters is not fixed, it depends only on number of arguments, passed into `linearRegressionState`. They all must be numeric values.
|
||||
Note that the column with target value(which we would like to learn to predict) is inserted as the first argument.
|
||||
Here, we also need to insert data into the `train_data` table. The number of parameters is not fixed; it depends only on the number of arguments passed into `stochasticLinearRegressionState`. They all must be numeric values.
|
||||
Note that the column with target value (which we would like to learn to predict) is inserted as the first argument.
|
||||
|
||||
**2.** Predicting
|
||||
|
||||
After saving a state into the table, we may use it multiple times for prediction, or even merge with other states and create new even better models.
|
||||
After saving a state into the table, we may use it multiple times for prediction or even merge with other states and create new, even better models.
|
||||
|
||||
``` sql
|
||||
WITH (SELECT state FROM your_model) AS model SELECT
|
||||
|
@ -12,6 +12,11 @@ has a value of either type `T1` or `T2` or ... or `TN` or none of them (`NULL` v
|
||||
The order of nested types doesn't matter: Variant(T1, T2) = Variant(T2, T1).
|
||||
Nested types can be arbitrary types except Nullable(...), LowCardinality(Nullable(...)) and Variant(...) types.
|
||||
|
||||
:::note
|
||||
It's not recommended to use similar types as variants (for example different numeric types like `Variant(UInt32, Int64)` or different date types like `Variant(Date, DateTime)`),
|
||||
because working with values of such types can lead to ambiguity. By default, creating such a `Variant` type will lead to an exception, but it can be enabled using the setting `allow_suspicious_variant_types`.
|
||||
:::
|
||||
|
||||
:::note
|
||||
The Variant data type is an experimental feature. To use it, set `allow_experimental_variant_type = 1`.
|
||||
:::
|
||||
@ -272,3 +277,121 @@ $$)
|
||||
│ [1,2,3] │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ [1,2,3] │
|
||||
└─────────────────────┴───────────────┴──────┴───────┴─────────────────────┴─────────┘
|
||||
```
|
||||
|
||||
|
||||
## Comparing values of Variant type
|
||||
|
||||
Values of a `Variant` type can be compared only with values with the same `Variant` type.
|
||||
|
||||
The result of operator `<` for values `v1` with underlying type `T1` and `v2` with underlying type `T2` of a type `Variant(..., T1, ... T2, ...)` is defined as follows:
|
||||
- If `T1 = T2 = T`, the result will be `v1.T < v2.T` (underlying values will be compared).
|
||||
- If `T1 != T2`, the result will be `T1 < T2` (type names will be compared).
|
||||
|
||||
Examples:
|
||||
```sql
|
||||
CREATE TABLE test (v1 Variant(String, UInt64, Array(UInt32)), v2 Variant(String, UInt64, Array(UInt32))) ENGINE=Memory;
|
||||
INSERT INTO test VALUES (42, 42), (42, 43), (42, 'abc'), (42, [1, 2, 3]), (42, []), (42, NULL);
|
||||
```
|
||||
|
||||
```sql
|
||||
SELECT v2, variantType(v2) as v2_type from test order by v2;
|
||||
```
|
||||
|
||||
```text
|
||||
┌─v2──────┬─v2_type───────┐
|
||||
│ [] │ Array(UInt32) │
|
||||
│ [1,2,3] │ Array(UInt32) │
|
||||
│ abc │ String │
|
||||
│ 42 │ UInt64 │
|
||||
│ 43 │ UInt64 │
|
||||
│ ᴺᵁᴸᴸ │ None │
|
||||
└─────────┴───────────────┘
|
||||
```
|
||||
|
||||
```sql
|
||||
SELECT v1, variantType(v1) as v1_type, v2, variantType(v2) as v2_type, v1 = v2, v1 < v2, v1 > v2 from test;
|
||||
```
|
||||
|
||||
```text
|
||||
┌─v1─┬─v1_type─┬─v2──────┬─v2_type───────┬─equals(v1, v2)─┬─less(v1, v2)─┬─greater(v1, v2)─┐
|
||||
│ 42 │ UInt64 │ 42 │ UInt64 │ 1 │ 0 │ 0 │
|
||||
│ 42 │ UInt64 │ 43 │ UInt64 │ 0 │ 1 │ 0 │
|
||||
│ 42 │ UInt64 │ abc │ String │ 0 │ 0 │ 1 │
|
||||
│ 42 │ UInt64 │ [1,2,3] │ Array(UInt32) │ 0 │ 0 │ 1 │
|
||||
│ 42 │ UInt64 │ [] │ Array(UInt32) │ 0 │ 0 │ 1 │
|
||||
│ 42 │ UInt64 │ ᴺᵁᴸᴸ │ None │ 0 │ 1 │ 0 │
|
||||
└────┴─────────┴─────────┴───────────────┴────────────────┴──────────────┴─────────────────┘
|
||||
|
||||
```
|
||||
|
||||
If you need to find the row with a specific `Variant` value, you can do one of the following:
|
||||
|
||||
- Cast value to the corresponding `Variant` type:
|
||||
|
||||
```sql
|
||||
SELECT * FROM test WHERE v2 == [1,2,3]::Array(UInt32)::Variant(String, UInt64, Array(UInt32));
|
||||
```
|
||||
|
||||
```text
|
||||
┌─v1─┬─v2──────┐
|
||||
│ 42 │ [1,2,3] │
|
||||
└────┴─────────┘
|
||||
```
|
||||
|
||||
- Compare `Variant` subcolumn with required type:
|
||||
|
||||
```sql
|
||||
SELECT * FROM test WHERE v2.`Array(UInt32)` == [1,2,3] -- or using variantElement(v2, 'Array(UInt32)')
|
||||
```
|
||||
|
||||
```text
|
||||
┌─v1─┬─v2──────┐
|
||||
│ 42 │ [1,2,3] │
|
||||
└────┴─────────┘
|
||||
```
|
||||
|
||||
Sometimes it can be useful to make an additional check on the variant type, as subcolumns with complex types like `Array/Map/Tuple` cannot be inside `Nullable` and will have default values instead of `NULL` on rows with different types:
|
||||
|
||||
```sql
|
||||
SELECT v2, v2.`Array(UInt32)`, variantType(v2) FROM test WHERE v2.`Array(UInt32)` == [];
|
||||
```
|
||||
|
||||
```text
|
||||
┌─v2───┬─v2.Array(UInt32)─┬─variantType(v2)─┐
|
||||
│ 42 │ [] │ UInt64 │
|
||||
│ 43 │ [] │ UInt64 │
|
||||
│ abc │ [] │ String │
|
||||
│ [] │ [] │ Array(UInt32) │
|
||||
│ ᴺᵁᴸᴸ │ [] │ None │
|
||||
└──────┴──────────────────┴─────────────────┘
|
||||
```
|
||||
|
||||
```sql
|
||||
SELECT v2, v2.`Array(UInt32)`, variantType(v2) FROM test WHERE variantType(v2) == 'Array(UInt32)' AND v2.`Array(UInt32)` == [];
|
||||
```
|
||||
|
||||
```text
|
||||
┌─v2─┬─v2.Array(UInt32)─┬─variantType(v2)─┐
|
||||
│ [] │ [] │ Array(UInt32) │
|
||||
└────┴──────────────────┴─────────────────┘
|
||||
```
|
||||
|
||||
**Note:** values of variants with different numeric types are considered different variants and are not compared with each other; their type names are compared instead.
|
||||
|
||||
Example:
|
||||
|
||||
```sql
|
||||
SET allow_suspicious_variant_types = 1;
|
||||
CREATE TABLE test (v Variant(UInt32, Int64)) ENGINE=Memory;
|
||||
INSERT INTO test VALUES (1::UInt32), (1::Int64), (100::UInt32), (100::Int64);
|
||||
SELECT v, variantType(v) FROM test ORDER by v;
|
||||
```
|
||||
|
||||
```text
|
||||
┌─v───┬─variantType(v)─┐
|
||||
│ 1 │ Int64 │
|
||||
│ 100 │ Int64 │
|
||||
│ 1 │ UInt32 │
|
||||
│ 100 │ UInt32 │
|
||||
└─────┴────────────────┘
|
||||
```
|
||||
|
@ -167,6 +167,10 @@ Result:
|
||||
└──────────────────────────────────────────┴───────────────────────────────┘
|
||||
```
|
||||
|
||||
## byteSlice(s, offset, length)
|
||||
|
||||
See function [substring](string-functions.md#substring).
|
||||
|
||||
## bitTest
|
||||
|
||||
Takes any integer and converts it into [binary form](https://en.wikipedia.org/wiki/Binary_number), returns the value of a bit at specified position. The countdown starts from 0 from the right to the left.
|
||||
|
@ -780,8 +780,52 @@ If executed in the context of a distributed table, this function generates a nor
|
||||
|
||||
## version()
|
||||
|
||||
Returns the server version as a string.
|
||||
If executed in the context of a distributed table, this function generates a normal column with values relevant to each shard. Otherwise it produces a constant value.
|
||||
Returns the current version of ClickHouse as a string in the form of:
|
||||
|
||||
- Major version
|
||||
- Minor version
|
||||
- Patch version
|
||||
- Number of commits since the previous stable release.
|
||||
|
||||
```plaintext
|
||||
major_version.minor_version.patch_version.number_of_commits_since_the_previous_stable_release
|
||||
```
|
||||
|
||||
If executed in the context of a distributed table, this function generates a normal column with values relevant to each shard. Otherwise, it produces a constant value.
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
version()
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
None.
|
||||
|
||||
**Returned value**
|
||||
|
||||
Type: [String](../data-types/string)
|
||||
|
||||
**Implementation details**
|
||||
|
||||
None.
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
SELECT version()
|
||||
```
|
||||
|
||||
**Result**:
|
||||
|
||||
```response
|
||||
┌─version()─┐
|
||||
│ 24.2.1.1 │
|
||||
└───────────┘
|
||||
```
|
||||
|
||||
## buildId()
|
||||
|
||||
|
@ -558,6 +558,7 @@ substring(s, offset[, length])
|
||||
Alias:
|
||||
- `substr`
|
||||
- `mid`
|
||||
- `byteSlice`
|
||||
|
||||
**Arguments**
|
||||
|
||||
|
@ -9,6 +9,7 @@ The following operations with [partitions](/docs/en/engines/table-engines/merget
|
||||
|
||||
- [DETACH PARTITION\|PART](#detach-partitionpart) — Moves a partition or part to the `detached` directory and forgets it.
|
||||
- [DROP PARTITION\|PART](#drop-partitionpart) — Deletes a partition or part.
|
||||
- [FORGET PARTITION](#forget-partition) — Deletes a partition's metadata from ZooKeeper if the partition is empty.
|
||||
- [ATTACH PARTITION\|PART](#attach-partitionpart) — Adds a partition or part from the `detached` directory to the table.
|
||||
- [ATTACH PARTITION FROM](#attach-partition-from) — Copies the data partition from one table to another and adds.
|
||||
- [REPLACE PARTITION](#replace-partition) — Copies the data partition from one table to another and replaces.
|
||||
@ -73,6 +74,22 @@ ALTER TABLE table_name [ON CLUSTER cluster] DROP DETACHED PARTITION|PART partiti
|
||||
Removes the specified part or all parts of the specified partition from `detached`.
|
||||
Read more about setting the partition expression in a section [How to set the partition expression](#how-to-set-partition-expression).
|
||||
|
||||
## FORGET PARTITION
|
||||
|
||||
``` sql
|
||||
ALTER TABLE table_name FORGET PARTITION partition_expr
|
||||
```
|
||||
|
||||
Removes all metadata about an empty partition from ZooKeeper. The query fails if the partition is not empty or is unknown. Make sure to execute it only for partitions that will never be used again.
|
||||
|
||||
Read about setting the partition expression in a section [How to set the partition expression](#how-to-set-partition-expression).
|
||||
|
||||
Example:
|
||||
|
||||
``` sql
|
||||
ALTER TABLE mt FORGET PARTITION '20201121';
|
||||
```
|
||||
|
||||
## ATTACH PARTITION\|PART
|
||||
|
||||
``` sql
|
||||
|
@ -13,7 +13,9 @@ Creates a new view. Views can be [normal](#normal-view), [materialized](#materia
|
||||
Syntax:
|
||||
|
||||
``` sql
|
||||
CREATE [OR REPLACE] VIEW [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster_name] AS SELECT ...
|
||||
CREATE [OR REPLACE] VIEW [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster_name]
|
||||
[DEFINER = { user | CURRENT_USER }] [SQL SECURITY { DEFINER | INVOKER | NONE }]
|
||||
AS SELECT ...
|
||||
```
|
||||
|
||||
Normal views do not store any data. They just perform a read from another table on each access. In other words, a normal view is nothing more than a saved query. When reading from a view, this saved query is used as a subquery in the [FROM](../../../sql-reference/statements/select/from.md) clause.
|
||||
@ -52,7 +54,9 @@ SELECT * FROM view(column1=value1, column2=value2 ...)
|
||||
## Materialized View
|
||||
|
||||
``` sql
|
||||
CREATE MATERIALIZED VIEW [IF NOT EXISTS] [db.]table_name [ON CLUSTER] [TO[db.]name] [ENGINE = engine] [POPULATE] AS SELECT ...
|
||||
CREATE MATERIALIZED VIEW [IF NOT EXISTS] [db.]table_name [ON CLUSTER] [TO[db.]name] [ENGINE = engine] [POPULATE]
|
||||
[DEFINER = { user | CURRENT_USER }] [SQL SECURITY { DEFINER | INVOKER | NONE }]
|
||||
AS SELECT ...
|
||||
```
|
||||
|
||||
:::tip
|
||||
@ -91,6 +95,49 @@ Views look the same as normal tables. For example, they are listed in the result
|
||||
|
||||
To delete a view, use [DROP VIEW](../../../sql-reference/statements/drop.md#drop-view). Although `DROP TABLE` works for VIEWs as well.
|
||||
|
||||
## SQL security {#sql_security}
|
||||
|
||||
`DEFINER` and `SQL SECURITY` allow you to specify which ClickHouse user to use when executing the view's underlying query.
|
||||
`SQL SECURITY` has three legal values: `DEFINER`, `INVOKER`, or `NONE`. You can specify any existing user or `CURRENT_USER` in the `DEFINER` clause.
|
||||
|
||||
The following table explains which rights are required for which user in order to select from a view.
|
||||
Note that regardless of the SQL security option, in every case it is still required to have `GRANT SELECT ON <view>` in order to read from it.
|
||||
|
||||
| SQL security option | View | Materialized View |
|
||||
|---------------------|-----------------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------|
|
||||
| `DEFINER alice` | `alice` must have a `SELECT` grant for the view's source table. | `alice` must have a `SELECT` grant for the view's source table and an `INSERT` grant for the view's target table. |
|
||||
| `INVOKER` | User must have a `SELECT` grant for the view's source table. | `SQL SECURITY INVOKER` can't be specified for materialized views. |
|
||||
| `NONE` | - | - |
|
||||
|
||||
:::note
|
||||
`SQL SECURITY NONE` is a deprecated option. Any user with the rights to create views with `SQL SECURITY NONE` will be able to execute any arbitrary query.
|
||||
Thus, it is required to have `GRANT ALLOW SQL SECURITY NONE TO <user>` in order to create a view with this option.
|
||||
:::
|
||||
|
||||
If `DEFINER`/`SQL SECURITY` aren't specified, the default values are used:
|
||||
- `SQL SECURITY`: `INVOKER` for normal views and `DEFINER` for materialized views ([configurable by settings](../../../operations/settings/settings.md#default_normal_view_sql_security))
|
||||
- `DEFINER`: `CURRENT_USER` ([configurable by settings](../../../operations/settings/settings.md#default_view_definer))
|
||||
|
||||
If a view is attached without `DEFINER`/`SQL SECURITY` specified, the default value is `SQL SECURITY NONE` for the materialized view and `SQL SECURITY INVOKER` for the normal view.
|
||||
|
||||
To change SQL security for an existing view, use
|
||||
```sql
|
||||
ALTER TABLE MODIFY SQL SECURITY { DEFINER | INVOKER | NONE } [DEFINER = { user | CURRENT_USER }]
|
||||
```
|
||||
|
||||
### Examples sql security
|
||||
```sql
|
||||
CREATE VIEW test_view
|
||||
DEFINER = alice SQL SECURITY DEFINER
|
||||
AS SELECT ...
|
||||
```
|
||||
|
||||
```sql
|
||||
CREATE VIEW test_view
|
||||
SQL SECURITY INVOKER
|
||||
AS SELECT ...
|
||||
```
|
||||
|
||||
## Live View [Deprecated]
|
||||
|
||||
This feature is deprecated and will be removed in the future.
|
||||
|
@ -114,6 +114,7 @@ Hierarchy of privileges:
|
||||
- `ALTER VIEW`
|
||||
- `ALTER VIEW REFRESH`
|
||||
- `ALTER VIEW MODIFY QUERY`
|
||||
- `ALTER VIEW MODIFY SQL SECURITY`
|
||||
- [CREATE](#grant-create)
|
||||
- `CREATE DATABASE`
|
||||
- `CREATE TABLE`
|
||||
@ -307,6 +308,7 @@ Allows executing [ALTER](../../sql-reference/statements/alter/index.md) queries
|
||||
- `ALTER VIEW` Level: `GROUP`
|
||||
- `ALTER VIEW REFRESH`. Level: `VIEW`. Aliases: `ALTER LIVE VIEW REFRESH`, `REFRESH VIEW`
|
||||
- `ALTER VIEW MODIFY QUERY`. Level: `VIEW`. Aliases: `ALTER TABLE MODIFY QUERY`
|
||||
- `ALTER VIEW MODIFY SQL SECURITY`. Level: `VIEW`. Aliases: `ALTER TABLE MODIFY SQL SECURITY`
|
||||
|
||||
Examples of how this hierarchy is treated:
|
||||
|
||||
@ -409,6 +411,7 @@ Allows a user to execute queries that manage users, roles and row policies.
|
||||
- `SHOW_ROW_POLICIES`. Level: `GLOBAL`. Aliases: `SHOW POLICIES`, `SHOW CREATE ROW POLICY`, `SHOW CREATE POLICY`
|
||||
- `SHOW_QUOTAS`. Level: `GLOBAL`. Aliases: `SHOW CREATE QUOTA`
|
||||
- `SHOW_SETTINGS_PROFILES`. Level: `GLOBAL`. Aliases: `SHOW PROFILES`, `SHOW CREATE SETTINGS PROFILE`, `SHOW CREATE PROFILE`
|
||||
- `ALLOW SQL SECURITY NONE`. Level: `GLOBAL`. Aliases: `CREATE SQL SECURITY NONE`, `SQL SECURITY NONE`, `SECURITY NONE`
|
||||
|
||||
The `ROLE ADMIN` privilege allows a user to assign and revoke any roles including those which are not assigned to the user with the admin option.
|
||||
|
||||
|
@ -176,7 +176,7 @@ INSERT INTO infile_globs FROM INFILE 'input_?.csv' FORMAT CSV;
|
||||
```
|
||||
:::
|
||||
|
||||
## Inserting into Table Function
|
||||
## Inserting using a Table Function
|
||||
|
||||
Data can be inserted into tables referenced by [table functions](../../sql-reference/table-functions/index.md).
|
||||
|
||||
@ -204,7 +204,7 @@ Result:
|
||||
└─────┴───────────────────────┘
|
||||
```
|
||||
|
||||
## Inserts into ClickHouse Cloud
|
||||
## Inserting into ClickHouse Cloud
|
||||
|
||||
By default, services on ClickHouse Cloud provide multiple replicas for high availability. When you connect to a service, a connection is established to one of these replicas.
|
||||
|
||||
@ -218,6 +218,12 @@ SELECT .... SETTINGS select_sequential_consistency = 1;
|
||||
|
||||
Note that using `select_sequential_consistency` will increase the load on ClickHouse Keeper (used by ClickHouse Cloud internally) and may result in slower performance depending on the load on the service. We recommend against enabling this setting unless necessary. The recommended approach is to execute read/writes in the same session or to use a client driver that uses the native protocol (and thus supports sticky connections).
|
||||
|
||||
## Inserting into a replicated setup
|
||||
|
||||
In a replicated setup, data will be visible on other replicas after it has been replicated. Data begins being replicated (downloaded on other replicas) immediately after an `INSERT`. This differs from ClickHouse Cloud, where data is immediately written to shared storage and replicas subscribe to metadata changes.
|
||||
|
||||
Note that for replicated setups, `INSERT`s can sometimes take a considerable amount of time (on the order of one second), as they require committing to ClickHouse Keeper for distributed consensus. Using S3 for storage also introduces additional latency.
|
||||
|
||||
## Performance Considerations
|
||||
|
||||
`INSERT` sorts the input data by primary key and splits them into partitions by a partition key. If you insert data into several partitions at once, it can significantly reduce the performance of the `INSERT` query. To avoid this:
|
||||
@ -230,7 +236,15 @@ Performance will not decrease if:
|
||||
- Data is added in real time.
|
||||
- You upload data that is usually sorted by time.
|
||||
|
||||
It's also possible to asynchronously insert data in small but frequent inserts. The data from such insertions is combined into batches and then safely inserted into a table. To enable the asynchronous mode, switch on the [async_insert](../../operations/settings/settings.md#async-insert) setting. Note that asynchronous insertions are supported only over HTTP protocol, and deduplication is not supported for them.
|
||||
### Asynchronous inserts
|
||||
|
||||
It is possible to asynchronously insert data in small but frequent inserts. The data from such insertions is combined into batches and then safely inserted into a table. To use asynchronous inserts, enable the [`async_insert`](../../operations/settings/settings.md#async-insert) setting.
|
||||
|
||||
Using `async_insert` or the [`Buffer` table engine](/en/engines/table-engines/special/buffer) results in additional buffering.
|
||||
|
||||
### Large or long-running inserts
|
||||
|
||||
When you are inserting large amounts of data, ClickHouse will optimize write performance through a process called "squashing". Small blocks of inserted data in memory are merged and squashed into larger blocks before being written to disk. Squashing reduces the overhead associated with each write operation. In this process, inserted data will be available to query after ClickHouse completes writing each [`max_insert_block_size`](/en/operations/settings/settings#max_insert_block_size) rows.
|
||||
|
||||
**See Also**
|
||||
|
||||
|
@ -9,9 +9,10 @@ The `ORDER BY` clause contains
|
||||
|
||||
- a list of expressions, e.g. `ORDER BY visits, search_phrase`,
|
||||
- a list of numbers referring to columns in the `SELECT` clause, e.g. `ORDER BY 2, 1`, or
|
||||
- `*` (without other expressions or numbers) which means all columns of the `SELECT` clause: `ORDER BY *`.
|
||||
- `ALL` which means all columns of the `SELECT` clause, e.g. `ORDER BY ALL`.
|
||||
|
||||
To disable sorting by column numbers, set setting [enable_positional_arguments](../../../operations/settings/settings.md#enable-positional-arguments) = 0.
|
||||
To disable sorting by `ALL`, set setting [enable_order_by_all](../../../operations/settings/settings.md#enable-order-by-all) = 0.
|
||||
|
||||
The `ORDER BY` clause can be attributed by a `DESC` (descending) or `ASC` (ascending) modifier which determines the sorting direction.
|
||||
Unless an explicit sort order is specified, `ASC` is used by default.
|
||||
|
@ -68,7 +68,7 @@ RELOAD FUNCTION [ON CLUSTER cluster_name] function_name
|
||||
|
||||
Clears ClickHouse’s internal DNS cache. Sometimes (for old ClickHouse versions) it is necessary to use this command when changing the infrastructure (changing the IP address of another ClickHouse server or the server used by dictionaries).
|
||||
|
||||
For more convenient (automatic) cache management, see disable_internal_dns_cache, dns_cache_update_period parameters.
|
||||
For more convenient (automatic) cache management, see disable_internal_dns_cache, dns_cache_max_entries, dns_cache_update_period parameters.
|
||||
|
||||
## DROP MARK CACHE
|
||||
|
||||
|
@ -59,9 +59,7 @@ INSERT INTO TABLE FUNCTION file('file2.csv', 'CSV', 'i UInt32, s String') VALUES
|
||||
Now, read data contents of `test1.csv` and `test2.csv` via `fileCluster` table function:
|
||||
|
||||
```sql
|
||||
SELECT * from fileCluster(
|
||||
'my_cluster', 'file{1,2}.csv', 'CSV', 'i UInt32, s String') ORDER BY (i, s)"""
|
||||
)
|
||||
SELECT * FROM fileCluster('my_cluster', 'file{1,2}.csv', 'CSV', 'i UInt32, s String') ORDER BY i, s
|
||||
```
|
||||
|
||||
```
|
||||
|
@ -11,11 +11,11 @@ Creates a temporary [Merge](../../engines/table-engines/special/merge.md) table.
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
merge('db_name', 'tables_regexp')
|
||||
merge(['db_name',] 'tables_regexp')
|
||||
```
|
||||
**Arguments**
|
||||
|
||||
- `db_name` — Possible values:
|
||||
- `db_name` — Possible values (optional, default is `currentDatabase()`):
|
||||
- database name,
|
||||
- constant expression that returns a string with a database name, for example, `currentDatabase()`,
|
||||
- `REGEXP(expression)`, where `expression` is a regular expression to match the DB names.
|
||||
|
83
docs/en/sql-reference/table-functions/mergeTreeIndex.md
Normal file
83
docs/en/sql-reference/table-functions/mergeTreeIndex.md
Normal file
@ -0,0 +1,83 @@
|
||||
---
|
||||
slug: /en/sql-reference/table-functions/mergeTreeIndex
|
||||
sidebar_position: 77
|
||||
sidebar_label: mergeTreeIndex
|
||||
---
|
||||
|
||||
# mergeTreeIndex
|
||||
|
||||
Represents the contents of index and marks files of MergeTree tables. It can be used for introspection.
|
||||
|
||||
``` sql
|
||||
mergeTreeIndex(database, table, [with_marks = true])
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `database` - The database name to read index and marks from.
|
||||
- `table` - The table name to read index and marks from.
|
||||
- `with_marks` - Whether to include columns with marks in the result.
|
||||
|
||||
**Returned Value**
|
||||
|
||||
A table object with columns containing the values of the primary index of the source table, columns containing the values of marks (if enabled) for all possible files in the data parts of the source table, and virtual columns:
|
||||
|
||||
- `part_name` - The name of data part.
|
||||
- `mark_number` - The number of current mark in data part.
|
||||
- `rows_in_granule` - The number of rows in current granule.
|
||||
|
||||
A marks column may contain a `(NULL, NULL)` value if the column is absent in the data part or if marks for one of its substreams are not written (e.g. in compact parts).
|
||||
|
||||
## Usage Example
|
||||
|
||||
```sql
|
||||
CREATE TABLE test_table
|
||||
(
|
||||
`id` UInt64,
|
||||
`n` UInt64,
|
||||
`arr` Array(UInt64)
|
||||
)
|
||||
ENGINE = MergeTree
|
||||
ORDER BY id
|
||||
SETTINGS index_granularity = 3, min_bytes_for_wide_part = 0, min_rows_for_wide_part = 8;
|
||||
|
||||
INSERT INTO test_table SELECT number, number, range(number % 5) FROM numbers(5);
|
||||
|
||||
INSERT INTO test_table SELECT number, number, range(number % 5) FROM numbers(10, 10);
|
||||
```
|
||||
|
||||
```sql
|
||||
SELECT * FROM mergeTreeIndex(currentDatabase(), test_table, with_marks = true);
|
||||
```
|
||||
|
||||
```text
|
||||
┌─part_name─┬─mark_number─┬─rows_in_granule─┬─id─┬─id.mark─┬─n.mark──┬─arr.size0.mark─┬─arr.mark─┐
|
||||
│ all_1_1_0 │ 0 │ 3 │ 0 │ (0,0) │ (42,0) │ (NULL,NULL) │ (84,0) │
|
||||
│ all_1_1_0 │ 1 │ 2 │ 3 │ (133,0) │ (172,0) │ (NULL,NULL) │ (211,0) │
|
||||
│ all_1_1_0 │ 2 │ 0 │ 4 │ (271,0) │ (271,0) │ (NULL,NULL) │ (271,0) │
|
||||
└───────────┴─────────────┴─────────────────┴────┴─────────┴─────────┴────────────────┴──────────┘
|
||||
┌─part_name─┬─mark_number─┬─rows_in_granule─┬─id─┬─id.mark─┬─n.mark─┬─arr.size0.mark─┬─arr.mark─┐
|
||||
│ all_2_2_0 │ 0 │ 3 │ 10 │ (0,0) │ (0,0) │ (0,0) │ (0,0) │
|
||||
│ all_2_2_0 │ 1 │ 3 │ 13 │ (0,24) │ (0,24) │ (0,24) │ (0,24) │
|
||||
│ all_2_2_0 │ 2 │ 3 │ 16 │ (0,48) │ (0,48) │ (0,48) │ (0,80) │
|
||||
│ all_2_2_0 │ 3 │ 1 │ 19 │ (0,72) │ (0,72) │ (0,72) │ (0,128) │
|
||||
│ all_2_2_0 │ 4 │ 0 │ 19 │ (0,80) │ (0,80) │ (0,80) │ (0,160) │
|
||||
└───────────┴─────────────┴─────────────────┴────┴─────────┴────────┴────────────────┴──────────┘
|
||||
```
|
||||
|
||||
```sql
|
||||
DESCRIBE mergeTreeIndex(currentDatabase(), test_table, with_marks = true) SETTINGS describe_compact_output = 1;
|
||||
```
|
||||
|
||||
```text
|
||||
┌─name────────────┬─type─────────────────────────────────────────────────────────────────────────────────────────────┐
|
||||
│ part_name │ String │
|
||||
│ mark_number │ UInt64 │
|
||||
│ rows_in_granule │ UInt64 │
|
||||
│ id │ UInt64 │
|
||||
│ id.mark │ Tuple(offset_in_compressed_file Nullable(UInt64), offset_in_decompressed_block Nullable(UInt64)) │
|
||||
│ n.mark │ Tuple(offset_in_compressed_file Nullable(UInt64), offset_in_decompressed_block Nullable(UInt64)) │
|
||||
│ arr.size0.mark │ Tuple(offset_in_compressed_file Nullable(UInt64), offset_in_decompressed_block Nullable(UInt64)) │
|
||||
│ arr.mark │ Tuple(offset_in_compressed_file Nullable(UInt64), offset_in_decompressed_block Nullable(UInt64)) │
|
||||
└─────────────────┴──────────────────────────────────────────────────────────────────────────────────────────────────┘
|
||||
```
|
@ -679,11 +679,20 @@ TTL d + INTERVAL 1 MONTH GROUP BY k1, k2 SET x = max(x), y = min(y);
|
||||
|
||||
Тэги:
|
||||
|
||||
- `policy_name_N` — название политики. Названия политик должны быть уникальны.
|
||||
- `volume_name_N` — название тома. Названия томов должны быть уникальны.
|
||||
- `disk` — диск, находящийся внутри тома.
|
||||
- `max_data_part_size_bytes` — максимальный размер куска данных, который может находиться на любом из дисков этого тома. Если в результате слияния размер куска ожидается больше, чем max_data_part_size_bytes, то этот кусок будет записан в следующий том. В основном эта функция позволяет хранить новые / мелкие куски на горячем (SSD) томе и перемещать их на холодный (HDD) том, когда они достигают большого размера. Не используйте этот параметр, если политика имеет только один том.
|
||||
- `move_factor` — доля доступного свободного места на томе, если места становится меньше, то данные начнут перемещение на следующий том, если он есть (по умолчанию 0.1). Для перемещения куски сортируются по размеру от большего к меньшему (по убыванию) и выбираются куски, совокупный размер которых достаточен для соблюдения условия `move_factor`, если совокупный размер всех партов недостаточен, будут перемещены все парты.
|
||||
- `policy_name_N` — название политики. Названия политик должны быть уникальны.
|
||||
- `volume_name_N` — название тома. Названия томов должны быть уникальны.
|
||||
- `disk` — диск, находящийся внутри тома.
|
||||
- `max_data_part_size_bytes` — максимальный размер куска данных, который может находиться на любом из дисков этого тома. Если в результате слияния размер куска ожидается больше, чем max_data_part_size_bytes, то этот кусок будет записан в следующий том. В основном эта функция позволяет хранить новые / мелкие куски на горячем (SSD) томе и перемещать их на холодный (HDD) том, когда они достигают большого размера. Не используйте этот параметр, если политика имеет только один том.
|
||||
- `move_factor` — доля доступного свободного места на томе, если места становится меньше, то данные начнут перемещение на следующий том, если он есть (по умолчанию 0.1). Для перемещения куски сортируются по размеру от большего к меньшему (по убыванию) и выбираются куски, совокупный размер которых достаточен для соблюдения условия `move_factor`, если совокупный размер всех партов недостаточен, будут перемещены все парты.
|
||||
- `perform_ttl_move_on_insert` — отключает перемещение данных с истекшим TTL при вставке. По умолчанию (если включено), если мы вставляем часть данных, которая уже просрочилась по правилу перемещения по сроку жизни, она немедленно перемещается на том / диск, указанный в правиле перемещения. Это может значительно замедлить вставку в случае, если целевой том / диск медленный (например, S3). Если отключено, то просроченная часть данных записывается на том по умолчанию, а затем сразу перемещается на том, указанный в правиле для истёкшего TTL.
|
||||
- `load_balancing` - политика балансировки дисков, `round_robin` или `least_used`.
|
||||
- `least_used_ttl_ms` - устанавливает таймаут (в миллисекундах) для обновления доступного пространства на всех дисках (`0` - обновлять всегда, `-1` - никогда не обновлять, значение по умолчанию - `60000`). Обратите внимание, если диск используется только ClickHouse и не будет подвергаться изменению размеров файловой системы на лету, можно использовать значение `-1`. Во всех остальных случаях это не рекомендуется, так как в конечном итоге это приведет к неправильному распределению пространства.
|
||||
- `prefer_not_to_merge` — эту настройку лучше не использовать. Она отключает слияние частей данных на этом томе (что потенциально вредно и может привести к замедлению). Когда эта настройка включена (не делайте этого), объединение данных на этом томе запрещено (что плохо). Это позволяет (но вам это не нужно) контролировать (если вы хотите что-то контролировать, вы делаете ошибку), как ClickHouse взаимодействует с медленными дисками (но ClickHouse лучше знает, поэтому, пожалуйста, не используйте эту настройку).
|
||||
- `volume_priority` — Определяет приоритет (порядок), в котором заполняются тома. Чем меньше значение -- тем выше приоритет. Значения параметра должны быть натуральными числами и охватывать диапазон от 1 до N (N - наибольшее значение параметра из указанных) без пропусков.
|
||||
* Если _все_ тома имеют этот параметр, они приоритизируются в указанном порядке.
|
||||
* Если его имеют лишь _некоторые_, то не имеющие этого параметра тома имеют самый низкий приоритет. Те, у которых он указан, приоритизируются в соответствии со значением тега, приоритет остальных определяется порядком описания в конфигурационном файле относительно друг друга.
|
||||
* Если _ни одному_ тому не присвоен этот параметр, их порядок определяется порядком описания в конфигурационном файле.
|
||||
* Приоритет нескольких томов не может быть одинаковым.
|
||||
|
||||
Примеры конфигураций:
|
||||
|
||||
@ -733,7 +742,7 @@ TTL d + INTERVAL 1 MONTH GROUP BY k1, k2 SET x = max(x), y = min(y);
|
||||
|
||||
Если система содержит диски различных типов, то может пригодиться политика `moving_from_ssd_to_hdd`. В томе `hot` находится один SSD-диск (`fast_ssd`), а также задается ограничение на максимальный размер куска, который может храниться на этом томе (1GB). Все куски такой таблицы больше 1GB будут записываться сразу на том `cold`, в котором содержится один HDD-диск `disk1`. Также при заполнении диска `fast_ssd` более чем на 80% данные будут переноситься на диск `disk1` фоновым процессом.
|
||||
|
||||
Порядок томов в политиках хранения важен, при достижении условий на переполнение тома данные переносятся на следующий. Порядок дисков в томах так же важен, данные пишутся по очереди на каждый из них.
|
||||
Порядок томов в политиках хранения важен в случае, если приоритеты томов (`volume_priority`) не указаны явно: при достижении условий на переполнение тома данные переносятся на следующий. Порядок дисков в томах так же важен, данные пишутся по очереди на каждый из них.
|
||||
|
||||
После задания конфигурации политик хранения их можно использовать, как настройку при создании таблиц:
|
||||
|
||||
|
16
docs/ru/interfaces/third-party/gui.md
vendored
16
docs/ru/interfaces/third-party/gui.md
vendored
@ -260,3 +260,19 @@ SeekTable [бесплатен](https://www.seektable.com/help/cloud-pricing) д
|
||||
|
||||
Протестировать TABLUM.IO без разворачивания на собственном сервере можно [здесь](https://tablum.io/try).
|
||||
Подробно о продукте смотрите на [TABLUM.IO](https://tablum.io/)
|
||||
|
||||
|
||||
### CKMAN {#ckman}
|
||||
|
||||
[CKMAN](https://www.github.com/housepower/ckman) — инструмент управления и мониторинга кластеров ClickHouse!
|
||||
|
||||
Основные возможности:
|
||||
|
||||
- Быстрое и простое развертывание кластеров через веб-интерфейс
|
||||
- Кластеры можно масштабировать в большую или меньшую сторону
|
||||
- Балансировка нагрузки данных кластера
|
||||
- Обновление кластера в режиме онлайн
|
||||
- Изменение конфигурации кластера через веб-интерфейс
|
||||
- Обеспечивает мониторинг узлов кластера и zookeeper
|
||||
- Мониторинг состояния таблиц и секций, а также медленных SQL-запросов
|
||||
- Предоставляет простую в использовании страницу выполнения SQL
|
||||
|
@ -3258,7 +3258,7 @@ SELECT * FROM test2;
|
||||
|
||||
## allow_experimental_live_view {#allow-experimental-live-view}
|
||||
|
||||
Включает экспериментальную возможность использования [LIVE-представлений](../../sql-reference/statements/create/view.md#live-view).
|
||||
Включает устаревшую возможность использования [LIVE-представлений](../../sql-reference/statements/create/view.md#live-view).
|
||||
|
||||
Возможные значения:
|
||||
- 0 — живые представления не поддерживаются.
|
||||
@ -3268,21 +3268,15 @@ SELECT * FROM test2;
|
||||
|
||||
## live_view_heartbeat_interval {#live-view-heartbeat-interval}
|
||||
|
||||
Задает интервал в секундах для периодической проверки существования [LIVE VIEW](../../sql-reference/statements/create/view.md#live-view).
|
||||
|
||||
Значение по умолчанию: `15`.
|
||||
Устарело.
|
||||
|
||||
## max_live_view_insert_blocks_before_refresh {#max-live-view-insert-blocks-before-refresh}
|
||||
|
||||
Задает наибольшее число вставок, после которых запрос на формирование [LIVE VIEW](../../sql-reference/statements/create/view.md#live-view) исполняется снова.
|
||||
|
||||
Значение по умолчанию: `64`.
|
||||
Устарело.
|
||||
|
||||
## periodic_live_view_refresh {#periodic-live-view-refresh}
|
||||
|
||||
Задает время в секундах, по истечении которого [LIVE VIEW](../../sql-reference/statements/create/view.md#live-view) с установленным автообновлением обновляется.
|
||||
|
||||
Значение по умолчанию: `60`.
|
||||
Устарело.
|
||||
|
||||
## check_query_single_value_result {#check_query_single_value_result}
|
||||
|
||||
|
@ -11,7 +11,9 @@ sidebar_label: "Представление"
|
||||
## Обычные представления {#normal}
|
||||
|
||||
``` sql
|
||||
CREATE [OR REPLACE] VIEW [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster_name] AS SELECT ...
|
||||
CREATE [OR REPLACE] VIEW [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster_name]
|
||||
[DEFINER = { user | CURRENT_USER }] [SQL SECURITY { DEFINER | INVOKER | NONE }]
|
||||
AS SELECT ...
|
||||
```
|
||||
|
||||
Обычные представления не хранят никаких данных, они выполняют чтение данных из другой таблицы при каждом доступе. Другими словами, обычное представление — это не что иное, как сохраненный запрос. При чтении данных из представления этот сохраненный запрос используется как подзапрос в секции [FROM](../../../sql-reference/statements/select/from.md).
|
||||
@ -37,7 +39,9 @@ SELECT a, b, c FROM (SELECT ...)
|
||||
## Материализованные представления {#materialized}
|
||||
|
||||
``` sql
|
||||
CREATE MATERIALIZED VIEW [IF NOT EXISTS] [db.]table_name [ON CLUSTER] [TO[db.]name] [ENGINE = engine] [POPULATE] AS SELECT ...
|
||||
CREATE MATERIALIZED VIEW [IF NOT EXISTS] [db.]table_name [ON CLUSTER] [TO[db.]name] [ENGINE = engine] [POPULATE]
|
||||
[DEFINER = { user | CURRENT_USER }] [SQL SECURITY { DEFINER | INVOKER | NONE }]
|
||||
AS SELECT ...
|
||||
```
|
||||
|
||||
Материализованные (MATERIALIZED) представления хранят данные, преобразованные соответствующим запросом [SELECT](../../../sql-reference/statements/select/index.md).
|
||||
@ -66,6 +70,52 @@ CREATE MATERIALIZED VIEW [IF NOT EXISTS] [db.]table_name [ON CLUSTER] [TO[db.]na
|
||||
|
||||
Чтобы удалить представление, следует использовать [DROP VIEW](../../../sql-reference/statements/drop.md#drop-view). Впрочем, `DROP TABLE` тоже работает для представлений.
|
||||
|
||||
## SQL безопасность {#sql_security}
|
||||
|
||||
Параметры `DEFINER` и `SQL SECURITY` позволяют задать правило от имени какого пользователя будут выполняться запросы к таблицам, на которые ссылается представление.
|
||||
Для `SQL SECURITY` допустимо три значения: `DEFINER`, `INVOKER`, или `NONE`.
|
||||
Для `DEFINER` можно указать имя любого существующего пользователя или же `CURRENT_USER`.
|
||||
|
||||
Далее приведена таблица, объясняющая какие права необходимы каким пользователям при заданных параметрах SQL безопасности.
|
||||
Обратите внимание, что вне зависимости от заданных параметров SQL безопасности
|
||||
у пользователя должно быть право `GRANT SELECT ON <view>` для чтения из представления.
|
||||
|
||||
| SQL security option | View | Materialized View |
|
||||
|---------------------|----------------------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------|
|
||||
| `DEFINER alice` | У `alice` должно быть право `SELECT` на таблицу-источник. | У `alice` должны быть права `SELECT` на таблицу-источник и `INSERT` на таблицу-назначение. |
|
||||
| `INVOKER` | У пользователя выполняющего запрос к представлению должно быть право `SELECT` на таблицу-источник. | Тип `SQL SECURITY INVOKER` не может быть указан для материализованных представлений. |
|
||||
| `NONE` | - | - |
|
||||
|
||||
:::note
|
||||
Тип `SQL SECURITY NONE` небезопасен для использования. Любой пользователь с правом создавать представления с `SQL SECURITY NONE` сможет исполнять любые запросы без проверки прав.
|
||||
По умолчанию, у пользователей нет прав указывать `SQL SECURITY NONE`, однако, при необходимости, это право можно выдать с помощью `GRANT ALLOW SQL SECURITY NONE TO <user>`.
|
||||
:::
|
||||
|
||||
Если `DEFINER`/`SQL SECURITY` не указан, будут использованы значения по умолчанию:
|
||||
- `SQL SECURITY`: `INVOKER` для обычных представлений и `DEFINER` для материализованных ([изменяется в настройках](../../../operations/settings/settings.md#default_normal_view_sql_security))
|
||||
- `DEFINER`: `CURRENT_USER` ([изменяется в настройках](../../../operations/settings/settings.md#default_view_definer))
|
||||
|
||||
Если представление подключается с помощью ключевого слова `ATTACH` и настройки SQL безопасности не были заданы,
|
||||
то по умолчанию будет использоваться `SQL SECURITY NONE` для материализованных представлений и `SQL SECURITY INVOKER` для обычных.
|
||||
|
||||
Изменить параметры SQL безопасности возможно с помощью следующего запроса:
|
||||
```sql
|
||||
ALTER TABLE MODIFY SQL SECURITY { DEFINER | INVOKER | NONE } [DEFINER = { user | CURRENT_USER }]
|
||||
```
|
||||
|
||||
### Примеры представлений с SQL безопасностью
|
||||
```sql
|
||||
CREATE VIEW test_view
|
||||
DEFINER = alice SQL SECURITY DEFINER
|
||||
AS SELECT ...
|
||||
```
|
||||
|
||||
```sql
|
||||
CREATE VIEW test_view
|
||||
SQL SECURITY INVOKER
|
||||
AS SELECT ...
|
||||
```
|
||||
|
||||
## LIVE-представления [экспериментальный функционал] {#live-view}
|
||||
|
||||
:::note Важно
|
||||
|
15
docs/zh/interfaces/third-party/gui.md
vendored
15
docs/zh/interfaces/third-party/gui.md
vendored
@ -129,3 +129,18 @@ ClickHouse Web 界面 [Tabix](https://github.com/tabixio/tabix).
|
||||
- 数据编辑器。
|
||||
- 重构。
|
||||
- 搜索和导航。
|
||||
|
||||
### CKMAN {#ckman}
|
||||
|
||||
[CKMAN](https://www.github.com/housepower/ckman) 是一个用于管理和监控ClickHouse集群的可视化工具!
|
||||
|
||||
特征:
|
||||
|
||||
- 非常快速便捷的通过浏览器界面自动化部署集群
|
||||
- 支持对集群进行扩缩容操作
|
||||
- 对集群的数据进行负载均衡
|
||||
- 对集群进行在线升级
|
||||
- 通过界面修改集群配置
|
||||
- 提供集群节点监控,zookeeper监控
|
||||
- 监控表、分区状态,慢SQL监控
|
||||
- 提供简单易操作的SQL执行页面
|
||||
|
@ -280,9 +280,6 @@ GRANT INSERT(x,y) ON db.table TO john
|
||||
- `ALTER MOVE PARTITION`. 级别: `TABLE`. 别名: `ALTER MOVE PART`, `MOVE PARTITION`, `MOVE PART`
|
||||
- `ALTER FETCH PARTITION`. 级别: `TABLE`. 别名: `FETCH PARTITION`
|
||||
- `ALTER FREEZE PARTITION`. 级别: `TABLE`. 别名: `FREEZE PARTITION`
|
||||
- `ALTER VIEW` 级别: `GROUP`
|
||||
- `ALTER VIEW REFRESH`. 级别: `VIEW`. 别名: `ALTER LIVE VIEW REFRESH`, `REFRESH VIEW`
|
||||
- `ALTER VIEW MODIFY QUERY`. 级别: `VIEW`. 别名: `ALTER TABLE MODIFY QUERY`
|
||||
|
||||
如何对待该层级的示例:
|
||||
- `ALTER` 权限包含所有其它 `ALTER *` 的权限
|
||||
|
@ -61,14 +61,14 @@ sidebar_label: ORDER BY
|
||||
|
||||
我们只建议使用 `COLLATE` 对于少量行的最终排序,因为排序与 `COLLATE` 比正常的按字节排序效率低。
|
||||
|
||||
## ORDER BY *
|
||||
## ORDER BY ALL
|
||||
|
||||
`ORDER BY *` 对所有选定的列进行升序排序。
|
||||
`ORDER BY ALL` 对所有选定的列进行升序排序。
|
||||
|
||||
示例:
|
||||
|
||||
``` sql
|
||||
SELECT a, b, c FROM t ORDER BY *
|
||||
SELECT a, b, c FROM t ORDER BY ALL
|
||||
```
|
||||
|
||||
等同于:
|
||||
|
@ -330,6 +330,7 @@ try
|
||||
processConfig();
|
||||
adjustSettings();
|
||||
initTTYBuffer(toProgressOption(config().getString("progress", "default")));
|
||||
ASTAlterCommand::setFormatAlterCommandsWithParentheses(true);
|
||||
|
||||
{
|
||||
// All that just to set DB::CurrentThread::get().getGlobalContext()
|
||||
|
@ -91,8 +91,8 @@ static std::vector<std::string> extractFromConfig(
|
||||
|
||||
zkutil::validateZooKeeperConfig(*bootstrap_configuration);
|
||||
|
||||
zkutil::ZooKeeperPtr zookeeper = std::make_shared<zkutil::ZooKeeper>(
|
||||
*bootstrap_configuration, bootstrap_configuration->has("zookeeper") ? "zookeeper" : "keeper", nullptr);
|
||||
zkutil::ZooKeeperPtr zookeeper = zkutil::ZooKeeper::createWithoutKillingPreviousSessions(
|
||||
*bootstrap_configuration, bootstrap_configuration->has("zookeeper") ? "zookeeper" : "keeper");
|
||||
|
||||
zkutil::ZooKeeperNodeCache zk_node_cache([&] { return zookeeper; });
|
||||
config_xml = processor.processConfig(&has_zk_includes, &zk_node_cache);
|
||||
|
@ -400,7 +400,7 @@ int KeeperClient::main(const std::vector<String> & /* args */)
|
||||
zk_args.connection_timeout_ms = config().getInt("connection-timeout", 10) * 1000;
|
||||
zk_args.session_timeout_ms = config().getInt("session-timeout", 10) * 1000;
|
||||
zk_args.operation_timeout_ms = config().getInt("operation-timeout", 10) * 1000;
|
||||
zookeeper = std::make_unique<zkutil::ZooKeeper>(zk_args);
|
||||
zookeeper = zkutil::ZooKeeper::createWithoutKillingPreviousSessions(zk_args);
|
||||
|
||||
if (config().has("no-confirmation") || config().has("query"))
|
||||
ask_confirmation = false;
|
||||
|
@ -4,6 +4,7 @@
|
||||
#include <Coordination/CoordinationSettings.h>
|
||||
#include <Coordination/KeeperSnapshotManager.h>
|
||||
#include <Coordination/ZooKeeperDataReader.h>
|
||||
#include <Coordination/KeeperContext.h>
|
||||
#include <Common/TerminalSize.h>
|
||||
#include <Poco/ConsoleChannel.h>
|
||||
#include <Poco/AutoPtr.h>
|
||||
|
@ -506,6 +506,7 @@ try
|
||||
processConfig();
|
||||
adjustSettings();
|
||||
initTTYBuffer(toProgressOption(config().getString("progress", "default")));
|
||||
ASTAlterCommand::setFormatAlterCommandsWithParentheses(true);
|
||||
|
||||
applyCmdSettings(global_context);
|
||||
|
||||
|
@ -24,6 +24,7 @@
|
||||
#include <Common/MemoryTracker.h>
|
||||
#include <Common/ClickHouseRevision.h>
|
||||
#include <Common/DNSResolver.h>
|
||||
#include <Common/CgroupsMemoryUsageObserver.h>
|
||||
#include <Common/CurrentMetrics.h>
|
||||
#include <Common/ConcurrencyControl.h>
|
||||
#include <Common/Macros.h>
|
||||
@ -623,6 +624,8 @@ try
|
||||
ServerSettings server_settings;
|
||||
server_settings.loadSettingsFromConfig(config());
|
||||
|
||||
ASTAlterCommand::setFormatAlterCommandsWithParentheses(server_settings.format_alter_operations_with_parentheses);
|
||||
|
||||
StackTrace::setShowAddresses(server_settings.show_addresses_in_stack_traces);
|
||||
|
||||
#if USE_HDFS
|
||||
@ -1280,6 +1283,18 @@ try
|
||||
SensitiveDataMasker::setInstance(std::make_unique<SensitiveDataMasker>(config(), "query_masking_rules"));
|
||||
}
|
||||
|
||||
std::optional<CgroupsMemoryUsageObserver> cgroups_memory_usage_observer;
|
||||
try
|
||||
{
|
||||
UInt64 wait_time = server_settings.cgroups_memory_usage_observer_wait_time;
|
||||
if (wait_time != 0)
|
||||
cgroups_memory_usage_observer.emplace(std::chrono::seconds(wait_time));
|
||||
}
|
||||
catch (Exception &)
|
||||
{
|
||||
tryLogCurrentException(log, "Disabling cgroup memory observer because of an error during initialization");
|
||||
}
|
||||
|
||||
const std::string cert_path = config().getString("openSSL.server.certificateFile", "");
|
||||
const std::string key_path = config().getString("openSSL.server.privateKeyFile", "");
|
||||
|
||||
@ -1333,6 +1348,15 @@ try
|
||||
total_memory_tracker.setDescription("(total)");
|
||||
total_memory_tracker.setMetric(CurrentMetrics::MemoryTracking);
|
||||
|
||||
if (cgroups_memory_usage_observer)
|
||||
{
|
||||
double hard_limit_ratio = new_server_settings.cgroup_memory_watcher_hard_limit_ratio;
|
||||
double soft_limit_ratio = new_server_settings.cgroup_memory_watcher_soft_limit_ratio;
|
||||
cgroups_memory_usage_observer->setLimits(
|
||||
static_cast<uint64_t>(max_server_memory_usage * hard_limit_ratio),
|
||||
static_cast<uint64_t>(max_server_memory_usage * soft_limit_ratio));
|
||||
}
|
||||
|
||||
size_t merges_mutations_memory_usage_soft_limit = new_server_settings.merges_mutations_memory_usage_soft_limit;
|
||||
|
||||
size_t default_merges_mutations_server_memory_usage = static_cast<size_t>(current_physical_server_memory * new_server_settings.merges_mutations_memory_usage_to_ram_ratio);
|
||||
@ -1750,6 +1774,8 @@ try
|
||||
}
|
||||
else
|
||||
{
|
||||
DNSResolver::instance().setCacheMaxEntries(server_settings.dns_cache_max_entries);
|
||||
|
||||
/// Initialize a watcher periodically updating DNS cache
|
||||
dns_cache_updater = std::make_unique<DNSCacheUpdater>(
|
||||
global_context, server_settings.dns_cache_update_period, server_settings.dns_max_consecutive_failures);
|
||||
|
1
programs/server/config.d/filesystem_cache_log.xml
Symbolic link
1
programs/server/config.d/filesystem_cache_log.xml
Symbolic link
@ -0,0 +1 @@
|
||||
../../../tests/config/config.d/filesystem_cache_log.xml
|
4
programs/server/config.d/filesystem_caches_path.xml
Normal file
4
programs/server/config.d/filesystem_caches_path.xml
Normal file
@ -0,0 +1,4 @@
|
||||
<clickhouse>
|
||||
<filesystem_caches_path>/tmp/filesystem_caches/</filesystem_caches_path>
|
||||
<custom_cached_disks_base_directory replace="replace">/tmp/filesystem_caches/</custom_cached_disks_base_directory>
|
||||
</clickhouse>
|
@ -1392,13 +1392,27 @@
|
||||
<!-- <host_name>replica</host_name> -->
|
||||
</distributed_ddl>
|
||||
|
||||
<!-- Settings to fine tune MergeTree tables. See documentation in source code, in MergeTreeSettings.h -->
|
||||
<!-- Settings to fine-tune MergeTree tables. See documentation in source code, in MergeTreeSettings.h -->
|
||||
<!--
|
||||
<merge_tree>
|
||||
<max_suspicious_broken_parts>5</max_suspicious_broken_parts>
|
||||
</merge_tree>
|
||||
-->
|
||||
|
||||
<!-- Settings to fine-tune ReplicatedMergeTree tables. See documentation in source code, in MergeTreeSettings.h -->
|
||||
<!--
|
||||
<replicated_merge_tree>
|
||||
<max_replicated_fetches_network_bandwidth>1000000000</max_replicated_fetches_network_bandwidth>
|
||||
</replicated_merge_tree>
|
||||
-->
|
||||
|
||||
<!-- Settings to fine-tune Distributed tables. See documentation in source code, in DistributedSettings.h -->
|
||||
<!--
|
||||
<distributed>
|
||||
<flush_on_detach>false</flush_on_detach>
|
||||
</distributed>
|
||||
-->
|
||||
|
||||
<!-- Protection from accidental DROP.
|
||||
If the size of a MergeTree table is greater than max_table_size_to_drop (in bytes), then the table cannot be dropped with any DROP query.
|
||||
If you want to delete one table and don't want to change the clickhouse-server config, you can create a special file <clickhouse-path>/flags/force_drop_table and run DROP once.
|
||||
@ -1569,6 +1583,11 @@
|
||||
|
||||
<backups>
|
||||
<allowed_path>backups</allowed_path>
|
||||
|
||||
<!-- If the BACKUP command fails and this setting is true then the files
|
||||
copied before the failure will be removed automatically.
|
||||
-->
|
||||
<remove_backup_files_after_failure>true</remove_backup_files_after_failure>
|
||||
</backups>
|
||||
|
||||
<!-- This allows to disable exposing addresses in stack traces for security reasons.
|
||||
|
@ -85,11 +85,10 @@
|
||||
gap: 1rem;
|
||||
}
|
||||
.chart {
|
||||
flex: 1 40%;
|
||||
min-width: 20rem;
|
||||
flex: 1 1 40rem;
|
||||
min-height: 16rem;
|
||||
background: var(--chart-background);
|
||||
box-shadow: 0 0 1rem var(--shadow-color);
|
||||
box-shadow: 1px 1px 0 var(--shadow-color);
|
||||
overflow: hidden;
|
||||
position: relative;
|
||||
}
|
||||
@ -195,7 +194,7 @@
|
||||
}
|
||||
|
||||
.inputs input {
|
||||
box-shadow: 0 0 1rem var(--shadow-color);
|
||||
box-shadow: 1px 1px 0 var(--shadow-color);
|
||||
padding: 0.25rem;
|
||||
}
|
||||
|
||||
@ -255,8 +254,6 @@
|
||||
font-weight: bold;
|
||||
user-select: none;
|
||||
cursor: pointer;
|
||||
padding-left: 0.5rem;
|
||||
padding-right: 0.5rem;
|
||||
background: var(--new-chart-background-color);
|
||||
color: var(--new-chart-text-color);
|
||||
float: right;
|
||||
@ -275,7 +272,6 @@
|
||||
width: 36%;
|
||||
}
|
||||
|
||||
|
||||
#global-error {
|
||||
align-self: center;
|
||||
width: 60%;
|
||||
@ -298,7 +294,7 @@
|
||||
background: var(--param-background-color);
|
||||
color: var(--param-text-color);
|
||||
display: inline-block;
|
||||
box-shadow: 0 0 1rem var(--shadow-color);
|
||||
box-shadow: 1px 1px 0 var(--shadow-color);
|
||||
margin-bottom: 1rem;
|
||||
}
|
||||
|
||||
@ -491,17 +487,10 @@
|
||||
* - if a query returned something unusual, display the table;
|
||||
*/
|
||||
|
||||
let host = 'https://play.clickhouse.com/';
|
||||
let user = 'explorer';
|
||||
let host = location.protocol != 'file:' ? location.origin : 'http://localhost:8123/';
|
||||
let user = 'default';
|
||||
let password = '';
|
||||
let add_http_cors_header = true;
|
||||
|
||||
/// If it is hosted on server, assume that it is the address of ClickHouse.
|
||||
if (location.protocol != 'file:') {
|
||||
host = location.origin;
|
||||
user = 'default';
|
||||
add_http_cors_header = false;
|
||||
}
|
||||
let add_http_cors_header = (location.protocol != 'file:');
|
||||
|
||||
const errorCodeMessageMap = {
|
||||
516: 'Error authenticating with database. Please check your connection params and try again.'
|
||||
@ -1273,8 +1262,11 @@ function hideError() {
|
||||
}
|
||||
|
||||
let firstLoad = true;
|
||||
|
||||
let is_drawing = false; // Prevent race condition leading to duplicate/dangling charts.
|
||||
async function drawAll() {
|
||||
if (is_drawing) return;
|
||||
is_drawing = true;
|
||||
|
||||
let params = getParamsForURL();
|
||||
const chartsArray = document.getElementsByClassName('chart');
|
||||
|
||||
@ -1301,12 +1293,12 @@ async function drawAll() {
|
||||
document.getElementById('edit').style.display = 'inline-block';
|
||||
document.getElementById('search-span').style.display = '';
|
||||
hideError();
|
||||
}
|
||||
else {
|
||||
const charts = document.getElementById('charts')
|
||||
charts.style.height = '0px';
|
||||
} else {
|
||||
document.getElementById('charts').style.height = '0px';
|
||||
}
|
||||
});
|
||||
|
||||
is_drawing = false;
|
||||
}
|
||||
|
||||
function resize() {
|
||||
|
230
rust/Cargo.lock
generated
230
rust/Cargo.lock
generated
@ -6,7 +6,7 @@ version = 3
|
||||
name = "_ch_rust_prql"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"prql-compiler",
|
||||
"prqlc",
|
||||
"serde_json",
|
||||
]
|
||||
|
||||
@ -79,16 +79,15 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "anstream"
|
||||
version = "0.3.2"
|
||||
version = "0.6.13"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0ca84f3628370c59db74ee214b3263d58f9aadd9b4fe7e711fd87dc452b7f163"
|
||||
checksum = "d96bd03f33fe50a863e394ee9718a706f988b9079b20c3784fb726e7678b62fb"
|
||||
dependencies = [
|
||||
"anstyle",
|
||||
"anstyle-parse",
|
||||
"anstyle-query",
|
||||
"anstyle-wincon",
|
||||
"colorchoice",
|
||||
"is-terminal",
|
||||
"utf8parse",
|
||||
]
|
||||
|
||||
@ -113,33 +112,33 @@ version = "1.0.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e28923312444cdd728e4738b3f9c9cac739500909bb3d3c94b43551b16517648"
|
||||
dependencies = [
|
||||
"windows-sys 0.52.0",
|
||||
"windows-sys",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "anstyle-wincon"
|
||||
version = "1.0.2"
|
||||
version = "3.0.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c677ab05e09154296dd37acecd46420c17b9713e8366facafa8fc0885167cf4c"
|
||||
checksum = "1cd54b81ec8d6180e24654d0b371ad22fc3dd083b6ff8ba325b72e00c87660a7"
|
||||
dependencies = [
|
||||
"anstyle",
|
||||
"windows-sys 0.48.0",
|
||||
"windows-sys",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "anyhow"
|
||||
version = "1.0.75"
|
||||
version = "1.0.80"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a4668cab20f66d8d020e1fbc0ebe47217433c1b6c8f2040faf858554e394ace6"
|
||||
checksum = "5ad32ce52e4161730f7098c077cd2ed6229b5804ccf99e5366be1ab72a98b4e1"
|
||||
dependencies = [
|
||||
"backtrace",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ariadne"
|
||||
version = "0.3.0"
|
||||
version = "0.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "72fe02fc62033df9ba41cba57ee19acf5e742511a140c7dbc3a873e19a19a1bd"
|
||||
checksum = "dd002a6223f12c7a95cdd4b1cb3a0149d22d37f7a9ecdb2cb691a071fe236c29"
|
||||
dependencies = [
|
||||
"unicode-width",
|
||||
"yansi",
|
||||
@ -213,16 +212,16 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
|
||||
|
||||
[[package]]
|
||||
name = "chrono"
|
||||
version = "0.4.31"
|
||||
version = "0.4.34"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7f2c685bad3eb3d45a01354cedb7d5faa66194d1d58ba6e267a8de788f79db38"
|
||||
checksum = "5bc015644b92d5890fab7489e49d21f879d5c990186827d42ec511919404f38b"
|
||||
dependencies = [
|
||||
"android-tzdata",
|
||||
"iana-time-zone",
|
||||
"js-sys",
|
||||
"num-traits",
|
||||
"wasm-bindgen",
|
||||
"windows-targets 0.48.5",
|
||||
"windows-targets 0.52.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@ -368,7 +367,7 @@ dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"scratch",
|
||||
"syn 2.0.41",
|
||||
"syn 2.0.52",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@ -385,7 +384,7 @@ checksum = "5c6888cd161769d65134846d4d4981d5a6654307cc46ec83fb917e530aea5f84"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.41",
|
||||
"syn 2.0.52",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@ -509,18 +508,14 @@ dependencies = [
|
||||
"heck",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.41",
|
||||
"syn 2.0.52",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "errno"
|
||||
version = "0.3.8"
|
||||
name = "equivalent"
|
||||
version = "1.0.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a258e46cdc063eb8519c00b9fc845fc47bcfca4130e2f08e88665ceda8474245"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"windows-sys 0.52.0",
|
||||
]
|
||||
checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5"
|
||||
|
||||
[[package]]
|
||||
name = "fnv"
|
||||
@ -570,12 +565,6 @@ version = "0.4.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8"
|
||||
|
||||
[[package]]
|
||||
name = "hermit-abi"
|
||||
version = "0.3.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d77f7ec81a6d05a3abb01ab6eb7590f6083d08449fe5a1c8b1e620283546ccb7"
|
||||
|
||||
[[package]]
|
||||
name = "iana-time-zone"
|
||||
version = "0.1.58"
|
||||
@ -606,23 +595,13 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39"
|
||||
|
||||
[[package]]
|
||||
name = "is-terminal"
|
||||
version = "0.4.9"
|
||||
name = "indexmap"
|
||||
version = "2.2.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "cb0889898416213fab133e1d33a0e5858a48177452750691bde3666d0fdbaf8b"
|
||||
checksum = "7b0b929d511467233429c45a44ac1dcaa21ba0f5ba11e4879e6ed28ddb4f9df4"
|
||||
dependencies = [
|
||||
"hermit-abi",
|
||||
"rustix",
|
||||
"windows-sys 0.48.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "itertools"
|
||||
version = "0.11.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b1c173a5686ce8bfa551b3563d0c2170bf24ca44da99c7ca4bfdab5418c3fe57"
|
||||
dependencies = [
|
||||
"either",
|
||||
"equivalent",
|
||||
"hashbrown",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@ -681,12 +660,6 @@ dependencies = [
|
||||
"cc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "linux-raw-sys"
|
||||
version = "0.4.12"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c4cd1a83af159aa67994778be9070f0ae1bd732942279cabb14f86f986a21456"
|
||||
|
||||
[[package]]
|
||||
name = "log"
|
||||
version = "0.4.20"
|
||||
@ -805,45 +778,35 @@ checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391"
|
||||
|
||||
[[package]]
|
||||
name = "proc-macro2"
|
||||
version = "1.0.70"
|
||||
version = "1.0.78"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "39278fbbf5fb4f646ce651690877f89d1c5811a3d4acb27700c1cb3cdb78fd3b"
|
||||
checksum = "e2422ad645d89c99f8f3e6b88a9fdeca7fabeac836b1002371c4367c8f984aae"
|
||||
dependencies = [
|
||||
"unicode-ident",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "prql-ast"
|
||||
version = "0.9.5"
|
||||
name = "prqlc"
|
||||
version = "0.11.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d9d91522f9f16d055409b9ffec55693a96e3424fe5d8e7c8331adcf6d7ee363a"
|
||||
dependencies = [
|
||||
"enum-as-inner",
|
||||
"semver",
|
||||
"serde",
|
||||
"strum",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "prql-compiler"
|
||||
version = "0.9.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f4d56865532fcf1abaa31fbb6da6fd9e90edc441c5c78bfe2870ee75187c7a3c"
|
||||
checksum = "4beb05b6b71ce096fa56d73006ab1c42a8d11bf190d193fa511a134f7730ec43"
|
||||
dependencies = [
|
||||
"anstream",
|
||||
"anyhow",
|
||||
"ariadne",
|
||||
"chrono",
|
||||
"csv",
|
||||
"enum-as-inner",
|
||||
"itertools 0.11.0",
|
||||
"itertools",
|
||||
"log",
|
||||
"once_cell",
|
||||
"prql-ast",
|
||||
"prql-parser",
|
||||
"prqlc-ast",
|
||||
"prqlc-parser",
|
||||
"regex",
|
||||
"semver",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"serde_yaml",
|
||||
"sqlformat",
|
||||
"sqlparser",
|
||||
"strum",
|
||||
@ -851,15 +814,29 @@ dependencies = [
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "prql-parser"
|
||||
version = "0.9.5"
|
||||
name = "prqlc-ast"
|
||||
version = "0.11.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9360352e413390cfd26345f49279622b87581a3b748340d3f42d4d616c2a1ec1"
|
||||
checksum = "c98923b046bc48046e3846b14a5fde5a059f681c7c367bd0ab96ebd3ecc33a71"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"enum-as-inner",
|
||||
"semver",
|
||||
"serde",
|
||||
"strum",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "prqlc-parser"
|
||||
version = "0.11.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "855ad9aba599ef608efc88a30ebd731155997d9bbe780639eb175de060b6cddc"
|
||||
dependencies = [
|
||||
"chumsky",
|
||||
"itertools 0.11.0",
|
||||
"prql-ast",
|
||||
"itertools",
|
||||
"prqlc-ast",
|
||||
"semver",
|
||||
"stacker",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@ -873,9 +850,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "quote"
|
||||
version = "1.0.33"
|
||||
version = "1.0.35"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5267fca4496028628a95160fc423a33e8b2e6af8a5302579e322e4b520293cae"
|
||||
checksum = "291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
]
|
||||
@ -922,9 +899,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "regex"
|
||||
version = "1.10.2"
|
||||
version = "1.10.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "380b951a9c5e80ddfd6136919eef32310721aa4aacd4889a8d39124b026ab343"
|
||||
checksum = "b62dbe01f0b06f9d8dc7d49e05a0785f153b00b2c227856282f671e0318c9b15"
|
||||
dependencies = [
|
||||
"aho-corasick",
|
||||
"memchr",
|
||||
@ -934,9 +911,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "regex-automata"
|
||||
version = "0.4.3"
|
||||
version = "0.4.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5f804c7828047e88b2d32e2d7fe5a105da8ee3264f01902f796c8e067dc2483f"
|
||||
checksum = "5bb987efffd3c6d0d8f5f89510bb458559eab11e4f869acb20bf845e016259cd"
|
||||
dependencies = [
|
||||
"aho-corasick",
|
||||
"memchr",
|
||||
@ -955,19 +932,6 @@ version = "0.1.23"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d626bb9dae77e28219937af045c257c28bfd3f69333c512553507f5f9798cb76"
|
||||
|
||||
[[package]]
|
||||
name = "rustix"
|
||||
version = "0.38.28"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "72e572a5e8ca657d7366229cdde4bd14c4eb5499a9573d4d366fe1b599daa316"
|
||||
dependencies = [
|
||||
"bitflags 2.4.1",
|
||||
"errno",
|
||||
"libc",
|
||||
"linux-raw-sys",
|
||||
"windows-sys 0.52.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rustversion"
|
||||
version = "1.0.14"
|
||||
@ -988,44 +952,57 @@ checksum = "a3cf7c11c38cb994f3d40e8a8cde3bbd1f72a435e4c49e85d6553d8312306152"
|
||||
|
||||
[[package]]
|
||||
name = "semver"
|
||||
version = "1.0.20"
|
||||
version = "1.0.22"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "836fa6a3e1e547f9a2c4040802ec865b5d85f4014efe00555d7090a3dcaa1090"
|
||||
checksum = "92d43fe69e652f3df9bdc2b85b2854a0825b86e4fb76bc44d945137d053639ca"
|
||||
dependencies = [
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde"
|
||||
version = "1.0.193"
|
||||
version = "1.0.197"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "25dd9975e68d0cb5aa1120c288333fc98731bd1dd12f561e468ea4728c042b89"
|
||||
checksum = "3fb1c873e1b9b056a4dc4c0c198b24c3ffa059243875552b2bd0933b1aee4ce2"
|
||||
dependencies = [
|
||||
"serde_derive",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde_derive"
|
||||
version = "1.0.193"
|
||||
version = "1.0.197"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "43576ca501357b9b071ac53cdc7da8ef0cbd9493d8df094cd821777ea6e894d3"
|
||||
checksum = "7eb0b34b42edc17f6b7cac84a52a1c5f0e1bb2227e997ca9011ea3dd34e8610b"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.41",
|
||||
"syn 2.0.52",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde_json"
|
||||
version = "1.0.108"
|
||||
version = "1.0.114"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3d1c7e3eac408d115102c4c24ad393e0821bb3a5df4d506a80f85f7a742a526b"
|
||||
checksum = "c5f09b1bd632ef549eaa9f60a1f8de742bdbc698e6cee2095fc84dde5f549ae0"
|
||||
dependencies = [
|
||||
"itoa",
|
||||
"ryu",
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde_yaml"
|
||||
version = "0.9.32"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8fd075d994154d4a774f95b51fb96bdc2832b0ea48425c92546073816cda1f2f"
|
||||
dependencies = [
|
||||
"indexmap",
|
||||
"itoa",
|
||||
"ryu",
|
||||
"serde",
|
||||
"unsafe-libyaml",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "skim"
|
||||
version = "0.10.4"
|
||||
@ -1057,16 +1034,16 @@ version = "0.2.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ce81b7bd7c4493975347ef60d8c7e8b742d4694f4c49f93e0a12ea263938176c"
|
||||
dependencies = [
|
||||
"itertools 0.12.0",
|
||||
"itertools",
|
||||
"nom",
|
||||
"unicode_categories",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "sqlparser"
|
||||
version = "0.37.0"
|
||||
version = "0.43.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "37ae05a8250b968a3f7db93155a84d68b2e6cea1583949af5ca5b5170c76c075"
|
||||
checksum = "f95c4bae5aba7cd30bd506f7140026ade63cff5afd778af8854026f9606bf5d4"
|
||||
dependencies = [
|
||||
"log",
|
||||
"serde",
|
||||
@ -1093,24 +1070,24 @@ checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623"
|
||||
|
||||
[[package]]
|
||||
name = "strum"
|
||||
version = "0.25.0"
|
||||
version = "0.26.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "290d54ea6f91c969195bdbcd7442c8c2a2ba87da8bf60a7ee86a235d4bc1e125"
|
||||
checksum = "723b93e8addf9aa965ebe2d11da6d7540fa2283fcea14b3371ff055f7ba13f5f"
|
||||
dependencies = [
|
||||
"strum_macros",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "strum_macros"
|
||||
version = "0.25.3"
|
||||
version = "0.26.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "23dc1fa9ac9c169a78ba62f0b841814b7abae11bdd047b9c58f893439e309ea0"
|
||||
checksum = "7a3417fc93d76740d974a01654a09777cb500428cc874ca9f45edfe0c4d4cd18"
|
||||
dependencies = [
|
||||
"heck",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"rustversion",
|
||||
"syn 2.0.41",
|
||||
"syn 2.0.52",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@ -1126,9 +1103,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "syn"
|
||||
version = "2.0.41"
|
||||
version = "2.0.52"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "44c8b28c477cc3bf0e7966561e3460130e1255f7a1cf71931075f1c5e7a7e269"
|
||||
checksum = "b699d15b36d1f02c3e7c69f8ffef53de37aefae075d8488d4ba1a7788d574a07"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
@ -1172,7 +1149,7 @@ checksum = "01742297787513b79cf8e29d1056ede1313e2420b7b3b15d0a768b4921f549df"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.41",
|
||||
"syn 2.0.52",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@ -1244,6 +1221,12 @@ version = "0.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "39ec24b3121d976906ece63c9daad25b85969647682eee313cb5779fdd69e14e"
|
||||
|
||||
[[package]]
|
||||
name = "unsafe-libyaml"
|
||||
version = "0.2.10"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ab4c90930b95a82d00dc9e9ac071b4991924390d46cbd0dfe566148667605e4b"
|
||||
|
||||
[[package]]
|
||||
name = "utf8parse"
|
||||
version = "0.2.1"
|
||||
@ -1304,7 +1287,7 @@ dependencies = [
|
||||
"once_cell",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.41",
|
||||
"syn 2.0.52",
|
||||
"wasm-bindgen-shared",
|
||||
]
|
||||
|
||||
@ -1326,7 +1309,7 @@ checksum = "f0eb82fcb7930ae6219a7ecfd55b217f5f0893484b7a13022ebb2b2bf20b5283"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.41",
|
||||
"syn 2.0.52",
|
||||
"wasm-bindgen-backend",
|
||||
"wasm-bindgen-shared",
|
||||
]
|
||||
@ -1377,15 +1360,6 @@ dependencies = [
|
||||
"windows-targets 0.48.5",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "windows-sys"
|
||||
version = "0.48.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9"
|
||||
dependencies = [
|
||||
"windows-targets 0.48.5",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "windows-sys"
|
||||
version = "0.52.0"
|
||||
@ -1532,5 +1506,5 @@ checksum = "b3c129550b3e6de3fd0ba67ba5c81818f9805e58b8d7fee80a3a59d2c9fc601a"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.41",
|
||||
"syn 2.0.52",
|
||||
]
|
||||
|
@ -3,10 +3,8 @@ edition = "2021"
|
||||
name = "_ch_rust_prql"
|
||||
version = "0.1.0"
|
||||
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
prql-compiler = "0.9.3"
|
||||
prqlc = {version = "0.11.3", default-features = false}
|
||||
serde_json = "1.0"
|
||||
|
||||
[lib]
|
||||
|
@ -1,8 +1,8 @@
|
||||
use prql_compiler::sql::Dialect;
|
||||
use prql_compiler::{Options, Target};
|
||||
use prqlc::sql::Dialect;
|
||||
use prqlc::{Options, Target};
|
||||
use std::ffi::{c_char, CString};
|
||||
use std::slice;
|
||||
use std::panic;
|
||||
use std::slice;
|
||||
|
||||
fn set_output(result: String, out: *mut *mut u8, out_size: *mut u64) {
|
||||
assert!(!out_size.is_null());
|
||||
@ -37,7 +37,7 @@ pub unsafe extern "C" fn prql_to_sql_impl(
|
||||
signature_comment: false,
|
||||
color: false,
|
||||
};
|
||||
let (is_err, res) = match prql_compiler::compile(&prql_query, &opts) {
|
||||
let (is_err, res) = match prqlc::compile(&prql_query, &opts) {
|
||||
Ok(sql_str) => (false, sql_str),
|
||||
Err(err) => (true, err.to_string()),
|
||||
};
|
||||
|
@ -164,7 +164,7 @@ public:
|
||||
int getBcryptWorkfactor() const;
|
||||
|
||||
/// Enables logic that users without permissive row policies can still read rows using a SELECT query.
|
||||
/// For example, if there two users A, B and a row policy is defined only for A, then
|
||||
/// For example, if there are two users A, B and a row policy is defined only for A, then
|
||||
/// if this setting is true the user B will see all rows, and if this setting is false the user B will see no rows.
|
||||
void setEnabledUsersWithoutRowPoliciesCanReadRows(bool enable) { users_without_row_policies_can_read_rows = enable; }
|
||||
bool isEnabledUsersWithoutRowPoliciesCanReadRows() const { return users_without_row_policies_can_read_rows; }
|
||||
|
@ -1,7 +1,6 @@
|
||||
#include <Access/Common/AccessFlags.h>
|
||||
#include <Access/Common/AccessType.h>
|
||||
#include <Common/Exception.h>
|
||||
#include <base/types.h>
|
||||
#include <boost/algorithm/string/case_conv.hpp>
|
||||
#include <boost/algorithm/string/replace.hpp>
|
||||
#include <boost/algorithm/string/split.hpp>
|
||||
@ -103,6 +102,7 @@ namespace
|
||||
const Flags & getColumnFlags() const { return all_flags_for_target[COLUMN]; }
|
||||
const Flags & getDictionaryFlags() const { return all_flags_for_target[DICTIONARY]; }
|
||||
const Flags & getNamedCollectionFlags() const { return all_flags_for_target[NAMED_COLLECTION]; }
|
||||
const Flags & getUserNameFlags() const { return all_flags_for_target[USER_NAME]; }
|
||||
const Flags & getAllFlagsGrantableOnGlobalLevel() const { return getAllFlags(); }
|
||||
const Flags & getAllFlagsGrantableOnGlobalWithParameterLevel() const { return getGlobalWithParameterFlags(); }
|
||||
const Flags & getAllFlagsGrantableOnDatabaseLevel() const { return all_flags_grantable_on_database_level; }
|
||||
@ -121,6 +121,7 @@ namespace
|
||||
COLUMN,
|
||||
DICTIONARY,
|
||||
NAMED_COLLECTION,
|
||||
USER_NAME,
|
||||
};
|
||||
|
||||
struct Node;
|
||||
@ -300,7 +301,7 @@ namespace
|
||||
collectAllFlags(child.get());
|
||||
|
||||
all_flags_grantable_on_table_level = all_flags_for_target[TABLE] | all_flags_for_target[DICTIONARY] | all_flags_for_target[COLUMN];
|
||||
all_flags_grantable_on_global_with_parameter_level = all_flags_for_target[NAMED_COLLECTION];
|
||||
all_flags_grantable_on_global_with_parameter_level = all_flags_for_target[NAMED_COLLECTION] | all_flags_for_target[USER_NAME];
|
||||
all_flags_grantable_on_database_level = all_flags_for_target[DATABASE] | all_flags_grantable_on_table_level;
|
||||
}
|
||||
|
||||
@ -351,7 +352,7 @@ namespace
|
||||
std::unordered_map<std::string_view, Flags> keyword_to_flags_map;
|
||||
std::vector<Flags> access_type_to_flags_mapping;
|
||||
Flags all_flags;
|
||||
Flags all_flags_for_target[static_cast<size_t>(NAMED_COLLECTION) + 1];
|
||||
Flags all_flags_for_target[static_cast<size_t>(USER_NAME) + 1];
|
||||
Flags all_flags_grantable_on_database_level;
|
||||
Flags all_flags_grantable_on_table_level;
|
||||
Flags all_flags_grantable_on_global_with_parameter_level;
|
||||
@ -371,7 +372,11 @@ std::unordered_map<AccessFlags::ParameterType, AccessFlags> AccessFlags::splitIn
|
||||
if (named_collection_flags)
|
||||
result.emplace(ParameterType::NAMED_COLLECTION, named_collection_flags);
|
||||
|
||||
auto other_flags = (~AccessFlags::allNamedCollectionFlags()) & *this;
|
||||
auto user_flags = AccessFlags::allUserNameFlags() & *this;
|
||||
if (user_flags)
|
||||
result.emplace(ParameterType::USER_NAME, user_flags);
|
||||
|
||||
auto other_flags = (~named_collection_flags & ~user_flags) & *this;
|
||||
if (other_flags)
|
||||
result.emplace(ParameterType::NONE, other_flags);
|
||||
|
||||
@ -387,6 +392,9 @@ AccessFlags::ParameterType AccessFlags::getParameterType() const
|
||||
if (AccessFlags::allNamedCollectionFlags().contains(*this))
|
||||
return AccessFlags::NAMED_COLLECTION;
|
||||
|
||||
if (AccessFlags::allUserNameFlags().contains(*this))
|
||||
return AccessFlags::USER_NAME;
|
||||
|
||||
throw Exception(ErrorCodes::MIXED_ACCESS_PARAMETER_TYPES, "Having mixed parameter types: {}", toString());
|
||||
}
|
||||
|
||||
@ -405,6 +413,7 @@ AccessFlags AccessFlags::allTableFlags() { return Helper::instance().getTableFla
|
||||
AccessFlags AccessFlags::allColumnFlags() { return Helper::instance().getColumnFlags(); }
|
||||
AccessFlags AccessFlags::allDictionaryFlags() { return Helper::instance().getDictionaryFlags(); }
|
||||
AccessFlags AccessFlags::allNamedCollectionFlags() { return Helper::instance().getNamedCollectionFlags(); }
|
||||
AccessFlags AccessFlags::allUserNameFlags() { return Helper::instance().getUserNameFlags(); }
|
||||
AccessFlags AccessFlags::allFlagsGrantableOnGlobalLevel() { return Helper::instance().getAllFlagsGrantableOnGlobalLevel(); }
|
||||
AccessFlags AccessFlags::allFlagsGrantableOnGlobalWithParameterLevel() { return Helper::instance().getAllFlagsGrantableOnGlobalWithParameterLevel(); }
|
||||
AccessFlags AccessFlags::allFlagsGrantableOnDatabaseLevel() { return Helper::instance().getAllFlagsGrantableOnDatabaseLevel(); }
|
||||
|
@ -57,6 +57,7 @@ public:
|
||||
{
|
||||
NONE,
|
||||
NAMED_COLLECTION,
|
||||
USER_NAME,
|
||||
};
|
||||
ParameterType getParameterType() const;
|
||||
std::unordered_map<ParameterType, AccessFlags> splitIntoParameterTypes() const;
|
||||
@ -103,6 +104,9 @@ public:
|
||||
/// Returns all the flags related to a named collection.
|
||||
static AccessFlags allNamedCollectionFlags();
|
||||
|
||||
/// Returns all the flags related to a user.
|
||||
static AccessFlags allUserNameFlags();
|
||||
|
||||
/// Returns all the flags which could be granted on the global level.
|
||||
/// The same as allFlags().
|
||||
static AccessFlags allFlagsGrantableOnGlobalLevel();
|
||||
|
@ -12,7 +12,7 @@ enum class AccessType
|
||||
/// Macro M should be defined as M(name, aliases, node_type, parent_group_name)
|
||||
/// where name is identifier with underscores (instead of spaces);
|
||||
/// aliases is a string containing comma-separated list;
|
||||
/// node_type either specifies access type's level (GLOBAL/NAMED_COLLECTION/DATABASE/TABLE/DICTIONARY/VIEW/COLUMNS),
|
||||
/// node_type either specifies access type's level (GLOBAL/NAMED_COLLECTION/USER_NAME/DATABASE/TABLE/DICTIONARY/VIEW/COLUMNS),
|
||||
/// or specifies that the access type is a GROUP of other access types;
|
||||
/// parent_group_name is the name of the group containing this access type (or NONE if there is no such group).
|
||||
/// NOTE A parent group must be declared AFTER all its children.
|
||||
@ -80,13 +80,13 @@ enum class AccessType
|
||||
M(ALTER_TABLE, "", GROUP, ALTER) \
|
||||
M(ALTER_DATABASE, "", GROUP, ALTER) \
|
||||
\
|
||||
M(ALTER_VIEW_REFRESH, "ALTER LIVE VIEW REFRESH, REFRESH VIEW", VIEW, ALTER_VIEW) \
|
||||
M(ALTER_VIEW_MODIFY_QUERY, "ALTER TABLE MODIFY QUERY", VIEW, ALTER_VIEW) \
|
||||
M(ALTER_VIEW_MODIFY_REFRESH, "ALTER TABLE MODIFY QUERY", VIEW, ALTER_VIEW) \
|
||||
M(ALTER_VIEW_MODIFY_SQL_SECURITY, "ALTER TABLE MODIFY SQL SECURITY", VIEW, ALTER_VIEW) \
|
||||
M(ALTER_VIEW, "", GROUP, ALTER) /* allows to execute ALTER VIEW REFRESH, ALTER VIEW MODIFY QUERY, ALTER VIEW MODIFY REFRESH;
|
||||
implicitly enabled by the grant ALTER_TABLE */\
|
||||
\
|
||||
M(ALTER, "", GROUP, ALL) /* allows to execute ALTER {TABLE|LIVE VIEW} */\
|
||||
M(ALTER, "", GROUP, ALL) /* allows to execute ALTER TABLE */\
|
||||
\
|
||||
M(CREATE_DATABASE, "", DATABASE, CREATE) /* allows to execute {CREATE|ATTACH} DATABASE */\
|
||||
M(CREATE_TABLE, "", TABLE, CREATE) /* allows to execute {CREATE|ATTACH} {TABLE|VIEW} */\
|
||||
@ -139,6 +139,7 @@ enum class AccessType
|
||||
M(CREATE_SETTINGS_PROFILE, "CREATE PROFILE", GLOBAL, ACCESS_MANAGEMENT) \
|
||||
M(ALTER_SETTINGS_PROFILE, "ALTER PROFILE", GLOBAL, ACCESS_MANAGEMENT) \
|
||||
M(DROP_SETTINGS_PROFILE, "DROP PROFILE", GLOBAL, ACCESS_MANAGEMENT) \
|
||||
M(ALLOW_SQL_SECURITY_NONE, "CREATE SQL SECURITY NONE, ALLOW SQL SECURITY NONE, SQL SECURITY NONE, SECURITY NONE", GLOBAL, ACCESS_MANAGEMENT) \
|
||||
M(SHOW_USERS, "SHOW CREATE USER", GLOBAL, SHOW_ACCESS) \
|
||||
M(SHOW_ROLES, "SHOW CREATE ROLE", GLOBAL, SHOW_ACCESS) \
|
||||
M(SHOW_ROW_POLICIES, "SHOW POLICIES, SHOW CREATE ROW POLICY, SHOW CREATE POLICY", TABLE, SHOW_ACCESS) \
|
||||
@ -150,6 +151,7 @@ enum class AccessType
|
||||
M(SHOW_NAMED_COLLECTIONS_SECRETS, "SHOW NAMED COLLECTIONS SECRETS", NAMED_COLLECTION, NAMED_COLLECTION_ADMIN) \
|
||||
M(NAMED_COLLECTION, "NAMED COLLECTION USAGE, USE NAMED COLLECTION", NAMED_COLLECTION, NAMED_COLLECTION_ADMIN) \
|
||||
M(NAMED_COLLECTION_ADMIN, "NAMED COLLECTION CONTROL", NAMED_COLLECTION, ALL) \
|
||||
M(SET_DEFINER, "", USER_NAME, ALL) \
|
||||
\
|
||||
M(SYSTEM_SHUTDOWN, "SYSTEM KILL, SHUTDOWN", GLOBAL, SYSTEM) \
|
||||
M(SYSTEM_DROP_DNS_CACHE, "SYSTEM DROP DNS, DROP DNS CACHE, DROP DNS", GLOBAL, SYSTEM_DROP_CACHE) \
|
||||
|
11
src/Access/Common/SQLSecurityDefs.h
Normal file
11
src/Access/Common/SQLSecurityDefs.h
Normal file
@ -0,0 +1,11 @@
|
||||
#pragma once
|
||||
#include <Core/Types.h>
|
||||
|
||||
|
||||
/// SQL security enum. Used in ASTSQLSecurity::type. For more info, please refer to the docs/sql-reference/statements/create/view.md#sql_security
|
||||
enum class SQLSecurityType : uint8_t
|
||||
{
|
||||
INVOKER, /// All queries will be executed with the current user's context.
|
||||
DEFINER, /// All queries will be executed with the specified user's context.
|
||||
NONE, /// All queries will be executed with the global context.
|
||||
};
|
@ -24,7 +24,7 @@ class HTTPAuthClient
|
||||
public:
|
||||
using Result = TResponseParser::Result;
|
||||
|
||||
HTTPAuthClient(const HTTPAuthClientParams & params, const TResponseParser & parser_ = TResponseParser{})
|
||||
explicit HTTPAuthClient(const HTTPAuthClientParams & params, const TResponseParser & parser_ = TResponseParser{})
|
||||
: timeouts{params.timeouts}
|
||||
, max_tries{params.max_tries}
|
||||
, retry_initial_backoff_ms{params.retry_initial_backoff_ms}
|
||||
|
@ -31,7 +31,7 @@ struct AuthResult
|
||||
{
|
||||
UUID user_id;
|
||||
/// Session settings received from authentication server (if any)
|
||||
SettingsChanges settings;
|
||||
SettingsChanges settings{};
|
||||
};
|
||||
|
||||
/// Contains entities, i.e. instances of classes derived from IAccessEntity.
|
||||
|
@ -53,7 +53,8 @@ TEST(AccessRights, Union)
|
||||
"SHOW ROW POLICIES, SYSTEM MERGES, SYSTEM TTL MERGES, SYSTEM FETCHES, "
|
||||
"SYSTEM MOVES, SYSTEM PULLING REPLICATION LOG, SYSTEM CLEANUP, SYSTEM VIEWS, SYSTEM SENDS, SYSTEM REPLICATION QUEUES, "
|
||||
"SYSTEM DROP REPLICA, SYSTEM SYNC REPLICA, SYSTEM RESTART REPLICA, "
|
||||
"SYSTEM RESTORE REPLICA, SYSTEM WAIT LOADING PARTS, SYSTEM SYNC DATABASE REPLICA, SYSTEM FLUSH DISTRIBUTED, dictGet ON db1.*, GRANT NAMED COLLECTION ADMIN ON db1");
|
||||
"SYSTEM RESTORE REPLICA, SYSTEM WAIT LOADING PARTS, SYSTEM SYNC DATABASE REPLICA, SYSTEM FLUSH DISTRIBUTED, dictGet ON db1.*, "
|
||||
"GRANT SET DEFINER ON db1, GRANT NAMED COLLECTION ADMIN ON db1");
|
||||
}
|
||||
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
#include <AggregateFunctions/AggregateFunctionFactory.h>
|
||||
#include <AggregateFunctions/HelpersMinMaxAny.h>
|
||||
#include <AggregateFunctions/SingleValueData.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <base/defines.h>
|
||||
@ -11,219 +11,347 @@ struct Settings;
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int INCORRECT_DATA;
|
||||
extern const int LOGICAL_ERROR;
|
||||
extern const int NOT_IMPLEMENTED;
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
struct AggregateFunctionAnyRespectNullsData
|
||||
|
||||
template <typename Data>
|
||||
class AggregateFunctionAny final : public IAggregateFunctionDataHelper<Data, AggregateFunctionAny<Data>>
|
||||
{
|
||||
enum Status : UInt8
|
||||
{
|
||||
NotSet = 1,
|
||||
SetNull = 2,
|
||||
SetOther = 3
|
||||
};
|
||||
|
||||
Status status = Status::NotSet;
|
||||
Field value;
|
||||
|
||||
bool isSet() const { return status != Status::NotSet; }
|
||||
void setNull() { status = Status::SetNull; }
|
||||
void setOther() { status = Status::SetOther; }
|
||||
};
|
||||
|
||||
template <bool First>
|
||||
class AggregateFunctionAnyRespectNulls final
|
||||
: public IAggregateFunctionDataHelper<AggregateFunctionAnyRespectNullsData, AggregateFunctionAnyRespectNulls<First>>
|
||||
{
|
||||
public:
|
||||
using Data = AggregateFunctionAnyRespectNullsData;
|
||||
|
||||
private:
|
||||
SerializationPtr serialization;
|
||||
const bool returns_nullable_type = false;
|
||||
|
||||
explicit AggregateFunctionAnyRespectNulls(const DataTypePtr & type)
|
||||
: IAggregateFunctionDataHelper<Data, AggregateFunctionAnyRespectNulls<First>>({type}, {}, type)
|
||||
, serialization(type->getDefaultSerialization())
|
||||
, returns_nullable_type(type->isNullable())
|
||||
public:
|
||||
explicit AggregateFunctionAny(const DataTypes & argument_types_)
|
||||
: IAggregateFunctionDataHelper<Data, AggregateFunctionAny<Data>>(argument_types_, {}, argument_types_[0])
|
||||
, serialization(this->result_type->getDefaultSerialization())
|
||||
{
|
||||
}
|
||||
|
||||
String getName() const override
|
||||
{
|
||||
if constexpr (First)
|
||||
return "any_respect_nulls";
|
||||
else
|
||||
return "anyLast_respect_nulls";
|
||||
}
|
||||
String getName() const override { return "any"; }
|
||||
|
||||
bool allocatesMemoryInArena() const override { return false; }
|
||||
|
||||
void addNull(AggregateDataPtr __restrict place) const
|
||||
void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override
|
||||
{
|
||||
chassert(returns_nullable_type);
|
||||
auto & d = this->data(place);
|
||||
if (First && d.isSet())
|
||||
return;
|
||||
d.setNull();
|
||||
}
|
||||
|
||||
void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const override
|
||||
{
|
||||
if (columns[0]->isNullable())
|
||||
{
|
||||
if (columns[0]->isNullAt(row_num))
|
||||
return addNull(place);
|
||||
}
|
||||
auto & d = this->data(place);
|
||||
if (First && d.isSet())
|
||||
return;
|
||||
d.setOther();
|
||||
columns[0]->get(row_num, d.value);
|
||||
}
|
||||
|
||||
void addManyDefaults(AggregateDataPtr __restrict place, const IColumn ** columns, size_t, Arena * arena) const override
|
||||
{
|
||||
if (columns[0]->isNullable())
|
||||
addNull(place);
|
||||
else
|
||||
add(place, columns, 0, arena);
|
||||
if (!this->data(place).has())
|
||||
this->data(place).set(*columns[0], row_num, arena);
|
||||
}
|
||||
|
||||
void addBatchSinglePlace(
|
||||
size_t row_begin, size_t row_end, AggregateDataPtr place, const IColumn ** columns, Arena * arena, ssize_t if_argument_pos)
|
||||
const override
|
||||
size_t row_begin,
|
||||
size_t row_end,
|
||||
AggregateDataPtr __restrict place,
|
||||
const IColumn ** __restrict columns,
|
||||
Arena * arena,
|
||||
ssize_t if_argument_pos) const override
|
||||
{
|
||||
if (this->data(place).has() || row_begin >= row_end)
|
||||
return;
|
||||
|
||||
if (if_argument_pos >= 0)
|
||||
{
|
||||
const auto & flags = assert_cast<const ColumnUInt8 &>(*columns[if_argument_pos]).getData();
|
||||
size_t size = row_end - row_begin;
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
const auto & if_map = assert_cast<const ColumnUInt8 &>(*columns[if_argument_pos]).getData();
|
||||
for (size_t i = row_begin; i < row_end; i++)
|
||||
{
|
||||
size_t pos = First ? row_begin + i : row_end - 1 - i;
|
||||
if (flags[pos])
|
||||
if (if_map.data()[i] != 0)
|
||||
{
|
||||
add(place, columns, pos, arena);
|
||||
break;
|
||||
this->data(place).set(*columns[0], i, arena);
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (row_begin < row_end)
|
||||
else
|
||||
{
|
||||
size_t pos = First ? row_begin : row_end - 1;
|
||||
add(place, columns, pos, arena);
|
||||
this->data(place).set(*columns[0], row_begin, arena);
|
||||
}
|
||||
}
|
||||
|
||||
void addBatchSinglePlaceNotNull(
|
||||
size_t, size_t, AggregateDataPtr __restrict, const IColumn **, const UInt8 *, Arena *, ssize_t) const override
|
||||
size_t row_begin,
|
||||
size_t row_end,
|
||||
AggregateDataPtr __restrict place,
|
||||
const IColumn ** __restrict columns,
|
||||
const UInt8 * __restrict null_map,
|
||||
Arena * arena,
|
||||
ssize_t if_argument_pos) const override
|
||||
{
|
||||
/// This should not happen since it means somebody else has preprocessed the data (NULLs or IFs) and might
|
||||
/// have discarded values that we need (NULLs)
|
||||
throw DB::Exception(ErrorCodes::LOGICAL_ERROR, "AggregateFunctionAnyRespectNulls::addBatchSinglePlaceNotNull called");
|
||||
}
|
||||
|
||||
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override
|
||||
{
|
||||
auto & d = this->data(place);
|
||||
if (First && d.isSet())
|
||||
if (this->data(place).has() || row_begin >= row_end)
|
||||
return;
|
||||
|
||||
auto & other = this->data(rhs);
|
||||
if (other.isSet())
|
||||
if (if_argument_pos >= 0)
|
||||
{
|
||||
d.status = other.status;
|
||||
d.value = other.value;
|
||||
const auto & if_map = assert_cast<const ColumnUInt8 &>(*columns[if_argument_pos]).getData();
|
||||
for (size_t i = row_begin; i < row_end; i++)
|
||||
{
|
||||
if (if_map.data()[i] != 0 && null_map[i] == 0)
|
||||
{
|
||||
this->data(place).set(*columns[0], i, arena);
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (size_t i = row_begin; i < row_end; i++)
|
||||
{
|
||||
if (null_map[i] == 0)
|
||||
{
|
||||
this->data(place).set(*columns[0], i, arena);
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void addManyDefaults(AggregateDataPtr __restrict place, const IColumn ** columns, size_t, Arena * arena) const override
|
||||
{
|
||||
if (!this->data(place).has())
|
||||
this->data(place).set(*columns[0], 0, arena);
|
||||
}
|
||||
|
||||
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override
|
||||
{
|
||||
if (!this->data(place).has())
|
||||
this->data(place).set(this->data(rhs), arena);
|
||||
}
|
||||
|
||||
void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
|
||||
{
|
||||
auto & d = this->data(place);
|
||||
UInt8 k = d.status;
|
||||
|
||||
writeBinaryLittleEndian<UInt8>(k, buf);
|
||||
if (k == Data::Status::SetOther)
|
||||
serialization->serializeBinary(d.value, buf, {});
|
||||
this->data(place).write(buf, *serialization);
|
||||
}
|
||||
|
||||
void deserialize(AggregateDataPtr place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena *) const override
|
||||
void deserialize(AggregateDataPtr place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena * arena) const override
|
||||
{
|
||||
auto & d = this->data(place);
|
||||
UInt8 k = Data::Status::NotSet;
|
||||
readBinaryLittleEndian<UInt8>(k, buf);
|
||||
d.status = static_cast<Data::Status>(k);
|
||||
if (d.status == Data::Status::NotSet)
|
||||
return;
|
||||
else if (d.status == Data::Status::SetNull)
|
||||
{
|
||||
if (!returns_nullable_type)
|
||||
throw Exception(ErrorCodes::INCORRECT_DATA, "Incorrect type (NULL) in non-nullable {}State", getName());
|
||||
return;
|
||||
}
|
||||
else if (d.status == Data::Status::SetOther)
|
||||
serialization->deserializeBinary(d.value, buf, {});
|
||||
else
|
||||
throw Exception(ErrorCodes::INCORRECT_DATA, "Incorrect type ({}) in {}State", static_cast<Int8>(k), getName());
|
||||
this->data(place).read(buf, *serialization, arena);
|
||||
}
|
||||
|
||||
bool allocatesMemoryInArena() const override { return Data::allocatesMemoryInArena(); }
|
||||
|
||||
void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
|
||||
{
|
||||
auto & d = this->data(place);
|
||||
if (d.status == Data::Status::SetOther)
|
||||
to.insert(d.value);
|
||||
else
|
||||
to.insertDefault();
|
||||
this->data(place).insertResultInto(to);
|
||||
}
|
||||
|
||||
AggregateFunctionPtr getOwnNullAdapter(
|
||||
const AggregateFunctionPtr & original_function,
|
||||
const DataTypes & /*arguments*/,
|
||||
const Array & /*params*/,
|
||||
const AggregateFunctionProperties & /*properties*/) const override
|
||||
#if USE_EMBEDDED_COMPILER
|
||||
bool isCompilable() const override
|
||||
{
|
||||
return original_function;
|
||||
if constexpr (!Data::is_compilable)
|
||||
return false;
|
||||
else
|
||||
return Data::isCompilable(*this->argument_types[0]);
|
||||
}
|
||||
|
||||
void compileCreate(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr) const override
|
||||
{
|
||||
if constexpr (Data::is_compilable)
|
||||
Data::compileCreate(builder, aggregate_data_ptr);
|
||||
else
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "{} is not JIT-compilable", getName());
|
||||
}
|
||||
|
||||
void compileAdd(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr, const ValuesWithType & arguments) const override
|
||||
{
|
||||
if constexpr (Data::is_compilable)
|
||||
Data::compileAny(builder, aggregate_data_ptr, arguments[0].value);
|
||||
else
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "{} is not JIT-compilable", getName());
|
||||
}
|
||||
|
||||
void
|
||||
compileMerge(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_dst_ptr, llvm::Value * aggregate_data_src_ptr) const override
|
||||
{
|
||||
if constexpr (Data::is_compilable)
|
||||
Data::compileAnyMerge(builder, aggregate_data_dst_ptr, aggregate_data_src_ptr);
|
||||
else
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "{} is not JIT-compilable", getName());
|
||||
}
|
||||
|
||||
llvm::Value * compileGetResult(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr) const override
|
||||
{
|
||||
if constexpr (Data::is_compilable)
|
||||
return Data::compileGetResult(builder, aggregate_data_ptr);
|
||||
else
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "{} is not JIT-compilable", getName());
|
||||
}
|
||||
#endif
|
||||
};
|
||||
|
||||
|
||||
template <bool First>
|
||||
IAggregateFunction * createAggregateFunctionSingleValueRespectNulls(
|
||||
const String & name, const DataTypes & argument_types, const Array & parameters, const Settings *)
|
||||
AggregateFunctionPtr
|
||||
createAggregateFunctionAny(const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings * settings)
|
||||
{
|
||||
assertNoParameters(name, parameters);
|
||||
assertUnary(name, argument_types);
|
||||
|
||||
return new AggregateFunctionAnyRespectNulls<First>(argument_types[0]);
|
||||
return AggregateFunctionPtr(
|
||||
createAggregateFunctionSingleValue<AggregateFunctionAny, /* unary */ true>(name, argument_types, parameters, settings));
|
||||
}
|
||||
|
||||
AggregateFunctionPtr createAggregateFunctionAny(const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings * settings)
|
||||
{
|
||||
return AggregateFunctionPtr(createAggregateFunctionSingleValue<AggregateFunctionsSingleValue, AggregateFunctionAnyData>(name, argument_types, parameters, settings));
|
||||
}
|
||||
|
||||
AggregateFunctionPtr createAggregateFunctionAnyRespectNulls(
|
||||
template <typename Data>
|
||||
class AggregateFunctionAnyLast final : public IAggregateFunctionDataHelper<Data, AggregateFunctionAnyLast<Data>>
|
||||
{
|
||||
private:
|
||||
SerializationPtr serialization;
|
||||
|
||||
public:
|
||||
explicit AggregateFunctionAnyLast(const DataTypes & argument_types_)
|
||||
: IAggregateFunctionDataHelper<Data, AggregateFunctionAnyLast<Data>>(argument_types_, {}, argument_types_[0])
|
||||
, serialization(this->result_type->getDefaultSerialization())
|
||||
{
|
||||
}
|
||||
|
||||
String getName() const override { return "anyLast"; }
|
||||
|
||||
void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override
|
||||
{
|
||||
this->data(place).set(*columns[0], row_num, arena);
|
||||
}
|
||||
|
||||
void addBatchSinglePlace(
|
||||
size_t row_begin,
|
||||
size_t row_end,
|
||||
AggregateDataPtr __restrict place,
|
||||
const IColumn ** __restrict columns,
|
||||
Arena * arena,
|
||||
ssize_t if_argument_pos) const override
|
||||
{
|
||||
if (row_begin >= row_end)
|
||||
return;
|
||||
|
||||
size_t batch_size = row_end - row_begin;
|
||||
if (if_argument_pos >= 0)
|
||||
{
|
||||
const auto & if_map = assert_cast<const ColumnUInt8 &>(*columns[if_argument_pos]).getData();
|
||||
for (size_t i = 0; i < batch_size; i++)
|
||||
{
|
||||
size_t pos = (row_end - 1) - i;
|
||||
if (if_map.data()[pos] != 0)
|
||||
{
|
||||
this->data(place).set(*columns[0], pos, arena);
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
this->data(place).set(*columns[0], row_end - 1, arena);
|
||||
}
|
||||
}
|
||||
|
||||
void addBatchSinglePlaceNotNull(
|
||||
size_t row_begin,
|
||||
size_t row_end,
|
||||
AggregateDataPtr __restrict place,
|
||||
const IColumn ** __restrict columns,
|
||||
const UInt8 * __restrict null_map,
|
||||
Arena * arena,
|
||||
ssize_t if_argument_pos) const override
|
||||
{
|
||||
if (row_begin >= row_end)
|
||||
return;
|
||||
|
||||
size_t batch_size = row_end - row_begin;
|
||||
if (if_argument_pos >= 0)
|
||||
{
|
||||
const auto & if_map = assert_cast<const ColumnUInt8 &>(*columns[if_argument_pos]).getData();
|
||||
for (size_t i = 0; i < batch_size; i++)
|
||||
{
|
||||
size_t pos = (row_end - 1) - i;
|
||||
if (if_map.data()[pos] != 0 && null_map[pos] == 0)
|
||||
{
|
||||
this->data(place).set(*columns[0], pos, arena);
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (size_t i = 0; i < batch_size; i++)
|
||||
{
|
||||
size_t pos = (row_end - 1) - i;
|
||||
if (null_map[pos] == 0)
|
||||
{
|
||||
this->data(place).set(*columns[0], pos, arena);
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void addManyDefaults(AggregateDataPtr __restrict place, const IColumn ** columns, size_t, Arena * arena) const override
|
||||
{
|
||||
this->data(place).set(*columns[0], 0, arena);
|
||||
}
|
||||
|
||||
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override
|
||||
{
|
||||
this->data(place).set(this->data(rhs), arena);
|
||||
}
|
||||
|
||||
void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
|
||||
{
|
||||
this->data(place).write(buf, *serialization);
|
||||
}
|
||||
|
||||
void deserialize(AggregateDataPtr place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena * arena) const override
|
||||
{
|
||||
this->data(place).read(buf, *serialization, arena);
|
||||
}
|
||||
|
||||
bool allocatesMemoryInArena() const override { return Data::allocatesMemoryInArena(); }
|
||||
|
||||
void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
|
||||
{
|
||||
this->data(place).insertResultInto(to);
|
||||
}
|
||||
|
||||
#if USE_EMBEDDED_COMPILER
|
||||
bool isCompilable() const override
|
||||
{
|
||||
if constexpr (!Data::is_compilable)
|
||||
return false;
|
||||
else
|
||||
return Data::isCompilable(*this->argument_types[0]);
|
||||
}
|
||||
|
||||
void compileCreate(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr) const override
|
||||
{
|
||||
if constexpr (Data::is_compilable)
|
||||
Data::compileCreate(builder, aggregate_data_ptr);
|
||||
else
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "{} is not JIT-compilable", getName());
|
||||
}
|
||||
|
||||
void compileAdd(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr, const ValuesWithType & arguments) const override
|
||||
{
|
||||
if constexpr (Data::is_compilable)
|
||||
Data::compileAnyLast(builder, aggregate_data_ptr, arguments[0].value);
|
||||
else
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "{} is not JIT-compilable", getName());
|
||||
}
|
||||
|
||||
void
|
||||
compileMerge(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_dst_ptr, llvm::Value * aggregate_data_src_ptr) const override
|
||||
{
|
||||
if constexpr (Data::is_compilable)
|
||||
Data::compileAnyLastMerge(builder, aggregate_data_dst_ptr, aggregate_data_src_ptr);
|
||||
else
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "{} is not JIT-compilable", getName());
|
||||
}
|
||||
|
||||
llvm::Value * compileGetResult(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr) const override
|
||||
{
|
||||
if constexpr (Data::is_compilable)
|
||||
return Data::compileGetResult(builder, aggregate_data_ptr);
|
||||
else
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "{} is not JIT-compilable", getName());
|
||||
}
|
||||
#endif
|
||||
};
|
||||
|
||||
AggregateFunctionPtr createAggregateFunctionAnyLast(
|
||||
const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings * settings)
|
||||
{
|
||||
return AggregateFunctionPtr(createAggregateFunctionSingleValueRespectNulls<true>(name, argument_types, parameters, settings));
|
||||
}
|
||||
|
||||
AggregateFunctionPtr createAggregateFunctionAnyLast(const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings * settings)
|
||||
{
|
||||
return AggregateFunctionPtr(createAggregateFunctionSingleValue<AggregateFunctionsSingleValue, AggregateFunctionAnyLastData>(name, argument_types, parameters, settings));
|
||||
}
|
||||
|
||||
AggregateFunctionPtr createAggregateFunctionAnyLastRespectNulls(
|
||||
const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings * settings)
|
||||
{
|
||||
return AggregateFunctionPtr(createAggregateFunctionSingleValueRespectNulls<false>(name, argument_types, parameters, settings));
|
||||
}
|
||||
|
||||
AggregateFunctionPtr createAggregateFunctionAnyHeavy(const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings * settings)
|
||||
{
|
||||
return AggregateFunctionPtr(createAggregateFunctionSingleValue<AggregateFunctionsSingleValue, AggregateFunctionAnyHeavyData>(name, argument_types, parameters, settings));
|
||||
return AggregateFunctionPtr(
|
||||
createAggregateFunctionSingleValue<AggregateFunctionAnyLast, /* unary */ true>(name, argument_types, parameters, settings));
|
||||
}
|
||||
|
||||
}
|
||||
@ -231,27 +359,11 @@ AggregateFunctionPtr createAggregateFunctionAnyHeavy(const std::string & name, c
|
||||
void registerAggregateFunctionsAny(AggregateFunctionFactory & factory)
|
||||
{
|
||||
AggregateFunctionProperties default_properties = {.returns_default_when_only_null = false, .is_order_dependent = true};
|
||||
AggregateFunctionProperties default_properties_for_respect_nulls
|
||||
= {.returns_default_when_only_null = false, .is_order_dependent = true, .is_window_function = true};
|
||||
|
||||
factory.registerFunction("any", {createAggregateFunctionAny, default_properties});
|
||||
factory.registerAlias("any_value", "any", AggregateFunctionFactory::CaseInsensitive);
|
||||
factory.registerAlias("first_value", "any", AggregateFunctionFactory::CaseInsensitive);
|
||||
|
||||
factory.registerFunction("any_respect_nulls", {createAggregateFunctionAnyRespectNulls, default_properties_for_respect_nulls});
|
||||
factory.registerAlias("any_value_respect_nulls", "any_respect_nulls", AggregateFunctionFactory::CaseInsensitive);
|
||||
factory.registerAlias("first_value_respect_nulls", "any_respect_nulls", AggregateFunctionFactory::CaseInsensitive);
|
||||
|
||||
factory.registerFunction("anyLast", {createAggregateFunctionAnyLast, default_properties});
|
||||
factory.registerAlias("last_value", "anyLast", AggregateFunctionFactory::CaseInsensitive);
|
||||
|
||||
factory.registerFunction("anyLast_respect_nulls", {createAggregateFunctionAnyLastRespectNulls, default_properties_for_respect_nulls});
|
||||
factory.registerAlias("last_value_respect_nulls", "anyLast_respect_nulls", AggregateFunctionFactory::CaseInsensitive);
|
||||
|
||||
factory.registerFunction("anyHeavy", {createAggregateFunctionAnyHeavy, default_properties});
|
||||
|
||||
factory.registerNullsActionTransformation("any", "any_respect_nulls");
|
||||
factory.registerNullsActionTransformation("anyLast", "anyLast_respect_nulls");
|
||||
}
|
||||
|
||||
}
|
||||
|
168
src/AggregateFunctions/AggregateFunctionAnyHeavy.cpp
Normal file
168
src/AggregateFunctions/AggregateFunctionAnyHeavy.cpp
Normal file
@ -0,0 +1,168 @@
|
||||
#include <AggregateFunctions/AggregateFunctionFactory.h>
|
||||
#include <AggregateFunctions/SingleValueData.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <base/defines.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
struct Settings;
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int LOGICAL_ERROR;
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
/** Implement 'heavy hitters' algorithm.
|
||||
* Selects most frequent value if its frequency is more than 50% in each thread of execution.
|
||||
* Otherwise, selects some arbitrary value.
|
||||
* http://www.cs.umd.edu/~samir/498/karp.pdf
|
||||
*/
|
||||
struct AggregateFunctionAnyHeavyData
|
||||
{
|
||||
using Self = AggregateFunctionAnyHeavyData;
|
||||
|
||||
private:
|
||||
SingleValueDataBaseMemoryBlock v_data;
|
||||
UInt64 counter = 0;
|
||||
|
||||
public:
|
||||
[[noreturn]] explicit AggregateFunctionAnyHeavyData()
|
||||
{
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "AggregateFunctionAnyHeavyData initialized empty");
|
||||
}
|
||||
|
||||
explicit AggregateFunctionAnyHeavyData(TypeIndex value_type) { generateSingleValueFromTypeIndex(value_type, v_data); }
|
||||
|
||||
~AggregateFunctionAnyHeavyData() { data().~SingleValueDataBase(); }
|
||||
|
||||
SingleValueDataBase & data() { return v_data.get(); }
|
||||
const SingleValueDataBase & data() const { return v_data.get(); }
|
||||
|
||||
void add(const IColumn & column, size_t row_num, Arena * arena)
|
||||
{
|
||||
if (data().isEqualTo(column, row_num))
|
||||
{
|
||||
++counter;
|
||||
}
|
||||
else if (counter == 0)
|
||||
{
|
||||
data().set(column, row_num, arena);
|
||||
++counter;
|
||||
}
|
||||
else
|
||||
{
|
||||
--counter;
|
||||
}
|
||||
}
|
||||
|
||||
void add(const Self & to, Arena * arena)
|
||||
{
|
||||
if (!to.data().has())
|
||||
return;
|
||||
|
||||
if (data().isEqualTo(to.data()))
|
||||
counter += to.counter;
|
||||
else if (!data().has() || counter < to.counter)
|
||||
data().set(to.data(), arena);
|
||||
else
|
||||
counter -= to.counter;
|
||||
}
|
||||
|
||||
void addManyDefaults(const IColumn & column, size_t length, Arena * arena)
|
||||
{
|
||||
for (size_t i = 0; i < length; ++i)
|
||||
add(column, 0, arena);
|
||||
}
|
||||
|
||||
void write(WriteBuffer & buf, const ISerialization & serialization) const
|
||||
{
|
||||
data().write(buf, serialization);
|
||||
writeBinaryLittleEndian(counter, buf);
|
||||
}
|
||||
|
||||
void read(ReadBuffer & buf, const ISerialization & serialization, Arena * arena)
|
||||
{
|
||||
data().read(buf, serialization, arena);
|
||||
readBinaryLittleEndian(counter, buf);
|
||||
}
|
||||
|
||||
void insertResultInto(IColumn & to) const { data().insertResultInto(to); }
|
||||
};
|
||||
|
||||
|
||||
/// Aggregate function `anyHeavy`: a thin adapter that forwards every operation to
/// AggregateFunctionAnyHeavyData, which holds the candidate value and its vote counter.
class AggregateFunctionAnyHeavy final : public IAggregateFunctionDataHelper<AggregateFunctionAnyHeavyData, AggregateFunctionAnyHeavy>
{
private:
    /// Default serialization of the argument type; used to (de)serialize the stored value.
    SerializationPtr serialization;
    /// Type index of the argument; selects the concrete value holder when a state is created.
    const TypeIndex value_type_index;

public:
    /// `type` is both the single argument type and the result type of the function.
    explicit AggregateFunctionAnyHeavy(const DataTypePtr & type)
        : IAggregateFunctionDataHelper<AggregateFunctionAnyHeavyData, AggregateFunctionAnyHeavy>({type}, {}, type)
        , serialization(type->getDefaultSerialization())
        , value_type_index(WhichDataType(type).idx)
    {
    }

    /// The state has no usable default constructor, so it is built in place with the value type.
    void create(AggregateDataPtr __restrict place) const override { new (place) AggregateFunctionAnyHeavyData(value_type_index); }

    String getName() const override { return "anyHeavy"; }

    void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override
    {
        this->data(place).add(*columns[0], row_num, arena);
    }

    /// Accounts for `length` repetitions of the value at row 0 of the argument column.
    /// The count must be forwarded as is: the state's addManyDefaults loops `length` times,
    /// so passing a constant 0 would silently drop all of these rows from the frequency count.
    void addManyDefaults(AggregateDataPtr __restrict place, const IColumn ** columns, size_t length, Arena * arena) const override
    {
        this->data(place).addManyDefaults(*columns[0], length, arena);
    }

    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override
    {
        this->data(place).add(this->data(rhs), arena);
    }

    void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
    {
        this->data(place).write(buf, *serialization);
    }

    void deserialize(AggregateDataPtr place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena * arena) const override
    {
        this->data(place).read(buf, *serialization, arena);
    }

    /// Whether the arena is used depends on the value type only.
    bool allocatesMemoryInArena() const override { return singleValueTypeAllocatesMemoryInArena(value_type_index); }

    void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
    {
        this->data(place).insertResultInto(to);
    }
};
|
||||
|
||||
|
||||
/// Factory callback for `anyHeavy`: validates that there are no parameters and exactly one
/// argument, then builds the function for that argument's type. Settings are not used.
AggregateFunctionPtr
createAggregateFunctionAnyHeavy(const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings *)
{
    assertNoParameters(name, parameters);
    assertUnary(name, argument_types);

    /// The single argument type is also the result type.
    return AggregateFunctionPtr(new AggregateFunctionAnyHeavy(argument_types[0]));
}
|
||||
|
||||
}
|
||||
|
||||
/// Registers `anyHeavy` in the factory as an order-dependent function
/// that does not return a default value when all inputs are NULL.
void registerAggregateFunctionAnyHeavy(AggregateFunctionFactory & factory)
{
    AggregateFunctionProperties any_heavy_properties = {.returns_default_when_only_null = false, .is_order_dependent = true};

    factory.registerFunction("anyHeavy", {createAggregateFunctionAnyHeavy, any_heavy_properties});
}
|
||||
|
||||
}
|
235
src/AggregateFunctions/AggregateFunctionAnyRespectNulls.cpp
Normal file
235
src/AggregateFunctions/AggregateFunctionAnyRespectNulls.cpp
Normal file
@ -0,0 +1,235 @@
|
||||
#include <AggregateFunctions/AggregateFunctionFactory.h>
|
||||
#include <AggregateFunctions/SingleValueData.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <base/defines.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
struct Settings;
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int INCORRECT_DATA;
|
||||
extern const int LOGICAL_ERROR;
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
struct AggregateFunctionAnyRespectNullsData
|
||||
{
|
||||
enum class Status : UInt8
|
||||
{
|
||||
NotSet = 1,
|
||||
SetNull = 2,
|
||||
SetOther = 3
|
||||
};
|
||||
|
||||
Status status = Status::NotSet;
|
||||
Field value;
|
||||
|
||||
bool isSet() const { return status != Status::NotSet; }
|
||||
void setNull() { status = Status::SetNull; }
|
||||
void setOther() { status = Status::SetOther; }
|
||||
};
|
||||
|
||||
template <bool First>
|
||||
class AggregateFunctionAnyRespectNulls final
|
||||
: public IAggregateFunctionDataHelper<AggregateFunctionAnyRespectNullsData, AggregateFunctionAnyRespectNulls<First>>
|
||||
{
|
||||
public:
|
||||
using Data = AggregateFunctionAnyRespectNullsData;
|
||||
|
||||
SerializationPtr serialization;
|
||||
const bool returns_nullable_type = false;
|
||||
|
||||
explicit AggregateFunctionAnyRespectNulls(const DataTypePtr & type)
|
||||
: IAggregateFunctionDataHelper<Data, AggregateFunctionAnyRespectNulls<First>>({type}, {}, type)
|
||||
, serialization(type->getDefaultSerialization())
|
||||
, returns_nullable_type(type->isNullable())
|
||||
{
|
||||
}
|
||||
|
||||
String getName() const override
|
||||
{
|
||||
if constexpr (First)
|
||||
return "any_respect_nulls";
|
||||
else
|
||||
return "anyLast_respect_nulls";
|
||||
}
|
||||
|
||||
bool allocatesMemoryInArena() const override { return false; }
|
||||
|
||||
void addNull(AggregateDataPtr __restrict place) const
|
||||
{
|
||||
chassert(returns_nullable_type);
|
||||
auto & d = this->data(place);
|
||||
if (First && d.isSet())
|
||||
return;
|
||||
d.setNull();
|
||||
}
|
||||
|
||||
void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const override
|
||||
{
|
||||
if (columns[0]->isNullable())
|
||||
{
|
||||
if (columns[0]->isNullAt(row_num))
|
||||
return addNull(place);
|
||||
}
|
||||
auto & d = this->data(place);
|
||||
if (First && d.isSet())
|
||||
return;
|
||||
d.setOther();
|
||||
columns[0]->get(row_num, d.value);
|
||||
}
|
||||
|
||||
void addManyDefaults(AggregateDataPtr __restrict place, const IColumn ** columns, size_t, Arena * arena) const override
|
||||
{
|
||||
if (columns[0]->isNullable())
|
||||
addNull(place);
|
||||
else
|
||||
add(place, columns, 0, arena);
|
||||
}
|
||||
|
||||
void addBatchSinglePlace(
|
||||
size_t row_begin, size_t row_end, AggregateDataPtr place, const IColumn ** columns, Arena * arena, ssize_t if_argument_pos)
|
||||
const override
|
||||
{
|
||||
if (if_argument_pos >= 0)
|
||||
{
|
||||
const auto & flags = assert_cast<const ColumnUInt8 &>(*columns[if_argument_pos]).getData();
|
||||
size_t size = row_end - row_begin;
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
{
|
||||
size_t pos = First ? row_begin + i : row_end - 1 - i;
|
||||
if (flags[pos])
|
||||
{
|
||||
add(place, columns, pos, arena);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (row_begin < row_end)
|
||||
{
|
||||
size_t pos = First ? row_begin : row_end - 1;
|
||||
add(place, columns, pos, arena);
|
||||
}
|
||||
}
|
||||
|
||||
void addBatchSinglePlaceNotNull(
|
||||
size_t, size_t, AggregateDataPtr __restrict, const IColumn **, const UInt8 *, Arena *, ssize_t) const override
|
||||
{
|
||||
/// This should not happen since it means somebody else has preprocessed the data (NULLs or IFs) and might
|
||||
/// have discarded values that we need (NULLs)
|
||||
throw DB::Exception(ErrorCodes::LOGICAL_ERROR, "AggregateFunctionAnyRespectNulls::addBatchSinglePlaceNotNull called");
|
||||
}
|
||||
|
||||
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override
|
||||
{
|
||||
auto & d = this->data(place);
|
||||
if (First && d.isSet())
|
||||
return;
|
||||
|
||||
auto & other = this->data(rhs);
|
||||
if (other.isSet())
|
||||
{
|
||||
d.status = other.status;
|
||||
d.value = other.value;
|
||||
}
|
||||
}
|
||||
|
||||
void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
|
||||
{
|
||||
auto & d = this->data(place);
|
||||
UInt8 k = static_cast<UInt8>(d.status);
|
||||
|
||||
writeBinaryLittleEndian<UInt8>(k, buf);
|
||||
if (d.status == Data::Status::SetOther)
|
||||
serialization->serializeBinary(d.value, buf, {});
|
||||
}
|
||||
|
||||
/// Reads a state written by serialize(): one status byte, then the value when the status is SetOther.
///
/// Throws INCORRECT_DATA when the status byte is not one of NotSet / SetNull / SetOther, or when
/// it is SetNull while this function does not return a nullable type.
///
/// The status is stored into the state only after it has been validated, so a rejected stream
/// never leaves an out-of-range or disallowed status behind in `place`.
void deserialize(AggregateDataPtr place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena *) const override
{
    auto & d = this->data(place);

    UInt8 k = 0;
    readBinaryLittleEndian<UInt8>(k, buf);
    const auto status = static_cast<Data::Status>(k);

    if (status == Data::Status::NotSet)
    {
        d.status = status;
        return;
    }

    if (status == Data::Status::SetNull)
    {
        if (!returns_nullable_type)
            throw Exception(ErrorCodes::INCORRECT_DATA, "Incorrect type (NULL) in non-nullable {}State", getName());

        d.status = status;
        return;
    }

    if (status == Data::Status::SetOther)
    {
        /// Read the payload first: if it throws, the state does not claim to hold a value.
        serialization->deserializeBinary(d.value, buf, {});
        d.status = status;
        return;
    }

    /// Report the raw byte as an unsigned number; a signed 8-bit cast would print values >= 128 as negative.
    throw Exception(ErrorCodes::INCORRECT_DATA, "Incorrect type ({}) in {}State", static_cast<unsigned>(k), getName());
}
|
||||
|
||||
/// Appends the aggregation result to `to`: the stored value when the status is SetOther,
/// otherwise the column's default value.
void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
{
    auto & state = this->data(place);

    if (state.status != Data::Status::SetOther)
    {
        to.insertDefault();
        return;
    }

    to.insert(state.value);
}
|
||||
|
||||
/// Returns `original_function` unchanged; the argument types, parameters and properties are ignored.
AggregateFunctionPtr getOwnNullAdapter(
    const AggregateFunctionPtr & original_function,
    const DataTypes &,
    const Array &,
    const AggregateFunctionProperties &) const override
{
    return original_function;
}
|
||||
};
|
||||
|
||||
|
||||
template <bool First>
|
||||
IAggregateFunction * createAggregateFunctionSingleValueRespectNulls(
|
||||
const String & name, const DataTypes & argument_types, const Array & parameters, const Settings *)
|
||||
{
|
||||
assertNoParameters(name, parameters);
|
||||
assertUnary(name, argument_types);
|
||||
|
||||
return new AggregateFunctionAnyRespectNulls<First>(argument_types[0]);
|
||||
}
|
||||
|
||||
/// Factory callback for the First = true instantiation; wraps the raw pointer into AggregateFunctionPtr.
AggregateFunctionPtr createAggregateFunctionAnyRespectNulls(
    const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings * settings)
{
    IAggregateFunction * function = createAggregateFunctionSingleValueRespectNulls<true>(name, argument_types, parameters, settings);
    return AggregateFunctionPtr(function);
}
|
||||
|
||||
/// Factory callback for the First = false instantiation; wraps the raw pointer into AggregateFunctionPtr.
AggregateFunctionPtr createAggregateFunctionAnyLastRespectNulls(
    const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings * settings)
{
    IAggregateFunction * function = createAggregateFunctionSingleValueRespectNulls<false>(name, argument_types, parameters, settings);
    return AggregateFunctionPtr(function);
}
|
||||
|
||||
}
|
||||
|
||||
/// Registers any_respect_nulls / anyLast_respect_nulls together with their case-insensitive aliases,
/// then maps the plain `any` / `anyLast` functions onto them for the nulls action transformation.
void registerAggregateFunctionsAnyRespectNulls(AggregateFunctionFactory & factory)
{
    const AggregateFunctionProperties respect_nulls_properties
        = {.returns_default_when_only_null = false, .is_order_dependent = true, .is_window_function = true};
    const auto case_insensitive = AggregateFunctionFactory::CaseInsensitive;

    factory.registerFunction("any_respect_nulls", {createAggregateFunctionAnyRespectNulls, respect_nulls_properties});
    factory.registerAlias("any_value_respect_nulls", "any_respect_nulls", case_insensitive);
    factory.registerAlias("first_value_respect_nulls", "any_respect_nulls", case_insensitive);

    factory.registerFunction("anyLast_respect_nulls", {createAggregateFunctionAnyLastRespectNulls, respect_nulls_properties});
    factory.registerAlias("last_value_respect_nulls", "anyLast_respect_nulls", case_insensitive);

    /// Must happen after registering any and anyLast
    factory.registerNullsActionTransformation("any", "any_respect_nulls");
    factory.registerNullsActionTransformation("anyLast", "anyLast_respect_nulls");
}
|
||||
|
||||
}
|
@ -1,107 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <base/StringRef.h>
|
||||
#include <DataTypes/IDataType.h>
|
||||
#include <AggregateFunctions/IAggregateFunction.h>
|
||||
#include <AggregateFunctions/AggregateFunctionMinMaxAny.h> // SingleValueDataString used in embedded compiler
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
struct Settings;
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
extern const int CORRUPTED_DATA;
|
||||
}
|
||||
|
||||
|
||||
/// For possible values for template parameters, see 'AggregateFunctionMinMaxAny.h'.
|
||||
/// State of argMin / argMax: the tracked extreme `value` and the `result` argument recorded with it.
template <typename ResultData, typename ValueData>
struct AggregateFunctionArgMinMaxData
{
    using ResultData_t = ResultData;
    using ValueData_t = ValueData;

    /// The argument at which the minimum/maximum value is reached.
    ResultData result;
    /// The value for which the minimum/maximum is calculated.
    ValueData value;

    /// True when either of the two parts allocates memory in the arena.
    static bool allocatesMemoryInArena()
    {
        if (ResultData::allocatesMemoryInArena())
            return true;
        return ValueData::allocatesMemoryInArena();
    }
};
|
||||
|
||||
/// Returns the first arg value found for the minimum/maximum value. Example: argMax(arg, value).
|
||||
template <typename Data>
|
||||
class AggregateFunctionArgMinMax final : public IAggregateFunctionDataHelper<Data, AggregateFunctionArgMinMax<Data>>
|
||||
{
|
||||
private:
|
||||
const DataTypePtr & type_val;
|
||||
const SerializationPtr serialization_res;
|
||||
const SerializationPtr serialization_val;
|
||||
|
||||
using Base = IAggregateFunctionDataHelper<Data, AggregateFunctionArgMinMax<Data>>;
|
||||
|
||||
public:
|
||||
AggregateFunctionArgMinMax(const DataTypePtr & type_res_, const DataTypePtr & type_val_)
|
||||
: Base({type_res_, type_val_}, {}, type_res_)
|
||||
, type_val(this->argument_types[1])
|
||||
, serialization_res(type_res_->getDefaultSerialization())
|
||||
, serialization_val(type_val->getDefaultSerialization())
|
||||
{
|
||||
if (!type_val->isComparable())
|
||||
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of second argument of "
|
||||
"aggregate function {} because the values of that data type are not comparable",
|
||||
type_val->getName(), getName());
|
||||
}
|
||||
|
||||
String getName() const override
|
||||
{
|
||||
return StringRef(Data::ValueData_t::name()) == StringRef("min") ? "argMin" : "argMax";
|
||||
}
|
||||
|
||||
void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override
|
||||
{
|
||||
if (this->data(place).value.changeIfBetter(*columns[1], row_num, arena))
|
||||
this->data(place).result.change(*columns[0], row_num, arena);
|
||||
}
|
||||
|
||||
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override
|
||||
{
|
||||
if (this->data(place).value.changeIfBetter(this->data(rhs).value, arena))
|
||||
this->data(place).result.change(this->data(rhs).result, arena);
|
||||
}
|
||||
|
||||
void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
|
||||
{
|
||||
this->data(place).result.write(buf, *serialization_res);
|
||||
this->data(place).value.write(buf, *serialization_val);
|
||||
}
|
||||
|
||||
void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena * arena) const override
|
||||
{
|
||||
this->data(place).result.read(buf, *serialization_res, arena);
|
||||
this->data(place).value.read(buf, *serialization_val, arena);
|
||||
if (unlikely(this->data(place).value.has() != this->data(place).result.has()))
|
||||
throw Exception(
|
||||
ErrorCodes::CORRUPTED_DATA,
|
||||
"Invalid state of the aggregate function {}: has_value ({}) != has_result ({})",
|
||||
getName(),
|
||||
this->data(place).value.has(),
|
||||
this->data(place).result.has());
|
||||
}
|
||||
|
||||
bool allocatesMemoryInArena() const override
|
||||
{
|
||||
return Data::allocatesMemoryInArena();
|
||||
}
|
||||
|
||||
void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
|
||||
{
|
||||
this->data(place).result.insertResultInto(to);
|
||||
}
|
||||
};
|
||||
|
||||
}
|
@ -219,7 +219,7 @@ public:
|
||||
: IAggregateFunctionDataHelper<AggregateFunctionCountData, AggregateFunctionCountNotNullUnary>({argument}, params, createResultType())
|
||||
{
|
||||
if (!argument->isNullable())
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: not Nullable data type passed to AggregateFunctionCountNotNullUnary");
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Not Nullable data type passed to AggregateFunctionCountNotNullUnary");
|
||||
}
|
||||
|
||||
String getName() const override { return "count"; }
|
||||
|
@ -100,7 +100,7 @@ AggregateFunctionPtr AggregateFunctionFactory::get(
|
||||
{
|
||||
AggregateFunctionCombinatorPtr combinator = AggregateFunctionCombinatorFactory::instance().tryFindSuffix("Null");
|
||||
if (!combinator)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: cannot find aggregate function combinator "
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot find aggregate function combinator "
|
||||
"to apply a function to Nullable arguments.");
|
||||
|
||||
DataTypes nested_types = combinator->transformArguments(types_without_low_cardinality);
|
||||
@ -123,7 +123,7 @@ AggregateFunctionPtr AggregateFunctionFactory::get(
|
||||
auto with_original_arguments = getImpl(name, action, types_without_low_cardinality, parameters, out_properties, false);
|
||||
|
||||
if (!with_original_arguments)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: AggregateFunctionFactory returned nullptr");
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "AggregateFunctionFactory returned nullptr");
|
||||
return with_original_arguments;
|
||||
}
|
||||
|
||||
|
439
src/AggregateFunctions/AggregateFunctionGroupArrayIntersect.cpp
Normal file
439
src/AggregateFunctions/AggregateFunctionGroupArrayIntersect.cpp
Normal file
@ -0,0 +1,439 @@
|
||||
#include <cassert>
|
||||
#include <memory>
|
||||
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include <IO/ReadHelpersArena.h>
|
||||
|
||||
#include <DataTypes/DataTypeArray.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <DataTypes/DataTypeString.h>
|
||||
|
||||
#include <Columns/ColumnArray.h>
|
||||
|
||||
#include <Common/HashTable/HashSet.h>
|
||||
#include <Common/HashTable/HashTableKeyHolder.h>
|
||||
#include <Common/assert_cast.h>
|
||||
|
||||
#include <AggregateFunctions/IAggregateFunction.h>
|
||||
#include <AggregateFunctions/KeyHolderHelpers.h>
|
||||
|
||||
#include <Core/Field.h>
|
||||
|
||||
#include <AggregateFunctions/AggregateFunctionFactory.h>
|
||||
#include <AggregateFunctions/Helpers.h>
|
||||
#include <AggregateFunctions/FactoryHelpers.h>
|
||||
#include <DataTypes/DataTypeDate.h>
|
||||
#include <DataTypes/DataTypeDate32.h>
|
||||
#include <DataTypes/DataTypeDateTime.h>
|
||||
#include <DataTypes/DataTypeDateTime64.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
struct Settings;
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
|
||||
}
|
||||
struct Settings;
|
||||
|
||||
|
||||
template <typename T>
|
||||
struct AggregateFunctionGroupArrayIntersectData
|
||||
{
|
||||
using Set = HashSet<T>;
|
||||
|
||||
Set value;
|
||||
UInt64 version = 0;
|
||||
};
|
||||
|
||||
|
||||
/// Puts all values to the hash set. Returns an array of unique values. Implemented for numeric types.
|
||||
template <typename T>
|
||||
class AggregateFunctionGroupArrayIntersect
|
||||
: public IAggregateFunctionDataHelper<AggregateFunctionGroupArrayIntersectData<T>, AggregateFunctionGroupArrayIntersect<T>>
|
||||
{
|
||||
|
||||
private:
|
||||
using State = AggregateFunctionGroupArrayIntersectData<T>;
|
||||
|
||||
public:
|
||||
AggregateFunctionGroupArrayIntersect(const DataTypePtr & argument_type, const Array & parameters_)
|
||||
: IAggregateFunctionDataHelper<AggregateFunctionGroupArrayIntersectData<T>,
|
||||
AggregateFunctionGroupArrayIntersect<T>>({argument_type}, parameters_, argument_type) {}
|
||||
|
||||
AggregateFunctionGroupArrayIntersect(const DataTypePtr & argument_type, const Array & parameters_, const DataTypePtr & result_type_)
|
||||
: IAggregateFunctionDataHelper<AggregateFunctionGroupArrayIntersectData<T>,
|
||||
AggregateFunctionGroupArrayIntersect<T>>({argument_type}, parameters_, result_type_) {}
|
||||
|
||||
String getName() const override { return "GroupArrayIntersect"; }
|
||||
|
||||
bool allocatesMemoryInArena() const override { return false; }
|
||||
|
||||
void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const override
|
||||
{
|
||||
auto & version = this->data(place).version;
|
||||
auto & set = this->data(place).value;
|
||||
|
||||
const auto data_column = assert_cast<const ColumnArray &>(*columns[0]).getDataPtr();
|
||||
const auto & offsets = assert_cast<const ColumnArray &>(*columns[0]).getOffsets();
|
||||
const size_t offset = offsets[static_cast<ssize_t>(row_num) - 1];
|
||||
const auto arr_size = offsets[row_num] - offset;
|
||||
|
||||
++version;
|
||||
if (version == 1)
|
||||
{
|
||||
for (size_t i = 0; i < arr_size; ++i)
|
||||
set.insert(static_cast<T>((*data_column)[offset + i].get<T>()));
|
||||
}
|
||||
else if (!set.empty())
|
||||
{
|
||||
typename State::Set new_set;
|
||||
for (size_t i = 0; i < arr_size; ++i)
|
||||
{
|
||||
typename State::Set::LookupResult set_value = set.find(static_cast<T>((*data_column)[offset + i].get<T>()));
|
||||
if (set_value != nullptr)
|
||||
new_set.insert(static_cast<T>((*data_column)[offset + i].get<T>()));
|
||||
}
|
||||
set = std::move(new_set);
|
||||
}
|
||||
}
|
||||
|
||||
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override
|
||||
{
|
||||
auto & set = this->data(place).value;
|
||||
const auto & rhs_set = this->data(rhs).value;
|
||||
|
||||
if (this->data(rhs).version == 0)
|
||||
return;
|
||||
|
||||
UInt64 version = this->data(place).version++;
|
||||
if (version == 0)
|
||||
{
|
||||
for (auto & rhs_elem : rhs_set)
|
||||
set.insert(rhs_elem.getValue());
|
||||
return;
|
||||
}
|
||||
|
||||
if (!set.empty())
|
||||
{
|
||||
auto create_new_set = [](auto & lhs_val, auto & rhs_val)
|
||||
{
|
||||
typename State::Set new_set;
|
||||
for (auto & lhs_elem : lhs_val)
|
||||
{
|
||||
auto res = rhs_val.find(lhs_elem.getValue());
|
||||
if (res != nullptr)
|
||||
new_set.insert(lhs_elem.getValue());
|
||||
}
|
||||
return new_set;
|
||||
};
|
||||
auto new_set = rhs_set.size() < set.size() ? create_new_set(rhs_set, set) : create_new_set(set, rhs_set);
|
||||
set = std::move(new_set);
|
||||
}
|
||||
}
|
||||
|
||||
void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
|
||||
{
|
||||
auto & set = this->data(place).value;
|
||||
auto version = this->data(place).version;
|
||||
|
||||
writeVarUInt(version, buf);
|
||||
writeVarUInt(set.size(), buf);
|
||||
|
||||
for (const auto & elem : set)
|
||||
writeIntBinary(elem.getValue(), buf);
|
||||
}
|
||||
|
||||
void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena *) const override
|
||||
{
|
||||
readVarUInt(this->data(place).version, buf);
|
||||
this->data(place).value.read(buf);
|
||||
}
|
||||
|
||||
void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
|
||||
{
|
||||
ColumnArray & arr_to = assert_cast<ColumnArray &>(to);
|
||||
ColumnArray::Offsets & offsets_to = arr_to.getOffsets();
|
||||
|
||||
const auto & set = this->data(place).value;
|
||||
offsets_to.push_back(offsets_to.back() + set.size());
|
||||
|
||||
typename ColumnVector<T>::Container & data_to = assert_cast<ColumnVector<T> &>(arr_to.getData()).getData();
|
||||
size_t old_size = data_to.size();
|
||||
data_to.resize(old_size + set.size());
|
||||
|
||||
size_t i = 0;
|
||||
for (auto it = set.begin(); it != set.end(); ++it, ++i)
|
||||
data_to[old_size + i] = it->getValue();
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
/// Generic implementation, it uses serialized representation as object descriptor.
|
||||
struct AggregateFunctionGroupArrayIntersectGenericData
|
||||
{
|
||||
using Set = HashSet<StringRef>;
|
||||
|
||||
Set value;
|
||||
UInt64 version = 0;
|
||||
};
|
||||
|
||||
/** Template parameter with true value should be used for columns that store their elements in memory continuously.
|
||||
* For such columns GroupArrayIntersect() can be implemented more efficiently (especially for small numeric arrays).
|
||||
*/
|
||||
template <bool is_plain_column = false>
|
||||
class AggregateFunctionGroupArrayIntersectGeneric
|
||||
: public IAggregateFunctionDataHelper<AggregateFunctionGroupArrayIntersectGenericData,
|
||||
AggregateFunctionGroupArrayIntersectGeneric<is_plain_column>>
|
||||
{
|
||||
const DataTypePtr & input_data_type;
|
||||
|
||||
using State = AggregateFunctionGroupArrayIntersectGenericData;
|
||||
|
||||
public:
|
||||
AggregateFunctionGroupArrayIntersectGeneric(const DataTypePtr & input_data_type_, const Array & parameters_)
|
||||
: IAggregateFunctionDataHelper<AggregateFunctionGroupArrayIntersectGenericData, AggregateFunctionGroupArrayIntersectGeneric<is_plain_column>>({input_data_type_}, parameters_, input_data_type_)
|
||||
, input_data_type(this->argument_types[0]) {}
|
||||
|
||||
AggregateFunctionGroupArrayIntersectGeneric(const DataTypePtr & input_data_type_, const Array & parameters_, const DataTypePtr & result_type_)
|
||||
: IAggregateFunctionDataHelper<AggregateFunctionGroupArrayIntersectGenericData, AggregateFunctionGroupArrayIntersectGeneric<is_plain_column>>({input_data_type_}, parameters_, result_type_)
|
||||
, input_data_type(result_type_) {}
|
||||
|
||||
String getName() const override { return "GroupArrayIntersect"; }
|
||||
|
||||
bool allocatesMemoryInArena() const override { return true; }
|
||||
|
||||
void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override
|
||||
{
|
||||
auto & set = this->data(place).value;
|
||||
auto & version = this->data(place).version;
|
||||
bool inserted;
|
||||
State::Set::LookupResult it;
|
||||
|
||||
const auto data_column = assert_cast<const ColumnArray &>(*columns[0]).getDataPtr();
|
||||
const auto & offsets = assert_cast<const ColumnArray &>(*columns[0]).getOffsets();
|
||||
const size_t offset = offsets[static_cast<ssize_t>(row_num) - 1];
|
||||
const auto arr_size = offsets[row_num] - offset;
|
||||
|
||||
++version;
|
||||
if (version == 1)
|
||||
{
|
||||
for (size_t i = 0; i < arr_size; ++i)
|
||||
{
|
||||
if constexpr (is_plain_column)
|
||||
set.emplace(ArenaKeyHolder{data_column->getDataAt(offset + i), *arena}, it, inserted);
|
||||
else
|
||||
{
|
||||
const char * begin = nullptr;
|
||||
StringRef serialized = data_column->serializeValueIntoArena(offset + i, *arena, begin);
|
||||
assert(serialized.data != nullptr);
|
||||
set.emplace(SerializedKeyHolder{serialized, *arena}, it, inserted);
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (!set.empty())
|
||||
{
|
||||
typename State::Set new_set;
|
||||
for (size_t i = 0; i < arr_size; ++i)
|
||||
{
|
||||
if constexpr (is_plain_column)
|
||||
{
|
||||
it = set.find(data_column->getDataAt(offset + i));
|
||||
if (it != nullptr)
|
||||
new_set.emplace(ArenaKeyHolder{data_column->getDataAt(offset + i), *arena}, it, inserted);
|
||||
}
|
||||
else
|
||||
{
|
||||
const char * begin = nullptr;
|
||||
StringRef serialized = data_column->serializeValueIntoArena(offset + i, *arena, begin);
|
||||
assert(serialized.data != nullptr);
|
||||
it = set.find(serialized);
|
||||
|
||||
if (it != nullptr)
|
||||
new_set.emplace(SerializedKeyHolder{serialized, *arena}, it, inserted);
|
||||
}
|
||||
}
|
||||
set = std::move(new_set);
|
||||
}
|
||||
}
|
||||
|
||||
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override
|
||||
{
|
||||
auto & set = this->data(place).value;
|
||||
const auto & rhs_value = this->data(rhs).value;
|
||||
|
||||
if (this->data(rhs).version == 0)
|
||||
return;
|
||||
|
||||
UInt64 version = this->data(place).version++;
|
||||
if (version == 0)
|
||||
{
|
||||
bool inserted;
|
||||
State::Set::LookupResult it;
|
||||
for (auto & rhs_elem : rhs_value)
|
||||
{
|
||||
set.emplace(ArenaKeyHolder{rhs_elem.getValue(), *arena}, it, inserted);
|
||||
}
|
||||
}
|
||||
else if (!set.empty())
|
||||
{
|
||||
auto create_new_map = [](auto & lhs_val, auto & rhs_val)
|
||||
{
|
||||
typename State::Set new_map;
|
||||
for (auto & lhs_elem : lhs_val)
|
||||
{
|
||||
auto val = rhs_val.find(lhs_elem.getValue());
|
||||
if (val != nullptr)
|
||||
new_map.insert(lhs_elem.getValue());
|
||||
}
|
||||
return new_map;
|
||||
};
|
||||
auto new_map = rhs_value.size() < set.size() ? create_new_map(rhs_value, set) : create_new_map(set, rhs_value);
|
||||
set = std::move(new_map);
|
||||
}
|
||||
}
|
||||
|
||||
void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
|
||||
{
|
||||
auto & set = this->data(place).value;
|
||||
auto & version = this->data(place).version;
|
||||
writeVarUInt(version, buf);
|
||||
writeVarUInt(set.size(), buf);
|
||||
|
||||
for (const auto & elem : set)
|
||||
writeStringBinary(elem.getValue(), buf);
|
||||
}
|
||||
|
||||
void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena * arena) const override
|
||||
{
|
||||
auto & set = this->data(place).value;
|
||||
auto & version = this->data(place).version;
|
||||
size_t size;
|
||||
readVarUInt(version, buf);
|
||||
readVarUInt(size, buf);
|
||||
set.reserve(size);
|
||||
UInt64 elem_version;
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
{
|
||||
auto key = readStringBinaryInto(*arena, buf);
|
||||
readVarUInt(elem_version, buf);
|
||||
set.insert(key);
|
||||
}
|
||||
}
|
||||
|
||||
void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
|
||||
{
|
||||
ColumnArray & arr_to = assert_cast<ColumnArray &>(to);
|
||||
ColumnArray::Offsets & offsets_to = arr_to.getOffsets();
|
||||
IColumn & data_to = arr_to.getData();
|
||||
|
||||
auto & set = this->data(place).value;
|
||||
|
||||
offsets_to.push_back(offsets_to.back() + set.size());
|
||||
|
||||
for (auto & elem : set)
|
||||
{
|
||||
if constexpr (is_plain_column)
|
||||
data_to.insertData(elem.getValue().data, elem.getValue().size);
|
||||
else
|
||||
std::ignore = data_to.deserializeAndInsertFromArena(elem.getValue().data);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
/// Substitute return type for Date and DateTime
|
||||
/// Numeric implementation over DataTypeDate::FieldType that reports Array(Date) as its result type.
class AggregateFunctionGroupArrayIntersectDate : public AggregateFunctionGroupArrayIntersect<DataTypeDate::FieldType>
{
public:
    explicit AggregateFunctionGroupArrayIntersectDate(const DataTypePtr & argument_type, const Array & parameters_)
        : AggregateFunctionGroupArrayIntersect<DataTypeDate::FieldType>(argument_type, parameters_, createResultType())
    {
    }

    static DataTypePtr createResultType()
    {
        DataTypePtr element_type = std::make_shared<DataTypeDate>();
        return std::make_shared<DataTypeArray>(element_type);
    }
};
|
||||
|
||||
/// Numeric implementation over DataTypeDateTime::FieldType that reports Array(DateTime) as its result type.
class AggregateFunctionGroupArrayIntersectDateTime : public AggregateFunctionGroupArrayIntersect<DataTypeDateTime::FieldType>
{
public:
    explicit AggregateFunctionGroupArrayIntersectDateTime(const DataTypePtr & argument_type, const Array & parameters_)
        : AggregateFunctionGroupArrayIntersect<DataTypeDateTime::FieldType>(argument_type, parameters_, createResultType())
    {
    }

    static DataTypePtr createResultType()
    {
        DataTypePtr element_type = std::make_shared<DataTypeDateTime>();
        return std::make_shared<DataTypeArray>(element_type);
    }
};
|
||||
|
||||
/// Numeric implementation over DataTypeDate32::FieldType that reports Array(Date32) as its result type.
class AggregateFunctionGroupArrayIntersectDate32 : public AggregateFunctionGroupArrayIntersect<DataTypeDate32::FieldType>
{
public:
    explicit AggregateFunctionGroupArrayIntersectDate32(const DataTypePtr & argument_type, const Array & parameters_)
        : AggregateFunctionGroupArrayIntersect<DataTypeDate32::FieldType>(argument_type, parameters_, createResultType())
    {
    }

    static DataTypePtr createResultType()
    {
        DataTypePtr element_type = std::make_shared<DataTypeDate32>();
        return std::make_shared<DataTypeArray>(element_type);
    }
};
|
||||
|
||||
/// Creates the implementation for element types that createWithNumericType does not handle.
///
/// `argument_type` is the Array(...) argument of the aggregate function (the only caller visible
/// here has already checked that). The dispatch is done on the *nested* element type: dispatching
/// on the array type itself can never match Date/DateTime/Date32/DateTime64, which would leave
/// the dedicated branches below unreachable.
///
/// The caller takes ownership of the returned raw pointer; the function never returns nullptr.
IAggregateFunction * createWithExtraTypes(const DataTypePtr & argument_type, const Array & parameters)
{
    const auto & nested_type = dynamic_cast<const DataTypeArray &>(*argument_type).getNestedType();
    WhichDataType which(nested_type);

    if (which.idx == TypeIndex::Date)
        return new AggregateFunctionGroupArrayIntersectDate(argument_type, parameters);
    if (which.idx == TypeIndex::DateTime)
        return new AggregateFunctionGroupArrayIntersectDateTime(argument_type, parameters);
    if (which.idx == TypeIndex::Date32)
        return new AggregateFunctionGroupArrayIntersectDate32(argument_type, parameters);

    if (which.idx == TypeIndex::DateTime64)
    {
        /// Rebuild the result type so that it carries the scale of the element type.
        const auto & datetime64_type = assert_cast<const DataTypeDateTime64 &>(*nested_type);
        const auto return_type = std::make_shared<DataTypeArray>(std::make_shared<DataTypeDateTime64>(datetime64_type.getScale()));

        return new AggregateFunctionGroupArrayIntersectGeneric<true>(argument_type, parameters, return_type);
    }

    /// Check that we can use plain version of AggregateFunctionGroupArrayIntersectGeneric
    if (nested_type->isValueUnambiguouslyRepresentedInContiguousMemoryRegion())
        return new AggregateFunctionGroupArrayIntersectGeneric<true>(argument_type, parameters);

    return new AggregateFunctionGroupArrayIntersectGeneric<false>(argument_type, parameters);
}
|
||||
|
||||
/// Picks the implementation for an Array(...) argument: the numeric one when the nested type is
/// handled by createWithNumericType, otherwise whatever createWithExtraTypes returns.
/// Throws ILLEGAL_TYPE_OF_ARGUMENT when neither produced a function.
inline AggregateFunctionPtr createAggregateFunctionGroupArrayIntersectImpl(const std::string & name, const DataTypePtr & argument_type, const Array & parameters)
{
    const auto & array_type = dynamic_cast<const DataTypeArray &>(*argument_type);
    const auto & nested_type = array_type.getNestedType();

    AggregateFunctionPtr res(createWithNumericType<AggregateFunctionGroupArrayIntersect, const DataTypePtr &>(*nested_type, argument_type, parameters));
    if (!res)
        res = AggregateFunctionPtr(createWithExtraTypes(argument_type, parameters));

    if (res)
        return res;

    throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument for aggregate function {}",
        argument_type->getName(), name);
}
|
||||
|
||||
/// Factory callback: requires exactly one argument of Array type and no parameters,
/// then delegates the choice of implementation to createAggregateFunctionGroupArrayIntersectImpl.
AggregateFunctionPtr createAggregateFunctionGroupArrayIntersect(
    const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings *)
{
    assertUnary(name, argument_types);

    const bool is_array = WhichDataType(argument_types.at(0)).isArray();
    if (!is_array)
        throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Aggregate function groupArrayIntersect accepts only array type argument.");

    const bool has_parameters = !parameters.empty();
    if (has_parameters)
        throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
            "Incorrect number of parameters for aggregate function {}, should be 0", name);

    return createAggregateFunctionGroupArrayIntersectImpl(name, argument_types[0], parameters);
}
|
||||
|
||||
}
|
||||
|
||||
/// Registers groupArrayIntersect in the factory with its properties.
void registerAggregateFunctionGroupArrayIntersect(AggregateFunctionFactory & factory)
{
    const AggregateFunctionProperties intersect_properties = {.returns_default_when_only_null = false, .is_order_dependent = true};

    factory.registerFunction("groupArrayIntersect", {createAggregateFunctionGroupArrayIntersect, intersect_properties});
}
|
||||
|
||||
}
|
@ -204,7 +204,7 @@ private:
|
||||
class Adam : public IWeightsUpdater
|
||||
{
|
||||
public:
|
||||
Adam(size_t num_params)
|
||||
explicit Adam(size_t num_params)
|
||||
{
|
||||
beta1_powered = beta1;
|
||||
beta2_powered = beta2;
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user