Mirror of https://github.com/ClickHouse/ClickHouse.git (synced 2024-11-23 08:02:02 +00:00)

Merge branch 'master' into input_format_null_as_default-improvement
Commit 134e50ddc4

.github/workflows/docs_release.yml (vendored, 118 changed lines)
@@ -1,118 +0,0 @@
name: DocsReleaseChecks

env:
  # Force the stdout and stderr streams to be unbuffered
  PYTHONUNBUFFERED: 1

concurrency:
  group: master-release
  cancel-in-progress: true
'on':
  push:
    branches:
      - master
    paths:
      - '.github/**'
      - 'docker/docs/release/**'
      - 'docs/**'
      - 'utils/list-versions/version_date.tsv'
      - 'website/**'
      - 'utils/check-style/aspell-ignore/**'
  workflow_dispatch:
jobs:
  DockerHubPushAarch64:
    runs-on: [self-hosted, style-checker-aarch64]
    steps:
      - name: Check out repository code
        uses: ClickHouse/checkout@v1
        with:
          clear-repository: true
      - name: Images check
        run: |
          cd "$GITHUB_WORKSPACE/tests/ci"
          python3 docker_images_check.py --suffix aarch64
      - name: Upload images files to artifacts
        uses: actions/upload-artifact@v3
        with:
          name: changed_images_aarch64
          path: ${{ runner.temp }}/docker_images_check/changed_images_aarch64.json
  DockerHubPushAmd64:
    runs-on: [self-hosted, style-checker]
    steps:
      - name: Check out repository code
        uses: ClickHouse/checkout@v1
        with:
          clear-repository: true
      - name: Images check
        run: |
          cd "$GITHUB_WORKSPACE/tests/ci"
          python3 docker_images_check.py --suffix amd64
      - name: Upload images files to artifacts
        uses: actions/upload-artifact@v3
        with:
          name: changed_images_amd64
          path: ${{ runner.temp }}/docker_images_check/changed_images_amd64.json
  DockerHubPush:
    needs: [DockerHubPushAmd64, DockerHubPushAarch64]
    runs-on: [self-hosted, style-checker]
    steps:
      - name: Check out repository code
        uses: ClickHouse/checkout@v1
        with:
          clear-repository: true
      - name: Download changed aarch64 images
        uses: actions/download-artifact@v3
        with:
          name: changed_images_aarch64
          path: ${{ runner.temp }}
      - name: Download changed amd64 images
        uses: actions/download-artifact@v3
        with:
          name: changed_images_amd64
          path: ${{ runner.temp }}
      - name: Images check
        run: |
          cd "$GITHUB_WORKSPACE/tests/ci"
          python3 docker_manifests_merge.py --suffix amd64 --suffix aarch64
      - name: Upload images files to artifacts
        uses: actions/upload-artifact@v3
        with:
          name: changed_images
          path: ${{ runner.temp }}/changed_images.json
  DocsRelease:
    needs: DockerHubPush
    runs-on: [self-hosted, func-tester]
    steps:
      - name: Set envs
        # https://docs.github.com/en/actions/learn-github-actions/workflow-commands-for-github-actions#multiline-strings
        run: |
          cat >> "$GITHUB_ENV" << 'EOF'
          TEMP_PATH=${{runner.temp}}/docs_release
          REPO_COPY=${{runner.temp}}/docs_release/ClickHouse
          CLOUDFLARE_TOKEN=${{secrets.CLOUDFLARE}}
          ROBOT_CLICKHOUSE_SSH_KEY<<RCSK
          ${{secrets.ROBOT_CLICKHOUSE_SSH_KEY}}
          RCSK
          EOF
      - name: Check out repository code
        uses: ClickHouse/checkout@v1
        with:
          clear-repository: true
      - name: Download changed images
        uses: actions/download-artifact@v3
        with:
          name: changed_images
          path: ${{ env.TEMP_PATH }}
      - name: Docs Release
        run: |
          sudo rm -fr "$TEMP_PATH"
          mkdir -p "$TEMP_PATH"
          cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
          cd "$REPO_COPY/tests/ci"
          python3 docs_release.py
      - name: Cleanup
        if: always()
        run: |
          docker ps --quiet | xargs --no-run-if-empty docker kill ||:
          docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
          sudo rm -fr "$TEMP_PATH"
.github/workflows/nightly.yml (vendored, 4 changed lines)
@@ -107,7 +107,7 @@ jobs:
        run: |
          curl --form token="${COVERITY_TOKEN}" \
            --form email='security+coverity@clickhouse.com' \
            --form file="@$TEMP_PATH/$BUILD_NAME/coverity-scan.tar.zst" \
            --form file="@$TEMP_PATH/$BUILD_NAME/coverity-scan.tar.gz" \
            --form version="${GITHUB_REF#refs/heads/}-${GITHUB_SHA::6}" \
            --form description="Nighly Scan: $(date +'%Y-%m-%dT%H:%M:%S')" \
            https://scan.coverity.com/builds?project=ClickHouse%2FClickHouse
@@ -154,7 +154,7 @@ jobs:
      - name: Set Up Build Tools
        run: |
          sudo apt-get update
          sudo apt-get install -yq git cmake ccache python3 ninja-build
          sudo apt-get install -yq git cmake ccache ninja-build python3 yasm
          sudo bash -c "$(wget -O - https://apt.llvm.org/llvm.sh)"
      - name: Run build-wrapper
        run: |
CHANGELOG.md (183 changed lines)
@@ -1,9 +1,192 @@
### Table of Contents
**[ClickHouse release v23.2, 2023-02-23](#232)**<br/>
**[ClickHouse release v23.1, 2023-01-25](#231)**<br/>
**[Changelog for 2022](https://clickhouse.com/docs/en/whats-new/changelog/2022/)**<br/>

# 2023 Changelog

### <a id="232"></a> ClickHouse release 23.2, 2023-02-23

#### Backward Incompatible Change
* Extend function "toDayOfWeek()" (alias: "DAYOFWEEK") with a mode argument that encodes whether the week starts on Monday or Sunday and whether counting starts at 0 or 1. For consistency with other date time functions, the mode argument was inserted between the time and the time zone arguments. This breaks existing usage of the (previously undocumented) 2-argument syntax "toDayOfWeek(time, time_zone)". A fix is to rewrite the function into "toDayOfWeek(time, 0, time_zone)" (see the sketch after this list). [#45233](https://github.com/ClickHouse/ClickHouse/pull/45233) ([Robert Schulze](https://github.com/rschu1ze)).
* Rename setting `max_query_cache_size` to `filesystem_cache_max_download_size`. [#45614](https://github.com/ClickHouse/ClickHouse/pull/45614) ([Kseniia Sumarokova](https://github.com/kssenii)).
* The default user will not have permissions for access type `SHOW NAMED COLLECTION` by default (e.g. the default user will no longer be able to grant ALL to other users as it could before, which makes this change backward incompatible). [#46010](https://github.com/ClickHouse/ClickHouse/pull/46010) ([Kseniia Sumarokova](https://github.com/kssenii)).
* If the SETTINGS clause is specified before the FORMAT clause, the settings will be applied to formatting as well. [#46003](https://github.com/ClickHouse/ClickHouse/pull/46003) ([Azat Khuzhin](https://github.com/azat)).
* Remove support for setting `materialized_postgresql_allow_automatic_update` (which was turned off by default). Fix integration tests. [#46106](https://github.com/ClickHouse/ClickHouse/pull/46106) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Slightly improve performance of `countDigits` on realistic datasets. This closes [#44518](https://github.com/ClickHouse/ClickHouse/issues/44518). In previous versions, `countDigits(0)` returned `0`; now it returns `1`, which is more correct and follows the existing documentation (see the sketch after this list). [#46187](https://github.com/ClickHouse/ClickHouse/pull/46187) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Disallow creation of new columns compressed by a combination of codecs "Delta" or "DoubleDelta" followed by codecs "Gorilla" or "FPC". This can be bypassed using setting "allow_suspicious_codecs = true". [#45652](https://github.com/ClickHouse/ClickHouse/pull/45652) ([Robert Schulze](https://github.com/rschu1ze)).
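
A minimal sketch of the two backward-incompatible behaviour changes called out above; the values are illustrative and not taken from the release notes:

```sql
-- toDayOfWeek: the old two-argument call toDayOfWeek(time, time_zone) must now
-- pass an explicit mode argument between the time and the time zone.
SELECT toDayOfWeek(now(), 0, 'UTC');

-- countDigits: zero now counts as one digit.
SELECT countDigits(0);  -- returns 1 in 23.2 (previously 0)
```
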
#### New Feature
* Add `StorageIceberg` and table function `iceberg` to access Iceberg tables stored on S3. [#45384](https://github.com/ClickHouse/ClickHouse/pull/45384) ([flynn](https://github.com/ucasfl)).
* Allow configuring storage as `SETTINGS disk = '<disk_name>'` (instead of `storage_policy`) and with explicit disk creation `SETTINGS disk = disk(type=s3, ...)`. [#41976](https://github.com/ClickHouse/ClickHouse/pull/41976) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Expose `ProfileEvents` counters in `system.part_log`. [#38614](https://github.com/ClickHouse/ClickHouse/pull/38614) ([Bharat Nallan](https://github.com/bharatnc)).
* Enrich the existing `ReplacingMergeTree` engine to allow duplicate insertions. It leverages the power of both `ReplacingMergeTree` and `CollapsingMergeTree` in one MergeTree engine. Deleted data is not returned when queried, but it is not removed from disk either. [#41005](https://github.com/ClickHouse/ClickHouse/pull/41005) ([youennL-cs](https://github.com/youennL-cs)).
* Add `generateULID` function. Closes [#36536](https://github.com/ClickHouse/ClickHouse/issues/36536). [#44662](https://github.com/ClickHouse/ClickHouse/pull/44662) ([Nikolay Degterinsky](https://github.com/evillique)).
* Add `corrMatrix` aggregate function, which computes the correlation between each pair of columns. Since the aggregate functions `covarSamp` and `covarPop` are similar to `corr`, `covarSampMatrix` and `covarPopMatrix` were added as well. Closes [#44587](https://github.com/ClickHouse/ClickHouse/issues/44587). [#44680](https://github.com/ClickHouse/ClickHouse/pull/44680) ([FFFFFFFHHHHHHH](https://github.com/FFFFFFFHHHHHHH)).
* Introduce the `arrayShuffle` function for random array permutations. [#45271](https://github.com/ClickHouse/ClickHouse/pull/45271) ([Joanna Hulboj](https://github.com/jh0x)).
* Support the `FIXED_SIZE_BINARY` type in Arrow and `FIXED_LENGTH_BYTE_ARRAY` in `Parquet`, and map them to `FixedString`. Add settings `output_format_parquet_fixed_string_as_fixed_byte_array`/`output_format_arrow_fixed_string_as_fixed_byte_array` to control the default output type for FixedString. Closes [#45326](https://github.com/ClickHouse/ClickHouse/issues/45326). [#45340](https://github.com/ClickHouse/ClickHouse/pull/45340) ([Kruglov Pavel](https://github.com/Avogar)).
* Add a new column `last_exception_time` to system.replication_queue. [#45457](https://github.com/ClickHouse/ClickHouse/pull/45457) ([Frank Chen](https://github.com/FrankChen021)).
* Add two new functions which allow for user-defined keys/seeds with SipHash{64,128}. [#45513](https://github.com/ClickHouse/ClickHouse/pull/45513) ([Salvatore Mesoraca](https://github.com/aiven-sal)).
* Allow a three-argument version for table function `format`. Closes [#45808](https://github.com/ClickHouse/ClickHouse/issues/45808). [#45873](https://github.com/ClickHouse/ClickHouse/pull/45873) ([FFFFFFFHHHHHHH](https://github.com/FFFFFFFHHHHHHH)).
* Add `JodaTime` format support for 'x', 'w', 'S'. Refer to https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html. [#46073](https://github.com/ClickHouse/ClickHouse/pull/46073) ([zk_kiger](https://github.com/zk-kiger)).
* Support window function `ntile` (see the sketch after this list).
* Add setting `final` to implicitly apply the `FINAL` modifier to every table. [#40945](https://github.com/ClickHouse/ClickHouse/pull/40945) ([Arthur Passos](https://github.com/arthurpassos)).
* Added `arrayPartialSort` and `arrayPartialReverseSort` functions (see the sketch after this list). [#46296](https://github.com/ClickHouse/ClickHouse/pull/46296) ([Joanna Hulboj](https://github.com/jh0x)).
* The new HTTP parameter `client_protocol_version` allows setting a client protocol version for HTTP responses using the Native format. [#40397](https://github.com/ClickHouse/ClickHouse/issues/40397). [#46360](https://github.com/ClickHouse/ClickHouse/pull/46360) ([Geoff Genz](https://github.com/genzgd)).
* Add new function `regexpExtract`, like the Spark function `REGEXP_EXTRACT`, for compatibility; it is similar to the existing function `extract`. [#46469](https://github.com/ClickHouse/ClickHouse/pull/46469) ([李扬](https://github.com/taiyang-li)).
* Add new function `JSONArrayLength`, which returns the number of elements in the outermost JSON array. The function returns NULL if the input JSON string is invalid (see the sketch after this list). [#46631](https://github.com/ClickHouse/ClickHouse/pull/46631) ([李扬](https://github.com/taiyang-li)).
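
A small sketch of the new `regexpExtract` and `JSONArrayLength` functions mentioned above; the inputs are made up for illustration:

```sql
SELECT
    regexpExtract('ClickHouse 23.2.1', '(\\d+)\\.(\\d+)', 1) AS major,  -- '23'
    JSONArrayLength('[1, 2, 3]') AS len,       -- 3
    JSONArrayLength('not json')  AS invalid;   -- NULL
```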
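
A sketch of the `ntile` window function noted above, splitting ten rows into four buckets; the window frame is spelled out explicitly to stay on the safe side:

```sql
SELECT
    number,
    ntile(4) OVER (ORDER BY number ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS bucket
FROM numbers(10);
```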
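
And a sketch of the array helpers referenced above, with made-up input values:

```sql
SELECT
    arrayPartialSort(3, [5, 1, 4, 2, 3]) AS partially_sorted,  -- only the first 3 positions are guaranteed to be sorted
    arrayShuffle([1, 2, 3, 4, 5])        AS shuffled;          -- a random permutation
```
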
#### Performance Improvement
* The introduced logic works if the PREWHERE condition is a conjunction of multiple conditions (cond1 AND cond2 AND ...). It groups the conditions that require reading the same columns into steps. After each step the corresponding part of the full condition is computed and the result rows might be filtered. This allows reading fewer rows in the next steps, thus saving IO bandwidth and doing less computation. This logic is disabled by default for now. It will be enabled by default in one of the future releases once it is known to not have any regressions, so it is highly encouraged to be used for testing. It can be controlled by 2 settings: "enable_multiple_prewhere_read_steps" and "move_all_conditions_to_prewhere". [#46140](https://github.com/ClickHouse/ClickHouse/pull/46140) ([Alexander Gololobov](https://github.com/davenger)).
* An option added to aggregate partitions independently if the table partition key and group by key are compatible. Controlled by the setting `allow_aggregate_partitions_independently`. Disabled by default because of limited applicability (please refer to the docs). [#45364](https://github.com/ClickHouse/ClickHouse/pull/45364) ([Nikita Taranov](https://github.com/nickitat)).
* Allow using the Vertical merge algorithm with parts in Compact format. This will allow the ClickHouse server to use much less memory for background operations. This closes [#46084](https://github.com/ClickHouse/ClickHouse/issues/46084). [#45681](https://github.com/ClickHouse/ClickHouse/pull/45681) [#46282](https://github.com/ClickHouse/ClickHouse/pull/46282) ([Anton Popov](https://github.com/CurtizJ)).
* Optimize the `Parquet` reader by using a batch reader. [#45878](https://github.com/ClickHouse/ClickHouse/pull/45878) ([LiuNeng](https://github.com/liuneng1994)).
* Add new `local_filesystem_read_method` method `io_uring` based on the asynchronous Linux [io_uring](https://kernel.dk/io_uring.pdf) subsystem, improving read performance almost universally compared to the default `pread` method. [#38456](https://github.com/ClickHouse/ClickHouse/pull/38456) ([Saulius Valatka](https://github.com/sauliusvl)).
* Rewrite aggregate functions with an `if` expression as argument when logically equivalent. For example, `avg(if(cond, col, null))` can be rewritten to `avgIf(col, cond)`. This helps performance (see the sketch after this list). [#44730](https://github.com/ClickHouse/ClickHouse/pull/44730) ([李扬](https://github.com/taiyang-li)).
* Improve lower/upper function performance with AVX512 instructions. [#37894](https://github.com/ClickHouse/ClickHouse/pull/37894) ([yaqi-zhao](https://github.com/yaqi-zhao)).
* Remove the limitation that on systems with >=32 cores and SMT disabled ClickHouse uses only half of the cores (the case when you disable Hyper-Threading in BIOS). [#44973](https://github.com/ClickHouse/ClickHouse/pull/44973) ([Robert Schulze](https://github.com/rschu1ze)).
* Improve performance of function `multiIf` by columnar execution, a 2.3x speedup. [#45296](https://github.com/ClickHouse/ClickHouse/pull/45296) ([李扬](https://github.com/taiyang-li)).
* Add a fast path for function `position` when the needle is empty. [#45382](https://github.com/ClickHouse/ClickHouse/pull/45382) ([李扬](https://github.com/taiyang-li)).
* Enable `query_plan_remove_redundant_sorting` optimization by default. Optimization implemented in [#45420](https://github.com/ClickHouse/ClickHouse/issues/45420). [#45567](https://github.com/ClickHouse/ClickHouse/pull/45567) ([Igor Nikonov](https://github.com/devcrafter)).
* Increased HTTP Transfer Encoding chunk size to improve performance of large queries using the HTTP interface. [#45593](https://github.com/ClickHouse/ClickHouse/pull/45593) ([Geoff Genz](https://github.com/genzgd)).
* Fixed performance of short `SELECT` queries that read from tables with a large number of `Array`/`Map`/`Nested` columns. [#45630](https://github.com/ClickHouse/ClickHouse/pull/45630) ([Anton Popov](https://github.com/CurtizJ)).
* Improve performance of filtering for big integers and decimal types. [#45949](https://github.com/ClickHouse/ClickHouse/pull/45949) ([李扬](https://github.com/taiyang-li)).
* This change effectively reduces the overhead of obtaining the filter from ColumnNullable(UInt8) and improves overall query performance. To evaluate the impact of this change, we adopted the TPC-H benchmark but revised the column types from non-nullable to nullable, and measured the QPS of its queries as the performance indicator. [#45962](https://github.com/ClickHouse/ClickHouse/pull/45962) ([Zhiguo Zhou](https://github.com/ZhiguoZh)).
* Make the `_part` and `_partition_id` virtual columns be of type `LowCardinality(String)`. Closes [#45964](https://github.com/ClickHouse/ClickHouse/issues/45964). [#45975](https://github.com/ClickHouse/ClickHouse/pull/45975) ([flynn](https://github.com/ucasfl)).
* Improve the performance of Decimal conversion when the scale does not change. [#46095](https://github.com/ClickHouse/ClickHouse/pull/46095) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Allow increasing prefetching for read data. [#46168](https://github.com/ClickHouse/ClickHouse/pull/46168) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Rewrite `arrayExists(x -> x = 1, arr)` -> `has(arr, 1)`, which improves performance by 1.34x (see the sketch after this list). [#46188](https://github.com/ClickHouse/ClickHouse/pull/46188) ([李扬](https://github.com/taiyang-li)).
* Fix too big memory usage for vertical merges on non-remote disk. Respect `max_insert_delayed_streams_for_parallel_write` for the remote disk. [#46275](https://github.com/ClickHouse/ClickHouse/pull/46275) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Update zstd to v1.5.4. It has some minor improvements in performance and compression ratio. If you run replicas with different versions of ClickHouse you may see reasonable error messages `Data after merge/mutation is not byte-identical to data on another replicas.` with an explanation. These messages are OK and you should not worry. [#46280](https://github.com/ClickHouse/ClickHouse/pull/46280) ([Raúl Marín](https://github.com/Algunenano)).
* Fix performance degradation caused by [#39737](https://github.com/ClickHouse/ClickHouse/issues/39737). [#46309](https://github.com/ClickHouse/ClickHouse/pull/46309) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* The `replicas_status` handle will answer quickly even in case of a large replication queue. [#46310](https://github.com/ClickHouse/ClickHouse/pull/46310) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Add AVX512 support for the aggregate function `sum`, unary arithmetic functions, and comparison functions. [#37870](https://github.com/ClickHouse/ClickHouse/pull/37870) ([zhao zhou](https://github.com/zzachimed)).
* Rewrote the code around marks distribution and the overall coordination of the reading in order to achieve the maximum performance improvement. This closes [#34527](https://github.com/ClickHouse/ClickHouse/issues/34527). [#43772](https://github.com/ClickHouse/ClickHouse/pull/43772) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
* Remove redundant DISTINCT clauses in queries (subqueries). Implemented on top of the query plan. It performs a similar optimization to `optimize_duplicate_order_by_and_distinct` regarding DISTINCT clauses. Can be enabled via the `query_plan_remove_redundant_distinct` setting. Related to [#42648](https://github.com/ClickHouse/ClickHouse/issues/42648). [#44176](https://github.com/ClickHouse/ClickHouse/pull/44176) ([Igor Nikonov](https://github.com/devcrafter)).
* A few query rewrite optimizations: `sumIf(123, cond) -> 123 * countIf(1, cond)`, `sum(if(cond, 123, 0)) -> 123 * countIf(cond)`, `sum(if(cond, 0, 123)) -> 123 * countIf(not(cond))` (see the sketch after this list). [#44728](https://github.com/ClickHouse/ClickHouse/pull/44728) ([李扬](https://github.com/taiyang-li)).
* Improved how memory-bound merging and aggregation in order on top of the query plan interact. Previously we fell back to explicit sorting for AIO in some cases when it wasn't actually needed. [#45892](https://github.com/ClickHouse/ClickHouse/pull/45892) ([Nikita Taranov](https://github.com/nickitat)).
* Concurrent merges are scheduled using round-robin by default to ensure fair and starvation-free operation. Previously in heavily overloaded shards, big merges could possibly be starved by smaller merges due to the use of strict priority scheduling. Added `background_merges_mutations_scheduling_policy` server config option to select the scheduling algorithm (`round_robin` or `shortest_task_first`). [#46247](https://github.com/ClickHouse/ClickHouse/pull/46247) ([Sergei Trifonov](https://github.com/serxa)).
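
A sketch of the aggregate-rewrite equivalences described above; the condition and constants are made up, and both forms of each pair should return the same result:

```sql
SELECT
    avg(if(number % 2 = 0, number, NULL)) AS avg_if_expression,
    avgIf(number, number % 2 = 0)         AS avg_if_combinator,
    sum(if(number % 2 = 0, 123, 0))       AS sum_if_expression,
    123 * countIf(number % 2 = 0)         AS count_if_rewrite
FROM numbers(10);
```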
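
Similarly, the `arrayExists` rewrite turns the lambda form into the equivalent `has` call (illustrative literals):

```sql
SELECT
    arrayExists(x -> x = 1, [3, 2, 1]) AS exists_form,
    has([3, 2, 1], 1)                  AS has_form;
```
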
#### Improvement
* Enable retries for INSERT by default in case of ZooKeeper session loss. We already use it in production. [#46308](https://github.com/ClickHouse/ClickHouse/pull/46308) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Add the ability to ignore unknown keys in JSON objects for named tuples (`input_format_json_ignore_unknown_keys_in_named_tuple`); see the sketch after this list. [#45678](https://github.com/ClickHouse/ClickHouse/pull/45678) ([Azat Khuzhin](https://github.com/azat)).
* Support optimizing queries with `final` by moving `where` conditions on sorting key expressions to `prewhere`. [#38893](https://github.com/ClickHouse/ClickHouse/issues/38893). [#38950](https://github.com/ClickHouse/ClickHouse/pull/38950) ([hexiaoting](https://github.com/hexiaoting)).
* Add new metrics for backups: `num_processed_files` and `processed_files_size`, which describe the actual number and total size of processed files. [#42244](https://github.com/ClickHouse/ClickHouse/pull/42244) ([Aleksandr](https://github.com/AVMusorin)).
* Added retries on interserver DNS errors. [#43179](https://github.com/ClickHouse/ClickHouse/pull/43179) ([Anton Kozlov](https://github.com/tonickkozlov)).
* Keeper improvement: try preallocating space on the disk to avoid undefined out-of-space issues. Introduce setting `max_log_file_size` for the maximum size of Keeper's Raft log files. [#44370](https://github.com/ClickHouse/ClickHouse/pull/44370) ([Antonio Andelic](https://github.com/antonio2368)).
* Optimize the replica delay API logic for the case when the replica is read-only. [#45148](https://github.com/ClickHouse/ClickHouse/pull/45148) ([mateng915](https://github.com/mateng0915)).
* Ask for the password interactively in clickhouse-client when the empty password is wrong. Closes [#46702](https://github.com/ClickHouse/ClickHouse/issues/46702). [#46730](https://github.com/ClickHouse/ClickHouse/pull/46730) ([Nikolay Degterinsky](https://github.com/evillique)).
* Mark `Gorilla` compression on columns of non-Float* type as suspicious. [#45376](https://github.com/ClickHouse/ClickHouse/pull/45376) ([Robert Schulze](https://github.com/rschu1ze)).
* Show the replica name that is executing a merge in the `postpone_reason` column. [#45458](https://github.com/ClickHouse/ClickHouse/pull/45458) ([Frank Chen](https://github.com/FrankChen021)).
* Save the exception stack trace in part_log. [#45459](https://github.com/ClickHouse/ClickHouse/pull/45459) ([Frank Chen](https://github.com/FrankChen021)).
* The `regexp_tree` dictionary is polished and now it is compatible with https://github.com/ua-parser/uap-core. [#45631](https://github.com/ClickHouse/ClickHouse/pull/45631) ([Han Fei](https://github.com/hanfei1991)).
* Updated the checking of `SYSTEM SYNC REPLICA`. Resolves [#45508](https://github.com/ClickHouse/ClickHouse/issues/45508). [#45648](https://github.com/ClickHouse/ClickHouse/pull/45648) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
* Rename setting `replication_alter_partitions_sync` to `alter_sync`. [#45659](https://github.com/ClickHouse/ClickHouse/pull/45659) ([Antonio Andelic](https://github.com/antonio2368)).
* The `generateRandom` table function and the engine now support `LowCardinality` data types. This is useful for testing, for example you can write `INSERT INTO table SELECT * FROM generateRandom() LIMIT 1000`. This is needed to debug [#45590](https://github.com/ClickHouse/ClickHouse/issues/45590). [#45661](https://github.com/ClickHouse/ClickHouse/pull/45661) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* The experimental query result cache now provides more modular configuration settings. [#45679](https://github.com/ClickHouse/ClickHouse/pull/45679) ([Robert Schulze](https://github.com/rschu1ze)).
* Renamed "query result cache" to "query cache". [#45682](https://github.com/ClickHouse/ClickHouse/pull/45682) ([Robert Schulze](https://github.com/rschu1ze)).
* Add the `SYSTEM SYNC FILE CACHE` command. It performs the `sync` syscall. [#8921](https://github.com/ClickHouse/ClickHouse/issues/8921). [#45685](https://github.com/ClickHouse/ClickHouse/pull/45685) ([DR](https://github.com/freedomDR)).
* Add a new S3 setting `allow_head_object_request`. This PR makes usage of the `GetObjectAttributes` request instead of `HeadObject` introduced in https://github.com/ClickHouse/ClickHouse/pull/45288 optional (and disabled by default). [#45701](https://github.com/ClickHouse/ClickHouse/pull/45701) ([Vitaly Baranov](https://github.com/vitlibar)).
* Add the ability to override connection settings based on connection names (which means you no longer need to store a password for each connection; you can simply put everything into `~/.clickhouse-client/config.xml` and even use different history files for them, which can also be useful). [#45715](https://github.com/ClickHouse/ClickHouse/pull/45715) ([Azat Khuzhin](https://github.com/azat)).
* Arrow format: support the duration type. Closes [#45669](https://github.com/ClickHouse/ClickHouse/issues/45669). [#45750](https://github.com/ClickHouse/ClickHouse/pull/45750) ([flynn](https://github.com/ucasfl)).
* Extend the logging in the Query Cache to improve investigations of the caching behavior. [#45751](https://github.com/ClickHouse/ClickHouse/pull/45751) ([Robert Schulze](https://github.com/rschu1ze)).
* The query cache's server-level settings are now reconfigurable at runtime. [#45758](https://github.com/ClickHouse/ClickHouse/pull/45758) ([Robert Schulze](https://github.com/rschu1ze)).
* Hide the password in logs when a table function's arguments are specified with a named collection. [#45774](https://github.com/ClickHouse/ClickHouse/pull/45774) ([Vitaly Baranov](https://github.com/vitlibar)).
* Improve the internal S3 client to correctly deduce regions and redirections for different types of URLs. [#45783](https://github.com/ClickHouse/ClickHouse/pull/45783) ([Antonio Andelic](https://github.com/antonio2368)).
* Add support for Map, IPv4 and IPv6 types in generateRandom. Mostly useful for testing (see the sketch after this list). [#45785](https://github.com/ClickHouse/ClickHouse/pull/45785) ([Raúl Marín](https://github.com/Algunenano)).
* Support empty/notEmpty for IP types. [#45799](https://github.com/ClickHouse/ClickHouse/pull/45799) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* The column `num_processed_files` was split into two columns: `num_files` (for BACKUP) and `files_read` (for RESTORE). The column `processed_files_size` was split into two columns: `total_size` (for BACKUP) and `bytes_read` (for RESTORE). [#45800](https://github.com/ClickHouse/ClickHouse/pull/45800) ([Vitaly Baranov](https://github.com/vitlibar)).
* Add support for the `SHOW ENGINES` query for MySQL compatibility. [#45859](https://github.com/ClickHouse/ClickHouse/pull/45859) ([Filatenkov Artur](https://github.com/FArthur-cmd)).
* Improved how the obfuscator deals with queries. [#45867](https://github.com/ClickHouse/ClickHouse/pull/45867) ([Raúl Marín](https://github.com/Algunenano)).
* Improve the behaviour of conversion into Date for the boundary value 65535 (2149-06-06). [#46042](https://github.com/ClickHouse/ClickHouse/pull/46042) [#45914](https://github.com/ClickHouse/ClickHouse/pull/45914) ([Joanna Hulboj](https://github.com/jh0x)).
* Add setting `check_referential_table_dependencies` to check referential dependencies on `DROP TABLE`. This PR solves [#38326](https://github.com/ClickHouse/ClickHouse/issues/38326). [#45936](https://github.com/ClickHouse/ClickHouse/pull/45936) ([Vitaly Baranov](https://github.com/vitlibar)).
* Fix `tupleElement` to return `Null` when it has a `Null` argument. Closes [#45894](https://github.com/ClickHouse/ClickHouse/issues/45894). [#45952](https://github.com/ClickHouse/ClickHouse/pull/45952) ([flynn](https://github.com/ucasfl)).
* Throw an error when no files satisfy the S3 wildcard. Closes [#45587](https://github.com/ClickHouse/ClickHouse/issues/45587). [#45957](https://github.com/ClickHouse/ClickHouse/pull/45957) ([chen](https://github.com/xiedeyantu)).
* Use cluster state data to check concurrent backup/restore. [#45982](https://github.com/ClickHouse/ClickHouse/pull/45982) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
* ClickHouse Client: Use "exact" matching for fuzzy search, which handles case-insensitivity correctly and uses a more appropriate algorithm for matching SQL queries. [#46000](https://github.com/ClickHouse/ClickHouse/pull/46000) ([Azat Khuzhin](https://github.com/azat)).
* Forbid the incorrect CREATE VIEW syntax `CREATE View X TO Y AS SELECT`. Closes [#4331](https://github.com/ClickHouse/ClickHouse/issues/4331). [#46043](https://github.com/ClickHouse/ClickHouse/pull/46043) ([flynn](https://github.com/ucasfl)).
* The storage `Log` family now supports setting the `storage_policy`. Closes [#43421](https://github.com/ClickHouse/ClickHouse/issues/43421). [#46044](https://github.com/ClickHouse/ClickHouse/pull/46044) ([flynn](https://github.com/ucasfl)).
* Improve the `JSONColumns` format when the result is empty. Closes [#46024](https://github.com/ClickHouse/ClickHouse/issues/46024). [#46053](https://github.com/ClickHouse/ClickHouse/pull/46053) ([flynn](https://github.com/ucasfl)).
* Add a reference implementation for SipHash128. [#46065](https://github.com/ClickHouse/ClickHouse/pull/46065) ([Salvatore Mesoraca](https://github.com/aiven-sal)).
* Add a new metric to record allocation times and bytes using mmap. [#46068](https://github.com/ClickHouse/ClickHouse/pull/46068) ([李扬](https://github.com/taiyang-li)).
* For functions like `leftPad`, `rightPad`, `leftPadUTF8`, `rightPadUTF8`, the second argument `length` previously had to be UInt8|16|32|64|128|256, which is too strict for ClickHouse users and inconsistent with similar functions like `arrayResize` and `substring`; this restriction has been relaxed. [#46103](https://github.com/ClickHouse/ClickHouse/pull/46103) ([李扬](https://github.com/taiyang-li)).
* Fix an assertion in the `welchTTest` function in debug builds when the resulting statistic is NaN. Unified the behavior with other similar functions. Change the behavior of `studentTTest` to return NaN instead of throwing an exception, because the previous behavior was inconvenient. This closes [#41176](https://github.com/ClickHouse/ClickHouse/issues/41176). This closes [#42162](https://github.com/ClickHouse/ClickHouse/issues/42162). [#46141](https://github.com/ClickHouse/ClickHouse/pull/46141) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* More convenient usage of big integers and ORDER BY WITH FILL. Allow using plain integers for start and end points in WITH FILL when ordering by big (128-bit and 256-bit) integers. Fix the wrong result for big integers with negative start or end points. This closes [#16733](https://github.com/ClickHouse/ClickHouse/issues/16733). [#46152](https://github.com/ClickHouse/ClickHouse/pull/46152) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Add `parts`, `active_parts` and `total_marks` columns to `system.tables`, see [issue #44336](https://github.com/ClickHouse/ClickHouse/issues/44336). [#46161](https://github.com/ClickHouse/ClickHouse/pull/46161) ([attack204](https://github.com/attack204)).
* Functions `multi[Fuzzy]Match(Any|AnyIndex|AllIndices)` now reject regexes which will likely evaluate very slowly in vectorscan. [#46167](https://github.com/ClickHouse/ClickHouse/pull/46167) ([Robert Schulze](https://github.com/rschu1ze)).
* When `insert_null_as_default` is enabled and a column doesn't have a defined default value, the default value of the column type will be used. This also fixes using default values for NULLs in the case of LowCardinality columns (see the sketch after this list). [#46171](https://github.com/ClickHouse/ClickHouse/pull/46171) ([Kruglov Pavel](https://github.com/Avogar)).
* Prefer explicitly defined access keys for S3 clients. If `use_environment_credentials` is set to `true`, and the user has provided the access key through query or config, they will be used instead of the ones from the environment variable. [#46191](https://github.com/ClickHouse/ClickHouse/pull/46191) ([Antonio Andelic](https://github.com/antonio2368)).
* Add an alias `DATE_FORMAT()` for function `formatDateTime()` to improve compatibility with MySQL's SQL dialect, and extend `formatDateTime` with the substitutions "a", "b", "c", "h", "i", "k", "l", "r", "s", "W". `DATE_FORMAT` formats a time according to the given format string; the format is a constant expression, so you cannot have multiple formats for a single result column (see [formatDateTime](https://clickhouse.com/docs/en/sql-reference/functions/date-time-functions/#formatdatetime) and the sketch after this list). [#46302](https://github.com/ClickHouse/ClickHouse/pull/46302) ([Jake Bamrah](https://github.com/JakeBamrah)).
* Add `ProfileEvents` and `CurrentMetrics` about the callback tasks for parallel replicas (`s3Cluster` and `MergeTree` tables). [#46313](https://github.com/ClickHouse/ClickHouse/pull/46313) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Add support for `DELETE` and `UPDATE` for tables using the `KeeperMap` storage engine. [#46330](https://github.com/ClickHouse/ClickHouse/pull/46330) ([Antonio Andelic](https://github.com/antonio2368)).
* Allow writing RENAME queries with query parameters. Resolves [#45778](https://github.com/ClickHouse/ClickHouse/issues/45778). [#46407](https://github.com/ClickHouse/ClickHouse/pull/46407) ([Nikolay Degterinsky](https://github.com/evillique)).
* Fix parameterized SELECT queries with the REPLACE transformer. Resolves [#33002](https://github.com/ClickHouse/ClickHouse/issues/33002). [#46420](https://github.com/ClickHouse/ClickHouse/pull/46420) ([Nikolay Degterinsky](https://github.com/evillique)).
* Exclude the internal database used for temporary/external tables from the calculation of the asynchronous metric "NumberOfDatabases". This makes the behavior consistent with the system table "system.databases". [#46435](https://github.com/ClickHouse/ClickHouse/pull/46435) ([Robert Schulze](https://github.com/rschu1ze)).
* Added the `last_exception_time` column to the distribution_queue table. [#46564](https://github.com/ClickHouse/ClickHouse/pull/46564) ([Aleksandr](https://github.com/AVMusorin)).
* Support the IN clause with parameters in parameterized views. [#46583](https://github.com/ClickHouse/ClickHouse/pull/46583) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
* Do not load named collections on server startup (load them on first access instead). [#46607](https://github.com/ClickHouse/ClickHouse/pull/46607) ([Kseniia Sumarokova](https://github.com/kssenii)).
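
A sketch of the named-tuple JSON setting mentioned above; it also uses the three-argument `format` table function added in this release, and the input row is made up:

```sql
SET input_format_json_ignore_unknown_keys_in_named_tuple = 1;

SELECT *
FROM format(JSONEachRow, 'obj Tuple(a UInt32)', '{"obj": {"a": 1, "unexpected": 2}}');
-- the "unexpected" key is ignored instead of raising an error
```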
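
A sketch of `generateRandom` with the newly supported types; the structure is chosen arbitrarily for illustration:

```sql
SELECT *
FROM generateRandom('s LowCardinality(String), m Map(String, UInt8), ip4 IPv4, ip6 IPv6')
LIMIT 3;
```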
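
A sketch of the new `DATE_FORMAT` alias; the format string uses common `formatDateTime` substitutions and is only an example:

```sql
SELECT DATE_FORMAT(now(), '%Y-%m-%d %H:%i:%S') AS formatted;
```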
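
And a sketch of the `insert_null_as_default` behaviour described above (the table and data are hypothetical): NULLs inserted into non-Nullable columns fall back to the type defaults, including for LowCardinality columns.

```sql
CREATE TABLE t (x UInt32, s LowCardinality(String)) ENGINE = Memory;

SET insert_null_as_default = 1;

INSERT INTO t
SELECT
    if(number % 2 = 0, number, NULL),            -- NULL becomes 0
    if(number % 2 = 0, toString(number), NULL)   -- NULL becomes ''
FROM numbers(4);

SELECT * FROM t;
```
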
#### Build/Testing/Packaging Improvement
* Introduce GWP-ASan implemented by the LLVM runtime. This closes [#27039](https://github.com/ClickHouse/ClickHouse/issues/27039). [#45226](https://github.com/ClickHouse/ClickHouse/pull/45226) ([Han Fei](https://github.com/hanfei1991)).
* We want to make our tests less stable and more flaky: add randomization for merge tree settings in tests. [#38983](https://github.com/ClickHouse/ClickHouse/pull/38983) ([Anton Popov](https://github.com/CurtizJ)).
* Enable HDFS support on PowerPC, which helps to fix the following functional tests: 02113_hdfs_assert.sh, 02244_hdfs_cluster.sql and 02368_cancel_write_into_hdfs.sh. [#44949](https://github.com/ClickHouse/ClickHouse/pull/44949) ([MeenaRenganathan22](https://github.com/MeenaRenganathan22)).
* Add a systemd.service file for clickhouse-keeper. Fixes [#44293](https://github.com/ClickHouse/ClickHouse/issues/44293). [#45568](https://github.com/ClickHouse/ClickHouse/pull/45568) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* ClickHouse's fork of poco was moved from "contrib/" to "base/poco/". [#46075](https://github.com/ClickHouse/ClickHouse/pull/46075) ([Robert Schulze](https://github.com/rschu1ze)).
* Add an option for `clickhouse-watchdog` to restart the child process. This does not make a lot of use. [#46312](https://github.com/ClickHouse/ClickHouse/pull/46312) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* If the environment variable `CLICKHOUSE_DOCKER_RESTART_ON_EXIT` is set to 1, the Docker container will run `clickhouse-server` as a child instead of the first process, and restart it when it exits. [#46391](https://github.com/ClickHouse/ClickHouse/pull/46391) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix the systemd service file. [#46461](https://github.com/ClickHouse/ClickHouse/pull/46461) ([SuperDJY](https://github.com/cmsxbc)).
* Raised the minimum Clang version needed to build ClickHouse from 12 to 15. [#46710](https://github.com/ClickHouse/ClickHouse/pull/46710) ([Robert Schulze](https://github.com/rschu1ze)).
* Upgrade Intel QPL from v0.3.0 to v1.0.0. Build libaccel-config and link it statically to the QPL library instead of dynamically. [#45809](https://github.com/ClickHouse/ClickHouse/pull/45809) ([jasperzhu](https://github.com/jinjunzh)).
#### Bug Fix (user-visible misbehavior in official stable or prestable release)

* Flush data exactly by `rabbitmq_flush_interval_ms` or by `rabbitmq_max_block_size` in `StorageRabbitMQ`. Closes [#42389](https://github.com/ClickHouse/ClickHouse/issues/42389). Closes [#45160](https://github.com/ClickHouse/ClickHouse/issues/45160). [#44404](https://github.com/ClickHouse/ClickHouse/pull/44404) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Use PODArray to render in the sparkBar function, so we can control the memory usage. Closes [#44467](https://github.com/ClickHouse/ClickHouse/issues/44467). [#44489](https://github.com/ClickHouse/ClickHouse/pull/44489) ([Duc Canh Le](https://github.com/canhld94)).
* Fix functions `quantilesExactExclusive` and `quantilesExactInclusive` returning unsorted array elements. [#45379](https://github.com/ClickHouse/ClickHouse/pull/45379) ([wujunfu](https://github.com/wujunfu)).
* Fix an uncaught exception in HTTPHandler when OpenTelemetry is enabled. [#45456](https://github.com/ClickHouse/ClickHouse/pull/45456) ([Frank Chen](https://github.com/FrankChen021)).
* Don't infer Dates from 8-digit numbers. It could lead to wrong data being read. [#45581](https://github.com/ClickHouse/ClickHouse/pull/45581) ([Kruglov Pavel](https://github.com/Avogar)).
* Fixes to correctly use the `odbc_bridge_use_connection_pooling` setting. [#45591](https://github.com/ClickHouse/ClickHouse/pull/45591) ([Bharat Nallan](https://github.com/bharatnc)).
* When the callback in the cache is called, it is possible that this cache is destructed. To keep it safe, we capture members by value. It's also safe for task scheduling because it will be deactivated before the storage is destroyed. Resolves [#45548](https://github.com/ClickHouse/ClickHouse/issues/45548). [#45601](https://github.com/ClickHouse/ClickHouse/pull/45601) ([Han Fei](https://github.com/hanfei1991)).
* Fix data corruption when codecs Delta or DoubleDelta are combined with codec Gorilla. [#45615](https://github.com/ClickHouse/ClickHouse/pull/45615) ([Robert Schulze](https://github.com/rschu1ze)).
* Correctly check types when using the N-gram bloom filter index to avoid invalid reads. [#45617](https://github.com/ClickHouse/ClickHouse/pull/45617) ([Antonio Andelic](https://github.com/antonio2368)).
* A couple of segfaults have been reported around `c-ares`. They were introduced in my previous pull requests. I have fixed them with the help of Alexander Tokmakov. [#45629](https://github.com/ClickHouse/ClickHouse/pull/45629) ([Arthur Passos](https://github.com/arthurpassos)).
* Fix key description when encountering duplicate primary keys. This can happen in projections. See [#45590](https://github.com/ClickHouse/ClickHouse/issues/45590) for details. [#45686](https://github.com/ClickHouse/ClickHouse/pull/45686) ([Amos Bird](https://github.com/amosbird)).
* Set compression method and level for backups. Closes [#45690](https://github.com/ClickHouse/ClickHouse/issues/45690). [#45737](https://github.com/ClickHouse/ClickHouse/pull/45737) ([Pradeep Chhetri](https://github.com/chhetripradeep)).
* Use `select_query_typed.limitByOffset()` instead of `select_query_typed.limitOffset()`. [#45817](https://github.com/ClickHouse/ClickHouse/pull/45817) ([刘陶峰](https://github.com/taofengliu)).
* When using the experimental analyzer, queries like `SELECT number FROM numbers(100) LIMIT 10 OFFSET 10;` returned wrong results (an empty result for this SQL). That was caused by an unnecessary offset step added by the planner. [#45822](https://github.com/ClickHouse/ClickHouse/pull/45822) ([刘陶峰](https://github.com/taofengliu)).
* Backward compatibility - allow implicit narrowing conversion from UInt64 to IPv4 - required for "INSERT ... VALUES ..." expressions. [#45865](https://github.com/ClickHouse/ClickHouse/pull/45865) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* Fix the IPv6 parser for mixed IPv4 addresses with a missing first octet (like `::.1.2.3`). [#45871](https://github.com/ClickHouse/ClickHouse/pull/45871) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* Add the `query_kind` column to the `system.processes` table and the `SHOW PROCESSLIST` query. Remove duplicate code. It fixes a bug: the global configuration parameter `max_concurrent_select_queries` was not respected for queries with `INTERSECT` or `EXCEPT` chains. [#45872](https://github.com/ClickHouse/ClickHouse/pull/45872) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix a crash in the function `stochasticLinearRegression`. Found by WingFuzz. [#45985](https://github.com/ClickHouse/ClickHouse/pull/45985) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Fix a crash in `SELECT` queries with `INTERSECT` and `EXCEPT` modifiers that read data from tables with enabled sparse columns (controlled by setting `ratio_of_defaults_for_sparse_serialization`). [#45987](https://github.com/ClickHouse/ClickHouse/pull/45987) ([Anton Popov](https://github.com/CurtizJ)).
* Fix the read-in-order optimization for DESC sorting with FINAL. Closes [#45815](https://github.com/ClickHouse/ClickHouse/issues/45815). [#46009](https://github.com/ClickHouse/ClickHouse/pull/46009) ([Vladimir C](https://github.com/vdimir)).
* Fix reading of non-existing nested columns with multiple levels in compact parts. [#46045](https://github.com/ClickHouse/ClickHouse/pull/46045) ([Azat Khuzhin](https://github.com/azat)).
* Fix the elapsed column in system.processes (10x error). [#46047](https://github.com/ClickHouse/ClickHouse/pull/46047) ([Azat Khuzhin](https://github.com/azat)).
* Follow-up fix for replacing domain IP types (IPv4, IPv6) with native ones, https://github.com/ClickHouse/ClickHouse/pull/43221. [#46087](https://github.com/ClickHouse/ClickHouse/pull/46087) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* Fix environment variable substitution in the configuration when a parameter already has a value. This closes [#46131](https://github.com/ClickHouse/ClickHouse/issues/46131). This closes [#9547](https://github.com/ClickHouse/ClickHouse/issues/9547). [#46144](https://github.com/ClickHouse/ClickHouse/pull/46144) ([pufit](https://github.com/pufit)).
* Fix incorrect predicate push down with grouping sets. Closes [#45947](https://github.com/ClickHouse/ClickHouse/issues/45947). [#46151](https://github.com/ClickHouse/ClickHouse/pull/46151) ([flynn](https://github.com/ucasfl)).
* Fix a possible pipeline-stuck error on `fulls_sorting_join` with constant keys. [#46175](https://github.com/ClickHouse/ClickHouse/pull/46175) ([Vladimir C](https://github.com/vdimir)).
* Never rewrite tuple functions as literals during formatting to avoid incorrect results. [#46232](https://github.com/ClickHouse/ClickHouse/pull/46232) ([Salvatore Mesoraca](https://github.com/aiven-sal)).
* Fix a possible out-of-bounds error while reading LowCardinality(Nullable) in Arrow format. [#46270](https://github.com/ClickHouse/ClickHouse/pull/46270) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix `SYSTEM UNFREEZE` queries failing with the exception `CANNOT_PARSE_INPUT_ASSERTION_FAILED`. [#46325](https://github.com/ClickHouse/ClickHouse/pull/46325) ([Aleksei Filatov](https://github.com/aalexfvk)).
* Fix a possible crash which can be caused by an integer overflow while deserializing the aggregation state of a function that stores a HashTable. [#46349](https://github.com/ClickHouse/ClickHouse/pull/46349) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Fix a possible `LOGICAL_ERROR` in asynchronous inserts with invalid data sent in format `VALUES`. [#46350](https://github.com/ClickHouse/ClickHouse/pull/46350) ([Anton Popov](https://github.com/CurtizJ)).
* Fixed a LOGICAL_ERROR on an attempt to execute `ALTER ... MOVE PART ... TO TABLE`. This type of query was never actually supported. [#46359](https://github.com/ClickHouse/ClickHouse/pull/46359) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Fix s3Cluster schema inference in parallel distributed insert select when `parallel_distributed_insert_select` is enabled. [#46381](https://github.com/ClickHouse/ClickHouse/pull/46381) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix queries like `ALTER TABLE ... UPDATE nested.arr1 = nested.arr2 ...`, where `arr1` and `arr2` are fields of the same `Nested` column. [#46387](https://github.com/ClickHouse/ClickHouse/pull/46387) ([Anton Popov](https://github.com/CurtizJ)).
* The scheduler may fail to schedule a task. If it happens, the whole MultipartUpload should be aborted and `UploadHelper` must wait for already scheduled tasks. [#46451](https://github.com/ClickHouse/ClickHouse/pull/46451) ([Dmitry Novik](https://github.com/novikd)).
* Fix PREWHERE for Merge with different default types (fixes some `NOT_FOUND_COLUMN_IN_BLOCK` when the default type for the column differs; also allow `PREWHERE` when the type of the column is the same across tables, and prohibit it only if it differs). [#46454](https://github.com/ClickHouse/ClickHouse/pull/46454) ([Azat Khuzhin](https://github.com/azat)).
* Fix a crash that could happen when constant values are used in `ORDER BY`. Fixes [#46466](https://github.com/ClickHouse/ClickHouse/issues/46466). [#46493](https://github.com/ClickHouse/ClickHouse/pull/46493) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Do not throw an exception if the `disk` setting was specified on the query level but `storage_policy` was specified in the config merge tree settings section. `disk` will override the setting from the config. [#46533](https://github.com/ClickHouse/ClickHouse/pull/46533) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix invalid processing of a constant `LowCardinality` argument in function `arrayMap`. This bug could lead to a segfault in release, and a logical error `Bad cast` in debug builds. [#46569](https://github.com/ClickHouse/ClickHouse/pull/46569) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fixes [#46557](https://github.com/ClickHouse/ClickHouse/issues/46557). [#46611](https://github.com/ClickHouse/ClickHouse/pull/46611) ([Alexander Gololobov](https://github.com/davenger)).
* Fix endless restarts of the clickhouse-server systemd unit if the server cannot start within 1m30sec (disable the timeout logic for starting clickhouse-server from the systemd service). [#46613](https://github.com/ClickHouse/ClickHouse/pull/46613) ([Azat Khuzhin](https://github.com/azat)).
* Memory buffers allocated during asynchronous inserts were deallocated in the global context, and MemoryTracker counters for the corresponding user and query were not updated correctly. That led to false-positive OOM exceptions. [#46622](https://github.com/ClickHouse/ClickHouse/pull/46622) ([Dmitry Novik](https://github.com/novikd)).
* Updated to not clear on_expression from table_join, as it is used by future analyze runs. Resolves [#45185](https://github.com/ClickHouse/ClickHouse/issues/45185). [#46487](https://github.com/ClickHouse/ClickHouse/pull/46487) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).

### <a id="231"></a> ClickHouse release 23.1, 2023-01-26

### ClickHouse release 23.1
@@ -2,6 +2,11 @@

ClickHouse® is an open-source column-oriented database management system that allows generating analytical data reports in real-time.

## How To Install (Linux, macOS, FreeBSD)
```
curl https://clickhouse.com/ | sh
```

## Useful Links

* [Official website](https://clickhouse.com/) has a quick high-level overview of ClickHouse on the main page.
@@ -15,7 +15,7 @@ execute_process(COMMAND ${CMAKE_CXX_COMPILER} --version OUTPUT_VARIABLE COMPILER
message (STATUS "Using compiler:\n${COMPILER_SELF_IDENTIFICATION}")

# Require minimum compiler versions
set (CLANG_MINIMUM_VERSION 12)
set (CLANG_MINIMUM_VERSION 15)
set (XCODE_MINIMUM_VERSION 12.0)
set (APPLE_CLANG_MINIMUM_VERSION 12.0.0)
set (GCC_MINIMUM_VERSION 11)
@@ -1,44 +0,0 @@
# docker build -t clickhouse/docs-release .
FROM ubuntu:20.04

# ARG for quick switch to a given ubuntu mirror
ARG apt_archive="http://archive.ubuntu.com"
RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list

ENV LANG=C.UTF-8

RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install --yes --no-install-recommends \
        wget \
        bash \
        python \
        curl \
        python3-requests \
        sudo \
        git \
        openssl \
        python3-pip \
        software-properties-common \
        fonts-arphic-ukai \
        fonts-arphic-uming \
        fonts-ipafont-mincho \
        fonts-ipafont-gothic \
        fonts-unfonts-core \
        xvfb \
        ssh-client \
    && apt-get autoremove --yes \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

RUN pip3 install --ignore-installed --upgrade setuptools pip virtualenv

# We create the most popular default 1000:1000 ubuntu user to not have ssh issues when running with UID==1000
RUN useradd --create-home --uid 1000 --user-group ubuntu \
    && ssh-keyscan -t rsa github.com >> /etc/ssh/ssh_known_hosts

COPY run.sh /

ENV REPO_PATH=/repo_path
ENV OUTPUT_PATH=/output_path

CMD ["/bin/bash", "/run.sh"]
@@ -1,12 +0,0 @@
#!/usr/bin/env bash
set -euo pipefail

cd "$REPO_PATH/docs/tools"
if ! [ -d venv ]; then
    mkdir -p venv
    virtualenv -p "$(which python3)" venv
    source venv/bin/activate
    python3 -m pip install --ignore-installed -r requirements.txt
fi
source venv/bin/activate
./release.sh 2>&1 | tee "$OUTPUT_PATH/output.log"
@@ -146,9 +146,5 @@
        "name": "clickhouse/docs-builder",
        "dependent": [
        ]
    },
    "docker/docs/release": {
        "name": "clickhouse/docs-release",
        "dependent": []
    }
}
@@ -174,8 +174,9 @@ fi

if [ "coverity" == "$COMBINED_OUTPUT" ]
then
    tar -cv --zstd -f "coverity-scan.tar.zst" cov-int
    mv "coverity-scan.tar.zst" /output
    # Coverity does not understand ZSTD.
    tar -cvz -f "coverity-scan.tar.gz" cov-int
    mv "coverity-scan.tar.gz" /output
fi

ccache_status
@@ -46,7 +46,8 @@ def get_options(i, backward_compatibility_check):
    if i % 2 == 1 and not backward_compatibility_check:
        client_options.append("group_by_use_nulls=1")

    if i == 12:  # 12 % 3 == 0, so it's Atomic database
    # 12 % 3 == 0, so it's Atomic database
    if i == 12 and not backward_compatibility_check:
        client_options.append("implicit_transaction=1")
        client_options.append("throw_on_unsupported_query_inside_transaction=0")

@@ -78,7 +79,7 @@ def run_func_test(
    pipes = []
    for i in range(0, len(output_paths)):
        f = open(output_paths[i], "w")
        full_command = "{} {} {} {} {} --stress".format(
        full_command = "{} {} {} {} {}".format(
            cmd,
            get_options(i, backward_compatibility_check),
            global_time_limit_option,
@@ -19,7 +19,9 @@ CREATE TABLE deltalake
**Engine parameters**

- `url` — Bucket url with path to the existing Delta Lake table.
- `aws_access_key_id`, `aws_secret_access_key` - Long-term credentials for the [AWS](https://aws.amazon.com/) account user. You can use these to authenticate your requests. Parameter is optional. If credentials are not specified, they are used from the configuration file. For more information see [Using S3 for Data Storage](../mergetree-family/mergetree.md#table_engine-mergetree-s3).
- `aws_access_key_id`, `aws_secret_access_key` - Long-term credentials for the [AWS](https://aws.amazon.com/) account user. You can use these to authenticate your requests. Parameter is optional. If credentials are not specified, they are used from the configuration file.

Engine parameters can be specified using [Named Collections](../../../operations/named-collections.md)

**Example**
@@ -27,7 +29,24 @@ CREATE TABLE deltalake
CREATE TABLE deltalake ENGINE=DeltaLake('http://mars-doc-test.s3.amazonaws.com/clickhouse-bucket-3/test_table/', 'ABC123', 'Abc+123')
```

Using named collections:

``` xml
<clickhouse>
    <named_collections>
        <deltalake_conf>
            <url>http://mars-doc-test.s3.amazonaws.com/clickhouse-bucket-3/</url>
            <access_key_id>ABC123</access_key_id>
            <secret_access_key>Abc+123</secret_access_key>
        </deltalake_conf>
    </named_collections>
</clickhouse>
```

```sql
CREATE TABLE deltalake ENGINE=DeltaLake(deltalake_conf, filename = 'test_table')
```

## See also

- [deltaLake table function](../../../sql-reference/table-functions/deltalake.md)
@@ -19,7 +19,9 @@ CREATE TABLE hudi_table
**Engine parameters**

- `url` — Bucket url with the path to an existing Hudi table.
- `aws_access_key_id`, `aws_secret_access_key` - Long-term credentials for the [AWS](https://aws.amazon.com/) account user. You can use these to authenticate your requests. Parameter is optional. If credentials are not specified, they are used from the configuration file. For more information see [Using S3 for Data Storage](../mergetree-family/mergetree.md#table_engine-mergetree-s3).
- `aws_access_key_id`, `aws_secret_access_key` - Long-term credentials for the [AWS](https://aws.amazon.com/) account user. You can use these to authenticate your requests. Parameter is optional. If credentials are not specified, they are used from the configuration file.

Engine parameters can be specified using [Named Collections](../../../operations/named-collections.md)

**Example**
@ -27,7 +29,24 @@ CREATE TABLE hudi_table
|
||||
CREATE TABLE hudi_table ENGINE=Hudi('http://mars-doc-test.s3.amazonaws.com/clickhouse-bucket-3/test_table/', 'ABC123', 'Abc+123')
|
||||
```
|
||||
|
||||
Using named collections:
|
||||
|
||||
``` xml
|
||||
<clickhouse>
|
||||
<named_collections>
|
||||
<hudi_conf>
|
||||
<url>http://mars-doc-test.s3.amazonaws.com/clickhouse-bucket-3/</url>
|
||||
<access_key_id>ABC123</access_key_id>
|
||||
<secret_access_key>Abc+123</secret_access_key>
|
||||
</hudi_conf>
|
||||
</named_collections>
|
||||
</clickhouse>
|
||||
```
|
||||
|
||||
```sql
|
||||
CREATE TABLE hudi_table ENGINE=Hudi(hudi_conf, filename = 'test_table')
|
||||
```
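To see what schema ClickHouse reads from the table (an illustrative query, not part of the original page, reusing the `hudi_table` created above):

```sql
-- show the column names and types read from the existing Hudi table
DESCRIBE TABLE hudi_table;
```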
|
||||
|
||||
## See also
|
||||
|
||||
- [hudi table function](/docs/en/sql-reference/table-functions/hudi.md)
|
||||
|
||||
|
52
docs/en/engines/table-engines/integrations/iceberg.md
Normal file
@ -0,0 +1,52 @@
|
||||
---
|
||||
slug: /en/engines/table-engines/integrations/iceberg
|
||||
sidebar_label: Iceberg
|
||||
---
|
||||
|
||||
# Iceberg Table Engine
|
||||
|
||||
This engine provides a read-only integration with existing Apache [Iceberg](https://iceberg.apache.org/) tables in Amazon S3.
|
||||
|
||||
## Create Table
|
||||
|
||||
Note that the Iceberg table must already exist in S3; this command does not take DDL parameters to create a new table.
|
||||
|
||||
``` sql
|
||||
CREATE TABLE iceberg_table
|
||||
ENGINE = Iceberg(url, [aws_access_key_id, aws_secret_access_key])
|
||||
```
|
||||
|
||||
**Engine parameters**
|
||||
|
||||
- `url` — Bucket URL with the path to an existing Iceberg table.
|
||||
- `aws_access_key_id`, `aws_secret_access_key` - Long-term credentials for the [AWS](https://aws.amazon.com/) account user. You can use these to authenticate your requests. These parameters are optional. If credentials are not specified, they are taken from the configuration file.
|
||||
|
||||
Engine parameters can be specified using [Named Collections](../../../operations/named-collections.md)
|
||||
|
||||
**Example**
|
||||
|
||||
```sql
|
||||
CREATE TABLE iceberg_table ENGINE=Iceberg('http://test.s3.amazonaws.com/clickhouse-bucket/test_table', 'test', 'test')
|
||||
```
|
||||
|
||||
Using named collections:
|
||||
|
||||
``` xml
|
||||
<clickhouse>
|
||||
<named_collections>
|
||||
<iceberg_conf>
|
||||
<url>http://test.s3.amazonaws.com/clickhouse-bucket/</url>
|
||||
<access_key_id>test</access_key_id>
|
||||
<secret_access_key>test</secret_access_key>
|
||||
</iceberg_conf>
|
||||
</named_collections>
|
||||
</clickhouse>
|
||||
```
|
||||
|
||||
```sql
|
||||
CREATE TABLE iceberg_table ENGINE=Iceberg(iceberg_conf, filename = 'test_table')
|
||||
```
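A minimal read, assuming the `iceberg_table` defined in the examples above (the engine is read-only, so only SELECT-style queries apply):

```sql
-- sample a few rows from the existing Iceberg table
SELECT * FROM iceberg_table LIMIT 10;
```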
|
||||
|
||||
## See also
|
||||
|
||||
- [iceberg table function](/docs/en/sql-reference/table-functions/iceberg.md)
|
@ -13,7 +13,7 @@ The supported formats are:
|
||||
| Format | Input | Output |
|
||||
|-------------------------------------------------------------------------------------------|------|--------|
|
||||
| [TabSeparated](#tabseparated) | ✔ | ✔ |
|
||||
| [TabSeparatedRaw](#tabseparatedraw) | ✔ | ✔ |
|
||||
| [TabSeparatedRaw](#tabseparatedraw) | ✔ | ✔ |
|
||||
| [TabSeparatedWithNames](#tabseparatedwithnames) | ✔ | ✔ |
|
||||
| [TabSeparatedWithNamesAndTypes](#tabseparatedwithnamesandtypes) | ✔ | ✔ |
|
||||
| [TabSeparatedRawWithNames](#tabseparatedrawwithnames) | ✔ | ✔ |
|
||||
@ -33,7 +33,7 @@ The supported formats are:
|
||||
| [JSONAsString](#jsonasstring) | ✔ | ✗ |
|
||||
| [JSONStrings](#jsonstrings) | ✔ | ✔ |
|
||||
| [JSONColumns](#jsoncolumns) | ✔ | ✔ |
|
||||
| [JSONColumnsWithMetadata](#jsoncolumnswithmetadata) | ✔ | ✔ |
|
||||
| [JSONColumnsWithMetadata](#jsoncolumnsmonoblock) | ✔ | ✔ |
|
||||
| [JSONCompact](#jsoncompact) | ✔ | ✔ |
|
||||
| [JSONCompactStrings](#jsoncompactstrings) | ✗ | ✔ |
|
||||
| [JSONCompactColumns](#jsoncompactcolumns) | ✔ | ✔ |
|
||||
|
@ -1539,33 +1539,103 @@ Example
|
||||
<postgresql_port>9005</postgresql_port>
|
||||
```
|
||||
|
||||
|
||||
## tmp_path {#tmp-path}
|
||||
|
||||
Path to temporary data for processing large queries.
|
||||
Path on the local filesystem to store temporary data for processing large queries.
|
||||
|
||||
:::note
|
||||
The trailing slash is mandatory.
|
||||
- Only one option can be used to configure temporary data storage: `tmp_path`, `tmp_policy`, `temporary_data_in_cache`.
|
||||
- The trailing slash is mandatory.
|
||||
:::
|
||||
|
||||
**Example**
|
||||
|
||||
``` xml
|
||||
```xml
|
||||
<tmp_path>/var/lib/clickhouse/tmp/</tmp_path>
|
||||
```
|
||||
|
||||
## tmp_policy {#tmp-policy}
|
||||
|
||||
Policy from [storage_configuration](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes) to store temporary files.
|
||||
|
||||
If not set, [tmp_path](#tmp-path) is used, otherwise it is ignored.
|
||||
Alternatively, a policy from [storage_configuration](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes) can be used to store temporary files.
|
||||
|
||||
:::note
|
||||
- `move_factor` is ignored.
|
||||
- `keep_free_space_bytes` is ignored.
|
||||
- `max_data_part_size_bytes` is ignored.
|
||||
- Policy should have exactly one volume with local disks.
|
||||
- Only one option can be used to configure temporary data storage: `tmp_path`, `tmp_policy`, `temporary_data_in_cache`.
|
||||
- `move_factor`, `keep_free_space_bytes` and `max_data_part_size_bytes` are ignored.
|
||||
- Policy should have exactly *one volume* with *local* disks.
|
||||
:::
|
||||
|
||||
**Example**
|
||||
|
||||
```xml
<clickhouse>
|
||||
<storage_configuration>
|
||||
<disks>
|
||||
<disk1>
|
||||
<path>/disk1/</path>
|
||||
</disk1>
|
||||
<disk2>
|
||||
<path>/disk2/</path>
|
||||
</disk2>
|
||||
</disks>
|
||||
|
||||
<policies>
|
||||
<tmp_two_disks>
|
||||
<volumes>
|
||||
<main>
|
||||
<disk>disk1</disk>
|
||||
<disk>disk2</disk>
|
||||
</main>
|
||||
</volumes>
|
||||
</tmp_two_disks>
|
||||
</policies>
|
||||
</storage_configuration>
|
||||
|
||||
<tmp_policy>tmp_two_disks</tmp_policy>
|
||||
</clickhouse>
|
||||
|
||||
```
|
||||
|
||||
When `/disk1` is full, temporary data will be stored on `/disk2`.
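As a rough illustration (not part of the original page), one way temporary data ends up in this storage is a query that exceeds an in-memory limit and spills, for example with external sorting:

```sql
-- force sorts larger than ~100 MB to spill to the configured temporary storage
SET max_bytes_before_external_sort = 100000000;
SELECT number FROM numbers(200000000) ORDER BY intHash64(number) LIMIT 10;
```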
|
||||
|
||||
## temporary_data_in_cache {#temporary-data-in-cache}
|
||||
|
||||
With this option, temporary data will be stored in the cache for the particular disk.
|
||||
In this section, you should specify the disk name with the type `cache`.
|
||||
In that case, the cache and temporary data will share the same space, and the disk cache can be evicted to make room for temporary data.
|
||||
|
||||
:::note
|
||||
- Only one option can be used to configure temporary data storage: `tmp_path`, `tmp_policy`, `temporary_data_in_cache`.
|
||||
:::
|
||||
|
||||
**Example**
|
||||
|
||||
```xml
|
||||
<clickhouse>
|
||||
<storage_configuration>
|
||||
<disks>
|
||||
<local_disk>
|
||||
<type>local</type>
|
||||
<path>/local_disk/</path>
|
||||
</local_disk>
|
||||
|
||||
<tiny_local_cache>
|
||||
<type>cache</type>
|
||||
<disk>local_disk</disk>
|
||||
<path>/tiny_local_cache/</path>
|
||||
<max_size>10M</max_size>
|
||||
<max_file_segment_size>1M</max_file_segment_size>
|
||||
<cache_on_write_operations>1</cache_on_write_operations>
|
||||
<do_not_evict_index_and_mark_files>0</do_not_evict_index_and_mark_files>
|
||||
</tiny_local_cache>
|
||||
</disks>
|
||||
</storage_configuration>
|
||||
|
||||
<temporary_data_in_cache>tiny_local_cache</temporary_data_in_cache>
|
||||
</clickhouse>
|
||||
```
|
||||
|
||||
The cache for `local_disk`, as well as the temporary data, will be stored in `/tiny_local_cache` on the filesystem, managed by the `tiny_local_cache` disk.
|
||||
|
||||
## max_temporary_data_on_disk_size {#max_temporary_data_on_disk_size}
|
||||
|
||||
Limit the amount of disk space consumed by temporary files in `tmp_path` for the server.
|
||||
|
@ -1561,6 +1561,17 @@ Possible values:
|
||||
|
||||
Default value: `100000`.
|
||||
|
||||
### async_insert_max_query_number {#async-insert-max-query-number}
|
||||
|
||||
The maximum number of insert queries per block before being inserted. This setting takes effect only if [async_insert_deduplicate](#settings-async-insert-deduplicate) is enabled.
|
||||
|
||||
Possible values:
|
||||
|
||||
- Positive integer.
|
||||
- 0 — Asynchronous insertions are disabled.
|
||||
|
||||
Default value: `450`.
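A hedged usage sketch (not from the original page), with `t` standing in for any existing table:

```sql
-- flush the asynchronous insert buffer after at most 100 queued INSERT queries;
-- per the note above, this takes effect only when async_insert_deduplicate is enabled
SET async_insert = 1, async_insert_deduplicate = 1, async_insert_max_query_number = 100;
INSERT INTO t VALUES (1);
```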
|
||||
|
||||
### async_insert_busy_timeout_ms {#async-insert-busy-timeout-ms}
|
||||
|
||||
The maximum timeout in milliseconds since the first `INSERT` query before inserting collected data.
|
||||
|
@ -6,25 +6,28 @@ sidebar_label: format
|
||||
|
||||
# format
|
||||
|
||||
Extracts table structure from data and parses it according to specified input format.
|
||||
Parses data from arguments according to the specified input format. If the structure argument is not specified, it is extracted from the data.
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
format(format_name, data)
|
||||
format(format_name, [structure], data)
|
||||
```
|
||||
|
||||
**Parameters**
|
||||
|
||||
- `format_name` — The [format](../../interfaces/formats.md#formats) of the data.
|
||||
- `structure` - Structure of the table. Optional. Format 'column1_name column1_type, column2_name column2_type, ...'.
|
||||
- `data` — String literal or constant expression that returns a string containing data in the specified format.
|
||||
|
||||
**Returned value**
|
||||
|
||||
A table with data parsed from `data` argument according specified format and extracted schema.
|
||||
A table with data parsed from the `data` argument according to the specified format and the specified or extracted structure.
|
||||
|
||||
**Examples**
|
||||
|
||||
Without `structure` argument:
|
||||
|
||||
**Query:**
|
||||
``` sql
|
||||
SELECT * FROM format(JSONEachRow,
|
||||
@ -67,6 +70,29 @@ $$)
|
||||
└──────┴───────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
|
||||
```
|
||||
|
||||
With `structure` argument:
|
||||
|
||||
**Query:**
|
||||
```sql
|
||||
SELECT * FROM format(JSONEachRow, 'a String, b UInt32',
|
||||
$$
|
||||
{"a": "Hello", "b": 111}
|
||||
{"a": "World", "b": 123}
|
||||
{"a": "Hello", "b": 112}
|
||||
{"a": "World", "b": 124}
|
||||
$$)
|
||||
```
|
||||
|
||||
**Result:**
|
||||
```response
|
||||
┌─a─────┬───b─┐
|
||||
│ Hello │ 111 │
|
||||
│ World │ 123 │
|
||||
│ Hello │ 112 │
|
||||
│ World │ 124 │
|
||||
└───────┴─────┘
|
||||
```
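The specified or extracted structure can also be inspected directly; a small illustrative query (not part of the original page):

```sql
-- show the schema that format() infers for the given data
DESCRIBE format(JSONEachRow, '{"a": "Hello", "b": 111}');
```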
|
||||
|
||||
**See Also**
|
||||
|
||||
- [Formats](../../interfaces/formats.md)
|
||||
|
58
docs/en/sql-reference/table-functions/iceberg.md
Normal file
@ -0,0 +1,58 @@
|
||||
---
|
||||
slug: /en/sql-reference/table-functions/iceberg
|
||||
sidebar_label: Iceberg
|
||||
---
|
||||
|
||||
# iceberg Table Function
|
||||
|
||||
Provides a read-only table-like interface to Apache [Iceberg](https://iceberg.apache.org/) tables in Amazon S3.
|
||||
|
||||
## Syntax
|
||||
|
||||
``` sql
|
||||
iceberg(url [,aws_access_key_id, aws_secret_access_key] [,format] [,structure])
|
||||
```
|
||||
|
||||
## Arguments
|
||||
|
||||
- `url` — Bucket URL with the path to an existing Iceberg table in S3.
|
||||
- `aws_access_key_id`, `aws_secret_access_key` - Long-term credentials for the [AWS](https://aws.amazon.com/) account user. You can use these to authenticate your requests. These parameters are optional. If credentials are not specified, they are used from the ClickHouse configuration. For more information see [Using S3 for Data Storage](/docs/en/engines/table-engines/mergetree-family/mergetree.md/#table_engine-mergetree-s3).
|
||||
- `format` — The [format](/docs/en/interfaces/formats.md/#formats) of the file. By default `Parquet` is used.
|
||||
- `structure` — Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`.
|
||||
|
||||
Table function arguments can be specified using [Named Collections](../../operations/named-collections.md).
|
||||
|
||||
**Returned value**
|
||||
|
||||
A table with the specified structure for reading data in the specified Iceberg table in S3.
|
||||
|
||||
**Example**
|
||||
|
||||
```sql
|
||||
SELECT * FROM iceberg('http://test.s3.amazonaws.com/clickhouse-bucket/test_table', 'test', 'test')
|
||||
```
|
||||
|
||||
Using named collections:
|
||||
|
||||
```xml
|
||||
<clickhouse>
|
||||
<named_collections>
|
||||
<iceberg_conf>
|
||||
<url>http://test.s3.amazonaws.com/clickhouse-bucket/</url>
|
||||
<access_key_id>test</access_key_id>
|
||||
<secret_access_key>test</secret_access_key>
|
||||
<format>auto</format>
|
||||
<structure>auto</structure>
|
||||
</iceberg_conf>
|
||||
</named_collections>
|
||||
</clickhouse>
|
||||
```
|
||||
|
||||
```sql
|
||||
SELECT * FROM iceberg(iceberg_conf, filename = 'test_table')
|
||||
DESCRIBE iceberg(iceberg_conf, filename = 'test_table')
|
||||
```
|
||||
|
||||
**See Also**
|
||||
|
||||
- [Iceberg engine](/docs/en/engines/table-engines/integrations/iceberg.md)
|
@ -31,13 +31,13 @@ Select the data from all the files in the `/root/data/clickhouse` and `/root/dat
|
||||
|
||||
``` sql
|
||||
SELECT * FROM s3Cluster(
|
||||
'cluster_simple',
|
||||
'http://minio1:9001/root/data/{clickhouse,database}/*',
|
||||
'minio',
|
||||
'minio123',
|
||||
'CSV',
|
||||
'name String, value UInt32, polygon Array(Array(Tuple(Float64, Float64)))') ORDER BY (name, value, polygon
|
||||
);
|
||||
'cluster_simple',
|
||||
'http://minio1:9001/root/data/{clickhouse,database}/*',
|
||||
'minio',
|
||||
'minio123',
|
||||
'CSV',
|
||||
'name String, value UInt32, polygon Array(Array(Tuple(Float64, Float64)))'
|
||||
) ORDER BY (name, value, polygon);
|
||||
```
|
||||
|
||||
Count the total number of rows in all files in the cluster `cluster_simple`:
|
||||
|
2
docs/tools/.gitignore
vendored
@ -1,2 +0,0 @@
|
||||
__pycache__
|
||||
*.pyc
|
@ -1 +0,0 @@
|
||||
See https://github.com/ClickHouse/clickhouse-docs/blob/main/contrib-writing-guide.md
|
@ -1,108 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
from pathlib import Path
|
||||
import argparse
|
||||
import logging
|
||||
import shutil
|
||||
import sys
|
||||
|
||||
import livereload
|
||||
|
||||
|
||||
def write_redirect_html(output_path: Path, to_url: str) -> None:
|
||||
output_dir = output_path.parent
|
||||
output_dir.mkdir(parents=True, exist_ok=True)
|
||||
output_path.write_text(
|
||||
f"""<!--[if IE 6]> Redirect: {to_url} <![endif]-->
|
||||
<!DOCTYPE HTML>
|
||||
<html lang="en-US">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta http-equiv="refresh" content="0; url={to_url}">
|
||||
<script type="text/javascript">
|
||||
window.location.href = "{to_url}";
|
||||
</script>
|
||||
<title>Page Redirection</title>
|
||||
</head>
|
||||
<body>
|
||||
If you are not redirected automatically, follow this <a href="{to_url}">link</a>.
|
||||
</body>
|
||||
</html>"""
|
||||
)
|
||||
|
||||
|
||||
def build_static_redirects(output_dir: Path):
|
||||
for static_redirect in [
|
||||
("benchmark.html", "/benchmark/dbms/"),
|
||||
("benchmark_hardware.html", "/benchmark/hardware/"),
|
||||
(
|
||||
"tutorial.html",
|
||||
"/docs/en/getting_started/tutorial/",
|
||||
),
|
||||
(
|
||||
"reference_en.html",
|
||||
"/docs/en/single/",
|
||||
),
|
||||
(
|
||||
"reference_ru.html",
|
||||
"/docs/ru/single/",
|
||||
),
|
||||
(
|
||||
"docs/index.html",
|
||||
"/docs/en/",
|
||||
),
|
||||
]:
|
||||
write_redirect_html(output_dir / static_redirect[0], static_redirect[1])
|
||||
|
||||
|
||||
def build(root_dir: Path, output_dir: Path):
|
||||
if output_dir.exists():
|
||||
shutil.rmtree(args.output_dir)
|
||||
|
||||
(output_dir / "data").mkdir(parents=True)
|
||||
|
||||
logging.info("Building website")
|
||||
|
||||
# This file can be requested to check for available ClickHouse releases.
|
||||
shutil.copy2(
|
||||
root_dir / "utils" / "list-versions" / "version_date.tsv",
|
||||
output_dir / "data" / "version_date.tsv",
|
||||
)
|
||||
|
||||
# This file can be requested to install ClickHouse.
|
||||
shutil.copy2(
|
||||
root_dir / "docs" / "_includes" / "install" / "universal.sh",
|
||||
output_dir / "data" / "install.sh",
|
||||
)
|
||||
|
||||
build_static_redirects(output_dir)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
root_dir = Path(__file__).parent.parent.parent
|
||||
docs_dir = root_dir / "docs"
|
||||
|
||||
arg_parser = argparse.ArgumentParser(
|
||||
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
|
||||
)
|
||||
arg_parser.add_argument(
|
||||
"--output-dir",
|
||||
type=Path,
|
||||
default=docs_dir / "build",
|
||||
help="path to the output dir",
|
||||
)
|
||||
arg_parser.add_argument("--livereload", type=int, default="0")
|
||||
arg_parser.add_argument("--verbose", action="store_true")
|
||||
|
||||
args = arg_parser.parse_args()
|
||||
|
||||
logging.basicConfig(
|
||||
level=logging.DEBUG if args.verbose else logging.INFO, stream=sys.stderr
|
||||
)
|
||||
|
||||
build(root_dir, args.output_dir)
|
||||
|
||||
if args.livereload:
|
||||
server = livereload.Server()
|
||||
server.serve(root=args.output_dir, host="0.0.0.0", port=args.livereload)
|
||||
sys.exit(0)
|
@ -1,42 +0,0 @@
|
||||
#!/usr/bin/env bash
|
||||
set -ex
|
||||
|
||||
BASE_DIR=$(dirname "$(readlink -f "$0")")
|
||||
BUILD_DIR="${BASE_DIR}/../build"
|
||||
PUBLISH_DIR="${BASE_DIR}/../publish"
|
||||
BASE_DOMAIN="${BASE_DOMAIN:-content.clickhouse.com}"
|
||||
GIT_PROD_URI="${GIT_PROD_URI:-git@github.com:ClickHouse/clickhouse-com-content.git}"
|
||||
EXTRA_BUILD_ARGS="${EXTRA_BUILD_ARGS:---verbose}"
|
||||
|
||||
if [[ -z "$1" ]]
|
||||
then
|
||||
source "${BASE_DIR}/venv/bin/activate"
|
||||
# shellcheck disable=2086
|
||||
python3 "${BASE_DIR}/build.py" ${EXTRA_BUILD_ARGS}
|
||||
rm -rf "${PUBLISH_DIR}"
|
||||
mkdir "${PUBLISH_DIR}" && cd "${PUBLISH_DIR}"
|
||||
|
||||
# Will make a repository with website content as the only commit.
|
||||
git init
|
||||
git remote add origin "${GIT_PROD_URI}"
|
||||
git config user.email "robot-clickhouse@users.noreply.github.com"
|
||||
git config user.name "robot-clickhouse"
|
||||
|
||||
# Add files.
|
||||
cp -R "${BUILD_DIR}"/* .
|
||||
echo -n "${BASE_DOMAIN}" > CNAME
|
||||
cat > README.md << 'EOF'
|
||||
## This repo is the source for https://content.clickhouse.com
|
||||
It's built in [the action](https://github.com/ClickHouse/ClickHouse/blob/master/.github/workflows/docs_release.yml) in the DocsRelease job.
|
||||
EOF
|
||||
echo -n "" > ".nojekyll"
|
||||
cp "${BASE_DIR}/../../LICENSE" .
|
||||
git add ./*
|
||||
git add ".nojekyll"
|
||||
|
||||
git commit --quiet -m "Add new release at $(date)"
|
||||
|
||||
# Push to GitHub rewriting the existing contents.
|
||||
# Sometimes it does not work with error message "! [remote rejected] master -> master (cannot lock ref 'refs/heads/master': is at 42a0f6b6b6c7be56a469441b4bf29685c1cebac3 but expected 520e9b02c0d4678a2a5f41d2f561e6532fb98cc1)"
|
||||
for _ in {1..10}; do git push --force origin master && break; sleep 5; done
|
||||
fi
|
@ -1 +0,0 @@
|
||||
livereload==2.6.3
|
@ -327,7 +327,21 @@ try
|
||||
showClientVersion();
|
||||
}
|
||||
|
||||
connect();
|
||||
try
|
||||
{
|
||||
connect();
|
||||
}
|
||||
catch (const Exception & e)
|
||||
{
|
||||
if (e.code() != DB::ErrorCodes::AUTHENTICATION_FAILED ||
|
||||
config().has("password") ||
|
||||
config().getBool("ask-password", false) ||
|
||||
!is_interactive)
|
||||
throw;
|
||||
|
||||
config().setBool("ask-password", true);
|
||||
connect();
|
||||
}
|
||||
|
||||
/// Show warnings at the beginning of connection.
|
||||
if (is_interactive && !config().has("no-warnings"))
|
||||
|
@ -908,7 +908,7 @@ bool ClusterCopier::tryProcessTable(const ConnectionTimeouts & timeouts, TaskTab
|
||||
/// Exit if success
|
||||
if (task_status != TaskStatus::Finished)
|
||||
{
|
||||
LOG_WARNING(log, "Create destination Tale Failed ");
|
||||
LOG_WARNING(log, "Create destination table failed ");
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -1473,7 +1473,7 @@ TaskStatus ClusterCopier::processPartitionPieceTaskImpl(
|
||||
|
||||
if (count != 0)
|
||||
{
|
||||
LOG_INFO(log, "Partition {} piece {}is not empty. In contains {} rows.", task_partition.name, current_piece_number, count);
|
||||
LOG_INFO(log, "Partition {} piece {} is not empty. In contains {} rows.", task_partition.name, current_piece_number, count);
|
||||
Coordination::Stat stat_shards{};
|
||||
zookeeper->get(partition_piece.getPartitionPieceShardsPath(), &stat_shards);
|
||||
|
||||
|
@ -37,7 +37,6 @@
|
||||
#include <AggregateFunctions/registerAggregateFunctions.h>
|
||||
#include <TableFunctions/registerTableFunctions.h>
|
||||
#include <Storages/registerStorages.h>
|
||||
#include <Common/NamedCollections/NamedCollectionUtils.h>
|
||||
#include <Dictionaries/registerDictionaries.h>
|
||||
#include <Disks/registerDisks.h>
|
||||
#include <Formats/registerFormats.h>
|
||||
@ -131,8 +130,6 @@ void LocalServer::initialize(Poco::Util::Application & self)
|
||||
config().getUInt("max_io_thread_pool_size", 100),
|
||||
config().getUInt("max_io_thread_pool_free_size", 0),
|
||||
config().getUInt("io_thread_pool_queue_size", 10000));
|
||||
|
||||
NamedCollectionUtils::loadFromConfig(config());
|
||||
}
|
||||
|
||||
|
||||
@ -224,8 +221,6 @@ void LocalServer::tryInitPath()
|
||||
|
||||
global_context->setUserFilesPath(""); // user's files are everywhere
|
||||
|
||||
NamedCollectionUtils::loadFromSQL(global_context);
|
||||
|
||||
/// top_level_domains_lists
|
||||
const std::string & top_level_domains_path = config().getString("top_level_domains_path", path + "top_level_domains/");
|
||||
if (!top_level_domains_path.empty())
|
||||
|
@ -770,8 +770,6 @@ try
|
||||
config().getUInt("max_io_thread_pool_free_size", 0),
|
||||
config().getUInt("io_thread_pool_queue_size", 10000));
|
||||
|
||||
NamedCollectionUtils::loadFromConfig(config());
|
||||
|
||||
/// Initialize global local cache for remote filesystem.
|
||||
if (config().has("local_cache_for_remote_fs"))
|
||||
{
|
||||
@ -1177,8 +1175,6 @@ try
|
||||
SensitiveDataMasker::setInstance(std::make_unique<SensitiveDataMasker>(config(), "query_masking_rules"));
|
||||
}
|
||||
|
||||
NamedCollectionUtils::loadFromSQL(global_context);
|
||||
|
||||
auto main_config_reloader = std::make_unique<ConfigReloader>(
|
||||
config_path,
|
||||
include_from_path,
|
||||
|
@ -116,21 +116,29 @@
|
||||
width: 50%;
|
||||
|
||||
display: flex;
|
||||
flex-flow: row wrap;
|
||||
flex-flow: column nowrap;
|
||||
}
|
||||
.unconnected #url {
|
||||
width: 100%;
|
||||
}
|
||||
.unconnected #user {
|
||||
#user {
|
||||
margin-right: 0.25rem;
|
||||
width: 50%;
|
||||
}
|
||||
.unconnected #password {
|
||||
#password {
|
||||
width: 49.5%;
|
||||
}
|
||||
.unconnected input {
|
||||
margin-bottom: 5px;
|
||||
}
|
||||
|
||||
#username-password {
|
||||
width: 100%;
|
||||
|
||||
display: flex;
|
||||
flex-flow: row nowrap;
|
||||
}
|
||||
|
||||
.inputs #chart-params {
|
||||
display: block;
|
||||
}
|
||||
@ -142,7 +150,7 @@
|
||||
#connection-params {
|
||||
margin-bottom: 0.5rem;
|
||||
display: grid;
|
||||
grid-template-columns: auto 15% 15%;
|
||||
grid-template-columns: 69.77% 30%;
|
||||
column-gap: 0.25rem;
|
||||
}
|
||||
|
||||
@ -339,8 +347,10 @@
|
||||
<form id="params">
|
||||
<div id="connection-params">
|
||||
<input spellcheck="false" id="url" type="text" value="" placeholder="URL" />
|
||||
<input spellcheck="false" id="user" type="text" value="" placeholder="user" />
|
||||
<input spellcheck="false" id="password" type="password" placeholder="password" />
|
||||
<div id="username-password">
|
||||
<input spellcheck="false" id="user" type="text" value="" placeholder="user" />
|
||||
<input spellcheck="false" id="password" type="password" placeholder="password" />
|
||||
</div>
|
||||
</div>
|
||||
<div>
|
||||
<input id="reload" type="button" value="Reload">
|
||||
|
62
src/Analyzer/Passes/AutoFinalOnQueryPass.cpp
Normal file
@ -0,0 +1,62 @@
|
||||
#include "AutoFinalOnQueryPass.h"
|
||||
|
||||
#include <Analyzer/TableNode.h>
|
||||
#include <Analyzer/TableExpressionModifiers.h>
|
||||
#include <Storages/IStorage.h>
|
||||
#include <Analyzer/InDepthQueryTreeVisitor.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace
|
||||
{
|
||||
class AutoFinalOnQueryPassVisitor : public InDepthQueryTreeVisitorWithContext<AutoFinalOnQueryPassVisitor>
|
||||
{
|
||||
public:
|
||||
using Base = InDepthQueryTreeVisitorWithContext<AutoFinalOnQueryPassVisitor>;
|
||||
using Base::Base;
|
||||
|
||||
void visitImpl(QueryTreeNodePtr & node)
|
||||
{
|
||||
if (auto * table_node = node->as<TableNode>())
|
||||
{
|
||||
if (autoFinalOnQuery(*table_node, table_node->getStorage(), getContext()))
|
||||
{
|
||||
auto modifier = TableExpressionModifiers(true, std::nullopt, std::nullopt);
|
||||
table_node->setTableExpressionModifiers(modifier);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
static bool autoFinalOnQuery(TableNode & table_node, StoragePtr storage, ContextPtr context)
|
||||
{
|
||||
bool is_auto_final_setting_on = context->getSettingsRef().final;
|
||||
bool is_final_supported = storage && storage->supportsFinal() && !storage->isRemote();
|
||||
bool is_query_already_final = table_node.hasTableExpressionModifiers() ? table_node.getTableExpressionModifiers().has_value() : false;
|
||||
|
||||
return is_auto_final_setting_on && !is_query_already_final && is_final_supported;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
String AutoFinalOnQueryPass::getName()
|
||||
{
|
||||
return "AutoFinalOnQueryPass";
|
||||
}
|
||||
|
||||
String AutoFinalOnQueryPass::getDescription()
|
||||
{
|
||||
return "Automatically applies final modifier to queries if it is supported and if user level final setting is set.";
|
||||
}
|
||||
|
||||
void AutoFinalOnQueryPass::run(QueryTreeNodePtr query_tree_node, ContextPtr context)
|
||||
{
|
||||
auto visitor = AutoFinalOnQueryPassVisitor(std::move(context));
|
||||
|
||||
visitor.visit(query_tree_node);
|
||||
}
|
||||
|
||||
}
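A rough sketch of the behaviour this pass enables, assuming a hypothetical table `t` using an engine that supports FINAL (e.g. ReplacingMergeTree) and is local, per the checks above:

```sql
SET final = 1;    -- the user-level setting read by the pass
SELECT * FROM t;  -- now planned as if the query had been written with FINAL
```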
|
21
src/Analyzer/Passes/AutoFinalOnQueryPass.h
Normal file
@ -0,0 +1,21 @@
|
||||
#pragma once
|
||||
|
||||
#include <Analyzer/IQueryTreePass.h>
|
||||
#include <Storages/IStorage_fwd.h>
|
||||
#include <Analyzer/TableNode.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
|
||||
class AutoFinalOnQueryPass final : public IQueryTreePass
|
||||
{
|
||||
public:
|
||||
String getName() override;
|
||||
|
||||
String getDescription() override;
|
||||
|
||||
void run(QueryTreeNodePtr query_tree_node, ContextPtr context) override;
|
||||
};
|
||||
|
||||
}
|
@ -36,9 +36,11 @@
|
||||
#include <Analyzer/Passes/ConvertOrLikeChainPass.h>
|
||||
#include <Analyzer/Passes/OptimizeRedundantFunctionsInOrderByPass.h>
|
||||
#include <Analyzer/Passes/GroupingFunctionsResolvePass.h>
|
||||
#include <Analyzer/Passes/AutoFinalOnQueryPass.h>
|
||||
#include <Analyzer/Passes/ArrayExistsToHasPass.h>
|
||||
#include <Analyzer/Passes/ComparisonTupleEliminationPass.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
@ -263,6 +265,9 @@ void addQueryTreePasses(QueryTreePassManager & manager)
|
||||
manager.addPass(std::make_unique<ConvertOrLikeChainPass>());
|
||||
|
||||
manager.addPass(std::make_unique<GroupingFunctionsResolvePass>());
|
||||
|
||||
manager.addPass(std::make_unique<AutoFinalOnQueryPass>());
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -202,4 +202,9 @@ Strings BackupCoordinationLocal::getAllArchiveSuffixes() const
|
||||
return archive_suffixes;
|
||||
}
|
||||
|
||||
bool BackupCoordinationLocal::hasConcurrentBackups(const std::atomic<size_t> & num_active_backups) const
|
||||
{
|
||||
return (num_active_backups > 1);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -52,6 +52,8 @@ public:
|
||||
String getNextArchiveSuffix() override;
|
||||
Strings getAllArchiveSuffixes() const override;
|
||||
|
||||
bool hasConcurrentBackups(const std::atomic<size_t> & num_active_backups) const override;
|
||||
|
||||
private:
|
||||
mutable std::mutex mutex;
|
||||
BackupCoordinationReplicatedTables replicated_tables TSA_GUARDED_BY(mutex);
|
||||
|
@ -7,6 +7,7 @@
|
||||
#include <Common/ZooKeeper/KeeperException.h>
|
||||
#include <Common/escapeForFileName.h>
|
||||
#include <Common/hex.h>
|
||||
#include <Backups/BackupCoordinationStage.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
@ -18,6 +19,8 @@ namespace ErrorCodes
|
||||
extern const int LOGICAL_ERROR;
|
||||
}
|
||||
|
||||
namespace Stage = BackupCoordinationStage;
|
||||
|
||||
/// zookeeper_path/file_names/file_name->checksum_and_size
|
||||
/// zookeeper_path/file_infos/checksum_and_size->info
|
||||
/// zookeeper_path/archive_suffixes
|
||||
@ -160,27 +163,26 @@ namespace
|
||||
{
|
||||
return fmt::format("{:03}", counter); /// Outputs 001, 002, 003, ...
|
||||
}
|
||||
|
||||
/// We try to store data to zookeeper several times due to possible version conflicts.
|
||||
constexpr size_t NUM_ATTEMPTS = 10;
|
||||
}
|
||||
|
||||
BackupCoordinationRemote::BackupCoordinationRemote(
|
||||
const String & zookeeper_path_, zkutil::GetZooKeeper get_zookeeper_, bool remove_zk_nodes_in_destructor_)
|
||||
: zookeeper_path(zookeeper_path_)
|
||||
const String & root_zookeeper_path_, const String & backup_uuid_, zkutil::GetZooKeeper get_zookeeper_, bool is_internal_)
|
||||
: root_zookeeper_path(root_zookeeper_path_)
|
||||
, zookeeper_path(root_zookeeper_path_ + "/backup-" + backup_uuid_)
|
||||
, backup_uuid(backup_uuid_)
|
||||
, get_zookeeper(get_zookeeper_)
|
||||
, remove_zk_nodes_in_destructor(remove_zk_nodes_in_destructor_)
|
||||
, is_internal(is_internal_)
|
||||
{
|
||||
createRootNodes();
|
||||
stage_sync.emplace(
|
||||
zookeeper_path_ + "/stage", [this] { return getZooKeeper(); }, &Poco::Logger::get("BackupCoordination"));
|
||||
zookeeper_path + "/stage", [this] { return getZooKeeper(); }, &Poco::Logger::get("BackupCoordination"));
|
||||
}
|
||||
|
||||
BackupCoordinationRemote::~BackupCoordinationRemote()
|
||||
{
|
||||
try
|
||||
{
|
||||
if (remove_zk_nodes_in_destructor)
|
||||
if (!is_internal)
|
||||
removeAllNodes();
|
||||
}
|
||||
catch (...)
|
||||
@ -468,7 +470,7 @@ void BackupCoordinationRemote::updateFileInfo(const FileInfo & file_info)
|
||||
auto zk = getZooKeeper();
|
||||
String size_and_checksum = serializeSizeAndChecksum(std::pair{file_info.size, file_info.checksum});
|
||||
String full_path = zookeeper_path + "/file_infos/" + size_and_checksum;
|
||||
for (size_t attempt = 0; attempt < NUM_ATTEMPTS; ++attempt)
|
||||
for (size_t attempt = 0; attempt < MAX_ZOOKEEPER_ATTEMPTS; ++attempt)
|
||||
{
|
||||
Coordination::Stat stat;
|
||||
auto new_info = deserializeFileInfo(zk->get(full_path, &stat));
|
||||
@ -476,7 +478,7 @@ void BackupCoordinationRemote::updateFileInfo(const FileInfo & file_info)
|
||||
auto code = zk->trySet(full_path, serializeFileInfo(new_info), stat.version);
|
||||
if (code == Coordination::Error::ZOK)
|
||||
return;
|
||||
bool is_last_attempt = (attempt == NUM_ATTEMPTS - 1);
|
||||
bool is_last_attempt = (attempt == MAX_ZOOKEEPER_ATTEMPTS - 1);
|
||||
if ((code != Coordination::Error::ZBADVERSION) || is_last_attempt)
|
||||
throw zkutil::KeeperException(code, full_path);
|
||||
}
|
||||
@ -595,4 +597,51 @@ Strings BackupCoordinationRemote::getAllArchiveSuffixes() const
|
||||
return node_names;
|
||||
}
|
||||
|
||||
bool BackupCoordinationRemote::hasConcurrentBackups(const std::atomic<size_t> &) const
|
||||
{
|
||||
/// If it's internal, concurrency will be checked for the base backup
|
||||
if (is_internal)
|
||||
return false;
|
||||
|
||||
auto zk = getZooKeeper();
|
||||
std::string backup_stage_path = zookeeper_path + "/stage";
|
||||
|
||||
if (!zk->exists(root_zookeeper_path))
|
||||
zk->createAncestors(root_zookeeper_path);
|
||||
|
||||
for (size_t attempt = 0; attempt < MAX_ZOOKEEPER_ATTEMPTS; ++attempt)
|
||||
{
|
||||
Coordination::Stat stat;
|
||||
zk->get(root_zookeeper_path, &stat);
|
||||
Strings existing_backup_paths = zk->getChildren(root_zookeeper_path);
|
||||
|
||||
for (const auto & existing_backup_path : existing_backup_paths)
|
||||
{
|
||||
if (startsWith(existing_backup_path, "restore-"))
|
||||
continue;
|
||||
|
||||
String existing_backup_uuid = existing_backup_path;
|
||||
existing_backup_uuid.erase(0, String("backup-").size());
|
||||
|
||||
if (existing_backup_uuid == toString(backup_uuid))
|
||||
continue;
|
||||
|
||||
const auto status = zk->get(root_zookeeper_path + "/" + existing_backup_path + "/stage");
|
||||
if (status != Stage::COMPLETED)
|
||||
return true;
|
||||
}
|
||||
|
||||
zk->createIfNotExists(backup_stage_path, "");
|
||||
auto code = zk->trySet(backup_stage_path, Stage::SCHEDULED_TO_START, stat.version);
|
||||
if (code == Coordination::Error::ZOK)
|
||||
break;
|
||||
bool is_last_attempt = (attempt == MAX_ZOOKEEPER_ATTEMPTS - 1);
|
||||
if ((code != Coordination::Error::ZBADVERSION) || is_last_attempt)
|
||||
throw zkutil::KeeperException(code, backup_stage_path);
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
@ -9,11 +9,14 @@
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/// We try to store data in ZooKeeper several times due to possible version conflicts.
|
||||
constexpr size_t MAX_ZOOKEEPER_ATTEMPTS = 10;
|
||||
|
||||
/// Implementation of the IBackupCoordination interface performing coordination via ZooKeeper. It's necessary for "BACKUP ON CLUSTER".
|
||||
class BackupCoordinationRemote : public IBackupCoordination
|
||||
{
|
||||
public:
|
||||
BackupCoordinationRemote(const String & zookeeper_path_, zkutil::GetZooKeeper get_zookeeper_, bool remove_zk_nodes_in_destructor_);
|
||||
BackupCoordinationRemote(const String & root_zookeeper_path_, const String & backup_uuid_, zkutil::GetZooKeeper get_zookeeper_, bool is_internal_);
|
||||
~BackupCoordinationRemote() override;
|
||||
|
||||
void setStage(const String & current_host, const String & new_stage, const String & message) override;
|
||||
@ -55,6 +58,8 @@ public:
|
||||
String getNextArchiveSuffix() override;
|
||||
Strings getAllArchiveSuffixes() const override;
|
||||
|
||||
bool hasConcurrentBackups(const std::atomic<size_t> & num_active_backups) const override;
|
||||
|
||||
private:
|
||||
zkutil::ZooKeeperPtr getZooKeeper() const;
|
||||
zkutil::ZooKeeperPtr getZooKeeperNoLock() const;
|
||||
@ -63,9 +68,11 @@ private:
|
||||
void prepareReplicatedTables() const;
|
||||
void prepareReplicatedAccess() const;
|
||||
|
||||
const String root_zookeeper_path;
|
||||
const String zookeeper_path;
|
||||
const String backup_uuid;
|
||||
const zkutil::GetZooKeeper get_zookeeper;
|
||||
const bool remove_zk_nodes_in_destructor;
|
||||
const bool is_internal;
|
||||
|
||||
std::optional<BackupCoordinationStageSync> stage_sync;
|
||||
|
||||
|
@ -8,6 +8,10 @@ namespace DB
|
||||
|
||||
namespace BackupCoordinationStage
|
||||
{
|
||||
/// This stage is set after the concurrency check to ensure we don't start other backups/restores
|
||||
/// when concurrent backups/restores are not allowed
|
||||
constexpr const char * SCHEDULED_TO_START = "scheduled to start";
|
||||
|
||||
/// Finding all tables and databases which we're going to put to the backup and collecting their metadata.
|
||||
constexpr const char * GATHERING_METADATA = "gathering metadata";
|
||||
|
||||
|
@ -28,7 +28,6 @@ namespace ErrorCodes
|
||||
M(UInt64, replica_num) \
|
||||
M(Bool, internal) \
|
||||
M(String, host_id) \
|
||||
M(String, coordination_zk_path) \
|
||||
M(OptionalUUID, backup_uuid)
|
||||
/// M(Int64, compression_level)
|
||||
|
||||
|
@ -55,10 +55,6 @@ struct BackupSettings
|
||||
/// Cluster's hosts' IDs in the format 'escaped_host_name:port' for all shards and replicas in a cluster specified in BACKUP ON CLUSTER.
|
||||
std::vector<Strings> cluster_host_ids;
|
||||
|
||||
/// Internal, should not be specified by user.
|
||||
/// Path in Zookeeper used to coordinate a distributed backup created by BACKUP ON CLUSTER.
|
||||
String coordination_zk_path;
|
||||
|
||||
/// Internal, should not be specified by user.
|
||||
/// UUID of the backup. If it's not set it will be generated randomly.
|
||||
std::optional<UUID> backup_uuid;
|
||||
|
@ -38,12 +38,12 @@ namespace Stage = BackupCoordinationStage;
|
||||
|
||||
namespace
|
||||
{
|
||||
std::shared_ptr<IBackupCoordination> makeBackupCoordination(const String & coordination_zk_path, const ContextPtr & context, bool is_internal_backup)
|
||||
std::shared_ptr<IBackupCoordination> makeBackupCoordination(const String & root_zk_path, const String & backup_uuid, const ContextPtr & context, bool is_internal_backup)
|
||||
{
|
||||
if (!coordination_zk_path.empty())
|
||||
if (!root_zk_path.empty())
|
||||
{
|
||||
auto get_zookeeper = [global_context = context->getGlobalContext()] { return global_context->getZooKeeper(); };
|
||||
return std::make_shared<BackupCoordinationRemote>(coordination_zk_path, get_zookeeper, !is_internal_backup);
|
||||
return std::make_shared<BackupCoordinationRemote>(root_zk_path, backup_uuid, get_zookeeper, is_internal_backup);
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -51,12 +51,12 @@ namespace
|
||||
}
|
||||
}
|
||||
|
||||
std::shared_ptr<IRestoreCoordination> makeRestoreCoordination(const String & coordination_zk_path, const ContextPtr & context, bool is_internal_backup)
|
||||
std::shared_ptr<IRestoreCoordination> makeRestoreCoordination(const String & root_zk_path, const String & restore_uuid, const ContextPtr & context, bool is_internal_backup)
|
||||
{
|
||||
if (!coordination_zk_path.empty())
|
||||
if (!root_zk_path.empty())
|
||||
{
|
||||
auto get_zookeeper = [global_context = context->getGlobalContext()] { return global_context->getZooKeeper(); };
|
||||
return std::make_shared<RestoreCoordinationRemote>(coordination_zk_path, get_zookeeper, !is_internal_backup);
|
||||
return std::make_shared<RestoreCoordinationRemote>(root_zk_path, restore_uuid, get_zookeeper, is_internal_backup);
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -160,26 +160,22 @@ OperationID BackupsWorker::startMakingBackup(const ASTPtr & query, const Context
|
||||
else
|
||||
backup_id = toString(*backup_settings.backup_uuid);
|
||||
|
||||
String root_zk_path;
|
||||
|
||||
std::shared_ptr<IBackupCoordination> backup_coordination;
|
||||
if (backup_settings.internal)
|
||||
{
|
||||
/// The following call of makeBackupCoordination() is not essential because doBackup() will later create a backup coordination
|
||||
/// if it's not created here. However to handle errors better it's better to make a coordination here because this way
|
||||
/// if an exception will be thrown in startMakingBackup() other hosts will know about that.
|
||||
backup_coordination = makeBackupCoordination(backup_settings.coordination_zk_path, context, backup_settings.internal);
|
||||
root_zk_path = context->getConfigRef().getString("backups.zookeeper_path", "/clickhouse/backups");
|
||||
backup_coordination = makeBackupCoordination(root_zk_path, toString(*backup_settings.backup_uuid), context, backup_settings.internal);
|
||||
}
|
||||
|
||||
auto backup_info = BackupInfo::fromAST(*backup_query->backup_name);
|
||||
String backup_name_for_logging = backup_info.toStringForLogging();
|
||||
try
|
||||
{
|
||||
if (!allow_concurrent_backups && hasConcurrentBackups(backup_settings))
|
||||
{
|
||||
/// addInfo is called here to record the failed backup details
|
||||
addInfo(backup_id, backup_name_for_logging, backup_settings.internal, BackupStatus::BACKUP_FAILED);
|
||||
throw Exception(ErrorCodes::CONCURRENT_ACCESS_NOT_SUPPORTED, "Concurrent backups not supported, turn on setting 'allow_concurrent_backups'");
|
||||
}
|
||||
|
||||
addInfo(backup_id, backup_name_for_logging, backup_settings.internal, BackupStatus::CREATING_BACKUP);
|
||||
|
||||
/// Prepare context to use.
|
||||
@ -259,6 +255,7 @@ void BackupsWorker::doBackup(
|
||||
}
|
||||
|
||||
bool on_cluster = !backup_query->cluster.empty();
|
||||
|
||||
assert(mutable_context || (!on_cluster && !called_async));
|
||||
|
||||
/// Checks access rights if this is not ON CLUSTER query.
|
||||
@ -267,22 +264,23 @@ void BackupsWorker::doBackup(
|
||||
if (!on_cluster)
|
||||
context->checkAccess(required_access);
|
||||
|
||||
String root_zk_path;
|
||||
|
||||
ClusterPtr cluster;
|
||||
if (on_cluster)
|
||||
{
|
||||
root_zk_path = context->getConfigRef().getString("backups.zookeeper_path", "/clickhouse/backups");
|
||||
backup_query->cluster = context->getMacros()->expand(backup_query->cluster);
|
||||
cluster = context->getCluster(backup_query->cluster);
|
||||
backup_settings.cluster_host_ids = cluster->getHostIDs();
|
||||
if (backup_settings.coordination_zk_path.empty())
|
||||
{
|
||||
String root_zk_path = context->getConfigRef().getString("backups.zookeeper_path", "/clickhouse/backups");
|
||||
backup_settings.coordination_zk_path = root_zk_path + "/backup-" + toString(*backup_settings.backup_uuid);
|
||||
}
|
||||
}
|
||||
|
||||
/// Make a backup coordination.
|
||||
if (!backup_coordination)
|
||||
backup_coordination = makeBackupCoordination(backup_settings.coordination_zk_path, context, backup_settings.internal);
|
||||
backup_coordination = makeBackupCoordination(root_zk_path, toString(*backup_settings.backup_uuid), context, backup_settings.internal);
|
||||
|
||||
if (!allow_concurrent_backups && backup_coordination->hasConcurrentBackups(std::ref(num_active_backups)))
|
||||
throw Exception(ErrorCodes::CONCURRENT_ACCESS_NOT_SUPPORTED, "Concurrent backups not supported, turn on setting 'allow_concurrent_backups'");
|
||||
|
||||
/// Opens a backup for writing.
|
||||
BackupFactory::CreateParams backup_create_params;
|
||||
@ -402,7 +400,8 @@ OperationID BackupsWorker::startRestoring(const ASTPtr & query, ContextMutablePt
|
||||
/// The following call of makeRestoreCoordination() is not essential because doRestore() will later create a restore coordination
|
||||
/// if it's not created here. However, to handle errors better, it's better to make a coordination here because this way
|
||||
/// if an exception is thrown in startRestoring(), other hosts will know about it.
|
||||
restore_coordination = makeRestoreCoordination(restore_settings.coordination_zk_path, context, restore_settings.internal);
|
||||
auto root_zk_path = context->getConfigRef().getString("backups.zookeeper_path", "/clickhouse/backups");
|
||||
restore_coordination = makeRestoreCoordination(root_zk_path, toString(*restore_settings.restore_uuid), context, restore_settings.internal);
|
||||
}
|
||||
|
||||
try
|
||||
@ -410,13 +409,6 @@ OperationID BackupsWorker::startRestoring(const ASTPtr & query, ContextMutablePt
|
||||
auto backup_info = BackupInfo::fromAST(*restore_query->backup_name);
|
||||
String backup_name_for_logging = backup_info.toStringForLogging();
|
||||
|
||||
if (!allow_concurrent_restores && hasConcurrentRestores(restore_settings))
|
||||
{
|
||||
/// addInfo is called here to record the failed restore details
|
||||
addInfo(restore_id, backup_name_for_logging, restore_settings.internal, BackupStatus::RESTORING);
|
||||
throw Exception(ErrorCodes::CONCURRENT_ACCESS_NOT_SUPPORTED, "Concurrent restores not supported, turn on setting 'allow_concurrent_restores'");
|
||||
}
|
||||
|
||||
addInfo(restore_id, backup_name_for_logging, restore_settings.internal, BackupStatus::RESTORING);
|
||||
|
||||
/// Prepare context to use.
|
||||
@ -496,18 +488,18 @@ void BackupsWorker::doRestore(
|
||||
backup_open_params.context = context;
|
||||
backup_open_params.backup_info = backup_info;
|
||||
backup_open_params.base_backup_info = restore_settings.base_backup_info;
|
||||
backup_open_params.backup_uuid = restore_settings.restore_uuid;
|
||||
backup_open_params.password = restore_settings.password;
|
||||
BackupPtr backup = BackupFactory::instance().createBackup(backup_open_params);
|
||||
|
||||
String current_database = context->getCurrentDatabase();
|
||||
|
||||
String root_zk_path;
|
||||
/// Checks access rights if this is ON CLUSTER query.
|
||||
/// (If this isn't ON CLUSTER query RestorerFromBackup will check access rights later.)
|
||||
ClusterPtr cluster;
|
||||
bool on_cluster = !restore_query->cluster.empty();
|
||||
if (on_cluster)
|
||||
{
|
||||
root_zk_path = context->getConfigRef().getString("backups.zookeeper_path", "/clickhouse/backups");
|
||||
restore_query->cluster = context->getMacros()->expand(restore_query->cluster);
|
||||
cluster = context->getCluster(restore_query->cluster);
|
||||
restore_settings.cluster_host_ids = cluster->getHostIDs();
|
||||
@ -529,14 +521,11 @@ void BackupsWorker::doRestore(
|
||||
}
|
||||
|
||||
/// Make a restore coordination.
|
||||
if (on_cluster && restore_settings.coordination_zk_path.empty())
|
||||
{
|
||||
String root_zk_path = context->getConfigRef().getString("backups.zookeeper_path", "/clickhouse/backups");
|
||||
restore_settings.coordination_zk_path = root_zk_path + "/restore-" + toString(UUIDHelpers::generateV4());
|
||||
}
|
||||
|
||||
if (!restore_coordination)
|
||||
restore_coordination = makeRestoreCoordination(restore_settings.coordination_zk_path, context, restore_settings.internal);
|
||||
restore_coordination = makeRestoreCoordination(root_zk_path, toString(*restore_settings.restore_uuid), context, restore_settings.internal);
|
||||
|
||||
if (!allow_concurrent_restores && restore_coordination->hasConcurrentRestores(std::ref(num_active_restores)))
|
||||
throw Exception(ErrorCodes::CONCURRENT_ACCESS_NOT_SUPPORTED, "Concurrent restores not supported, turn on setting 'allow_concurrent_restores'");
|
||||
|
||||
/// Do RESTORE.
|
||||
if (on_cluster)
|
||||
@ -726,58 +715,6 @@ std::vector<BackupsWorker::Info> BackupsWorker::getAllInfos() const
|
||||
return res_infos;
|
||||
}
|
||||
|
||||
std::vector<BackupsWorker::Info> BackupsWorker::getAllActiveBackupInfos() const
|
||||
{
|
||||
std::vector<Info> res_infos;
|
||||
std::lock_guard lock{infos_mutex};
|
||||
for (const auto & info : infos | boost::adaptors::map_values)
|
||||
{
|
||||
if (info.status==BackupStatus::CREATING_BACKUP)
|
||||
res_infos.push_back(info);
|
||||
}
|
||||
return res_infos;
|
||||
}
|
||||
|
||||
std::vector<BackupsWorker::Info> BackupsWorker::getAllActiveRestoreInfos() const
|
||||
{
|
||||
std::vector<Info> res_infos;
|
||||
std::lock_guard lock{infos_mutex};
|
||||
for (const auto & info : infos | boost::adaptors::map_values)
|
||||
{
|
||||
if (info.status==BackupStatus::RESTORING)
|
||||
res_infos.push_back(info);
|
||||
}
|
||||
return res_infos;
|
||||
}
|
||||
|
||||
bool BackupsWorker::hasConcurrentBackups(const BackupSettings & backup_settings) const
|
||||
{
|
||||
/// Check if there are no concurrent backups
|
||||
if (num_active_backups)
|
||||
{
|
||||
/// If its an internal backup and we currently have 1 active backup, it could be the original query, validate using backup_uuid
|
||||
if (!(num_active_backups == 1 && backup_settings.internal && getAllActiveBackupInfos().at(0).id == toString(*backup_settings.backup_uuid)))
|
||||
{
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool BackupsWorker::hasConcurrentRestores(const RestoreSettings & restore_settings) const
|
||||
{
|
||||
/// Check if there are no concurrent restores
|
||||
if (num_active_restores)
|
||||
{
|
||||
/// If its an internal restore and we currently have 1 active restore, it could be the original query, validate using iz
|
||||
if (!(num_active_restores == 1 && restore_settings.internal && getAllActiveRestoreInfos().at(0).id == toString(*restore_settings.restore_uuid)))
|
||||
{
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
void BackupsWorker::shutdown()
|
||||
{
|
||||
bool has_active_backups_and_restores = (num_active_backups || num_active_restores);
|
||||
|
@ -117,11 +117,6 @@ private:
|
||||
void setNumFilesAndSize(const OperationID & id, size_t num_files, UInt64 total_size, size_t num_entries,
|
||||
UInt64 uncompressed_size, UInt64 compressed_size, size_t num_read_files, UInt64 num_read_bytes);
|
||||
|
||||
std::vector<Info> getAllActiveBackupInfos() const;
|
||||
std::vector<Info> getAllActiveRestoreInfos() const;
|
||||
bool hasConcurrentBackups(const BackupSettings & backup_settings) const;
|
||||
bool hasConcurrentRestores(const RestoreSettings & restore_settings) const;
|
||||
|
||||
ThreadPool backups_thread_pool;
|
||||
ThreadPool restores_thread_pool;
|
||||
|
||||
|
@ -114,6 +114,10 @@ public:
|
||||
|
||||
/// Returns the list of all the archive suffixes which were generated.
|
||||
virtual Strings getAllArchiveSuffixes() const = 0;
|
||||
|
||||
/// This function is used to check if concurrent backups are running
|
||||
/// other than the backup passed to the function
|
||||
virtual bool hasConcurrentBackups(const std::atomic<size_t> & num_active_backups) const = 0;
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -34,6 +34,10 @@ public:
|
||||
/// Sets that this replica is going to restore a ReplicatedAccessStorage.
|
||||
/// The function returns false if this access storage is being already restored by another replica.
|
||||
virtual bool acquireReplicatedAccessStorage(const String & access_storage_zk_path) = 0;
|
||||
|
||||
/// This function is used to check if concurrent restores are running
|
||||
/// other than the restore passed to the function
|
||||
virtual bool hasConcurrentRestores(const std::atomic<size_t> & num_active_restores) const = 0;
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -42,4 +42,9 @@ bool RestoreCoordinationLocal::acquireReplicatedAccessStorage(const String &)
|
||||
return true;
|
||||
}
|
||||
|
||||
bool RestoreCoordinationLocal::hasConcurrentRestores(const std::atomic<size_t> & num_active_restores) const
|
||||
{
|
||||
return (num_active_restores > 1);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -35,6 +35,8 @@ public:
|
||||
/// The function returns false if this access storage is being already restored by another replica.
|
||||
bool acquireReplicatedAccessStorage(const String & access_storage_zk_path) override;
|
||||
|
||||
bool hasConcurrentRestores(const std::atomic<size_t> & num_active_restores) const override;
|
||||
|
||||
private:
|
||||
std::set<std::pair<String /* database_zk_path */, String /* table_name */>> acquired_tables_in_replicated_databases;
|
||||
std::unordered_set<String /* table_zk_path */> acquired_data_in_replicated_tables;
|
||||
|
@ -1,28 +1,33 @@
|
||||
#include <Backups/RestoreCoordinationRemote.h>
|
||||
#include <Common/ZooKeeper/KeeperException.h>
|
||||
#include <Common/escapeForFileName.h>
|
||||
|
||||
#include <Backups/BackupCoordinationStage.h>
|
||||
#include <Backups/BackupCoordinationRemote.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace Stage = BackupCoordinationStage;
|
||||
|
||||
RestoreCoordinationRemote::RestoreCoordinationRemote(
|
||||
const String & zookeeper_path_, zkutil::GetZooKeeper get_zookeeper_, bool remove_zk_nodes_in_destructor_)
|
||||
: zookeeper_path(zookeeper_path_)
|
||||
const String & root_zookeeper_path_, const String & restore_uuid_, zkutil::GetZooKeeper get_zookeeper_, bool is_internal_)
|
||||
: root_zookeeper_path(root_zookeeper_path_)
|
||||
, zookeeper_path(root_zookeeper_path_ + "/restore-" + restore_uuid_)
|
||||
, restore_uuid(restore_uuid_)
|
||||
, get_zookeeper(get_zookeeper_)
|
||||
, remove_zk_nodes_in_destructor(remove_zk_nodes_in_destructor_)
|
||||
, is_internal(is_internal_)
|
||||
{
|
||||
createRootNodes();
|
||||
|
||||
stage_sync.emplace(
|
||||
zookeeper_path_ + "/stage", [this] { return getZooKeeper(); }, &Poco::Logger::get("RestoreCoordination"));
|
||||
zookeeper_path + "/stage", [this] { return getZooKeeper(); }, &Poco::Logger::get("RestoreCoordination"));
|
||||
}
|
||||
|
||||
RestoreCoordinationRemote::~RestoreCoordinationRemote()
|
||||
{
|
||||
try
|
||||
{
|
||||
if (remove_zk_nodes_in_destructor)
|
||||
if (!is_internal)
|
||||
removeAllNodes();
|
||||
}
|
||||
catch (...)
|
||||
@ -129,4 +134,49 @@ void RestoreCoordinationRemote::removeAllNodes()
|
||||
zk->removeRecursive(zookeeper_path);
|
||||
}
|
||||
|
||||
bool RestoreCoordinationRemote::hasConcurrentRestores(const std::atomic<size_t> &) const
|
||||
{
|
||||
/// If it's internal, concurrency will be checked for the base restore
|
||||
if (is_internal)
|
||||
return false;
|
||||
|
||||
auto zk = getZooKeeper();
|
||||
std::string path = zookeeper_path + "/stage";
|
||||
|
||||
if (!zk->exists(root_zookeeper_path))
|
||||
zk->createAncestors(root_zookeeper_path);
|
||||
|
||||
for (size_t attempt = 0; attempt < MAX_ZOOKEEPER_ATTEMPTS; ++attempt)
|
||||
{
|
||||
Coordination::Stat stat;
|
||||
zk->get(root_zookeeper_path, &stat);
|
||||
Strings existing_restore_paths = zk->getChildren(root_zookeeper_path);
|
||||
for (const auto & existing_restore_path : existing_restore_paths)
|
||||
{
|
||||
if (startsWith(existing_restore_path, "backup-"))
|
||||
continue;
|
||||
|
||||
String existing_restore_uuid = existing_restore_path;
|
||||
existing_restore_uuid.erase(0, String("restore-").size());
|
||||
|
||||
if (existing_restore_uuid == toString(restore_uuid))
|
||||
continue;
|
||||
|
||||
|
||||
const auto status = zk->get(root_zookeeper_path + "/" + existing_restore_path + "/stage");
|
||||
if (status != Stage::COMPLETED)
|
||||
return true;
|
||||
}
|
||||
|
||||
zk->createIfNotExists(path, "");
|
||||
auto code = zk->trySet(path, Stage::SCHEDULED_TO_START, stat.version);
|
||||
if (code == Coordination::Error::ZOK)
|
||||
break;
|
||||
bool is_last_attempt = (attempt == MAX_ZOOKEEPER_ATTEMPTS - 1);
|
||||
if ((code != Coordination::Error::ZBADVERSION) || is_last_attempt)
|
||||
throw zkutil::KeeperException(code, path);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -11,7 +11,7 @@ namespace DB
|
||||
class RestoreCoordinationRemote : public IRestoreCoordination
|
||||
{
|
||||
public:
|
||||
RestoreCoordinationRemote(const String & zookeeper_path_, zkutil::GetZooKeeper get_zookeeper_, bool remove_zk_nodes_in_destructor_);
|
||||
RestoreCoordinationRemote(const String & root_zookeeper_path_, const String & restore_uuid_, zkutil::GetZooKeeper get_zookeeper_, bool is_internal_);
|
||||
~RestoreCoordinationRemote() override;
|
||||
|
||||
/// Sets the current stage and waits for other hosts to come to this stage too.
|
||||
@ -31,6 +31,8 @@ public:
|
||||
/// The function returns false if this access storage is being already restored by another replica.
|
||||
bool acquireReplicatedAccessStorage(const String & access_storage_zk_path) override;
|
||||
|
||||
bool hasConcurrentRestores(const std::atomic<size_t> & num_active_restores) const override;
|
||||
|
||||
private:
|
||||
zkutil::ZooKeeperPtr getZooKeeper() const;
|
||||
void createRootNodes();
|
||||
@ -38,9 +40,11 @@ private:
|
||||
|
||||
class ReplicatedDatabasesMetadataSync;
|
||||
|
||||
const String root_zookeeper_path;
|
||||
const String zookeeper_path;
|
||||
const String restore_uuid;
|
||||
const zkutil::GetZooKeeper get_zookeeper;
|
||||
const bool remove_zk_nodes_in_destructor;
|
||||
const bool is_internal;
|
||||
|
||||
std::optional<BackupCoordinationStageSync> stage_sync;
|
||||
|
||||
|
@ -163,7 +163,6 @@ namespace
|
||||
M(RestoreUDFCreationMode, create_function) \
|
||||
M(Bool, internal) \
|
||||
M(String, host_id) \
|
||||
M(String, coordination_zk_path) \
|
||||
M(OptionalUUID, restore_uuid)
|
||||
|
||||
|
||||
|
@ -118,13 +118,9 @@ struct RestoreSettings
|
||||
/// Cluster's hosts' IDs in the format 'escaped_host_name:port' for all shards and replicas in a cluster specified in BACKUP ON CLUSTER.
|
||||
std::vector<Strings> cluster_host_ids;
|
||||
|
||||
/// Internal, should not be specified by user.
|
||||
/// Path in Zookeeper used to coordinate restoring process while executing by RESTORE ON CLUSTER.
|
||||
String coordination_zk_path;
|
||||
|
||||
/// Internal, should not be specified by user.
|
||||
/// UUID of the restore. If it's not set it will be generated randomly.
|
||||
/// This is used to validate internal restores when allow_concurrent_restores is turned off
|
||||
/// This is used to generate the coordination path and for the concurrency check
|
||||
std::optional<UUID> restore_uuid;
|
||||
|
||||
static RestoreSettings fromRestoreQuery(const ASTBackupQuery & query);
|
||||
|
@ -110,4 +110,27 @@ ThreadGroupStatusPtr CurrentThread::getGroup()
    return current_thread->getThreadGroup();
}

MemoryTracker * CurrentThread::getUserMemoryTracker()
{
    if (unlikely(!current_thread))
        return nullptr;

    auto * tracker = current_thread->memory_tracker.getParent();
    while (tracker && tracker->level != VariableContext::User)
        tracker = tracker->getParent();

    return tracker;
}

void CurrentThread::flushUntrackedMemory()
{
    if (unlikely(!current_thread))
        return;
    if (current_thread->untracked_memory == 0)
        return;

    current_thread->memory_tracker.adjustWithUntrackedMemory(current_thread->untracked_memory);
    current_thread->untracked_memory = 0;
}

}

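getUserMemoryTracker() walks up the tracker hierarchy from the thread-level tracker until it reaches the user-level one, and flushUntrackedMemory() pushes the small allocations batched per thread into that hierarchy. The following is a self-contained toy model of this accounting scheme; the Tracker and ThreadState types are illustrative simplifications, not the real MemoryTracker API.

    #include <cstdint>
    #include <iostream>
    #include <string>

    enum class Level { Thread, Query, User, Global };

    /// Simplified tracker: a named counter with a parent pointer, like the
    /// MemoryTracker hierarchy thread -> query -> user -> global.
    struct Tracker
    {
        std::string name;
        Level level;
        Tracker * parent = nullptr;
        int64_t amount = 0;

        void add(int64_t bytes)
        {
            for (Tracker * t = this; t; t = t->parent)
                t->amount += bytes;
        }
    };

    /// Per-thread state: small allocations accumulate in untracked_memory and
    /// are flushed into the hierarchy only occasionally, which is why the code
    /// above flushes before a tracker switch.
    struct ThreadState
    {
        Tracker tracker{"thread", Level::Thread};
        int64_t untracked_memory = 0;

        void onAlloc(int64_t bytes, int64_t threshold = 4 * 1024)
        {
            untracked_memory += bytes;
            if (untracked_memory >= threshold)
                flushUntrackedMemory();
        }

        void flushUntrackedMemory()
        {
            if (untracked_memory == 0)
                return;
            tracker.add(untracked_memory);
            untracked_memory = 0;
        }

        /// Analogue of CurrentThread::getUserMemoryTracker(): climb parents
        /// until the user-level tracker is found.
        Tracker * userTracker()
        {
            Tracker * t = tracker.parent;
            while (t && t->level != Level::User)
                t = t->parent;
            return t;
        }
    };

    int main()
    {
        Tracker global{"global", Level::Global};
        Tracker user{"user", Level::User, &global};
        Tracker query{"query", Level::Query, &user};

        ThreadState thread;
        thread.tracker.parent = &query;

        thread.onAlloc(1000);            /// stays untracked (below threshold)
        thread.flushUntrackedMemory();   /// now attributed to query, user, global
        std::cout << thread.userTracker()->name << " " << user.amount << '\n';  /// user 1000
    }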
@ -40,6 +40,12 @@ public:
    /// Group to which belongs current thread
    static ThreadGroupStatusPtr getGroup();

    /// MemoryTracker for user that owns current thread if any
    static MemoryTracker * getUserMemoryTracker();

    /// Adjust counters in MemoryTracker hierarchy if untracked_memory is not 0.
    static void flushUntrackedMemory();

    /// A logs queue used by TCPHandler to pass logs to a client
    static void attachInternalTextLogsQueue(const std::shared_ptr<InternalTextLogsQueue> & logs_queue,
                                            LogsLevel client_logs_level);

@ -32,6 +32,9 @@ namespace ErrorCodes
|
||||
namespace NamedCollectionUtils
|
||||
{
|
||||
|
||||
static std::atomic<bool> is_loaded_from_config = false;
|
||||
static std::atomic<bool> is_loaded_from_sql = false;
|
||||
|
||||
class LoadFromConfig
|
||||
{
|
||||
private:
|
||||
@ -329,10 +332,21 @@ std::unique_lock<std::mutex> lockNamedCollectionsTransaction()
|
||||
return std::unique_lock(transaction_lock);
|
||||
}
|
||||
|
||||
void loadFromConfigUnlocked(const Poco::Util::AbstractConfiguration & config, std::unique_lock<std::mutex> &)
|
||||
{
|
||||
auto named_collections = LoadFromConfig(config).getAll();
|
||||
LOG_TRACE(
|
||||
&Poco::Logger::get("NamedCollectionsUtils"),
|
||||
"Loaded {} collections from config", named_collections.size());
|
||||
|
||||
NamedCollectionFactory::instance().add(std::move(named_collections));
|
||||
is_loaded_from_config = true;
|
||||
}
|
||||
|
||||
void loadFromConfig(const Poco::Util::AbstractConfiguration & config)
|
||||
{
|
||||
auto lock = lockNamedCollectionsTransaction();
|
||||
NamedCollectionFactory::instance().add(LoadFromConfig(config).getAll());
|
||||
loadFromConfigUnlocked(config, lock);
|
||||
}
|
||||
|
||||
void reloadFromConfig(const Poco::Util::AbstractConfiguration & config)
|
||||
@ -342,17 +356,47 @@ void reloadFromConfig(const Poco::Util::AbstractConfiguration & config)
|
||||
auto & instance = NamedCollectionFactory::instance();
|
||||
instance.removeById(SourceId::CONFIG);
|
||||
instance.add(collections);
|
||||
is_loaded_from_config = true;
|
||||
}
|
||||
|
||||
void loadFromSQLUnlocked(ContextPtr context, std::unique_lock<std::mutex> &)
|
||||
{
|
||||
auto named_collections = LoadFromSQL(context).getAll();
|
||||
LOG_TRACE(
|
||||
&Poco::Logger::get("NamedCollectionsUtils"),
|
||||
"Loaded {} collections from SQL", named_collections.size());
|
||||
|
||||
NamedCollectionFactory::instance().add(std::move(named_collections));
|
||||
is_loaded_from_sql = true;
|
||||
}
|
||||
|
||||
void loadFromSQL(ContextPtr context)
|
||||
{
|
||||
auto lock = lockNamedCollectionsTransaction();
|
||||
NamedCollectionFactory::instance().add(LoadFromSQL(context).getAll());
|
||||
loadFromSQLUnlocked(context, lock);
|
||||
}
|
||||
|
||||
void loadIfNotUnlocked(std::unique_lock<std::mutex> & lock)
|
||||
{
|
||||
auto global_context = Context::getGlobalContextInstance();
|
||||
if (!is_loaded_from_config)
|
||||
loadFromConfigUnlocked(global_context->getConfigRef(), lock);
|
||||
if (!is_loaded_from_sql)
|
||||
loadFromSQLUnlocked(global_context, lock);
|
||||
}
|
||||
|
||||
void loadIfNot()
|
||||
{
|
||||
if (is_loaded_from_sql && is_loaded_from_config)
|
||||
return;
|
||||
auto lock = lockNamedCollectionsTransaction();
|
||||
return loadIfNotUnlocked(lock);
|
||||
}
|
||||
|
||||
void removeFromSQL(const std::string & collection_name, ContextPtr context)
|
||||
{
|
||||
auto lock = lockNamedCollectionsTransaction();
|
||||
loadIfNotUnlocked(lock);
|
||||
LoadFromSQL(context).remove(collection_name);
|
||||
NamedCollectionFactory::instance().remove(collection_name);
|
||||
}
|
||||
@ -360,6 +404,7 @@ void removeFromSQL(const std::string & collection_name, ContextPtr context)
|
||||
void removeIfExistsFromSQL(const std::string & collection_name, ContextPtr context)
|
||||
{
|
||||
auto lock = lockNamedCollectionsTransaction();
|
||||
loadIfNotUnlocked(lock);
|
||||
LoadFromSQL(context).removeIfExists(collection_name);
|
||||
NamedCollectionFactory::instance().removeIfExists(collection_name);
|
||||
}
|
||||
@ -367,12 +412,14 @@ void removeIfExistsFromSQL(const std::string & collection_name, ContextPtr conte
|
||||
void createFromSQL(const ASTCreateNamedCollectionQuery & query, ContextPtr context)
|
||||
{
|
||||
auto lock = lockNamedCollectionsTransaction();
|
||||
loadIfNotUnlocked(lock);
|
||||
NamedCollectionFactory::instance().add(query.collection_name, LoadFromSQL(context).create(query));
|
||||
}
|
||||
|
||||
void updateFromSQL(const ASTAlterNamedCollectionQuery & query, ContextPtr context)
|
||||
{
|
||||
auto lock = lockNamedCollectionsTransaction();
|
||||
loadIfNotUnlocked(lock);
|
||||
LoadFromSQL(context).update(query);
|
||||
|
||||
auto collection = NamedCollectionFactory::instance().getMutable(query.collection_name);
|
||||
|
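loadIfNot() makes the named-collection cache self-initializing: every SQL entry point takes the transaction lock, and loadIfNotUnlocked() loads from config and from on-disk SQL only once, tracked by two atomic flags. Here is a compact, self-contained sketch of this lock-plus-flag lazy initialization; the names are generic stand-ins, not the NamedCollectionUtils API.

    #include <algorithm>
    #include <atomic>
    #include <iostream>
    #include <mutex>
    #include <string>
    #include <vector>

    static std::mutex transaction_lock;
    static std::atomic<bool> loaded_from_config{false};
    static std::atomic<bool> loaded_from_sql{false};
    static std::vector<std::string> collections;   /// the cache being protected

    void loadIfNotUnlocked(std::unique_lock<std::mutex> &)
    {
        if (!loaded_from_config)
        {
            collections.push_back("from_config");  /// pretend config load
            loaded_from_config = true;
        }
        if (!loaded_from_sql)
        {
            collections.push_back("from_sql");     /// pretend on-disk SQL load
            loaded_from_sql = true;
        }
    }

    void loadIfNot()
    {
        /// Cheap fast path once both sources are loaded, lock only otherwise.
        if (loaded_from_config && loaded_from_sql)
            return;
        auto lock = std::unique_lock(transaction_lock);
        loadIfNotUnlocked(lock);
    }

    void removeCollection(const std::string & name)
    {
        auto lock = std::unique_lock(transaction_lock);
        loadIfNotUnlocked(lock);   /// every mutating entry point warms the cache first
        collections.erase(std::remove(collections.begin(), collections.end(), name), collections.end());
    }

    int main()
    {
        loadIfNot();
        loadIfNot();                /// second call is a no-op
        removeCollection("from_config");
        std::cout << collections.size() << '\n';   /// 1
    }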
@ -35,6 +35,8 @@ void createFromSQL(const ASTCreateNamedCollectionQuery & query, ContextPtr conte
|
||||
/// Update definition of already existing collection from AST and update result in `context->getPath() / named_collections /`.
|
||||
void updateFromSQL(const ASTAlterNamedCollectionQuery & query, ContextPtr context);
|
||||
|
||||
void loadIfNot();
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -275,6 +275,8 @@ class IColumn;
|
||||
M(Milliseconds, stream_flush_interval_ms, 7500, "Timeout for flushing data from streaming storages.", 0) \
|
||||
M(Milliseconds, stream_poll_timeout_ms, 500, "Timeout for polling data from/to streaming storages.", 0) \
|
||||
\
|
||||
M(Bool, final, false, "Query with the FINAL modifier by default. If the engine does not support final, it does not have any effect. On queries with multiple tables final is applied only on those that support it. It also works on distributed tables", 0) \
|
||||
\
|
||||
/** Settings for testing hedged requests */ \
|
||||
M(Milliseconds, sleep_in_send_tables_status_ms, 0, "Time to sleep in sending tables status response in TCPHandler", 0) \
|
||||
M(Milliseconds, sleep_in_send_data_ms, 0, "Time to sleep in sending data in TCPHandler", 0) \
|
||||
@ -704,7 +706,7 @@ class IColumn;
|
||||
M(UInt64, grace_hash_join_max_buckets, 1024, "Limit on the number of grace hash join buckets", 0) \
|
||||
M(Bool, optimize_distinct_in_order, true, "Enable DISTINCT optimization if some columns in DISTINCT form a prefix of sorting. For example, prefix of sorting key in merge tree or ORDER BY statement", 0) \
|
||||
M(Bool, optimize_sorting_by_input_stream_properties, true, "Optimize sorting by sorting properties of input stream", 0) \
|
||||
M(UInt64, insert_keeper_max_retries, 0, "Max retries for keeper operations during insert", 0) \
|
||||
M(UInt64, insert_keeper_max_retries, 20, "Max retries for keeper operations during insert", 0) \
|
||||
M(UInt64, insert_keeper_retry_initial_backoff_ms, 100, "Initial backoff timeout for keeper operations during insert", 0) \
|
||||
M(UInt64, insert_keeper_retry_max_backoff_ms, 10000, "Max backoff timeout for keeper operations during insert", 0) \
|
||||
M(Float, insert_keeper_fault_injection_probability, 0.0f, "Approximate probability of failure for a keeper request during insert. Valid value is in interval [0.0f, 1.0f]", 0) \
|
||||
|
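The new defaults above (insert_keeper_max_retries = 20, initial backoff 100 ms, max backoff 10000 ms) describe a capped backoff schedule between Keeper retries. A minimal sketch of such a schedule, assuming simple doubling with a cap; this helper is illustrative and not the ClickHouse retry implementation.

    #include <algorithm>
    #include <cstdint>
    #include <iostream>

    /// Capped exponential backoff: 100, 200, 400, ... up to 10000 ms,
    /// mirroring insert_keeper_retry_initial_backoff_ms / _max_backoff_ms.
    int64_t backoffMs(int attempt, int64_t initial_ms = 100, int64_t max_ms = 10000)
    {
        int64_t delay = initial_ms;
        for (int i = 0; i < attempt && delay < max_ms; ++i)
            delay = std::min(delay * 2, max_ms);
        return delay;
    }

    int main()
    {
        const int max_retries = 20;  /// insert_keeper_max_retries default after this change
        int64_t total = 0;
        for (int attempt = 0; attempt < max_retries; ++attempt)
            total += backoffMs(attempt);
        /// Worst-case sleep if every retry is needed (~143 seconds with these assumed defaults).
        std::cout << "total backoff ms: " << total << '\n';
    }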
@ -83,7 +83,8 @@ static std::map<ClickHouseVersion, SettingsChangesHistory::SettingsChanges> sett
|
||||
{"23.2", {{"output_format_parquet_fixed_string_as_fixed_byte_array", false, true, "Use Parquet FIXED_LENGTH_BYTE_ARRAY type for FixedString by default"},
|
||||
{"output_format_arrow_fixed_string_as_fixed_byte_array", false, true, "Use Arrow FIXED_SIZE_BINARY type for FixedString by default"},
|
||||
{"query_plan_remove_redundant_distinct", false, true, "Remove redundant Distinct step in query plan"},
|
||||
{"optimize_duplicate_order_by_and_distinct", true, false, "Remove duplicate ORDER BY and DISTINCT if it's possible"}}},
|
||||
{"optimize_duplicate_order_by_and_distinct", true, false, "Remove duplicate ORDER BY and DISTINCT if it's possible"},
|
||||
{"insert_keeper_max_retries", 0, 20, "Enable reconnections to Keeper on INSERT, improve reliability"}}},
|
||||
{"23.1", {{"input_format_json_read_objects_as_strings", 0, 1, "Enable reading nested json objects as strings while object type is experimental"},
|
||||
{"input_format_json_defaults_for_missing_elements_in_named_tuple", false, true, "Allow missing elements in JSON objects while reading named tuples by default"},
|
||||
{"input_format_csv_detect_header", false, true, "Detect header in CSV format by default"},
|
||||
|
@ -232,10 +232,19 @@ void SerializationTuple::deserializeTextJSON(IColumn & column, ReadBuffer & istr
|
||||
|
||||
seen_elements[element_pos] = 1;
|
||||
auto & element_column = extractElementColumn(column, element_pos);
|
||||
if (settings.null_as_default)
|
||||
SerializationNullable::deserializeTextJSONImpl(element_column, istr, settings, elems[element_pos]);
|
||||
else
|
||||
elems[element_pos]->deserializeTextJSON(element_column, istr, settings);
|
||||
|
||||
try
|
||||
{
|
||||
if (settings.null_as_default)
|
||||
SerializationNullable::deserializeTextJSONImpl(element_column, istr, settings, elems[element_pos]);
|
||||
else
|
||||
elems[element_pos]->deserializeTextJSON(element_column, istr, settings);
|
||||
}
|
||||
catch (Exception & e)
|
||||
{
|
||||
e.addMessage("(while reading the value of nested key " + name + ")");
|
||||
throw;
|
||||
}
|
||||
|
||||
skipWhitespaceIfAny(istr);
|
||||
++processed;
|
||||
|
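The hunk above wraps the per-element JSON deserialization in a try/catch so that a failure on a nested key is reported with the key name attached. A small self-contained illustration of the same rethrow-with-context pattern follows; std::runtime_error stands in for DB::Exception::addMessage, which is the real mechanism used here.

    #include <iostream>
    #include <stdexcept>
    #include <string>

    /// Pretend element parser that fails for demonstration purposes.
    int parseElement(const std::string & raw)
    {
        if (raw.empty() || raw == "null")
            throw std::runtime_error("cannot parse value from '" + raw + "'");
        return std::stoi(raw);
    }

    /// Catch, append which nested key was being read, and rethrow,
    /// mirroring e.addMessage("(while reading the value of nested key ...)").
    int parseNamedElement(const std::string & name, const std::string & raw)
    {
        try
        {
            return parseElement(raw);
        }
        catch (const std::exception & e)
        {
            throw std::runtime_error(std::string(e.what()) + " (while reading the value of nested key " + name + ")");
        }
    }

    int main()
    {
        try
        {
            parseNamedElement("point.y", "null");
        }
        catch (const std::exception & e)
        {
            std::cout << e.what() << '\n';
        }
    }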
@ -17,6 +17,7 @@
|
||||
#include <Parsers/ASTInsertQuery.h>
|
||||
#include <Parsers/queryToString.h>
|
||||
#include <Storages/IStorage.h>
|
||||
#include <Common/CurrentThread.h>
|
||||
#include <Common/SipHash.h>
|
||||
#include <Common/FieldVisitorHash.h>
|
||||
#include <Common/DateLUT.h>
|
||||
@ -102,9 +103,10 @@ bool AsynchronousInsertQueue::InsertQuery::operator==(const InsertQuery & other)
|
||||
return query_str == other.query_str && settings == other.settings;
|
||||
}
|
||||
|
||||
AsynchronousInsertQueue::InsertData::Entry::Entry(String && bytes_, String && query_id_)
|
||||
AsynchronousInsertQueue::InsertData::Entry::Entry(String && bytes_, String && query_id_, MemoryTracker * user_memory_tracker_)
|
||||
: bytes(std::move(bytes_))
|
||||
, query_id(std::move(query_id_))
|
||||
, user_memory_tracker(user_memory_tracker_)
|
||||
, create_time(std::chrono::system_clock::now())
|
||||
{
|
||||
}
|
||||
@ -209,7 +211,7 @@ std::future<void> AsynchronousInsertQueue::push(ASTPtr query, ContextPtr query_c
|
||||
if (auto quota = query_context->getQuota())
|
||||
quota->used(QuotaType::WRITTEN_BYTES, bytes.size());
|
||||
|
||||
auto entry = std::make_shared<InsertData::Entry>(std::move(bytes), query_context->getCurrentQueryId());
|
||||
auto entry = std::make_shared<InsertData::Entry>(std::move(bytes), query_context->getCurrentQueryId(), CurrentThread::getUserMemoryTracker());
|
||||
|
||||
InsertQuery key{query, settings};
|
||||
InsertDataPtr data_to_process;
|
||||
|
@ -1,6 +1,7 @@
|
||||
#pragma once
|
||||
|
||||
#include <Parsers/IAST_fwd.h>
|
||||
#include <Common/CurrentThread.h>
|
||||
#include <Common/ThreadPool.h>
|
||||
#include <Core/Settings.h>
|
||||
#include <Poco/Logger.h>
|
||||
@ -41,6 +42,31 @@ private:
        UInt128 calculateHash() const;
    };

    struct UserMemoryTrackerSwitcher
    {
        explicit UserMemoryTrackerSwitcher(MemoryTracker * new_tracker)
        {
            auto * thread_tracker = CurrentThread::getMemoryTracker();
            prev_untracked_memory = current_thread->untracked_memory;
            prev_memory_tracker_parent = thread_tracker->getParent();

            current_thread->untracked_memory = 0;
            thread_tracker->setParent(new_tracker);
        }

        ~UserMemoryTrackerSwitcher()
        {
            CurrentThread::flushUntrackedMemory();
            auto * thread_tracker = CurrentThread::getMemoryTracker();

            current_thread->untracked_memory = prev_untracked_memory;
            thread_tracker->setParent(prev_memory_tracker_parent);
        }

        MemoryTracker * prev_memory_tracker_parent;
        Int64 prev_untracked_memory;
    };

    struct InsertData
    {
        struct Entry
@ -48,9 +74,10 @@ private:
        public:
            const String bytes;
            const String query_id;
            MemoryTracker * const user_memory_tracker;
            const std::chrono::time_point<std::chrono::system_clock> create_time;

            Entry(String && bytes_, String && query_id_);
            Entry(String && bytes_, String && query_id_, MemoryTracker * user_memory_tracker_);

            void finish(std::exception_ptr exception_ = nullptr);
            std::future<void> getFuture() { return promise.get_future(); }
@ -61,6 +88,19 @@ private:
            std::atomic_bool finished = false;
        };

        ~InsertData()
        {
            auto it = entries.begin();
            // Entries must be destroyed in context of user who runs async insert.
            // Each entry in the list may correspond to a different user,
            // so we need to switch current thread's MemoryTracker parent on each iteration.
            while (it != entries.end())
            {
                UserMemoryTrackerSwitcher switcher((*it)->user_memory_tracker);
                it = entries.erase(it);
            }
        }

        using EntryPtr = std::shared_ptr<Entry>;

        std::list<EntryPtr> entries;

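UserMemoryTrackerSwitcher is an RAII guard: the constructor saves the thread tracker's current parent and untracked counter and repoints the parent at the entry owner's user tracker, and the destructor flushes and restores both, so whatever memory an entry releases is charged back to the user who queued it. A generic, self-contained sketch of this save-switch-restore idiom follows; the names and the string-valued "owner" are illustrative stand-ins, not the ClickHouse types.

    #include <iostream>
    #include <string>
    #include <utility>

    /// Thread-local "owner" that work is attributed to, standing in for the
    /// parent MemoryTracker of the current thread.
    thread_local std::string current_owner = "server";

    /// RAII guard in the spirit of UserMemoryTrackerSwitcher: switch the owner
    /// in the constructor, restore the previous one in the destructor.
    struct OwnerSwitcher
    {
        explicit OwnerSwitcher(std::string new_owner)
            : prev_owner(current_owner)
        {
            current_owner = std::move(new_owner);
        }

        ~OwnerSwitcher()
        {
            current_owner = prev_owner;   /// restored even if the scope throws
        }

        std::string prev_owner;
    };

    void freeEntry(const std::string & entry)
    {
        std::cout << "freeing '" << entry << "' on behalf of " << current_owner << '\n';
    }

    int main()
    {
        /// Each queued entry may belong to a different user, so the switch is
        /// done per entry, exactly like the per-iteration switcher in ~InsertData().
        const std::pair<std::string, std::string> entries[] = {{"e1", "alice"}, {"e2", "bob"}};
        for (const auto & [entry, user] : entries)
        {
            OwnerSwitcher switcher(user);
            freeEntry(entry);
        }
        std::cout << "back to " << current_owner << '\n';   /// server
    }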
@ -509,6 +509,11 @@ InterpreterSelectQuery::InterpreterSelectQuery(
        query_info.additional_filter_ast = parseAdditionalFilterConditionForTable(
            settings.additional_table_filters, joined_tables.tablesWithColumns().front().table, *context);

    if (autoFinalOnQuery(query))
    {
        query.setFinal();
    }

    auto analyze = [&] (bool try_move_to_prewhere)
    {
        /// Allow push down and other optimizations for VIEW: replace with subquery and rewrite it.
@ -3020,6 +3025,15 @@ void InterpreterSelectQuery::ignoreWithTotals()
    getSelectQuery().group_by_with_totals = false;
}

bool InterpreterSelectQuery::autoFinalOnQuery(ASTSelectQuery & query)
{
    // query.tables() is required because not all queries have tables in it, it could be a function.
    bool is_auto_final_setting_on = context->getSettingsRef().final;
    bool is_final_supported = storage && storage->supportsFinal() && !storage->isRemote() && query.tables();
    bool is_query_already_final = query.final();

    return is_auto_final_setting_on && !is_query_already_final && is_final_supported;
}

void InterpreterSelectQuery::initSettings()
{

@ -184,6 +184,7 @@ private:
|
||||
void executeDistinct(QueryPlan & query_plan, bool before_order, Names columns, bool pre_distinct);
|
||||
void executeExtremes(QueryPlan & query_plan);
|
||||
void executeSubqueriesInSetsAndJoins(QueryPlan & query_plan);
|
||||
bool autoFinalOnQuery(ASTSelectQuery & select_query);
|
||||
|
||||
enum class Modificator
|
||||
{
|
||||
|
@ -35,7 +35,8 @@ bool PredicateExpressionsOptimizer::optimize(ASTSelectQuery & select_query)
|
||||
if (!enable_optimize_predicate_expression)
|
||||
return false;
|
||||
|
||||
if (select_query.having() && (!select_query.group_by_with_cube && !select_query.group_by_with_rollup && !select_query.group_by_with_totals))
|
||||
const bool has_incompatible_constructs = select_query.group_by_with_cube || select_query.group_by_with_rollup || select_query.group_by_with_totals || select_query.group_by_with_grouping_sets;
|
||||
if (select_query.having() && !has_incompatible_constructs)
|
||||
tryMovePredicatesFromHavingToWhere(select_query);
|
||||
|
||||
if (!select_query.tables() || select_query.tables()->children.empty())
|
||||
|
@ -56,6 +56,8 @@ public:
|
||||
|
||||
const Aggregator::Params & getParams() const { return params; }
|
||||
|
||||
const auto & getGroupingSetsParamsList() const { return grouping_sets_params; }
|
||||
|
||||
bool inOrder() const { return !sort_description_for_merging.empty(); }
|
||||
bool explicitSortingRequired() const { return explicit_sorting_required_for_aggregation_in_order; }
|
||||
bool isGroupingSets() const { return !grouping_sets_params.empty(); }
|
||||
|
@ -53,6 +53,53 @@ static void checkChildrenSize(QueryPlan::Node * node, size_t child_num)
            child_num, child->getInputStreams().size(), node->children.size());
}

static bool identifiersIsAmongAllGroupingSets(const GroupingSetsParamsList & grouping_sets_params, const NameSet & identifiers_in_predicate)
{
    for (const auto & grouping_set : grouping_sets_params)
    {
        for (const auto & identifier : identifiers_in_predicate)
        {
            if (std::find(grouping_set.used_keys.begin(), grouping_set.used_keys.end(), identifier) == grouping_set.used_keys.end())
                return false;
        }
    }
    return true;
}

||||
static NameSet findIdentifiersOfNode(const ActionsDAG::Node * node)
|
||||
{
|
||||
NameSet res;
|
||||
|
||||
/// We treat all INPUT as identifier
|
||||
if (node->type == ActionsDAG::ActionType::INPUT)
|
||||
{
|
||||
res.emplace(node->result_name);
|
||||
return res;
|
||||
}
|
||||
|
||||
std::queue<const ActionsDAG::Node *> queue;
|
||||
queue.push(node);
|
||||
|
||||
while (!queue.empty())
|
||||
{
|
||||
const auto * top = queue.front();
|
||||
for (const auto * child : top->children)
|
||||
{
|
||||
if (child->type == ActionsDAG::ActionType::INPUT)
|
||||
{
|
||||
res.emplace(child->result_name);
|
||||
}
|
||||
else
|
||||
{
|
||||
/// Only push non INPUT child into the queue
|
||||
queue.push(child);
|
||||
}
|
||||
}
|
||||
queue.pop();
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
static ActionsDAGPtr splitFilter(QueryPlan::Node * parent_node, const Names & allowed_inputs, size_t child_idx = 0)
|
||||
{
|
||||
QueryPlan::Node * child_node = parent_node->children.front();
|
||||
@ -176,6 +223,20 @@ size_t tryPushDownFilter(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes
|
||||
|
||||
if (auto * aggregating = typeid_cast<AggregatingStep *>(child.get()))
|
||||
{
|
||||
/// If aggregating is GROUPING SETS, and not all the identifiers exist in all
|
||||
/// of the grouping sets, we could not push the filter down.
|
||||
if (aggregating->isGroupingSets())
|
||||
{
|
||||
|
||||
const auto & actions = filter->getExpression();
|
||||
const auto & filter_node = actions->findInOutputs(filter->getFilterColumnName());
|
||||
|
||||
auto identifiers_in_predicate = findIdentifiersOfNode(&filter_node);
|
||||
|
||||
if (!identifiersIsAmongAllGroupingSets(aggregating->getGroupingSetsParamsList(), identifiers_in_predicate))
|
||||
return 0;
|
||||
}
|
||||
|
||||
const auto & params = aggregating->getParams();
|
||||
const auto & keys = params.keys;
|
||||
|
||||
|
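The new check allows pushing a filter below a GROUPING SETS aggregation only when every column the predicate touches is a key of every grouping set; for a set that does not group by that column, filtering the source rows early would change which rows get aggregated and therefore change the result. A standalone sketch of the membership test, using plain standard-library containers instead of GroupingSetsParamsList/NameSet:

    #include <algorithm>
    #include <iostream>
    #include <set>
    #include <string>
    #include <vector>

    using Keys = std::vector<std::string>;

    /// True only if every identifier used by the predicate is a key of every
    /// grouping set, mirroring identifiersIsAmongAllGroupingSets above.
    bool identifiersInAllGroupingSets(const std::vector<Keys> & grouping_sets, const std::set<std::string> & identifiers)
    {
        for (const auto & keys : grouping_sets)
            for (const auto & identifier : identifiers)
                if (std::find(keys.begin(), keys.end(), identifier) == keys.end())
                    return false;
        return true;
    }

    int main()
    {
        /// GROUPING SETS ((a, b), (a)) - a filter on "a" can be pushed down,
        /// a filter on "b" cannot, because "b" is absent from the second set.
        std::vector<Keys> grouping_sets = {{"a", "b"}, {"a"}};
        std::cout << identifiersInAllGroupingSets(grouping_sets, {"a"}) << '\n';  /// 1
        std::cout << identifiersInAllGroupingSets(grouping_sets, {"b"}) << '\n';  /// 0
    }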
@ -188,7 +188,14 @@ private:
|
||||
return false;
|
||||
|
||||
/// remove sorting
|
||||
parent_node->children.front() = sorting_node->children.front();
|
||||
for (auto & child : parent_node->children)
|
||||
{
|
||||
if (child == sorting_node)
|
||||
{
|
||||
child = sorting_node->children.front();
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/// sorting removed, so need to update sorting traits for upstream steps
|
||||
const DataStream * input_stream = &parent_node->children.front()->step->getOutputStream();
|
||||
|
@ -465,6 +465,7 @@ void StorageDistributedDirectoryMonitor::run()
|
||||
|
||||
tryLogCurrentException(getLoggerName().data());
|
||||
status.last_exception = std::current_exception();
|
||||
status.last_exception_time = std::chrono::system_clock::now();
|
||||
}
|
||||
}
|
||||
else
|
||||
|
@ -58,6 +58,7 @@ public:
|
||||
struct InternalStatus
|
||||
{
|
||||
std::exception_ptr last_exception;
|
||||
std::chrono::system_clock::time_point last_exception_time;
|
||||
|
||||
size_t error_count = 0;
|
||||
|
||||
|
@ -176,16 +176,16 @@ ChunkAndProgress IMergeTreeSelectAlgorithm::read()
|
||||
return ChunkAndProgress{
|
||||
.chunk = Chunk(ordered_columns, res.row_count),
|
||||
.num_read_rows = res.num_read_rows,
|
||||
.num_read_bytes = res.num_read_bytes};
|
||||
.num_read_bytes = res.num_read_bytes,
|
||||
.is_finished = false};
|
||||
}
|
||||
else
|
||||
{
|
||||
num_read_rows += res.num_read_rows;
|
||||
num_read_bytes += res.num_read_bytes;
|
||||
return {Chunk(), res.num_read_rows, res.num_read_bytes, false};
|
||||
}
|
||||
}
|
||||
|
||||
return {Chunk(), num_read_rows, num_read_bytes};
|
||||
return {Chunk(), num_read_rows, num_read_bytes, true};
|
||||
}
|
||||
|
||||
void IMergeTreeSelectAlgorithm::initializeMergeTreeReadersForCurrentTask(
|
||||
|
@ -20,6 +20,9 @@ struct ChunkAndProgress
    Chunk chunk;
    size_t num_read_rows = 0;
    size_t num_read_bytes = 0;
    /// Explicitly indicate that we have read all data.
    /// This is needed to occasionally return empty chunk to indicate the progress while the rows are filtered out in PREWHERE.
    bool is_finished = false;
};

struct ParallelReadingExtension

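With the is_finished flag, an empty chunk no longer means "the stream is over"; it can also mean "rows were read but all of them were filtered out by PREWHERE, here is the progress". A minimal sketch of a consumer that relies on the flag rather than on emptiness; the Reader below is a made-up stand-in, not the MergeTree reader.

    #include <cstddef>
    #include <iostream>
    #include <vector>

    /// Simplified result: rows that survived filtering, how many were read,
    /// and an explicit end-of-stream flag, like ChunkAndProgress::is_finished.
    struct ReadResult
    {
        std::vector<int> rows;
        size_t num_read_rows = 0;
        bool is_finished = false;
    };

    /// Fake reader: the second block is fully filtered out by a PREWHERE-like
    /// predicate, so it reports progress but returns no rows.
    struct Reader
    {
        int step = 0;
        ReadResult read()
        {
            ++step;
            if (step == 1) return {{1, 2, 3}, 3, false};
            if (step == 2) return {{}, 1000, false};   /// progress only, not the end
            return {{}, 0, true};
        }
    };

    int main()
    {
        Reader reader;
        size_t total_read = 0;
        size_t total_kept = 0;

        /// Stop on is_finished, not on an empty chunk: otherwise the loop would
        /// exit after the filtered-out block and lose the remaining data and progress.
        while (true)
        {
            ReadResult res = reader.read();
            total_read += res.num_read_rows;
            total_kept += res.rows.size();
            if (res.is_finished)
                break;
        }
        std::cout << "read " << total_read << ", kept " << total_kept << '\n';  /// read 1003, kept 3
    }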
@ -141,7 +141,6 @@ MergeTreeReadTask::MergeTreeReadTask(
|
||||
size_t part_index_in_query_,
|
||||
const NameSet & column_name_set_,
|
||||
const MergeTreeReadTaskColumns & task_columns_,
|
||||
bool remove_prewhere_column_,
|
||||
MergeTreeBlockSizePredictorPtr size_predictor_,
|
||||
int64_t priority_,
|
||||
std::future<MergeTreeReaderPtr> reader_,
|
||||
@ -151,7 +150,6 @@ MergeTreeReadTask::MergeTreeReadTask(
|
||||
, part_index_in_query{part_index_in_query_}
|
||||
, column_name_set{column_name_set_}
|
||||
, task_columns{task_columns_}
|
||||
, remove_prewhere_column{remove_prewhere_column_}
|
||||
, size_predictor{size_predictor_}
|
||||
, reader(std::move(reader_))
|
||||
, pre_reader_for_step(std::move(pre_reader_for_step_))
|
||||
|
@ -59,8 +59,6 @@ struct MergeTreeReadTask
|
||||
const NameSet & column_name_set;
|
||||
/// column names to read during PREWHERE and WHERE
|
||||
const MergeTreeReadTaskColumns & task_columns;
|
||||
/// should PREWHERE column be returned to requesting side?
|
||||
const bool remove_prewhere_column;
|
||||
/// Used to satistfy preferred_block_size_bytes limitation
|
||||
MergeTreeBlockSizePredictorPtr size_predictor;
|
||||
/// Used to save current range processing status
|
||||
@ -87,7 +85,6 @@ struct MergeTreeReadTask
|
||||
size_t part_index_in_query_,
|
||||
const NameSet & column_name_set_,
|
||||
const MergeTreeReadTaskColumns & task_columns_,
|
||||
bool remove_prewhere_column_,
|
||||
MergeTreeBlockSizePredictorPtr size_predictor_,
|
||||
int64_t priority_ = 0,
|
||||
std::future<MergeTreeReaderPtr> reader_ = {},
|
||||
|
@ -6431,17 +6431,21 @@ std::optional<ProjectionCandidate> MergeTreeData::getQueryProcessingStageWithAgg
|
||||
if (query_info.additional_filter_ast)
|
||||
return std::nullopt;
|
||||
|
||||
auto query_ptr = query_info.original_query;
|
||||
auto query_ptr = query_info.query;
|
||||
auto original_query_ptr = query_info.original_query;
|
||||
|
||||
auto * select_query = query_ptr->as<ASTSelectQuery>();
|
||||
if (!select_query)
|
||||
auto * original_select_query = original_query_ptr->as<ASTSelectQuery>();
|
||||
|
||||
if (!original_select_query || !select_query)
|
||||
return std::nullopt;
|
||||
|
||||
// Currently projections don't support final yet.
|
||||
if (select_query->final())
|
||||
if (select_query->final() || original_select_query->final())
|
||||
return std::nullopt;
|
||||
|
||||
// Currently projections don't support sample yet.
|
||||
if (select_query->sampleSize())
|
||||
if (original_select_query->sampleSize())
|
||||
return std::nullopt;
|
||||
|
||||
// Currently projection don't support deduplication when moving parts between shards.
|
||||
@ -6449,24 +6453,24 @@ std::optional<ProjectionCandidate> MergeTreeData::getQueryProcessingStageWithAgg
|
||||
return std::nullopt;
|
||||
|
||||
// Currently projections don't support ARRAY JOIN yet.
|
||||
if (select_query->arrayJoinExpressionList().first)
|
||||
if (original_select_query->arrayJoinExpressionList().first)
|
||||
return std::nullopt;
|
||||
|
||||
// In order to properly analyze joins, aliases should be recognized. However, aliases get lost during projection analysis.
|
||||
// Let's disable projection if there are any JOIN clauses.
|
||||
// TODO: We need a better identifier resolution mechanism for projection analysis.
|
||||
if (select_query->hasJoin())
|
||||
if (original_select_query->hasJoin())
|
||||
return std::nullopt;
|
||||
|
||||
// INTERPOLATE expressions may include aliases, so aliases should be preserved
|
||||
if (select_query->interpolate() && !select_query->interpolate()->children.empty())
|
||||
if (original_select_query->interpolate() && !original_select_query->interpolate()->children.empty())
|
||||
return std::nullopt;
|
||||
|
||||
// Projections don't support grouping sets yet.
|
||||
if (select_query->group_by_with_grouping_sets
|
||||
|| select_query->group_by_with_totals
|
||||
|| select_query->group_by_with_rollup
|
||||
|| select_query->group_by_with_cube)
|
||||
if (original_select_query->group_by_with_grouping_sets
|
||||
|| original_select_query->group_by_with_totals
|
||||
|| original_select_query->group_by_with_rollup
|
||||
|| original_select_query->group_by_with_cube)
|
||||
return std::nullopt;
|
||||
|
||||
auto query_options = SelectQueryOptions(
|
||||
@ -6476,7 +6480,7 @@ std::optional<ProjectionCandidate> MergeTreeData::getQueryProcessingStageWithAgg
|
||||
).ignoreProjections().ignoreAlias();
|
||||
|
||||
InterpreterSelectQuery select(
|
||||
query_ptr,
|
||||
original_query_ptr,
|
||||
query_context,
|
||||
query_options,
|
||||
query_info.prepared_sets);
|
||||
|
@ -56,7 +56,6 @@ try
|
||||
|
||||
task = std::make_unique<MergeTreeReadTask>(
|
||||
data_part, mark_ranges_for_task, part_index_in_query, column_name_set, task_columns,
|
||||
prewhere_info && prewhere_info->remove_prewhere_column,
|
||||
std::move(size_predictor));
|
||||
|
||||
return true;
|
||||
|
@ -509,7 +509,7 @@ MergeTreePrefetchedReadPool::ThreadsTasks MergeTreePrefetchedReadPool::createThr
|
||||
|
||||
auto read_task = std::make_unique<MergeTreeReadTask>(
|
||||
part.data_part, ranges_to_get_from_part, part.part_index_in_query,
|
||||
part.column_name_set, part.task_columns, prewhere_info && prewhere_info->remove_prewhere_column,
|
||||
part.column_name_set, part.task_columns,
|
||||
std::move(curr_task_size_predictor));
|
||||
|
||||
read_task->priority = priority;
|
||||
|
@ -208,7 +208,7 @@ MergeTreeReadTaskPtr MergeTreeReadPool::getTask(size_t thread)
|
||||
return std::make_unique<MergeTreeReadTask>(
|
||||
part.data_part, ranges_to_get_from_part, part.part_index_in_query,
|
||||
per_part.column_name_set, per_part.task_columns,
|
||||
prewhere_info && prewhere_info->remove_prewhere_column, std::move(curr_task_size_predictor));
|
||||
std::move(curr_task_size_predictor));
|
||||
}
|
||||
|
||||
Block MergeTreeReadPool::getHeader() const
|
||||
@ -459,7 +459,6 @@ MergeTreeReadTaskPtr MergeTreeReadPoolParallelReplicas::getTask(size_t thread)
|
||||
part.part_index_in_query,
|
||||
per_part.column_name_set,
|
||||
per_part.task_columns,
|
||||
prewhere_info && prewhere_info->remove_prewhere_column,
|
||||
std::move(curr_task_size_predictor));
|
||||
}
|
||||
|
||||
|
@ -49,8 +49,7 @@ bool MergeTreeReverseSelectAlgorithm::getNewTaskOrdinaryReading()
|
||||
|
||||
task = std::make_unique<MergeTreeReadTask>(
|
||||
data_part, mark_ranges_for_task, part_index_in_query, column_name_set,
|
||||
task_columns, prewhere_info && prewhere_info->remove_prewhere_column,
|
||||
std::move(size_predictor));
|
||||
task_columns, std::move(size_predictor));
|
||||
|
||||
return true;
|
||||
|
||||
@ -88,8 +87,7 @@ bool MergeTreeReverseSelectAlgorithm::getNewTaskParallelReplicas()
|
||||
|
||||
task = std::make_unique<MergeTreeReadTask>(
|
||||
data_part, mark_ranges_for_task, part_index_in_query, column_name_set,
|
||||
task_columns, prewhere_info && prewhere_info->remove_prewhere_column,
|
||||
std::move(size_predictor));
|
||||
task_columns, std::move(size_predictor));
|
||||
|
||||
return true;
|
||||
}
|
||||
|
@ -162,7 +162,6 @@ struct Settings;
|
||||
M(Bool, allow_remote_fs_zero_copy_replication, false, "Don't use this setting in production, because it is not ready.", 0) \
|
||||
M(String, remote_fs_zero_copy_zookeeper_path, "/clickhouse/zero_copy", "ZooKeeper path for Zero-copy table-independet info.", 0) \
|
||||
M(Bool, remote_fs_zero_copy_path_compatible_mode, false, "Run zero-copy in compatible mode during conversion process.", 0) \
|
||||
\
|
||||
/** Compress marks and primary key. */ \
|
||||
M(Bool, compress_marks, false, "Marks support compression, reduce mark file size and speed up network transmission.", 0) \
|
||||
M(Bool, compress_primary_key, false, "Primary key support compression, reduce primary key file size and speed up network transmission.", 0) \
|
||||
|
@ -176,15 +176,16 @@ ISource::Status MergeTreeSource::prepare()
|
||||
}
|
||||
|
||||
|
||||
std::optional<Chunk> MergeTreeSource::reportProgress(ChunkAndProgress chunk)
|
||||
Chunk MergeTreeSource::processReadResult(ChunkAndProgress chunk)
|
||||
{
|
||||
if (chunk.num_read_rows || chunk.num_read_bytes)
|
||||
progress(chunk.num_read_rows, chunk.num_read_bytes);
|
||||
|
||||
if (chunk.chunk.hasRows())
|
||||
return std::move(chunk.chunk);
|
||||
finished = chunk.is_finished;
|
||||
|
||||
return {};
|
||||
/// We can return a chunk with no rows even if are not finished.
|
||||
/// This allows to report progress when all the rows are filtered out inside MergeTreeBaseSelectProcessor by PREWHERE logic.
|
||||
return std::move(chunk.chunk);
|
||||
}
|
||||
|
||||
|
||||
@ -194,7 +195,7 @@ std::optional<Chunk> MergeTreeSource::tryGenerate()
|
||||
if (async_reading_state)
|
||||
{
|
||||
if (async_reading_state->getStage() == AsyncReadingState::Stage::IsFinished)
|
||||
return reportProgress(async_reading_state->getResult());
|
||||
return processReadResult(async_reading_state->getResult());
|
||||
|
||||
chassert(async_reading_state->getStage() == AsyncReadingState::Stage::NotStarted);
|
||||
|
||||
@ -220,7 +221,7 @@ std::optional<Chunk> MergeTreeSource::tryGenerate()
|
||||
}
|
||||
#endif
|
||||
|
||||
return reportProgress(algorithm->read());
|
||||
return processReadResult(algorithm->read());
|
||||
}
|
||||
|
||||
#if defined(OS_LINUX)
|
||||
|
@ -36,7 +36,7 @@ private:
|
||||
std::unique_ptr<AsyncReadingState> async_reading_state;
|
||||
#endif
|
||||
|
||||
std::optional<Chunk> reportProgress(ChunkAndProgress chunk);
|
||||
Chunk processReadResult(ChunkAndProgress chunk);
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -58,6 +58,8 @@ NamedCollectionPtr tryGetNamedCollectionWithOverrides(ASTs asts)
|
||||
if (asts.empty())
|
||||
return nullptr;
|
||||
|
||||
NamedCollectionUtils::loadIfNot();
|
||||
|
||||
auto collection = tryGetNamedCollectionFromASTs(asts);
|
||||
if (!collection)
|
||||
return nullptr;
|
||||
|
@ -56,10 +56,10 @@ int RabbitMQHandler::iterateLoop()

/// Do not need synchronization as in iterateLoop(), because this method is used only for
/// initial RabbitMQ setup - at this point there is no background loop thread.
void RabbitMQHandler::startBlockingLoop()
int RabbitMQHandler::startBlockingLoop()
{
    LOG_DEBUG(log, "Started blocking loop.");
    uv_run(loop, UV_RUN_DEFAULT);
    return uv_run(loop, UV_RUN_DEFAULT);
}

void RabbitMQHandler::stopLoop()

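Returning uv_run's result lets the caller see whether the event loop was stopped while handles or requests were still active; the producer change further down uses that value to log how many events were left unfinished. A small sketch of that usage, assuming libuv is available and linked (-luv); the timer callback and names are illustrative.

    #include <cstdio>
    #include <uv.h>

    static void on_timer(uv_timer_t * timer)
    {
        std::puts("timer fired, stopping the loop");
        uv_stop(timer->loop);   /// stop while the repeating timer handle is still active
    }

    int main()
    {
        uv_loop_t loop;
        uv_loop_init(&loop);

        uv_timer_t timer;
        uv_timer_init(&loop, &timer);
        uv_timer_start(&timer, on_timer, 10 /* timeout ms */, 10 /* repeat ms */);

        /// Non-zero result means the loop was stopped while work was still pending,
        /// which is what the producer logs as "not finished events".
        int active = uv_run(&loop, UV_RUN_DEFAULT);
        std::printf("uv_run returned %d\n", active);

        uv_close(reinterpret_cast<uv_handle_t *>(&timer), nullptr);
        uv_run(&loop, UV_RUN_DEFAULT);   /// let the close callback run
        uv_loop_close(&loop);
        return 0;
    }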
@ -38,7 +38,7 @@ public:
|
||||
|
||||
/// Loop to wait for small tasks in a blocking mode.
|
||||
/// No synchronization is done with the main loop thread.
|
||||
void startBlockingLoop();
|
||||
int startBlockingLoop();
|
||||
|
||||
void stopLoop();
|
||||
|
||||
|
@ -262,7 +262,20 @@ void RabbitMQProducer::startProducingTaskLoop()
|
||||
LOG_TEST(log, "Waiting for pending callbacks to finish (count: {}, try: {})", res, try_num);
|
||||
}
|
||||
|
||||
LOG_DEBUG(log, "Producer on channel {} completed", channel_id);
|
||||
producer_channel->close()
|
||||
.onSuccess([&]()
|
||||
{
|
||||
LOG_TRACE(log, "Successfully closed producer channel");
|
||||
connection.getHandler().stopLoop();
|
||||
})
|
||||
.onError([&](const char * message)
|
||||
{
|
||||
LOG_ERROR(log, "Failed to close producer channel: {}", message);
|
||||
connection.getHandler().stopLoop();
|
||||
});
|
||||
|
||||
int active = connection.getHandler().startBlockingLoop();
|
||||
LOG_DEBUG(log, "Producer on channel completed (not finished events: {})", active);
|
||||
}
|
||||
|
||||
|
||||
|
@ -101,6 +101,7 @@ NamesAndTypesList StorageSystemDistributionQueue::getNamesAndTypes()
|
||||
{ "broken_data_files", std::make_shared<DataTypeUInt64>() },
|
||||
{ "broken_data_compressed_bytes", std::make_shared<DataTypeUInt64>() },
|
||||
{ "last_exception", std::make_shared<DataTypeString>() },
|
||||
{ "last_exception_time", std::make_shared<DataTypeDateTime>() },
|
||||
};
|
||||
}
|
||||
|
||||
@ -190,6 +191,7 @@ void StorageSystemDistributionQueue::fillData(MutableColumns & res_columns, Cont
|
||||
res_columns[col_num++]->insert(getExceptionMessage(status.last_exception, false));
|
||||
else
|
||||
res_columns[col_num++]->insertDefault();
|
||||
res_columns[col_num++]->insert(static_cast<UInt32>(std::chrono::system_clock::to_time_t(status.last_exception_time)));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -31,6 +31,8 @@ void StorageSystemNamedCollections::fillData(MutableColumns & res_columns, Conte
|
||||
{
|
||||
context->checkAccess(AccessType::SHOW_NAMED_COLLECTIONS);
|
||||
|
||||
NamedCollectionUtils::loadIfNot();
|
||||
|
||||
auto collections = NamedCollectionFactory::instance().getAll();
|
||||
for (const auto & [name, collection] : collections)
|
||||
{
|
||||
|
@ -20,7 +20,6 @@ NEED_RERUN_ON_EDITED = {
|
||||
}
|
||||
|
||||
NEED_RERUN_OR_CANCELL_WORKFLOWS = {
|
||||
"DocsReleaseChecks",
|
||||
"BackportPR",
|
||||
}.union(NEED_RERUN_ON_EDITED)
|
||||
|
||||
|
@ -1,126 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
import argparse
|
||||
import logging
|
||||
import subprocess
|
||||
import os
|
||||
import sys
|
||||
|
||||
from github import Github
|
||||
|
||||
from commit_status_helper import get_commit
|
||||
from docker_pull_helper import get_image_with_version
|
||||
from env_helper import TEMP_PATH, REPO_COPY, CLOUDFLARE_TOKEN
|
||||
from get_robot_token import get_best_robot_token
|
||||
from pr_info import PRInfo
|
||||
from report import TestResults, TestResult
|
||||
from rerun_helper import RerunHelper
|
||||
from s3_helper import S3Helper
|
||||
from ssh import SSHKey
|
||||
from tee_popen import TeePopen
|
||||
from upload_result_helper import upload_results
|
||||
|
||||
NAME = "Docs Release"
|
||||
|
||||
|
||||
def parse_args() -> argparse.Namespace:
|
||||
parser = argparse.ArgumentParser(
|
||||
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
|
||||
description="ClickHouse building script using prebuilt Docker image",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--as-root", action="store_true", help="if the container should run as root"
|
||||
)
|
||||
return parser.parse_args()
|
||||
|
||||
|
||||
def main():
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
args = parse_args()
|
||||
|
||||
temp_path = TEMP_PATH
|
||||
repo_path = REPO_COPY
|
||||
|
||||
gh = Github(get_best_robot_token(), per_page=100)
|
||||
pr_info = PRInfo()
|
||||
rerun_helper = RerunHelper(gh, pr_info, NAME)
|
||||
if rerun_helper.is_already_finished_by_status():
|
||||
logging.info("Check is already finished according to github status, exiting")
|
||||
sys.exit(0)
|
||||
|
||||
if not os.path.exists(temp_path):
|
||||
os.makedirs(temp_path)
|
||||
|
||||
docker_image = get_image_with_version(temp_path, "clickhouse/docs-release")
|
||||
|
||||
test_output = os.path.join(temp_path, "docs_release_log")
|
||||
if not os.path.exists(test_output):
|
||||
os.makedirs(test_output)
|
||||
|
||||
if args.as_root:
|
||||
user = "0:0"
|
||||
else:
|
||||
user = f"{os.geteuid()}:{os.getegid()}"
|
||||
|
||||
run_log_path = os.path.join(test_output, "run.log")
|
||||
|
||||
with SSHKey("ROBOT_CLICKHOUSE_SSH_KEY"):
|
||||
cmd = (
|
||||
f"docker run --cap-add=SYS_PTRACE --user={user} "
|
||||
f"--volume='{os.getenv('SSH_AUTH_SOCK', '')}:/ssh-agent' "
|
||||
f"--volume={repo_path}:/repo_path --volume={test_output}:/output_path "
|
||||
f"-e SSH_AUTH_SOCK=/ssh-agent -e EXTRA_BUILD_ARGS='--verbose' "
|
||||
f"-e CLOUDFLARE_TOKEN={CLOUDFLARE_TOKEN} {docker_image}"
|
||||
)
|
||||
logging.info("Running command: %s", cmd)
|
||||
with TeePopen(cmd, run_log_path) as process:
|
||||
retcode = process.wait()
|
||||
if retcode == 0:
|
||||
logging.info("Run successfully")
|
||||
status = "success"
|
||||
description = "Released successfuly"
|
||||
else:
|
||||
description = "Release failed (non zero exit code)"
|
||||
status = "failure"
|
||||
logging.info("Run failed")
|
||||
|
||||
subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {temp_path}", shell=True)
|
||||
files = os.listdir(test_output)
|
||||
test_results = [] # type: TestResults
|
||||
additional_files = []
|
||||
if not files:
|
||||
logging.error("No output files after docs release")
|
||||
description = "No output files after docs release"
|
||||
status = "failure"
|
||||
else:
|
||||
for f in files:
|
||||
path = os.path.join(test_output, f)
|
||||
additional_files.append(path)
|
||||
with open(path, "r", encoding="utf-8") as check_file:
|
||||
for line in check_file:
|
||||
if "ERROR" in line:
|
||||
test_results.append(TestResult(line.split(":")[-1], "FAIL"))
|
||||
if test_results:
|
||||
status = "failure"
|
||||
description = "Found errors in docs"
|
||||
elif status != "failure":
|
||||
test_results.append(TestResult("No errors found", "OK"))
|
||||
else:
|
||||
test_results.append(TestResult("Non zero exit code", "FAIL"))
|
||||
|
||||
s3_helper = S3Helper()
|
||||
|
||||
report_url = upload_results(
|
||||
s3_helper, pr_info.number, pr_info.sha, test_results, additional_files, NAME
|
||||
)
|
||||
print("::notice ::Report url: {report_url}")
|
||||
commit = get_commit(gh, pr_info.sha)
|
||||
commit.create_status(
|
||||
context=NAME, description=description, state=status, target_url=report_url
|
||||
)
|
||||
|
||||
if status == "failure":
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
@ -62,7 +62,6 @@ TRUSTED_WORKFLOW_IDS = {
|
||||
NEED_RERUN_WORKFLOWS = {
|
||||
"BackportPR",
|
||||
"DocsCheck",
|
||||
"DocsReleaseChecks",
|
||||
"MasterCI",
|
||||
"NightlyBuilds",
|
||||
"PullRequestCI",
|
||||
|
@ -320,11 +320,11 @@ def check_tables_are_synchronized(
|
||||
)
|
||||
result = instance.query(result_query)
|
||||
|
||||
for _ in range(30):
|
||||
for _ in range(50):
|
||||
if result == expected:
|
||||
break
|
||||
else:
|
||||
time.sleep(0.5)
|
||||
time.sleep(1)
|
||||
result = instance.query(result_query)
|
||||
|
||||
assert result == expected
|
||||
|
tests/integration/test_async_insert_memory/test.py (new file, 40 lines)
@ -0,0 +1,40 @@
import pytest

from helpers.cluster import ClickHouseCluster

cluster = ClickHouseCluster(__file__)

node = cluster.add_instance("node")


@pytest.fixture(scope="module", autouse=True)
def start_cluster():
    try:
        cluster.start()
        yield cluster
    finally:
        cluster.shutdown()


def test_memory_usage():
    node.query(
        "CREATE TABLE async_table(data Array(UInt64)) ENGINE=MergeTree() ORDER BY data"
    )

    node.get_query_request("SELECT count() FROM system.numbers")

    INSERT_QUERY = "INSERT INTO async_table SETTINGS async_insert=1, wait_for_async_insert=1 VALUES ({})"
    for iter in range(10):
        values = list(range(iter * 5000000, (iter + 1) * 5000000))
        node.query(INSERT_QUERY.format(values))

        response = node.get_query_request(
            "SELECT groupArray(number) FROM numbers(1000000) SETTINGS max_memory_usage_for_user={}".format(
                30 * (2**23)
            )
        )

        _, err = response.get_answer_and_error()
        assert err == "", "Query failed with error {}".format(err)

    node.query("DROP TABLE async_table")
@ -54,7 +54,8 @@ def test_backup_from_old_version(started_cluster):
|
||||
|
||||
node1.query("ALTER TABLE source_table FREEZE PARTITION tuple();")
|
||||
|
||||
node1.restart_with_latest_version(fix_metadata=True)
|
||||
# We don't want to wait old outdated version to finish properly, just terminate it
|
||||
node1.restart_with_latest_version(fix_metadata=True, signal=9)
|
||||
|
||||
node1.query(
|
||||
"CREATE TABLE dest_table (A Int64, B String, Y String) ENGINE = ReplicatedMergeTree('/test/dest_table1', '1') ORDER BY tuple()"
|
||||
@ -107,7 +108,8 @@ def test_backup_from_old_version_setting(started_cluster):
|
||||
|
||||
node2.query("ALTER TABLE source_table FREEZE PARTITION tuple();")
|
||||
|
||||
node2.restart_with_latest_version(fix_metadata=True)
|
||||
# We don't want to wait old outdated version to finish properly, just terminate it
|
||||
node2.restart_with_latest_version(fix_metadata=True, signal=9)
|
||||
|
||||
node2.query(
|
||||
"CREATE TABLE dest_table (A Int64, B String, Y String) ENGINE = ReplicatedMergeTree('/test/dest_table2', '1') ORDER BY tuple() SETTINGS enable_mixed_granularity_parts = 1"
|
||||
@ -163,7 +165,10 @@ def test_backup_from_old_version_config(started_cluster):
|
||||
"<clickhouse><merge_tree><enable_mixed_granularity_parts>1</enable_mixed_granularity_parts></merge_tree></clickhouse>",
|
||||
)
|
||||
|
||||
node3.restart_with_latest_version(callback_onstop=callback, fix_metadata=True)
|
||||
# We don't want to wait old outdated version to finish properly, just terminate it
|
||||
node3.restart_with_latest_version(
|
||||
callback_onstop=callback, fix_metadata=True, signal=9
|
||||
)
|
||||
|
||||
node3.query(
|
||||
"CREATE TABLE dest_table (A Int64, B String, Y String) ENGINE = ReplicatedMergeTree('/test/dest_table3', '1') ORDER BY tuple() SETTINGS enable_mixed_granularity_parts = 1"
|
||||
|
@ -565,13 +565,20 @@ def test_copy_with_recovering(started_cluster, use_sample_offset):
|
||||
str(COPYING_FAIL_PROBABILITY),
|
||||
"--experimental-use-sample-offset",
|
||||
"1",
|
||||
"--max-table-tries",
|
||||
"10",
|
||||
],
|
||||
)
|
||||
else:
|
||||
execute_task(
|
||||
started_cluster,
|
||||
Task1(started_cluster),
|
||||
["--copy-fault-probability", str(COPYING_FAIL_PROBABILITY)],
|
||||
[
|
||||
"--copy-fault-probability",
|
||||
str(COPYING_FAIL_PROBABILITY),
|
||||
"--max-table-tries",
|
||||
"10",
|
||||
],
|
||||
)
|
||||
|
||||
|
||||
@ -606,7 +613,12 @@ def test_copy_month_to_week_partition_with_recovering(started_cluster):
|
||||
execute_task(
|
||||
started_cluster,
|
||||
Task2(started_cluster, "test2"),
|
||||
["--copy-fault-probability", str(COPYING_FAIL_PROBABILITY)],
|
||||
[
|
||||
"--copy-fault-probability",
|
||||
str(COPYING_FAIL_PROBABILITY),
|
||||
"--max-table-tries",
|
||||
"10",
|
||||
],
|
||||
)
|
||||
|
||||
|
||||
|
@ -89,7 +89,9 @@ def test_blocade_leader(started_cluster):
|
||||
print("Got exception from node", smaller_exception(ex))
|
||||
time.sleep(0.1)
|
||||
|
||||
node2.query("INSERT INTO ordinary.t1 SELECT number FROM numbers(10)")
|
||||
node2.query(
|
||||
"INSERT INTO ordinary.t1 SELECT number FROM numbers(10) SETTINGS insert_keeper_max_retries = 0"
|
||||
)
|
||||
|
||||
node1.query("SYSTEM SYNC REPLICA ordinary.t1", timeout=10)
|
||||
node3.query("SYSTEM SYNC REPLICA ordinary.t1", timeout=10)
|
||||
@ -107,7 +109,9 @@ def test_blocade_leader(started_cluster):
|
||||
restart_replica_for_sure(
|
||||
node2, "ordinary.t1", "/clickhouse/t1/replicas/2"
|
||||
)
|
||||
node2.query("INSERT INTO ordinary.t1 SELECT rand() FROM numbers(100)")
|
||||
node2.query(
|
||||
"INSERT INTO ordinary.t1 SELECT rand() FROM numbers(100) SETTINGS insert_keeper_max_retries = 0"
|
||||
)
|
||||
break
|
||||
except Exception as ex:
|
||||
try:
|
||||
@ -128,7 +132,9 @@ def test_blocade_leader(started_cluster):
|
||||
restart_replica_for_sure(
|
||||
node3, "ordinary.t1", "/clickhouse/t1/replicas/3"
|
||||
)
|
||||
node3.query("INSERT INTO ordinary.t1 SELECT rand() FROM numbers(100)")
|
||||
node3.query(
|
||||
"INSERT INTO ordinary.t1 SELECT rand() FROM numbers(100) SETTINGS insert_keeper_max_retries = 0"
|
||||
)
|
||||
break
|
||||
except Exception as ex:
|
||||
try:
|
||||
@ -167,7 +173,9 @@ def test_blocade_leader(started_cluster):
|
||||
|
||||
for i in range(100):
|
||||
try:
|
||||
node1.query("INSERT INTO ordinary.t1 SELECT rand() FROM numbers(100)")
|
||||
node1.query(
|
||||
"INSERT INTO ordinary.t1 SELECT rand() FROM numbers(100) SETTINGS insert_keeper_max_retries = 0"
|
||||
)
|
||||
break
|
||||
except Exception as ex:
|
||||
print("Got exception node1", smaller_exception(ex))
|
||||
@ -293,7 +301,9 @@ def test_blocade_leader_twice(started_cluster):
|
||||
print("Got exception from node", smaller_exception(ex))
|
||||
time.sleep(0.1)
|
||||
|
||||
node2.query("INSERT INTO ordinary.t2 SELECT number FROM numbers(10)")
|
||||
node2.query(
|
||||
"INSERT INTO ordinary.t2 SELECT number FROM numbers(10) SETTINGS insert_keeper_max_retries = 0"
|
||||
)
|
||||
|
||||
node1.query("SYSTEM SYNC REPLICA ordinary.t2", timeout=10)
|
||||
node3.query("SYSTEM SYNC REPLICA ordinary.t2", timeout=10)
|
||||
@ -311,7 +321,9 @@ def test_blocade_leader_twice(started_cluster):
|
||||
restart_replica_for_sure(
|
||||
node2, "ordinary.t2", "/clickhouse/t2/replicas/2"
|
||||
)
|
||||
node2.query("INSERT INTO ordinary.t2 SELECT rand() FROM numbers(100)")
|
||||
node2.query(
|
||||
"INSERT INTO ordinary.t2 SELECT rand() FROM numbers(100) SETTINGS insert_keeper_max_retries = 0"
|
||||
)
|
||||
break
|
||||
except Exception as ex:
|
||||
try:
|
||||
@ -333,7 +345,9 @@ def test_blocade_leader_twice(started_cluster):
|
||||
node3, "ordinary.t2", "/clickhouse/t2/replicas/3"
|
||||
)
|
||||
node3.query("SYSTEM SYNC REPLICA ordinary.t2", timeout=10)
|
||||
node3.query("INSERT INTO ordinary.t2 SELECT rand() FROM numbers(100)")
|
||||
node3.query(
|
||||
"INSERT INTO ordinary.t2 SELECT rand() FROM numbers(100) SETTINGS insert_keeper_max_retries = 0"
|
||||
)
|
||||
break
|
||||
except Exception as ex:
|
||||
try:
|
||||
@ -359,14 +373,18 @@ def test_blocade_leader_twice(started_cluster):
|
||||
|
||||
for i in range(10):
|
||||
try:
|
||||
node3.query("INSERT INTO ordinary.t2 SELECT rand() FROM numbers(100)")
|
||||
node3.query(
|
||||
"INSERT INTO ordinary.t2 SELECT rand() FROM numbers(100) SETTINGS insert_keeper_max_retries = 0"
|
||||
)
|
||||
assert False, "Node3 became leader?"
|
||||
except Exception as ex:
|
||||
time.sleep(0.5)
|
||||
|
||||
for i in range(10):
|
||||
try:
|
||||
node2.query("INSERT INTO ordinary.t2 SELECT rand() FROM numbers(100)")
|
||||
node2.query(
|
||||
"INSERT INTO ordinary.t2 SELECT rand() FROM numbers(100) SETTINGS insert_keeper_max_retries = 0"
|
||||
)
|
||||
assert False, "Node2 became leader?"
|
||||
except Exception as ex:
|
||||
time.sleep(0.5)
|
||||
@ -399,7 +417,9 @@ def test_blocade_leader_twice(started_cluster):
|
||||
for n, node in enumerate([node1, node2, node3]):
|
||||
for i in range(100):
|
||||
try:
|
||||
node.query("INSERT INTO ordinary.t2 SELECT rand() FROM numbers(100)")
|
||||
node.query(
|
||||
"INSERT INTO ordinary.t2 SELECT rand() FROM numbers(100) SETTINGS insert_keeper_max_retries = 0"
|
||||
)
|
||||
break
|
||||
except Exception as ex:
|
||||
print("Got exception node{}".format(n + 1), smaller_exception(ex))
|
||||
|
@ -2,6 +2,8 @@
|
||||
<profiles>
|
||||
<default>
|
||||
<stream_like_engine_allow_direct_select>1</stream_like_engine_allow_direct_select>
|
||||
<!-- One test is expecting the interruption after blocking ZooKeeper -->
|
||||
<insert_keeper_max_retries>0</insert_keeper_max_retries>
|
||||
</default>
|
||||
</profiles>
|
||||
</clickhouse>
|
||||
|
@ -1034,8 +1034,7 @@ def test_rabbitmq_overloaded_insert(rabbitmq_cluster):
|
||||
rabbitmq_exchange_type = 'direct',
|
||||
rabbitmq_num_consumers = 2,
|
||||
rabbitmq_flush_interval_ms=1000,
|
||||
rabbitmq_max_block_size = 1000,
|
||||
rabbitmq_num_queues = 2,
|
||||
rabbitmq_max_block_size = 100,
|
||||
rabbitmq_routing_key_list = 'over',
|
||||
rabbitmq_format = 'TSV',
|
||||
rabbitmq_row_delimiter = '\\n';
|
||||
@ -1045,8 +1044,6 @@ def test_rabbitmq_overloaded_insert(rabbitmq_cluster):
|
||||
rabbitmq_exchange_name = 'over',
|
||||
rabbitmq_exchange_type = 'direct',
|
||||
rabbitmq_routing_key_list = 'over',
|
||||
rabbitmq_flush_interval_ms=1000,
|
||||
rabbitmq_max_block_size = 1000,
|
||||
rabbitmq_format = 'TSV',
|
||||
rabbitmq_row_delimiter = '\\n';
|
||||
CREATE TABLE test.view_overload (key UInt64, value UInt64)
|
||||
@ -1087,6 +1084,9 @@ def test_rabbitmq_overloaded_insert(rabbitmq_cluster):
|
||||
time.sleep(random.uniform(0, 1))
|
||||
thread.start()
|
||||
|
||||
for thread in threads:
|
||||
thread.join()
|
||||
|
||||
while True:
|
||||
result = instance.query("SELECT count() FROM test.view_overload")
|
||||
expected = messages_num * threads_num
|
||||
@ -1097,16 +1097,13 @@ def test_rabbitmq_overloaded_insert(rabbitmq_cluster):
|
||||
|
||||
instance.query(
|
||||
"""
|
||||
DROP TABLE test.consumer_overload;
|
||||
DROP TABLE test.view_overload;
|
||||
DROP TABLE test.rabbitmq_consume;
|
||||
DROP TABLE test.rabbitmq_overload;
|
||||
DROP TABLE test.consumer_overload NO DELAY;
|
||||
DROP TABLE test.view_overload NO DELAY;
|
||||
DROP TABLE test.rabbitmq_consume NO DELAY;
|
||||
DROP TABLE test.rabbitmq_overload NO DELAY;
|
||||
"""
|
||||
)
|
||||
|
||||
for thread in threads:
|
||||
thread.join()
|
||||
|
||||
assert (
|
||||
int(result) == messages_num * threads_num
|
||||
), "ClickHouse lost some messages: {}".format(result)
|
||||
|
@ -1,5 +1,5 @@
|
||||
DROP TABLE IF EXISTS size_hint;
|
||||
CREATE TABLE size_hint (s Array(String)) ENGINE = MergeTree ORDER BY tuple() SETTINGS index_granularity = 1000;
|
||||
CREATE TABLE size_hint (s Array(String)) ENGINE = MergeTree ORDER BY tuple() SETTINGS index_granularity = 1000, index_granularity_bytes = '10Mi';
|
||||
|
||||
SET max_block_size = 1000;
|
||||
SET max_memory_usage = 1000000000;
|
||||
|
@ -65,7 +65,7 @@ CREATE TABLE large_alter_table_00804 (
|
||||
somedate Date CODEC(ZSTD, ZSTD, ZSTD(12), LZ4HC(12)),
|
||||
id UInt64 CODEC(LZ4, ZSTD, NONE, LZ4HC),
|
||||
data String CODEC(ZSTD(2), LZ4HC, NONE, LZ4, LZ4)
|
||||
) ENGINE = MergeTree() PARTITION BY somedate ORDER BY id SETTINGS index_granularity = 2, min_bytes_for_wide_part = 0;
|
||||
) ENGINE = MergeTree() PARTITION BY somedate ORDER BY id SETTINGS index_granularity = 2, index_granularity_bytes = '10Mi', min_bytes_for_wide_part = 0;
|
||||
|
||||
INSERT INTO large_alter_table_00804 SELECT toDate('2019-01-01'), number, toString(number + rand()) FROM system.numbers LIMIT 300000;
|
||||
|
||||
|
@ -114,7 +114,7 @@ CREATE TABLE compression_codec_multiple_with_key (
|
||||
somedate Date CODEC(ZSTD, ZSTD, ZSTD(12), LZ4HC(12), Delta, Delta),
|
||||
id UInt64 CODEC(LZ4, ZSTD, Delta, NONE, LZ4HC, Delta),
|
||||
data String CODEC(ZSTD(2), Delta(1), LZ4HC, NONE, LZ4, LZ4)
|
||||
) ENGINE = MergeTree() PARTITION BY somedate ORDER BY id SETTINGS index_granularity = 2;
|
||||
) ENGINE = MergeTree() PARTITION BY somedate ORDER BY id SETTINGS index_granularity = 2, index_granularity_bytes = '10Mi';
|
||||
|
||||
|
||||
INSERT INTO compression_codec_multiple_with_key VALUES(toDate('2018-10-12'), 100000, 'hello'), (toDate('2018-10-12'), 100002, 'world'), (toDate('2018-10-12'), 1111, '!');
|
||||
|
@ -23,7 +23,7 @@ CREATE TABLE minmax_idx
|
||||
INDEX idx_2 (u64 + toYear(dt), substring(s, 2, 4)) TYPE minmax GRANULARITY 3
|
||||
) ENGINE = MergeTree()
|
||||
ORDER BY u64
|
||||
SETTINGS index_granularity = 2;"
|
||||
SETTINGS index_granularity = 2, index_granularity_bytes = '10Mi';"
|
||||
|
||||
|
||||
$CLICKHOUSE_CLIENT --query="INSERT INTO minmax_idx VALUES
|
||||
@ -48,4 +48,4 @@ $CLICKHOUSE_CLIENT --query="SELECT * FROM minmax_idx WHERE i32 = 5 AND i32 + f64
|
||||
$CLICKHOUSE_CLIENT --query="SELECT * FROM minmax_idx WHERE (u64 < 2 OR u64 > 10) AND e != 'b' ORDER BY dt"
|
||||
$CLICKHOUSE_CLIENT --query="SELECT * FROM minmax_idx WHERE (u64 < 2 OR u64 > 10) AND e != 'b' ORDER BY dt FORMAT JSON" | grep "rows_read"
|
||||
|
||||
$CLICKHOUSE_CLIENT --query="DROP TABLE minmax_idx"
|
||||
$CLICKHOUSE_CLIENT --query="DROP TABLE minmax_idx"
|
||||
|
@ -19,7 +19,7 @@ CREATE TABLE minmax_idx1
|
||||
idx_2 (u64 + toYear(dt), substring(s, 2, 4)) TYPE minmax GRANULARITY 3
|
||||
) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/test_00837/minmax', 'r1')
|
||||
ORDER BY u64
|
||||
SETTINGS index_granularity = 2;
|
||||
SETTINGS index_granularity = 2, index_granularity_bytes = '10Mi';
|
||||
|
||||
CREATE TABLE minmax_idx2
|
||||
(
|
||||
@ -36,7 +36,7 @@ CREATE TABLE minmax_idx2
|
||||
idx_2 (u64 + toYear(dt), substring(s, 2, 4)) TYPE minmax GRANULARITY 3
|
||||
) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/test_00837/minmax', 'r2')
|
||||
ORDER BY u64
|
||||
SETTINGS index_granularity = 2;
|
||||
SETTINGS index_granularity = 2, index_granularity_bytes = '10Mi';
|
||||
|
||||
|
||||
/* many small inserts => table will make merges */
|
||||
|
Some files were not shown because too many files have changed in this diff.