diff --git a/.github/workflows/jepsen.yml b/.github/workflows/jepsen.yml index 50118eaf622..db837ac1ec7 100644 --- a/.github/workflows/jepsen.yml +++ b/.github/workflows/jepsen.yml @@ -8,13 +8,13 @@ on: # yamllint disable-line rule:truthy schedule: - cron: '0 */6 * * *' workflow_dispatch: - workflow_call: jobs: KeeperJepsenRelease: uses: ./.github/workflows/reusable_simple_job.yml with: test_name: Jepsen keeper check runner_type: style-checker + report_required: true run_command: | python3 jepsen_check.py keeper # ServerJepsenRelease: diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index d2865eb737d..6d150f37a27 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -15,6 +15,8 @@ jobs: outputs: data: ${{ steps.runconfig.outputs.CI_DATA }} steps: + - name: DebugInfo + uses: hmarr/debug-action@a701ed95a46e6f2fb0df25e1a558c16356fae35a - name: Check out repository code uses: ClickHouse/checkout@v1 with: @@ -33,11 +35,9 @@ jobs: - name: PrepareRunConfig id: runconfig run: | - echo "::group::configure CI run" python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --configure --rebuild-all-binaries --outfile ${{ runner.temp }}/ci_run_data.json - echo "::endgroup::" - echo "::group::CI run configure results" + echo "::group::CI configuration" python3 -m json.tool ${{ runner.temp }}/ci_run_data.json echo "::endgroup::" @@ -255,9 +255,9 @@ jobs: run_command: | cd "$GITHUB_WORKSPACE/tests/ci" python3 docker_server.py --release-type head \ - --image-repo clickhouse/clickhouse-server --image-path docker/server + --image-repo clickhouse/clickhouse-server --image-path docker/server --allow-build-reuse python3 docker_server.py --release-type head \ - --image-repo clickhouse/clickhouse-keeper --image-path docker/keeper + --image-repo clickhouse/clickhouse-keeper --image-path docker/keeper --allow-build-reuse ############################################################################################ ##################################### BUILD REPORTER ####################################### ############################################################################################ diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index bd2b2b60904..b3ac2135e50 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -22,6 +22,8 @@ jobs: outputs: data: ${{ steps.runconfig.outputs.CI_DATA }} steps: + - name: DebugInfo + uses: hmarr/debug-action@a701ed95a46e6f2fb0df25e1a558c16356fae35a - name: Check out repository code uses: ClickHouse/checkout@v1 with: @@ -44,11 +46,9 @@ jobs: - name: PrepareRunConfig id: runconfig run: | - echo "::group::configure CI run" python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --configure --outfile ${{ runner.temp }}/ci_run_data.json - echo "::endgroup::" - echo "::group::CI run configure results" + echo "::group::CI configuration" python3 -m json.tool ${{ runner.temp }}/ci_run_data.json echo "::endgroup::" @@ -67,6 +67,7 @@ jobs: DOCKER_TAG=$(echo '${{ toJson(fromJson(steps.runconfig.outputs.CI_DATA).docker_data.images) }}' | tr -d '\n') export DOCKER_TAG=$DOCKER_TAG python3 ./tests/ci/style_check.py --no-push + python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --infile ${{ runner.temp }}/ci_run_data.json --post --job-name 'Style check' BuildDockers: needs: [RunConfig] if: ${{ !failure() && !cancelled() }} @@ -796,7 +797,7 @@ jobs: test_name: Unit tests (asan) runner_type: fuzzer-unit-tester data: ${{ needs.RunConfig.outputs.data }} - UnitTestsReleaseClang: + UnitTestsRelease: needs: 
[RunConfig, BuilderBinRelease] if: ${{ !failure() && !cancelled() }} uses: ./.github/workflows/reusable_test.yml @@ -923,7 +924,7 @@ jobs: - UnitTestsTsan - UnitTestsMsan - UnitTestsUBsan - - UnitTestsReleaseClang + - UnitTestsRelease - CompatibilityCheckX86 - CompatibilityCheckAarch64 - SQLancerTestRelease @@ -966,13 +967,20 @@ jobs: ############################################################################################# ###################################### JEPSEN TESTS ######################################### ############################################################################################# + # This is special test NOT INCLUDED in FinishCheck + # When it's skipped, all dependent tasks will be skipped too. + # DO NOT add it there Jepsen: - # This is special test NOT INCLUDED in FinishCheck - # When it's skipped, all dependent tasks will be skipped too. - # DO NOT add it there - if: ${{ !failure() && !cancelled() && contains(github.event.pull_request.labels.*.name, 'jepsen-test') }} + # we need concurrency as the job uses dedicated instances in the cloud + concurrency: + group: jepsen + if: ${{ !failure() && !cancelled() }} needs: [RunConfig, BuilderBinRelease] - uses: ./.github/workflows/jepsen.yml + uses: ./.github/workflows/reusable_test.yml + with: + test_name: ClickHouse Keeper Jepsen + runner_type: style-checker + data: ${{ needs.RunConfig.outputs.data }} ############################################################################################# ####################################### libFuzzer ########################################### ############################################################################################# diff --git a/.github/workflows/reusable_build.yml b/.github/workflows/reusable_build.yml index e6aa04a3569..2371579692f 100644 --- a/.github/workflows/reusable_build.yml +++ b/.github/workflows/reusable_build.yml @@ -58,6 +58,7 @@ jobs: - name: Apply sparse checkout for contrib # in order to check that it doesn't break build # This step is done in GITHUB_WORKSPACE, # because it's broken in REPO_COPY for some reason + # See also update-submodules.sh if: ${{ env.BUILD_SPARSE_CHECKOUT == 'true' }} run: | rm -rf "$GITHUB_WORKSPACE/contrib" && echo 'removed' @@ -72,12 +73,15 @@ jobs: - name: Pre run: | python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --infile ${{ toJson(inputs.data) }} --pre --job-name '${{inputs.build_name}}' - - name: Build + - name: Run run: | - python3 "$GITHUB_WORKSPACE/tests/ci/build_check.py" "$BUILD_NAME" + python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" \ + --infile ${{ toJson(inputs.data) }} \ + --job-name "$BUILD_NAME" \ + --run - name: Post # it still be build report to upload for failed build job - if: always() + if: ${{ !cancelled() }} run: | python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --infile ${{ toJson(inputs.data) }} --post --job-name '${{inputs.build_name}}' - name: Mark as done diff --git a/.github/workflows/reusable_simple_job.yml b/.github/workflows/reusable_simple_job.yml index ea196a32664..7b7084420a4 100644 --- a/.github/workflows/reusable_simple_job.yml +++ b/.github/workflows/reusable_simple_job.yml @@ -34,12 +34,16 @@ name: Simple job working-directory: description: sets custom working directory type: string - default: "" + default: "$GITHUB_WORKSPACE/tests/ci" git_ref: description: commit to use, merge commit for pr or head required: false type: string default: ${{ github.event.after }} # no merge commit + report_required: + description: set to true if job report with the commit status required + type: 
boolean + default: false secrets: secret_envs: description: if given, it's passed to the environments @@ -58,6 +62,8 @@ jobs: env: GITHUB_JOB_OVERRIDDEN: ${{inputs.test_name}} steps: + - name: DebugInfo + uses: hmarr/debug-action@a701ed95a46e6f2fb0df25e1a558c16356fae35a - name: Check out repository code uses: ClickHouse/checkout@v1 with: @@ -79,12 +85,12 @@ jobs: job_type: test - name: Run run: | - if [ -n '${{ inputs.working-directory }}' ]; then - cd "${{ inputs.working-directory }}" - else - cd "$GITHUB_WORKSPACE/tests/ci" - fi + cd "${{ inputs.working-directory }}" ${{ inputs.run_command }} + - name: Post + if: ${{ inputs.report_required }} + run: | + python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --post --job-name '${{inputs.test_name}}' - name: Clean if: always() uses: ./.github/actions/clean diff --git a/.github/workflows/reusable_test.yml b/.github/workflows/reusable_test.yml index 09177ad887a..749f64d434e 100644 --- a/.github/workflows/reusable_test.yml +++ b/.github/workflows/reusable_test.yml @@ -38,7 +38,7 @@ name: Testing workflow working-directory: description: sets custom working directory type: string - default: "" + default: "$GITHUB_WORKSPACE/tests/ci" secrets: secret_envs: description: if given, it's passed to the environments @@ -96,19 +96,14 @@ jobs: python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --infile ${{ toJson(inputs.data) }} --pre --job-name '${{inputs.test_name}}' - name: Run run: | - if [ -n "${{ inputs.working-directory }}" ]; then - cd "${{ inputs.working-directory }}" - else - cd "$GITHUB_WORKSPACE/tests/ci" - fi - if [ -n "$(echo '${{ inputs.run_command }}' | tr -d '\n')" ]; then - echo "Running command from workflow input" - ${{ inputs.run_command }} - else - echo "Running command from job config" - python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --infile ${{ toJson(inputs.data) }} --run --job-name '${{inputs.test_name}}' - fi + cd "${{ inputs.working-directory }}" + python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" \ + --infile ${{ toJson(inputs.data) }} \ + --job-name '${{inputs.test_name}}' \ + --run \ + --run-command '''${{inputs.run_command}}''' - name: Post run + if: ${{ !cancelled() }} run: | python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --infile ${{ toJson(inputs.data) }} --post --job-name '${{inputs.test_name}}' - name: Mark as done diff --git a/.gitmessage b/.gitmessage index f4a25a837bc..098b66aab1c 100644 --- a/.gitmessage +++ b/.gitmessage @@ -1,9 +1,18 @@ -## To avoid merge commit in CI run (add a leading space to apply): -#no-merge-commit +### CI modificators (add a leading space to apply): -## Running specified job (add a leading space to apply): +## To avoid a merge commit in CI: +#no_merge_commit + +## To discard CI cache: +#no_ci_cache + +## To run specified set of tests in CI: +#ci_set_ +#ci_set_reduced + +## To run specified job in CI: #job_ #job_stateless_tests_release #job_package_debug diff --git a/CHANGELOG.md b/CHANGELOG.md index 1b36142cc9f..50db3292ca8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,2164 +1,178 @@ ### Table of Contents -**[ClickHouse release v23.12, 2023-12-28](#2312)**
-**[ClickHouse release v23.11, 2023-12-06](#2311)**<br/>
-**[ClickHouse release v23.10, 2023-11-02](#2310)**<br/>
-**[ClickHouse release v23.9, 2023-09-28](#239)**<br/>
-**[ClickHouse release v23.8 LTS, 2023-08-31](#238)**<br/>
-**[ClickHouse release v23.7, 2023-07-27](#237)**<br/>
-**[ClickHouse release v23.6, 2023-06-30](#236)**<br/>
-**[ClickHouse release v23.5, 2023-06-08](#235)**<br/>
-**[ClickHouse release v23.4, 2023-04-26](#234)**<br/>
-**[ClickHouse release v23.3 LTS, 2023-03-30](#233)**<br/>
-**[ClickHouse release v23.2, 2023-02-23](#232)**<br/>
-**[ClickHouse release v23.1, 2023-01-25](#231)**<br/>
-**[Changelog for 2022](https://clickhouse.com/docs/en/whats-new/changelog/2022/)**<br/>
+**[ClickHouse release v24.1, 2024-01-30](#241)**<br/>
+**[Changelog for 2023](https://clickhouse.com/docs/en/whats-new/changelog/2023/)**<br/>
-# 2023 Changelog +# 2024 Changelog -### ClickHouse release 23.12, 2023-12-28 +### ClickHouse release 24.1, 2024-01-30 + +### ClickHouse release master (b4a5b6060ea) FIXME as compared to v23.12.1.1368-stable (a2faa65b080) #### Backward Incompatible Change -* Fix check for non-deterministic functions in TTL expressions. Previously, you could create a TTL expression with non-deterministic functions in some cases, which could lead to undefined behavior later. This fixes [#37250](https://github.com/ClickHouse/ClickHouse/issues/37250). Disallow TTL expressions that don't depend on any columns of a table by default. It can be allowed back by `SET allow_suspicious_ttl_expressions = 1` or `SET compatibility = '23.11'`. Closes [#37286](https://github.com/ClickHouse/ClickHouse/issues/37286). [#51858](https://github.com/ClickHouse/ClickHouse/pull/51858) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* The MergeTree setting `clean_deleted_rows` is deprecated, it has no effect anymore. The `CLEANUP` keyword for the `OPTIMIZE` is not allowed by default (it can be unlocked with the `allow_experimental_replacing_merge_with_cleanup` setting). [#58267](https://github.com/ClickHouse/ClickHouse/pull/58267) ([Alexander Tokmakov](https://github.com/tavplubix)). This fixes [#57930](https://github.com/ClickHouse/ClickHouse/issues/57930). This closes [#54988](https://github.com/ClickHouse/ClickHouse/issues/54988). This closes [#54570](https://github.com/ClickHouse/ClickHouse/issues/54570). This closes [#50346](https://github.com/ClickHouse/ClickHouse/issues/50346). This closes [#47579](https://github.com/ClickHouse/ClickHouse/issues/47579). The feature has to be removed because it is not good. We have to remove it as quickly as possible, because there is no other option. [#57932](https://github.com/ClickHouse/ClickHouse/pull/57932) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* The setting `print_pretty_type_names` is turned on by default. You can turn it off to keep the old behavior or `SET compatibility = '23.12'`. [#57726](https://github.com/ClickHouse/ClickHouse/pull/57726) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* The MergeTree setting `clean_deleted_rows` is deprecated, it has no effect anymore. The `CLEANUP` keyword for `OPTIMIZE` is not allowed by default (unless `allow_experimental_replacing_merge_with_cleanup` is enabled). [#58316](https://github.com/ClickHouse/ClickHouse/pull/58316) ([Alexander Tokmakov](https://github.com/tavplubix)). +* The function `reverseDNSQuery` is no longer available. This closes [#58368](https://github.com/ClickHouse/ClickHouse/issues/58368). [#58369](https://github.com/ClickHouse/ClickHouse/pull/58369) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Enable various changes to improve the access control in the configuration file. These changes affect the behavior, and you check the `config.xml` in the `access_control_improvements` section. In case you are not confident, keep the values in the configuration file as they were in the previous version. [#58584](https://github.com/ClickHouse/ClickHouse/pull/58584) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Improve the operation of `sumMapFiltered` with NaN values. NaN values are now placed at the end (instead of randomly) and considered different from any values. `-0` is now also treated as equal to `0`; since 0 values are discarded, `-0` values are discarded too. 
[#58959](https://github.com/ClickHouse/ClickHouse/pull/58959) ([Raúl Marín](https://github.com/Algunenano)). +* The function `visibleWidth` will behave according to the docs. In previous versions, it simply counted code points after string serialization, like the `lengthUTF8` function, but didn't consider zero-width and combining characters, full-width characters, tabs, and deletes. Now the behavior is changed accordingly. If you want to keep the old behavior, set `function_visible_width_behavior` to `0`, or set `compatibility` to `23.12` or lower. [#59022](https://github.com/ClickHouse/ClickHouse/pull/59022) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* `Kusto` dialect is disabled until these two bugs will be fixed: [#59037](https://github.com/ClickHouse/ClickHouse/issues/59037) and [#59036](https://github.com/ClickHouse/ClickHouse/issues/59036). [#59305](https://github.com/ClickHouse/ClickHouse/pull/59305) ([Alexey Milovidov](https://github.com/alexey-milovidov)). Any attempt to use `Kusto` will result in exception. +* More efficient implementation of the `FINAL` modifier no longer guarantees preserving the order even if `max_threads = 1`. If you counted on the previous behavior, set `enable_vertical_final` to 0 or `compatibility` to `23.12`. #### New Feature -* Implement Refreshable Materialized Views, requested in [#33919](https://github.com/ClickHouse/ClickHouse/issues/33919). [#56946](https://github.com/ClickHouse/ClickHouse/pull/56946) ([Michael Kolupaev](https://github.com/al13n321), [Michael Guzov](https://github.com/koloshmet)). -* Introduce `PASTE JOIN`, which allows users to join tables without `ON` clause simply by row numbers. Example: `SELECT * FROM (SELECT number AS a FROM numbers(2)) AS t1 PASTE JOIN (SELECT number AS a FROM numbers(2) ORDER BY a DESC) AS t2`. [#57995](https://github.com/ClickHouse/ClickHouse/pull/57995) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). -* The `ORDER BY` clause now supports specifying `ALL`, meaning that ClickHouse sorts by all columns in the `SELECT` clause. Example: `SELECT col1, col2 FROM tab WHERE [...] ORDER BY ALL`. [#57875](https://github.com/ClickHouse/ClickHouse/pull/57875) ([zhongyuankai](https://github.com/zhongyuankai)). -* Added a new mutation command `ALTER TABLE APPLY DELETED MASK`, which allows to enforce applying of mask written by lightweight delete and to remove rows marked as deleted from disk. [#57433](https://github.com/ClickHouse/ClickHouse/pull/57433) ([Anton Popov](https://github.com/CurtizJ)). -* A handler `/binary` opens a visual viewer of symbols inside the ClickHouse binary. [#58211](https://github.com/ClickHouse/ClickHouse/pull/58211) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Added a new SQL function `sqid` to generate Sqids (https://sqids.org/), example: `SELECT sqid(125, 126)`. [#57512](https://github.com/ClickHouse/ClickHouse/pull/57512) ([Robert Schulze](https://github.com/rschu1ze)). -* Add a new function `seriesPeriodDetectFFT` to detect series period using FFT. [#57574](https://github.com/ClickHouse/ClickHouse/pull/57574) ([Bhavna Jindal](https://github.com/bhavnajindal)). -* Add an HTTP endpoint for checking if Keeper is ready to accept traffic. [#55876](https://github.com/ClickHouse/ClickHouse/pull/55876) ([Konstantin Bogdanov](https://github.com/thevar1able)). -* Add 'union' mode for schema inference. In this mode the resulting table schema is the union of all files schemas (so schema is inferred from each file). 
The mode of schema inference is controlled by a setting `schema_inference_mode` with two possible values - `default` and `union`. Closes [#55428](https://github.com/ClickHouse/ClickHouse/issues/55428). [#55892](https://github.com/ClickHouse/ClickHouse/pull/55892) ([Kruglov Pavel](https://github.com/Avogar)). -* Add new setting `input_format_csv_try_infer_numbers_from_strings` that allows to infer numbers from strings in CSV format. Closes [#56455](https://github.com/ClickHouse/ClickHouse/issues/56455). [#56859](https://github.com/ClickHouse/ClickHouse/pull/56859) ([Kruglov Pavel](https://github.com/Avogar)). -* When the number of databases or tables exceeds a configurable threshold, show a warning to the user. [#57375](https://github.com/ClickHouse/ClickHouse/pull/57375) ([凌涛](https://github.com/lingtaolf)). -* Dictionary with `HASHED_ARRAY` (and `COMPLEX_KEY_HASHED_ARRAY`) layout supports `SHARDS` similarly to `HASHED`. [#57544](https://github.com/ClickHouse/ClickHouse/pull/57544) ([vdimir](https://github.com/vdimir)). -* Add asynchronous metrics for total primary key bytes and total allocated primary key bytes in memory. [#57551](https://github.com/ClickHouse/ClickHouse/pull/57551) ([Bharat Nallan](https://github.com/bharatnc)). -* Add `SHA512_256` function. [#57645](https://github.com/ClickHouse/ClickHouse/pull/57645) ([Bharat Nallan](https://github.com/bharatnc)). -* Add `FORMAT_BYTES` as an alias for `formatReadableSize`. [#57592](https://github.com/ClickHouse/ClickHouse/pull/57592) ([Bharat Nallan](https://github.com/bharatnc)). -* Allow passing optional session token to the `s3` table function. [#57850](https://github.com/ClickHouse/ClickHouse/pull/57850) ([Shani Elharrar](https://github.com/shanielh)). -* Introduce a new setting `http_make_head_request`. If it is turned off, the URL table engine will not do a HEAD request to determine the file size. This is needed to support inefficient, misconfigured, or not capable HTTP servers. [#54602](https://github.com/ClickHouse/ClickHouse/pull/54602) ([Fionera](https://github.com/fionera)). -* It is now possible to refer to ALIAS column in index (non-primary-key) definitions (issue [#55650](https://github.com/ClickHouse/ClickHouse/issues/55650)). Example: `CREATE TABLE tab(col UInt32, col_alias ALIAS col + 1, INDEX idx (col_alias) TYPE minmax) ENGINE = MergeTree ORDER BY col;`. [#57546](https://github.com/ClickHouse/ClickHouse/pull/57546) ([Robert Schulze](https://github.com/rschu1ze)). -* Added a new setting `readonly` which can be used to specify an S3 disk is read only. It can be useful to create a table on a disk of `s3_plain` type, while having read only access to the underlying S3 bucket. [#57977](https://github.com/ClickHouse/ClickHouse/pull/57977) ([Pengyuan Bian](https://github.com/bianpengyuan)). -* The primary key analysis in MergeTree tables will now be applied to predicates that include the virtual column `_part_offset` (optionally with `_part`). This feature can serve as a special kind of a secondary index. [#58224](https://github.com/ClickHouse/ClickHouse/pull/58224) ([Amos Bird](https://github.com/amosbird)). +* Implement Variant data type that represents a union of other data types. Type `Variant(T1, T2, ..., TN)` means that each row of this type has a value of either type `T1` or `T2` or ... or `TN` or none of them (`NULL` value). Variant type is available under a setting `allow_experimental_variant_type`. Reference: [#54864](https://github.com/ClickHouse/ClickHouse/issues/54864). 
[#58047](https://github.com/ClickHouse/ClickHouse/pull/58047) ([Kruglov Pavel](https://github.com/Avogar)). +* Certain settings (currently `min_compress_block_size` and `max_compress_block_size`) can now be specified at column-level where they take precedence over the corresponding table-level setting. Example: `CREATE TABLE tab (col String SETTINGS (min_compress_block_size = 81920, max_compress_block_size = 163840)) ENGINE = MergeTree ORDER BY tuple();`. [#55201](https://github.com/ClickHouse/ClickHouse/pull/55201) ([Duc Canh Le](https://github.com/canhld94)). +* Add `quantileDD` aggregate function as well as the corresponding `quantilesDD` and `medianDD`. It is based on the DDSketch https://www.vldb.org/pvldb/vol12/p2195-masson.pdf. ### Documentation entry for user-facing changes. [#56342](https://github.com/ClickHouse/ClickHouse/pull/56342) ([Srikanth Chekuri](https://github.com/srikanthccv)). +* Allow to configure any kind of object storage with any kind of metadata type. [#58357](https://github.com/ClickHouse/ClickHouse/pull/58357) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Added `null_status_on_timeout_only_active` and `throw_only_active` modes for `distributed_ddl_output_mode` that allow to avoid waiting for inactive replicas. [#58350](https://github.com/ClickHouse/ClickHouse/pull/58350) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Allow partitions from tables with different partition expressions to be attached when the destination table partition expression doesn't re-partition/split the part. [#39507](https://github.com/ClickHouse/ClickHouse/pull/39507) ([Arthur Passos](https://github.com/arthurpassos)). +* Add function `arrayShingles` to compute subarrays, e.g. `arrayShingles([1, 2, 3, 4, 5], 3)` returns `[[1,2,3],[2,3,4],[3,4,5]]`. [#58396](https://github.com/ClickHouse/ClickHouse/pull/58396) ([Zheng Miao](https://github.com/zenmiao7)). +* Added functions `punycodeEncode`, `punycodeDecode`, `idnaEncode` and `idnaDecode` which are useful for translating international domain names to an ASCII representation according to the IDNA standard. [#58454](https://github.com/ClickHouse/ClickHouse/pull/58454) ([Robert Schulze](https://github.com/rschu1ze)). +* Added string similarity functions `dramerauLevenshteinDistance`, `jaroSimilarity` and `jaroWinklerSimilarity`. [#58531](https://github.com/ClickHouse/ClickHouse/pull/58531) ([Robert Schulze](https://github.com/rschu1ze)). +* Add two settings `output_format_compression_level` to change output compression level and `output_format_compression_zstd_window_log` to explicitly set compression window size and enable long-range mode for zstd compression if output compression method is `zstd`. Applied for `INTO OUTFILE` and when writing to table functions `file`, `url`, `hdfs`, `s3`, and `azureBlobStorage`. [#58539](https://github.com/ClickHouse/ClickHouse/pull/58539) ([Duc Canh Le](https://github.com/canhld94)). +* Automatically disable ANSI escape sequences in Pretty formats if the output is not a terminal. Add new `auto` mode to setting `output_format_pretty_color`. [#58614](https://github.com/ClickHouse/ClickHouse/pull/58614) ([Shaun Struwig](https://github.com/Blargian)). +* Added function `sqidDecode` which decodes [Sqids](https://sqids.org/). [#58544](https://github.com/ClickHouse/ClickHouse/pull/58544) ([Robert Schulze](https://github.com/rschu1ze)). +* Allow to read Bool values into String in JSON input formats. It's done under a setting `input_format_json_read_bools_as_strings` that is enabled by default. 
[#58561](https://github.com/ClickHouse/ClickHouse/pull/58561) ([Kruglov Pavel](https://github.com/Avogar)). +* Added function `seriesDecomposeSTL` which decomposes a time series into a season, a trend and a residual component. [#57078](https://github.com/ClickHouse/ClickHouse/pull/57078) ([Bhavna Jindal](https://github.com/bhavnajindal)). +* Introduced MySQL Binlog Client for MaterializedMySQL: One binlog connection for many databases. [#57323](https://github.com/ClickHouse/ClickHouse/pull/57323) ([Val Doroshchuk](https://github.com/valbok)). +* Intel QuickAssist Technology (QAT) provides hardware-accelerated compression and cryptograpy. ClickHouse got a new compression codec `ZSTD_QAT` which utilizes QAT for zstd compression. The codec uses [Intel's QATlib](https://github.com/intel/qatlib) and [Inte's QAT ZSTD Plugin](https://github.com/intel/QAT-ZSTD-Plugin). Right now, only compression can be accelerated in hardware (a software fallback kicks in in case QAT could not be initialized), decompression always runs in software. [#57509](https://github.com/ClickHouse/ClickHouse/pull/57509) ([jasperzhu](https://github.com/jinjunzh)). +* Implementing the new way how object storage keys are generated for s3 disks. Now the format could be defined in terms of `re2` regex syntax with `key_template` option in disc description. [#57663](https://github.com/ClickHouse/ClickHouse/pull/57663) ([Sema Checherinda](https://github.com/CheSema)). +* Table system.dropped_tables_parts contains parts of system.dropped_tables tables (dropped but not yet removed tables). [#58038](https://github.com/ClickHouse/ClickHouse/pull/58038) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Add settings `max_materialized_views_size_for_table` to limit the number of materialized views attached to a table. [#58068](https://github.com/ClickHouse/ClickHouse/pull/58068) ([zhongyuankai](https://github.com/zhongyuankai)). +* `clickhouse-format` improvements: support INSERT queries with `VALUES`; support comments (use `--comments` to output them); support `--max_line_length` option to format only long queries in multiline. [#58246](https://github.com/ClickHouse/ClickHouse/pull/58246) ([vdimir](https://github.com/vdimir)). +* Attach all system tables in `clickhouse-local`, including `system.parts`. This closes [#58312](https://github.com/ClickHouse/ClickHouse/issues/58312). [#58359](https://github.com/ClickHouse/ClickHouse/pull/58359) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Support for `Enum` data types in function `transform`. This closes [#58241](https://github.com/ClickHouse/ClickHouse/issues/58241). [#58360](https://github.com/ClickHouse/ClickHouse/pull/58360) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add table `system.database_engines`. [#58390](https://github.com/ClickHouse/ClickHouse/pull/58390) ([Bharat Nallan](https://github.com/bharatnc)). Allow registering database engines independently in the codebase. [#58365](https://github.com/ClickHouse/ClickHouse/pull/58365) ([Bharat Nallan](https://github.com/bharatnc)). Allow registering interpreters independently. [#58443](https://github.com/ClickHouse/ClickHouse/pull/58443) ([Bharat Nallan](https://github.com/bharatnc)). +* Added `FROM ` modifier for `SYSTEM SYNC REPLICA LIGHTWEIGHT` query. With the `FROM` modifier ensures we wait for fetches and drop-ranges only for the specified source replicas, as well as any replica not in zookeeper or with an empty source_replica. 
[#58393](https://github.com/ClickHouse/ClickHouse/pull/58393) ([Jayme Bird](https://github.com/jaymebrd)). +* Added setting `update_insert_deduplication_token_in_dependent_materialized_views`. This setting allows to update insert deduplication token with table identifier during insert in dependent materialized views. Closes [#59165](https://github.com/ClickHouse/ClickHouse/issues/59165). [#59238](https://github.com/ClickHouse/ClickHouse/pull/59238) ([Maksim Kita](https://github.com/kitaisreal)). +* Added statement `SYSTEM RELOAD ASYNCHRONOUS METRICS` which updates the asynchronous metrics. Mostly useful for testing and development. [#53710](https://github.com/ClickHouse/ClickHouse/pull/53710) ([Robert Schulze](https://github.com/rschu1ze)). #### Performance Improvement -* Extract non-intersecting parts ranges from MergeTree table during FINAL processing. That way we can avoid additional FINAL logic for this non-intersecting parts ranges. In case when amount of duplicate values with same primary key is low, performance will be almost the same as without FINAL. Improve reading performance for MergeTree FINAL when `do_not_merge_across_partitions_select_final` setting is set. [#58120](https://github.com/ClickHouse/ClickHouse/pull/58120) ([Maksim Kita](https://github.com/kitaisreal)). -* Made copy between s3 disks using a s3-server-side copy instead of copying through the buffer. Improves `BACKUP/RESTORE` operations and `clickhouse-disks copy` command. [#56744](https://github.com/ClickHouse/ClickHouse/pull/56744) ([MikhailBurdukov](https://github.com/MikhailBurdukov)). -* Hash JOIN respects setting `max_joined_block_size_rows` and do not produce large blocks for `ALL JOIN`. [#56996](https://github.com/ClickHouse/ClickHouse/pull/56996) ([vdimir](https://github.com/vdimir)). -* Release memory for aggregation earlier. This may avoid unnecessary external aggregation. [#57691](https://github.com/ClickHouse/ClickHouse/pull/57691) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Improve performance of string serialization. [#57717](https://github.com/ClickHouse/ClickHouse/pull/57717) ([Maksim Kita](https://github.com/kitaisreal)). -* Support trivial count optimization for `Merge`-engine tables. [#57867](https://github.com/ClickHouse/ClickHouse/pull/57867) ([skyoct](https://github.com/skyoct)). -* Optimized aggregation in some cases. [#57872](https://github.com/ClickHouse/ClickHouse/pull/57872) ([Anton Popov](https://github.com/CurtizJ)). -* The `hasAny` function can now take advantage of the full-text skipping indices. [#57878](https://github.com/ClickHouse/ClickHouse/pull/57878) ([Jpnock](https://github.com/Jpnock)). -* Function `if(cond, then, else)` (and its alias `cond ? then : else`) were optimized to use branch-free evaluation. [#57885](https://github.com/ClickHouse/ClickHouse/pull/57885) ([zhanglistar](https://github.com/zhanglistar)). -* MergeTree automatically derive `do_not_merge_across_partitions_select_final` setting if partition key expression contains only columns from primary key expression. [#58218](https://github.com/ClickHouse/ClickHouse/pull/58218) ([Maksim Kita](https://github.com/kitaisreal)). -* Speedup `MIN` and `MAX` for native types. [#58231](https://github.com/ClickHouse/ClickHouse/pull/58231) ([Raúl Marín](https://github.com/Algunenano)). -* Implement `SLRU` cache policy for filesystem cache. [#57076](https://github.com/ClickHouse/ClickHouse/pull/57076) ([Kseniia Sumarokova](https://github.com/kssenii)). 
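The experimental `Variant` data type announced in the New Feature list above can be tried with a few statements. This is an illustrative sketch added by the editor, not part of the patch; the table name `variant_demo` and its column `v` are hypothetical, and it assumes the `variantType` introspection helper that ships with the feature.

```sql
-- Minimal sketch of the experimental Variant type (assumes a 24.1+ server).
SET allow_experimental_variant_type = 1;

CREATE TABLE variant_demo
(
    v Variant(UInt64, String, Array(UInt64))
)
ENGINE = Memory;

-- Each row carries a value of exactly one of the declared branches, or NULL.
INSERT INTO variant_demo VALUES (1), ('Hello, World!'), ([1, 2, 3]), (NULL);

-- variantType() reports which branch a given row holds.
SELECT v, variantType(v) FROM variant_demo;
```

A row holding none of the declared branches reads back as `NULL`, which is why the explicit `NULL` insert above is accepted.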
-* The limit for the number of connections per endpoint for background fetches was raised from `15` to the value of `background_fetches_pool_size` setting. - MergeTree-level setting `replicated_max_parallel_fetches_for_host` became obsolete - MergeTree-level settings `replicated_fetches_http_connection_timeout`, `replicated_fetches_http_send_timeout` and `replicated_fetches_http_receive_timeout` are moved to the Server-level. - Setting `keep_alive_timeout` is added to the list of Server-level settings. [#57523](https://github.com/ClickHouse/ClickHouse/pull/57523) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). -* Make querying `system.filesystem_cache` not memory intensive. [#57687](https://github.com/ClickHouse/ClickHouse/pull/57687) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Reduce memory usage on strings deserialization. [#57787](https://github.com/ClickHouse/ClickHouse/pull/57787) ([Maksim Kita](https://github.com/kitaisreal)). -* More efficient constructor for Enum - it makes sense when Enum has a boatload of values. [#57887](https://github.com/ClickHouse/ClickHouse/pull/57887) ([Duc Canh Le](https://github.com/canhld94)). -* An improvement for reading from the filesystem cache: always use `pread` method. [#57970](https://github.com/ClickHouse/ClickHouse/pull/57970) ([Nikita Taranov](https://github.com/nickitat)). -* Add optimization for AND notEquals chain in logical expression optimizer. This optimization is only available with the experimental Analyzer enabled. [#58214](https://github.com/ClickHouse/ClickHouse/pull/58214) ([Kevin Mingtarja](https://github.com/kevinmingtarja)). +* Coordination for parallel replicas is rewritten for better parallelism and cache locality. It has been tested for linear scalability on hundreds of replicas. It also got support for reading in order. [#57968](https://github.com/ClickHouse/ClickHouse/pull/57968) ([Nikita Taranov](https://github.com/nickitat)). +* Replace HTTP outgoing buffering based with the native ClickHouse buffers. Add bytes counting metrics for interfaces. [#56064](https://github.com/ClickHouse/ClickHouse/pull/56064) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Large aggregation states of `uniqExact` will be merged in parallel in distrubuted queries. [#59009](https://github.com/ClickHouse/ClickHouse/pull/59009) ([Nikita Taranov](https://github.com/nickitat)). +* Lower memory usage after reading from `MergeTree` tables. [#59290](https://github.com/ClickHouse/ClickHouse/pull/59290) ([Anton Popov](https://github.com/CurtizJ)). +* Lower memory usage in vertical merges. [#59340](https://github.com/ClickHouse/ClickHouse/pull/59340) ([Anton Popov](https://github.com/CurtizJ)). +* Avoid huge memory consumption during Keeper startup for more cases. [#58455](https://github.com/ClickHouse/ClickHouse/pull/58455) ([Antonio Andelic](https://github.com/antonio2368)). +* Keeper improvement: reduce Keeper's memory usage for stored nodes. [#59002](https://github.com/ClickHouse/ClickHouse/pull/59002) ([Antonio Andelic](https://github.com/antonio2368)). +* More cache-friendly final implementation. Note on the behaviour change: previously queries with `FINAL` modifier that read with a single stream (e.g. `max_threads = 1`) produced sorted output without explicitly provided `ORDER BY` clause. This is no longer guaranteed when `enable_vertical_final = true` (and it is so by default). [#54366](https://github.com/ClickHouse/ClickHouse/pull/54366) ([Duc Canh Le](https://github.com/canhld94)). 
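To illustrate the `FINAL` ordering change described in the entry above, here is a hedged, editor-added sketch (not part of the patch); `events` and its sorting key `key` are hypothetical names.

```sql
-- Since 24.1 this no longer guarantees output ordered by the sorting key,
-- even with a single reading stream:
SELECT * FROM events FINAL SETTINGS max_threads = 1;

-- Either request the order explicitly ...
SELECT * FROM events FINAL ORDER BY key;

-- ... or fall back to the previous FINAL implementation for this query.
SELECT * FROM events FINAL SETTINGS enable_vertical_final = 0, max_threads = 1;
```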
+* Bypass extra copying in `ReadBufferFromIStream` which is used, e.g., for reading from S3. [#56961](https://github.com/ClickHouse/ClickHouse/pull/56961) ([Nikita Taranov](https://github.com/nickitat)). +* Optimize array element function when input is Array(Map)/Array(Array(Num)/Array(Array(String))/Array(BigInt)/Array(Decimal). The previous implementations did more allocations than needed. The optimization speed up is up to ~6x especially when input type is Array(Map). [#56403](https://github.com/ClickHouse/ClickHouse/pull/56403) ([李扬](https://github.com/taiyang-li)). +* Read column once while reading more than one subcolumn from it in compact parts. [#57631](https://github.com/ClickHouse/ClickHouse/pull/57631) ([Kruglov Pavel](https://github.com/Avogar)). +* Rewrite the AST of `sum(column + constant)` function. This is available as an optimization pass for Analyzer [#57853](https://github.com/ClickHouse/ClickHouse/pull/57853) ([Jiebin Sun](https://github.com/jiebinn)). +* The evaluation of function `match` now utilizes skipping indices `ngrambf_v1` and `tokenbf_v1`. [#57882](https://github.com/ClickHouse/ClickHouse/pull/57882) ([凌涛](https://github.com/lingtaolf)). +* The evaluation of function `match` now utilizes inverted indices. [#58284](https://github.com/ClickHouse/ClickHouse/pull/58284) ([凌涛](https://github.com/lingtaolf)). +* MergeTree `FINAL` does not compare rows from same non-L0 part. [#58142](https://github.com/ClickHouse/ClickHouse/pull/58142) ([Duc Canh Le](https://github.com/canhld94)). +* Speed up iota calls (filling array with consecutive numbers). [#58271](https://github.com/ClickHouse/ClickHouse/pull/58271) ([Raúl Marín](https://github.com/Algunenano)). +* Speedup MIN/MAX for non-numeric types. [#58334](https://github.com/ClickHouse/ClickHouse/pull/58334) ([Raúl Marín](https://github.com/Algunenano)). +* Optimize the combination of filters (like in multi-stage PREWHERE) with BMI2/SSE intrinsics [#58800](https://github.com/ClickHouse/ClickHouse/pull/58800) ([Zhiguo Zhou](https://github.com/ZhiguoZh)). +* Use one thread less in `clickhouse-local`. [#58968](https://github.com/ClickHouse/ClickHouse/pull/58968) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Improve the `multiIf` function performance when the type is Nullable. [#57745](https://github.com/ClickHouse/ClickHouse/pull/57745) ([KevinyhZou](https://github.com/KevinyhZou)). +* Add `SYSTEM JEMALLOC PURGE` for purging unused jemalloc pages, `SYSTEM JEMALLOC [ ENABLE | DISABLE | FLUSH ] PROFILE` for controlling jemalloc profile if the profiler is enabled. Add jemalloc-related 4LW command in Keeper: `jmst` for dumping jemalloc stats, `jmfp`, `jmep`, `jmdp` for controlling jemalloc profile if the profiler is enabled. [#58665](https://github.com/ClickHouse/ClickHouse/pull/58665) ([Antonio Andelic](https://github.com/antonio2368)). +* Lower memory consumption in backups to S3. [#58962](https://github.com/ClickHouse/ClickHouse/pull/58962) ([Vitaly Baranov](https://github.com/vitlibar)). #### Improvement -* Support for soft memory limit in Keeper. It will refuse requests if the memory usage is close to the maximum. [#57271](https://github.com/ClickHouse/ClickHouse/pull/57271) ([Han Fei](https://github.com/hanfei1991)). [#57699](https://github.com/ClickHouse/ClickHouse/pull/57699) ([Han Fei](https://github.com/hanfei1991)). -* Make inserts into distributed tables handle updated cluster configuration properly. 
When the list of cluster nodes is dynamically updated, the Directory Monitor of the distribution table will update it. [#42826](https://github.com/ClickHouse/ClickHouse/pull/42826) ([zhongyuankai](https://github.com/zhongyuankai)). -* Do not allow creating a replicated table with inconsistent merge parameters. [#56833](https://github.com/ClickHouse/ClickHouse/pull/56833) ([Duc Canh Le](https://github.com/canhld94)). -* Show uncompressed size in `system.tables`. [#56618](https://github.com/ClickHouse/ClickHouse/issues/56618). [#57186](https://github.com/ClickHouse/ClickHouse/pull/57186) ([Chen Lixiang](https://github.com/chenlx0)). -* Add `skip_unavailable_shards` as a setting for `Distributed` tables that is similar to the corresponding query-level setting. Closes [#43666](https://github.com/ClickHouse/ClickHouse/issues/43666). [#57218](https://github.com/ClickHouse/ClickHouse/pull/57218) ([Gagan Goel](https://github.com/tntnatbry)). -* The function `substring` (aliases: `substr`, `mid`) can now be used with `Enum` types. Previously, the first function argument had to be a value of type `String` or `FixedString`. This improves compatibility with 3rd party tools such as Tableau via MySQL interface. [#57277](https://github.com/ClickHouse/ClickHouse/pull/57277) ([Serge Klochkov](https://github.com/slvrtrn)). -* Function `format` now supports arbitrary argument types (instead of only `String` and `FixedString` arguments). This is important to calculate `SELECT format('The {0} to all questions is {1}', 'answer', 42)`. [#57549](https://github.com/ClickHouse/ClickHouse/pull/57549) ([Robert Schulze](https://github.com/rschu1ze)). -* Allows to use the `date_trunc` function with a case-insensitive first argument. Both cases are now supported: `SELECT date_trunc('day', now())` and `SELECT date_trunc('DAY', now())`. [#57624](https://github.com/ClickHouse/ClickHouse/pull/57624) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). -* Better hints when a table doesn't exist. [#57342](https://github.com/ClickHouse/ClickHouse/pull/57342) ([Bharat Nallan](https://github.com/bharatnc)). -* Allow to overwrite `max_partition_size_to_drop` and `max_table_size_to_drop` server settings in query time. [#57452](https://github.com/ClickHouse/ClickHouse/pull/57452) ([Jordi Villar](https://github.com/jrdi)). -* Slightly better inference of unnamed tupes in JSON formats. [#57751](https://github.com/ClickHouse/ClickHouse/pull/57751) ([Kruglov Pavel](https://github.com/Avogar)). -* Add support for read-only flag when connecting to Keeper (fixes [#53749](https://github.com/ClickHouse/ClickHouse/issues/53749)). [#57479](https://github.com/ClickHouse/ClickHouse/pull/57479) ([Mikhail Koviazin](https://github.com/mkmkme)). -* Fix possible distributed sends stuck due to "No such file or directory" (during recovering a batch from disk). Fix possible issues with `error_count` from `system.distribution_queue` (in case of `distributed_directory_monitor_max_sleep_time_ms` >5min). Introduce profile event to track async INSERT failures - `DistributedAsyncInsertionFailures`. [#57480](https://github.com/ClickHouse/ClickHouse/pull/57480) ([Azat Khuzhin](https://github.com/azat)). -* Support PostgreSQL generated columns and default column values in `MaterializedPostgreSQL` (experimental feature). Closes [#40449](https://github.com/ClickHouse/ClickHouse/issues/40449). [#57568](https://github.com/ClickHouse/ClickHouse/pull/57568) ([Kseniia Sumarokova](https://github.com/kssenii)). 
-* Allow to apply some filesystem cache config settings changes without server restart. [#57578](https://github.com/ClickHouse/ClickHouse/pull/57578) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Properly handling PostgreSQL table structure with empty array. [#57618](https://github.com/ClickHouse/ClickHouse/pull/57618) ([Mike Kot](https://github.com/myrrc)). -* Expose the total number of errors occurred since last server restart as a `ClickHouseErrorMetric_ALL` metric. [#57627](https://github.com/ClickHouse/ClickHouse/pull/57627) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). -* Allow nodes in the configuration file with `from_env`/`from_zk` reference and non empty element with replace=1. [#57628](https://github.com/ClickHouse/ClickHouse/pull/57628) ([Azat Khuzhin](https://github.com/azat)). -* A table function `fuzzJSON` which allows generating a lot of malformed JSON for fuzzing. [#57646](https://github.com/ClickHouse/ClickHouse/pull/57646) ([Julia Kartseva](https://github.com/jkartseva)). -* Allow IPv6 to UInt128 conversion and binary arithmetic. [#57707](https://github.com/ClickHouse/ClickHouse/pull/57707) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). -* Add a setting for `async inserts deduplication cache` - how long we wait for cache update. Deprecate setting `async_block_ids_cache_min_update_interval_ms`. Now cache is updated only in case of conflicts. [#57743](https://github.com/ClickHouse/ClickHouse/pull/57743) ([alesapin](https://github.com/alesapin)). -* `sleep()` function now can be cancelled with `KILL QUERY`. [#57746](https://github.com/ClickHouse/ClickHouse/pull/57746) ([Vitaly Baranov](https://github.com/vitlibar)). -* Forbid `CREATE TABLE ... AS SELECT` queries for `Replicated` table engines in the experimental `Replicated` database because they are not supported. Reference [#35408](https://github.com/ClickHouse/ClickHouse/issues/35408). [#57796](https://github.com/ClickHouse/ClickHouse/pull/57796) ([Nikolay Degterinsky](https://github.com/evillique)). -* Fix and improve transforming queries for external databases, to recursively obtain all compatible predicates. [#57888](https://github.com/ClickHouse/ClickHouse/pull/57888) ([flynn](https://github.com/ucasfl)). -* Support dynamic reloading of the filesystem cache size. Closes [#57866](https://github.com/ClickHouse/ClickHouse/issues/57866). [#57897](https://github.com/ClickHouse/ClickHouse/pull/57897) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Correctly support `system.stack_trace` for threads with blocked SIGRTMIN (these threads can exist in low-quality external libraries such as Apache rdkafka). [#57907](https://github.com/ClickHouse/ClickHouse/pull/57907) ([Azat Khuzhin](https://github.com/azat)). Aand also send signal to the threads only if it is not blocked to avoid waiting `storage_system_stack_trace_pipe_read_timeout_ms` when it does not make any sense. [#58136](https://github.com/ClickHouse/ClickHouse/pull/58136) ([Azat Khuzhin](https://github.com/azat)). -* Tolerate keeper failures in the quorum inserts' check. [#57986](https://github.com/ClickHouse/ClickHouse/pull/57986) ([Raúl Marín](https://github.com/Algunenano)). -* Add max/peak RSS (`MemoryResidentMax`) into system.asynchronous_metrics. [#58095](https://github.com/ClickHouse/ClickHouse/pull/58095) ([Azat Khuzhin](https://github.com/azat)). -* This PR allows users to use s3-style links (`https://` and `s3://`) without mentioning region if it's not default. 
Also find the correct region if the user mentioned the wrong one. [#58148](https://github.com/ClickHouse/ClickHouse/pull/58148) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). -* `clickhouse-format --obfuscate` will know about Settings, MergeTreeSettings, and time zones and keep their names unchanged. [#58179](https://github.com/ClickHouse/ClickHouse/pull/58179) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Added explicit `finalize()` function in `ZipArchiveWriter`. Simplify too complicated code in `ZipArchiveWriter`. This fixes [#58074](https://github.com/ClickHouse/ClickHouse/issues/58074). [#58202](https://github.com/ClickHouse/ClickHouse/pull/58202) ([Vitaly Baranov](https://github.com/vitlibar)). -* Make caches with the same path use the same cache objects. This behaviour existed before, but was broken in 23.4. If such caches with the same path have different set of cache settings, an exception will be thrown, that this is not allowed. [#58264](https://github.com/ClickHouse/ClickHouse/pull/58264) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Parallel replicas (experimental feature): friendly settings [#57542](https://github.com/ClickHouse/ClickHouse/pull/57542) ([Igor Nikonov](https://github.com/devcrafter)). -* Parallel replicas (experimental feature): announcement response handling improvement [#57749](https://github.com/ClickHouse/ClickHouse/pull/57749) ([Igor Nikonov](https://github.com/devcrafter)). -* Parallel replicas (experimental feature): give more respect to `min_number_of_marks` in `ParallelReplicasReadingCoordinator` [#57763](https://github.com/ClickHouse/ClickHouse/pull/57763) ([Nikita Taranov](https://github.com/nickitat)). -* Parallel replicas (experimental feature): disable parallel replicas with IN (subquery) [#58133](https://github.com/ClickHouse/ClickHouse/pull/58133) ([Igor Nikonov](https://github.com/devcrafter)). -* Parallel replicas (experimental feature): add profile event 'ParallelReplicasUsedCount' [#58173](https://github.com/ClickHouse/ClickHouse/pull/58173) ([Igor Nikonov](https://github.com/devcrafter)). -* Non POST requests such as HEAD will be readonly similar to GET. [#58060](https://github.com/ClickHouse/ClickHouse/pull/58060) ([San](https://github.com/santrancisco)). -* Add `bytes_uncompressed` column to `system.part_log` [#58167](https://github.com/ClickHouse/ClickHouse/pull/58167) ([Jordi Villar](https://github.com/jrdi)). -* Add base backup name to `system.backups` and `system.backup_log` tables [#58178](https://github.com/ClickHouse/ClickHouse/pull/58178) ([Pradeep Chhetri](https://github.com/chhetripradeep)). -* Add support for specifying query parameters in the command line in clickhouse-local [#58210](https://github.com/ClickHouse/ClickHouse/pull/58210) ([Pradeep Chhetri](https://github.com/chhetripradeep)). +* Added comments (brief descriptions) to all columns of system tables. There are several reasons for this: - We use system tables a lot, and sometimes it could be very difficult for developer to understand the purpose and the meaning of a particular column. - We change (add new ones or modify existing) system tables a lot and the documentation for them is always outdated. For example take a look at the documentation page for [`system.parts`](https://clickhouse.com/docs/en/operations/system-tables/parts). It misses a lot of columns - We would like to eventually generate documentation directly from ClickHouse. 
[#58356](https://github.com/ClickHouse/ClickHouse/pull/58356) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Allow queries without aliases for subqueries for `PASTE JOIN`. [#58654](https://github.com/ClickHouse/ClickHouse/pull/58654) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* Enable `MySQL`/`MariaDB` integration on macOS. This closes [#21191](https://github.com/ClickHouse/ClickHouse/issues/21191). [#46316](https://github.com/ClickHouse/ClickHouse/pull/46316) ([Alexey Milovidov](https://github.com/alexey-milovidov)) ([Robert Schulze](https://github.com/rschu1ze)). +* Disable `max_rows_in_set_to_optimize_join` by default. [#56396](https://github.com/ClickHouse/ClickHouse/pull/56396) ([vdimir](https://github.com/vdimir)). +* Add `` config parameter that allows avoiding resolving hostnames in ON CLUSTER DDL queries and Replicated database engines. This mitigates the possibility of the queue being stuck in case of a change in cluster definition. Closes [#57573](https://github.com/ClickHouse/ClickHouse/issues/57573). [#57603](https://github.com/ClickHouse/ClickHouse/pull/57603) ([Nikolay Degterinsky](https://github.com/evillique)). +* Increase `load_metadata_threads` to 16 for the filesystem cache. It will make the server start up faster. [#57732](https://github.com/ClickHouse/ClickHouse/pull/57732) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add ability to throttle merges/mutations (`max_mutations_bandwidth_for_server`/`max_merges_bandwidth_for_server`). [#57877](https://github.com/ClickHouse/ClickHouse/pull/57877) ([Azat Khuzhin](https://github.com/azat)). +* Replaced undocumented (boolean) column `is_hot_reloadable` in system table `system.server_settings` by (Enum8) column `changeable_without_restart` with possible values `No`, `Yes`, `IncreaseOnly` and `DecreaseOnly`. Also documented the column. [#58029](https://github.com/ClickHouse/ClickHouse/pull/58029) ([skyoct](https://github.com/skyoct)). +* Cluster discovery supports setting username and password, close [#58063](https://github.com/ClickHouse/ClickHouse/issues/58063). [#58123](https://github.com/ClickHouse/ClickHouse/pull/58123) ([vdimir](https://github.com/vdimir)). +* Support query parameters in `ALTER TABLE ... PART`. [#58297](https://github.com/ClickHouse/ClickHouse/pull/58297) ([Azat Khuzhin](https://github.com/azat)). +* Create consumers for Kafka tables on the fly (but keep them for some period - `kafka_consumers_pool_ttl_ms`, since last used), this should fix problem with statistics for `system.kafka_consumers` (that does not consumed when nobody reads from Kafka table, which leads to live memory leak and slow table detach) and also this PR enables stats for `system.kafka_consumers` by default again. [#58310](https://github.com/ClickHouse/ClickHouse/pull/58310) ([Azat Khuzhin](https://github.com/azat)). +* `sparkBar` as an alias to `sparkbar`. [#58335](https://github.com/ClickHouse/ClickHouse/pull/58335) ([凌涛](https://github.com/lingtaolf)). +* Avoid sending `ComposeObject` requests after upload to `GCS`. [#58343](https://github.com/ClickHouse/ClickHouse/pull/58343) ([Azat Khuzhin](https://github.com/azat)). +* Correctly handle keys with dot in the name in configurations XMLs. [#58354](https://github.com/ClickHouse/ClickHouse/pull/58354) ([Azat Khuzhin](https://github.com/azat)). +* Make function `format` return constant on constant arguments. This closes [#58355](https://github.com/ClickHouse/ClickHouse/issues/58355). 
[#58358](https://github.com/ClickHouse/ClickHouse/pull/58358) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Adding a setting `max_estimated_execution_time` to separate `max_execution_time` and `max_estimated_execution_time`. [#58402](https://github.com/ClickHouse/ClickHouse/pull/58402) ([Zhang Yifan](https://github.com/zhangyifan27)). +* Provide a hint when an invalid database engine name is used. [#58444](https://github.com/ClickHouse/ClickHouse/pull/58444) ([Bharat Nallan](https://github.com/bharatnc)). +* Add settings for better control of indexes type in Arrow dictionary. Use signed integer type for indexes by default as Arrow recommends. Closes [#57401](https://github.com/ClickHouse/ClickHouse/issues/57401). [#58519](https://github.com/ClickHouse/ClickHouse/pull/58519) ([Kruglov Pavel](https://github.com/Avogar)). +* Implement [#58575](https://github.com/ClickHouse/ClickHouse/issues/58575) Support `CLICKHOUSE_PASSWORD_FILE ` environment variable when running the docker image. [#58583](https://github.com/ClickHouse/ClickHouse/pull/58583) ([Eyal Halpern Shalev](https://github.com/Eyal-Shalev)). +* When executing some queries, which require a lot of streams for reading data, the error `"Paste JOIN requires sorted tables only"` was previously thrown. Now the numbers of streams resize to 1 in that case. [#58608](https://github.com/ClickHouse/ClickHouse/pull/58608) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* Better message for INVALID_IDENTIFIER error. [#58703](https://github.com/ClickHouse/ClickHouse/pull/58703) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Improved handling of signed numeric literals in normalizeQuery. [#58710](https://github.com/ClickHouse/ClickHouse/pull/58710) ([Salvatore Mesoraca](https://github.com/aiven-sal)). +* Support Point data type for MySQL. [#58721](https://github.com/ClickHouse/ClickHouse/pull/58721) ([Kseniia Sumarokova](https://github.com/kssenii)). +* When comparing a Float32 column and a const string, read the string as Float32 (instead of Float64). [#58724](https://github.com/ClickHouse/ClickHouse/pull/58724) ([Raúl Marín](https://github.com/Algunenano)). +* Improve S3 compatibility, add ECloud EOS storage support. [#58786](https://github.com/ClickHouse/ClickHouse/pull/58786) ([xleoken](https://github.com/xleoken)). +* Allow `KILL QUERY` to cancel backups / restores. This PR also makes running backups and restores visible in `system.processes`. Also, there is a new setting in the server configuration now - `shutdown_wait_backups_and_restores` (default=true) which makes the server either wait on shutdown for all running backups and restores to finish or just cancel them. [#58804](https://github.com/ClickHouse/ClickHouse/pull/58804) ([Vitaly Baranov](https://github.com/vitlibar)). +* Avro format to support ZSTD codec. Closes [#58735](https://github.com/ClickHouse/ClickHouse/issues/58735). [#58805](https://github.com/ClickHouse/ClickHouse/pull/58805) ([flynn](https://github.com/ucasfl)). +* MySQL interface gained support for `net_write_timeout` and `net_read_timeout` settings. `net_write_timeout` is translated into the native `send_timeout` ClickHouse setting and, similarly, `net_read_timeout` into `receive_timeout`. Fixed an issue where it was possible to set MySQL `sql_select_limit` setting only if the entire statement was in upper case. [#58835](https://github.com/ClickHouse/ClickHouse/pull/58835) ([Serge Klochkov](https://github.com/slvrtrn)). 
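A brief sketch of the `max_estimated_execution_time` split mentioned in the entries above (editor-added illustration, not part of the patch; the values are arbitrary and both limits are expressed in seconds).

```sql
-- Hard wall-clock limit for the query, as before:
SET max_execution_time = 600;

-- New in 24.1: a separate limit applied to the *estimated* total runtime
-- (previously max_execution_time was reused for this estimate-based check).
SET max_estimated_execution_time = 120;
```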
+* A better exception message while conflict of creating dictionary and table with the same name. [#58841](https://github.com/ClickHouse/ClickHouse/pull/58841) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* Make sure that for custom (created from SQL) disks ether `filesystem_caches_path` (a common directory prefix for all filesystem caches) or `custom_cached_disks_base_directory` (a common directory prefix for only filesystem caches created from custom disks) is specified in server config. `custom_cached_disks_base_directory` has higher priority for custom disks over `filesystem_caches_path`, which is used if the former one is absent. Filesystem cache setting `path` must lie inside that directory, otherwise exception will be thrown preventing disk to be created. This will not affect disks created on an older version and server was upgraded - then the exception will not be thrown to allow the server to successfully start). `custom_cached_disks_base_directory` is added to default server config as `/var/lib/clickhouse/caches/`. Closes [#57825](https://github.com/ClickHouse/ClickHouse/issues/57825). [#58869](https://github.com/ClickHouse/ClickHouse/pull/58869) ([Kseniia Sumarokova](https://github.com/kssenii)). +* MySQL interface gained compatibility with `SHOW WARNINGS`/`SHOW COUNT(*) WARNINGS` queries, though the returned result is always an empty set. [#58929](https://github.com/ClickHouse/ClickHouse/pull/58929) ([Serge Klochkov](https://github.com/slvrtrn)). +* Skip unavailable replicas when executing parallel distributed `INSERT SELECT`. [#58931](https://github.com/ClickHouse/ClickHouse/pull/58931) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Display word-descriptive log level while enabling structured log formatting in json. [#58936](https://github.com/ClickHouse/ClickHouse/pull/58936) ([Tim Liou](https://github.com/wheatdog)). +* MySQL interface gained support for `CAST(x AS SIGNED)` and `CAST(x AS UNSIGNED)` statements via data type aliases: `SIGNED` for Int64, and `UNSIGNED` for UInt64. This improves compatibility with BI tools such as Looker Studio. [#58954](https://github.com/ClickHouse/ClickHouse/pull/58954) ([Serge Klochkov](https://github.com/slvrtrn)). +* Change working directory to the data path in docker container. [#58975](https://github.com/ClickHouse/ClickHouse/pull/58975) ([cangyin](https://github.com/cangyin)). +* Added setting for Azure Blob Storage `azure_max_unexpected_write_error_retries` , can also be set from config under azure section. [#59001](https://github.com/ClickHouse/ClickHouse/pull/59001) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Allow server to start with broken data lake table. Closes [#58625](https://github.com/ClickHouse/ClickHouse/issues/58625). [#59080](https://github.com/ClickHouse/ClickHouse/pull/59080) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Allow to ignore schema evolution in the `Iceberg` table engine and read all data using schema specified by the user on table creation or latest schema parsed from metadata on table creation. This is done under a setting `iceberg_engine_ignore_schema_evolution` that is disabled by default. Note that enabling this setting can lead to incorrect result as in case of evolved schema all data files will be read using the same schema. [#59133](https://github.com/ClickHouse/ClickHouse/pull/59133) ([Kruglov Pavel](https://github.com/Avogar)). +* Prohibit mutable operations (`INSERT`/`ALTER`/`OPTIMIZE`/...) 
on read-only/write-once storages with a proper `TABLE_IS_READ_ONLY` error (to avoid leftovers). Avoid leaving left-overs on write-once disks (`format_version.txt`) on `CREATE`/`ATTACH`. Ignore `DROP` for `ReplicatedMergeTree` (the same as for `MergeTree`). Fix iterating over `s3_plain` (`MetadataStorageFromPlainObjectStorage::iterateDirectory`). Note that the read-only disk is `web` and the write-once disk is `s3_plain`. [#59170](https://github.com/ClickHouse/ClickHouse/pull/59170) ([Azat Khuzhin](https://github.com/azat)). +* Fix a bug in the experimental `_block_number` column which could lead to a logical error during a complex combination of `ALTER`s and merges. Fixes [#56202](https://github.com/ClickHouse/ClickHouse/issues/56202). Replaces [#58601](https://github.com/ClickHouse/ClickHouse/issues/58601). [#59295](https://github.com/ClickHouse/ClickHouse/pull/59295) ([alesapin](https://github.com/alesapin)). +* Play UI understands when an exception is returned inside JSON. Adjustment for [#52853](https://github.com/ClickHouse/ClickHouse/issues/52853). [#59303](https://github.com/ClickHouse/ClickHouse/pull/59303) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* The `/binary` HTTP handler allows specifying user, host, and, optionally, password in the query string. [#59311](https://github.com/ClickHouse/ClickHouse/pull/59311) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Support backups for compressed in-memory tables. This closes [#57893](https://github.com/ClickHouse/ClickHouse/issues/57893). [#59315](https://github.com/ClickHouse/ClickHouse/pull/59315) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Support the `FORMAT` clause in `BACKUP` and `RESTORE` queries. [#59338](https://github.com/ClickHouse/ClickHouse/pull/59338) ([Vitaly Baranov](https://github.com/vitlibar)). +* Function `concatWithSeparator` now supports arbitrary argument types (instead of only `String` and `FixedString` arguments). For example, `SELECT concatWithSeparator('.', 'number', 1)` now returns `number.1`. [#59341](https://github.com/ClickHouse/ClickHouse/pull/59341) ([Robert Schulze](https://github.com/rschu1ze)). #### Build/Testing/Packaging Improvement -* Randomize more settings [#39663](https://github.com/ClickHouse/ClickHouse/pull/39663) ([Anton Popov](https://github.com/CurtizJ)). -* Randomize disabled optimizations in CI [#57315](https://github.com/ClickHouse/ClickHouse/pull/57315) ([Raúl Marín](https://github.com/Algunenano)). -* Allow usage of Azure-related table engines/functions on macOS. [#51866](https://github.com/ClickHouse/ClickHouse/pull/51866) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* ClickHouse Fast Test now uses Musl instead of GLibc. [#57711](https://github.com/ClickHouse/ClickHouse/pull/57711) ([Alexey Milovidov](https://github.com/alexey-milovidov)). The fully-static Musl build is available to download from the CI. -* Run ClickBench for every commit. This closes [#57708](https://github.com/ClickHouse/ClickHouse/issues/57708). [#57712](https://github.com/ClickHouse/ClickHouse/pull/57712) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Remove the usage of a harmful C/POSIX `select` function from external libraries. [#57467](https://github.com/ClickHouse/ClickHouse/pull/57467) ([Igor Nikonov](https://github.com/devcrafter)). -* Settings only available in ClickHouse Cloud will also be present in the open-source ClickHouse build for convenience.
[#57638](https://github.com/ClickHouse/ClickHouse/pull/57638) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). - -#### Bug Fix (user-visible misbehavior in an official stable release) -* Fixed a possibility of sorting order breakage in TTL GROUP BY [#49103](https://github.com/ClickHouse/ClickHouse/pull/49103) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). -* Fix: split `lttb` bucket strategy, first bucket and last bucket should only contain single point [#57003](https://github.com/ClickHouse/ClickHouse/pull/57003) ([FFish](https://github.com/wxybear)). -* Fix possible deadlock in the `Template` format during sync after error [#57004](https://github.com/ClickHouse/ClickHouse/pull/57004) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix early stop while parsing a file with skipping lots of errors [#57006](https://github.com/ClickHouse/ClickHouse/pull/57006) ([Kruglov Pavel](https://github.com/Avogar)). -* Prevent dictionary's ACL bypass via the `dictionary` table function [#57362](https://github.com/ClickHouse/ClickHouse/pull/57362) ([Salvatore Mesoraca](https://github.com/aiven-sal)). -* Fix another case of a "non-ready set" error found by Fuzzer. [#57423](https://github.com/ClickHouse/ClickHouse/pull/57423) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Fix several issues regarding PostgreSQL `array_ndims` usage. [#57436](https://github.com/ClickHouse/ClickHouse/pull/57436) ([Ryan Jacobs](https://github.com/ryanmjacobs)). -* Fix RWLock inconsistency after write lock timeout [#57454](https://github.com/ClickHouse/ClickHouse/pull/57454) ([Vitaly Baranov](https://github.com/vitlibar)). Fix RWLock inconsistency after write lock timeout (again) [#57733](https://github.com/ClickHouse/ClickHouse/pull/57733) ([Vitaly Baranov](https://github.com/vitlibar)). -* Fix: don't exclude ephemeral column when building pushing to view chain [#57461](https://github.com/ClickHouse/ClickHouse/pull/57461) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). -* MaterializedPostgreSQL (experimental issue): fix issue [#41922](https://github.com/ClickHouse/ClickHouse/issues/41922), add test for [#41923](https://github.com/ClickHouse/ClickHouse/issues/41923) [#57515](https://github.com/ClickHouse/ClickHouse/pull/57515) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Ignore ON CLUSTER clause in grant/revoke queries for management of replicated access entities. [#57538](https://github.com/ClickHouse/ClickHouse/pull/57538) ([MikhailBurdukov](https://github.com/MikhailBurdukov)). -* Fix crash in clickhouse-local [#57553](https://github.com/ClickHouse/ClickHouse/pull/57553) ([Nikolay Degterinsky](https://github.com/evillique)). -* A fix for Hash JOIN. [#57564](https://github.com/ClickHouse/ClickHouse/pull/57564) ([vdimir](https://github.com/vdimir)). -* Fix possible error in PostgreSQL source [#57567](https://github.com/ClickHouse/ClickHouse/pull/57567) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Fix type correction in Hash JOIN for nested LowCardinality. [#57614](https://github.com/ClickHouse/ClickHouse/pull/57614) ([vdimir](https://github.com/vdimir)). -* Avoid hangs of `system.stack_trace` by correctly prohibiting parallel reading from it. [#57641](https://github.com/ClickHouse/ClickHouse/pull/57641) ([Azat Khuzhin](https://github.com/azat)). -* Fix an error for aggregation of sparse columns with `any(...) RESPECT NULL` [#57710](https://github.com/ClickHouse/ClickHouse/pull/57710) ([Azat Khuzhin](https://github.com/azat)). 
-* Fix unary operators parsing [#57713](https://github.com/ClickHouse/ClickHouse/pull/57713) ([Nikolay Degterinsky](https://github.com/evillique)). -* Fix dependency loading for the experimental table engine `MaterializedPostgreSQL`. [#57754](https://github.com/ClickHouse/ClickHouse/pull/57754) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Fix retries for disconnected nodes for BACKUP/RESTORE ON CLUSTER [#57764](https://github.com/ClickHouse/ClickHouse/pull/57764) ([Vitaly Baranov](https://github.com/vitlibar)). -* Fix result of external aggregation in case of partially materialized projection [#57790](https://github.com/ClickHouse/ClickHouse/pull/57790) ([Anton Popov](https://github.com/CurtizJ)). -* Fix merge in aggregation functions with `*Map` combinator [#57795](https://github.com/ClickHouse/ClickHouse/pull/57795) ([Anton Popov](https://github.com/CurtizJ)). -* Disable `system.kafka_consumers` because it has a bug. [#57822](https://github.com/ClickHouse/ClickHouse/pull/57822) ([Azat Khuzhin](https://github.com/azat)). -* Fix LowCardinality keys support in Merge JOIN. [#57827](https://github.com/ClickHouse/ClickHouse/pull/57827) ([vdimir](https://github.com/vdimir)). -* A fix for `InterpreterCreateQuery` related to the sample block. [#57855](https://github.com/ClickHouse/ClickHouse/pull/57855) ([Maksim Kita](https://github.com/kitaisreal)). -* `addresses_expr` were ignored for named collections from PostgreSQL. [#57874](https://github.com/ClickHouse/ClickHouse/pull/57874) ([joelynch](https://github.com/joelynch)). -* Fix invalid memory access in BLAKE3 (Rust) [#57876](https://github.com/ClickHouse/ClickHouse/pull/57876) ([Raúl Marín](https://github.com/Algunenano)). Then it was rewritten from Rust to C++ for better [memory-safety](https://www.memorysafety.org/). [#57994](https://github.com/ClickHouse/ClickHouse/pull/57994) ([Raúl Marín](https://github.com/Algunenano)). -* Normalize function names in `CREATE INDEX` [#57906](https://github.com/ClickHouse/ClickHouse/pull/57906) ([Alexander Tokmakov](https://github.com/tavplubix)). -* Fix handling of unavailable replicas before first request happened [#57933](https://github.com/ClickHouse/ClickHouse/pull/57933) ([Nikita Taranov](https://github.com/nickitat)). -* Fix literal alias misclassification [#57988](https://github.com/ClickHouse/ClickHouse/pull/57988) ([Chen768959](https://github.com/Chen768959)). -* Fix invalid preprocessing on Keeper [#58069](https://github.com/ClickHouse/ClickHouse/pull/58069) ([Antonio Andelic](https://github.com/antonio2368)). -* Fix integer overflow in the `Poco` library, related to `UTF32Encoding` [#58073](https://github.com/ClickHouse/ClickHouse/pull/58073) ([Andrey Fedotov](https://github.com/anfedotoff)). -* Fix parallel replicas (experimental feature) in presence of a scalar subquery with a big integer value [#58118](https://github.com/ClickHouse/ClickHouse/pull/58118) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Fix `accurateCastOrNull` for out-of-range `DateTime` [#58139](https://github.com/ClickHouse/ClickHouse/pull/58139) ([Andrey Zvonov](https://github.com/zvonand)). -* Fix possible `PARAMETER_OUT_OF_BOUND` error during subcolumns reading from a wide part in MergeTree [#58175](https://github.com/ClickHouse/ClickHouse/pull/58175) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix a slow-down of CREATE VIEW with an enormous number of subqueries [#58220](https://github.com/ClickHouse/ClickHouse/pull/58220) ([Tao Wang](https://github.com/wangtZJU)). 
-* Fix parallel parsing for JSONCompactEachRow [#58181](https://github.com/ClickHouse/ClickHouse/pull/58181) ([Alexey Milovidov](https://github.com/alexey-milovidov)). [#58250](https://github.com/ClickHouse/ClickHouse/pull/58250) ([Kruglov Pavel](https://github.com/Avogar)). - - -### ClickHouse release 23.11, 2023-12-06 - -#### Backward Incompatible Change -* The default ClickHouse server configuration file has enabled `access_management` (user manipulation by SQL queries) and `named_collection_control` (manipulation of named collection by SQL queries) for the `default` user by default. This closes [#56482](https://github.com/ClickHouse/ClickHouse/issues/56482). [#56619](https://github.com/ClickHouse/ClickHouse/pull/56619) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Multiple improvements for `RESPECT NULLS`/`IGNORE NULLS` for window functions. If you use them as aggregate functions and store the states of aggregate functions with these modifiers, they might become incompatible. [#57189](https://github.com/ClickHouse/ClickHouse/pull/57189) ([Raúl Marín](https://github.com/Algunenano)). -* Remove optimization `optimize_move_functions_out_of_any`. [#57190](https://github.com/ClickHouse/ClickHouse/pull/57190) ([Raúl Marín](https://github.com/Algunenano)). -* Formatters `%l`/`%k`/`%c` in function `parseDateTime` are now able to parse hours/months without leading zeros, e.g. `select parseDateTime('2023-11-26 8:14', '%F %k:%i')` now works. Set `parsedatetime_parse_without_leading_zeros = 0` to restore the previous behavior which required two digits. Function `formatDateTime` is now also able to print hours/months without leading zeros. This is controlled by setting `formatdatetime_format_without_leading_zeros` but off by default to not break existing use cases. [#55872](https://github.com/ClickHouse/ClickHouse/pull/55872) ([Azat Khuzhin](https://github.com/azat)). -* You can no longer use the aggregate function `avgWeighted` with arguments of type `Decimal`. Workaround: convert arguments to `Float64`. This closes [#43928](https://github.com/ClickHouse/ClickHouse/issues/43928). This closes [#31768](https://github.com/ClickHouse/ClickHouse/issues/31768). This closes [#56435](https://github.com/ClickHouse/ClickHouse/issues/56435). If you have used this function inside materialized views or projections with `Decimal` arguments, contact support@clickhouse.com. Fixed error in aggregate function `sumMap` and made it slower around 1.5..2 times. It does not matter because the function is garbage anyway. This closes [#54955](https://github.com/ClickHouse/ClickHouse/issues/54955). This closes [#53134](https://github.com/ClickHouse/ClickHouse/issues/53134). This closes [#55148](https://github.com/ClickHouse/ClickHouse/issues/55148). Fix a bug in function `groupArraySample` - it used the same random seed in case more than one aggregate state is generated in a query. [#56350](https://github.com/ClickHouse/ClickHouse/pull/56350) ([Alexey Milovidov](https://github.com/alexey-milovidov)). - -#### New Feature -* Added server setting `async_load_databases` for asynchronous loading of databases and tables. Speeds up the server start time. Applies to databases with `Ordinary`, `Atomic` and `Replicated` engines. Their tables load metadata asynchronously. Query to a table increases the priority of the load job and waits for it to be done. Added a new table `system.asynchronous_loader` for introspection. 
[#49351](https://github.com/ClickHouse/ClickHouse/pull/49351) ([Sergei Trifonov](https://github.com/serxa)). -* Add system table `blob_storage_log`. It allows auditing all the data written to S3 and other object storages. [#52918](https://github.com/ClickHouse/ClickHouse/pull/52918) ([vdimir](https://github.com/vdimir)). -* Use statistics to order prewhere conditions better. [#53240](https://github.com/ClickHouse/ClickHouse/pull/53240) ([Han Fei](https://github.com/hanfei1991)). -* Added support for compression in the Keeper's protocol. It can be enabled on the ClickHouse side by using this flag `use_compression` inside `zookeeper` section. Keep in mind that only ClickHouse Keeper supports compression, while Apache ZooKeeper does not. Resolves [#49507](https://github.com/ClickHouse/ClickHouse/issues/49507). [#54957](https://github.com/ClickHouse/ClickHouse/pull/54957) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). -* Introduce the feature `storage_metadata_write_full_object_key`. If it is set as `true` then metadata files are written with the new format. With that format ClickHouse stores full remote object key in the metadata file which allows better flexibility and optimization. [#55566](https://github.com/ClickHouse/ClickHouse/pull/55566) ([Sema Checherinda](https://github.com/CheSema)). -* Add new settings and syntax to protect named collections' fields from being overridden. This is meant to prevent a malicious user from obtaining unauthorized access to secrets. [#55782](https://github.com/ClickHouse/ClickHouse/pull/55782) ([Salvatore Mesoraca](https://github.com/aiven-sal)). -* Add `hostname` column to all system log tables - it is useful if you make the system tables replicated, shared, or distributed. [#55894](https://github.com/ClickHouse/ClickHouse/pull/55894) ([Bharat Nallan](https://github.com/bharatnc)). -* Add `CHECK ALL TABLES` query. [#56022](https://github.com/ClickHouse/ClickHouse/pull/56022) ([vdimir](https://github.com/vdimir)). -* Added function `fromDaysSinceYearZero` which is similar to MySQL's `FROM_DAYS`. E.g. `SELECT fromDaysSinceYearZero(739136)` returns `2023-09-08`. [#56088](https://github.com/ClickHouse/ClickHouse/pull/56088) ([Joanna Hulboj](https://github.com/jh0x)). -* Add an external Python tool to view backups and to extract information from them without using ClickHouse. [#56268](https://github.com/ClickHouse/ClickHouse/pull/56268) ([Vitaly Baranov](https://github.com/vitlibar)). -* Implement a new setting called `preferred_optimize_projection_name`. If it is set to a non-empty string, the specified projection would be used if possible instead of choosing from all the candidates. [#56309](https://github.com/ClickHouse/ClickHouse/pull/56309) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). -* Add 4-letter command for yielding/resigning leadership (https://github.com/ClickHouse/ClickHouse/issues/56352). [#56354](https://github.com/ClickHouse/ClickHouse/pull/56354) ([Pradeep Chhetri](https://github.com/chhetripradeep)). [#56620](https://github.com/ClickHouse/ClickHouse/pull/56620) ([Pradeep Chhetri](https://github.com/chhetripradeep)). -* Added a new SQL function, `arrayRandomSample(arr, k)` which returns a sample of k elements from the input array. Similar functionality could previously be achieved only with less convenient syntax, e.g. `SELECT arrayReduce('groupArraySample(3)', range(10))`. [#56416](https://github.com/ClickHouse/ClickHouse/pull/56416) ([Robert Schulze](https://github.com/rschu1ze)). 
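As a sketch of the `arrayRandomSample` entry above (the output is random by nature), the new function replaces the older `arrayReduce` workaround mentioned in the same entry:

```sql
-- New, more convenient syntax: take 3 random elements from the array.
SELECT arrayRandomSample(range(10), 3) AS sample;

-- Roughly equivalent older workaround from the entry above.
SELECT arrayReduce('groupArraySample(3)', range(10)) AS sample_old;
```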
-* Added support for the `Float16` data type in `.npy` files. Closes [#56344](https://github.com/ClickHouse/ClickHouse/issues/56344). [#56424](https://github.com/ClickHouse/ClickHouse/pull/56424) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). -* Added a system view `information_schema.statistics` for better compatibility with Tableau Online. [#56425](https://github.com/ClickHouse/ClickHouse/pull/56425) ([Serge Klochkov](https://github.com/slvrtrn)). -* Add a `system.symbols` table useful for introspection of the binary. [#56548](https://github.com/ClickHouse/ClickHouse/pull/56548) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Configurable dashboards. Queries for charts are now loaded using a query, which by default uses a new `system.dashboards` table. [#56771](https://github.com/ClickHouse/ClickHouse/pull/56771) ([Sergei Trifonov](https://github.com/serxa)). -* Introduce the `fileCluster` table function - it is useful if you mount a shared filesystem (NFS and similar) into the `user_files` directory. [#56868](https://github.com/ClickHouse/ClickHouse/pull/56868) ([Andrey Zvonov](https://github.com/zvonand)). -* Add a `_size` virtual column with the file size in bytes to the `s3/file/hdfs/url/azureBlobStorage` engines. [#57126](https://github.com/ClickHouse/ClickHouse/pull/57126) ([Kruglov Pavel](https://github.com/Avogar)). -* Expose the number of errors that occurred on the server for each error code since the last restart via the Prometheus endpoint. [#57209](https://github.com/ClickHouse/ClickHouse/pull/57209) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). -* ClickHouse Keeper reports its running availability zone at the `/keeper/availability-zone` path. This can be configured via an `availability_zone` value in the config (e.g. `us-west-1a`). [#56715](https://github.com/ClickHouse/ClickHouse/pull/56715) ([Jianfei Hu](https://github.com/incfly)). -* Make ALTER materialized_view MODIFY QUERY non-experimental and deprecate the `allow_experimental_alter_materialized_view_structure` setting. Fixes [#15206](https://github.com/ClickHouse/ClickHouse/issues/15206). [#57311](https://github.com/ClickHouse/ClickHouse/pull/57311) ([alesapin](https://github.com/alesapin)). -* The setting `join_algorithm` respects the specified order [#51745](https://github.com/ClickHouse/ClickHouse/pull/51745) ([vdimir](https://github.com/vdimir)). -* Add support for the [well-known Protobuf types](https://protobuf.dev/reference/protobuf/google.protobuf/) in the Protobuf format. [#56741](https://github.com/ClickHouse/ClickHouse/pull/56741) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). - -#### Performance Improvement -* Adaptive timeouts for interacting with S3. The first attempt is made with low send and receive timeouts. [#56314](https://github.com/ClickHouse/ClickHouse/pull/56314) ([Sema Checherinda](https://github.com/CheSema)). -* Increase the default value of `max_concurrent_queries` from 100 to 1000. This makes sense when there is a large number of connecting clients, which are slowly sending or receiving data, so the server is not limited by CPU, or when the number of CPU cores is larger than 100. Also, enable the concurrency control by default, and set the desired number of query processing threads in total as twice the number of CPU cores. It improves performance in scenarios with a very large number of concurrent queries. [#46927](https://github.com/ClickHouse/ClickHouse/pull/46927) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Support parallel evaluation of window functions.
Fixes [#34688](https://github.com/ClickHouse/ClickHouse/issues/34688). [#39631](https://github.com/ClickHouse/ClickHouse/pull/39631) ([Dmitry Novik](https://github.com/novikd)). -* The `Numbers` table engine (of the `system.numbers` table) now analyzes the condition to generate the needed subset of data, like a table's index. [#50909](https://github.com/ClickHouse/ClickHouse/pull/50909) ([JackyWoo](https://github.com/JackyWoo)). -* Improved the performance of filtering by the `IN (...)` condition for the `Merge` table engine. [#54905](https://github.com/ClickHouse/ClickHouse/pull/54905) ([Nikita Taranov](https://github.com/nickitat)). -* An improvement which takes place when the filesystem cache is full and there are big reads. [#55158](https://github.com/ClickHouse/ClickHouse/pull/55158) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Add the ability to disable checksums for S3 to avoid an excessive pass over the file (this is controlled by the setting `s3_disable_checksum`; a usage sketch is shown below). [#55559](https://github.com/ClickHouse/ClickHouse/pull/55559) ([Azat Khuzhin](https://github.com/azat)). -* Now we read synchronously from remote tables when data is in the page cache (like we do for local tables). It is faster, it doesn't require synchronisation inside the thread pool, it doesn't hesitate to do `seek`-s on the local FS, and it reduces CPU wait. [#55841](https://github.com/ClickHouse/ClickHouse/pull/55841) ([Nikita Taranov](https://github.com/nickitat)). -* Optimization for getting a value from `map` / `arrayElement`. It brings about a 30% speedup by reducing the reserved memory and the number of `resize` calls. [#55957](https://github.com/ClickHouse/ClickHouse/pull/55957) ([lgbo](https://github.com/lgbo-ustc)). -* Optimization of multi-stage filtering with AVX-512. The performance experiments on the OnTime dataset on an ICX device (Intel Xeon Platinum 8380 CPU, 80 cores, 160 threads) show that this change could bring improvements of 7.4%, 5.9%, 4.7%, 3.0%, and 4.6% to the QPS of queries Q2, Q3, Q4, Q5 and Q6 respectively, while having no impact on others. [#56079](https://github.com/ClickHouse/ClickHouse/pull/56079) ([Zhiguo Zhou](https://github.com/ZhiguoZh)). -* Limit the number of threads busy inside the query profiler. If there are more, they will skip profiling. [#56105](https://github.com/ClickHouse/ClickHouse/pull/56105) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Decrease the amount of virtual function calls in window functions. [#56120](https://github.com/ClickHouse/ClickHouse/pull/56120) ([Maksim Kita](https://github.com/kitaisreal)). -* Allow recursive Tuple field pruning in the ORC data format to speed up scanning. [#56122](https://github.com/ClickHouse/ClickHouse/pull/56122) ([李扬](https://github.com/taiyang-li)). -* Trivial count optimization for the `Npy` data format: queries like `select count() from 'data.npy'` will work much faster because the results are cached. [#56304](https://github.com/ClickHouse/ClickHouse/pull/56304) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). -* Queries with aggregation and a large number of streams will use less memory during the query plan's construction. [#57074](https://github.com/ClickHouse/ClickHouse/pull/57074) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Improve performance of executing queries for use cases with many users and highly concurrent queries (>2000 QPS) by optimizing the access to ProcessList. [#57106](https://github.com/ClickHouse/ClickHouse/pull/57106) ([Andrej Hoos](https://github.com/adikus)).
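A minimal sketch of the `s3_disable_checksum` setting referenced above; the bucket URL, credentials, and table are hypothetical placeholders.

```sql
-- Skip checksum calculation when sending a file to S3, avoiding an
-- extra pass over the data (bucket, credentials and table are hypothetical).
SET s3_disable_checksum = 1;

INSERT INTO FUNCTION
    s3('https://my-bucket.s3.amazonaws.com/exports/events.csv.gz',
       'my_access_key', 'my_secret_key', 'CSVWithNames')
SELECT * FROM events;
```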
-* Trivial improvement on array join: reuse some intermediate results. [#57183](https://github.com/ClickHouse/ClickHouse/pull/57183) ([李扬](https://github.com/taiyang-li)). -* There are cases when stack unwinding was slow. Not anymore. [#57221](https://github.com/ClickHouse/ClickHouse/pull/57221) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Now we use the default read pool for reading from external storage when `max_streams = 1`. It is beneficial when read prefetches are enabled. [#57334](https://github.com/ClickHouse/ClickHouse/pull/57334) ([Nikita Taranov](https://github.com/nickitat)). -* Keeper improvement: improve memory usage during startup by delaying log preprocessing. [#55660](https://github.com/ClickHouse/ClickHouse/pull/55660) ([Antonio Andelic](https://github.com/antonio2368)). -* Improved performance of glob matching for the `File` and `HDFS` storages. [#56141](https://github.com/ClickHouse/ClickHouse/pull/56141) ([Andrey Zvonov](https://github.com/zvonand)). -* Posting lists in experimental full text indexes are now compressed, which reduces their size by 10-30%. [#56226](https://github.com/ClickHouse/ClickHouse/pull/56226) ([Harry Lee](https://github.com/HarryLeeIBM)). -* Parallelise `BackupEntriesCollector` in backups. [#56312](https://github.com/ClickHouse/ClickHouse/pull/56312) ([Kseniia Sumarokova](https://github.com/kssenii)). - -#### Improvement -* Add a new `MergeTree` setting `add_implicit_sign_column_constraint_for_collapsing_engine` (disabled by default). When enabled, it adds an implicit CHECK constraint for `CollapsingMergeTree` tables that restricts the value of the `Sign` column to be only -1 or 1. [#56701](https://github.com/ClickHouse/ClickHouse/issues/56701). [#56986](https://github.com/ClickHouse/ClickHouse/pull/56986) ([Kevin Mingtarja](https://github.com/kevinmingtarja)). -* Enable adding a new disk to the storage configuration without a restart. [#56367](https://github.com/ClickHouse/ClickHouse/pull/56367) ([Duc Canh Le](https://github.com/canhld94)). -* Support creating and materializing an index in the same ALTER query; also support "modify TTL" and "materialize TTL" in the same query. Closes [#55651](https://github.com/ClickHouse/ClickHouse/issues/55651). [#56331](https://github.com/ClickHouse/ClickHouse/pull/56331) ([flynn](https://github.com/ucasfl)). -* Add a new table function named `fuzzJSON` with rows containing perturbed versions of the source JSON string with random variations. [#56490](https://github.com/ClickHouse/ClickHouse/pull/56490) ([Julia Kartseva](https://github.com/jkartseva)). -* Engine `Merge` filters the records according to the row policies of the underlying tables, so you don't have to create another row policy on a `Merge` table. [#50209](https://github.com/ClickHouse/ClickHouse/pull/50209) ([Ilya Golshtein](https://github.com/ilejn)). -* Add a setting `max_execution_time_leaf` to limit the execution time on shards for distributed queries, and `timeout_overflow_mode_leaf` to control the behaviour if a timeout happens. [#51823](https://github.com/ClickHouse/ClickHouse/pull/51823) ([Duc Canh Le](https://github.com/canhld94)). -* Add a ClickHouse setting to disable tunneling for HTTPS requests over an HTTP proxy. [#55033](https://github.com/ClickHouse/ClickHouse/pull/55033) ([Arthur Passos](https://github.com/arthurpassos)). -* Set `background_fetches_pool_size` to 16 and `background_schedule_pool_size` to 512, which is better for production usage with frequent small insertions.
[#54327](https://github.com/ClickHouse/ClickHouse/pull/54327) ([Denny Crane](https://github.com/den-crane)). -* When reading data from a CSV file in which a line ends with a bare `\r` that is not followed by `\n`, ClickHouse used to throw the exception `Cannot parse CSV format: found \r (CR) not followed by \n (LF). Line must end by \n (LF) or \r\n (CR LF) or \n\r.` In ClickHouse, a CSV line must end with `\n`, `\r\n` or `\n\r`, so a `\r` must be followed by `\n`; however, some abnormal CSV input ends lines with a bare `\r`, as above, and such input is now handled. [#54340](https://github.com/ClickHouse/ClickHouse/pull/54340) ([KevinyhZou](https://github.com/KevinyhZou)). -* Update the Arrow library to release-13.0.0, which supports new encodings. Closes [#44505](https://github.com/ClickHouse/ClickHouse/issues/44505). [#54800](https://github.com/ClickHouse/ClickHouse/pull/54800) ([Kruglov Pavel](https://github.com/Avogar)). -* Improve the performance of ON CLUSTER queries by removing heavy system calls that get all network interfaces when looking for the local IP address in the DDL entry hosts list. [#54909](https://github.com/ClickHouse/ClickHouse/pull/54909) ([Duc Canh Le](https://github.com/canhld94)). -* Fixed accounting of memory allocated before attaching a thread to a query or a user. [#56089](https://github.com/ClickHouse/ClickHouse/pull/56089) ([Nikita Taranov](https://github.com/nickitat)). -* Add support for `LARGE_LIST` in Apache Arrow formats. [#56118](https://github.com/ClickHouse/ClickHouse/pull/56118) ([edef](https://github.com/edef1c)). -* Allow manual compaction of `EmbeddedRocksDB` via the `OPTIMIZE` query. [#56225](https://github.com/ClickHouse/ClickHouse/pull/56225) ([Azat Khuzhin](https://github.com/azat)). -* Add the ability to specify BlockBasedTableOptions for `EmbeddedRocksDB` tables. [#56264](https://github.com/ClickHouse/ClickHouse/pull/56264) ([Azat Khuzhin](https://github.com/azat)). -* `SHOW COLUMNS` now displays MySQL's equivalent data type name when the connection was made through the MySQL protocol. Previously, this was the case when setting `use_mysql_types_in_show_columns = 1`. The setting is retained but made obsolete. [#56277](https://github.com/ClickHouse/ClickHouse/pull/56277) ([Robert Schulze](https://github.com/rschu1ze)). -* Fixed a possible `The local set of parts of table doesn't look like the set of parts in ZooKeeper` error if the server was restarted just after `TRUNCATE` or `DROP PARTITION`. [#56282](https://github.com/ClickHouse/ClickHouse/pull/56282) ([Alexander Tokmakov](https://github.com/tavplubix)). -* Fixed handling of non-const query strings in the functions `formatQuery`/`formatQuerySingleLine`. Also added `OrNull` variants of both functions that return NULL when a query cannot be parsed instead of throwing an exception. [#56327](https://github.com/ClickHouse/ClickHouse/pull/56327) ([Robert Schulze](https://github.com/rschu1ze)). -* Allow backup of a materialized view with a dropped inner table instead of failing the backup. [#56387](https://github.com/ClickHouse/ClickHouse/pull/56387) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Queries to `system.replicas` initiate requests to ZooKeeper when certain columns are queried. When there are thousands of tables these requests might produce a considerable load on ZooKeeper. If there are multiple simultaneous queries to `system.replicas` they make the same requests multiple times. The change is to "deduplicate" requests from concurrent queries.
[#56420](https://github.com/ClickHouse/ClickHouse/pull/56420) ([Alexander Gololobov](https://github.com/davenger)). -* Fix translation to a MySQL-compatible query when querying external databases. [#56456](https://github.com/ClickHouse/ClickHouse/pull/56456) ([flynn](https://github.com/ucasfl)). -* Add support for backing up and restoring tables using the `KeeperMap` engine. [#56460](https://github.com/ClickHouse/ClickHouse/pull/56460) ([Antonio Andelic](https://github.com/antonio2368)). -* A 404 response for CompleteMultipartUpload has to be rechecked: the operation could have succeeded on the server even if the client got a timeout or another network error, in which case the next retry of CompleteMultipartUpload receives a 404 response. If the object key exists, that operation is considered successful. [#56475](https://github.com/ClickHouse/ClickHouse/pull/56475) ([Sema Checherinda](https://github.com/CheSema)). -* Enable the HTTP OPTIONS method by default - it simplifies requesting ClickHouse from a web browser. [#56483](https://github.com/ClickHouse/ClickHouse/pull/56483) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* The value for `dns_max_consecutive_failures` was changed by mistake in [#46550](https://github.com/ClickHouse/ClickHouse/issues/46550) - this is reverted and adjusted to a better value. Also, increased the HTTP keep-alive timeout to a reasonable value from production. [#56485](https://github.com/ClickHouse/ClickHouse/pull/56485) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Load base backups lazily (a base backup won't be loaded until it's needed). Also add some log messages and profile events for backups. [#56516](https://github.com/ClickHouse/ClickHouse/pull/56516) ([Vitaly Baranov](https://github.com/vitlibar)). -* Setting `query_cache_store_results_of_queries_with_nondeterministic_functions` (with values `false` or `true`) was marked obsolete. It was replaced by the setting `query_cache_nondeterministic_function_handling`, a three-valued enum that controls how the query cache handles queries with non-deterministic functions: a) throw an exception (default behavior), b) save the non-deterministic query result regardless, or c) ignore, i.e. don't throw an exception and don't cache the result. [#56519](https://github.com/ClickHouse/ClickHouse/pull/56519) ([Robert Schulze](https://github.com/rschu1ze)). -* Rewrite equality with the `is null` check in the JOIN ON section. Experimental *Analyzer only*. [#56538](https://github.com/ClickHouse/ClickHouse/pull/56538) ([vdimir](https://github.com/vdimir)). -* Function `concat` now supports arbitrary argument types (instead of only String and FixedString arguments). This makes it behave more similarly to MySQL's `concat` implementation. For example, `SELECT concat('ab', 42)` now returns `ab42`. [#56540](https://github.com/ClickHouse/ClickHouse/pull/56540) ([Serge Klochkov](https://github.com/slvrtrn)). -* Allow getting the cache configuration from the 'named_collection' section in the config or from SQL-created named collections. [#56541](https://github.com/ClickHouse/ClickHouse/pull/56541) ([Kseniia Sumarokova](https://github.com/kssenii)). -* PostgreSQL database engine: make the removal of outdated tables less aggressive when the PostgreSQL connection is unsuccessful. [#56609](https://github.com/ClickHouse/ClickHouse/pull/56609) ([jsc0218](https://github.com/jsc0218)). -* Connecting to PostgreSQL took too much time when the URL was not right, so the relevant query got stuck there until it was cancelled.
[#56648](https://github.com/ClickHouse/ClickHouse/pull/56648) ([jsc0218](https://github.com/jsc0218)). -* Keeper improvement: disable compressed logs by default in Keeper. [#56763](https://github.com/ClickHouse/ClickHouse/pull/56763) ([Antonio Andelic](https://github.com/antonio2368)). -* Add the config setting `wait_dictionaries_load_at_startup`. [#56782](https://github.com/ClickHouse/ClickHouse/pull/56782) ([Vitaly Baranov](https://github.com/vitlibar)). -* There was a potential vulnerability in previous ClickHouse versions: if a user has connected and unsuccessfully tried to authenticate with the "interserver secret" method, the server didn't terminate the connection immediately but continued to receive and ignore the leftover packets from the client. While these packets are ignored, they are still parsed, and if they use a compression method with another known vulnerability, it could be exploited without authentication. This issue was found with the [ClickHouse Bug Bounty Program](https://github.com/ClickHouse/ClickHouse/issues/38986) by https://twitter.com/malacupa. [#56794](https://github.com/ClickHouse/ClickHouse/pull/56794) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Fetching a part now waits until that part is fully committed on the remote replica. It is better not to send a part in the PreActive state; in the case of zero-copy replication this is a mandatory restriction. [#56808](https://github.com/ClickHouse/ClickHouse/pull/56808) ([Sema Checherinda](https://github.com/CheSema)). -* Fix a possible PostgreSQL logical replication conversion error when using the experimental `MaterializedPostgreSQL`. [#53721](https://github.com/ClickHouse/ClickHouse/pull/53721) ([takakawa](https://github.com/takakawa)). -* Implement the user-level setting `alter_move_to_space_execute_async` which allows executing `ALTER TABLE ... MOVE PARTITION|PART TO DISK|VOLUME` queries asynchronously (a usage sketch is shown below). The size of the pool for background executions is controlled by `background_move_pool_size`. The default behavior is synchronous execution. Fixes [#47643](https://github.com/ClickHouse/ClickHouse/issues/47643). [#56809](https://github.com/ClickHouse/ClickHouse/pull/56809) ([alesapin](https://github.com/alesapin)). -* Allow filtering by engine when scanning `system.tables`, to avoid unnecessary (potentially time-consuming) connections. [#56813](https://github.com/ClickHouse/ClickHouse/pull/56813) ([jsc0218](https://github.com/jsc0218)). -* Show `total_bytes` and `total_rows` in system tables for RocksDB storage. [#56816](https://github.com/ClickHouse/ClickHouse/pull/56816) ([Aleksandr Musorin](https://github.com/AVMusorin)). -* Allow basic commands in ALTER for TEMPORARY tables. [#56892](https://github.com/ClickHouse/ClickHouse/pull/56892) ([Sergey](https://github.com/icuken)). -* LZ4 compression: buffer the compressed block in the rare case when the output buffer capacity is not enough to write the compressed block into it directly. [#56938](https://github.com/ClickHouse/ClickHouse/pull/56938) ([Sema Checherinda](https://github.com/CheSema)). -* Add metrics for the number of queued jobs, which is useful for the IO thread pool. [#56958](https://github.com/ClickHouse/ClickHouse/pull/56958) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Add a setting for the PostgreSQL table engine in the config file, along with a check for the setting and documentation around it. [#56959](https://github.com/ClickHouse/ClickHouse/pull/56959) ([Peignon Melvyn](https://github.com/melvynator)).
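A minimal sketch of the asynchronous `MOVE` described in the `alter_move_to_space_execute_async` entry above; the table, partition ID, and disk name are hypothetical.

```sql
-- Run the MOVE in the background instead of waiting for it to complete.
SET alter_move_to_space_execute_async = 1;

-- Hypothetical table, partition ID, and target disk.
ALTER TABLE events MOVE PARTITION ID '202311' TO DISK 's3_cold';
```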
-* Function `concat` can now be called with a single argument, e.g., `SELECT concat('abc')`. This makes its behavior more consistent with MySQL's concat implementation. [#57000](https://github.com/ClickHouse/ClickHouse/pull/57000) ([Serge Klochkov](https://github.com/slvrtrn)). -* Sign all `x-amz-*` headers as required by the AWS S3 docs. [#57001](https://github.com/ClickHouse/ClickHouse/pull/57001) ([Arthur Passos](https://github.com/arthurpassos)). -* Function `fromDaysSinceYearZero` (alias: `FROM_DAYS`) can now be used with unsigned and signed integer types (previously, it had to be an unsigned integer). This improves compatibility with 3rd-party tools such as Tableau Online. [#57002](https://github.com/ClickHouse/ClickHouse/pull/57002) ([Serge Klochkov](https://github.com/slvrtrn)). -* Add `system.s3queue_log` to the default config. [#57036](https://github.com/ClickHouse/ClickHouse/pull/57036) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Change the default for `wait_dictionaries_load_at_startup` to true, and use this setting only if `dictionaries_lazy_load` is false. [#57133](https://github.com/ClickHouse/ClickHouse/pull/57133) ([Vitaly Baranov](https://github.com/vitlibar)). -* Check the dictionary source type on creation even if `dictionaries_lazy_load` is enabled. [#57134](https://github.com/ClickHouse/ClickHouse/pull/57134) ([Vitaly Baranov](https://github.com/vitlibar)). -* Plan-level optimizations can now be enabled/disabled individually. Previously, it was only possible to disable them all. The setting which previously did that (`query_plan_enable_optimizations`) is retained and can still be used to disable all optimizations. [#57152](https://github.com/ClickHouse/ClickHouse/pull/57152) ([Robert Schulze](https://github.com/rschu1ze)). -* The server's exit code will correspond to the exception code. For example, if the server cannot start due to a memory limit, it will exit with the code 241 = MEMORY_LIMIT_EXCEEDED. In previous versions, the exit code for exceptions was always 70 = Poco::Util::ExitCode::EXIT_SOFTWARE. [#57153](https://github.com/ClickHouse/ClickHouse/pull/57153) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Do not demangle and symbolize stack frames from the `functional` C++ header. [#57201](https://github.com/ClickHouse/ClickHouse/pull/57201) ([Mike Kot](https://github.com/myrrc)). -* The HTTP server page `/dashboard` now supports charts with multiple lines. [#57236](https://github.com/ClickHouse/ClickHouse/pull/57236) ([Sergei Trifonov](https://github.com/serxa)). -* The `max_memory_usage_in_client` command line option supports a string value with a suffix (K, M, G, etc). Closes [#56879](https://github.com/ClickHouse/ClickHouse/issues/56879). [#57273](https://github.com/ClickHouse/ClickHouse/pull/57273) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). -* Bumped Intel QPL (used by the codec `DEFLATE_QPL`) from v1.2.0 to v1.3.1. Also fixed a bug in the case of BOF (Block On Fault) = 0 by handling page faults with a fallback to the software path. [#57291](https://github.com/ClickHouse/ClickHouse/pull/57291) ([jasperzhu](https://github.com/jinjunzh)). -* Increase the default `replicated_deduplication_window` MergeTree setting from 100 to 1000. [#57335](https://github.com/ClickHouse/ClickHouse/pull/57335) ([sichenzhao](https://github.com/sichenzhao)). -* Stop using `INCONSISTENT_METADATA_FOR_BACKUP` that much. If possible, prefer to continue scanning instead of stopping and restarting the scan for backup from the beginning.
[#57385](https://github.com/ClickHouse/ClickHouse/pull/57385) ([Vitaly Baranov](https://github.com/vitlibar)). - -#### Build/Testing/Packaging Improvement -* Add SQLLogic test. [#56078](https://github.com/ClickHouse/ClickHouse/pull/56078) ([Han Fei](https://github.com/hanfei1991)). -* Make `clickhouse-local` and `clickhouse-client` available under short names (`ch`, `chl`, `chc`) for usability. [#56634](https://github.com/ClickHouse/ClickHouse/pull/56634) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Optimized build size further by removing unused code from external libraries. [#56786](https://github.com/ClickHouse/ClickHouse/pull/56786) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Add automatic check that there are no large translation units. [#56559](https://github.com/ClickHouse/ClickHouse/pull/56559) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Lower the size of the single-binary distribution. This closes [#55181](https://github.com/ClickHouse/ClickHouse/issues/55181). [#56617](https://github.com/ClickHouse/ClickHouse/pull/56617) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Information about the sizes of every translation unit and binary file after each build will be sent to the CI database in ClickHouse Cloud. This closes [#56107](https://github.com/ClickHouse/ClickHouse/issues/56107). [#56636](https://github.com/ClickHouse/ClickHouse/pull/56636) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Certain files of "Apache Arrow" library (which we use only for non-essential things like parsing the arrow format) were rebuilt all the time regardless of the build cache. This is fixed. [#56657](https://github.com/ClickHouse/ClickHouse/pull/56657) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Avoid recompiling translation units depending on the autogenerated source file about version. [#56660](https://github.com/ClickHouse/ClickHouse/pull/56660) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Tracing data of the linker invocations will be sent to the CI database in ClickHouse Cloud. [#56725](https://github.com/ClickHouse/ClickHouse/pull/56725) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Use DWARF 5 debug symbols for the clickhouse binary (was DWARF 4 previously). [#56770](https://github.com/ClickHouse/ClickHouse/pull/56770) ([Michael Kolupaev](https://github.com/al13n321)). -* Add a new build option `SANITIZE_COVERAGE`. If it is enabled, the code is instrumented to track the coverage. The collected information is available inside ClickHouse with: (1) a new function `coverage` that returns an array of unique addresses in the code found after the previous coverage reset; (2) `SYSTEM RESET COVERAGE` query that resets the accumulated data. This allows us to compare the coverage of different tests, including differential code coverage. Continuation of [#20539](https://github.com/ClickHouse/ClickHouse/issues/20539). [#56102](https://github.com/ClickHouse/ClickHouse/pull/56102) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Some of the stack frames might not be resolved when collecting stacks. In such cases the raw address might be helpful. [#56267](https://github.com/ClickHouse/ClickHouse/pull/56267) ([Alexander Gololobov](https://github.com/davenger)). -* Add an option to disable `libssh`. [#56333](https://github.com/ClickHouse/ClickHouse/pull/56333) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
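For the `SANITIZE_COVERAGE` entry above, a minimal sketch of how the collected coverage data could be inspected from SQL, assuming the server was built with that option enabled:

```sql
-- Number of unique code addresses hit since the previous coverage reset
-- (only meaningful in a build with SANITIZE_COVERAGE enabled).
SELECT length(coverage()) AS covered_addresses;

-- Reset the accumulated coverage data before running the next test.
SYSTEM RESET COVERAGE;
```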
-* Enable `temporary_data_in_cache` in S3 tests in CI. [#48425](https://github.com/ClickHouse/ClickHouse/pull/48425) ([vdimir](https://github.com/vdimir)). -* Set the max memory usage for clickhouse-client (`1G`) in the CI. [#56873](https://github.com/ClickHouse/ClickHouse/pull/56873) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). - -#### Bug Fix (user-visible misbehavior in an official stable release) -* Fix experimental Analyzer - an insertion from a SELECT with a subquery referencing the insertion table should process only the insertion block. [#50857](https://github.com/ClickHouse/ClickHouse/pull/50857) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). -* Fix a bug in the `str_to_map` function. [#56423](https://github.com/ClickHouse/ClickHouse/pull/56423) ([Arthur Passos](https://github.com/arthurpassos)). -* Keeper `reconfig`: add a timeout before yielding/taking leadership [#53481](https://github.com/ClickHouse/ClickHouse/pull/53481) ([Mike Kot](https://github.com/myrrc)). -* Fix incorrect header in grace hash join and filter pushdown [#53922](https://github.com/ClickHouse/ClickHouse/pull/53922) ([vdimir](https://github.com/vdimir)). -* Select from system tables when a table is based on a table function. [#55540](https://github.com/ClickHouse/ClickHouse/pull/55540) ([MikhailBurdukov](https://github.com/MikhailBurdukov)). -* RFC: Fix "Cannot find column X in source stream" for Distributed queries with LIMIT BY [#55836](https://github.com/ClickHouse/ClickHouse/pull/55836) ([Azat Khuzhin](https://github.com/azat)). -* Fix 'Cannot read from file:' while running the client in the background [#55976](https://github.com/ClickHouse/ClickHouse/pull/55976) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix clickhouse-local exit on a bad send_logs_level setting [#55994](https://github.com/ClickHouse/ClickHouse/pull/55994) ([Kruglov Pavel](https://github.com/Avogar)). -* Bug fix: EXPLAIN AST with a parameterized view [#56004](https://github.com/ClickHouse/ClickHouse/pull/56004) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). -* Fix a crash during table loading on startup [#56232](https://github.com/ClickHouse/ClickHouse/pull/56232) ([Nikolay Degterinsky](https://github.com/evillique)). -* Fix ClickHouse-sourced dictionaries with an explicit query [#56236](https://github.com/ClickHouse/ClickHouse/pull/56236) ([Nikolay Degterinsky](https://github.com/evillique)). -* Fix segfault in the signal handler for Keeper [#56266](https://github.com/ClickHouse/ClickHouse/pull/56266) ([Antonio Andelic](https://github.com/antonio2368)). -* Fix incomplete query result for UNION in the view() function. [#56274](https://github.com/ClickHouse/ClickHouse/pull/56274) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Fix inconsistency of "cast('0' as DateTime64(3))" and "cast('0' as Nullable(DateTime64(3)))" [#56286](https://github.com/ClickHouse/ClickHouse/pull/56286) ([李扬](https://github.com/taiyang-li)). -* Fix a rare race condition related to Memory allocation failure [#56303](https://github.com/ClickHouse/ClickHouse/pull/56303) ([alesapin](https://github.com/alesapin)). -* Fix restore from backup with `flatten_nested` and `data_type_default_nullable` [#56306](https://github.com/ClickHouse/ClickHouse/pull/56306) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Fix crash in case of adding a column with type Object(JSON) [#56307](https://github.com/ClickHouse/ClickHouse/pull/56307) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
-* Fix crash in filterPushDown [#56380](https://github.com/ClickHouse/ClickHouse/pull/56380) ([vdimir](https://github.com/vdimir)). -* Fix restore from backup with mat view and dropped source table [#56383](https://github.com/ClickHouse/ClickHouse/pull/56383) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Fix segfault during Kerberos initialization [#56401](https://github.com/ClickHouse/ClickHouse/pull/56401) ([Nikolay Degterinsky](https://github.com/evillique)). -* Fix buffer overflow in T64 [#56434](https://github.com/ClickHouse/ClickHouse/pull/56434) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Fix nullable primary key in final (2) [#56452](https://github.com/ClickHouse/ClickHouse/pull/56452) ([Amos Bird](https://github.com/amosbird)). -* Fix ON CLUSTER queries without database on initial node [#56484](https://github.com/ClickHouse/ClickHouse/pull/56484) ([Nikolay Degterinsky](https://github.com/evillique)). -* Fix startup failure due to TTL dependency [#56489](https://github.com/ClickHouse/ClickHouse/pull/56489) ([Nikolay Degterinsky](https://github.com/evillique)). -* Fix ALTER COMMENT queries ON CLUSTER [#56491](https://github.com/ClickHouse/ClickHouse/pull/56491) ([Nikolay Degterinsky](https://github.com/evillique)). -* Fix ALTER COLUMN with ALIAS [#56493](https://github.com/ClickHouse/ClickHouse/pull/56493) ([Nikolay Degterinsky](https://github.com/evillique)). -* Fix empty NAMED COLLECTIONs [#56494](https://github.com/ClickHouse/ClickHouse/pull/56494) ([Nikolay Degterinsky](https://github.com/evillique)). -* Fix two cases of projection analysis. [#56502](https://github.com/ClickHouse/ClickHouse/pull/56502) ([Amos Bird](https://github.com/amosbird)). -* Fix handling of aliases in query cache [#56545](https://github.com/ClickHouse/ClickHouse/pull/56545) ([Robert Schulze](https://github.com/rschu1ze)). -* Fix conversion from `Nullable(Enum)` to `Nullable(String)` [#56644](https://github.com/ClickHouse/ClickHouse/pull/56644) ([Nikolay Degterinsky](https://github.com/evillique)). -* More reliable log handling in Keeper [#56670](https://github.com/ClickHouse/ClickHouse/pull/56670) ([Antonio Andelic](https://github.com/antonio2368)). -* Fix configuration merge for nodes with substitution attributes [#56694](https://github.com/ClickHouse/ClickHouse/pull/56694) ([Konstantin Bogdanov](https://github.com/thevar1able)). -* Fix duplicate usage of table function input(). [#56695](https://github.com/ClickHouse/ClickHouse/pull/56695) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Fix: RabbitMQ OpenSSL dynamic loading issue [#56703](https://github.com/ClickHouse/ClickHouse/pull/56703) ([Igor Nikonov](https://github.com/devcrafter)). -* Fix crash in GCD codec in case when zeros present in data [#56704](https://github.com/ClickHouse/ClickHouse/pull/56704) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). -* Fix 'mutex lock failed: Invalid argument' in clickhouse-local during insert into function [#56710](https://github.com/ClickHouse/ClickHouse/pull/56710) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix Date text parsing in optimistic path [#56765](https://github.com/ClickHouse/ClickHouse/pull/56765) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix crash in FPC codec [#56795](https://github.com/ClickHouse/ClickHouse/pull/56795) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
-* DatabaseReplicated: fix DDL query timeout after recovering a replica [#56796](https://github.com/ClickHouse/ClickHouse/pull/56796) ([Alexander Tokmakov](https://github.com/tavplubix)). -* Fix incorrect nullable columns reporting in MySQL binary protocol [#56799](https://github.com/ClickHouse/ClickHouse/pull/56799) ([Serge Klochkov](https://github.com/slvrtrn)). -* Support Iceberg metadata files for metastore tables [#56810](https://github.com/ClickHouse/ClickHouse/pull/56810) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix TSAN report under transform [#56817](https://github.com/ClickHouse/ClickHouse/pull/56817) ([Raúl Marín](https://github.com/Algunenano)). -* Fix SET query and SETTINGS formatting [#56825](https://github.com/ClickHouse/ClickHouse/pull/56825) ([Nikolay Degterinsky](https://github.com/evillique)). -* Fix failure to start due to table dependency in joinGet [#56828](https://github.com/ClickHouse/ClickHouse/pull/56828) ([Nikolay Degterinsky](https://github.com/evillique)). -* Fix flattening existing Nested columns during ADD COLUMN [#56830](https://github.com/ClickHouse/ClickHouse/pull/56830) ([Nikolay Degterinsky](https://github.com/evillique)). -* Fix allow cr end of line for csv [#56901](https://github.com/ClickHouse/ClickHouse/pull/56901) ([KevinyhZou](https://github.com/KevinyhZou)). -* Fix `tryBase64Decode` with invalid input [#56913](https://github.com/ClickHouse/ClickHouse/pull/56913) ([Robert Schulze](https://github.com/rschu1ze)). -* Fix generating deep nested columns in CapnProto/Protobuf schemas [#56941](https://github.com/ClickHouse/ClickHouse/pull/56941) ([Kruglov Pavel](https://github.com/Avogar)). -* Prevent incompatible ALTER of projection columns [#56948](https://github.com/ClickHouse/ClickHouse/pull/56948) ([Amos Bird](https://github.com/amosbird)). -* Fix sqlite file path validation [#56984](https://github.com/ClickHouse/ClickHouse/pull/56984) ([San](https://github.com/santrancisco)). -* S3Queue: fix metadata reference increment [#56990](https://github.com/ClickHouse/ClickHouse/pull/56990) ([Kseniia Sumarokova](https://github.com/kssenii)). -* S3Queue minor fix [#56999](https://github.com/ClickHouse/ClickHouse/pull/56999) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Fix file path validation for DatabaseFileSystem [#57029](https://github.com/ClickHouse/ClickHouse/pull/57029) ([San](https://github.com/santrancisco)). -* Fix `fuzzBits` with `ARRAY JOIN` [#57033](https://github.com/ClickHouse/ClickHouse/pull/57033) ([Antonio Andelic](https://github.com/antonio2368)). -* Fix Nullptr dereference in partial merge join with joined_subquery_re… [#57048](https://github.com/ClickHouse/ClickHouse/pull/57048) ([vdimir](https://github.com/vdimir)). -* Fix race condition in RemoteSource [#57052](https://github.com/ClickHouse/ClickHouse/pull/57052) ([Raúl Marín](https://github.com/Algunenano)). -* Implement `bitHammingDistance` for big integers [#57073](https://github.com/ClickHouse/ClickHouse/pull/57073) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* S3-style links bug fix [#57075](https://github.com/ClickHouse/ClickHouse/pull/57075) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). -* Fix JSON_QUERY function with multiple numeric paths [#57096](https://github.com/ClickHouse/ClickHouse/pull/57096) ([KevinyhZou](https://github.com/KevinyhZou)). -* Fix buffer overflow in Gorilla codec [#57107](https://github.com/ClickHouse/ClickHouse/pull/57107) ([Nikolay Degterinsky](https://github.com/evillique)). 
-* Close interserver connection on any exception before authentication [#57142](https://github.com/ClickHouse/ClickHouse/pull/57142) ([Antonio Andelic](https://github.com/antonio2368)). -* Fix segfault after ALTER UPDATE with Nullable MATERIALIZED column [#57147](https://github.com/ClickHouse/ClickHouse/pull/57147) ([Nikolay Degterinsky](https://github.com/evillique)). -* Fix incorrect JOIN plan optimization with partially materialized normal projection [#57196](https://github.com/ClickHouse/ClickHouse/pull/57196) ([Amos Bird](https://github.com/amosbird)). -* Ignore comments when comparing column descriptions [#57259](https://github.com/ClickHouse/ClickHouse/pull/57259) ([Antonio Andelic](https://github.com/antonio2368)). -* Fix `ReadonlyReplica` metric for all cases [#57267](https://github.com/ClickHouse/ClickHouse/pull/57267) ([Antonio Andelic](https://github.com/antonio2368)). -* Background merges correctly use temporary data storage in the cache [#57275](https://github.com/ClickHouse/ClickHouse/pull/57275) ([vdimir](https://github.com/vdimir)). -* Keeper fix for changelog and snapshots [#57299](https://github.com/ClickHouse/ClickHouse/pull/57299) ([Antonio Andelic](https://github.com/antonio2368)). -* Ignore finished ON CLUSTER tasks if the hostname changed [#57339](https://github.com/ClickHouse/ClickHouse/pull/57339) ([Alexander Tokmakov](https://github.com/tavplubix)). -* MergeTree mutations reuse the source part's index granularity [#57352](https://github.com/ClickHouse/ClickHouse/pull/57352) ([Maksim Kita](https://github.com/kitaisreal)). -* FS cache: add a limit for background download [#57424](https://github.com/ClickHouse/ClickHouse/pull/57424) ([Kseniia Sumarokova](https://github.com/kssenii)). - - ### ClickHouse release 23.10, 2023-11-02 - -#### Backward Incompatible Change -* There is no longer an option to automatically remove broken data parts. This closes [#55174](https://github.com/ClickHouse/ClickHouse/issues/55174). [#55184](https://github.com/ClickHouse/ClickHouse/pull/55184) ([Alexey Milovidov](https://github.com/alexey-milovidov)). [#55557](https://github.com/ClickHouse/ClickHouse/pull/55557) ([Jihyuk Bok](https://github.com/tomahawk28)). -* The obsolete in-memory data parts can no longer be read from the write-ahead log. If you have configured in-memory parts before, they have to be removed before the upgrade. [#55186](https://github.com/ClickHouse/ClickHouse/pull/55186) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Remove the integration with Meilisearch. Reason: it was compatible only with the old version 0.18. The recent version of Meilisearch changed the protocol and does not work anymore. Note: we would appreciate it if you help to bring it back. [#55189](https://github.com/ClickHouse/ClickHouse/pull/55189) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Rename the directory monitor concept to background INSERT. All the settings `*directory_monitor*` have been renamed to `distributed_background_insert*`. *Backward compatibility should be preserved* (since the old settings have been added as aliases). [#55978](https://github.com/ClickHouse/ClickHouse/pull/55978) ([Azat Khuzhin](https://github.com/azat)). -* Do not interpret the `send_timeout` set on the client side as the `receive_timeout` on the server side and vice versa. [#56035](https://github.com/ClickHouse/ClickHouse/pull/56035) ([Azat Khuzhin](https://github.com/azat)). -* Comparison of time intervals with different units will throw an exception.
This closes [#55942](https://github.com/ClickHouse/ClickHouse/issues/55942). You might have occasionally relied on the previous behavior when the underlying numeric values were compared regardless of the units. [#56090](https://github.com/ClickHouse/ClickHouse/pull/56090) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Rewrote the experimental `S3Queue` table engine completely: changed the way we keep information in zookeeper, which allows making fewer zookeeper requests, added caching of zookeeper state in cases when we know the state will not change, improved the polling from the s3 process to make it less aggressive, changed the way ttl and max set for tracked files are maintained, now it is a background process. Added `system.s3queue` and `system.s3queue_log` tables. Closes [#54998](https://github.com/ClickHouse/ClickHouse/issues/54998). [#54422](https://github.com/ClickHouse/ClickHouse/pull/54422) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Arbitrary paths on the HTTP endpoint are no longer interpreted as a request to the `/query` endpoint. [#55521](https://github.com/ClickHouse/ClickHouse/pull/55521) ([Konstantin Bogdanov](https://github.com/thevar1able)). - -#### New Feature -* Add function `arrayFold(accumulator, x1, ..., xn -> expression, initial, array1, ..., arrayn)` which applies a lambda function to multiple arrays of the same cardinality and collects the result in an accumulator. [#49794](https://github.com/ClickHouse/ClickHouse/pull/49794) ([Lirikl](https://github.com/Lirikl)). -* Support for `Npy` format. `SELECT * FROM file('example_array.npy', Npy)`. [#55982](https://github.com/ClickHouse/ClickHouse/pull/55982) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). -* If a table has a space-filling curve in its key, e.g., `ORDER BY mortonEncode(x, y)`, the conditions on its arguments, e.g., `x >= 10 AND x <= 20 AND y >= 20 AND y <= 30`, can be used for indexing, as shown in the example below. A setting `analyze_index_with_space_filling_curves` is added to enable or disable this analysis. This closes [#41195](https://github.com/ClickHouse/ClickHouse/issues/41195). Continuation of [#4538](https://github.com/ClickHouse/ClickHouse/pull/4538). Continuation of [#6286](https://github.com/ClickHouse/ClickHouse/pull/6286). Continuation of [#28130](https://github.com/ClickHouse/ClickHouse/pull/28130). Continuation of [#41753](https://github.com/ClickHouse/ClickHouse/pull/41753). [#55642](https://github.com/ClickHouse/ClickHouse/pull/55642) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* A new setting called `force_optimize_projection_name` takes the name of a projection as an argument. If its value is set to a non-empty string, ClickHouse checks that this projection is used in the query at least once. Closes [#55331](https://github.com/ClickHouse/ClickHouse/issues/55331). [#56134](https://github.com/ClickHouse/ClickHouse/pull/56134) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). -* Support asynchronous inserts with external data via the native protocol. Previously it worked only if the data was inlined into the query. [#54730](https://github.com/ClickHouse/ClickHouse/pull/54730) ([Anton Popov](https://github.com/CurtizJ)). -* Added aggregation function `lttb` which uses the [Largest-Triangle-Three-Buckets](https://skemman.is/bitstream/1946/15343/3/SS_MSthesis.pdf) algorithm for downsampling data for visualization. [#53145](https://github.com/ClickHouse/ClickHouse/pull/53145) ([Sinan](https://github.com/sinsinan)).
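A minimal sketch of the space-filling-curve indexing described in the entry above; the table and column names are hypothetical, not taken from the PR.

```sql
-- Hypothetical table whose sorting key is a space-filling curve.
CREATE TABLE points
(
    x UInt32,
    y UInt32
)
ENGINE = MergeTree
ORDER BY mortonEncode(x, y);

-- Range conditions on the curve's arguments can now be used for index analysis
-- (controlled by the setting analyze_index_with_space_filling_curves).
SELECT count()
FROM points
WHERE x >= 10 AND x <= 20 AND y >= 20 AND y <= 30;
```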
-* Query `CHECK TABLE` has better performance and usability (sends progress updates, cancellable). Support checking a particular part with `CHECK TABLE ... PART 'part_name'`. [#53404](https://github.com/ClickHouse/ClickHouse/pull/53404) ([vdimir](https://github.com/vdimir)). -* Added function `jsonMergePatch`. When working with JSON data as strings, it provides a way to merge these strings (of JSON objects) together to form a single string containing a single JSON object. [#54364](https://github.com/ClickHouse/ClickHouse/pull/54364) ([Memo](https://github.com/Joeywzr)). -* The second part of Kusto Query Language dialect support. The [Phase 1 implementation](https://github.com/ClickHouse/ClickHouse/pull/37961) has been merged. [#42510](https://github.com/ClickHouse/ClickHouse/pull/42510) ([larryluogit](https://github.com/larryluogit)). -* Added a new SQL function, `arrayRandomSample(arr, k)`, which returns a sample of k elements from the input array (see the example below). Similar functionality could previously be achieved only with less convenient syntax, e.g. "SELECT arrayReduce('groupArraySample(3)', range(10))". [#54391](https://github.com/ClickHouse/ClickHouse/pull/54391) ([itayisraelov](https://github.com/itayisraelov)). -* Introduce `-ArgMin`/`-ArgMax` aggregate combinators which allow aggregating by min/max values only. One use case can be found in [#54818](https://github.com/ClickHouse/ClickHouse/issues/54818). This PR also reorganizes combinators into a dedicated folder. [#54947](https://github.com/ClickHouse/ClickHouse/pull/54947) ([Amos Bird](https://github.com/amosbird)). -* Allow to drop the cache for the Protobuf format with `SYSTEM DROP SCHEMA FORMAT CACHE [FOR Protobuf]`. [#55064](https://github.com/ClickHouse/ClickHouse/pull/55064) ([Aleksandr Musorin](https://github.com/AVMusorin)). -* Add external HTTP Basic authenticator. [#55199](https://github.com/ClickHouse/ClickHouse/pull/55199) ([Aleksei Filatov](https://github.com/aalexfvk)). -* Added function `byteSwap` which reverses the bytes of unsigned integers. This is particularly useful for reversing values of types which are represented as unsigned integers internally, such as IPv4. [#55211](https://github.com/ClickHouse/ClickHouse/pull/55211) ([Priyansh Agrawal](https://github.com/Priyansh121096)). -* Added function `formatQuery` which returns a formatted version (possibly spanning multiple lines) of a SQL query string. Also added function `formatQuerySingleLine` which does the same but the returned string will not contain linebreaks (see the example below). [#55239](https://github.com/ClickHouse/ClickHouse/pull/55239) ([Salvatore Mesoraca](https://github.com/aiven-sal)). -* Added `DWARF` input format that reads debug symbols from an ELF executable/library/object file. [#55450](https://github.com/ClickHouse/ClickHouse/pull/55450) ([Michael Kolupaev](https://github.com/al13n321)). -* Allow to save unparsed records and errors in RabbitMQ, NATS and FileLog engines. Add virtual columns `_error` and `_raw_message` (for NATS and RabbitMQ), `_raw_record` (for FileLog) that are filled when ClickHouse fails to parse a new record. The behaviour is controlled by the storage settings `nats_handle_error_mode` for NATS, `rabbitmq_handle_error_mode` for RabbitMQ, and `handle_error_mode` for FileLog, similar to `kafka_handle_error_mode`. If it's set to `default`, an exception will be thrown when ClickHouse fails to parse a record; if it's set to `stream`, the error and raw record will be saved into virtual columns. Closes [#36035](https://github.com/ClickHouse/ClickHouse/issues/36035).
[#55477](https://github.com/ClickHouse/ClickHouse/pull/55477) ([Kruglov Pavel](https://github.com/Avogar)). -* Keeper client improvement: add `get_all_children_number` command that returns the number of all children nodes under a specific path. [#55485](https://github.com/ClickHouse/ClickHouse/pull/55485) ([guoxiaolong](https://github.com/guoxiaolongzte)). -* Keeper client improvement: add `get_direct_children_number` command that returns the number of direct children nodes under a path. [#55898](https://github.com/ClickHouse/ClickHouse/pull/55898) ([xuzifu666](https://github.com/xuzifu666)). -* Add statement `SHOW SETTING setting_name` which is a simpler version of the existing statement `SHOW SETTINGS`. [#55979](https://github.com/ClickHouse/ClickHouse/pull/55979) ([Maksim Kita](https://github.com/kitaisreal)). -* Added fields `substreams` and `filenames` to the `system.parts_columns` table. [#55108](https://github.com/ClickHouse/ClickHouse/pull/55108) ([Anton Popov](https://github.com/CurtizJ)). -* Add support for `SHOW MERGES` query. [#55815](https://github.com/ClickHouse/ClickHouse/pull/55815) ([megao](https://github.com/jetgm)). -* Introduce a setting `create_table_empty_primary_key_by_default` for default `ORDER BY ()`. [#55899](https://github.com/ClickHouse/ClickHouse/pull/55899) ([Srikanth Chekuri](https://github.com/srikanthccv)). - -#### Performance Improvement -* Add option `query_plan_preserve_num_streams_after_window_functions` to preserve the number of streams after evaluating window functions to allow parallel stream processing. [#50771](https://github.com/ClickHouse/ClickHouse/pull/50771) ([frinkr](https://github.com/frinkr)). -* Release more streams if data is small. [#53867](https://github.com/ClickHouse/ClickHouse/pull/53867) ([Jiebin Sun](https://github.com/jiebinn)). -* RoaringBitmaps are now optimized before serialization. [#55044](https://github.com/ClickHouse/ClickHouse/pull/55044) ([UnamedRus](https://github.com/UnamedRus)). -* Posting lists in inverted indexes are now optimized to use the smallest possible representation for internal bitmaps. Depending on the repetitiveness of the data, this may significantly reduce the space consumption of inverted indexes. [#55069](https://github.com/ClickHouse/ClickHouse/pull/55069) ([Harry Lee](https://github.com/HarryLeeIBM)). -* Fix contention on the Context lock; this significantly improves performance for a lot of short-running concurrent queries. [#55121](https://github.com/ClickHouse/ClickHouse/pull/55121) ([Maksim Kita](https://github.com/kitaisreal)). -* Improved the performance of inverted index creation by 30%. This was achieved by replacing `std::unordered_map` with `absl::flat_hash_map`. [#55210](https://github.com/ClickHouse/ClickHouse/pull/55210) ([Harry Lee](https://github.com/HarryLeeIBM)). -* Support ORC filter push down (rowgroup level). [#55330](https://github.com/ClickHouse/ClickHouse/pull/55330) ([李扬](https://github.com/taiyang-li)). -* Improve performance of external aggregation with a lot of temporary files. [#55489](https://github.com/ClickHouse/ClickHouse/pull/55489) ([Maksim Kita](https://github.com/kitaisreal)). -* Set a reasonable size for the marks cache for secondary indices by default to avoid loading the marks over and over again. [#55654](https://github.com/ClickHouse/ClickHouse/pull/55654) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Avoid unnecessary reconstruction of index granules when reading skip indexes.
This addresses [#55653](https://github.com/ClickHouse/ClickHouse/issues/55653#issuecomment-1763766009). [#55683](https://github.com/ClickHouse/ClickHouse/pull/55683) ([Amos Bird](https://github.com/amosbird)). -* Cache the CAST function in set during execution to improve the performance of function `IN` when the set element type doesn't exactly match the column type. [#55712](https://github.com/ClickHouse/ClickHouse/pull/55712) ([Duc Canh Le](https://github.com/canhld94)). -* Performance improvement for `ColumnVector::insertMany` and `ColumnVector::insertManyFrom`. [#55714](https://github.com/ClickHouse/ClickHouse/pull/55714) ([frinkr](https://github.com/frinkr)). -* Optimized Map subscript operations by predicting the next row's key position and reducing comparisons. [#55929](https://github.com/ClickHouse/ClickHouse/pull/55929) ([lgbo](https://github.com/lgbo-ustc)). -* Support struct fields pruning in Parquet (in previous versions it didn't work in some cases). [#56117](https://github.com/ClickHouse/ClickHouse/pull/56117) ([lgbo](https://github.com/lgbo-ustc)). -* Add the ability to tune the number of parallel replicas used in a query execution based on the estimation of rows to read. [#51692](https://github.com/ClickHouse/ClickHouse/pull/51692) ([Raúl Marín](https://github.com/Algunenano)). -* Optimized external aggregation memory consumption in case many temporary files were generated. [#54798](https://github.com/ClickHouse/ClickHouse/pull/54798) ([Nikita Taranov](https://github.com/nickitat)). -* Distributed queries executed in `async_socket_for_remote` mode (default) now respect the `max_threads` limit. Previously, some queries could create excessive threads (up to `max_distributed_connections`), causing server performance issues. [#53504](https://github.com/ClickHouse/ClickHouse/pull/53504) ([filimonov](https://github.com/filimonov)). -* Cache skippable entries while executing DDL from the ZooKeeper distributed DDL queue. [#54828](https://github.com/ClickHouse/ClickHouse/pull/54828) ([Duc Canh Le](https://github.com/canhld94)). -* Experimental inverted indexes do not store tokens with too many matches (i.e. row ids in the posting list). This saves space and avoids ineffective index lookups when sequential scans would be equally fast or faster. The previous heuristic (the `density` parameter passed to the index definition) that controlled when tokens would not be stored was too confusing for users. A much simpler heuristic based on the parameter `max_rows_per_postings_list` (default: 64k) is introduced which directly controls the maximum allowed number of row ids in a postings list. [#55616](https://github.com/ClickHouse/ClickHouse/pull/55616) ([Harry Lee](https://github.com/HarryLeeIBM)). -* Improve write performance to `EmbeddedRocksDB` tables. [#55732](https://github.com/ClickHouse/ClickHouse/pull/55732) ([Duc Canh Le](https://github.com/canhld94)). -* Improved overall resilience for ClickHouse in case of many parts within a partition (more than 1000). It might reduce the number of `TOO_MANY_PARTS` errors. [#55526](https://github.com/ClickHouse/ClickHouse/pull/55526) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). -* Reduced memory consumption during loading of hierarchical dictionaries. [#55838](https://github.com/ClickHouse/ClickHouse/pull/55838) ([Nikita Taranov](https://github.com/nickitat)). -* All dictionaries support setting `dictionary_use_async_executor`. [#55839](https://github.com/ClickHouse/ClickHouse/pull/55839) ([vdimir](https://github.com/vdimir)).
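For the `formatQuery`/`formatQuerySingleLine` and `arrayRandomSample` entries in the New Feature list above, a minimal sketch; the input strings and array are hypothetical.

```sql
-- Return a formatted (possibly multi-line) or single-line version of a SQL string.
SELECT formatQuery('select a,  b FRom tab WHERE a > 3');
SELECT formatQuerySingleLine('select a,  b FRom tab WHERE a > 3');

-- Take a random sample of 3 elements from an array.
SELECT arrayRandomSample(range(10), 3);
```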
-* Prevent excessive memory usage when deserializing AggregateFunctionTopKGenericData. [#55947](https://github.com/ClickHouse/ClickHouse/pull/55947) ([Raúl Marín](https://github.com/Algunenano)). -* On a Keeper with lots of watches, AsyncMetrics threads can consume 100% of CPU for a noticeable time in `DB::KeeperStorage::getSessionsWithWatchesCount`. The fix is to avoid traversing heavy `watches` and `list_watches` sets. [#56054](https://github.com/ClickHouse/ClickHouse/pull/56054) ([Alexander Gololobov](https://github.com/davenger)). -* Add setting `optimize_trivial_approximate_count_query` to use `count` approximation for storage EmbeddedRocksDB. Enable trivial count for StorageJoin. [#55806](https://github.com/ClickHouse/ClickHouse/pull/55806) ([Duc Canh Le](https://github.com/canhld94)). - -#### Improvement -* Functions `toDayOfWeek` (MySQL alias: `DAYOFWEEK`), `toYearWeek` (`YEARWEEK`) and `toWeek` (`WEEK`) now support `String` arguments. This makes their behavior consistent with MySQL's behavior. [#55589](https://github.com/ClickHouse/ClickHouse/pull/55589) ([Robert Schulze](https://github.com/rschu1ze)). -* Introduced setting `date_time_overflow_behavior` with possible values `ignore`, `throw`, `saturate` that controls the overflow behavior when converting from Date, Date32, DateTime64, Integer or Float to Date, Date32, DateTime or DateTime64. [#55696](https://github.com/ClickHouse/ClickHouse/pull/55696) ([Andrey Zvonov](https://github.com/zvonand)). -* Implement query parameters support for `ALTER TABLE ... ACTION PARTITION [ID] {parameter_name:ParameterType}`. Merges [#49516](https://github.com/ClickHouse/ClickHouse/issues/49516). Closes [#49449](https://github.com/ClickHouse/ClickHouse/issues/49449). [#55604](https://github.com/ClickHouse/ClickHouse/pull/55604) ([alesapin](https://github.com/alesapin)). -* Print processor ids in a prettier manner in EXPLAIN. [#48852](https://github.com/ClickHouse/ClickHouse/pull/48852) ([Vlad Seliverstov](https://github.com/behebot)). -* Creating a direct dictionary with a lifetime field will be rejected at create time (as the lifetime does not make sense for direct dictionaries). Fixes: [#27861](https://github.com/ClickHouse/ClickHouse/issues/27861). [#49043](https://github.com/ClickHouse/ClickHouse/pull/49043) ([Rory Crispin](https://github.com/RoryCrispin)). -* Allow parameters in queries with partitions like `ALTER TABLE t DROP PARTITION`, as shown in the example below. Closes [#49449](https://github.com/ClickHouse/ClickHouse/issues/49449). [#49516](https://github.com/ClickHouse/ClickHouse/pull/49516) ([Nikolay Degterinsky](https://github.com/evillique)). -* Add a new column `xid` for `system.zookeeper_connection`. [#50702](https://github.com/ClickHouse/ClickHouse/pull/50702) ([helifu](https://github.com/helifu)). -* Display the correct server settings in `system.server_settings` after configuration reload. [#53774](https://github.com/ClickHouse/ClickHouse/pull/53774) ([helifu](https://github.com/helifu)). -* Add support for the mathematical minus `−` character in queries, similar to `-`. [#54100](https://github.com/ClickHouse/ClickHouse/pull/54100) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Add replica groups to the experimental `Replicated` database engine. Closes [#53620](https://github.com/ClickHouse/ClickHouse/issues/53620). [#54421](https://github.com/ClickHouse/ClickHouse/pull/54421) ([Nikolay Degterinsky](https://github.com/evillique)). -* It is better to retry retriable s3 errors than to fail the query entirely.
Set a bigger default value for `s3_retry_attempts`. [#54770](https://github.com/ClickHouse/ClickHouse/pull/54770) ([Sema Checherinda](https://github.com/CheSema)). -* Add load balancing mode `hostname_levenshtein_distance`. [#54826](https://github.com/ClickHouse/ClickHouse/pull/54826) ([JackyWoo](https://github.com/JackyWoo)). -* Improve hiding secrets in logs. [#55089](https://github.com/ClickHouse/ClickHouse/pull/55089) ([Vitaly Baranov](https://github.com/vitlibar)). -* From now on, projection analysis will be performed only on top of the query plan. The setting `query_plan_optimize_projection` became obsolete (it was enabled by default a long time ago). [#55112](https://github.com/ClickHouse/ClickHouse/pull/55112) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). -* When function `untuple` is called on a tuple with named elements and itself has an alias (e.g. `select untuple(tuple(1)::Tuple(element_alias Int)) AS untuple_alias`), then the result column name is now generated from the untuple alias and the tuple element alias (in the example: "untuple_alias.element_alias"). [#55123](https://github.com/ClickHouse/ClickHouse/pull/55123) ([garcher22](https://github.com/garcher22)). -* Added setting `describe_include_virtual_columns`, which allows including virtual columns of a table in the result of a `DESCRIBE` query. Added setting `describe_compact_output`. If it is set to `true`, a `DESCRIBE` query returns only names and types of columns without extra information. [#55129](https://github.com/ClickHouse/ClickHouse/pull/55129) ([Anton Popov](https://github.com/CurtizJ)). -* Sometimes `OPTIMIZE` with `optimize_throw_if_noop=1` may fail with an error `unknown reason` while the real cause of it is different projections in different parts. This behavior is fixed. [#55130](https://github.com/ClickHouse/ClickHouse/pull/55130) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). -* Allow to have several `MaterializedPostgreSQL` tables following the same Postgres table. By default this behaviour is not enabled (for compatibility, because it is a backward-incompatible change), but can be turned on with setting `materialized_postgresql_use_unique_replication_consumer_identifier`. Closes [#54918](https://github.com/ClickHouse/ClickHouse/issues/54918). [#55145](https://github.com/ClickHouse/ClickHouse/pull/55145) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Allow to parse negative `DateTime64` and `DateTime` with fractional part from short strings. [#55146](https://github.com/ClickHouse/ClickHouse/pull/55146) ([Andrey Zvonov](https://github.com/zvonand)). -* To improve compatibility with MySQL, 1. `information_schema.tables` now includes the new field `table_rows`, and 2. `information_schema.columns` now includes the new field `extra`. [#55215](https://github.com/ClickHouse/ClickHouse/pull/55215) ([Robert Schulze](https://github.com/rschu1ze)). -* Clickhouse-client won't show "0 rows in set" if it is zero and an exception was thrown. [#55240](https://github.com/ClickHouse/ClickHouse/pull/55240) ([Salvatore Mesoraca](https://github.com/aiven-sal)). -* Support rename table without the keyword `TABLE`, like `RENAME db.t1 to db.t2`. [#55373](https://github.com/ClickHouse/ClickHouse/pull/55373) ([凌涛](https://github.com/lingtaolf)). -* Add `internal_replication` to `system.clusters`. [#55377](https://github.com/ClickHouse/ClickHouse/pull/55377) ([Konstantin Morozov](https://github.com/k-morozov)).
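For the partition-parameter entries above (`ALTER TABLE ... PARTITION [ID] {parameter_name:ParameterType}`), a minimal sketch; the table, parameter name and partition ID are hypothetical.

```sql
-- Hypothetical table partitioned by month.
CREATE TABLE events
(
    d Date,
    v UInt64
)
ENGINE = MergeTree
PARTITION BY toYYYYMM(d)
ORDER BY d;

-- The partition ID can now be supplied as a query parameter,
-- e.g. SET param_pid = '202310' or --param_pid in clickhouse-client.
ALTER TABLE events DROP PARTITION ID {pid:String};
```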
-* Select the remote proxy resolver based on the request protocol, add proxy feature docs and remove `DB::ProxyConfiguration::Protocol::ANY`. [#55430](https://github.com/ClickHouse/ClickHouse/pull/55430) ([Arthur Passos](https://github.com/arthurpassos)). -* Avoid retrying keeper operations on INSERT after table shutdown. [#55519](https://github.com/ClickHouse/ClickHouse/pull/55519) ([Azat Khuzhin](https://github.com/azat)). -* `SHOW COLUMNS` now correctly reports type `FixedString` as `BLOB` if setting `use_mysql_types_in_show_columns` is on. Also added two new settings, `mysql_map_string_to_text_in_show_columns` and `mysql_map_fixed_string_to_text_in_show_columns`, to switch the output for types `String` and `FixedString` as `TEXT` or `BLOB`. [#55617](https://github.com/ClickHouse/ClickHouse/pull/55617) ([Serge Klochkov](https://github.com/slvrtrn)). -* During ReplicatedMergeTree table startup, the ClickHouse server checks the set of parts for unexpected parts (which exist locally, but not in ZooKeeper). All unexpected parts are moved to the detached directory, and instead of them the server tries to restore some ancestor (covered) parts. Now the server tries to restore the closest ancestors instead of random covered parts. [#55645](https://github.com/ClickHouse/ClickHouse/pull/55645) ([alesapin](https://github.com/alesapin)). -* The advanced dashboard now supports draggable charts on touch devices. This closes [#54206](https://github.com/ClickHouse/ClickHouse/issues/54206). [#55649](https://github.com/ClickHouse/ClickHouse/pull/55649) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Use the default query format if declared when outputting an exception with `http_write_exception_in_output_format`. [#55739](https://github.com/ClickHouse/ClickHouse/pull/55739) ([Raúl Marín](https://github.com/Algunenano)). -* Provide a better message for common MATERIALIZED VIEW pitfalls. [#55826](https://github.com/ClickHouse/ClickHouse/pull/55826) ([Raúl Marín](https://github.com/Algunenano)). -* If you dropped the current database, you will still be able to run some queries in `clickhouse-local` and switch to another database. This makes the behavior consistent with `clickhouse-client`. This closes [#55834](https://github.com/ClickHouse/ClickHouse/issues/55834). [#55853](https://github.com/ClickHouse/ClickHouse/pull/55853) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Functions `(add|subtract)(Year|Quarter|Month|Week|Day|Hour|Minute|Second|Millisecond|Microsecond|Nanosecond)` now support string-encoded date arguments, e.g. `SELECT addDays('2023-10-22', 1)`. This increases compatibility with MySQL and is needed by Tableau Online. [#55869](https://github.com/ClickHouse/ClickHouse/pull/55869) ([Robert Schulze](https://github.com/rschu1ze)). -* The setting `apply_deleted_mask`, when disabled, allows reading rows that were marked as deleted by lightweight DELETE queries. This is useful for debugging. [#55952](https://github.com/ClickHouse/ClickHouse/pull/55952) ([Alexander Gololobov](https://github.com/davenger)). -* Allow skipping `null` values when serializing Tuple to JSON objects, which makes it possible to keep compatibility with Spark's `to_json` function; this is also useful for Gluten. [#55956](https://github.com/ClickHouse/ClickHouse/pull/55956) ([李扬](https://github.com/taiyang-li)). -* Functions `(add|sub)Date` now support string-encoded date arguments, e.g. `SELECT addDate('2023-10-22 11:12:13', INTERVAL 5 MINUTE)`. The same support for string-encoded date arguments is added to the plus and minus operators, e.g.
`SELECT '2023-10-23' + INTERVAL 1 DAY`. This increases compatibility with MySQL and is needed by Tableau Online. [#55960](https://github.com/ClickHouse/ClickHouse/pull/55960) ([Robert Schulze](https://github.com/rschu1ze)). -* Allow unquoted strings with CR (`\r`) in CSV format. Closes [#39930](https://github.com/ClickHouse/ClickHouse/issues/39930). [#56046](https://github.com/ClickHouse/ClickHouse/pull/56046) ([Kruglov Pavel](https://github.com/Avogar)). -* Allow to run `clickhouse-keeper` using an embedded config. [#56086](https://github.com/ClickHouse/ClickHouse/pull/56086) ([Maksim Kita](https://github.com/kitaisreal)). -* Set a limit on the maximum configuration value for `queued.min.messages` to avoid problems with starting to fetch data from Kafka. [#56121](https://github.com/ClickHouse/ClickHouse/pull/56121) ([Stas Morozov](https://github.com/r3b-fish)). -* Fixed a typo in SQL function `minSampleSizeContinous` (renamed `minSampleSizeContinuous`). The old name is preserved for backward compatibility. This closes: [#56139](https://github.com/ClickHouse/ClickHouse/issues/56139). [#56143](https://github.com/ClickHouse/ClickHouse/pull/56143) ([Dorota Szeremeta](https://github.com/orotaday)). -* Print the path of broken parts on disk before shutting down the server. Before this change, if a part was corrupted on disk and the server could not start, it was almost impossible to understand which part was broken. This is fixed. [#56181](https://github.com/ClickHouse/ClickHouse/pull/56181) ([Duc Canh Le](https://github.com/canhld94)). - -#### Build/Testing/Packaging Improvement -* If the database in Docker is already initialized, it doesn't need to be initialized again upon subsequent launches. This can potentially fix the issue of infinite container restarts when the database fails to load within 1000 attempts (relevant for very large databases and multi-node setups). [#50724](https://github.com/ClickHouse/ClickHouse/pull/50724) ([Alexander Nikolaev](https://github.com/AlexNik)). -* A resource with the source code including submodules is built in the Darwin special build task. It may be used to build ClickHouse without checking out the submodules. [#51435](https://github.com/ClickHouse/ClickHouse/pull/51435) ([Ilya Yatsishin](https://github.com/qoega)). -* An error was occurring when building ClickHouse with the AVX series of instructions enabled globally (which isn't recommended). The reason is that snappy does not enable `SNAPPY_HAVE_X86_CRC32`. [#55049](https://github.com/ClickHouse/ClickHouse/pull/55049) ([monchickey](https://github.com/monchickey)). -* Solve an issue with launching a standalone `clickhouse-keeper` from the `clickhouse-server` package. [#55226](https://github.com/ClickHouse/ClickHouse/pull/55226) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). -* In the tests, the RabbitMQ version is updated to 3.12.6. Improved logs collection for RabbitMQ tests. [#55424](https://github.com/ClickHouse/ClickHouse/pull/55424) ([Ilya Yatsishin](https://github.com/qoega)). -* Modified the error message difference between openssl and boringssl to fix the functional test. [#55975](https://github.com/ClickHouse/ClickHouse/pull/55975) ([MeenaRenganathan22](https://github.com/MeenaRenganathan22)). -* Use the upstream repo for Apache DataSketches. [#55787](https://github.com/ClickHouse/ClickHouse/pull/55787) ([Nikita Taranov](https://github.com/nickitat)).
- -#### Bug Fix (user-visible misbehavior in an official stable release) -* Skip hardlinking inverted index files in mutation [#47663](https://github.com/ClickHouse/ClickHouse/pull/47663) ([cangyin](https://github.com/cangyin)). -* Fixed bug of `match` function (regex) with pattern containing alternation produces incorrect key condition. Closes #53222. [#54696](https://github.com/ClickHouse/ClickHouse/pull/54696) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). -* Fix 'Cannot find column' in read-in-order optimization with ARRAY JOIN [#51746](https://github.com/ClickHouse/ClickHouse/pull/51746) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Support missed experimental `Object(Nullable(json))` subcolumns in query. [#54052](https://github.com/ClickHouse/ClickHouse/pull/54052) ([zps](https://github.com/VanDarkholme7)). -* Re-add fix for `accurateCastOrNull` [#54629](https://github.com/ClickHouse/ClickHouse/pull/54629) ([Salvatore Mesoraca](https://github.com/aiven-sal)). -* Fix detecting `DEFAULT` for columns of a Distributed table created without AS [#55060](https://github.com/ClickHouse/ClickHouse/pull/55060) ([Vitaly Baranov](https://github.com/vitlibar)). -* Proper cleanup in case of exception in ctor of ShellCommandSource [#55103](https://github.com/ClickHouse/ClickHouse/pull/55103) ([Alexander Gololobov](https://github.com/davenger)). -* Fix deadlock in LDAP assigned role update [#55119](https://github.com/ClickHouse/ClickHouse/pull/55119) ([Julian Maicher](https://github.com/jmaicher)). -* Suppress error statistics update for internal exceptions [#55128](https://github.com/ClickHouse/ClickHouse/pull/55128) ([Robert Schulze](https://github.com/rschu1ze)). -* Fix deadlock in backups [#55132](https://github.com/ClickHouse/ClickHouse/pull/55132) ([alesapin](https://github.com/alesapin)). -* Fix storage Iceberg files retrieval [#55144](https://github.com/ClickHouse/ClickHouse/pull/55144) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Fix partition pruning of extra columns in set. [#55172](https://github.com/ClickHouse/ClickHouse/pull/55172) ([Amos Bird](https://github.com/amosbird)). -* Fix recalculation of skip indexes in ALTER UPDATE queries when table has adaptive granularity [#55202](https://github.com/ClickHouse/ClickHouse/pull/55202) ([Duc Canh Le](https://github.com/canhld94)). -* Fix for background download in fs cache [#55252](https://github.com/ClickHouse/ClickHouse/pull/55252) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Avoid possible memory leaks in compressors in case of missing buffer finalization [#55262](https://github.com/ClickHouse/ClickHouse/pull/55262) ([Azat Khuzhin](https://github.com/azat)). -* Fix functions execution over sparse columns [#55275](https://github.com/ClickHouse/ClickHouse/pull/55275) ([Azat Khuzhin](https://github.com/azat)). -* Fix incorrect merging of Nested for SELECT FINAL FROM SummingMergeTree [#55276](https://github.com/ClickHouse/ClickHouse/pull/55276) ([Azat Khuzhin](https://github.com/azat)). -* Fix bug with inability to drop detached partition in replicated merge tree on top of S3 without zero copy [#55309](https://github.com/ClickHouse/ClickHouse/pull/55309) ([alesapin](https://github.com/alesapin)). -* Fix a crash in MergeSortingPartialResultTransform (due to zero chunks after `remerge`) [#55335](https://github.com/ClickHouse/ClickHouse/pull/55335) ([Azat Khuzhin](https://github.com/azat)). 
-* Fix data-race in CreatingSetsTransform (on errors) due to throwing shared exception [#55338](https://github.com/ClickHouse/ClickHouse/pull/55338) ([Azat Khuzhin](https://github.com/azat)). -* Fix trash optimization (up to a certain extent) [#55353](https://github.com/ClickHouse/ClickHouse/pull/55353) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Fix leak in StorageHDFS [#55370](https://github.com/ClickHouse/ClickHouse/pull/55370) ([Azat Khuzhin](https://github.com/azat)). -* Fix parsing of arrays in cast operator [#55417](https://github.com/ClickHouse/ClickHouse/pull/55417) ([Anton Popov](https://github.com/CurtizJ)). -* Fix filtering by virtual columns with OR filter in query [#55418](https://github.com/ClickHouse/ClickHouse/pull/55418) ([Azat Khuzhin](https://github.com/azat)). -* Fix MongoDB connection issues [#55419](https://github.com/ClickHouse/ClickHouse/pull/55419) ([Nikolay Degterinsky](https://github.com/evillique)). -* Fix MySQL interface boolean representation [#55427](https://github.com/ClickHouse/ClickHouse/pull/55427) ([Serge Klochkov](https://github.com/slvrtrn)). -* Fix MySQL text protocol DateTime formatting and LowCardinality(Nullable(T)) types reporting [#55479](https://github.com/ClickHouse/ClickHouse/pull/55479) ([Serge Klochkov](https://github.com/slvrtrn)). -* Make `use_mysql_types_in_show_columns` affect only `SHOW COLUMNS` [#55481](https://github.com/ClickHouse/ClickHouse/pull/55481) ([Robert Schulze](https://github.com/rschu1ze)). -* Fix stack symbolizer parsing `DW_FORM_ref_addr` incorrectly and sometimes crashing [#55483](https://github.com/ClickHouse/ClickHouse/pull/55483) ([Michael Kolupaev](https://github.com/al13n321)). -* Destroy fiber in case of exception in cancelBefore in AsyncTaskExecutor [#55516](https://github.com/ClickHouse/ClickHouse/pull/55516) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix Query Parameters not working with custom HTTP handlers [#55521](https://github.com/ClickHouse/ClickHouse/pull/55521) ([Konstantin Bogdanov](https://github.com/thevar1able)). -* Fix checking of non handled data for Values format [#55527](https://github.com/ClickHouse/ClickHouse/pull/55527) ([Azat Khuzhin](https://github.com/azat)). -* Fix 'Invalid cursor state' in odbc interacting with MS SQL Server [#55558](https://github.com/ClickHouse/ClickHouse/pull/55558) ([vdimir](https://github.com/vdimir)). -* Fix max execution time and 'break' overflow mode [#55577](https://github.com/ClickHouse/ClickHouse/pull/55577) ([Alexander Gololobov](https://github.com/davenger)). -* Fix crash in QueryNormalizer with cyclic aliases [#55602](https://github.com/ClickHouse/ClickHouse/pull/55602) ([vdimir](https://github.com/vdimir)). -* Disable wrong optimization and add a test [#55609](https://github.com/ClickHouse/ClickHouse/pull/55609) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Merging [#52352](https://github.com/ClickHouse/ClickHouse/issues/52352) [#55621](https://github.com/ClickHouse/ClickHouse/pull/55621) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Add a test to avoid incorrect decimal sorting [#55662](https://github.com/ClickHouse/ClickHouse/pull/55662) ([Amos Bird](https://github.com/amosbird)). -* Fix progress bar for s3 and azure Cluster functions with url without globs [#55666](https://github.com/ClickHouse/ClickHouse/pull/55666) ([Kruglov Pavel](https://github.com/Avogar)). 
-* Fix filtering by virtual columns with OR filter in query (resubmit) [#55678](https://github.com/ClickHouse/ClickHouse/pull/55678) ([Azat Khuzhin](https://github.com/azat)). -* Fixes and improvements for Iceberg storage [#55695](https://github.com/ClickHouse/ClickHouse/pull/55695) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix data race in CreatingSetsTransform (v2) [#55786](https://github.com/ClickHouse/ClickHouse/pull/55786) ([Azat Khuzhin](https://github.com/azat)). -* Throw exception when parsing illegal string as float if precise_float_parsing is true [#55861](https://github.com/ClickHouse/ClickHouse/pull/55861) ([李扬](https://github.com/taiyang-li)). -* Disable predicate pushdown if the CTE contains stateful functions [#55871](https://github.com/ClickHouse/ClickHouse/pull/55871) ([Raúl Marín](https://github.com/Algunenano)). -* Fix normalize ASTSelectWithUnionQuery, as it was stripping `FORMAT` from the query [#55887](https://github.com/ClickHouse/ClickHouse/pull/55887) ([flynn](https://github.com/ucasfl)). -* Try to fix possible segfault in Native ORC input format [#55891](https://github.com/ClickHouse/ClickHouse/pull/55891) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix window functions in case of sparse columns. [#55895](https://github.com/ClickHouse/ClickHouse/pull/55895) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). -* fix: StorageNull supports subcolumns [#55912](https://github.com/ClickHouse/ClickHouse/pull/55912) ([FFish](https://github.com/wxybear)). -* Do not write retriable errors for Replicated mutate/merge into error log [#55944](https://github.com/ClickHouse/ClickHouse/pull/55944) ([Azat Khuzhin](https://github.com/azat)). -* Fix `SHOW DATABASES LIMIT ` [#55962](https://github.com/ClickHouse/ClickHouse/pull/55962) ([Raúl Marín](https://github.com/Algunenano)). -* Fix autogenerated Protobuf schema with fields with underscore [#55974](https://github.com/ClickHouse/ClickHouse/pull/55974) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix dateTime64ToSnowflake64() with non-default scale [#55983](https://github.com/ClickHouse/ClickHouse/pull/55983) ([Robert Schulze](https://github.com/rschu1ze)). -* Fix output/input of Arrow dictionary column [#55989](https://github.com/ClickHouse/ClickHouse/pull/55989) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix fetching schema from schema registry in AvroConfluent [#55991](https://github.com/ClickHouse/ClickHouse/pull/55991) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix 'Block structure mismatch' on concurrent ALTER and INSERTs in Buffer table [#55995](https://github.com/ClickHouse/ClickHouse/pull/55995) ([Michael Kolupaev](https://github.com/al13n321)). -* Fix incorrect free space accounting for least_used JBOD policy [#56030](https://github.com/ClickHouse/ClickHouse/pull/56030) ([Azat Khuzhin](https://github.com/azat)). -* Fix missing scalar issue when evaluating subqueries inside table functions [#56057](https://github.com/ClickHouse/ClickHouse/pull/56057) ([Amos Bird](https://github.com/amosbird)). -* Fix wrong query result when http_write_exception_in_output_format=1 [#56135](https://github.com/ClickHouse/ClickHouse/pull/56135) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix schema cache for fallback JSON->JSONEachRow with changed settings [#56172](https://github.com/ClickHouse/ClickHouse/pull/56172) ([Kruglov Pavel](https://github.com/Avogar)). 
-* Add error handler to odbc-bridge [#56185](https://github.com/ClickHouse/ClickHouse/pull/56185) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). - - -### ClickHouse release 23.9, 2023-09-28 - -#### Backward Incompatible Change -* Remove the `status_info` configuration option and dictionaries status from the default Prometheus handler. [#54090](https://github.com/ClickHouse/ClickHouse/pull/54090) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* The experimental parts metadata cache is removed from the codebase. [#54215](https://github.com/ClickHouse/ClickHouse/pull/54215) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Disable setting `input_format_json_try_infer_numbers_from_strings` by default, so we don't try to infer numbers from strings in JSON formats by default to avoid possible parsing errors when sample data contains strings that look like a number. [#55099](https://github.com/ClickHouse/ClickHouse/pull/55099) ([Kruglov Pavel](https://github.com/Avogar)). - -#### New Feature -* Improve schema inference from JSON formats: 1) Now it's possible to infer named Tuples from JSON objects without the experimental JSON type under a setting `input_format_json_try_infer_named_tuples_from_objects` in JSON formats. Previously, without the experimental type JSON we could only infer JSON objects as Strings or Maps; now we can infer named Tuples. The resulting Tuple type will contain all keys of objects that were read in the data sample during schema inference. It can be useful for reading structured JSON data without sparse objects. The setting is enabled by default. 2) Allow parsing a JSON array into a column with type String under setting `input_format_json_read_arrays_as_strings`. It can help to read arrays with values of different types. 3) Allow to use type String for JSON keys with unknown types (`null`/`[]`/`{}`) in sample data under setting `input_format_json_infer_incomplete_types_as_strings`. Now in JSON formats we can read any value into a String column, and we can avoid getting the error `Cannot determine type for column 'column_name' by first 25000 rows of data, most likely this column contains only Nulls or empty Arrays/Maps` during schema inference by using type String for unknown types, so the data will be read successfully. [#54427](https://github.com/ClickHouse/ClickHouse/pull/54427) ([Kruglov Pavel](https://github.com/Avogar)). -* Added IO scheduling support for remote disks. Storage configuration for disk types `s3`, `s3_plain`, `hdfs` and `azure_blob_storage` can now contain `read_resource` and `write_resource` elements holding resource names. Scheduling policies for these resources can be configured in a separate server configuration section `resources`. Queries can be marked using setting `workload` and classified using server configuration section `workload_classifiers` to achieve diverse resource scheduling goals. More details in [the docs](https://clickhouse.com/docs/en/operations/workload-scheduling). [#47009](https://github.com/ClickHouse/ClickHouse/pull/47009) ([Sergei Trifonov](https://github.com/serxa)). Added "bandwidth_limit" IO scheduling node type. It allows you to specify `max_speed` and `max_burst` constraints on traffic passing through this node. [#54618](https://github.com/ClickHouse/ClickHouse/pull/54618) ([Sergei Trifonov](https://github.com/serxa)). -* Added a new type of authentication based on SSH keys. It works only for the native TCP protocol.
[#41109](https://github.com/ClickHouse/ClickHouse/pull/41109) ([George Gamezardashvili](https://github.com/InfJoker)). -* Added a new column `_block_number` for MergeTree tables. [#44532](https://github.com/ClickHouse/ClickHouse/issues/44532). [#47532](https://github.com/ClickHouse/ClickHouse/pull/47532) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). -* Add `IF EMPTY` clause for `DROP TABLE` queries. [#48915](https://github.com/ClickHouse/ClickHouse/pull/48915) ([Pavel Novitskiy](https://github.com/pnovitskiy)). -* SQL functions `toString(datetime, timezone)` and `formatDateTime(datetime, format, timezone)` now support non-constant timezone arguments. [#53680](https://github.com/ClickHouse/ClickHouse/pull/53680) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). -* Add support for `ALTER TABLE MODIFY COMMENT`. Note: something similar was added by an external contributor a long time ago, but the feature did not work at all and only confused users. This closes [#36377](https://github.com/ClickHouse/ClickHouse/issues/36377). [#51304](https://github.com/ClickHouse/ClickHouse/pull/51304) ([Alexey Milovidov](https://github.com/alexey-milovidov)). Note: this command does not propagate between replicas, so the replicas of a table could have different comments. -* Added `GCD` a.k.a. "greatest common divisor" as a new data compression codec. The codec computes the GCD of all column values, and then divides each value by the GCD. The GCD codec is a data preparation codec (similar to Delta and DoubleDelta) and cannot be used stand-alone. It works with integer, decimal and date/time data types. A viable use case for the GCD codec is column values that change (increase/decrease) in multiples of the GCD, e.g. 24 - 28 - 16 - 24 - 8 - 24 (assuming GCD = 4); see the example below. [#53149](https://github.com/ClickHouse/ClickHouse/pull/53149) ([Alexander Nam](https://github.com/seshWCS)). -* Two new type aliases `DECIMAL(P)` (as a shortcut for `DECIMAL(P, 0)`) and `DECIMAL` (as a shortcut for `DECIMAL(10, 0)`) were added. This makes ClickHouse more compatible with MySQL's SQL dialect. [#53328](https://github.com/ClickHouse/ClickHouse/pull/53328) ([Val Doroshchuk](https://github.com/valbok)). -* Added a new system log table `backup_log` to track all `BACKUP` and `RESTORE` operations. [#53638](https://github.com/ClickHouse/ClickHouse/pull/53638) ([Victor Krasnov](https://github.com/sirvickr)). -* Added a format setting `output_format_markdown_escape_special_characters` (default: false). The setting controls whether special characters like `!`, `#`, `$` etc. are escaped (i.e. prefixed by a backslash) in the `Markdown` output format. [#53860](https://github.com/ClickHouse/ClickHouse/pull/53860) ([irenjj](https://github.com/irenjj)). -* Add function `decodeHTMLComponent`. [#54097](https://github.com/ClickHouse/ClickHouse/pull/54097) ([Bharat Nallan](https://github.com/bharatnc)). -* Added `peak_threads_usage` to the `query_log` table. [#54335](https://github.com/ClickHouse/ClickHouse/pull/54335) ([Alexey Gerasimchuck](https://github.com/Demilivor)). -* Add `SHOW FUNCTIONS` support to clickhouse-client. [#54337](https://github.com/ClickHouse/ClickHouse/pull/54337) ([Julia Kartseva](https://github.com/wat-ze-hex)). -* Added function `toDaysSinceYearZero` with alias `TO_DAYS` (for compatibility with MySQL) which returns the number of days passed since `0001-01-01` (in the Proleptic Gregorian Calendar). [#54479](https://github.com/ClickHouse/ClickHouse/pull/54479) ([Robert Schulze](https://github.com/rschu1ze)).
Function `toDaysSinceYearZero` now supports arguments of type `DateTime` and `DateTime64`. [#54856](https://github.com/ClickHouse/ClickHouse/pull/54856) ([Serge Klochkov](https://github.com/slvrtrn)). -* Added functions `YYYYMMDDtoDate`, `YYYYMMDDtoDate32`, `YYYYMMDDhhmmssToDateTime` and `YYYYMMDDhhmmssToDateTime64`. They convert a date or date with time encoded as an integer (e.g. 20230911) into a native date or date with time. As such, they provide the opposite functionality of the existing functions `toYYYYMMDD` and `toYYYYMMDDhhmmss`. [#54509](https://github.com/ClickHouse/ClickHouse/pull/54509) ([Quanfa Fu](https://github.com/dentiscalprum)) ([Robert Schulze](https://github.com/rschu1ze)). -* Add several string distance functions, including `byteHammingDistance`, `editDistance`. [#54935](https://github.com/ClickHouse/ClickHouse/pull/54935) ([flynn](https://github.com/ucasfl)). -* Allow specifying the expiration date and, optionally, the time for user credentials with the `VALID UNTIL datetime` clause. [#51261](https://github.com/ClickHouse/ClickHouse/pull/51261) ([Nikolay Degterinsky](https://github.com/evillique)). -* Allow S3-style URLs for table functions `s3`, `gcs`, `oss`. The URL is automatically converted to HTTP. Example: `'s3://clickhouse-public-datasets/hits.csv'` is converted to `'https://clickhouse-public-datasets.s3.amazonaws.com/hits.csv'`. [#54931](https://github.com/ClickHouse/ClickHouse/pull/54931) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). -* Add new setting `print_pretty_type_names` to pretty-print deeply nested types like Tuple/Map/Array. [#55095](https://github.com/ClickHouse/ClickHouse/pull/55095) ([Kruglov Pavel](https://github.com/Avogar)). - -#### Performance Improvement -* Speed up reading from S3 by enabling prefetches by default. [#53709](https://github.com/ClickHouse/ClickHouse/pull/53709) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Do not implicitly read PK and version columns in lonely parts if unnecessary for queries with FINAL. [#53919](https://github.com/ClickHouse/ClickHouse/pull/53919) ([Duc Canh Le](https://github.com/canhld94)). -* Optimize group by constant keys. Will optimize queries with group by `_file/_path` after https://github.com/ClickHouse/ClickHouse/pull/53529. [#53549](https://github.com/ClickHouse/ClickHouse/pull/53549) ([Kruglov Pavel](https://github.com/Avogar)). -* Improve performance of sorting for `Decimal` columns. Improve performance of insertion into `MergeTree` if ORDER BY contains a `Decimal` column. Improve performance of sorting when data is already sorted or almost sorted. [#35961](https://github.com/ClickHouse/ClickHouse/pull/35961) ([Maksim Kita](https://github.com/kitaisreal)). -* Improve performance for huge query analysis. Fixes [#51224](https://github.com/ClickHouse/ClickHouse/issues/51224). [#51469](https://github.com/ClickHouse/ClickHouse/pull/51469) ([frinkr](https://github.com/frinkr)). -* An optimization to rewrite `COUNT(DISTINCT ...)` and various `uniq` variants to `count` if it is selected from a subquery with GROUP BY. [#52082](https://github.com/ClickHouse/ClickHouse/pull/52082) [#52645](https://github.com/ClickHouse/ClickHouse/pull/52645) ([JackyWoo](https://github.com/JackyWoo)). -* Remove manual calls to `mmap/mremap/munmap` and delegate all this work to `jemalloc` - and it slightly improves performance. [#52792](https://github.com/ClickHouse/ClickHouse/pull/52792) ([Nikita Taranov](https://github.com/nickitat)).
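A minimal sketch for the `GCD` codec and `DECIMAL` alias entries in the 23.9 New Feature list above; the table and column names are hypothetical.

```sql
-- GCD is a data-preparation codec and cannot be used stand-alone,
-- so it is paired with a general-purpose codec here.
CREATE TABLE readings
(
    ts DateTime,
    price DECIMAL,                -- shortcut for DECIMAL(10, 0)
    step UInt64 CODEC(GCD, LZ4)   -- values changing in multiples of a common divisor compress well
)
ENGINE = MergeTree
ORDER BY ts;
```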
-* Fixed high CPU consumption when working with NATS. [#54399](https://github.com/ClickHouse/ClickHouse/pull/54399) ([Vasilev Pyotr](https://github.com/vahpetr)). -* Since we use separate instructions for executing `toString` with a datetime argument, it is possible to improve performance a bit for non-datetime arguments and have some parts of the code cleaner. Follows up [#53680](https://github.com/ClickHouse/ClickHouse/issues/53680). [#54443](https://github.com/ClickHouse/ClickHouse/pull/54443) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). -* Instead of serializing JSON elements into a `std::stringstream`, this PR tries to put the serialization result into `ColumnString` directly. [#54613](https://github.com/ClickHouse/ClickHouse/pull/54613) ([lgbo](https://github.com/lgbo-ustc)). -* Enable ORDER BY optimization for reading data in corresponding order from a MergeTree table in case the table is behind a view. [#54628](https://github.com/ClickHouse/ClickHouse/pull/54628) ([Vitaly Baranov](https://github.com/vitlibar)). -* Improve JSON SQL functions by reusing `GeneratorJSONPath` and removing several shared pointers. [#54735](https://github.com/ClickHouse/ClickHouse/pull/54735) ([lgbo](https://github.com/lgbo-ustc)). -* Keeper tries to batch flush requests for better performance. [#53049](https://github.com/ClickHouse/ClickHouse/pull/53049) ([Antonio Andelic](https://github.com/antonio2368)). -* Now `clickhouse-client` processes files in parallel in case of `INFILE 'glob_expression'`. Closes [#54218](https://github.com/ClickHouse/ClickHouse/issues/54218). [#54533](https://github.com/ClickHouse/ClickHouse/pull/54533) ([Max K.](https://github.com/mkaynov)). -* Allow to use the primary key for the IN function where primary key column types are different from the `IN` function right side column types. Example: `SELECT id FROM test_table WHERE id IN (SELECT '5')`. Closes [#48936](https://github.com/ClickHouse/ClickHouse/issues/48936). [#54544](https://github.com/ClickHouse/ClickHouse/pull/54544) ([Maksim Kita](https://github.com/kitaisreal)). -* Hash JOIN tries to shrink internal buffers consuming half of the maximal available memory (set by `max_bytes_in_join`). [#54584](https://github.com/ClickHouse/ClickHouse/pull/54584) ([vdimir](https://github.com/vdimir)). -* Respect `max_block_size` for array join to avoid possible OOM. Close [#54290](https://github.com/ClickHouse/ClickHouse/issues/54290). [#54664](https://github.com/ClickHouse/ClickHouse/pull/54664) ([李扬](https://github.com/taiyang-li)). -* Reuse HTTP connections in the `s3` table function. [#54812](https://github.com/ClickHouse/ClickHouse/pull/54812) ([Michael Kolupaev](https://github.com/al13n321)). -* Replace the linear search in `MergeTreeRangeReader::Stream::ceilRowsToCompleteGranules` with a binary search. [#54869](https://github.com/ClickHouse/ClickHouse/pull/54869) ([usurai](https://github.com/usurai)). - -#### Experimental Feature -* The creation of `Annoy` indexes can now be parallelized using setting `max_threads_for_annoy_index_creation`. [#54047](https://github.com/ClickHouse/ClickHouse/pull/54047) ([Robert Schulze](https://github.com/rschu1ze)). -* Parallel replicas over distributed don't read from all replicas [#54199](https://github.com/ClickHouse/ClickHouse/pull/54199) ([Igor Nikonov](https://github.com/devcrafter)). - -#### Improvement -* Allow to replace long names of files of columns in `MergeTree` data parts with hashes of names. It helps to avoid the `File name too long` error in some cases.
[#50612](https://github.com/ClickHouse/ClickHouse/pull/50612) ([Anton Popov](https://github.com/CurtizJ)). -* Parse data in the `JSON` format as `JSONEachRow` if it fails to parse metadata. This allows reading files with the `.json` extension even if the real format is JSONEachRow. Closes [#45740](https://github.com/ClickHouse/ClickHouse/issues/45740). [#54405](https://github.com/ClickHouse/ClickHouse/pull/54405) ([Kruglov Pavel](https://github.com/Avogar)). -* Output valid JSON/XML on exception during HTTP query execution. Add setting `http_write_exception_in_output_format` to enable/disable this behaviour (enabled by default). [#52853](https://github.com/ClickHouse/ClickHouse/pull/52853) ([Kruglov Pavel](https://github.com/Avogar)). -* View `information_schema.tables` now has a new field `data_length` which shows the approximate size of the data on disk. Required to run queries generated by Amazon QuickSight. [#55037](https://github.com/ClickHouse/ClickHouse/pull/55037) ([Robert Schulze](https://github.com/rschu1ze)). -* The MySQL interface gained a minimal implementation of prepared statements, just enough to allow a connection from Tableau Online to ClickHouse via the MySQL connector. [#54115](https://github.com/ClickHouse/ClickHouse/pull/54115) ([Serge Klochkov](https://github.com/slvrtrn)). Please note: the prepared statements implementation is pretty minimal; we do not support argument binding yet, as it is not required in this particular Tableau Online use case. It will be implemented as a follow-up if necessary after extensive testing of Tableau Online in case we discover issues. -* Support case-insensitive and dot-all matching modes in `regexp_tree` dictionaries. [#50906](https://github.com/ClickHouse/ClickHouse/pull/50906) ([Johann Gan](https://github.com/johanngan)). -* Keeper improvement: Add a `createIfNotExists` Keeper command. [#48855](https://github.com/ClickHouse/ClickHouse/pull/48855) ([Konstantin Bogdanov](https://github.com/thevar1able)). -* More precise integer type inference, fix [#51236](https://github.com/ClickHouse/ClickHouse/issues/51236). [#53003](https://github.com/ClickHouse/ClickHouse/pull/53003) ([Chen768959](https://github.com/Chen768959)). -* Introduced resolving of charsets in the string literals for MaterializedMySQL. [#53220](https://github.com/ClickHouse/ClickHouse/pull/53220) ([Val Doroshchuk](https://github.com/valbok)). -* Fix a subtle issue with a rarely used `EmbeddedRocksDB` table engine in an extremely rare scenario: sometimes the `EmbeddedRocksDB` table engine does not close files correctly in NFS after running `DROP TABLE`. [#53502](https://github.com/ClickHouse/ClickHouse/pull/53502) ([Mingliang Pan](https://github.com/liangliangpan)). -* `RESTORE TABLE ON CLUSTER` must create replicated tables with a matching UUID on hosts. Otherwise the macro `{uuid}` in the ZooKeeper path can't work correctly after RESTORE. This PR implements that. [#53765](https://github.com/ClickHouse/ClickHouse/pull/53765) ([Vitaly Baranov](https://github.com/vitlibar)). -* Added restore setting `restore_broken_parts_as_detached`: if it's true, the RESTORE process won't stop on broken parts while restoring; instead, all the broken parts will be copied to the `detached` folder with the prefix `broken-from-backup`. If it's false, the RESTORE process will stop on the first broken part (if any). The default value is false. [#53877](https://github.com/ClickHouse/ClickHouse/pull/53877) ([Vitaly Baranov](https://github.com/vitlibar)).
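For the `restore_broken_parts_as_detached` entry above, a minimal sketch; the backup destination and table name are hypothetical.

```sql
-- Broken parts are copied to the detached folder with a broken-from-backup prefix
-- instead of aborting the whole RESTORE on the first broken part.
RESTORE TABLE db.events FROM Disk('backups', 'events_backup')
SETTINGS restore_broken_parts_as_detached = true;
```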
-* Add `elapsed_ns` field to HTTP headers X-ClickHouse-Progress and X-ClickHouse-Summary. [#54179](https://github.com/ClickHouse/ClickHouse/pull/54179) ([joelynch](https://github.com/joelynch)). -* Implementation of `reconfig` (https://github.com/ClickHouse/ClickHouse/pull/49450), `sync`, and `exists` commands for keeper-client. [#54201](https://github.com/ClickHouse/ClickHouse/pull/54201) ([pufit](https://github.com/pufit)). -* `clickhouse-local` and `clickhouse-client` now allow to specify the `--query` parameter multiple times, e.g. `./clickhouse-client --query "SELECT 1" --query "SELECT 2"`. This syntax is slightly more intuitive than `./clickhouse-client --multiquery "SELECT 1;S ELECT 2"`, a bit easier to script (e.g. `queries.push_back('--query "$q"')`) and more consistent with the behavior of existing parameter `--queries-file` (e.g. `./clickhouse client --queries-file queries1.sql --queries-file queries2.sql`). [#54249](https://github.com/ClickHouse/ClickHouse/pull/54249) ([Robert Schulze](https://github.com/rschu1ze)). -* Add sub-second precision to `formatReadableTimeDelta`. [#54250](https://github.com/ClickHouse/ClickHouse/pull/54250) ([Andrey Zvonov](https://github.com/zvonand)). -* Enable `allow_remove_stale_moving_parts` by default. [#54260](https://github.com/ClickHouse/ClickHouse/pull/54260) ([vdimir](https://github.com/vdimir)). -* Fix using count from cache and improve progress bar for reading from archives. [#54271](https://github.com/ClickHouse/ClickHouse/pull/54271) ([Kruglov Pavel](https://github.com/Avogar)). -* Add support for S3 credentials using SSO. To define a profile to be used with SSO, set `AWS_PROFILE` environment variable. [#54347](https://github.com/ClickHouse/ClickHouse/pull/54347) ([Antonio Andelic](https://github.com/antonio2368)). -* Support NULL as default for nested types Array/Tuple/Map for input formats. Closes [#51100](https://github.com/ClickHouse/ClickHouse/issues/51100). [#54351](https://github.com/ClickHouse/ClickHouse/pull/54351) ([Kruglov Pavel](https://github.com/Avogar)). -* Allow reading some unusual configuration of chunks from Arrow/Parquet formats. [#54370](https://github.com/ClickHouse/ClickHouse/pull/54370) ([Arthur Passos](https://github.com/arthurpassos)). -* Add `STD` alias to `stddevPop` function for MySQL compatibility. Closes [#54274](https://github.com/ClickHouse/ClickHouse/issues/54274). [#54382](https://github.com/ClickHouse/ClickHouse/pull/54382) ([Nikolay Degterinsky](https://github.com/evillique)). -* Add `addDate` function for compatibility with MySQL and `subDate` for consistency. Reference [#54275](https://github.com/ClickHouse/ClickHouse/issues/54275). [#54400](https://github.com/ClickHouse/ClickHouse/pull/54400) ([Nikolay Degterinsky](https://github.com/evillique)). -* Add `modification_time` into `system.detached_parts`. [#54506](https://github.com/ClickHouse/ClickHouse/pull/54506) ([Azat Khuzhin](https://github.com/azat)). -* Added a setting `splitby_max_substrings_includes_remaining_string` which controls if functions "splitBy*()" with argument "max_substring" > 0 include the remaining string (if any) in the result array (Python/Spark semantics) or not. The default behavior does not change. [#54518](https://github.com/ClickHouse/ClickHouse/pull/54518) ([Robert Schulze](https://github.com/rschu1ze)). -* Better integer types inference for `Int64`/`UInt64` fields. Continuation of [#53003](https://github.com/ClickHouse/ClickHouse/pull/53003). 
Now it also works for nested types like Arrays of Arrays and for functions like `map/tuple`. Issue: [#51236](https://github.com/ClickHouse/ClickHouse/issues/51236). [#54553](https://github.com/ClickHouse/ClickHouse/pull/54553) ([Kruglov Pavel](https://github.com/Avogar)). -* Added array operations for multiplying, dividing and modulo by a scalar. They work both ways, for example `5 * [5, 5]` and `[5, 5] * 5` - both cases are possible. [#54608](https://github.com/ClickHouse/ClickHouse/pull/54608) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). -* Add optional `version` argument to the `rm` command in `keeper-client` to support safer deletes. [#54708](https://github.com/ClickHouse/ClickHouse/pull/54708) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). -* Disable killing the server by systemd (that may lead to data loss when using Buffer tables). [#54744](https://github.com/ClickHouse/ClickHouse/pull/54744) ([Azat Khuzhin](https://github.com/azat)). -* Added field `is_deterministic` to system table `system.functions` which indicates whether the result of a function is stable between two invocations (given exactly the same inputs) or not. [#54766](https://github.com/ClickHouse/ClickHouse/pull/54766) [#55035](https://github.com/ClickHouse/ClickHouse/pull/55035) ([Robert Schulze](https://github.com/rschu1ze)). -* Made the views in schema `information_schema` more compatible with the equivalent views in MySQL (i.e. modified and extended them) up to a point where Tableau Online is able to connect to ClickHouse. More specifically: 1. The type of field `information_schema.tables.table_type` changed from Enum8 to String. 2. Added fields `table_comment` and `table_collation` to view `information_schema.tables`. 3. Added views `information_schema.key_column_usage` and `referential_constraints`. 4. Replaced uppercase aliases in `information_schema` views with concrete uppercase columns. [#54773](https://github.com/ClickHouse/ClickHouse/pull/54773) ([Serge Klochkov](https://github.com/slvrtrn)). -* The query cache now returns an error if the user tries to cache the result of a query with a non-deterministic function such as `now`, `randomString` and `dictGet`. Compared to the previous behavior (silently not caching the result), this reduces confusion and surprise for users. [#54801](https://github.com/ClickHouse/ClickHouse/pull/54801) ([Robert Schulze](https://github.com/rschu1ze)). -* Forbid special columns like materialized/ephemeral/alias for `file`/`s3`/`url`/... storages, fix inserting into ephemeral columns from files. Closes [#53477](https://github.com/ClickHouse/ClickHouse/issues/53477). [#54803](https://github.com/ClickHouse/ClickHouse/pull/54803) ([Kruglov Pavel](https://github.com/Avogar)). -* Made metadata collection for backups more configurable. [#54804](https://github.com/ClickHouse/ClickHouse/pull/54804) ([Vitaly Baranov](https://github.com/vitlibar)). -* `clickhouse-local`'s log file (if enabled with the --server_logs_file flag) will now prefix each line with a timestamp, thread id, etc., just like `clickhouse-server`. [#54807](https://github.com/ClickHouse/ClickHouse/pull/54807) ([Michael Kolupaev](https://github.com/al13n321)). -* The field `is_obsolete` in the `system.merge_tree_settings` table is now 1 for obsolete merge tree settings. Previously, only the description indicated that the setting is obsolete. [#54837](https://github.com/ClickHouse/ClickHouse/pull/54837) ([Robert Schulze](https://github.com/rschu1ze)). -* Make it possible to use plural forms in interval literals.
`INTERVAL 2 HOURS` should be equivalent to `INTERVAL 2 HOUR`. [#54860](https://github.com/ClickHouse/ClickHouse/pull/54860) ([Jordi Villar](https://github.com/jrdi)). -* Always allow the creation of a projection with a `Nullable` PK. This fixes [#54814](https://github.com/ClickHouse/ClickHouse/issues/54814). [#54895](https://github.com/ClickHouse/ClickHouse/pull/54895) ([Amos Bird](https://github.com/amosbird)). -* Retry a backup's S3 operations after a connection reset failure. [#54900](https://github.com/ClickHouse/ClickHouse/pull/54900) ([Vitaly Baranov](https://github.com/vitlibar)). -* Make the exception message exact in case the maximum value of a setting is less than the minimum value. [#54925](https://github.com/ClickHouse/ClickHouse/pull/54925) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). -* `LIKE`, `match`, and other regular expression matching functions now allow matching with patterns containing non-UTF-8 substrings by falling back to binary matching. Example: you can use `string LIKE '\xFE\xFF%'` to detect a BOM. This closes [#54486](https://github.com/ClickHouse/ClickHouse/issues/54486). [#54942](https://github.com/ClickHouse/ClickHouse/pull/54942) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Added `ContextLockWaitMicroseconds` profile event. [#55029](https://github.com/ClickHouse/ClickHouse/pull/55029) ([Maksim Kita](https://github.com/kitaisreal)). -* Keeper now dynamically adjusts log levels. [#50372](https://github.com/ClickHouse/ClickHouse/pull/50372) ([helifu](https://github.com/helifu)). -* Added function `timestamp` for compatibility with MySQL. Closes [#54275](https://github.com/ClickHouse/ClickHouse/issues/54275). [#54639](https://github.com/ClickHouse/ClickHouse/pull/54639) ([Nikolay Degterinsky](https://github.com/evillique)). - -#### Build/Testing/Packaging Improvement -* Bumped the compiler of official and continuous integration builds of ClickHouse from Clang 16 to 17. [#53831](https://github.com/ClickHouse/ClickHouse/pull/53831) ([Robert Schulze](https://github.com/rschu1ze)). -* Regenerated tld data for lookups (`tldLookup.generated.cpp`). [#54269](https://github.com/ClickHouse/ClickHouse/pull/54269) ([Bharat Nallan](https://github.com/bharatnc)). -* Remove the redundant `clickhouse-keeper-client` symlink. [#54587](https://github.com/ClickHouse/ClickHouse/pull/54587) ([Tomas Barton](https://github.com/deric)). -* Use `/usr/bin/env` to resolve bash - now it supports NixOS. [#54603](https://github.com/ClickHouse/ClickHouse/pull/54603) ([Fionera](https://github.com/fionera)). -* Added a `PROFILE_CPU` CMake option, needed to perform `perf record` without using a DWARF call graph. [#54917](https://github.com/ClickHouse/ClickHouse/pull/54917) ([Maksim Kita](https://github.com/kitaisreal)). -* If the linker is different from LLD, stop with a fatal error. [#55036](https://github.com/ClickHouse/ClickHouse/pull/55036) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Replaced the library used to handle (encode/decode) base64 values, switching from Turbo-Base64 to aklomp-base64. Both are SIMD-accelerated on x86 and ARM, but 1. the license of the latter (BSD-2) is more favorable for ClickHouse (Turbo-Base64 switched in the meantime to GPL-3), 2. with more GitHub stars, aklomp-base64 seems more future-proof, 3. aklomp-base64 has a slightly nicer API (which is arguably subjective), and 4. aklomp-base64 does not require us to hack around bugs (like non-threadsafe initialization).
Note: aklomp-base64 rejects unpadded base64 values, whereas Turbo-Base64 decodes them on a best-effort basis. RFC 4648 leaves it open whether padding is mandatory or not, but depending on the context this may be a behavioral change to be aware of. [#54119](https://github.com/ClickHouse/ClickHouse/pull/54119) ([Mikhail Koviazin](https://github.com/mkmkme)). - -#### Bug Fix (user-visible misbehavior in an official stable release) -* Fix REPLACE/MOVE PARTITION with zero-copy replication (note: "zero-copy replication" is an experimental feature) [#54193](https://github.com/ClickHouse/ClickHouse/pull/54193) ([Alexander Tokmakov](https://github.com/tavplubix)). -* Fix zero-copy locks with hardlinks (note: "zero-copy replication" is an experimental feature) [#54859](https://github.com/ClickHouse/ClickHouse/pull/54859) ([Alexander Tokmakov](https://github.com/tavplubix)). -* Fix zero-copy garbage (note: "zero-copy replication" is an experimental feature) [#54550](https://github.com/ClickHouse/ClickHouse/pull/54550) ([Alexander Tokmakov](https://github.com/tavplubix)). -* Pass the HTTP retry timeout as milliseconds (it was incorrect before). [#54438](https://github.com/ClickHouse/ClickHouse/pull/54438) ([Duc Canh Le](https://github.com/canhld94)). -* Fix misleading error message in OUTFILE with `CapnProto`/`Protobuf` [#52870](https://github.com/ClickHouse/ClickHouse/pull/52870) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix summary reporting with parallel replicas with LIMIT [#53050](https://github.com/ClickHouse/ClickHouse/pull/53050) ([Raúl Marín](https://github.com/Algunenano)). -* Fix throttling of BACKUPs from/to S3 (in case native copy was not used) and in some other places as well [#53336](https://github.com/ClickHouse/ClickHouse/pull/53336) ([Azat Khuzhin](https://github.com/azat)). -* Fix IO throttling during copying whole directories [#53338](https://github.com/ClickHouse/ClickHouse/pull/53338) ([Azat Khuzhin](https://github.com/azat)). -* Fix: actions moved to a PREWHERE condition could lose a column [#53492](https://github.com/ClickHouse/ClickHouse/pull/53492) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). -* Fixed an internal error when replacing with byte-equal parts [#53735](https://github.com/ClickHouse/ClickHouse/pull/53735) ([Pedro Riera](https://github.com/priera)). -* Fix: require columns participating in an interpolate expression [#53754](https://github.com/ClickHouse/ClickHouse/pull/53754) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). -* Fix cluster discovery initialization + setting up fail points in config [#54113](https://github.com/ClickHouse/ClickHouse/pull/54113) ([vdimir](https://github.com/vdimir)). -* Fix issues in `accurateCastOrNull` [#54136](https://github.com/ClickHouse/ClickHouse/pull/54136) ([Salvatore Mesoraca](https://github.com/aiven-sal)). -* Fix nullable primary key with the FINAL modifier [#54164](https://github.com/ClickHouse/ClickHouse/pull/54164) ([Amos Bird](https://github.com/amosbird)). -* Fixed an error that prevented inserting new data into a replicated materialized view in the presence of duplicated data. [#54184](https://github.com/ClickHouse/ClickHouse/pull/54184) ([Pedro Riera](https://github.com/priera)). -* Fix: allow `IPv6` for bloom filter [#54200](https://github.com/ClickHouse/ClickHouse/pull/54200) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). -* Fix possible type mismatch with `IPv4` [#54212](https://github.com/ClickHouse/ClickHouse/pull/54212) ([Bharat Nallan](https://github.com/bharatnc)).
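To make the behavioral note about unpadded base64 above concrete, a small illustrative sketch (the unpadded input is a made-up example):

```sql
SELECT base64Decode('SGVsbG8=');  -- 'Hello' (padded input is accepted as before)
-- Unpadded input such as 'SGVsbG8' is now rejected by aklomp-base64,
-- whereas Turbo-Base64 previously decoded it on a best-effort basis.
```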
-* Fix `system.data_skipping_indices` for recreated indices [#54225](https://github.com/ClickHouse/ClickHouse/pull/54225) ([Artur Malchanau](https://github.com/Hexta)). -* fix name clash for multiple join rewriter v2 [#54240](https://github.com/ClickHouse/ClickHouse/pull/54240) ([Tao Wang](https://github.com/wangtZJU)). -* Fix unexpected errors in `system.errors` after join [#54306](https://github.com/ClickHouse/ClickHouse/pull/54306) ([vdimir](https://github.com/vdimir)). -* Fix `isZeroOrNull(NULL)` [#54316](https://github.com/ClickHouse/ClickHouse/pull/54316) ([flynn](https://github.com/ucasfl)). -* Fix: parallel replicas over distributed with `prefer_localhost_replica` = 1 [#54334](https://github.com/ClickHouse/ClickHouse/pull/54334) ([Igor Nikonov](https://github.com/devcrafter)). -* Fix logical error in vertical merge + replacing merge tree + optimize cleanup [#54368](https://github.com/ClickHouse/ClickHouse/pull/54368) ([alesapin](https://github.com/alesapin)). -* Fix possible error `URI contains invalid characters` in the `s3` table function [#54373](https://github.com/ClickHouse/ClickHouse/pull/54373) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix segfault in AST optimization of `arrayExists` function [#54379](https://github.com/ClickHouse/ClickHouse/pull/54379) ([Nikolay Degterinsky](https://github.com/evillique)). -* Check for overflow before addition in `analysisOfVariance` function [#54385](https://github.com/ClickHouse/ClickHouse/pull/54385) ([Antonio Andelic](https://github.com/antonio2368)). -* Reproduce and fix the bug in removeSharedRecursive [#54430](https://github.com/ClickHouse/ClickHouse/pull/54430) ([Sema Checherinda](https://github.com/CheSema)). -* Fix possible incorrect result with SimpleAggregateFunction in PREWHERE and FINAL [#54436](https://github.com/ClickHouse/ClickHouse/pull/54436) ([Azat Khuzhin](https://github.com/azat)). -* Fix filtering parts with indexHint for non analyzer [#54449](https://github.com/ClickHouse/ClickHouse/pull/54449) ([Azat Khuzhin](https://github.com/azat)). -* Fix aggregate projections with normalized states [#54480](https://github.com/ClickHouse/ClickHouse/pull/54480) ([Amos Bird](https://github.com/amosbird)). -* `clickhouse-local`: something for multiquery parameter [#54498](https://github.com/ClickHouse/ClickHouse/pull/54498) ([CuiShuoGuo](https://github.com/bakam412)). -* `clickhouse-local` supports `--database` command line argument [#54503](https://github.com/ClickHouse/ClickHouse/pull/54503) ([vdimir](https://github.com/vdimir)). -* Fix possible parsing error in `-WithNames` formats with disabled `input_format_with_names_use_header` [#54513](https://github.com/ClickHouse/ClickHouse/pull/54513) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix rare case of CHECKSUM_DOESNT_MATCH error [#54549](https://github.com/ClickHouse/ClickHouse/pull/54549) ([alesapin](https://github.com/alesapin)). -* Fix sorting of UNION ALL of already sorted results [#54564](https://github.com/ClickHouse/ClickHouse/pull/54564) ([Vitaly Baranov](https://github.com/vitlibar)). -* Fix snapshot install in Keeper [#54572](https://github.com/ClickHouse/ClickHouse/pull/54572) ([Antonio Andelic](https://github.com/antonio2368)). -* Fix race in `ColumnUnique` [#54575](https://github.com/ClickHouse/ClickHouse/pull/54575) ([Nikita Taranov](https://github.com/nickitat)). -* Annoy/Usearch index: Fix LOGICAL_ERROR during build-up with default values [#54600](https://github.com/ClickHouse/ClickHouse/pull/54600) ([Robert Schulze](https://github.com/rschu1ze)). 
-* Fix serialization of `ColumnDecimal` [#54601](https://github.com/ClickHouse/ClickHouse/pull/54601) ([Nikita Taranov](https://github.com/nickitat)). -* Fix schema inference for *Cluster functions for column names with spaces [#54635](https://github.com/ClickHouse/ClickHouse/pull/54635) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix using structure from insertion tables in case of defaults and explicit insert columns [#54655](https://github.com/ClickHouse/ClickHouse/pull/54655) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix: avoid using regex match, possibly containing alternation, as a key condition. [#54696](https://github.com/ClickHouse/ClickHouse/pull/54696) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). -* Fix ReplacingMergeTree with vertical merge and cleanup [#54706](https://github.com/ClickHouse/ClickHouse/pull/54706) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). -* Fix virtual columns having incorrect values after ORDER BY [#54811](https://github.com/ClickHouse/ClickHouse/pull/54811) ([Michael Kolupaev](https://github.com/al13n321)). -* Fix filtering parts with indexHint for non analyzer [#54825](https://github.com/ClickHouse/ClickHouse/pull/54825) [#54449](https://github.com/ClickHouse/ClickHouse/pull/54449) ([Azat Khuzhin](https://github.com/azat)). -* Fix Keeper segfault during shutdown [#54841](https://github.com/ClickHouse/ClickHouse/pull/54841) ([Antonio Andelic](https://github.com/antonio2368)). -* Fix `Invalid number of rows in Chunk` in MaterializedPostgreSQL [#54844](https://github.com/ClickHouse/ClickHouse/pull/54844) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Move obsolete format settings to a separate section [#54855](https://github.com/ClickHouse/ClickHouse/pull/54855) ([Kruglov Pavel](https://github.com/Avogar)). -* Rebuild `minmax_count_projection` when the partition key gets modified [#54943](https://github.com/ClickHouse/ClickHouse/pull/54943) ([Amos Bird](https://github.com/amosbird)). -* Fix bad cast to `ColumnVector` in function `if` [#55019](https://github.com/ClickHouse/ClickHouse/pull/55019) ([Kruglov Pavel](https://github.com/Avogar)). -* Prevent attaching parts from tables with different projections or indices [#55062](https://github.com/ClickHouse/ClickHouse/pull/55062) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). -* Store NULL in the scalar result map for an empty subquery result [#52240](https://github.com/ClickHouse/ClickHouse/pull/52240) ([vdimir](https://github.com/vdimir)). -* Fix `FINAL` producing invalid read ranges in a rare case [#54934](https://github.com/ClickHouse/ClickHouse/pull/54934) ([Nikita Taranov](https://github.com/nickitat)). -* Fix: insert quorum w/o keeper retries [#55026](https://github.com/ClickHouse/ClickHouse/pull/55026) ([Igor Nikonov](https://github.com/devcrafter)). -* Fix simple state with nullable [#55030](https://github.com/ClickHouse/ClickHouse/pull/55030) ([Pedro Riera](https://github.com/priera)). - - ### ClickHouse release 23.8 LTS, 2023-08-31 - -#### Backward Incompatible Change -* If a dynamic disk contains a name, it should be specified as `disk = disk(name = 'disk_name', ...)` in disk function arguments. In the previous version it could be specified as `disk = disk_(...)`, which is no longer supported. [#52820](https://github.com/ClickHouse/ClickHouse/pull/52820) ([Kseniia Sumarokova](https://github.com/kssenii)). -* `clickhouse-benchmark` will establish connections in parallel when invoked with `--concurrency` greater than one.
Previously it was unusable if you ran it with 1000 concurrent connections from Europe to the US. Correct calculation of QPS for connections with high latency. Backward incompatible change: the option for JSON output of `clickhouse-benchmark` is removed. If you've used this option, you can also extract data from the `system.query_log` in JSON format as a workaround. [#53293](https://github.com/ClickHouse/ClickHouse/pull/53293) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* The `microseconds` column is removed from the `system.text_log`, and the `milliseconds` column is removed from the `system.metric_log`, because they are redundant in the presence of the `event_time_microseconds` column. [#53601](https://github.com/ClickHouse/ClickHouse/pull/53601) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Deprecate the metadata cache feature. It is experimental and we have never used it. The feature is dangerous: [#51182](https://github.com/ClickHouse/ClickHouse/issues/51182). Remove the `system.merge_tree_metadata_cache` system table. The metadata cache is still available in this version but will be removed soon. This closes [#39197](https://github.com/ClickHouse/ClickHouse/issues/39197). [#51303](https://github.com/ClickHouse/ClickHouse/pull/51303) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Disable support for 3DES in TLS connections. [#52893](https://github.com/ClickHouse/ClickHouse/pull/52893) ([Kenji Noguchi](https://github.com/knoguchi)). - -#### New Feature -* Direct import from zip/7z/tar archives. Example: `file('*.zip :: *.csv')`. [#50321](https://github.com/ClickHouse/ClickHouse/pull/50321) ([nikitakeba](https://github.com/nikitakeba)). -* Add column `ptr` to `system.trace_log` for `trace_type = 'MemorySample'`. This column contains the address of the allocation. Added function `flameGraph` which can build a flame graph containing allocated and not-released memory. Reworking of [#38391](https://github.com/ClickHouse/ClickHouse/issues/38391). [#45322](https://github.com/ClickHouse/ClickHouse/pull/45322) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Added table function `azureBlobStorageCluster`. The supported set of features is very similar to table function `s3Cluster`. [#50795](https://github.com/ClickHouse/ClickHouse/pull/50795) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). -* Allow using `cluster`, `clusterAllReplicas`, `remote`, and `remoteSecure` without a table name, as requested in issue [#50808](https://github.com/ClickHouse/ClickHouse/issues/50808). [#50848](https://github.com/ClickHouse/ClickHouse/pull/50848) ([Yangkuan Liu](https://github.com/LiuYangkuan)). -* A system table to monitor Kafka consumers. [#50999](https://github.com/ClickHouse/ClickHouse/pull/50999) ([Ilya Golshtein](https://github.com/ilejn)). -* Added `max_sessions_for_user` setting. [#51724](https://github.com/ClickHouse/ClickHouse/pull/51724) ([Alexey Gerasimchuck](https://github.com/Demilivor)). -* New functions `toUTCTimestamp/fromUTCTimestamp` to act the same as Spark's `to_utc_timestamp/from_utc_timestamp`. [#52117](https://github.com/ClickHouse/ClickHouse/pull/52117) ([KevinyhZou](https://github.com/KevinyhZou)). -* Add new functions `structureToCapnProtoSchema`/`structureToProtobufSchema` that convert a ClickHouse table structure to a CapnProto/Protobuf format schema.
Allow inputting/outputting data in CapnProto/Protobuf format without an external format schema, using an autogenerated schema from the table structure (controlled by settings `format_capn_proto_use_autogenerated_schema`/`format_protobuf_use_autogenerated_schema`). Allow exporting the autogenerated schema during input/output using the setting `output_format_schema`. [#52278](https://github.com/ClickHouse/ClickHouse/pull/52278) ([Kruglov Pavel](https://github.com/Avogar)). -* A new field `query_cache_usage` in `system.query_log` now shows if and how the query cache was used. [#52384](https://github.com/ClickHouse/ClickHouse/pull/52384) ([Robert Schulze](https://github.com/rschu1ze)). -* Add new functions `startsWithUTF8` and `endsWithUTF8`. [#52555](https://github.com/ClickHouse/ClickHouse/pull/52555) ([李扬](https://github.com/taiyang-li)). -* Allow a variable number of columns in TSV/CustomSeparated/JSONCompactEachRow, and make schema inference work with a variable number of columns. Add settings `input_format_tsv_allow_variable_number_of_columns`, `input_format_custom_allow_variable_number_of_columns`, `input_format_json_compact_allow_variable_number_of_columns`. [#52692](https://github.com/ClickHouse/ClickHouse/pull/52692) ([Kruglov Pavel](https://github.com/Avogar)). -* Added `SYSTEM STOP/START PULLING REPLICATION LOG` queries (for testing `ReplicatedMergeTree`). [#52881](https://github.com/ClickHouse/ClickHouse/pull/52881) ([Alexander Tokmakov](https://github.com/tavplubix)). -* Allow executing constant non-deterministic functions in mutations on the initiator. [#53129](https://github.com/ClickHouse/ClickHouse/pull/53129) ([Anton Popov](https://github.com/CurtizJ)). -* Add input format `One` that doesn't read any data and always returns a single row with a column `dummy` of type `UInt8` and value `0`, like `system.one`. It can be used together with the `_file/_path` virtual columns to list files in file/s3/url/hdfs/etc table functions without reading any data. [#53209](https://github.com/ClickHouse/ClickHouse/pull/53209) ([Kruglov Pavel](https://github.com/Avogar)). -* Add `tupleConcat` function. Closes [#52759](https://github.com/ClickHouse/ClickHouse/issues/52759). [#53239](https://github.com/ClickHouse/ClickHouse/pull/53239) ([Nikolay Degterinsky](https://github.com/evillique)). -* Support `TRUNCATE DATABASE` operation. [#53261](https://github.com/ClickHouse/ClickHouse/pull/53261) ([Bharat Nallan](https://github.com/bharatnc)). -* Add `max_threads_for_indexes` setting to limit the number of threads used for primary key processing. [#53313](https://github.com/ClickHouse/ClickHouse/pull/53313) ([jorisgio](https://github.com/jorisgio)). -* Re-add SipHash keyed functions. [#53525](https://github.com/ClickHouse/ClickHouse/pull/53525) ([Salvatore Mesoraca](https://github.com/aiven-sal)). -* ([#52755](https://github.com/ClickHouse/ClickHouse/issues/52755) , [#52895](https://github.com/ClickHouse/ClickHouse/issues/52895)) Added functions `arrayRotateLeft`, `arrayRotateRight`, `arrayShiftLeft`, `arrayShiftRight`. [#53557](https://github.com/ClickHouse/ClickHouse/pull/53557) ([Mikhail Koviazin](https://github.com/mkmkme)). -* Add column `name` to `system.clusters` as an alias to `cluster`. [#53605](https://github.com/ClickHouse/ClickHouse/pull/53605) ([irenjj](https://github.com/irenjj)). -* The advanced dashboard now allows mass editing (save/load). [#53608](https://github.com/ClickHouse/ClickHouse/pull/53608) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
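A quick, illustrative sketch of a few of the new functions mentioned above; the expected results in the comments follow the documented semantics:

```sql
SELECT arrayRotateLeft([1, 2, 3, 4], 1);    -- [2, 3, 4, 1]
SELECT arrayShiftRight([1, 2, 3, 4], 2);    -- [0, 0, 1, 2] (vacated positions take the default value)
SELECT tupleConcat((1, 'a'), (2, 'b'));     -- (1, 'a', 2, 'b')
SELECT startsWithUTF8('富强民主', '富强');  -- 1
```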
-* The advanced dashboard now has an option to maximize charts and move them around. [#53622](https://github.com/ClickHouse/ClickHouse/pull/53622) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Added support for adding and subtracting arrays: `[5,2] + [1,7]`. Division and multiplication were not implemented due to confusion between pointwise multiplication and the scalar product of arguments. Closes [#49939](https://github.com/ClickHouse/ClickHouse/issues/49939). [#52625](https://github.com/ClickHouse/ClickHouse/pull/52625) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). -* Add support for string literals as table names. Closes [#52178](https://github.com/ClickHouse/ClickHouse/issues/52178). [#52635](https://github.com/ClickHouse/ClickHouse/pull/52635) ([hendrik-m](https://github.com/hendrik-m)). - -#### Experimental Feature -* Add new table engine `S3Queue` for streaming data import from s3. Closes [#37012](https://github.com/ClickHouse/ClickHouse/issues/37012). [#49086](https://github.com/ClickHouse/ClickHouse/pull/49086) ([s-kat](https://github.com/s-kat)). It is not ready to use. Do not use it. -* Enable parallel reading from replicas over distributed table. Related to [#49708](https://github.com/ClickHouse/ClickHouse/issues/49708). [#53005](https://github.com/ClickHouse/ClickHouse/pull/53005) ([Igor Nikonov](https://github.com/devcrafter)). -* Add experimental support for HNSW as approximate neighbor search method. [#53447](https://github.com/ClickHouse/ClickHouse/pull/53447) ([Davit Vardanyan](https://github.com/davvard)). This is currently intended for those who continue working on the implementation. Do not use it. - -#### Performance Improvement -* Parquet filter pushdown. I.e. when reading Parquet files, row groups (chunks of the file) are skipped based on the WHERE condition and the min/max values in each column. In particular, if the file is roughly sorted by some column, queries that filter by a short range of that column will be much faster. [#52951](https://github.com/ClickHouse/ClickHouse/pull/52951) ([Michael Kolupaev](https://github.com/al13n321)). -* Optimize reading small row groups by batching them together in Parquet. Closes [#53069](https://github.com/ClickHouse/ClickHouse/issues/53069). [#53281](https://github.com/ClickHouse/ClickHouse/pull/53281) ([Kruglov Pavel](https://github.com/Avogar)). -* Optimize count from files in most input formats. Closes [#44334](https://github.com/ClickHouse/ClickHouse/issues/44334). [#53637](https://github.com/ClickHouse/ClickHouse/pull/53637) ([Kruglov Pavel](https://github.com/Avogar)). -* Use filter by file/path before reading in `url`/`file`/`hdfs` table functions. [#53529](https://github.com/ClickHouse/ClickHouse/pull/53529) ([Kruglov Pavel](https://github.com/Avogar)). -* Enable JIT compilation for AArch64, PowerPC, SystemZ, RISC-V. [#38217](https://github.com/ClickHouse/ClickHouse/pull/38217) ([Maksim Kita](https://github.com/kitaisreal)). -* Add setting `rewrite_count_distinct_if_with_count_distinct_implementation` to rewrite `countDistinctIf` with `count_distinct_implementation`. Closes [#30642](https://github.com/ClickHouse/ClickHouse/issues/30642). [#46051](https://github.com/ClickHouse/ClickHouse/pull/46051) ([flynn](https://github.com/ucasfl)). -* Speed up merging of states of `uniq` and `uniqExact` aggregate functions by parallelizing conversion before merge. [#50748](https://github.com/ClickHouse/ClickHouse/pull/50748) ([Jiebin Sun](https://github.com/jiebinn)). 
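For the `rewrite_count_distinct_if_with_count_distinct_implementation` setting mentioned above, a minimal sketch (the `events` table and its columns are hypothetical):

```sql
-- With the setting enabled, countDistinctIf is rewritten to the configured
-- count_distinct_implementation (uniqExact by default).
SET rewrite_count_distinct_if_with_count_distinct_implementation = 1;
SELECT countDistinctIf(user_id, event = 'click') FROM events;
```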
-* Optimize aggregation performance of nullable string key when using a large number of variable length keys. [#51399](https://github.com/ClickHouse/ClickHouse/pull/51399) ([LiuNeng](https://github.com/liuneng1994)). -* Add a pass in Analyzer for time filter optimization with preimage. The performance experiments of SSB on the ICX device (Intel Xeon Platinum 8380 CPU, 80 cores, 160 threads) show that this change could bring an improvement of 8.5% to the geomean QPS when the experimental analyzer is enabled. [#52091](https://github.com/ClickHouse/ClickHouse/pull/52091) ([Zhiguo Zhou](https://github.com/ZhiguoZh)). -* Optimize the merge if all hash sets are single-level in the `uniqExact` (COUNT DISTINCT) function. [#52973](https://github.com/ClickHouse/ClickHouse/pull/52973) ([Jiebin Sun](https://github.com/jiebinn)). -* `Join` table engine: do not clone hash join data structure with all columns. [#53046](https://github.com/ClickHouse/ClickHouse/pull/53046) ([Duc Canh Le](https://github.com/canhld94)). -* Implement native `ORC` input format without the "apache arrow" library to improve performance. [#53324](https://github.com/ClickHouse/ClickHouse/pull/53324) ([李扬](https://github.com/taiyang-li)). -* The dashboard will tell the server to compress the data, which is useful for large time frames over slow internet connections. For example, one chart with 86400 points can be 1.5 MB uncompressed and 60 KB compressed with `br`. [#53569](https://github.com/ClickHouse/ClickHouse/pull/53569) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Better utilization of thread pool for BACKUPs and RESTOREs. [#53649](https://github.com/ClickHouse/ClickHouse/pull/53649) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). -* Load filesystem cache metadata on startup in parallel. Configured by `load_metadata_threads` (default: 1) cache config setting. Related to [#52037](https://github.com/ClickHouse/ClickHouse/issues/52037). [#52943](https://github.com/ClickHouse/ClickHouse/pull/52943) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Improve `move_primary_key_columns_to_end_of_prewhere`. [#53337](https://github.com/ClickHouse/ClickHouse/pull/53337) ([Han Fei](https://github.com/hanfei1991)). -* This optimizes the interaction with ClickHouse Keeper. Previously the caller could register the same watch callback multiple times. In that case each entry was consuming memory and the same callback was called multiple times which didn't make much sense. In order to avoid this the caller could have some logic to not add the same watch multiple times. With this change this deduplication is done internally if the watch callback is passed via shared_ptr. [#53452](https://github.com/ClickHouse/ClickHouse/pull/53452) ([Alexander Gololobov](https://github.com/davenger)). -* Cache number of rows in files for count in file/s3/url/hdfs/azure functions. The cache can be enabled/disabled by setting `use_cache_for_count_from_files` (enabled by default). Continuation of https://github.com/ClickHouse/ClickHouse/pull/53637. [#53692](https://github.com/ClickHouse/ClickHouse/pull/53692) ([Kruglov Pavel](https://github.com/Avogar)). -* More careful thread management will improve the speed of the S3 table function over a large number of files by more than ~25%. [#53668](https://github.com/ClickHouse/ClickHouse/pull/53668) ([pufit](https://github.com/pufit)). - -#### Improvement -* Add `stderr_reaction` configuration/setting to control the reaction (none, log or throw) when external command stderr has data. 
This helps make debugging external commands easier. [#43210](https://github.com/ClickHouse/ClickHouse/pull/43210) ([Amos Bird](https://github.com/amosbird)). -* Add a `partition` column to the `system.part_log` and merge tables. [#48990](https://github.com/ClickHouse/ClickHouse/pull/48990) ([Jianfei Hu](https://github.com/incfly)). -* The sizes of the (index) uncompressed/mark, mmap and query caches can now be configured dynamically at runtime (without a server restart). [#51446](https://github.com/ClickHouse/ClickHouse/pull/51446) ([Robert Schulze](https://github.com/rschu1ze)). -* If a dictionary is created with a complex key, automatically choose the "complex key" layout variant. [#49587](https://github.com/ClickHouse/ClickHouse/pull/49587) ([xiebin](https://github.com/xbthink)). -* Add setting `use_concurrency_control` for better testing of the new concurrency control feature. [#49618](https://github.com/ClickHouse/ClickHouse/pull/49618) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Added suggestions for mistyped names for databases and tables. [#49801](https://github.com/ClickHouse/ClickHouse/pull/49801) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). -* Reading small files from HDFS via Gluten took noticeably more time than querying them directly with Spark; this case has been improved. [#50063](https://github.com/ClickHouse/ClickHouse/pull/50063) ([KevinyhZou](https://github.com/KevinyhZou)). -* Reduce the amount of worthless error logging after session expiration. [#50171](https://github.com/ClickHouse/ClickHouse/pull/50171) ([helifu](https://github.com/helifu)). -* Introduce fallback ZooKeeper sessions which are time-bound. Fixed the `index` column in system.zookeeper_connection for DNS addresses. [#50424](https://github.com/ClickHouse/ClickHouse/pull/50424) ([Anton Kozlov](https://github.com/tonickkozlov)). -* Add the ability to log when `max_partitions_per_insert_block` is reached. [#50948](https://github.com/ClickHouse/ClickHouse/pull/50948) ([Sean Haynes](https://github.com/seandhaynes)). -* Added a bunch of custom commands to clickhouse-keeper-client (mostly to make ClickHouse debugging easier). [#51117](https://github.com/ClickHouse/ClickHouse/pull/51117) ([pufit](https://github.com/pufit)). -* Updated the connection string check in the `azureBlobStorage` table function, as a connection string with an SAS token does not always begin with the default endpoint, and updated the connection URL to include the SAS token after adding Azure's container to the URL. [#51141](https://github.com/ClickHouse/ClickHouse/pull/51141) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). -* Fix the description for filtering sets in the `full_sorting_merge` JOIN algorithm. [#51329](https://github.com/ClickHouse/ClickHouse/pull/51329) ([Tanay Tummalapalli](https://github.com/ttanay)). -* Fixed memory consumption in `Aggregator` when `max_block_size` is huge. [#51566](https://github.com/ClickHouse/ClickHouse/pull/51566) ([Nikita Taranov](https://github.com/nickitat)). -* Add `SYSTEM SYNC FILESYSTEM CACHE` command. It will compare the in-memory state of the filesystem cache with what it has on disk and fix the in-memory state if needed. This is only needed if you are making manual interventions in on-disk data, which is highly discouraged. [#51622](https://github.com/ClickHouse/ClickHouse/pull/51622) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Attempt to create a generic proxy resolver for ClickHouse while keeping backwards compatibility with the existing S3 storage config proxy resolver.
[#51749](https://github.com/ClickHouse/ClickHouse/pull/51749) ([Arthur Passos](https://github.com/arthurpassos)). -* Support reading tuple subcolumns from file/s3/hdfs/url/azureBlobStorage table functions. [#51806](https://github.com/ClickHouse/ClickHouse/pull/51806) ([Kruglov Pavel](https://github.com/Avogar)). -* Function `arrayIntersect` now returns the values in the order corresponding to the first argument. Closes [#27622](https://github.com/ClickHouse/ClickHouse/issues/27622). [#51850](https://github.com/ClickHouse/ClickHouse/pull/51850) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). -* Add new queries which allow creating/dropping access entities in a specified access storage, or moving access entities from one access storage to another. [#51912](https://github.com/ClickHouse/ClickHouse/pull/51912) ([pufit](https://github.com/pufit)). -* Make `ALTER TABLE FREEZE` queries not replicated in the Replicated database engine. [#52064](https://github.com/ClickHouse/ClickHouse/pull/52064) ([Mike Kot](https://github.com/myrrc)). -* Added possibility to flush system tables on unexpected shutdown. [#52174](https://github.com/ClickHouse/ClickHouse/pull/52174) ([Alexey Gerasimchuck](https://github.com/Demilivor)). -* Fix the case when the `s3` table function refused to work with pre-signed URLs. Closes [#50846](https://github.com/ClickHouse/ClickHouse/issues/50846). [#52310](https://github.com/ClickHouse/ClickHouse/pull/52310) ([chen](https://github.com/xiedeyantu)). -* Add column `name` as an alias to `event` and `metric` in the `system.events` and `system.metrics` tables. Closes [#51257](https://github.com/ClickHouse/ClickHouse/issues/51257). [#52315](https://github.com/ClickHouse/ClickHouse/pull/52315) ([chen](https://github.com/xiedeyantu)). -* Added support for the `CREATE UNIQUE INDEX` syntax in the parser as a no-op for better SQL compatibility. `UNIQUE` indexes are not supported. Set `create_index_ignore_unique = 1` to ignore the UNIQUE keyword in queries. [#52320](https://github.com/ClickHouse/ClickHouse/pull/52320) ([Ilya Yatsishin](https://github.com/qoega)). -* Add support for predefined macros (`{database}` and `{table}`) in some Kafka engine settings: topic, consumer, client_id, etc. [#52386](https://github.com/ClickHouse/ClickHouse/pull/52386) ([Yury Bogomolov](https://github.com/ybogo)). -* Disable updating the filesystem cache during backup/restore. The filesystem cache must not be updated during backup/restore; it just slows down the process without any benefit (because the BACKUP command can read a lot of data and it's no use putting all the data into the filesystem cache and immediately evicting it). [#52402](https://github.com/ClickHouse/ClickHouse/pull/52402) ([Vitaly Baranov](https://github.com/vitlibar)). -* The S3 endpoint configuration now allows using it from the root, and '/' is appended automatically if needed. [#47809](https://github.com/ClickHouse/ClickHouse/issues/47809). [#52600](https://github.com/ClickHouse/ClickHouse/pull/52600) ([xiaolei565](https://github.com/xiaolei565)). -* Allow positional options for clickhouse-local and populate global UDF settings (`user_scripts_path` and `user_defined_executable_functions_config`). [#52643](https://github.com/ClickHouse/ClickHouse/pull/52643) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). -* `system.asynchronous_metrics` now includes the metrics `QueryCacheEntries` and `QueryCacheBytes` to inspect the query cache. [#52650](https://github.com/ClickHouse/ClickHouse/pull/52650) ([Robert Schulze](https://github.com/rschu1ze)).
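For example, the new query-cache metrics mentioned above can be inspected with a simple query (a minimal sketch):

```sql
SELECT metric, value
FROM system.asynchronous_metrics
WHERE metric IN ('QueryCacheEntries', 'QueryCacheBytes');
```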
-* Added the possibility to use the `s3_storage_class` parameter in the `SETTINGS` clause of the `BACKUP` statement for backups to S3. [#52658](https://github.com/ClickHouse/ClickHouse/pull/52658) ([Roman Vasin](https://github.com/rvasin)). -* Add utility `print-backup-info.py` which parses a backup metadata file and prints information about the backup. [#52690](https://github.com/ClickHouse/ClickHouse/pull/52690) ([Vitaly Baranov](https://github.com/vitlibar)). -* Closes [#49510](https://github.com/ClickHouse/ClickHouse/issues/49510). Currently database and table names are case-sensitive, but BI tools query `information_schema` sometimes in lowercase, sometimes in uppercase. For this reason we have the `information_schema` database, containing lowercase tables such as `information_schema.tables`, and the `INFORMATION_SCHEMA` database, containing uppercase tables such as `INFORMATION_SCHEMA.TABLES`. But some tools query `INFORMATION_SCHEMA.tables` and `information_schema.TABLES`. The proposed solution is to duplicate both lowercase and uppercase tables in the lowercase and uppercase `information_schema` databases. [#52695](https://github.com/ClickHouse/ClickHouse/pull/52695) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). -* The query `CHECK TABLE` has better performance and usability (sends progress updates, cancellable). [#52745](https://github.com/ClickHouse/ClickHouse/pull/52745) ([vdimir](https://github.com/vdimir)). -* Add support for `modulo`, `intDiv`, `intDivOrZero` for tuples by distributing them across the tuple's elements. [#52758](https://github.com/ClickHouse/ClickHouse/pull/52758) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). -* Search for default `yaml` and `yml` configs in clickhouse-client after `xml`. [#52767](https://github.com/ClickHouse/ClickHouse/pull/52767) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* When merging into a non-'clickhouse'-rooted configuration, configs with a different root node name are now just bypassed without an exception. [#52770](https://github.com/ClickHouse/ClickHouse/pull/52770) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). -* Now it's possible to specify the min (`memory_profiler_sample_min_allocation_size`) and max (`memory_profiler_sample_max_allocation_size`) size for allocations to be tracked with the sampling memory profiler. [#52779](https://github.com/ClickHouse/ClickHouse/pull/52779) ([alesapin](https://github.com/alesapin)). -* Add `precise_float_parsing` setting to switch float parsing methods (fast/precise). [#52791](https://github.com/ClickHouse/ClickHouse/pull/52791) ([Andrey Zvonov](https://github.com/zvonand)). -* Use the same default paths for `clickhouse-keeper` (symlink) as for `clickhouse-keeper` (executable). [#52861](https://github.com/ClickHouse/ClickHouse/pull/52861) ([Vitaly Baranov](https://github.com/vitlibar)). -* Improve the error message for table function `remote`. Closes [#40220](https://github.com/ClickHouse/ClickHouse/issues/40220). [#52959](https://github.com/ClickHouse/ClickHouse/pull/52959) ([jiyoungyoooo](https://github.com/jiyoungyoooo)). -* Added the possibility to specify a custom storage policy in the `SETTINGS` clause of `RESTORE` queries. [#52970](https://github.com/ClickHouse/ClickHouse/pull/52970) ([Victor Krasnov](https://github.com/sirvickr)). -* Add the ability to throttle the S3 requests on backup operations (`BACKUP` and `RESTORE` commands now honor `s3_max_[get/put]_[rps/burst]`).
[#52974](https://github.com/ClickHouse/ClickHouse/pull/52974) ([Daniel Pozo Escalona](https://github.com/danipozo)). -* Add settings to ignore ON CLUSTER clause in queries for management of replicated user-defined functions or access control entities with replicated storage. [#52975](https://github.com/ClickHouse/ClickHouse/pull/52975) ([Aleksei Filatov](https://github.com/aalexfvk)). -* EXPLAIN actions for JOIN step. [#53006](https://github.com/ClickHouse/ClickHouse/pull/53006) ([Maksim Kita](https://github.com/kitaisreal)). -* Make `hasTokenOrNull` and `hasTokenCaseInsensitiveOrNull` return null for empty needles. [#53059](https://github.com/ClickHouse/ClickHouse/pull/53059) ([ltrk2](https://github.com/ltrk2)). -* Allow to restrict allowed paths for filesystem caches. Mainly useful for dynamic disks. If in server config `filesystem_caches_path` is specified, all filesystem caches' paths will be restricted to this directory. E.g. if the `path` in cache config is relative - it will be put in `filesystem_caches_path`; if `path` in cache config is absolute, it will be required to lie inside `filesystem_caches_path`. If `filesystem_caches_path` is not specified in config, then behaviour will be the same as in earlier versions. [#53124](https://github.com/ClickHouse/ClickHouse/pull/53124) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Added a bunch of custom commands (mostly to make ClickHouse debugging easier). [#53127](https://github.com/ClickHouse/ClickHouse/pull/53127) ([pufit](https://github.com/pufit)). -* Add diagnostic info about file name during schema inference - it helps when you process multiple files with globs. [#53135](https://github.com/ClickHouse/ClickHouse/pull/53135) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Client will load suggestions using the main connection if the second connection is not allowed to create a session. [#53177](https://github.com/ClickHouse/ClickHouse/pull/53177) ([Alexey Gerasimchuck](https://github.com/Demilivor)). -* Add EXCEPT clause to `SYSTEM STOP/START LISTEN QUERIES [ALL/DEFAULT/CUSTOM]` query, for example `SYSTEM STOP LISTEN QUERIES ALL EXCEPT TCP, HTTP`. [#53280](https://github.com/ClickHouse/ClickHouse/pull/53280) ([Nikolay Degterinsky](https://github.com/evillique)). -* Change the default of `max_concurrent_queries` from 100 to 1000. It's ok to have many concurrent queries if they are not heavy, and mostly waiting for the network. Note: don't confuse concurrent queries and QPS: for example, ClickHouse server can do tens of thousands of QPS with less than 100 concurrent queries. [#53285](https://github.com/ClickHouse/ClickHouse/pull/53285) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Limit number of concurrent background partition optimize merges. [#53405](https://github.com/ClickHouse/ClickHouse/pull/53405) ([Duc Canh Le](https://github.com/canhld94)). -* Added a setting `allow_moving_table_directory_to_trash` that allows to ignore `Directory for table data already exists` error when replicating/recovering a `Replicated` database. [#53425](https://github.com/ClickHouse/ClickHouse/pull/53425) ([Alexander Tokmakov](https://github.com/tavplubix)). -* If server settings `asynchronous_metrics_update_period_s` and `asynchronous_heavy_metrics_update_period_s` are misconfigured to 0, it will now fail gracefully instead of terminating the application. [#53428](https://github.com/ClickHouse/ClickHouse/pull/53428) ([Robert Schulze](https://github.com/rschu1ze)). 
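A brief sketch of the `EXCEPT` clause for `SYSTEM STOP/START LISTEN` described above:

```sql
-- Stop accepting new connections on every protocol except the native TCP
-- and HTTP interfaces, then re-enable everything.
SYSTEM STOP LISTEN QUERIES ALL EXCEPT TCP, HTTP;
SYSTEM START LISTEN QUERIES ALL;
```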
-* The ClickHouse server now respects memory limits changed via cgroups when reloading its configuration. [#53455](https://github.com/ClickHouse/ClickHouse/pull/53455) ([Robert Schulze](https://github.com/rschu1ze)). -* Add the ability to turn off flushing of Distributed tables on `DETACH`, `DROP`, or server shutdown. [#53501](https://github.com/ClickHouse/ClickHouse/pull/53501) ([Azat Khuzhin](https://github.com/azat)). -* The `domainRFC` function now supports IPv6 in square brackets. [#53506](https://github.com/ClickHouse/ClickHouse/pull/53506) ([Chen768959](https://github.com/Chen768959)). -* Use a longer timeout for S3 CopyObject requests, which are used in backups. [#53533](https://github.com/ClickHouse/ClickHouse/pull/53533) ([Michael Kolupaev](https://github.com/al13n321)). -* Added server setting `aggregate_function_group_array_max_element_size`. This setting is used to limit the array size for the `groupArray` function at serialization. The default value is `16777215`. [#53550](https://github.com/ClickHouse/ClickHouse/pull/53550) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* `SCHEMA` was added as an alias for `DATABASE` to improve MySQL compatibility. [#53587](https://github.com/ClickHouse/ClickHouse/pull/53587) ([Daniël van Eeden](https://github.com/dveeden)). -* Add asynchronous metrics about tables in the system database. For example, `TotalBytesOfMergeTreeTablesSystem`. This closes [#53603](https://github.com/ClickHouse/ClickHouse/issues/53603). [#53604](https://github.com/ClickHouse/ClickHouse/pull/53604) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* The SQL editor in the Play UI and Dashboard will not use Grammarly. [#53614](https://github.com/ClickHouse/ClickHouse/pull/53614) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* As expert-level settings, it is now possible to (1) configure the `size_ratio` (i.e. the relative size of the protected queue) of the [index] mark/uncompressed caches, (2) configure the cache policy of the index mark and index uncompressed caches. [#53657](https://github.com/ClickHouse/ClickHouse/pull/53657) ([Robert Schulze](https://github.com/rschu1ze)). -* Added client info validation to the query packet in TCPHandler. [#53673](https://github.com/ClickHouse/ClickHouse/pull/53673) ([Alexey Gerasimchuck](https://github.com/Demilivor)). -* Retry loading parts in case of network errors while interacting with Microsoft Azure. [#53750](https://github.com/ClickHouse/ClickHouse/pull/53750) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). -* Stacktraces for exceptions and Materialized View exceptions are now propagated. [#53766](https://github.com/ClickHouse/ClickHouse/pull/53766) ([Ilya Golshtein](https://github.com/ilejn)). -* If no hostname or port were specified, keeper client will try to search for a connection string in ClickHouse's config.xml. [#53769](https://github.com/ClickHouse/ClickHouse/pull/53769) ([pufit](https://github.com/pufit)). -* Add profile event `PartsLockMicroseconds` which shows the number of microseconds we hold the data parts lock in the MergeTree table engine family. [#53797](https://github.com/ClickHouse/ClickHouse/pull/53797) ([alesapin](https://github.com/alesapin)). -* Make the reconnect limit in RAFT configurable for Keeper. This configuration can help Keeper rebuild connections with peers more quickly if the current connection is broken. [#53817](https://github.com/ClickHouse/ClickHouse/pull/53817) ([Pengyuan Bian](https://github.com/bianpengyuan)).
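As a small illustration of the `SCHEMA` alias mentioned above (the database name is hypothetical; the alias is assumed to behave exactly like `DATABASE`):

```sql
-- SCHEMA is accepted wherever DATABASE is, for MySQL compatibility.
CREATE SCHEMA IF NOT EXISTS analytics;
DROP SCHEMA analytics;
```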
-* Ignore foreign keys in table definitions to improve compatibility with MySQL, so users don't need to rewrite the foreign-key parts of their SQL; ref [#53380](https://github.com/ClickHouse/ClickHouse/issues/53380). [#53864](https://github.com/ClickHouse/ClickHouse/pull/53864) ([jsc0218](https://github.com/jsc0218)). - -#### Build/Testing/Packaging Improvement -* Don't expose symbols from the ClickHouse binary to the dynamic linker. It might fix [#43933](https://github.com/ClickHouse/ClickHouse/issues/43933). [#47475](https://github.com/ClickHouse/ClickHouse/pull/47475) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Add `clickhouse-keeper-client` symlink to the clickhouse-server package. [#51882](https://github.com/ClickHouse/ClickHouse/pull/51882) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). -* Add https://github.com/elliotchance/sqltest to CI to report SQL 2016 conformance. [#52293](https://github.com/ClickHouse/ClickHouse/pull/52293) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Upgrade PRQL to 0.9.3. [#53060](https://github.com/ClickHouse/ClickHouse/pull/53060) ([Maximilian Roos](https://github.com/max-sixty)). -* System tables from CI checks are exported to ClickHouse Cloud. [#53086](https://github.com/ClickHouse/ClickHouse/pull/53086) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* The compiler's profile data (`-ftime-trace`) is uploaded to ClickHouse Cloud. [#53100](https://github.com/ClickHouse/ClickHouse/pull/53100) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Speed up Debug and Tidy builds. [#53178](https://github.com/ClickHouse/ClickHouse/pull/53178) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Speed up the build by removing tons and tonnes of garbage. One of the frequently included headers was poisoned by boost. [#53180](https://github.com/ClickHouse/ClickHouse/pull/53180) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Remove even more garbage. [#53182](https://github.com/ClickHouse/ClickHouse/pull/53182) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* The function `arrayAUC` was using heavy C++ templates - ditched them. [#53183](https://github.com/ClickHouse/ClickHouse/pull/53183) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Some translation units were always rebuilt regardless of ccache. The culprit was found and fixed. [#53184](https://github.com/ClickHouse/ClickHouse/pull/53184) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* The compiler's profile data (`-ftime-trace`) is uploaded to ClickHouse Cloud; the second attempt after [#53100](https://github.com/ClickHouse/ClickHouse/issues/53100). [#53213](https://github.com/ClickHouse/ClickHouse/pull/53213) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Export logs from CI in stateful tests to ClickHouse Cloud. [#53351](https://github.com/ClickHouse/ClickHouse/pull/53351) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Export logs from CI in stress tests. [#53353](https://github.com/ClickHouse/ClickHouse/pull/53353) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Export logs from CI in fuzzer. [#53354](https://github.com/ClickHouse/ClickHouse/pull/53354) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Preserve environment parameters in the `clickhouse start` command. Fixes [#51962](https://github.com/ClickHouse/ClickHouse/issues/51962). [#53418](https://github.com/ClickHouse/ClickHouse/pull/53418) ([Mikhail f. 
Shiryaev](https://github.com/Felixoid)). -* Follow-up for [#53418](https://github.com/ClickHouse/ClickHouse/issues/53418). Small improvements for install_check.py, adding tests for proper passing of ENV parameters to the main process on `init.d start`. [#53457](https://github.com/ClickHouse/ClickHouse/pull/53457) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). -* Reorganize file management in CMake to prevent potential duplications. For instance, `indexHint.cpp` is duplicated in both `dbms_sources` and `clickhouse_functions_sources`. [#53621](https://github.com/ClickHouse/ClickHouse/pull/53621) ([Amos Bird](https://github.com/amosbird)). -* Upgrade snappy to 1.1.10. [#53672](https://github.com/ClickHouse/ClickHouse/pull/53672) ([李扬](https://github.com/taiyang-li)). -* Slightly improve the CMake build by sanitizing some dependencies and removing some duplicates. Each commit includes a short description of the changes made. [#53759](https://github.com/ClickHouse/ClickHouse/pull/53759) ([Amos Bird](https://github.com/amosbird)). - -#### Bug Fix (user-visible misbehavior in an official stable release) -* Do not reset the (experimental) Annoy index during build-up with more than one mark [#51325](https://github.com/ClickHouse/ClickHouse/pull/51325) ([Tian Xinhui](https://github.com/xinhuitian)). -* Fix usage of temporary directories during RESTORE [#51493](https://github.com/ClickHouse/ClickHouse/pull/51493) ([Azat Khuzhin](https://github.com/azat)). -* Fix binary arithmetic for Nullable(IPv4) [#51642](https://github.com/ClickHouse/ClickHouse/pull/51642) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). -* Support IPv4 and IPv6 data types as dictionary attributes [#51756](https://github.com/ClickHouse/ClickHouse/pull/51756) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). -* A fix for the checksum of compressed marks [#51777](https://github.com/ClickHouse/ClickHouse/pull/51777) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). -* Fix a comma mistakenly being parsed as part of a datetime in CSV best-effort parsing [#51950](https://github.com/ClickHouse/ClickHouse/pull/51950) ([Kruglov Pavel](https://github.com/Avogar)). -* Don't throw an exception when an executable UDF has parameters [#51961](https://github.com/ClickHouse/ClickHouse/pull/51961) ([Nikita Taranov](https://github.com/nickitat)). -* Fix recalculation of skip indexes and projections in `ALTER DELETE` queries [#52530](https://github.com/ClickHouse/ClickHouse/pull/52530) ([Anton Popov](https://github.com/CurtizJ)). -* MaterializedMySQL: Fix the infinite loop in ReadBuffer::read [#52621](https://github.com/ClickHouse/ClickHouse/pull/52621) ([Val Doroshchuk](https://github.com/valbok)). -* Load suggestions only with the `clickhouse` dialect [#52628](https://github.com/ClickHouse/ClickHouse/pull/52628) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). -* Init and destroy ares channel on demand. [#52634](https://github.com/ClickHouse/ClickHouse/pull/52634) ([Arthur Passos](https://github.com/arthurpassos)). -* Fix filtering by virtual columns with OR expression [#52653](https://github.com/ClickHouse/ClickHouse/pull/52653) ([Azat Khuzhin](https://github.com/azat)). -* Fix crash in function `tuple` with one sparse column argument [#52659](https://github.com/ClickHouse/ClickHouse/pull/52659) ([Anton Popov](https://github.com/CurtizJ)). -* Fix named collections on cluster [#52687](https://github.com/ClickHouse/ClickHouse/pull/52687) ([Al Korgun](https://github.com/alkorgun)).
-* Fix reading of unnecessary column in case of multistage `PREWHERE` [#52689](https://github.com/ClickHouse/ClickHouse/pull/52689) ([Anton Popov](https://github.com/CurtizJ)). -* Fix unexpected sort result on multi columns with nulls first direction [#52761](https://github.com/ClickHouse/ClickHouse/pull/52761) ([copperybean](https://github.com/copperybean)). -* Fix data race in Keeper reconfiguration [#52804](https://github.com/ClickHouse/ClickHouse/pull/52804) ([Antonio Andelic](https://github.com/antonio2368)). -* Fix sorting of sparse columns with large limit [#52827](https://github.com/ClickHouse/ClickHouse/pull/52827) ([Anton Popov](https://github.com/CurtizJ)). -* clickhouse-keeper: fix implementation of server with poll. [#52833](https://github.com/ClickHouse/ClickHouse/pull/52833) ([Andy Fiddaman](https://github.com/citrus-it)). -* Make regexp analyzer recognize named capturing groups [#52840](https://github.com/ClickHouse/ClickHouse/pull/52840) ([Han Fei](https://github.com/hanfei1991)). -* Fix possible assert in `~PushingAsyncPipelineExecutor` in clickhouse-local [#52862](https://github.com/ClickHouse/ClickHouse/pull/52862) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix reading of empty `Nested(Array(LowCardinality(...)))` [#52949](https://github.com/ClickHouse/ClickHouse/pull/52949) ([Anton Popov](https://github.com/CurtizJ)). -* Added new tests for session_log and fixed the inconsistency between login and logout. [#52958](https://github.com/ClickHouse/ClickHouse/pull/52958) ([Alexey Gerasimchuck](https://github.com/Demilivor)). -* Fix password leak in show create mysql table [#52962](https://github.com/ClickHouse/ClickHouse/pull/52962) ([Duc Canh Le](https://github.com/canhld94)). -* Convert sparse column format to full in CreateSetAndFilterOnTheFlyStep [#53000](https://github.com/ClickHouse/ClickHouse/pull/53000) ([vdimir](https://github.com/vdimir)). -* Fix rare race condition with empty key prefix directory deletion in fs cache [#53055](https://github.com/ClickHouse/ClickHouse/pull/53055) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Fix ZstdDeflatingWriteBuffer truncating the output sometimes [#53064](https://github.com/ClickHouse/ClickHouse/pull/53064) ([Michael Kolupaev](https://github.com/al13n321)). -* Fix query_id in part_log with async flush queries [#53103](https://github.com/ClickHouse/ClickHouse/pull/53103) ([Raúl Marín](https://github.com/Algunenano)). -* Fix possible error from cache "Read unexpected size" [#53121](https://github.com/ClickHouse/ClickHouse/pull/53121) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Disable the new parquet encoder [#53130](https://github.com/ClickHouse/ClickHouse/pull/53130) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Fix "Not-ready Set" exception [#53162](https://github.com/ClickHouse/ClickHouse/pull/53162) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Fix character escaping in the PostgreSQL engine [#53250](https://github.com/ClickHouse/ClickHouse/pull/53250) ([Nikolay Degterinsky](https://github.com/evillique)). -* Experimental session_log table: Added new tests for session_log and fixed the inconsistency between login and logout. [#53255](https://github.com/ClickHouse/ClickHouse/pull/53255) ([Alexey Gerasimchuck](https://github.com/Demilivor)). Fixed inconsistency between login success and logout [#53302](https://github.com/ClickHouse/ClickHouse/pull/53302) ([Alexey Gerasimchuck](https://github.com/Demilivor)). 
-* Fix adding sub-second intervals to DateTime [#53309](https://github.com/ClickHouse/ClickHouse/pull/53309) ([Michael Kolupaev](https://github.com/al13n321)). -* Fix "Context has expired" error in dictionaries [#53342](https://github.com/ClickHouse/ClickHouse/pull/53342) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Fix incorrect normal projection AST format [#53347](https://github.com/ClickHouse/ClickHouse/pull/53347) ([Amos Bird](https://github.com/amosbird)). -* Forbid use_structure_from_insertion_table_in_table_functions when execute Scalar [#53348](https://github.com/ClickHouse/ClickHouse/pull/53348) ([flynn](https://github.com/ucasfl)). -* Fix loading lazy database during system.table select query [#53372](https://github.com/ClickHouse/ClickHouse/pull/53372) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). -* Fixed system.data_skipping_indices for MaterializedMySQL [#53381](https://github.com/ClickHouse/ClickHouse/pull/53381) ([Filipp Ozinov](https://github.com/bakwc)). -* Fix processing single carriage return in TSV file segmentation engine [#53407](https://github.com/ClickHouse/ClickHouse/pull/53407) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix `Context has expired` error properly [#53433](https://github.com/ClickHouse/ClickHouse/pull/53433) ([Michael Kolupaev](https://github.com/al13n321)). -* Fix `timeout_overflow_mode` when having subquery in the rhs of IN [#53439](https://github.com/ClickHouse/ClickHouse/pull/53439) ([Duc Canh Le](https://github.com/canhld94)). -* Fix an unexpected behavior in [#53152](https://github.com/ClickHouse/ClickHouse/issues/53152) [#53440](https://github.com/ClickHouse/ClickHouse/pull/53440) ([Zhiguo Zhou](https://github.com/ZhiguoZh)). -* Fix JSON_QUERY Function parse error while path is all number [#53470](https://github.com/ClickHouse/ClickHouse/pull/53470) ([KevinyhZou](https://github.com/KevinyhZou)). -* Fix wrong columns order for queries with parallel FINAL. [#53489](https://github.com/ClickHouse/ClickHouse/pull/53489) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Fixed SELECTing from ReplacingMergeTree with do_not_merge_across_partitions_select_final [#53511](https://github.com/ClickHouse/ClickHouse/pull/53511) ([Vasily Nemkov](https://github.com/Enmk)). -* Flush async insert queue first on shutdown [#53547](https://github.com/ClickHouse/ClickHouse/pull/53547) ([joelynch](https://github.com/joelynch)). -* Fix crash in join on sparse columna [#53548](https://github.com/ClickHouse/ClickHouse/pull/53548) ([vdimir](https://github.com/vdimir)). -* Fix possible UB in Set skipping index for functions with incorrect args [#53559](https://github.com/ClickHouse/ClickHouse/pull/53559) ([Azat Khuzhin](https://github.com/azat)). -* Fix possible UB in inverted indexes (experimental feature) [#53560](https://github.com/ClickHouse/ClickHouse/pull/53560) ([Azat Khuzhin](https://github.com/azat)). -* Fix: interpolate expression takes source column instead of same name aliased from select expression. [#53572](https://github.com/ClickHouse/ClickHouse/pull/53572) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). -* Fix number of dropped granules in EXPLAIN PLAN index=1 [#53616](https://github.com/ClickHouse/ClickHouse/pull/53616) ([wangxiaobo](https://github.com/wzb5212)). -* Correctly handle totals and extremes with `DelayedSource` [#53644](https://github.com/ClickHouse/ClickHouse/pull/53644) ([Antonio Andelic](https://github.com/antonio2368)). 
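Sub-second interval arithmetic of the kind addressed by #53309 above looks like this (values are illustrative):

```sql
-- Adding millisecond/microsecond intervals to DateTime/DateTime64 values (#53309).
SELECT
    now() + INTERVAL 250 MILLISECOND,
    toDateTime64('2023-08-01 12:00:00', 6) + INTERVAL 5 MICROSECOND;
```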
-* Prepared set cache in mutation pipeline stuck [#53645](https://github.com/ClickHouse/ClickHouse/pull/53645) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Fix bug on mutations with subcolumns of type JSON in predicates of UPDATE and DELETE queries. [#53677](https://github.com/ClickHouse/ClickHouse/pull/53677) ([VanDarkholme7](https://github.com/VanDarkholme7)). -* Fix filter pushdown for full_sorting_merge join [#53699](https://github.com/ClickHouse/ClickHouse/pull/53699) ([vdimir](https://github.com/vdimir)). -* Try to fix bug with `NULL::LowCardinality(Nullable(...)) NOT IN` [#53706](https://github.com/ClickHouse/ClickHouse/pull/53706) ([Andrey Zvonov](https://github.com/zvonand)). -* Fix: sorted distinct with sparse columns [#53711](https://github.com/ClickHouse/ClickHouse/pull/53711) ([Igor Nikonov](https://github.com/devcrafter)). -* `transform`: correctly handle default column with multiple rows [#53742](https://github.com/ClickHouse/ClickHouse/pull/53742) ([Salvatore Mesoraca](https://github.com/aiven-sal)). -* Fix fuzzer crash in parseDateTime [#53764](https://github.com/ClickHouse/ClickHouse/pull/53764) ([Robert Schulze](https://github.com/rschu1ze)). -* MaterializedPostgreSQL: fix uncaught exception in getCreateTableQueryImpl [#53832](https://github.com/ClickHouse/ClickHouse/pull/53832) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Fix possible segfault while using PostgreSQL engine [#53847](https://github.com/ClickHouse/ClickHouse/pull/53847) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Fix named_collection_admin alias [#54066](https://github.com/ClickHouse/ClickHouse/pull/54066) ([Kseniia Sumarokova](https://github.com/kssenii)). - -### ClickHouse release 23.7, 2023-07-27 - -#### Backward Incompatible Change -* Add `NAMED COLLECTION` access type (aliases `USE NAMED COLLECTION`, `NAMED COLLECTION USAGE`). This PR is backward incompatible because this access type is disabled by default (because a parent access type `NAMED COLLECTION ADMIN` is disabled by default as well). Proposed in [#50277](https://github.com/ClickHouse/ClickHouse/issues/50277). To grant use `GRANT NAMED COLLECTION ON collection_name TO user` or `GRANT NAMED COLLECTION ON * TO user`, to be able to give these grants `named_collection_admin` is required in config (previously it was named `named_collection_control`, so will remain as an alias). [#50625](https://github.com/ClickHouse/ClickHouse/pull/50625) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Fixing a typo in the `system.parts` column name `last_removal_attemp_time`. Now it is named `last_removal_attempt_time`. [#52104](https://github.com/ClickHouse/ClickHouse/pull/52104) ([filimonov](https://github.com/filimonov)). -* Bump version of the distributed_ddl_entry_format_version to 5 by default (enables opentelemetry and initial_query_idd pass through). This will not allow to process existing entries for distributed DDL after *downgrade* (but note, that usually there should be no such unprocessed entries). [#52128](https://github.com/ClickHouse/ClickHouse/pull/52128) ([Azat Khuzhin](https://github.com/azat)). -* Check projection metadata the same way we check ordinary metadata. This change may prevent the server from starting in case there was a table with an invalid projection. An example is a projection that created positional columns in PK (e.g. `projection p (select * order by 1, 4)` which is not allowed in table PK and can cause a crash during insert/merge). Drop such projections before the update. 
Fixes [#52353](https://github.com/ClickHouse/ClickHouse/issues/52353). [#52361](https://github.com/ClickHouse/ClickHouse/pull/52361) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* The experimental feature `hashid` is removed due to a bug. The quality of implementation was questionable at the start, and it didn't get through the experimental status. This closes [#52406](https://github.com/ClickHouse/ClickHouse/issues/52406). [#52449](https://github.com/ClickHouse/ClickHouse/pull/52449) ([Alexey Milovidov](https://github.com/alexey-milovidov)). - -#### New Feature -* Added `Overlay` database engine to combine multiple databases into one. Added `Filesystem` database engine to represent a directory in the filesystem as a set of implicitly available tables with auto-detected formats and structures. A new `S3` database engine allows to read-only interact with s3 storage by representing a prefix as a set of tables. A new `HDFS` database engine allows to interact with HDFS storage in the same way. [#48821](https://github.com/ClickHouse/ClickHouse/pull/48821) ([alekseygolub](https://github.com/alekseygolub)). -* Add support for external disks in Keeper for storing snapshots and logs. [#50098](https://github.com/ClickHouse/ClickHouse/pull/50098) ([Antonio Andelic](https://github.com/antonio2368)). -* Add support for multi-directory selection (`{}`) globs. [#50559](https://github.com/ClickHouse/ClickHouse/pull/50559) ([Andrey Zvonov](https://github.com/zvonand)). -* Kafka connector can fetch Avro schema from schema registry with basic authentication using url-encoded credentials. [#49664](https://github.com/ClickHouse/ClickHouse/pull/49664) ([Ilya Golshtein](https://github.com/ilejn)). -* Add function `arrayJaccardIndex` which computes the Jaccard similarity between two arrays. [#50076](https://github.com/ClickHouse/ClickHouse/pull/50076) ([FFFFFFFHHHHHHH](https://github.com/FFFFFFFHHHHHHH)). -* Add a column `is_obsolete` to `system.settings` and similar tables. Closes [#50819](https://github.com/ClickHouse/ClickHouse/issues/50819). [#50826](https://github.com/ClickHouse/ClickHouse/pull/50826) ([flynn](https://github.com/ucasfl)). -* Implement support of encrypted elements in configuration file. Added possibility to use encrypted text in leaf elements of configuration file. The text is encrypted using encryption codecs from `` section. [#50986](https://github.com/ClickHouse/ClickHouse/pull/50986) ([Roman Vasin](https://github.com/rvasin)). -* Grace Hash Join algorithm is now applicable to FULL and RIGHT JOINs. [#49483](https://github.com/ClickHouse/ClickHouse/issues/49483). [#51013](https://github.com/ClickHouse/ClickHouse/pull/51013) ([lgbo](https://github.com/lgbo-ustc)). -* Add `SYSTEM STOP LISTEN` query for more graceful termination. Closes [#47972](https://github.com/ClickHouse/ClickHouse/issues/47972). [#51016](https://github.com/ClickHouse/ClickHouse/pull/51016) ([Nikolay Degterinsky](https://github.com/evillique)). -* Add `input_format_csv_allow_variable_number_of_columns` options. [#51273](https://github.com/ClickHouse/ClickHouse/pull/51273) ([Dmitry Kardymon](https://github.com/kardymonds)). -* Another boring feature: add function `substring_index`, as in Spark or MySQL. [#51472](https://github.com/ClickHouse/ClickHouse/pull/51472) ([李扬](https://github.com/taiyang-li)). -* A system table `jemalloc_bins` to show stats for jemalloc bins. 
Example `SELECT *, size * (nmalloc - ndalloc) AS allocated_bytes FROM system.jemalloc_bins WHERE allocated_bytes > 0 ORDER BY allocated_bytes DESC LIMIT 10`. Enjoy. [#51674](https://github.com/ClickHouse/ClickHouse/pull/51674) ([Alexander Gololobov](https://github.com/davenger)). -* Add `RowBinaryWithDefaults` format with extra byte before each column as a flag for using the column's default value. Closes [#50854](https://github.com/ClickHouse/ClickHouse/issues/50854). [#51695](https://github.com/ClickHouse/ClickHouse/pull/51695) ([Kruglov Pavel](https://github.com/Avogar)). -* Added `default_temporary_table_engine` setting. Same as `default_table_engine` but for temporary tables. [#51292](https://github.com/ClickHouse/ClickHouse/issues/51292). [#51708](https://github.com/ClickHouse/ClickHouse/pull/51708) ([velavokr](https://github.com/velavokr)). -* Added new `initcap` / `initcapUTF8` functions which convert the first letter of each word to upper case and the rest to lower case. [#51735](https://github.com/ClickHouse/ClickHouse/pull/51735) ([Dmitry Kardymon](https://github.com/kardymonds)). -* Create table now supports `PRIMARY KEY` syntax in column definition. Columns are added to primary index in the same order columns are defined. [#51881](https://github.com/ClickHouse/ClickHouse/pull/51881) ([Ilya Yatsishin](https://github.com/qoega)). -* Added the possibility to use date and time format specifiers in log and error log file names, either in config files (`log` and `errorlog` tags) or command line arguments (`--log-file` and `--errorlog-file`). [#51945](https://github.com/ClickHouse/ClickHouse/pull/51945) ([Victor Krasnov](https://github.com/sirvickr)). -* Added Peak Memory Usage statistic to HTTP headers. [#51946](https://github.com/ClickHouse/ClickHouse/pull/51946) ([Dmitry Kardymon](https://github.com/kardymonds)). -* Added new `hasSubsequence` (+`CaseInsensitive` and `UTF8` versions) functions to match subsequences in strings. [#52050](https://github.com/ClickHouse/ClickHouse/pull/52050) ([Dmitry Kardymon](https://github.com/kardymonds)). -* Add `array_agg` as alias of `groupArray` for PostgreSQL compatibility. Closes [#52100](https://github.com/ClickHouse/ClickHouse/issues/52100). ### Documentation entry for user-facing changes. [#52135](https://github.com/ClickHouse/ClickHouse/pull/52135) ([flynn](https://github.com/ucasfl)). -* Add `any_value` as a compatibility alias for `any` aggregate function. Closes [#52140](https://github.com/ClickHouse/ClickHouse/issues/52140). [#52147](https://github.com/ClickHouse/ClickHouse/pull/52147) ([flynn](https://github.com/ucasfl)). -* Add aggregate function `array_concat_agg` for compatibility with BigQuery, it's alias of `groupArrayArray`. Closes [#52139](https://github.com/ClickHouse/ClickHouse/issues/52139). [#52149](https://github.com/ClickHouse/ClickHouse/pull/52149) ([flynn](https://github.com/ucasfl)). -* Add `OCTET_LENGTH` as an alias to `length`. Closes [#52153](https://github.com/ClickHouse/ClickHouse/issues/52153). [#52176](https://github.com/ClickHouse/ClickHouse/pull/52176) ([FFFFFFFHHHHHHH](https://github.com/FFFFFFFHHHHHHH)). -* Added `firstLine` function to extract the first line from the multi-line string. This closes [#51172](https://github.com/ClickHouse/ClickHouse/issues/51172). [#52209](https://github.com/ClickHouse/ClickHouse/pull/52209) ([Mikhail Koviazin](https://github.com/mkmkme)). -* Implement KQL-style formatting for the `Interval` data type. This is only needed for compatibility with the `Kusto` query language. 
[#45671](https://github.com/ClickHouse/ClickHouse/pull/45671) ([ltrk2](https://github.com/ltrk2)). -* Added query `SYSTEM FLUSH ASYNC INSERT QUEUE` which flushes all pending asynchronous inserts to the destination tables. Added a server-side setting `async_insert_queue_flush_on_shutdown` (`true` by default) which determines whether to flush queue of asynchronous inserts on graceful shutdown. Setting `async_insert_threads` is now a server-side setting. [#49160](https://github.com/ClickHouse/ClickHouse/pull/49160) ([Anton Popov](https://github.com/CurtizJ)). -* Aliases `current_database` and a new function `current_schemas` for compatibility with PostgreSQL. [#51076](https://github.com/ClickHouse/ClickHouse/pull/51076) ([Pedro Riera](https://github.com/priera)). -* Add alias for functions `today` (now available under the `curdate`/`current_date` names) and `now` (`current_timestamp`). [#52106](https://github.com/ClickHouse/ClickHouse/pull/52106) ([Lloyd-Pottiger](https://github.com/Lloyd-Pottiger)). -* Support `async_deduplication_token` for async insert. [#52136](https://github.com/ClickHouse/ClickHouse/pull/52136) ([Han Fei](https://github.com/hanfei1991)). -* Add new setting `disable_url_encoding` that allows to disable decoding/encoding path in uri in URL engine. [#52337](https://github.com/ClickHouse/ClickHouse/pull/52337) ([Kruglov Pavel](https://github.com/Avogar)). - -#### Performance Improvement -* Enable automatic selection of the sparse serialization format by default. It improves performance. The format is supported since version 22.1. After this change, downgrading to versions older than 22.1 might not be possible. A downgrade may require to set `ratio_of_defaults_for_sparse_serialization=0.9375` [55153](https://github.com/ClickHouse/ClickHouse/issues/55153). You can turn off the usage of the sparse serialization format by providing the `ratio_of_defaults_for_sparse_serialization = 1` setting for your MergeTree tables. [#49631](https://github.com/ClickHouse/ClickHouse/pull/49631) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Enable `move_all_conditions_to_prewhere` and `enable_multiple_prewhere_read_steps` settings by default. [#46365](https://github.com/ClickHouse/ClickHouse/pull/46365) ([Alexander Gololobov](https://github.com/davenger)). -* Improves performance of some queries by tuning allocator. [#46416](https://github.com/ClickHouse/ClickHouse/pull/46416) ([Azat Khuzhin](https://github.com/azat)). -* Now we use fixed-size tasks in `MergeTreePrefetchedReadPool` as in `MergeTreeReadPool`. Also from now we use connection pool for S3 requests. [#49732](https://github.com/ClickHouse/ClickHouse/pull/49732) ([Nikita Taranov](https://github.com/nickitat)). -* More pushdown to the right side of join. [#50532](https://github.com/ClickHouse/ClickHouse/pull/50532) ([Nikita Taranov](https://github.com/nickitat)). -* Improve grace_hash join by reserving hash table's size (resubmit). [#50875](https://github.com/ClickHouse/ClickHouse/pull/50875) ([lgbo](https://github.com/lgbo-ustc)). -* Waiting on lock in `OpenedFileCache` could be noticeable sometimes. We sharded it into multiple sub-maps (each with its own lock) to avoid contention. [#51341](https://github.com/ClickHouse/ClickHouse/pull/51341) ([Nikita Taranov](https://github.com/nickitat)). -* Move conditions with primary key columns to the end of PREWHERE chain. The idea is that conditions with PK columns are likely to be used in PK analysis and will not contribute much more to PREWHERE filtering. 
[#51958](https://github.com/ClickHouse/ClickHouse/pull/51958) ([Alexander Gololobov](https://github.com/davenger)). -* Speed up `COUNT(DISTINCT)` for String types by inlining SipHash. The performance experiments of *OnTime* on the ICX device (Intel Xeon Platinum 8380 CPU, 80 cores, 160 threads) show that this change could bring an improvement of *11.6%* to the QPS of the query *Q8* while having no impact on others. [#52036](https://github.com/ClickHouse/ClickHouse/pull/52036) ([Zhiguo Zhou](https://github.com/ZhiguoZh)). -* Enable `allow_vertical_merges_from_compact_to_wide_parts` by default. It will save memory usage during merges. [#52295](https://github.com/ClickHouse/ClickHouse/pull/52295) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Fix incorrect projection analysis which invalidates primary keys. This issue only exists when `query_plan_optimize_primary_key = 1, query_plan_optimize_projection = 1`. This fixes [#48823](https://github.com/ClickHouse/ClickHouse/issues/48823). This fixes [#51173](https://github.com/ClickHouse/ClickHouse/issues/51173). [#52308](https://github.com/ClickHouse/ClickHouse/pull/52308) ([Amos Bird](https://github.com/amosbird)). -* Reduce the number of syscalls in `FileCache::loadMetadata` - this speeds up server startup if the filesystem cache is configured. [#52435](https://github.com/ClickHouse/ClickHouse/pull/52435) ([Raúl Marín](https://github.com/Algunenano)). -* Allow to have strict lower boundary for file segment size by downloading remaining data in the background. Minimum size of file segment (if actual file size is bigger) is configured as cache configuration setting `boundary_alignment`, by default `4Mi`. Number of background threads are configured as cache configuration setting `background_download_threads`, by default `2`. Also `max_file_segment_size` was increased from `8Mi` to `32Mi` in this PR. [#51000](https://github.com/ClickHouse/ClickHouse/pull/51000) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Decreased default timeouts for S3 from 30 seconds to 3 seconds, and for other HTTP from 180 seconds to 30 seconds. [#51171](https://github.com/ClickHouse/ClickHouse/pull/51171) ([Michael Kolupaev](https://github.com/al13n321)). -* New setting `merge_tree_determine_task_size_by_prewhere_columns` added. If set to `true` only sizes of the columns from `PREWHERE` section will be considered to determine reading task size. Otherwise all the columns from query are considered. [#52606](https://github.com/ClickHouse/ClickHouse/pull/52606) ([Nikita Taranov](https://github.com/nickitat)). - -#### Improvement -* Use read_bytes/total_bytes_to_read for progress bar in s3/file/url/... table functions for better progress indication. [#51286](https://github.com/ClickHouse/ClickHouse/pull/51286) ([Kruglov Pavel](https://github.com/Avogar)). -* Introduce a table setting `wait_for_unique_parts_send_before_shutdown_ms` which specify the amount of time replica will wait before closing interserver handler for replicated sends. Also fix inconsistency with shutdown of tables and interserver handlers: now server shutdown tables first and only after it shut down interserver handlers. [#51851](https://github.com/ClickHouse/ClickHouse/pull/51851) ([alesapin](https://github.com/alesapin)). -* Allow SQL standard `FETCH` without `OFFSET`. See https://antonz.org/sql-fetch/. [#51293](https://github.com/ClickHouse/ClickHouse/pull/51293) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
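The SQL-standard `FETCH` without `OFFSET` mentioned in the last entry above is, for example:

```sql
-- Standard FETCH clause without a preceding OFFSET; equivalent to LIMIT 10.
SELECT number
FROM numbers(100)
ORDER BY number
FETCH FIRST 10 ROWS ONLY;
```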
-* Allow filtering HTTP headers for the URL/S3 table functions with the new `http_forbid_headers` section in config. Both exact matching and regexp filters are available. [#51038](https://github.com/ClickHouse/ClickHouse/pull/51038) ([Nikolay Degterinsky](https://github.com/evillique)). -* Don't show messages about `16 EiB` free space in logs, as they don't make sense. This closes [#49320](https://github.com/ClickHouse/ClickHouse/issues/49320). [#49342](https://github.com/ClickHouse/ClickHouse/pull/49342) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Properly check the limit for the `sleepEachRow` function. Add a setting `function_sleep_max_microseconds_per_block`. This is needed for generic query fuzzer. [#49343](https://github.com/ClickHouse/ClickHouse/pull/49343) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Fix two issues in `geoHash` functions. [#50066](https://github.com/ClickHouse/ClickHouse/pull/50066) ([李扬](https://github.com/taiyang-li)). -* Log async insert flush queries into `system.query_log`. [#51160](https://github.com/ClickHouse/ClickHouse/pull/51160) ([Raúl Marín](https://github.com/Algunenano)). -* Functions `date_diff` and `age` now support millisecond/microsecond unit and work with microsecond precision. [#51291](https://github.com/ClickHouse/ClickHouse/pull/51291) ([Dmitry Kardymon](https://github.com/kardymonds)). -* Improve parsing of path in clickhouse-keeper-client. [#51359](https://github.com/ClickHouse/ClickHouse/pull/51359) ([Azat Khuzhin](https://github.com/azat)). -* A third-party product depending on ClickHouse (Gluten: a Plugin to Double SparkSQL's Performance) had a bug. This fix avoids heap overflow in that third-party product while reading from HDFS. [#51386](https://github.com/ClickHouse/ClickHouse/pull/51386) ([李扬](https://github.com/taiyang-li)). -* Add ability to disable native copy for S3 (setting for BACKUP/RESTORE `allow_s3_native_copy`, and `s3_allow_native_copy` for `s3`/`s3_plain` disks). [#51448](https://github.com/ClickHouse/ClickHouse/pull/51448) ([Azat Khuzhin](https://github.com/azat)). -* Add column `primary_key_size` to `system.parts` table to show compressed primary key size on disk. Closes [#51400](https://github.com/ClickHouse/ClickHouse/issues/51400). [#51496](https://github.com/ClickHouse/ClickHouse/pull/51496) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). -* Allow running `clickhouse-local` without procfs, without home directory existing, and without name resolution plugins from glibc. [#51518](https://github.com/ClickHouse/ClickHouse/pull/51518) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Add placeholder `%a` for rull filename in rename_files_after_processing setting. [#51603](https://github.com/ClickHouse/ClickHouse/pull/51603) ([Kruglov Pavel](https://github.com/Avogar)). -* Add column `modification_time` into `system.parts_columns`. [#51685](https://github.com/ClickHouse/ClickHouse/pull/51685) ([Azat Khuzhin](https://github.com/azat)). -* Add new setting `input_format_csv_use_default_on_bad_values` to CSV format that allows to insert default value when parsing of a single field failed. [#51716](https://github.com/ClickHouse/ClickHouse/pull/51716) ([KevinyhZou](https://github.com/KevinyhZou)). -* Added a crash log flush to the disk after the unexpected crash. [#51720](https://github.com/ClickHouse/ClickHouse/pull/51720) ([Alexey Gerasimchuck](https://github.com/Demilivor)). -* Fix behavior in dashboard page where errors unrelated to authentication are not shown. 
Also fix 'overlapping' chart behavior. [#51744](https://github.com/ClickHouse/ClickHouse/pull/51744) ([Zach Naimon](https://github.com/ArctypeZach)). -* Allow UUID to UInt128 conversion. [#51765](https://github.com/ClickHouse/ClickHouse/pull/51765) ([Dmitry Kardymon](https://github.com/kardymonds)). -* Added support for function `range` of Nullable arguments. [#51767](https://github.com/ClickHouse/ClickHouse/pull/51767) ([Dmitry Kardymon](https://github.com/kardymonds)). -* Convert condition like `toyear(x) = c` to `c1 <= x < c2`. [#51795](https://github.com/ClickHouse/ClickHouse/pull/51795) ([Han Fei](https://github.com/hanfei1991)). -* Improve MySQL compatibility of the statement `SHOW INDEX`. [#51796](https://github.com/ClickHouse/ClickHouse/pull/51796) ([Robert Schulze](https://github.com/rschu1ze)). -* Fix `use_structure_from_insertion_table_in_table_functions` does not work with `MATERIALIZED` and `ALIAS` columns. Closes [#51817](https://github.com/ClickHouse/ClickHouse/issues/51817). Closes [#51019](https://github.com/ClickHouse/ClickHouse/issues/51019). [#51825](https://github.com/ClickHouse/ClickHouse/pull/51825) ([flynn](https://github.com/ucasfl)). -* Cache dictionary now requests only unique keys from source. Closes [#51762](https://github.com/ClickHouse/ClickHouse/issues/51762). [#51853](https://github.com/ClickHouse/ClickHouse/pull/51853) ([Maksim Kita](https://github.com/kitaisreal)). -* Fixed the case when settings were not applied for EXPLAIN query when FORMAT was provided. [#51859](https://github.com/ClickHouse/ClickHouse/pull/51859) ([Nikita Taranov](https://github.com/nickitat)). -* Allow SETTINGS before FORMAT in DESCRIBE TABLE query for compatibility with SELECT query. Closes [#51544](https://github.com/ClickHouse/ClickHouse/issues/51544). [#51899](https://github.com/ClickHouse/ClickHouse/pull/51899) ([Nikolay Degterinsky](https://github.com/evillique)). -* Var-Int encoded integers (e.g. used by the native protocol) can now use the full 64-bit range. 3rd party clients are advised to update their var-int code accordingly. [#51905](https://github.com/ClickHouse/ClickHouse/pull/51905) ([Robert Schulze](https://github.com/rschu1ze)). -* Update certificates when they change without the need to manually SYSTEM RELOAD CONFIG. [#52030](https://github.com/ClickHouse/ClickHouse/pull/52030) ([Mike Kot](https://github.com/myrrc)). -* Added `allow_create_index_without_type` setting that allow to ignore `ADD INDEX` queries without specified `TYPE`. Standard SQL queries will just succeed without changing table schema. [#52056](https://github.com/ClickHouse/ClickHouse/pull/52056) ([Ilya Yatsishin](https://github.com/qoega)). -* Log messages are written to the `system.text_log` from the server startup. [#52113](https://github.com/ClickHouse/ClickHouse/pull/52113) ([Dmitry Kardymon](https://github.com/kardymonds)). -* In cases where the HTTP endpoint has multiple IP addresses and the first of them is unreachable, a timeout exception was thrown. Made session creation with handling all resolved endpoints. [#52116](https://github.com/ClickHouse/ClickHouse/pull/52116) ([Aleksei Filatov](https://github.com/aalexfvk)). -* Avro input format now supports Union even if it contains only a single type. Closes [#52131](https://github.com/ClickHouse/ClickHouse/issues/52131). [#52137](https://github.com/ClickHouse/ClickHouse/pull/52137) ([flynn](https://github.com/ucasfl)). -* Add setting `optimize_use_implicit_projections` to disable implicit projections (currently only `min_max_count` projection). 
[#52152](https://github.com/ClickHouse/ClickHouse/pull/52152) ([Amos Bird](https://github.com/amosbird)). -* It was possible to use the function `hasToken` for infinite loop. Now this possibility is removed. This closes [#52156](https://github.com/ClickHouse/ClickHouse/issues/52156). [#52160](https://github.com/ClickHouse/ClickHouse/pull/52160) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Create ZK ancestors optimistically. [#52195](https://github.com/ClickHouse/ClickHouse/pull/52195) ([Raúl Marín](https://github.com/Algunenano)). -* Fix [#50582](https://github.com/ClickHouse/ClickHouse/issues/50582). Avoid the `Not found column ... in block` error in some cases of reading in-order and constants. [#52259](https://github.com/ClickHouse/ClickHouse/pull/52259) ([Chen768959](https://github.com/Chen768959)). -* Check whether S2 geo primitives are invalid as early as possible on ClickHouse side. This closes: [#27090](https://github.com/ClickHouse/ClickHouse/issues/27090). [#52260](https://github.com/ClickHouse/ClickHouse/pull/52260) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). -* Add back missing projection QueryAccessInfo when `query_plan_optimize_projection = 1`. This fixes [#50183](https://github.com/ClickHouse/ClickHouse/issues/50183) . This fixes [#50093](https://github.com/ClickHouse/ClickHouse/issues/50093). [#52327](https://github.com/ClickHouse/ClickHouse/pull/52327) ([Amos Bird](https://github.com/amosbird)). -* When `ZooKeeperRetriesControl` rethrows an error, it's more useful to see its original stack trace, not the one from `ZooKeeperRetriesControl` itself. [#52347](https://github.com/ClickHouse/ClickHouse/pull/52347) ([Vitaly Baranov](https://github.com/vitlibar)). -* Wait for zero copy replication lock even if some disks don't support it. [#52376](https://github.com/ClickHouse/ClickHouse/pull/52376) ([Raúl Marín](https://github.com/Algunenano)). -* Now interserver port will be closed only after tables are shut down. [#52498](https://github.com/ClickHouse/ClickHouse/pull/52498) ([alesapin](https://github.com/alesapin)). - -#### Experimental Feature -* Writing parquet files is 10x faster, it's multi-threaded now. Almost the same speed as reading. [#49367](https://github.com/ClickHouse/ClickHouse/pull/49367) ([Michael Kolupaev](https://github.com/al13n321)). This is controlled by the setting `output_format_parquet_use_custom_encoder` which is disabled by default, because the feature is non-ideal. -* Added support for [PRQL](https://prql-lang.org/) as a query language. [#50686](https://github.com/ClickHouse/ClickHouse/pull/50686) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). -* Allow to add disk name for custom disks. Previously custom disks would use an internal generated disk name. Now it will be possible with `disk = disk_(...)` (e.g. disk will have name `name`) . [#51552](https://github.com/ClickHouse/ClickHouse/pull/51552) ([Kseniia Sumarokova](https://github.com/kssenii)). This syntax can be changed in this release. -* (experimental MaterializedMySQL) Fixed crash when `mysqlxx::Pool::Entry` is used after it was disconnected. [#52063](https://github.com/ClickHouse/ClickHouse/pull/52063) ([Val Doroshchuk](https://github.com/valbok)). -* (experimental MaterializedMySQL) `CREATE TABLE ... AS SELECT` .. is now supported in MaterializedMySQL. [#52067](https://github.com/ClickHouse/ClickHouse/pull/52067) ([Val Doroshchuk](https://github.com/valbok)). 
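The experimental multi-threaded Parquet writer above is gated behind `output_format_parquet_use_custom_encoder`; a hedged sketch of opting in (the output path is illustrative, and `INTO OUTFILE` applies to clickhouse-client / clickhouse-local):

```sql
-- Opt in to the experimental Parquet encoder (disabled by default per the entry above).
SET output_format_parquet_use_custom_encoder = 1;

SELECT number AS n, toString(number) AS s
FROM numbers(1000000)
INTO OUTFILE '/tmp/example.parquet'
FORMAT Parquet;
```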
-* (experimental MaterializedMySQL) Introduced automatic conversion of text types to utf8 for MaterializedMySQL. [#52084](https://github.com/ClickHouse/ClickHouse/pull/52084) ([Val Doroshchuk](https://github.com/valbok)). -* (experimental MaterializedMySQL) Now unquoted UTF-8 strings are supported in DDL for MaterializedMySQL. [#52318](https://github.com/ClickHouse/ClickHouse/pull/52318) ([Val Doroshchuk](https://github.com/valbok)). -* (experimental MaterializedMySQL) Now double quoted comments are supported in MaterializedMySQL. [#52355](https://github.com/ClickHouse/ClickHouse/pull/52355) ([Val Doroshchuk](https://github.com/valbok)). -* Upgrade Intel QPL from v1.1.0 to v1.2.0 2. Upgrade Intel accel-config from v3.5 to v4.0 3. Fixed issue that Device IOTLB miss has big perf. impact for IAA accelerators. [#52180](https://github.com/ClickHouse/ClickHouse/pull/52180) ([jasperzhu](https://github.com/jinjunzh)). -* The `session_timezone` setting (new in version 23.6) is demoted to experimental. [#52445](https://github.com/ClickHouse/ClickHouse/pull/52445) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Support ZooKeeper `reconfig` command for ClickHouse Keeper with incremental reconfiguration which can be enabled via `keeper_server.enable_reconfiguration` setting. Support adding servers, removing servers, and changing server priorities. [#49450](https://github.com/ClickHouse/ClickHouse/pull/49450) ([Mike Kot](https://github.com/myrrc)). It is suspected that this feature is incomplete. - -#### Build/Testing/Packaging Improvement -* Add experimental ClickHouse builds for Linux RISC-V 64 to CI. [#31398](https://github.com/ClickHouse/ClickHouse/pull/31398) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Add integration test check with the enabled Analyzer. [#50926](https://github.com/ClickHouse/ClickHouse/pull/50926) [#52210](https://github.com/ClickHouse/ClickHouse/pull/52210) ([Dmitry Novik](https://github.com/novikd)). -* Reproducible builds for Rust. [#52395](https://github.com/ClickHouse/ClickHouse/pull/52395) ([Azat Khuzhin](https://github.com/azat)). -* Update Cargo dependencies. [#51721](https://github.com/ClickHouse/ClickHouse/pull/51721) ([Raúl Marín](https://github.com/Algunenano)). -* Make the function `CHColumnToArrowColumn::fillArrowArrayWithArrayColumnData` to work with nullable arrays, which are not possible in ClickHouse, but needed for Gluten. [#52112](https://github.com/ClickHouse/ClickHouse/pull/52112) ([李扬](https://github.com/taiyang-li)). -* We've updated the CCTZ library to master, but there are no user-visible changes. [#52124](https://github.com/ClickHouse/ClickHouse/pull/52124) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* The `system.licenses` table now includes the hard-forked library Poco. This closes [#52066](https://github.com/ClickHouse/ClickHouse/issues/52066). [#52127](https://github.com/ClickHouse/ClickHouse/pull/52127) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Check that there are no cases of bad punctuation: whitespace before a comma like `Hello ,world` instead of `Hello, world`. [#52549](https://github.com/ClickHouse/ClickHouse/pull/52549) ([Alexey Milovidov](https://github.com/alexey-milovidov)). - -#### Bug Fix (user-visible misbehavior in an official stable release) -* Fix MaterializedPostgreSQL syncTables [#49698](https://github.com/ClickHouse/ClickHouse/pull/49698) ([Kseniia Sumarokova](https://github.com/kssenii)). 
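The `session_timezone` setting demoted to experimental above overrides the timezone for the current session; a small illustration (the timezone value is arbitrary):

```sql
-- Session-level timezone override (experimental); affects how DateTime values are parsed and rendered.
SET session_timezone = 'America/Denver';
SELECT timeZone(), toDateTime('2023-07-01 00:00:00');
```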
-* Fix projection with optimize_aggregators_of_group_by_keys [#49709](https://github.com/ClickHouse/ClickHouse/pull/49709) ([Amos Bird](https://github.com/amosbird)). -* Fix optimize_skip_unused_shards with JOINs [#51037](https://github.com/ClickHouse/ClickHouse/pull/51037) ([Azat Khuzhin](https://github.com/azat)). -* Fix formatDateTime() with fractional negative datetime64 [#51290](https://github.com/ClickHouse/ClickHouse/pull/51290) ([Dmitry Kardymon](https://github.com/kardymonds)). -* Functions `hasToken*` were totally wrong. Add a test for [#43358](https://github.com/ClickHouse/ClickHouse/issues/43358) [#51378](https://github.com/ClickHouse/ClickHouse/pull/51378) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Fix optimization to move functions before sorting. [#51481](https://github.com/ClickHouse/ClickHouse/pull/51481) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Fix Block structure mismatch in Pipe::unitePipes for FINAL [#51492](https://github.com/ClickHouse/ClickHouse/pull/51492) ([Nikita Taranov](https://github.com/nickitat)). -* Fix SIGSEGV for clusters with zero weight across all shards (fixes INSERT INTO FUNCTION clusterAllReplicas()) [#51545](https://github.com/ClickHouse/ClickHouse/pull/51545) ([Azat Khuzhin](https://github.com/azat)). -* Fix timeout for hedged requests [#51582](https://github.com/ClickHouse/ClickHouse/pull/51582) ([Azat Khuzhin](https://github.com/azat)). -* Fix logical error in ANTI join with NULL [#51601](https://github.com/ClickHouse/ClickHouse/pull/51601) ([vdimir](https://github.com/vdimir)). -* Fix for moving 'IN' conditions to PREWHERE [#51610](https://github.com/ClickHouse/ClickHouse/pull/51610) ([Alexander Gololobov](https://github.com/davenger)). -* Do not apply PredicateExpressionsOptimizer for ASOF/ANTI join [#51633](https://github.com/ClickHouse/ClickHouse/pull/51633) ([vdimir](https://github.com/vdimir)). -* Fix async insert with deduplication for ReplicatedMergeTree using merging algorithms [#51676](https://github.com/ClickHouse/ClickHouse/pull/51676) ([Antonio Andelic](https://github.com/antonio2368)). -* Fix reading from empty column in `parseSipHashKey` [#51804](https://github.com/ClickHouse/ClickHouse/pull/51804) ([Nikita Taranov](https://github.com/nickitat)). -* Fix segfault when create invalid EmbeddedRocksdb table [#51847](https://github.com/ClickHouse/ClickHouse/pull/51847) ([Duc Canh Le](https://github.com/canhld94)). -* Fix inserts into MongoDB tables [#51876](https://github.com/ClickHouse/ClickHouse/pull/51876) ([Nikolay Degterinsky](https://github.com/evillique)). -* Fix deadlock on DatabaseCatalog shutdown [#51908](https://github.com/ClickHouse/ClickHouse/pull/51908) ([Alexander Tokmakov](https://github.com/tavplubix)). -* Fix error in subquery operators [#51922](https://github.com/ClickHouse/ClickHouse/pull/51922) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Fix async connect to hosts with multiple ips [#51934](https://github.com/ClickHouse/ClickHouse/pull/51934) ([Kruglov Pavel](https://github.com/Avogar)). -* Do not remove inputs after ActionsDAG::merge [#51947](https://github.com/ClickHouse/ClickHouse/pull/51947) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Check refcount in `RemoveManyObjectStorageOperation::finalize` instead of `execute` [#51954](https://github.com/ClickHouse/ClickHouse/pull/51954) ([vdimir](https://github.com/vdimir)). 
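The `formatDateTime` case with a fractional DateTime64 before the epoch (fixed in #51290 above) can be reproduced with something like:

```sql
-- A pre-epoch DateTime64 value with a fractional part, formatted with formatDateTime (#51290).
SELECT formatDateTime(toDateTime64('1969-12-31 23:59:59.123', 3, 'UTC'), '%F %T');
```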
-* Allow parametric UDFs [#51964](https://github.com/ClickHouse/ClickHouse/pull/51964) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Small fix for toDateTime64() for dates after 2283-12-31 [#52130](https://github.com/ClickHouse/ClickHouse/pull/52130) ([Andrey Zvonov](https://github.com/zvonand)). -* Fix ORDER BY tuple of WINDOW functions [#52145](https://github.com/ClickHouse/ClickHouse/pull/52145) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Fix incorrect projection analysis when aggregation expression contains monotonic functions [#52151](https://github.com/ClickHouse/ClickHouse/pull/52151) ([Amos Bird](https://github.com/amosbird)). -* Fix error in `groupArrayMoving` functions [#52161](https://github.com/ClickHouse/ClickHouse/pull/52161) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Disable direct join for range dictionary [#52187](https://github.com/ClickHouse/ClickHouse/pull/52187) ([Duc Canh Le](https://github.com/canhld94)). -* Fix sticky mutations test (and extremely rare race condition) [#52197](https://github.com/ClickHouse/ClickHouse/pull/52197) ([alesapin](https://github.com/alesapin)). -* Fix race in Web disk [#52211](https://github.com/ClickHouse/ClickHouse/pull/52211) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Fix data race in Connection::setAsyncCallback on unknown packet from server [#52219](https://github.com/ClickHouse/ClickHouse/pull/52219) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix temp data deletion on startup, add test [#52275](https://github.com/ClickHouse/ClickHouse/pull/52275) ([vdimir](https://github.com/vdimir)). -* Don't use minmax_count projections when counting nullable columns [#52297](https://github.com/ClickHouse/ClickHouse/pull/52297) ([Amos Bird](https://github.com/amosbird)). -* MergeTree/ReplicatedMergeTree should use server timezone for log entries [#52325](https://github.com/ClickHouse/ClickHouse/pull/52325) ([Azat Khuzhin](https://github.com/azat)). -* Fix parameterized view with cte and multiple usage [#52328](https://github.com/ClickHouse/ClickHouse/pull/52328) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). -* Disable expression templates for time intervals [#52335](https://github.com/ClickHouse/ClickHouse/pull/52335) ([Alexander Tokmakov](https://github.com/tavplubix)). -* Fix `apply_snapshot` in Keeper [#52358](https://github.com/ClickHouse/ClickHouse/pull/52358) ([Antonio Andelic](https://github.com/antonio2368)). -* Update build-osx.md [#52377](https://github.com/ClickHouse/ClickHouse/pull/52377) ([AlexBykovski](https://github.com/AlexBykovski)). -* Fix `countSubstrings` hang with empty needle and a column haystack [#52409](https://github.com/ClickHouse/ClickHouse/pull/52409) ([Sergei Trifonov](https://github.com/serxa)). -* Fix normal projection with merge table [#52432](https://github.com/ClickHouse/ClickHouse/pull/52432) ([Amos Bird](https://github.com/amosbird)). -* Fix possible double-free in Aggregator [#52439](https://github.com/ClickHouse/ClickHouse/pull/52439) ([Nikita Taranov](https://github.com/nickitat)). -* Fixed inserting into Buffer engine [#52440](https://github.com/ClickHouse/ClickHouse/pull/52440) ([Vasily Nemkov](https://github.com/Enmk)). -* The implementation of AnyHash was non-conformant. [#52448](https://github.com/ClickHouse/ClickHouse/pull/52448) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
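The `countSubstrings` hang fixed in #52409 above involved an empty needle with a non-constant haystack, e.g.:

```sql
-- materialize() forces a full column haystack; the empty needle used to hang (#52409).
SELECT countSubstrings(materialize('abcabc'), '');
```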
-* Check recursion depth in OptimizedRegularExpression [#52451](https://github.com/ClickHouse/ClickHouse/pull/52451) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Fix data-race DatabaseReplicated::startupTables()/canExecuteReplicatedMetadataAlter() [#52490](https://github.com/ClickHouse/ClickHouse/pull/52490) ([Azat Khuzhin](https://github.com/azat)). -* Fix abort in function `transform` [#52513](https://github.com/ClickHouse/ClickHouse/pull/52513) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Fix lightweight delete after drop of projection [#52517](https://github.com/ClickHouse/ClickHouse/pull/52517) ([Anton Popov](https://github.com/CurtizJ)). -* Fix possible error "Cannot drain connections: cancel first" [#52585](https://github.com/ClickHouse/ClickHouse/pull/52585) ([Kruglov Pavel](https://github.com/Avogar)). - - -### ClickHouse release 23.6, 2023-06-29 - -#### Backward Incompatible Change -* Delete feature `do_not_evict_index_and_mark_files` in the fs cache. This feature was only making things worse. [#51253](https://github.com/ClickHouse/ClickHouse/pull/51253) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Remove ALTER support for experimental LIVE VIEW. [#51287](https://github.com/ClickHouse/ClickHouse/pull/51287) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Decrease the default values for `http_max_field_value_size` and `http_max_field_name_size` to 128 KiB. [#51163](https://github.com/ClickHouse/ClickHouse/pull/51163) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). -* CGroups metrics related to CPU are replaced with one metric, `CGroupMaxCPU` for better usability. The `Normalized` CPU usage metrics will be normalized to CGroups limits instead of the total number of CPUs when they are set. This closes [#50836](https://github.com/ClickHouse/ClickHouse/issues/50836). [#50835](https://github.com/ClickHouse/ClickHouse/pull/50835) ([Alexey Milovidov](https://github.com/alexey-milovidov)). - -#### New Feature -* The function `transform` as well as `CASE` with value matching started to support all data types. This closes [#29730](https://github.com/ClickHouse/ClickHouse/issues/29730). This closes [#32387](https://github.com/ClickHouse/ClickHouse/issues/32387). This closes [#50827](https://github.com/ClickHouse/ClickHouse/issues/50827). This closes [#31336](https://github.com/ClickHouse/ClickHouse/issues/31336). This closes [#40493](https://github.com/ClickHouse/ClickHouse/issues/40493). [#51351](https://github.com/ClickHouse/ClickHouse/pull/51351) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Added option `--rename_files_after_processing `. This closes [#34207](https://github.com/ClickHouse/ClickHouse/issues/34207). [#49626](https://github.com/ClickHouse/ClickHouse/pull/49626) ([alekseygolub](https://github.com/alekseygolub)). -* Add support for `TRUNCATE` modifier in `INTO OUTFILE` clause. Suggest using `APPEND` or `TRUNCATE` for `INTO OUTFILE` when file exists. [#50950](https://github.com/ClickHouse/ClickHouse/pull/50950) ([alekar](https://github.com/alekar)). -* Add table engine `Redis` and table function `redis`. It allows querying external Redis servers. [#50150](https://github.com/ClickHouse/ClickHouse/pull/50150) ([JackyWoo](https://github.com/JackyWoo)). -* Allow to skip empty files in file/s3/url/hdfs table functions using settings `s3_skip_empty_files`, `hdfs_skip_empty_files`, `engine_file_skip_empty_files`, `engine_url_skip_empty_files`. 
[#50364](https://github.com/ClickHouse/ClickHouse/pull/50364) ([Kruglov Pavel](https://github.com/Avogar)). -* Add a new setting named `use_mysql_types_in_show_columns` to alter the `SHOW COLUMNS` SQL statement to display MySQL equivalent types when a client is connected via the MySQL compatibility port. [#49577](https://github.com/ClickHouse/ClickHouse/pull/49577) ([Thomas Panetti](https://github.com/tpanetti)). -* Clickhouse-client can now be called with a connection string instead of "--host", "--port", "--user" etc. [#50689](https://github.com/ClickHouse/ClickHouse/pull/50689) ([Alexey Gerasimchuck](https://github.com/Demilivor)). -* Add setting `session_timezone`; it is used as the default timezone for a session when not explicitly specified. [#44149](https://github.com/ClickHouse/ClickHouse/pull/44149) ([Andrey Zvonov](https://github.com/zvonand)). -* Codec DEFLATE_QPL is now controlled via server setting "enable_deflate_qpl_codec" (default: false) instead of setting "allow_experimental_codecs". This marks DEFLATE_QPL non-experimental. [#50775](https://github.com/ClickHouse/ClickHouse/pull/50775) ([Robert Schulze](https://github.com/rschu1ze)). - -#### Performance Improvement -* Improved scheduling of merge selecting and cleanup tasks in `ReplicatedMergeTree`. The tasks will not be executed too frequently when there's nothing to merge or cleanup. Added settings `max_merge_selecting_sleep_ms`, `merge_selecting_sleep_slowdown_factor`, `max_cleanup_delay_period` and `cleanup_thread_preferred_points_per_iteration`. It should close [#31919](https://github.com/ClickHouse/ClickHouse/issues/31919). [#50107](https://github.com/ClickHouse/ClickHouse/pull/50107) ([Alexander Tokmakov](https://github.com/tavplubix)). -* Make filter push down through cross join. [#50605](https://github.com/ClickHouse/ClickHouse/pull/50605) ([Han Fei](https://github.com/hanfei1991)). -* Improve performance with enabled QueryProfiler using thread-local timer_id instead of global object. [#48778](https://github.com/ClickHouse/ClickHouse/pull/48778) ([Jiebin Sun](https://github.com/jiebinn)). -* Rewrite CapnProto input/output format to improve its performance. Map column names and CapnProto fields case insensitive, fix reading/writing of nested structure fields. [#49752](https://github.com/ClickHouse/ClickHouse/pull/49752) ([Kruglov Pavel](https://github.com/Avogar)). -* Optimize parquet write performance for parallel threads. [#50102](https://github.com/ClickHouse/ClickHouse/pull/50102) ([Hongbin Ma](https://github.com/binmahone)). -* Disable `parallelize_output_from_storages` for processing MATERIALIZED VIEWs and storages with one block only. [#50214](https://github.com/ClickHouse/ClickHouse/pull/50214) ([Azat Khuzhin](https://github.com/azat)). -* Merge PR [#46558](https://github.com/ClickHouse/ClickHouse/pull/46558). Avoid block permutation during sort if the block is already sorted. [#50697](https://github.com/ClickHouse/ClickHouse/pull/50697) ([Alexey Milovidov](https://github.com/alexey-milovidov), [Maksim Kita](https://github.com/kitaisreal)). -* Make multiple list requests to ZooKeeper in parallel to speed up reading from system.zookeeper table. [#51042](https://github.com/ClickHouse/ClickHouse/pull/51042) ([Alexander Gololobov](https://github.com/davenger)). -* Speedup initialization of DateTime lookup tables for time zones. This should reduce startup/connect time of clickhouse-client especially in debug build as it is rather heavy. 
[#51347](https://github.com/ClickHouse/ClickHouse/pull/51347) ([Alexander Gololobov](https://github.com/davenger)). -* Fix data lakes slowness because of synchronous head requests. (Related to Iceberg/Deltalake/Hudi being slow with a lot of files). [#50976](https://github.com/ClickHouse/ClickHouse/pull/50976) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Do not read all the columns from right GLOBAL JOIN table. [#50721](https://github.com/ClickHouse/ClickHouse/pull/50721) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). - -#### Experimental Feature -* Support parallel replicas with the analyzer. [#50441](https://github.com/ClickHouse/ClickHouse/pull/50441) ([Raúl Marín](https://github.com/Algunenano)). -* Add random sleep before large merges/mutations execution to split load more evenly between replicas in case of zero-copy replication. [#51282](https://github.com/ClickHouse/ClickHouse/pull/51282) ([alesapin](https://github.com/alesapin)). -* Do not replicate `ALTER PARTITION` queries and mutations through `Replicated` database if it has only one shard and the underlying table is `ReplicatedMergeTree`. [#51049](https://github.com/ClickHouse/ClickHouse/pull/51049) ([Alexander Tokmakov](https://github.com/tavplubix)). - -#### Improvement -* Relax the thresholds for "too many parts" to be more modern. Return the backpressure during long-running insert queries. [#50856](https://github.com/ClickHouse/ClickHouse/pull/50856) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Allow to cast IPv6 to IPv4 address for CIDR ::ffff:0:0/96 (IPv4-mapped addresses). [#49759](https://github.com/ClickHouse/ClickHouse/pull/49759) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). -* Update MongoDB protocol to support MongoDB 5.1 version and newer. Support for the versions with the old protocol (<3.6) is preserved. Closes [#45621](https://github.com/ClickHouse/ClickHouse/issues/45621), [#49879](https://github.com/ClickHouse/ClickHouse/issues/49879). [#50061](https://github.com/ClickHouse/ClickHouse/pull/50061) ([Nikolay Degterinsky](https://github.com/evillique)). -* Add setting `input_format_max_bytes_to_read_for_schema_inference` to limit the number of bytes to read in schema inference. Closes [#50577](https://github.com/ClickHouse/ClickHouse/issues/50577). [#50592](https://github.com/ClickHouse/ClickHouse/pull/50592) ([Kruglov Pavel](https://github.com/Avogar)). -* Respect setting `input_format_null_as_default` in schema inference. [#50602](https://github.com/ClickHouse/ClickHouse/pull/50602) ([Kruglov Pavel](https://github.com/Avogar)). -* Allow to skip trailing empty lines in CSV/TSV/CustomSeparated formats via settings `input_format_csv_skip_trailing_empty_lines`, `input_format_tsv_skip_trailing_empty_lines` and `input_format_custom_skip_trailing_empty_lines` (disabled by default). Closes [#49315](https://github.com/ClickHouse/ClickHouse/issues/49315). [#50635](https://github.com/ClickHouse/ClickHouse/pull/50635) ([Kruglov Pavel](https://github.com/Avogar)). -* Functions "toDateOrDefault|OrNull" and "accuateCast[OrDefault|OrNull]" now correctly parse numeric arguments. [#50709](https://github.com/ClickHouse/ClickHouse/pull/50709) ([Dmitry Kardymon](https://github.com/kardymonds)). -* Support CSV with whitespace or `\t` field delimiters, and these delimiters are supported in Spark. [#50712](https://github.com/ClickHouse/ClickHouse/pull/50712) ([KevinyhZou](https://github.com/KevinyhZou)). 
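The IPv4-mapped IPv6 cast allowed by #49759 above looks like:

```sql
-- Casting an IPv4-mapped IPv6 address (::ffff:0:0/96) down to IPv4 (#49759).
SELECT CAST(toIPv6('::ffff:192.168.0.1') AS IPv4);
```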
-* Settings `number_of_mutations_to_delay` and `number_of_mutations_to_throw` are enabled by default now with values 500 and 1000 respectively. [#50726](https://github.com/ClickHouse/ClickHouse/pull/50726) ([Anton Popov](https://github.com/CurtizJ)). -* The dashboard correctly shows missing values. This closes [#50831](https://github.com/ClickHouse/ClickHouse/issues/50831). [#50832](https://github.com/ClickHouse/ClickHouse/pull/50832) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Added the possibility to use date and time arguments in the syslog timestamp format in functions `parseDateTimeBestEffort*` and `parseDateTime64BestEffort*`. [#50925](https://github.com/ClickHouse/ClickHouse/pull/50925) ([Victor Krasnov](https://github.com/sirvickr)). -* Command line parameter "--password" in clickhouse-client can now be specified only once. [#50966](https://github.com/ClickHouse/ClickHouse/pull/50966) ([Alexey Gerasimchuck](https://github.com/Demilivor)). -* Use `hash_of_all_files` from `system.parts` to check identity of parts during on-cluster backups. [#50997](https://github.com/ClickHouse/ClickHouse/pull/50997) ([Vitaly Baranov](https://github.com/vitlibar)). -* The system table zookeeper_connection connected_time identifies the time when the connection is established (standard format), and session_uptime_elapsed_seconds is added, which labels the duration of the established connection session (in seconds). [#51026](https://github.com/ClickHouse/ClickHouse/pull/51026) ([郭小龙](https://github.com/guoxiaolongzte)). -* Improve the progress bar for file/s3/hdfs/url table functions by using chunk size from source data and using incremental total size counting in each thread. Fix the progress bar for *Cluster functions. This closes [#47250](https://github.com/ClickHouse/ClickHouse/issues/47250). [#51088](https://github.com/ClickHouse/ClickHouse/pull/51088) ([Kruglov Pavel](https://github.com/Avogar)). -* Add total_bytes_to_read to the Progress packet in TCP protocol for better Progress bar. [#51158](https://github.com/ClickHouse/ClickHouse/pull/51158) ([Kruglov Pavel](https://github.com/Avogar)). -* Better checking of data parts on disks with filesystem cache. [#51164](https://github.com/ClickHouse/ClickHouse/pull/51164) ([Anton Popov](https://github.com/CurtizJ)). -* Fix sometimes not correct current_elements_num in fs cache. [#51242](https://github.com/ClickHouse/ClickHouse/pull/51242) ([Kseniia Sumarokova](https://github.com/kssenii)). - -#### Build/Testing/Packaging Improvement -* Add embedded keeper-client to standalone keeper binary. [#50964](https://github.com/ClickHouse/ClickHouse/pull/50964) ([pufit](https://github.com/pufit)). -* Actual LZ4 version is used now. [#50621](https://github.com/ClickHouse/ClickHouse/pull/50621) ([Nikita Taranov](https://github.com/nickitat)). -* ClickHouse server will print the list of changed settings on fatal errors. This closes [#51137](https://github.com/ClickHouse/ClickHouse/issues/51137). [#51138](https://github.com/ClickHouse/ClickHouse/pull/51138) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Allow building ClickHouse with clang-17. [#51300](https://github.com/ClickHouse/ClickHouse/pull/51300) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* [SQLancer](https://github.com/sqlancer/sqlancer) check is considered stable as bugs that were triggered by it are fixed. Now failures of SQLancer check will be reported as failed check status. 
[#51340](https://github.com/ClickHouse/ClickHouse/pull/51340) ([Ilya Yatsishin](https://github.com/qoega)). -* Split huge `RUN` in Dockerfile into smaller conditional. Install the necessary tools on demand in the same `RUN` layer, and remove them after that. Upgrade the OS only once at the beginning. Use a modern way to check the signed repository. Downgrade the base repo to ubuntu:20.04 to address the issues on older docker versions. Upgrade golang version to address golang vulnerabilities. [#51504](https://github.com/ClickHouse/ClickHouse/pull/51504) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Improve aliases for the clickhouse binary (now `ch`/`clickhouse` resolves to `clickhouse-local` or `clickhouse` depending on the arguments) and add bash completion for the new aliases. [#58344](https://github.com/ClickHouse/ClickHouse/pull/58344) ([Azat Khuzhin](https://github.com/azat)). +* Add a CI check that verifies all settings changes are reflected in the settings changes history. [#58555](https://github.com/ClickHouse/ClickHouse/pull/58555) ([Kruglov Pavel](https://github.com/Avogar)). +* Use tables directly attached from S3 in stateful tests. [#58791](https://github.com/ClickHouse/ClickHouse/pull/58791) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Save the whole `fuzzer.log` as an archive instead of the last 100k lines, since `tail -n 100000` often removes lines with table definitions. [#58821](https://github.com/ClickHouse/ClickHouse/pull/58821) ([Dmitry Novik](https://github.com/novikd)). +* Enable Rust on macOS with AArch64 (this adds fuzzy search in the client via skim and the PRQL language; since few people host ClickHouse on Darwin, it is mostly useful for fuzzy search in the client). [#59272](https://github.com/ClickHouse/ClickHouse/pull/59272) ([Azat Khuzhin](https://github.com/azat)). +* Fix aggregation issue in mixed x86_64 and ARM clusters [#59132](https://github.com/ClickHouse/ClickHouse/pull/59132) ([Harry Lee](https://github.com/HarryLeeIBM)). #### Bug Fix (user-visible misbehavior in an official stable release) -* Report loading status for executable dictionaries correctly [#48775](https://github.com/ClickHouse/ClickHouse/pull/48775) ([Anton Kozlov](https://github.com/tonickkozlov)). -* Proper mutation of skip indices and projections [#50104](https://github.com/ClickHouse/ClickHouse/pull/50104) ([Amos Bird](https://github.com/amosbird)). -* Cleanup moving parts [#50489](https://github.com/ClickHouse/ClickHouse/pull/50489) ([vdimir](https://github.com/vdimir)). -* Fix backward compatibility for IP types hashing in aggregate functions [#50551](https://github.com/ClickHouse/ClickHouse/pull/50551) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). -* Fix Log family table return wrong rows count after truncate [#50585](https://github.com/ClickHouse/ClickHouse/pull/50585) ([flynn](https://github.com/ucasfl)). -* Fix bug in `uniqExact` parallel merging [#50590](https://github.com/ClickHouse/ClickHouse/pull/50590) ([Nikita Taranov](https://github.com/nickitat)). -* Revert recent grace hash join changes [#50699](https://github.com/ClickHouse/ClickHouse/pull/50699) ([vdimir](https://github.com/vdimir)). -* Query Cache: Try to fix bad cast from `ColumnConst` to `ColumnVector` [#50704](https://github.com/ClickHouse/ClickHouse/pull/50704) ([Robert Schulze](https://github.com/rschu1ze)).
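The query-cache path touched by #50704 above is exercised by any cached SELECT; a hedged sketch (the query itself is arbitrary):

```sql
-- First run stores the result in the query cache; a repeated run serves it from the cache (#50704).
SELECT toString(number) AS s
FROM numbers(10)
SETTINGS use_query_cache = 1;
```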
-* Avoid storing logs in Keeper containing unknown operation [#50751](https://github.com/ClickHouse/ClickHouse/pull/50751) ([Antonio Andelic](https://github.com/antonio2368)). -* SummingMergeTree support for DateTime64 [#50797](https://github.com/ClickHouse/ClickHouse/pull/50797) ([Jordi Villar](https://github.com/jrdi)). -* Add compatibility setting for non-const timezones [#50834](https://github.com/ClickHouse/ClickHouse/pull/50834) ([Robert Schulze](https://github.com/rschu1ze)). -* Fix hashing of LDAP params in the cache entries [#50865](https://github.com/ClickHouse/ClickHouse/pull/50865) ([Julian Maicher](https://github.com/jmaicher)). -* Fallback to parsing big integer from String instead of exception in Parquet format [#50873](https://github.com/ClickHouse/ClickHouse/pull/50873) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix checking the lock file too often while writing a backup [#50889](https://github.com/ClickHouse/ClickHouse/pull/50889) ([Vitaly Baranov](https://github.com/vitlibar)). -* Do not apply projection if read-in-order was enabled. [#50923](https://github.com/ClickHouse/ClickHouse/pull/50923) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Fix race in the Azure blob storage iterator [#50936](https://github.com/ClickHouse/ClickHouse/pull/50936) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). -* Fix erroneous `sort_description` propagation in `CreatingSets` [#50955](https://github.com/ClickHouse/ClickHouse/pull/50955) ([Nikita Taranov](https://github.com/nickitat)). -* Fix Iceberg v2 optional metadata parsing [#50974](https://github.com/ClickHouse/ClickHouse/pull/50974) ([Kseniia Sumarokova](https://github.com/kssenii)). -* MaterializedMySQL: Keep parentheses for empty table overrides [#50977](https://github.com/ClickHouse/ClickHouse/pull/50977) ([Val Doroshchuk](https://github.com/valbok)). -* Fix crash in BackupCoordinationStageSync::setError() [#51012](https://github.com/ClickHouse/ClickHouse/pull/51012) ([Vitaly Baranov](https://github.com/vitlibar)). -* Fix subtly broken copy-on-write of ColumnLowCardinality dictionary [#51064](https://github.com/ClickHouse/ClickHouse/pull/51064) ([Michael Kolupaev](https://github.com/al13n321)). -* Generate safe IVs [#51086](https://github.com/ClickHouse/ClickHouse/pull/51086) ([Salvatore Mesoraca](https://github.com/aiven-sal)). -* Fix ineffective query cache for SELECTs with subqueries [#51132](https://github.com/ClickHouse/ClickHouse/pull/51132) ([Robert Schulze](https://github.com/rschu1ze)). -* Fix Set index with constant nullable comparison. [#51205](https://github.com/ClickHouse/ClickHouse/pull/51205) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Fix a crash in s3 and s3Cluster functions [#51209](https://github.com/ClickHouse/ClickHouse/pull/51209) ([Nikolay Degterinsky](https://github.com/evillique)). -* Fix a crash with compiled expressions [#51231](https://github.com/ClickHouse/ClickHouse/pull/51231) ([LiuNeng](https://github.com/liuneng1994)). -* Fix use-after-free in StorageURL when switching URLs [#51260](https://github.com/ClickHouse/ClickHouse/pull/51260) ([Michael Kolupaev](https://github.com/al13n321)). -* Updated check for parameterized view [#51272](https://github.com/ClickHouse/ClickHouse/pull/51272) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). -* Fix multiple writing of same file to backup [#51299](https://github.com/ClickHouse/ClickHouse/pull/51299) ([Vitaly Baranov](https://github.com/vitlibar)). 
-* Fix fuzzer failure in ActionsDAG [#51301](https://github.com/ClickHouse/ClickHouse/pull/51301) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Remove garbage from function `transform` [#51350](https://github.com/ClickHouse/ClickHouse/pull/51350) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add join keys conversion for nested LowCardinality [#51550](https://github.com/ClickHouse/ClickHouse/pull/51550) ([vdimir](https://github.com/vdimir)). +* Flatten only true Nested type if flatten_nested=1, not all Array(Tuple) [#56132](https://github.com/ClickHouse/ClickHouse/pull/56132) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix a bug with projections and the `aggregate_functions_null_for_empty` setting during insertion. [#56944](https://github.com/ClickHouse/ClickHouse/pull/56944) ([Amos Bird](https://github.com/amosbird)). +* Fixed potential exception due to stale profile UUID [#57263](https://github.com/ClickHouse/ClickHouse/pull/57263) ([Vasily Nemkov](https://github.com/Enmk)). +* Fix working with read buffers in StreamingFormatExecutor [#57438](https://github.com/ClickHouse/ClickHouse/pull/57438) ([Kruglov Pavel](https://github.com/Avogar)). +* Ignore MVs with dropped target table during pushing to views [#57520](https://github.com/ClickHouse/ClickHouse/pull/57520) ([Kruglov Pavel](https://github.com/Avogar)). +* Eliminate possible race between ALTER_METADATA and MERGE_PARTS [#57755](https://github.com/ClickHouse/ClickHouse/pull/57755) ([Azat Khuzhin](https://github.com/azat)). +* Fix the expression order bug in GROUP BY with ROLLUP [#57786](https://github.com/ClickHouse/ClickHouse/pull/57786) ([Chen768959](https://github.com/Chen768959)). +* A fix for the obsolete "zero-copy" replication feature: Fix lost blobs after dropping a replica with broken detached parts [#58333](https://github.com/ClickHouse/ClickHouse/pull/58333) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Allow users to work with symlinks in user_files_path [#58447](https://github.com/ClickHouse/ClickHouse/pull/58447) ([Duc Canh Le](https://github.com/canhld94)). +* Fix a crash when a Graphite table does not have an aggregate function [#58453](https://github.com/ClickHouse/ClickHouse/pull/58453) ([Duc Canh Le](https://github.com/canhld94)). +* Delay reading from StorageKafka to allow multiple reads in materialized views [#58477](https://github.com/ClickHouse/ClickHouse/pull/58477) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Fix a stupid case of intersecting parts [#58482](https://github.com/ClickHouse/ClickHouse/pull/58482) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Disable MergeTreePrefetchedReadPool for LIMIT-only queries [#58505](https://github.com/ClickHouse/ClickHouse/pull/58505) ([Maksim Kita](https://github.com/kitaisreal)). +* Enable ordinary databases during restoration [#58520](https://github.com/ClickHouse/ClickHouse/pull/58520) ([Jihyuk Bok](https://github.com/tomahawk28)). +* Fix Apache Hive threadpool reading for ORC/Parquet/... [#58537](https://github.com/ClickHouse/ClickHouse/pull/58537) ([sunny](https://github.com/sunny19930321)). +* Hide credentials in `system.backup_log`'s `base_backup_name` column [#58550](https://github.com/ClickHouse/ClickHouse/pull/58550) ([Daniel Pozo Escalona](https://github.com/danipozo)). +* `toStartOfInterval`: fix rounding for millisecond and microsecond values [#58557](https://github.com/ClickHouse/ClickHouse/pull/58557) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
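To illustrate the `toStartOfInterval` entry above, here is a minimal sketch of sub-second rounding; the timestamp literal and the 100 ms bucket size are made-up example values, and exact output depends on the server version:

```sql
-- Round a DateTime64 value down to the start of a 100-millisecond interval.
SELECT
    toDateTime64('2024-01-01 00:00:00.1234', 4) AS ts,
    toStartOfInterval(ts, INTERVAL 100 MILLISECOND) AS bucket;
```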
+* Disable `max_joined_block_rows` in ConcurrentHashJoin [#58595](https://github.com/ClickHouse/ClickHouse/pull/58595) ([vdimir](https://github.com/vdimir)). +* Fix `JOIN ... USING` with Nullable columns in the old analyzer [#58596](https://github.com/ClickHouse/ClickHouse/pull/58596) ([vdimir](https://github.com/vdimir)). +* `makeDateTime64`: Allow non-const fraction argument [#58597](https://github.com/ClickHouse/ClickHouse/pull/58597) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix possible NULL dereference during symbolizing inline frames [#58607](https://github.com/ClickHouse/ClickHouse/pull/58607) ([Azat Khuzhin](https://github.com/azat)). +* Improve isolation of query cache entries under re-created users or role switches [#58611](https://github.com/ClickHouse/ClickHouse/pull/58611) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix broken partition key analysis when doing projection optimization [#58638](https://github.com/ClickHouse/ClickHouse/pull/58638) ([Amos Bird](https://github.com/amosbird)). +* Query cache: Fix per-user quota [#58731](https://github.com/ClickHouse/ClickHouse/pull/58731) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix stream partitioning in parallel window functions [#58739](https://github.com/ClickHouse/ClickHouse/pull/58739) ([Dmitry Novik](https://github.com/novikd)). +* Fix double destroy call on exception throw in addBatchLookupTable8 [#58745](https://github.com/ClickHouse/ClickHouse/pull/58745) ([Raúl Marín](https://github.com/Algunenano)). +* Don't process requests in Keeper during shutdown [#58765](https://github.com/ClickHouse/ClickHouse/pull/58765) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix a null pointer dereference in `SlabsPolygonIndex::find` [#58771](https://github.com/ClickHouse/ClickHouse/pull/58771) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* Fix JSONExtract function for LowCardinality(Nullable) columns [#58808](https://github.com/ClickHouse/ClickHouse/pull/58808) ([vdimir](https://github.com/vdimir)). +* A fix for unexpected accumulation of memory usage while creating a huge number of tables by CREATE and DROP. [#58831](https://github.com/ClickHouse/ClickHouse/pull/58831) ([Maksim Kita](https://github.com/kitaisreal)). +* Allow multiple reads from StorageFileLog in materialized views [#58877](https://github.com/ClickHouse/ClickHouse/pull/58877) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Add a restriction on the access key ID for S3. [#58900](https://github.com/ClickHouse/ClickHouse/pull/58900) ([MikhailBurdukov](https://github.com/MikhailBurdukov)). +* Fix possible crash in clickhouse-local during loading suggestions [#58907](https://github.com/ClickHouse/ClickHouse/pull/58907) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix crash when `indexHint` is used [#58911](https://github.com/ClickHouse/ClickHouse/pull/58911) ([Dmitry Novik](https://github.com/novikd)). +* Fix StorageURL forgetting headers on server restart [#58933](https://github.com/ClickHouse/ClickHouse/pull/58933) ([Michael Kolupaev](https://github.com/al13n321)). +* Analyzer: fix storage replacement with insertion block [#58958](https://github.com/ClickHouse/ClickHouse/pull/58958) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Fix seek in ReadBufferFromZipArchive [#58966](https://github.com/ClickHouse/ClickHouse/pull/58966) ([Michael Kolupaev](https://github.com/al13n321)).
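As a sketch of the `makeDateTime64` change listed above (a non-constant fraction argument is now accepted), something along these lines should work; the `numbers()` source and the concrete date components are only illustrative:

```sql
-- The fractional part comes from a column, not a constant literal.
SELECT makeDateTime64(2024, 1, 1, 12, 0, 0, number) AS dt
FROM numbers(3);
```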
+* A fix for experimental inverted indices (don't use in production): `DROP INDEX` of inverted index now removes all relevant files from persistence [#59040](https://github.com/ClickHouse/ClickHouse/pull/59040) ([mochi](https://github.com/MochiXu)). +* Fix data race on query_factories_info [#59049](https://github.com/ClickHouse/ClickHouse/pull/59049) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Disable "Too many redirects" error retry [#59099](https://github.com/ClickHouse/ClickHouse/pull/59099) ([skyoct](https://github.com/skyoct)). +* Fix not started database shutdown deadlock [#59137](https://github.com/ClickHouse/ClickHouse/pull/59137) ([Sergei Trifonov](https://github.com/serxa)). +* Fix: LIMIT BY and LIMIT in distributed query [#59153](https://github.com/ClickHouse/ClickHouse/pull/59153) ([Igor Nikonov](https://github.com/devcrafter)). +* Fix crash with nullable timezone for `toString` [#59190](https://github.com/ClickHouse/ClickHouse/pull/59190) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* Fix abort in iceberg metadata on bad file paths [#59275](https://github.com/ClickHouse/ClickHouse/pull/59275) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix architecture name in select of Rust target [#59307](https://github.com/ClickHouse/ClickHouse/pull/59307) ([p1rattttt](https://github.com/p1rattttt)). +* Fix a logical error about "not-ready set" for querying from `system.tables` with a subquery in the IN clause. [#59351](https://github.com/ClickHouse/ClickHouse/pull/59351) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). - -### ClickHouse release 23.5, 2023-06-08 - -#### Upgrade Notes -* Compress marks and primary key by default. It significantly reduces the cold query time. Upgrade notes: the support for compressed marks and primary key has been added in version 22.9. If you turned on compressed marks or primary key or installed version 23.5 or newer, which has compressed marks or primary key on by default, you will not be able to downgrade to version 22.8 or earlier. You can also explicitly disable compressed marks or primary keys by specifying the `compress_marks` and `compress_primary_key` settings in the `` section of the server configuration file. **Upgrade notes:** If you upgrade from versions prior to 22.9, you should either upgrade all replicas at once or disable the compression before upgrade, or upgrade through an intermediate version, where the compressed marks are supported but not enabled by default, such as 23.3. [#42587](https://github.com/ClickHouse/ClickHouse/pull/42587) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Make local object storage work consistently with s3 object storage, fix problem with append (closes [#48465](https://github.com/ClickHouse/ClickHouse/issues/48465)), make it configurable as independent storage. The change is backward incompatible because the cache on top of local object storage is not compatible to previous versions. [#48791](https://github.com/ClickHouse/ClickHouse/pull/48791) ([Kseniia Sumarokova](https://github.com/kssenii)). -* The experimental feature "in-memory data parts" is removed. The data format is still supported, but the settings are no-op, and compact or wide parts will be used instead. This closes [#45409](https://github.com/ClickHouse/ClickHouse/issues/45409). [#49429](https://github.com/ClickHouse/ClickHouse/pull/49429) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
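For the compressed marks and primary key upgrade note above, one way to check what a given server currently uses is to query `system.merge_tree_settings`; this is a sketch, and the column set may differ slightly between versions:

```sql
-- Shows whether compressed marks / primary key are enabled,
-- and whether they were changed explicitly in the configuration.
SELECT name, value, changed
FROM system.merge_tree_settings
WHERE name IN ('compress_marks', 'compress_primary_key');
```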
-* Changed default values of settings `parallelize_output_from_storages` and `input_format_parquet_preserve_order`. This allows ClickHouse to reorder rows when reading from files (e.g. CSV or Parquet), greatly improving performance in many cases. To restore the old behavior of preserving order, use `parallelize_output_from_storages = 0`, `input_format_parquet_preserve_order = 1`. [#49479](https://github.com/ClickHouse/ClickHouse/pull/49479) ([Michael Kolupaev](https://github.com/al13n321)). -* Make projections production-ready. Add the `optimize_use_projections` setting to control whether the projections will be selected for SELECT queries. The setting `allow_experimental_projection_optimization` is obsolete and does nothing. [#49719](https://github.com/ClickHouse/ClickHouse/pull/49719) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Mark `joinGet` as non-deterministic (so as `dictGet`). It allows using them in mutations without an extra setting. [#49843](https://github.com/ClickHouse/ClickHouse/pull/49843) ([Azat Khuzhin](https://github.com/azat)). -* Revert the "`groupArray` returns cannot be nullable" change (due to binary compatibility breakage for `groupArray`/`groupArrayLast`/`groupArraySample` over `Nullable` types, which likely will lead to `TOO_LARGE_ARRAY_SIZE` or `CANNOT_READ_ALL_DATA`). [#49971](https://github.com/ClickHouse/ClickHouse/pull/49971) ([Azat Khuzhin](https://github.com/azat)). -* Setting `enable_memory_bound_merging_of_aggregation_results` is enabled by default. If you update from version prior to 22.12, we recommend to set this flag to `false` until update is finished. [#50319](https://github.com/ClickHouse/ClickHouse/pull/50319) ([Nikita Taranov](https://github.com/nickitat)). - -#### New Feature -* Added storage engine AzureBlobStorage and azureBlobStorage table function. The supported set of features is very similar to storage/table function S3 [#50604] (https://github.com/ClickHouse/ClickHouse/pull/50604) ([alesapin](https://github.com/alesapin)) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni). -* Added native ClickHouse Keeper CLI Client, it is available as `clickhouse keeper-client` [#47414](https://github.com/ClickHouse/ClickHouse/pull/47414) ([pufit](https://github.com/pufit)). -* Add `urlCluster` table function. Refactor all *Cluster table functions to reduce code duplication. Make schema inference work for all possible *Cluster function signatures and for named collections. Closes [#38499](https://github.com/ClickHouse/ClickHouse/issues/38499). [#45427](https://github.com/ClickHouse/ClickHouse/pull/45427) ([attack204](https://github.com/attack204)), Pavel Kruglov. -* The query cache can now be used for production workloads. [#47977](https://github.com/ClickHouse/ClickHouse/pull/47977) ([Robert Schulze](https://github.com/rschu1ze)). The query cache can now support queries with totals and extremes modifier. [#48853](https://github.com/ClickHouse/ClickHouse/pull/48853) ([Robert Schulze](https://github.com/rschu1ze)). Make `allow_experimental_query_cache` setting as obsolete for backward-compatibility. It was removed in https://github.com/ClickHouse/ClickHouse/pull/47977. [#49934](https://github.com/ClickHouse/ClickHouse/pull/49934) ([Timur Solodovnikov](https://github.com/tsolodov)). -* Geographical data types (`Point`, `Ring`, `Polygon`, and `MultiPolygon`) are production-ready. [#50022](https://github.com/ClickHouse/ClickHouse/pull/50022) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
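Since the query cache is declared production-ready above, here is a minimal usage sketch, assuming the server keeps the query cache enabled in its default configuration:

```sql
-- The first run computes the result and stores it in the query cache;
-- a repeated run of the same query text can be served from the cache.
SELECT count() FROM numbers(10000000) SETTINGS use_query_cache = 1;
SELECT count() FROM numbers(10000000) SETTINGS use_query_cache = 1;
```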
-* Add schema inference to PostgreSQL, MySQL, MeiliSearch, and SQLite table engines. Closes [#49972](https://github.com/ClickHouse/ClickHouse/issues/49972). [#50000](https://github.com/ClickHouse/ClickHouse/pull/50000) ([Nikolay Degterinsky](https://github.com/evillique)). -* Password type in queries like `CREATE USER u IDENTIFIED BY 'p'` will be automatically set according to the setting `default_password_type` in the `config.xml` on the server. Closes [#42915](https://github.com/ClickHouse/ClickHouse/issues/42915). [#44674](https://github.com/ClickHouse/ClickHouse/pull/44674) ([Nikolay Degterinsky](https://github.com/evillique)). -* Add bcrypt password authentication type. Closes [#34599](https://github.com/ClickHouse/ClickHouse/issues/34599). [#44905](https://github.com/ClickHouse/ClickHouse/pull/44905) ([Nikolay Degterinsky](https://github.com/evillique)). -* Introduces new keyword `INTO OUTFILE 'file.txt' APPEND`. [#48880](https://github.com/ClickHouse/ClickHouse/pull/48880) ([alekar](https://github.com/alekar)). -* Added `system.zookeeper_connection` table that shows information about Keeper connections. [#45245](https://github.com/ClickHouse/ClickHouse/pull/45245) ([mateng915](https://github.com/mateng0915)). -* Add new function `generateRandomStructure` that generates random table structure. It can be used in combination with table function `generateRandom`. [#47409](https://github.com/ClickHouse/ClickHouse/pull/47409) ([Kruglov Pavel](https://github.com/Avogar)). -* Allow the use of `CASE` without an `ELSE` branch and extended `transform` to deal with more types. Also fix some issues that made transform() return incorrect results when decimal types were mixed with other numeric types. [#48300](https://github.com/ClickHouse/ClickHouse/pull/48300) ([Salvatore Mesoraca](https://github.com/aiven-sal)). This closes #2655. This closes #9596. This closes #38666. -* Added [server-side encryption using KMS keys](https://docs.aws.amazon.com/AmazonS3/latest/userguide/UsingKMSEncryption.html) with S3 tables, and the `header` setting with S3 disks. Closes [#48723](https://github.com/ClickHouse/ClickHouse/issues/48723). [#48724](https://github.com/ClickHouse/ClickHouse/pull/48724) ([Johann Gan](https://github.com/johanngan)). -* Add MemoryTracker for the background tasks (merges and mutation). Introduces `merges_mutations_memory_usage_soft_limit` and `merges_mutations_memory_usage_to_ram_ratio` settings that represent the soft memory limit for merges and mutations. If this limit is reached ClickHouse won't schedule new merge or mutation tasks. Also `MergesMutationsMemoryTracking` metric is introduced to allow observing current memory usage of background tasks. Resubmit [#46089](https://github.com/ClickHouse/ClickHouse/issues/46089). Closes [#48774](https://github.com/ClickHouse/ClickHouse/issues/48774). [#48787](https://github.com/ClickHouse/ClickHouse/pull/48787) ([Dmitry Novik](https://github.com/novikd)). -* Function `dotProduct` work for array. [#49050](https://github.com/ClickHouse/ClickHouse/pull/49050) ([FFFFFFFHHHHHHH](https://github.com/FFFFFFFHHHHHHH)). -* Support statement `SHOW INDEX` to improve compatibility with MySQL. [#49158](https://github.com/ClickHouse/ClickHouse/pull/49158) ([Robert Schulze](https://github.com/rschu1ze)). -* Add virtual column `_file` and `_path` support to table function `url`. - Improve error message for table function `url`. 
- resolves [#49231](https://github.com/ClickHouse/ClickHouse/issues/49231) - resolves [#49232](https://github.com/ClickHouse/ClickHouse/issues/49232). [#49356](https://github.com/ClickHouse/ClickHouse/pull/49356) ([Ziyi Tan](https://github.com/Ziy1-Tan)). -* Adding the `grants` field in the users.xml file, which allows specifying grants for users. [#49381](https://github.com/ClickHouse/ClickHouse/pull/49381) ([pufit](https://github.com/pufit)). -* Support full/right join by using grace hash join algorithm. [#49483](https://github.com/ClickHouse/ClickHouse/pull/49483) ([lgbo](https://github.com/lgbo-ustc)). -* `WITH FILL` modifier groups filling by sorting prefix. Controlled by `use_with_fill_by_sorting_prefix` setting (enabled by default). Related to [#33203](https://github.com/ClickHouse/ClickHouse/issues/33203)#issuecomment-1418736794. [#49503](https://github.com/ClickHouse/ClickHouse/pull/49503) ([Igor Nikonov](https://github.com/devcrafter)). -* Clickhouse-client now accepts queries after "--multiquery" when "--query" (or "-q") is absent. example: clickhouse-client --multiquery "select 1; select 2;". [#49870](https://github.com/ClickHouse/ClickHouse/pull/49870) ([Alexey Gerasimchuk](https://github.com/Demilivor)). -* Add separate `handshake_timeout` for receiving Hello packet from replica. Closes [#48854](https://github.com/ClickHouse/ClickHouse/issues/48854). [#49948](https://github.com/ClickHouse/ClickHouse/pull/49948) ([Kruglov Pavel](https://github.com/Avogar)). -* Added a function "space" which repeats a space as many times as specified. [#50103](https://github.com/ClickHouse/ClickHouse/pull/50103) ([Robert Schulze](https://github.com/rschu1ze)). -* Added --input_format_csv_trim_whitespaces option. [#50215](https://github.com/ClickHouse/ClickHouse/pull/50215) ([Alexey Gerasimchuk](https://github.com/Demilivor)). -* Allow the `dictGetAll` function for regexp tree dictionaries to return values from multiple matches as arrays. Closes [#50254](https://github.com/ClickHouse/ClickHouse/issues/50254). [#50255](https://github.com/ClickHouse/ClickHouse/pull/50255) ([Johann Gan](https://github.com/johanngan)). -* Added `toLastDayOfWeek` function to round a date or a date with time up to the nearest Saturday or Sunday. [#50315](https://github.com/ClickHouse/ClickHouse/pull/50315) ([Victor Krasnov](https://github.com/sirvickr)). -* Ability to ignore a skip index by specifying `ignore_data_skipping_indices`. [#50329](https://github.com/ClickHouse/ClickHouse/pull/50329) ([Boris Kuschel](https://github.com/bkuschel)). -* Add `system.user_processes` table and `SHOW USER PROCESSES` query to show memory info and ProfileEvents on user level. [#50492](https://github.com/ClickHouse/ClickHouse/pull/50492) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). -* Add server and format settings `display_secrets_in_show_and_select` for displaying secrets of tables, databases, table functions, and dictionaries. Add privilege `displaySecretsInShowAndSelect` controlling which users can view secrets. [#46528](https://github.com/ClickHouse/ClickHouse/pull/46528) ([Mike Kot](https://github.com/myrrc)). -* Allow to set up a ROW POLICY for all tables that belong to a DATABASE. [#47640](https://github.com/ClickHouse/ClickHouse/pull/47640) ([Ilya Golshtein](https://github.com/ilejn)). - -#### Performance Improvement -* Compress marks and primary key by default. It significantly reduces the cold query time. Upgrade notes: the support for compressed marks and primary key has been added in version 22.9. 
If you turned on compressed marks or primary key or installed version 23.5 or newer, which has compressed marks or primary key on by default, you will not be able to downgrade to version 22.8 or earlier. You can also explicitly disable compressed marks or primary keys by specifying the `compress_marks` and `compress_primary_key` settings in the `` section of the server configuration file. [#42587](https://github.com/ClickHouse/ClickHouse/pull/42587) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* New setting s3_max_inflight_parts_for_one_file sets the limit of concurrently loaded parts with multipart upload request in scope of one file. [#49961](https://github.com/ClickHouse/ClickHouse/pull/49961) ([Sema Checherinda](https://github.com/CheSema)). -* When reading from multiple files reduce parallel parsing threads for each file. Resolves [#42192](https://github.com/ClickHouse/ClickHouse/issues/42192). [#46661](https://github.com/ClickHouse/ClickHouse/pull/46661) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). -* Use aggregate projection only if it reads fewer granules than normal reading. It should help in case if query hits the PK of the table, but not the projection. Fixes [#49150](https://github.com/ClickHouse/ClickHouse/issues/49150). [#49417](https://github.com/ClickHouse/ClickHouse/pull/49417) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Do not store blocks in `ANY` hash join if nothing is inserted. [#48633](https://github.com/ClickHouse/ClickHouse/pull/48633) ([vdimir](https://github.com/vdimir)). -* Fixes aggregate combinator `-If` when JIT compiled, and enable JIT compilation for aggregate functions. Closes [#48120](https://github.com/ClickHouse/ClickHouse/issues/48120). [#49083](https://github.com/ClickHouse/ClickHouse/pull/49083) ([Igor Nikonov](https://github.com/devcrafter)). -* For reading from remote tables we use smaller tasks (instead of reading the whole part) to make tasks stealing work * task size is determined by size of columns to read * always use 1mb buffers for reading from s3 * boundaries of cache segments aligned to 1mb so they have decent size even with small tasks. it also should prevent fragmentation. [#49287](https://github.com/ClickHouse/ClickHouse/pull/49287) ([Nikita Taranov](https://github.com/nickitat)). -* Introduced settings: - `merge_max_block_size_bytes` to limit the amount of memory used for background operations. - `vertical_merge_algorithm_min_bytes_to_activate` to add another condition to activate vertical merges. [#49313](https://github.com/ClickHouse/ClickHouse/pull/49313) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). -* Default size of a read buffer for reading from local filesystem changed to a slightly better value. Also two new settings are introduced: `max_read_buffer_size_local_fs` and `max_read_buffer_size_remote_fs`. [#49321](https://github.com/ClickHouse/ClickHouse/pull/49321) ([Nikita Taranov](https://github.com/nickitat)). -* Improve memory usage and speed of `SPARSE_HASHED`/`HASHED` dictionaries (e.g. `SPARSE_HASHED` now eats 2.6x less memory, and is ~2x faster). [#49380](https://github.com/ClickHouse/ClickHouse/pull/49380) ([Azat Khuzhin](https://github.com/azat)). -* Optimize the `system.query_log` and `system.query_thread_log` tables by applying `LowCardinality` when appropriate. The queries over these tables will be faster. [#49530](https://github.com/ClickHouse/ClickHouse/pull/49530) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
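The `ignore_data_skipping_indices` setting introduced above can be exercised with a small table; the table name, index name, and data below are made up for this sketch:

```sql
-- A table with a minmax skip index on `val`.
CREATE TABLE t_skip_demo
(
    id  UInt64,
    val UInt64,
    INDEX idx_val val TYPE minmax GRANULARITY 1
)
ENGINE = MergeTree
ORDER BY id;

INSERT INTO t_skip_demo SELECT number, number * 10 FROM numbers(1000);

-- Ask the query to ignore the named skip index when selecting granules to read.
SELECT count() FROM t_skip_demo WHERE val = 500
SETTINGS ignore_data_skipping_indices = 'idx_val';
```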
-* Better performance when reading local `Parquet` files (through parallel reading). [#49539](https://github.com/ClickHouse/ClickHouse/pull/49539) ([Michael Kolupaev](https://github.com/al13n321)). -* Improve the performance of `RIGHT/FULL JOIN` by up to 2 times in certain scenarios, especially when joining a small left table with a large right table. [#49585](https://github.com/ClickHouse/ClickHouse/pull/49585) ([lgbo](https://github.com/lgbo-ustc)). -* Improve performance of BLAKE3 by 11% by enabling LTO for Rust. [#49600](https://github.com/ClickHouse/ClickHouse/pull/49600) ([Azat Khuzhin](https://github.com/azat)). Now it is on par with C++. -* Optimize the structure of the `system.opentelemetry_span_log`. Use `LowCardinality` where appropriate. Although this table is generally stupid (it is using the Map data type even for common attributes), it will be slightly better. [#49647](https://github.com/ClickHouse/ClickHouse/pull/49647) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Try to reserve hash table's size in `grace_hash` join. [#49816](https://github.com/ClickHouse/ClickHouse/pull/49816) ([lgbo](https://github.com/lgbo-ustc)). -* Parallel merge of `uniqExactIf` states. Closes [#49885](https://github.com/ClickHouse/ClickHouse/issues/49885). [#50285](https://github.com/ClickHouse/ClickHouse/pull/50285) ([flynn](https://github.com/ucasfl)). -* Keeper improvement: add `CheckNotExists` request to Keeper, which allows to improve the performance of Replicated tables. [#48897](https://github.com/ClickHouse/ClickHouse/pull/48897) ([Antonio Andelic](https://github.com/antonio2368)). -* Keeper performance improvements: avoid serializing same request twice while processing. Cache deserialization results of large requests. Controlled by new coordination setting `min_request_size_for_cache`. [#49004](https://github.com/ClickHouse/ClickHouse/pull/49004) ([Antonio Andelic](https://github.com/antonio2368)). -* Reduced number of `List` ZooKeeper requests when selecting parts to merge and a lot of partitions do not have anything to merge. [#49637](https://github.com/ClickHouse/ClickHouse/pull/49637) ([Alexander Tokmakov](https://github.com/tavplubix)). -* Rework locking in the FS cache [#44985](https://github.com/ClickHouse/ClickHouse/pull/44985) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Disable pure parallel replicas if trivial count optimization is possible. [#50594](https://github.com/ClickHouse/ClickHouse/pull/50594) ([Raúl Marín](https://github.com/Algunenano)). -* Don't send head request for all keys in Iceberg schema inference, only for keys that are used for reaing data. [#50203](https://github.com/ClickHouse/ClickHouse/pull/50203) ([Kruglov Pavel](https://github.com/Avogar)). -* Setting `enable_memory_bound_merging_of_aggregation_results` is enabled by default. [#50319](https://github.com/ClickHouse/ClickHouse/pull/50319) ([Nikita Taranov](https://github.com/nickitat)). - -#### Experimental Feature -* `DEFLATE_QPL` codec lower the minimum simd version to SSE 4.2. [doc change in qpl](https://github.com/intel/qpl/commit/3f8f5cea27739f5261e8fd577dc233ffe88bf679) - Intel® QPL relies on a run-time kernels dispatcher and cpuid check to choose the best available implementation(sse/avx2/avx512) - restructured cmakefile for qpl build in clickhouse to align with latest upstream qpl. [#49811](https://github.com/ClickHouse/ClickHouse/pull/49811) ([jasperzhu](https://github.com/jinjunzh)). -* Add initial support to do JOINs with pure parallel replicas. 
[#49544](https://github.com/ClickHouse/ClickHouse/pull/49544) ([Raúl Marín](https://github.com/Algunenano)). -* More parallelism on `Outdated` parts removal with "zero-copy replication". [#49630](https://github.com/ClickHouse/ClickHouse/pull/49630) ([Alexander Tokmakov](https://github.com/tavplubix)). -* Parallel Replicas: 1) Fixed an error `NOT_FOUND_COLUMN_IN_BLOCK` in case of using parallel replicas with non-replicated storage with disabled setting `parallel_replicas_for_non_replicated_merge_tree` 2) Now `allow_experimental_parallel_reading_from_replicas` have 3 possible values - 0, 1 and 2. 0 - disabled, 1 - enabled, silently disable them in case of failure (in case of FINAL or JOIN), 2 - enabled, throw an exception in case of failure. 3) If FINAL modifier is used in SELECT query and parallel replicas are enabled, ClickHouse will try to disable them if `allow_experimental_parallel_reading_from_replicas` is set to 1 and throw an exception otherwise. [#50195](https://github.com/ClickHouse/ClickHouse/pull/50195) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). -* When parallel replicas are enabled they will always skip unavailable servers (the behavior is controlled by the setting `skip_unavailable_shards`, enabled by default and can be only disabled). This closes: [#48565](https://github.com/ClickHouse/ClickHouse/issues/48565). [#50293](https://github.com/ClickHouse/ClickHouse/pull/50293) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). - -#### Improvement -* The `BACKUP` command will not decrypt data from encrypted disks while making a backup. Instead the data will be stored in a backup in encrypted form. Such backups can be restored only to an encrypted disk with the same (or extended) list of encryption keys. [#48896](https://github.com/ClickHouse/ClickHouse/pull/48896) ([Vitaly Baranov](https://github.com/vitlibar)). -* Added possibility to use temporary tables in FROM part of ATTACH PARTITION FROM and REPLACE PARTITION FROM. [#49436](https://github.com/ClickHouse/ClickHouse/pull/49436) ([Roman Vasin](https://github.com/rvasin)). -* Added setting `async_insert` for `MergeTree` tables. It has the same meaning as query-level setting `async_insert` and enables asynchronous inserts for specific table. Note: it doesn't take effect for insert queries from `clickhouse-client`, use query-level setting in that case. [#49122](https://github.com/ClickHouse/ClickHouse/pull/49122) ([Anton Popov](https://github.com/CurtizJ)). -* Add support for size suffixes in quota creation statement parameters. [#49087](https://github.com/ClickHouse/ClickHouse/pull/49087) ([Eridanus](https://github.com/Eridanus117)). -* Extend `first_value` and `last_value` to accept NULL. [#46467](https://github.com/ClickHouse/ClickHouse/pull/46467) ([lgbo](https://github.com/lgbo-ustc)). -* Add alias `str_to_map` and `mapFromString` for `extractKeyValuePairs`. closes https://github.com/clickhouse/clickhouse/issues/47185. [#49466](https://github.com/ClickHouse/ClickHouse/pull/49466) ([flynn](https://github.com/ucasfl)). -* Add support for CGroup version 2 for asynchronous metrics about the memory usage and availability. This closes [#37983](https://github.com/ClickHouse/ClickHouse/issues/37983). [#45999](https://github.com/ClickHouse/ClickHouse/pull/45999) ([sichenzhao](https://github.com/sichenzhao)). -* Cluster table functions should always skip unavailable shards. close [#46314](https://github.com/ClickHouse/ClickHouse/issues/46314). 
[#46765](https://github.com/ClickHouse/ClickHouse/pull/46765) ([zk_kiger](https://github.com/zk-kiger)). -* Allow CSV file to contain empty columns in its header. [#47496](https://github.com/ClickHouse/ClickHouse/pull/47496) ([你不要过来啊](https://github.com/iiiuwioajdks)). -* Add Google Cloud Storage S3 compatible table function `gcs`. Like the `oss` and `cosn` functions, it is just an alias over the `s3` table function, and it does not bring any new features. [#47815](https://github.com/ClickHouse/ClickHouse/pull/47815) ([Kuba Kaflik](https://github.com/jkaflik)). -* Add ability to use strict parts size for S3 (compatibility with CloudFlare R2 S3 Storage). [#48492](https://github.com/ClickHouse/ClickHouse/pull/48492) ([Azat Khuzhin](https://github.com/azat)). -* Added new columns with info about `Replicated` database replicas to `system.clusters`: `database_shard_name`, `database_replica_name`, `is_active`. Added an optional `FROM SHARD` clause to `SYSTEM DROP DATABASE REPLICA` query. [#48548](https://github.com/ClickHouse/ClickHouse/pull/48548) ([Alexander Tokmakov](https://github.com/tavplubix)). -* Add a new column `zookeeper_name` in system.replicas, to indicate on which (auxiliary) zookeeper cluster the replicated table's metadata is stored. [#48549](https://github.com/ClickHouse/ClickHouse/pull/48549) ([cangyin](https://github.com/cangyin)). -* `IN` operator support the comparison of `Date` and `Date32`. Closes [#48736](https://github.com/ClickHouse/ClickHouse/issues/48736). [#48806](https://github.com/ClickHouse/ClickHouse/pull/48806) ([flynn](https://github.com/ucasfl)). -* Support for erasure codes in `HDFS`, author: @M1eyu2018, @tomscut. [#48833](https://github.com/ClickHouse/ClickHouse/pull/48833) ([M1eyu](https://github.com/M1eyu2018)). -* Implement SYSTEM DROP REPLICA from auxiliary ZooKeeper clusters, may be close [#48931](https://github.com/ClickHouse/ClickHouse/issues/48931). [#48932](https://github.com/ClickHouse/ClickHouse/pull/48932) ([wangxiaobo](https://github.com/wzb5212)). -* Add Array data type to MongoDB. Closes [#48598](https://github.com/ClickHouse/ClickHouse/issues/48598). [#48983](https://github.com/ClickHouse/ClickHouse/pull/48983) ([Nikolay Degterinsky](https://github.com/evillique)). -* Support storing `Interval` data types in tables. [#49085](https://github.com/ClickHouse/ClickHouse/pull/49085) ([larryluogit](https://github.com/larryluogit)). -* Allow using `ntile` window function without explicit window frame definition: `ntile(3) OVER (ORDER BY a)`, close [#46763](https://github.com/ClickHouse/ClickHouse/issues/46763). [#49093](https://github.com/ClickHouse/ClickHouse/pull/49093) ([vdimir](https://github.com/vdimir)). -* Added settings (`number_of_mutations_to_delay`, `number_of_mutations_to_throw`) to delay or throw `ALTER` queries that create mutations (`ALTER UPDATE`, `ALTER DELETE`, `ALTER MODIFY COLUMN`, ...) in case when table already has a lot of unfinished mutations. [#49117](https://github.com/ClickHouse/ClickHouse/pull/49117) ([Anton Popov](https://github.com/CurtizJ)). -* Catch exception from `create_directories` in filesystem cache. [#49203](https://github.com/ClickHouse/ClickHouse/pull/49203) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Copies embedded examples to a new field `example` in `system.functions` to supplement the field `description`. [#49222](https://github.com/ClickHouse/ClickHouse/pull/49222) ([Dan Roscigno](https://github.com/DanRoscigno)). -* Enable connection options for the MongoDB dictionary. 
Example: ``` xml localhost 27017 test dictionary_source ssl=true ``` ### Documentation entry for user-facing changes. [#49225](https://github.com/ClickHouse/ClickHouse/pull/49225) ([MikhailBurdukov](https://github.com/MikhailBurdukov)). -* Added an alias `asymptotic` for `asymp` computational method for `kolmogorovSmirnovTest`. Improved documentation. [#49286](https://github.com/ClickHouse/ClickHouse/pull/49286) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). -* Aggregation function groupBitAnd/Or/Xor now work on signed integer data. This makes them consistent with the behavior of scalar functions bitAnd/Or/Xor. [#49292](https://github.com/ClickHouse/ClickHouse/pull/49292) ([exmy](https://github.com/exmy)). -* Split function-documentation into more fine-granular fields. [#49300](https://github.com/ClickHouse/ClickHouse/pull/49300) ([Robert Schulze](https://github.com/rschu1ze)). -* Use multiple threads shared between all tables within a server to load outdated data parts. The the size of the pool and its queue is controlled by `max_outdated_parts_loading_thread_pool_size` and `outdated_part_loading_thread_pool_queue_size` settings. [#49317](https://github.com/ClickHouse/ClickHouse/pull/49317) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). -* Don't overestimate the size of processed data for `LowCardinality` columns when they share dictionaries between blocks. This closes [#49322](https://github.com/ClickHouse/ClickHouse/issues/49322). See also [#48745](https://github.com/ClickHouse/ClickHouse/issues/48745). [#49323](https://github.com/ClickHouse/ClickHouse/pull/49323) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Parquet writer now uses reasonable row group size when invoked through `OUTFILE`. [#49325](https://github.com/ClickHouse/ClickHouse/pull/49325) ([Michael Kolupaev](https://github.com/al13n321)). -* Allow restricted keywords like `ARRAY` as an alias if the alias is quoted. Closes [#49324](https://github.com/ClickHouse/ClickHouse/issues/49324). [#49360](https://github.com/ClickHouse/ClickHouse/pull/49360) ([Nikolay Degterinsky](https://github.com/evillique)). -* Data parts loading and deletion jobs were moved to shared server-wide pools instead of per-table pools. Pools sizes are controlled via settings `max_active_parts_loading_thread_pool_size`, `max_outdated_parts_loading_thread_pool_size` and `max_parts_cleaning_thread_pool_size` in top-level config. Table-level settings `max_part_loading_threads` and `max_part_removal_threads` became obsolete. [#49474](https://github.com/ClickHouse/ClickHouse/pull/49474) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). -* Allow `?password=pass` in URL of the Play UI. Password is replaced in browser history. [#49505](https://github.com/ClickHouse/ClickHouse/pull/49505) ([Mike Kot](https://github.com/myrrc)). -* Allow reading zero-size objects from remote filesystems. (because empty files are not backup'd, so we might end up with zero blobs in metadata file). Closes [#49480](https://github.com/ClickHouse/ClickHouse/issues/49480). [#49519](https://github.com/ClickHouse/ClickHouse/pull/49519) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Attach thread MemoryTracker to `total_memory_tracker` after `ThreadGroup` detached. [#49527](https://github.com/ClickHouse/ClickHouse/pull/49527) ([Dmitry Novik](https://github.com/novikd)). -* Fix parameterized views when a query parameter is used multiple times in the query. 
[#49556](https://github.com/ClickHouse/ClickHouse/pull/49556) ([Azat Khuzhin](https://github.com/azat)). -* Release memory allocated for the last sent ProfileEvents snapshot in the context of a query. Followup [#47564](https://github.com/ClickHouse/ClickHouse/issues/47564). [#49561](https://github.com/ClickHouse/ClickHouse/pull/49561) ([Dmitry Novik](https://github.com/novikd)). -* Function "makeDate" now provides a MySQL-compatible overload (year & day of the year argument). [#49603](https://github.com/ClickHouse/ClickHouse/pull/49603) ([Robert Schulze](https://github.com/rschu1ze)). -* Support `dictionary` table function for `RegExpTreeDictionary`. [#49666](https://github.com/ClickHouse/ClickHouse/pull/49666) ([Han Fei](https://github.com/hanfei1991)). -* Added weighted fair IO scheduling policy. Added dynamic resource manager, which allows IO scheduling hierarchy to be updated in runtime w/o server restarts. [#49671](https://github.com/ClickHouse/ClickHouse/pull/49671) ([Sergei Trifonov](https://github.com/serxa)). -* Add compose request after multipart upload to GCS. This enables the usage of copy operation on objects uploaded with the multipart upload. It's recommended to set `s3_strict_upload_part_size` to some value because compose request can fail on objects created with parts of different sizes. [#49693](https://github.com/ClickHouse/ClickHouse/pull/49693) ([Antonio Andelic](https://github.com/antonio2368)). -* For the `extractKeyValuePairs` function: improve the "best-effort" parsing logic to accept `key_value_delimiter` as a valid part of the value. This also simplifies branching and might even speed up things a bit. [#49760](https://github.com/ClickHouse/ClickHouse/pull/49760) ([Arthur Passos](https://github.com/arthurpassos)). -* Add `initial_query_id` field for system.processors_profile_log [#49777](https://github.com/ClickHouse/ClickHouse/pull/49777) ([helifu](https://github.com/helifu)). -* System log tables can now have custom sorting keys. [#49778](https://github.com/ClickHouse/ClickHouse/pull/49778) ([helifu](https://github.com/helifu)). -* A new field `partitions` to `system.query_log` is used to indicate which partitions are participating in the calculation. [#49779](https://github.com/ClickHouse/ClickHouse/pull/49779) ([helifu](https://github.com/helifu)). -* Added `enable_the_endpoint_id_with_zookeeper_name_prefix` setting for `ReplicatedMergeTree` (disabled by default). When enabled, it adds ZooKeeper cluster name to table's interserver communication endpoint. It avoids `Duplicate interserver IO endpoint` errors when having replicated tables with the same path, but different auxiliary ZooKeepers. [#49780](https://github.com/ClickHouse/ClickHouse/pull/49780) ([helifu](https://github.com/helifu)). -* Add query parameters to `clickhouse-local`. Closes [#46561](https://github.com/ClickHouse/ClickHouse/issues/46561). [#49785](https://github.com/ClickHouse/ClickHouse/pull/49785) ([Nikolay Degterinsky](https://github.com/evillique)). -* Allow loading dictionaries and functions from YAML by default. In previous versions, it required editing the `dictionaries_config` or `user_defined_executable_functions_config` in the configuration file, as they expected `*.xml` files. [#49812](https://github.com/ClickHouse/ClickHouse/pull/49812) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* The Kafka table engine now allows to use alias columns. [#49824](https://github.com/ClickHouse/ClickHouse/pull/49824) ([Aleksandr Musorin](https://github.com/AVMusorin)). 
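The MySQL-compatible `makeDate` overload mentioned above (year plus day of the year) can be sketched as follows; the concrete values are only an example:

```sql
-- Two-argument form: year and day of the year.
-- Day 159 of 2023 should correspond to 2023-06-08.
SELECT makeDate(2023, 159) AS d;
```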
-* Add setting to limit the max number of pairs produced by `extractKeyValuePairs`, a safeguard to avoid using way too much memory. [#49836](https://github.com/ClickHouse/ClickHouse/pull/49836) ([Arthur Passos](https://github.com/arthurpassos)). -* Add support for (an unusual) case where the arguments in the `IN` operator are single-element tuples. [#49844](https://github.com/ClickHouse/ClickHouse/pull/49844) ([MikhailBurdukov](https://github.com/MikhailBurdukov)). -* `bitHammingDistance` function support `String` and `FixedString` data type. Closes [#48827](https://github.com/ClickHouse/ClickHouse/issues/48827). [#49858](https://github.com/ClickHouse/ClickHouse/pull/49858) ([flynn](https://github.com/ucasfl)). -* Fix timeout resetting errors in the client on OS X. [#49863](https://github.com/ClickHouse/ClickHouse/pull/49863) ([alekar](https://github.com/alekar)). -* Add support for big integers, such as UInt128, Int128, UInt256, and Int256 in the function `bitCount`. This enables Hamming distance over large bit masks for AI applications. [#49867](https://github.com/ClickHouse/ClickHouse/pull/49867) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Fingerprints to be used instead of key IDs in encrypted disks. This simplifies the configuration of encrypted disks. [#49882](https://github.com/ClickHouse/ClickHouse/pull/49882) ([Vitaly Baranov](https://github.com/vitlibar)). -* Add UUID data type to PostgreSQL. Closes [#49739](https://github.com/ClickHouse/ClickHouse/issues/49739). [#49894](https://github.com/ClickHouse/ClickHouse/pull/49894) ([Nikolay Degterinsky](https://github.com/evillique)). -* Function `toUnixTimestamp` now accepts `Date` and `Date32` arguments. [#49989](https://github.com/ClickHouse/ClickHouse/pull/49989) ([Victor Krasnov](https://github.com/sirvickr)). -* Charge only server memory for dictionaries. [#49995](https://github.com/ClickHouse/ClickHouse/pull/49995) ([Azat Khuzhin](https://github.com/azat)). -* The server will allow using the `SQL_*` settings such as `SQL_AUTO_IS_NULL` as no-ops for MySQL compatibility. This closes [#49927](https://github.com/ClickHouse/ClickHouse/issues/49927). [#50013](https://github.com/ClickHouse/ClickHouse/pull/50013) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Preserve initial_query_id for ON CLUSTER queries, which is useful for introspection (under `distributed_ddl_entry_format_version=5`). [#50015](https://github.com/ClickHouse/ClickHouse/pull/50015) ([Azat Khuzhin](https://github.com/azat)). -* Preserve backward incompatibility for renamed settings by using aliases (`allow_experimental_projection_optimization` for `optimize_use_projections`, `allow_experimental_lightweight_delete` for `enable_lightweight_delete`). [#50044](https://github.com/ClickHouse/ClickHouse/pull/50044) ([Azat Khuzhin](https://github.com/azat)). -* Support passing FQDN through setting my_hostname to register cluster node in keeper. Add setting of invisible to support multi compute groups. A compute group as a cluster, is invisible to other compute groups. [#50186](https://github.com/ClickHouse/ClickHouse/pull/50186) ([Yangkuan Liu](https://github.com/LiuYangkuan)). -* Fix PostgreSQL reading all the data even though `LIMIT n` could be specified. [#50187](https://github.com/ClickHouse/ClickHouse/pull/50187) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Add new profile events for queries with subqueries (`QueriesWithSubqueries`/`SelectQueriesWithSubqueries`/`InsertQueriesWithSubqueries`). 
[#50204](https://github.com/ClickHouse/ClickHouse/pull/50204) ([Azat Khuzhin](https://github.com/azat)). -* Adding the roles field in the users.xml file, which allows specifying roles with grants via a config file. [#50278](https://github.com/ClickHouse/ClickHouse/pull/50278) ([pufit](https://github.com/pufit)). -* Report `CGroupCpuCfsPeriod` and `CGroupCpuCfsQuota` in AsynchronousMetrics. - Respect cgroup v2 memory limits during server startup. [#50379](https://github.com/ClickHouse/ClickHouse/pull/50379) ([alekar](https://github.com/alekar)). -* Add a signal handler for SIGQUIT to work the same way as SIGINT. Closes [#50298](https://github.com/ClickHouse/ClickHouse/issues/50298). [#50435](https://github.com/ClickHouse/ClickHouse/pull/50435) ([Nikolay Degterinsky](https://github.com/evillique)). -* In case JSON parse fails due to the large size of the object output the last position to allow debugging. [#50474](https://github.com/ClickHouse/ClickHouse/pull/50474) ([Valentin Alexeev](https://github.com/valentinalexeev)). -* Support decimals with not fixed size. Closes [#49130](https://github.com/ClickHouse/ClickHouse/issues/49130). [#50586](https://github.com/ClickHouse/ClickHouse/pull/50586) ([Kruglov Pavel](https://github.com/Avogar)). - -#### Build/Testing/Packaging Improvement -* New and improved `keeper-bench`. Everything can be customized from YAML/XML file: - request generator - each type of request generator can have a specific set of fields - multi requests can be generated just by doing the same under `multi` key - for each request or subrequest in multi a `weight` field can be defined to control distribution - define trees that need to be setup for a test run - hosts can be defined with all timeouts customizable and it's possible to control how many sessions to generate for each host - integers defined with `min_value` and `max_value` fields are random number generators. [#48547](https://github.com/ClickHouse/ClickHouse/pull/48547) ([Antonio Andelic](https://github.com/antonio2368)). -* Io_uring is not supported on macos, don't choose it when running tests on local to avoid occasional failures. [#49250](https://github.com/ClickHouse/ClickHouse/pull/49250) ([Frank Chen](https://github.com/FrankChen021)). -* Support named fault injection for testing. [#49361](https://github.com/ClickHouse/ClickHouse/pull/49361) ([Han Fei](https://github.com/hanfei1991)). -* Allow running ClickHouse in the OS where the `prctl` (process control) syscall is not available, such as AWS Lambda. [#49538](https://github.com/ClickHouse/ClickHouse/pull/49538) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Fixed the issue of build conflict between contrib/isa-l and isa-l in qpl [49296](https://github.com/ClickHouse/ClickHouse/issues/49296). [#49584](https://github.com/ClickHouse/ClickHouse/pull/49584) ([jasperzhu](https://github.com/jinjunzh)). -* Utilities are now only build if explicitly requested ("-DENABLE_UTILS=1") instead of by default, this reduces link times in typical development builds. [#49620](https://github.com/ClickHouse/ClickHouse/pull/49620) ([Robert Schulze](https://github.com/rschu1ze)). -* Pull build description of idxd-config into a separate CMake file to avoid accidental removal in future. [#49651](https://github.com/ClickHouse/ClickHouse/pull/49651) ([jasperzhu](https://github.com/jinjunzh)). -* Add CI check with an enabled analyzer in the master. Follow-up [#49562](https://github.com/ClickHouse/ClickHouse/issues/49562). 
[#49668](https://github.com/ClickHouse/ClickHouse/pull/49668) ([Dmitry Novik](https://github.com/novikd)). -* Switch to LLVM/clang 16. [#49678](https://github.com/ClickHouse/ClickHouse/pull/49678) ([Azat Khuzhin](https://github.com/azat)). -* Allow building ClickHouse with clang-17. [#49851](https://github.com/ClickHouse/ClickHouse/pull/49851) ([Alexey Milovidov](https://github.com/alexey-milovidov)). [#50410](https://github.com/ClickHouse/ClickHouse/pull/50410) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* ClickHouse is now easier to be integrated into other cmake projects. [#49991](https://github.com/ClickHouse/ClickHouse/pull/49991) ([Amos Bird](https://github.com/amosbird)). (Which is strongly discouraged - Alexey Milovidov). -* Fix strange additional QEMU logging after [#47151](https://github.com/ClickHouse/ClickHouse/issues/47151), see https://s3.amazonaws.com/clickhouse-test-reports/50078/a4743996ee4f3583884d07bcd6501df0cfdaa346/stateless_tests__release__databasereplicated__[3_4].html. [#50442](https://github.com/ClickHouse/ClickHouse/pull/50442) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). -* ClickHouse can work on Linux RISC-V 6.1.22. This closes [#50456](https://github.com/ClickHouse/ClickHouse/issues/50456). [#50457](https://github.com/ClickHouse/ClickHouse/pull/50457) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Bump internal protobuf to v3.18 (fixes bogus CVE-2022-1941). [#50400](https://github.com/ClickHouse/ClickHouse/pull/50400) ([Robert Schulze](https://github.com/rschu1ze)). -* Bump internal libxml2 to v2.10.4 (fixes bogus CVE-2023-28484 and bogus CVE-2023-29469). [#50402](https://github.com/ClickHouse/ClickHouse/pull/50402) ([Robert Schulze](https://github.com/rschu1ze)). -* Bump c-ares to v1.19.1 (bogus CVE-2023-32067, bogus CVE-2023-31130, bogus CVE-2023-31147). [#50403](https://github.com/ClickHouse/ClickHouse/pull/50403) ([Robert Schulze](https://github.com/rschu1ze)). -* Fix bogus CVE-2022-2469 in libgsasl. [#50404](https://github.com/ClickHouse/ClickHouse/pull/50404) ([Robert Schulze](https://github.com/rschu1ze)). - -#### Bug Fix (user-visible misbehavior in an official stable release) - -* ActionsDAG: fix wrong optimization [#47584](https://github.com/ClickHouse/ClickHouse/pull/47584) ([Salvatore Mesoraca](https://github.com/aiven-sal)). -* Correctly handle concurrent snapshots in Keeper [#48466](https://github.com/ClickHouse/ClickHouse/pull/48466) ([Antonio Andelic](https://github.com/antonio2368)). -* MergeTreeMarksLoader holds DataPart instead of DataPartStorage [#48515](https://github.com/ClickHouse/ClickHouse/pull/48515) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). -* Sequence state fix [#48603](https://github.com/ClickHouse/ClickHouse/pull/48603) ([Ilya Golshtein](https://github.com/ilejn)). -* Back/Restore concurrency check on previous fails [#48726](https://github.com/ClickHouse/ClickHouse/pull/48726) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). -* Fix Attaching a table with non-existent ZK path does not increase the ReadonlyReplica metric [#48954](https://github.com/ClickHouse/ClickHouse/pull/48954) ([wangxiaobo](https://github.com/wzb5212)). -* Fix possible terminate called for uncaught exception in some places [#49112](https://github.com/ClickHouse/ClickHouse/pull/49112) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix key not found error for queries with multiple StorageJoin [#49137](https://github.com/ClickHouse/ClickHouse/pull/49137) ([vdimir](https://github.com/vdimir)). 
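As a sketch for the `toUnixTimestamp` change listed earlier (it now accepts `Date` and `Date32` arguments directly):

```sql
-- Both Date and Date32 arguments yield the Unix timestamp for the start of that day
-- (timezone handling follows the server defaults).
SELECT
    toUnixTimestamp(toDate('2023-06-08'))   AS from_date,
    toUnixTimestamp(toDate32('2023-06-08')) AS from_date32;
```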
-* Fix wrong query result when using nullable primary key [#49172](https://github.com/ClickHouse/ClickHouse/pull/49172) ([Duc Canh Le](https://github.com/canhld94)). -* Fix reinterpretAs*() on big endian machines [#49198](https://github.com/ClickHouse/ClickHouse/pull/49198) ([Suzy Wang](https://github.com/SuzyWangIBMer)). -* (Experimental zero-copy replication) Lock zero copy parts more atomically [#49211](https://github.com/ClickHouse/ClickHouse/pull/49211) ([alesapin](https://github.com/alesapin)). -* Fix race on Outdated parts loading [#49223](https://github.com/ClickHouse/ClickHouse/pull/49223) ([Alexander Tokmakov](https://github.com/tavplubix)). -* Fix all key value is null and group use rollup return wrong answer [#49282](https://github.com/ClickHouse/ClickHouse/pull/49282) ([Shuai li](https://github.com/loneylee)). -* Fix calculating load_factor for HASHED dictionaries with SHARDS [#49319](https://github.com/ClickHouse/ClickHouse/pull/49319) ([Azat Khuzhin](https://github.com/azat)). -* Disallow configuring compression CODECs for alias columns [#49363](https://github.com/ClickHouse/ClickHouse/pull/49363) ([Timur Solodovnikov](https://github.com/tsolodov)). -* Fix bug in removal of existing part directory [#49365](https://github.com/ClickHouse/ClickHouse/pull/49365) ([alesapin](https://github.com/alesapin)). -* Properly fix GCS when HMAC is used [#49390](https://github.com/ClickHouse/ClickHouse/pull/49390) ([Antonio Andelic](https://github.com/antonio2368)). -* Fix fuzz bug when subquery set is not built when reading from remote() [#49425](https://github.com/ClickHouse/ClickHouse/pull/49425) ([Alexander Gololobov](https://github.com/davenger)). -* Invert `shutdown_wait_unfinished_queries` [#49427](https://github.com/ClickHouse/ClickHouse/pull/49427) ([Konstantin Bogdanov](https://github.com/thevar1able)). -* (Experimental zero-copy replication) Fix another zero copy bug [#49473](https://github.com/ClickHouse/ClickHouse/pull/49473) ([alesapin](https://github.com/alesapin)). -* Fix postgres database setting [#49481](https://github.com/ClickHouse/ClickHouse/pull/49481) ([Mal Curtis](https://github.com/snikch)). -* Correctly handle `s3Cluster` arguments [#49490](https://github.com/ClickHouse/ClickHouse/pull/49490) ([Antonio Andelic](https://github.com/antonio2368)). -* Fix bug in TraceCollector destructor. [#49508](https://github.com/ClickHouse/ClickHouse/pull/49508) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). -* Fix AsynchronousReadIndirectBufferFromRemoteFS breaking on short seeks [#49525](https://github.com/ClickHouse/ClickHouse/pull/49525) ([Michael Kolupaev](https://github.com/al13n321)). -* Fix dictionaries loading order [#49560](https://github.com/ClickHouse/ClickHouse/pull/49560) ([Alexander Tokmakov](https://github.com/tavplubix)). -* Forbid the change of data type of Object('json') column [#49563](https://github.com/ClickHouse/ClickHouse/pull/49563) ([Nikolay Degterinsky](https://github.com/evillique)). -* Fix stress test (Logical error: Expected 7134 >= 11030) [#49623](https://github.com/ClickHouse/ClickHouse/pull/49623) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Fix bug in DISTINCT [#49628](https://github.com/ClickHouse/ClickHouse/pull/49628) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Fix: DISTINCT in order with zero values in non-sorted columns [#49636](https://github.com/ClickHouse/ClickHouse/pull/49636) ([Igor Nikonov](https://github.com/devcrafter)). 
-* Fix one-off error in big integers found by UBSan with fuzzer [#49645](https://github.com/ClickHouse/ClickHouse/pull/49645) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Fix reading from sparse columns after restart [#49660](https://github.com/ClickHouse/ClickHouse/pull/49660) ([Anton Popov](https://github.com/CurtizJ)). -* Fix assert in SpanHolder::finish() with fibers [#49673](https://github.com/ClickHouse/ClickHouse/pull/49673) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix short circuit functions and mutations with sparse arguments [#49716](https://github.com/ClickHouse/ClickHouse/pull/49716) ([Anton Popov](https://github.com/CurtizJ)). -* Fix writing appended files to incremental backups [#49725](https://github.com/ClickHouse/ClickHouse/pull/49725) ([Vitaly Baranov](https://github.com/vitlibar)). -* Fix "There is no physical column _row_exists in table" error occurring during lightweight delete mutation on a table with Object column. [#49737](https://github.com/ClickHouse/ClickHouse/pull/49737) ([Alexander Gololobov](https://github.com/davenger)). -* Fix msan issue in randomStringUTF8(uneven number) [#49750](https://github.com/ClickHouse/ClickHouse/pull/49750) ([Robert Schulze](https://github.com/rschu1ze)). -* Fix aggregate function kolmogorovSmirnovTest [#49768](https://github.com/ClickHouse/ClickHouse/pull/49768) ([FFFFFFFHHHHHHH](https://github.com/FFFFFFFHHHHHHH)). -* Fix settings aliases in native protocol [#49776](https://github.com/ClickHouse/ClickHouse/pull/49776) ([Azat Khuzhin](https://github.com/azat)). -* Fix `arrayMap` with array of tuples with single argument [#49789](https://github.com/ClickHouse/ClickHouse/pull/49789) ([Anton Popov](https://github.com/CurtizJ)). -* Fix per-query IO/BACKUPs throttling settings [#49797](https://github.com/ClickHouse/ClickHouse/pull/49797) ([Azat Khuzhin](https://github.com/azat)). -* Fix setting NULL in profile definition [#49831](https://github.com/ClickHouse/ClickHouse/pull/49831) ([Vitaly Baranov](https://github.com/vitlibar)). -* Fix a bug with projections and the aggregate_functions_null_for_empty setting (for query_plan_optimize_projection) [#49873](https://github.com/ClickHouse/ClickHouse/pull/49873) ([Amos Bird](https://github.com/amosbird)). -* Fix processing pending batch for Distributed async INSERT after restart [#49884](https://github.com/ClickHouse/ClickHouse/pull/49884) ([Azat Khuzhin](https://github.com/azat)). -* Fix assertion in CacheMetadata::doCleanup [#49914](https://github.com/ClickHouse/ClickHouse/pull/49914) ([Kseniia Sumarokova](https://github.com/kssenii)). -* fix `is_prefix` in OptimizeRegularExpression [#49919](https://github.com/ClickHouse/ClickHouse/pull/49919) ([Han Fei](https://github.com/hanfei1991)). -* Fix metrics `WriteBufferFromS3Bytes`, `WriteBufferFromS3Microseconds` and `WriteBufferFromS3RequestsErrors` [#49930](https://github.com/ClickHouse/ClickHouse/pull/49930) ([Aleksandr Musorin](https://github.com/AVMusorin)). -* Fix IPv6 encoding in protobuf [#49933](https://github.com/ClickHouse/ClickHouse/pull/49933) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). -* Fix possible Logical error on bad Nullable parsing for text formats [#49960](https://github.com/ClickHouse/ClickHouse/pull/49960) ([Kruglov Pavel](https://github.com/Avogar)). -* Add setting output_format_parquet_compliant_nested_types to produce more compatible Parquet files [#50001](https://github.com/ClickHouse/ClickHouse/pull/50001) ([Michael Kolupaev](https://github.com/al13n321)). 
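As a usage sketch for the `output_format_parquet_compliant_nested_types` setting added above (the query and column names are illustrative; in practice the output would be redirected to a file or written with `INTO OUTFILE`):

```sql
SELECT
    number AS id,
    [number, number + 1] AS vals  -- nested (array) column whose Parquet schema naming is affected
FROM numbers(3)
SETTINGS output_format_parquet_compliant_nested_types = 1
FORMAT Parquet;
```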
-* Fix logical error in stress test "Not enough space to add ..." [#50021](https://github.com/ClickHouse/ClickHouse/pull/50021) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Avoid deadlock when starting table in attach thread of `ReplicatedMergeTree` [#50026](https://github.com/ClickHouse/ClickHouse/pull/50026) ([Antonio Andelic](https://github.com/antonio2368)). -* Fix assert in SpanHolder::finish() with fibers attempt 2 [#50034](https://github.com/ClickHouse/ClickHouse/pull/50034) ([Kruglov Pavel](https://github.com/Avogar)). -* Add proper escaping for DDL OpenTelemetry context serialization [#50045](https://github.com/ClickHouse/ClickHouse/pull/50045) ([Azat Khuzhin](https://github.com/azat)). -* Fix reporting broken projection parts [#50052](https://github.com/ClickHouse/ClickHouse/pull/50052) ([Amos Bird](https://github.com/amosbird)). -* JIT compilation not equals NaN fix [#50056](https://github.com/ClickHouse/ClickHouse/pull/50056) ([Maksim Kita](https://github.com/kitaisreal)). -* Fix crashing in case of Replicated database without arguments [#50058](https://github.com/ClickHouse/ClickHouse/pull/50058) ([Azat Khuzhin](https://github.com/azat)). -* Fix crash with `multiIf` and constant condition and nullable arguments [#50123](https://github.com/ClickHouse/ClickHouse/pull/50123) ([Anton Popov](https://github.com/CurtizJ)). -* Fix invalid index analysis for date related keys [#50153](https://github.com/ClickHouse/ClickHouse/pull/50153) ([Amos Bird](https://github.com/amosbird)). -* do not allow modify order by when there are no order by cols [#50154](https://github.com/ClickHouse/ClickHouse/pull/50154) ([Han Fei](https://github.com/hanfei1991)). -* Fix broken index analysis when binary operator contains a null constant argument [#50177](https://github.com/ClickHouse/ClickHouse/pull/50177) ([Amos Bird](https://github.com/amosbird)). -* clickhouse-client: disallow usage of `--query` and `--queries-file` at the same time [#50210](https://github.com/ClickHouse/ClickHouse/pull/50210) ([Alexey Gerasimchuk](https://github.com/Demilivor)). -* Fix UB for INTO OUTFILE extensions (APPEND / AND STDOUT) and WATCH EVENTS [#50216](https://github.com/ClickHouse/ClickHouse/pull/50216) ([Azat Khuzhin](https://github.com/azat)). -* Fix skipping spaces at end of row in CustomSeparatedIgnoreSpaces format [#50224](https://github.com/ClickHouse/ClickHouse/pull/50224) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix iceberg metadata parsing [#50232](https://github.com/ClickHouse/ClickHouse/pull/50232) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Fix nested distributed SELECT in WITH clause [#50234](https://github.com/ClickHouse/ClickHouse/pull/50234) ([Azat Khuzhin](https://github.com/azat)). -* Fix msan issue in keyed siphash [#50245](https://github.com/ClickHouse/ClickHouse/pull/50245) ([Robert Schulze](https://github.com/rschu1ze)). -* Fix bugs in Poco sockets in non-blocking mode, use true non-blocking sockets [#50252](https://github.com/ClickHouse/ClickHouse/pull/50252) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix checksum calculation for backup entries [#50264](https://github.com/ClickHouse/ClickHouse/pull/50264) ([Vitaly Baranov](https://github.com/vitlibar)). -* Comparison functions NaN fix [#50287](https://github.com/ClickHouse/ClickHouse/pull/50287) ([Maksim Kita](https://github.com/kitaisreal)). -* JIT aggregation nullable key fix [#50291](https://github.com/ClickHouse/ClickHouse/pull/50291) ([Maksim Kita](https://github.com/kitaisreal)). 
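For the `INTO OUTFILE` extensions referenced above (`APPEND` / `AND STDOUT`), a small hedged sketch of the syntax in question; the file name is made up and the exact clause order is assumed from the documented grammar:

```sql
-- Append to an existing file instead of failing because it already exists.
SELECT number FROM numbers(5)
INTO OUTFILE 'numbers.tsv' APPEND
FORMAT TSV;
```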
-* Fix clickhouse-local crashing when writing empty Arrow or Parquet output [#50328](https://github.com/ClickHouse/ClickHouse/pull/50328) ([Michael Kolupaev](https://github.com/al13n321)). -* Fix crash when Pool::Entry::disconnect() is called [#50334](https://github.com/ClickHouse/ClickHouse/pull/50334) ([Val Doroshchuk](https://github.com/valbok)). -* Improved fetch part by holding directory lock longer [#50339](https://github.com/ClickHouse/ClickHouse/pull/50339) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). -* Fix bitShift* functions with both constant arguments [#50343](https://github.com/ClickHouse/ClickHouse/pull/50343) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix Keeper deadlock on exception when preprocessing requests. [#50387](https://github.com/ClickHouse/ClickHouse/pull/50387) ([frinkr](https://github.com/frinkr)). -* Fix hashing of const integer values [#50421](https://github.com/ClickHouse/ClickHouse/pull/50421) ([Robert Schulze](https://github.com/rschu1ze)). -* Fix merge_tree_min_rows_for_seek/merge_tree_min_bytes_for_seek for data skipping indexes [#50432](https://github.com/ClickHouse/ClickHouse/pull/50432) ([Azat Khuzhin](https://github.com/azat)). -* Limit the number of in-flight tasks for loading outdated parts [#50450](https://github.com/ClickHouse/ClickHouse/pull/50450) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). -* Keeper fix: apply uncommitted state after snapshot install [#50483](https://github.com/ClickHouse/ClickHouse/pull/50483) ([Antonio Andelic](https://github.com/antonio2368)). -* Fix incorrect constant folding [#50536](https://github.com/ClickHouse/ClickHouse/pull/50536) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Fix logical error in stress test (Not enough space to add ...) [#50583](https://github.com/ClickHouse/ClickHouse/pull/50583) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Fix converting Null to LowCardinality(Nullable) in values table function [#50637](https://github.com/ClickHouse/ClickHouse/pull/50637) ([Kruglov Pavel](https://github.com/Avogar)). -* Revert invalid RegExpTreeDictionary optimization [#50642](https://github.com/ClickHouse/ClickHouse/pull/50642) ([Johann Gan](https://github.com/johanngan)). - -### ClickHouse release 23.4, 2023-04-26 - -#### Backward Incompatible Change -* Formatter '%M' in function formatDateTime() now prints the month name instead of the minutes. This makes the behavior consistent with MySQL. The previous behavior can be restored using setting "formatdatetime_parsedatetime_m_is_month_name = 0". [#47246](https://github.com/ClickHouse/ClickHouse/pull/47246) ([Robert Schulze](https://github.com/rschu1ze)). -* This change makes sense only if you are using the virtual filesystem cache. If `path` in the virtual filesystem cache configuration is not empty and is not an absolute path, then it will be put in `/caches/`. [#48784](https://github.com/ClickHouse/ClickHouse/pull/48784) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Primary/secondary indices and sorting keys with identical expressions are now rejected. This behavior can be disabled using setting `allow_suspicious_indices`. [#48536](https://github.com/ClickHouse/ClickHouse/pull/48536) ([凌涛](https://github.com/lingtaolf)). - -#### New Feature -* Support new aggregate function `quantileGK`/`quantilesGK`, like [approx_percentile](https://spark.apache.org/docs/latest/api/sql/index.html#approx_percentile) in spark. 
Greenwald-Khanna algorithm refer to http://infolab.stanford.edu/~datar/courses/cs361a/papers/quantiles.pdf. [#46428](https://github.com/ClickHouse/ClickHouse/pull/46428) ([李扬](https://github.com/taiyang-li)). -* Add a statement `SHOW COLUMNS` which shows distilled information from system.columns. [#48017](https://github.com/ClickHouse/ClickHouse/pull/48017) ([Robert Schulze](https://github.com/rschu1ze)). -* Added `LIGHTWEIGHT` and `PULL` modifiers for `SYSTEM SYNC REPLICA` query. `LIGHTWEIGHT` version waits for fetches and drop-ranges only (merges and mutations are ignored). `PULL` version pulls new entries from ZooKeeper and does not wait for them. Fixes [#47794](https://github.com/ClickHouse/ClickHouse/issues/47794). [#48085](https://github.com/ClickHouse/ClickHouse/pull/48085) ([Alexander Tokmakov](https://github.com/tavplubix)). -* Add `kafkaMurmurHash` function for compatibility with Kafka DefaultPartitioner. Closes [#47834](https://github.com/ClickHouse/ClickHouse/issues/47834). [#48185](https://github.com/ClickHouse/ClickHouse/pull/48185) ([Nikolay Degterinsky](https://github.com/evillique)). -* Allow to easily create a user with the same grants as the current user by using `GRANT CURRENT GRANTS`. [#48262](https://github.com/ClickHouse/ClickHouse/pull/48262) ([pufit](https://github.com/pufit)). -* Add statistical aggregate function `kolmogorovSmirnovTest`. Close [#48228](https://github.com/ClickHouse/ClickHouse/issues/48228). [#48325](https://github.com/ClickHouse/ClickHouse/pull/48325) ([FFFFFFFHHHHHHH](https://github.com/FFFFFFFHHHHHHH)). -* Added a `lost_part_count` column to the `system.replicas` table. The column value shows the total number of lost parts in the corresponding table. Value is stored in zookeeper and can be used instead of not persistent `ReplicatedDataLoss` profile event for monitoring. [#48526](https://github.com/ClickHouse/ClickHouse/pull/48526) ([Sergei Trifonov](https://github.com/serxa)). -* Add `soundex` function for compatibility. Closes [#39880](https://github.com/ClickHouse/ClickHouse/issues/39880). [#48567](https://github.com/ClickHouse/ClickHouse/pull/48567) ([FriendLey](https://github.com/FriendLey)). -* Support `Map` type for JSONExtract. [#48629](https://github.com/ClickHouse/ClickHouse/pull/48629) ([李扬](https://github.com/taiyang-li)). -* Add `PrettyJSONEachRow` format to output pretty JSON with new line delimiters and 4 space indents. [#48898](https://github.com/ClickHouse/ClickHouse/pull/48898) ([Kruglov Pavel](https://github.com/Avogar)). -* Add `ParquetMetadata` input format to read Parquet file metadata. [#48911](https://github.com/ClickHouse/ClickHouse/pull/48911) ([Kruglov Pavel](https://github.com/Avogar)). -* Add `extractKeyValuePairs` function to extract key value pairs from strings. Input strings might contain noise (i.e. log files / do not need to be 100% formatted in key-value-pair format), the algorithm will look for key value pairs matching the arguments passed to the function. As of now, function accepts the following arguments: `data_column` (mandatory), `key_value_pair_delimiter` (defaults to `:`), `pair_delimiters` (defaults to `\space \, \;`) and `quoting_character` (defaults to double quotes). [#43606](https://github.com/ClickHouse/ClickHouse/pull/43606) ([Arthur Passos](https://github.com/arthurpassos)). -* Functions replaceOne(), replaceAll(), replaceRegexpOne() and replaceRegexpAll() can now be called with non-const pattern and replacement arguments. 
[#46589](https://github.com/ClickHouse/ClickHouse/pull/46589) ([Robert Schulze](https://github.com/rschu1ze)). -* Added functions to work with columns of type `Map`: `mapConcat`, `mapSort`, `mapExists`. [#48071](https://github.com/ClickHouse/ClickHouse/pull/48071) ([Anton Popov](https://github.com/CurtizJ)). - -#### Performance Improvement -* Reading files in `Parquet` format is now much faster. IO and decoding are parallelized (controlled by `max_threads` setting), and only required data ranges are read. [#47964](https://github.com/ClickHouse/ClickHouse/pull/47964) ([Michael Kolupaev](https://github.com/al13n321)). -* If we run a mutation with IN (subquery) like this: `ALTER TABLE t UPDATE col='new value' WHERE id IN (SELECT id FROM huge_table)` and the table `t` has multiple parts than for each part a set for subquery `SELECT id FROM huge_table` is built in memory. And if there are many parts then this might consume a lot of memory (and lead to an OOM) and CPU. The solution is to introduce a short-lived cache of sets that are currently being built by mutation tasks. If another task of the same mutation is executed concurrently it can look up the set in the cache, wait for it to be built and reuse it. [#46835](https://github.com/ClickHouse/ClickHouse/pull/46835) ([Alexander Gololobov](https://github.com/davenger)). -* Only check dependencies if necessary when applying `ALTER TABLE` queries. [#48062](https://github.com/ClickHouse/ClickHouse/pull/48062) ([Raúl Marín](https://github.com/Algunenano)). -* Optimize function `mapUpdate`. [#48118](https://github.com/ClickHouse/ClickHouse/pull/48118) ([Anton Popov](https://github.com/CurtizJ)). -* Now an internal query to local replica is sent explicitly and data from it received through loopback interface. Setting `prefer_localhost_replica` is not respected for parallel replicas. This is needed for better scheduling and makes the code cleaner: the initiator is only responsible for coordinating of the reading process and merging results, continuously answering for requests while all the secondary queries read the data. Note: Using loopback interface is not so performant, otherwise some replicas could starve for tasks which could lead to even slower query execution and not utilizing all possible resources. The initialization of the coordinator is now even more lazy. All incoming requests contain the information about the reading algorithm we initialize the coordinator with it when first request comes. If any replica decides to read with a different algorithm–an exception will be thrown and a query will be aborted. [#48246](https://github.com/ClickHouse/ClickHouse/pull/48246) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). -* Do not build set for the right side of `IN` clause with subquery when it is used only for analysis of skip indexes, and they are disabled by setting (`use_skip_indexes=0`). Previously it might affect the performance of queries. [#48299](https://github.com/ClickHouse/ClickHouse/pull/48299) ([Anton Popov](https://github.com/CurtizJ)). -* Query processing is parallelized right after reading `FROM file(...)`. Related to [#38755](https://github.com/ClickHouse/ClickHouse/issues/38755). [#48525](https://github.com/ClickHouse/ClickHouse/pull/48525) ([Igor Nikonov](https://github.com/devcrafter)). Query processing is parallelized right after reading from any data source. Affected data sources are mostly simple or external storages like table functions `url`, `file`. 
[#48727](https://github.com/ClickHouse/ClickHouse/pull/48727) ([Igor Nikonov](https://github.com/devcrafter)). This is controlled by the setting `parallelize_output_from_storages` which is not enabled by default. -* Lowered contention of ThreadPool mutex (may increase performance for a huge amount of small jobs). [#48750](https://github.com/ClickHouse/ClickHouse/pull/48750) ([Sergei Trifonov](https://github.com/serxa)). -* Reduce memory usage for multiple `ALTER DELETE` mutations. [#48522](https://github.com/ClickHouse/ClickHouse/pull/48522) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Remove the excessive connection attempts if the `skip_unavailable_shards` setting is enabled. [#48771](https://github.com/ClickHouse/ClickHouse/pull/48771) ([Azat Khuzhin](https://github.com/azat)). - -#### Experimental Feature -* Entries in the query cache are now squashed to max_block_size and compressed. [#45912](https://github.com/ClickHouse/ClickHouse/pull/45912) ([Robert Schulze](https://github.com/rschu1ze)). -* It is now possible to define per-user quotas in the query cache. [#48284](https://github.com/ClickHouse/ClickHouse/pull/48284) ([Robert Schulze](https://github.com/rschu1ze)). -* Some fixes for parallel replicas [#48433](https://github.com/ClickHouse/ClickHouse/pull/48433) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). -* Implement zero-copy-replication (an experimental feature) on encrypted disks. [#48741](https://github.com/ClickHouse/ClickHouse/pull/48741) ([Vitaly Baranov](https://github.com/vitlibar)). - -#### Improvement -* Increase default value for `connect_timeout_with_failover_ms` to 1000 ms (because of adding async connections in https://github.com/ClickHouse/ClickHouse/pull/47229) . Closes [#5188](https://github.com/ClickHouse/ClickHouse/issues/5188). [#49009](https://github.com/ClickHouse/ClickHouse/pull/49009) ([Kruglov Pavel](https://github.com/Avogar)). -* Several improvements around data lakes: - Make `Iceberg` work with non-partitioned data. - Support `Iceberg` format version v2 (previously only v1 was supported) - Support reading partitioned data for `DeltaLake`/`Hudi` - Faster reading of `DeltaLake` metadata by using Delta's checkpoint files - Fixed incorrect `Hudi` reads: previously it incorrectly chose which data to read and therefore was able to read correctly only small size tables - Made these engines to pickup updates of changed data (previously the state was set on table creation) - Make proper testing for `Iceberg`/`DeltaLake`/`Hudi` using spark. [#47307](https://github.com/ClickHouse/ClickHouse/pull/47307) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Add async connection to socket and async writing to socket. Make creating connections and sending query/external tables async across shards. Refactor code with fibers. Closes [#46931](https://github.com/ClickHouse/ClickHouse/issues/46931). We will be able to increase `connect_timeout_with_failover_ms` by default after this PR (https://github.com/ClickHouse/ClickHouse/issues/5188). [#47229](https://github.com/ClickHouse/ClickHouse/pull/47229) ([Kruglov Pavel](https://github.com/Avogar)). -* Support config sections `keeper`/`keeper_server` as an alternative to `zookeeper`. Close [#34766](https://github.com/ClickHouse/ClickHouse/issues/34766) , [#34767](https://github.com/ClickHouse/ClickHouse/issues/34767). [#35113](https://github.com/ClickHouse/ClickHouse/pull/35113) ([李扬](https://github.com/taiyang-li)). 
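To ground the query-cache items above, a minimal sketch of exercising the cache; `use_query_cache` is the standard switch, and the query itself is purely illustrative:

```sql
-- The first run computes and stores the result; an identical repeated run
-- can then be served from the query cache.
SELECT sum(number) FROM numbers(1000000) SETTINGS use_query_cache = 1;
SELECT sum(number) FROM numbers(1000000) SETTINGS use_query_cache = 1;
```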
-* It is possible to set _secure_ flag in named_collections for a dictionary with a ClickHouse table source. Addresses [#38450](https://github.com/ClickHouse/ClickHouse/issues/38450). [#46323](https://github.com/ClickHouse/ClickHouse/pull/46323) ([Ilya Golshtein](https://github.com/ilejn)).
-* The `bitCount` function now supports the `FixedString` and `String` data types. [#49044](https://github.com/ClickHouse/ClickHouse/pull/49044) ([flynn](https://github.com/ucasfl)).
-* Added configurable retries for all operations with [Zoo]Keeper for Backup queries. [#47224](https://github.com/ClickHouse/ClickHouse/pull/47224) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
-* Enable `use_environment_credentials` for S3 by default, so the entire provider chain is constructed by default. [#47397](https://github.com/ClickHouse/ClickHouse/pull/47397) ([Antonio Andelic](https://github.com/antonio2368)).
-* The JSON_VALUE function is similar to Spark's get_json_object function: it extracts a value from a JSON string by a path like '$.key'. Two differences remain: 1. when the path does not exist, Spark's get_json_object returns null while JSON_VALUE returns an empty string; 2. for a complex value such as a JSON object or array, get_json_object returns it while JSON_VALUE returns an empty string. [#47494](https://github.com/ClickHouse/ClickHouse/pull/47494) ([KevinyhZou](https://github.com/KevinyhZou)).
-* More flexible propagation of the insert table structure to table functions when `use_structure_from_insertion_table_in_table_functions` is used. Fixed an issue with name mapping and the use of virtual columns; the 'auto' setting is no longer needed. [#47962](https://github.com/ClickHouse/ClickHouse/pull/47962) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
-* Do not continue retrying to connect to Keeper if the query is killed or over limits. [#47985](https://github.com/ClickHouse/ClickHouse/pull/47985) ([Raúl Marín](https://github.com/Algunenano)).
-* Support Enum output/input in `BSONEachRow`, allow all map key types and avoid extra calculations on output. [#48122](https://github.com/ClickHouse/ClickHouse/pull/48122) ([Kruglov Pavel](https://github.com/Avogar)).
-* Support more ClickHouse types in `ORC`/`Arrow`/`Parquet` formats: Enum(8|16), (U)Int(128|256), Decimal256 (for ORC), allow reading IPv4 from Int32 values (ORC outputs IPv4 as Int32, and we couldn't read it back), fix reading Nullable(IPv6) from binary data for `ORC`. [#48126](https://github.com/ClickHouse/ClickHouse/pull/48126) ([Kruglov Pavel](https://github.com/Avogar)).
-* Add columns `perform_ttl_move_on_insert`, `load_balancing` for table `system.storage_policies`, modify column `volume_type` type to `Enum8`. [#48167](https://github.com/ClickHouse/ClickHouse/pull/48167) ([lizhuoyu5](https://github.com/lzydmxy)).
-* Added support for the `BACKUP ALL` command which backs up all tables and databases, including temporary and system ones. [#48189](https://github.com/ClickHouse/ClickHouse/pull/48189) ([Vitaly Baranov](https://github.com/vitlibar)).
-* Function mapFromArrays supports `Map` type as an input. [#48207](https://github.com/ClickHouse/ClickHouse/pull/48207) ([李扬](https://github.com/taiyang-li)).
-* The output of `SHOW PROCESSLIST` is now sorted. [#48241](https://github.com/ClickHouse/ClickHouse/pull/48241) ([Robert Schulze](https://github.com/rschu1ze)).
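The contrast described in the JSON_VALUE entry above can be seen directly; the values in the comments follow the behaviour stated in that entry:

```sql
SELECT
    JSON_VALUE('{"a": {"b": 1}}', '$.a.b') AS scalar_value,  -- '1'
    JSON_VALUE('{"a": {"b": 1}}', '$.miss') AS missing_path, -- '' (Spark's get_json_object would return null)
    JSON_VALUE('{"a": {"b": 1}}', '$.a')    AS complex_value; -- '' (a JSON object, not a scalar)
```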
-* Per-query/per-server throttling for remote IO/local IO/BACKUPs (server settings: `max_remote_read_network_bandwidth_for_server`, `max_remote_write_network_bandwidth_for_server`, `max_local_read_bandwidth_for_server`, `max_local_write_bandwidth_for_server`, `max_backup_bandwidth_for_server`; query-level settings: `max_remote_read_network_bandwidth`, `max_remote_write_network_bandwidth`, `max_local_read_bandwidth`, `max_local_write_bandwidth`, `max_backup_bandwidth`). [#48242](https://github.com/ClickHouse/ClickHouse/pull/48242) ([Azat Khuzhin](https://github.com/azat)).
-* Support more types in `CapnProto` format: Map, (U)Int(128|256), Decimal(128|256). Allow integer conversions during input/output. [#48257](https://github.com/ClickHouse/ClickHouse/pull/48257) ([Kruglov Pavel](https://github.com/Avogar)).
-* Don't throw CURRENT_WRITE_BUFFER_IS_EXHAUSTED for normal behaviour. [#48288](https://github.com/ClickHouse/ClickHouse/pull/48288) ([Raúl Marín](https://github.com/Algunenano)).
-* Add new setting `keeper_map_strict_mode` which enforces extra guarantees on operations made on top of `KeeperMap` tables. [#48293](https://github.com/ClickHouse/ClickHouse/pull/48293) ([Antonio Andelic](https://github.com/antonio2368)).
-* Check that the primary key type of a simple dictionary is a native unsigned integer type. Added the setting `check_dictionary_primary_key` for compatibility (set `check_dictionary_primary_key = false` to disable the check). [#48335](https://github.com/ClickHouse/ClickHouse/pull/48335) ([lizhuoyu5](https://github.com/lzydmxy)).
-* Don't replicate mutations for `KeeperMap` because it's unnecessary. [#48354](https://github.com/ClickHouse/ClickHouse/pull/48354) ([Antonio Andelic](https://github.com/antonio2368)).
-* Allow writing/reading an unnamed tuple as a nested Message in the Protobuf format. Tuple elements and Message fields are matched by position. [#48390](https://github.com/ClickHouse/ClickHouse/pull/48390) ([Kruglov Pavel](https://github.com/Avogar)).
-* Support `additional_table_filters` and `additional_result_filter` settings in the new planner. Also, add a documentation entry for `additional_result_filter`. [#48405](https://github.com/ClickHouse/ClickHouse/pull/48405) ([Dmitry Novik](https://github.com/novikd)).
-* `parseDateTime` now understands format string '%f' (fractional seconds). [#48420](https://github.com/ClickHouse/ClickHouse/pull/48420) ([Robert Schulze](https://github.com/rschu1ze)).
-* Format string "%f" in formatDateTime() now prints "000000" if the formatted value has no fractional seconds; the previous behavior (a single zero) can be restored using the setting "formatdatetime_f_prints_single_zero = 1". [#48422](https://github.com/ClickHouse/ClickHouse/pull/48422) ([Robert Schulze](https://github.com/rschu1ze)).
-* Don't replicate DELETE and TRUNCATE for KeeperMap. [#48434](https://github.com/ClickHouse/ClickHouse/pull/48434) ([Antonio Andelic](https://github.com/antonio2368)).
-* Generate valid Decimals and Bools in the generateRandom function. [#48436](https://github.com/ClickHouse/ClickHouse/pull/48436) ([Kruglov Pavel](https://github.com/Avogar)).
-* Allow trailing commas in the expression list of a SELECT query, for example `SELECT a, b, c, FROM table`. Closes [#37802](https://github.com/ClickHouse/ClickHouse/issues/37802). [#48438](https://github.com/ClickHouse/ClickHouse/pull/48438) ([Nikolay Degterinsky](https://github.com/evillique)).
-* Override `CLICKHOUSE_USER` and `CLICKHOUSE_PASSWORD` environment variables with `--user` and `--password` client parameters.
Closes [#38909](https://github.com/ClickHouse/ClickHouse/issues/38909). [#48440](https://github.com/ClickHouse/ClickHouse/pull/48440) ([Nikolay Degterinsky](https://github.com/evillique)). -* Added retries to loading of data parts in `MergeTree` tables in case of retryable errors. [#48442](https://github.com/ClickHouse/ClickHouse/pull/48442) ([Anton Popov](https://github.com/CurtizJ)). -* Add support for `Date`, `Date32`, `DateTime`, `DateTime64` data types to `arrayMin`, `arrayMax`, `arrayDifference` functions. Closes [#21645](https://github.com/ClickHouse/ClickHouse/issues/21645). [#48445](https://github.com/ClickHouse/ClickHouse/pull/48445) ([Nikolay Degterinsky](https://github.com/evillique)). -* Add support for `{server_uuid}` macro. It is useful for identifying replicas in autoscaled clusters when new replicas are constantly added and removed in runtime. This closes [#48554](https://github.com/ClickHouse/ClickHouse/issues/48554). [#48563](https://github.com/ClickHouse/ClickHouse/pull/48563) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* The installation script will create a hard link instead of copying if it is possible. [#48578](https://github.com/ClickHouse/ClickHouse/pull/48578) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Support `SHOW TABLE` syntax meaning the same as `SHOW CREATE TABLE`. Closes [#48580](https://github.com/ClickHouse/ClickHouse/issues/48580). [#48591](https://github.com/ClickHouse/ClickHouse/pull/48591) ([flynn](https://github.com/ucasfl)). -* HTTP temporary buffers now support working by evicting data from the virtual filesystem cache. [#48664](https://github.com/ClickHouse/ClickHouse/pull/48664) ([Vladimir C](https://github.com/vdimir)). -* Make Schema inference works for `CREATE AS SELECT`. Closes [#47599](https://github.com/ClickHouse/ClickHouse/issues/47599). [#48679](https://github.com/ClickHouse/ClickHouse/pull/48679) ([flynn](https://github.com/ucasfl)). -* Added a `replicated_max_mutations_in_one_entry` setting for `ReplicatedMergeTree` that allows limiting the number of mutation commands per one `MUTATE_PART` entry (default is 10000). [#48731](https://github.com/ClickHouse/ClickHouse/pull/48731) ([Alexander Tokmakov](https://github.com/tavplubix)). -* In AggregateFunction types, don't count unused arena bytes as `read_bytes`. [#48745](https://github.com/ClickHouse/ClickHouse/pull/48745) ([Raúl Marín](https://github.com/Algunenano)). -* Fix some MySQL-related settings not being handled with the MySQL dictionary source + named collection. Closes [#48402](https://github.com/ClickHouse/ClickHouse/issues/48402). [#48759](https://github.com/ClickHouse/ClickHouse/pull/48759) ([Kseniia Sumarokova](https://github.com/kssenii)). -* If a user set `max_single_part_upload_size` to a very large value, it can lead to a crash due to a bug in the AWS S3 SDK. This fixes [#47679](https://github.com/ClickHouse/ClickHouse/issues/47679). [#48816](https://github.com/ClickHouse/ClickHouse/pull/48816) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Fix data race in `RabbitMQ` ([report](https://pastila.nl/?004f7100/de1505289ab5bb355e67ebe6c7cc8707)), refactor the code. [#48845](https://github.com/ClickHouse/ClickHouse/pull/48845) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Add aliases `name` and `part_name` form `system.parts` and `system.part_log`. Closes [#48718](https://github.com/ClickHouse/ClickHouse/issues/48718). 
[#48850](https://github.com/ClickHouse/ClickHouse/pull/48850) ([sichenzhao](https://github.com/sichenzhao)).
-* Functions "arrayDifference()", "arrayCumSum()" and "arrayCumSumNonNegative()" now support input arrays of wide integer types (U)Int128/256. [#48866](https://github.com/ClickHouse/ClickHouse/pull/48866) ([cluster](https://github.com/infdahai)).
-* Multi-line history in clickhouse-client is no longer padded. This makes pasting more natural. [#48870](https://github.com/ClickHouse/ClickHouse/pull/48870) ([Joanna Hulboj](https://github.com/jh0x)).
-* Implement a slight improvement for the rare case when ClickHouse is run inside LXC and LXCFS is used. LXCFS has an issue: sometimes it returns the error "Transport endpoint is not connected" when reading a file inside `/proc`. This error was correctly logged into ClickHouse's server log. We now additionally work around this issue by reopening the file. This is a minuscule change. [#48922](https://github.com/ClickHouse/ClickHouse/pull/48922) ([Real](https://github.com/RunningXie)).
-* Improve memory accounting for prefetches. Randomise prefetch settings in CI. [#48973](https://github.com/ClickHouse/ClickHouse/pull/48973) ([Kseniia Sumarokova](https://github.com/kssenii)).
-* Correctly set headers for native copy operations on GCS. [#48981](https://github.com/ClickHouse/ClickHouse/pull/48981) ([Antonio Andelic](https://github.com/antonio2368)).
-* Add support for specifying setting names in the command line with dashes instead of underscores, for example, `--max-threads` instead of `--max_threads`. Additionally, support Unicode dash characters like `—` instead of `--` - this is useful when you communicate with a team in another company, and a manager from that team copy-pasted code from MS Word. [#48985](https://github.com/ClickHouse/ClickHouse/pull/48985) ([alekseygolub](https://github.com/alekseygolub)).
-* Add fallback to password authentication when authentication with SSL user certificate has failed. Closes [#48974](https://github.com/ClickHouse/ClickHouse/issues/48974). [#48989](https://github.com/ClickHouse/ClickHouse/pull/48989) ([Nikolay Degterinsky](https://github.com/evillique)).
-* Improve the embedded dashboard. Close [#46671](https://github.com/ClickHouse/ClickHouse/issues/46671). [#49036](https://github.com/ClickHouse/ClickHouse/pull/49036) ([Kevin Zhang](https://github.com/Kinzeng)).
-* Add profile events for log messages, so you can easily see the count of log messages by severity. [#49042](https://github.com/ClickHouse/ClickHouse/pull/49042) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
-* In previous versions, the `LineAsString` format behaved inconsistently depending on whether parallel parsing was enabled, in the presence of DOS or macOS Classic line breaks. This closes [#49039](https://github.com/ClickHouse/ClickHouse/issues/49039). [#49052](https://github.com/ClickHouse/ClickHouse/pull/49052) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
-* The exception message about an unparsed query parameter now also includes the name of the parameter. Reimplement [#48878](https://github.com/ClickHouse/ClickHouse/issues/48878). Close [#48772](https://github.com/ClickHouse/ClickHouse/issues/48772). [#49061](https://github.com/ClickHouse/ClickHouse/pull/49061) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
-
-#### Build/Testing/Packaging Improvement
-* Update time zones.
The following were updated: Africa/Cairo, Africa/Casablanca, Africa/El_Aaiun, America/Bogota, America/Cambridge_Bay, America/Ciudad_Juarez, America/Godthab, America/Inuvik, America/Iqaluit, America/Nuuk, America/Ojinaga, America/Pangnirtung, America/Rankin_Inlet, America/Resolute, America/Whitehorse, America/Yellowknife, Asia/Gaza, Asia/Hebron, Asia/Kuala_Lumpur, Asia/Singapore, Canada/Yukon, Egypt, Europe/Kirov, Europe/Volgograd, Singapore. [#48572](https://github.com/ClickHouse/ClickHouse/pull/48572) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Reduce the number of dependencies in the header files to speed up the build. [#47984](https://github.com/ClickHouse/ClickHouse/pull/47984) ([Dmitry Novik](https://github.com/novikd)). -* Randomize compression of marks and indices in tests. [#48286](https://github.com/ClickHouse/ClickHouse/pull/48286) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Bump internal ZSTD from 1.5.4 to 1.5.5. [#46797](https://github.com/ClickHouse/ClickHouse/pull/46797) ([Robert Schulze](https://github.com/rschu1ze)). -* Randomize vertical merges from compact to wide parts in tests. [#48287](https://github.com/ClickHouse/ClickHouse/pull/48287) ([Raúl Marín](https://github.com/Algunenano)). -* Support for CRC32 checksum in HDFS. Fix performance issues. [#48614](https://github.com/ClickHouse/ClickHouse/pull/48614) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Remove remainders of GCC support. [#48671](https://github.com/ClickHouse/ClickHouse/pull/48671) ([Robert Schulze](https://github.com/rschu1ze)). -* Add CI run with new analyzer infrastructure enabled. [#48719](https://github.com/ClickHouse/ClickHouse/pull/48719) ([Dmitry Novik](https://github.com/novikd)). - -#### Bug Fix (user-visible misbehavior in an official stable release) - -* Fix system.query_views_log for MVs that are pushed from background threads [#46668](https://github.com/ClickHouse/ClickHouse/pull/46668) ([Azat Khuzhin](https://github.com/azat)). -* Fix several `RENAME COLUMN` bugs [#46946](https://github.com/ClickHouse/ClickHouse/pull/46946) ([alesapin](https://github.com/alesapin)). -* Fix minor hiliting issues in clickhouse-format [#47610](https://github.com/ClickHouse/ClickHouse/pull/47610) ([Natasha Murashkina](https://github.com/murfel)). -* Fix a bug in LLVM's libc++ leading to a crash for uploading parts to S3 which size is greater than INT_MAX [#47693](https://github.com/ClickHouse/ClickHouse/pull/47693) ([Azat Khuzhin](https://github.com/azat)). -* Fix overflow in the `sparkbar` function [#48121](https://github.com/ClickHouse/ClickHouse/pull/48121) ([Vladimir C](https://github.com/vdimir)). -* Fix race in S3 [#48190](https://github.com/ClickHouse/ClickHouse/pull/48190) ([Anton Popov](https://github.com/CurtizJ)). -* Disable JIT for aggregate functions due to inconsistent behavior [#48195](https://github.com/ClickHouse/ClickHouse/pull/48195) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Fix alter formatting (minor) [#48289](https://github.com/ClickHouse/ClickHouse/pull/48289) ([Natasha Murashkina](https://github.com/murfel)). -* Fix CPU usage in RabbitMQ (was worsened in 23.2 after [#44404](https://github.com/ClickHouse/ClickHouse/issues/44404)) [#48311](https://github.com/ClickHouse/ClickHouse/pull/48311) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Fix crash in EXPLAIN PIPELINE for Merge over Distributed [#48320](https://github.com/ClickHouse/ClickHouse/pull/48320) ([Azat Khuzhin](https://github.com/azat)). 
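For the `sparkbar` overflow fix above, a hedged sketch of how the function is typically invoked; the bucket count and data are illustrative only:

```sql
-- sparkbar(buckets)(x, y): aggregate y into `buckets` buckets over x and render a text bar chart.
SELECT sparkbar(9)(number % 9, number) FROM numbers(100);
```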
-* Fix serializing LowCardinality as Arrow dictionary [#48361](https://github.com/ClickHouse/ClickHouse/pull/48361) ([Kruglov Pavel](https://github.com/Avogar)). -* Reset downloader for cache file segment in TemporaryFileStream [#48386](https://github.com/ClickHouse/ClickHouse/pull/48386) ([Vladimir C](https://github.com/vdimir)). -* Fix possible SYSTEM SYNC REPLICA stuck in case of DROP/REPLACE PARTITION [#48391](https://github.com/ClickHouse/ClickHouse/pull/48391) ([Azat Khuzhin](https://github.com/azat)). -* Fix a startup error when loading a distributed table that depends on a dictionary [#48419](https://github.com/ClickHouse/ClickHouse/pull/48419) ([MikhailBurdukov](https://github.com/MikhailBurdukov)). -* Don't check dependencies when renaming system tables automatically [#48431](https://github.com/ClickHouse/ClickHouse/pull/48431) ([Raúl Marín](https://github.com/Algunenano)). -* Update only affected rows in KeeperMap storage [#48435](https://github.com/ClickHouse/ClickHouse/pull/48435) ([Antonio Andelic](https://github.com/antonio2368)). -* Fix possible segfault in the VFS cache [#48469](https://github.com/ClickHouse/ClickHouse/pull/48469) ([Kseniia Sumarokova](https://github.com/kssenii)). -* `toTimeZone` function throws an error when no constant string is provided [#48471](https://github.com/ClickHouse/ClickHouse/pull/48471) ([Jordi Villar](https://github.com/jrdi)). -* Fix logical error with IPv4 in Protobuf, add support for Date32 [#48486](https://github.com/ClickHouse/ClickHouse/pull/48486) ([Kruglov Pavel](https://github.com/Avogar)). -* "changed" flag in system.settings was calculated incorrectly for settings with multiple values [#48516](https://github.com/ClickHouse/ClickHouse/pull/48516) ([MikhailBurdukov](https://github.com/MikhailBurdukov)). -* Fix storage `Memory` with enabled compression [#48517](https://github.com/ClickHouse/ClickHouse/pull/48517) ([Anton Popov](https://github.com/CurtizJ)). -* Fix bracketed-paste mode messing up password input in the event of client reconnection [#48528](https://github.com/ClickHouse/ClickHouse/pull/48528) ([Michael Kolupaev](https://github.com/al13n321)). -* Fix nested map for keys of IP and UUID types [#48556](https://github.com/ClickHouse/ClickHouse/pull/48556) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). -* Fix an uncaught exception in case of parallel loader for hashed dictionaries [#48571](https://github.com/ClickHouse/ClickHouse/pull/48571) ([Azat Khuzhin](https://github.com/azat)). -* The `groupArray` aggregate function correctly works for empty result over nullable types [#48593](https://github.com/ClickHouse/ClickHouse/pull/48593) ([lgbo](https://github.com/lgbo-ustc)). -* Fix bug in Keeper when a node is not created with scheme `auth` in ACL sometimes. [#48595](https://github.com/ClickHouse/ClickHouse/pull/48595) ([Aleksei Filatov](https://github.com/aalexfvk)). -* Allow IPv4 comparison operators with UInt [#48611](https://github.com/ClickHouse/ClickHouse/pull/48611) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). -* Fix possible error from cache [#48636](https://github.com/ClickHouse/ClickHouse/pull/48636) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Async inserts with empty data will no longer throw exception. [#48663](https://github.com/ClickHouse/ClickHouse/pull/48663) ([Anton Popov](https://github.com/CurtizJ)). -* Fix table dependencies in case of failed RENAME TABLE [#48683](https://github.com/ClickHouse/ClickHouse/pull/48683) ([Azat Khuzhin](https://github.com/azat)). 
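Regarding the `toTimeZone` change above, a small example of the constraint; the `materialize` call is simply a way to turn a constant into a non-constant value:

```sql
SELECT toTimeZone(now(), 'Europe/Berlin');        -- OK: the time zone is a constant string
-- SELECT toTimeZone(now(), materialize('UTC'));  -- a non-constant time zone argument is now rejected with an error
```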
-* If the primary key has duplicate columns (which is only possible for projections), in previous versions it might lead to a bug [#48838](https://github.com/ClickHouse/ClickHouse/pull/48838) ([Amos Bird](https://github.com/amosbird)). -* Fix for a race condition in ZooKeeper when joining send_thread/receive_thread [#48849](https://github.com/ClickHouse/ClickHouse/pull/48849) ([Alexander Gololobov](https://github.com/davenger)). -* Fix unexpected part name error when trying to drop a ignored detached part with zero copy replication [#48862](https://github.com/ClickHouse/ClickHouse/pull/48862) ([Michael Lex](https://github.com/mlex)). -* Fix reading `Date32` Parquet/Arrow column into not a `Date32` column [#48864](https://github.com/ClickHouse/ClickHouse/pull/48864) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix `UNKNOWN_IDENTIFIER` error while selecting from table with row policy and column with dots [#48976](https://github.com/ClickHouse/ClickHouse/pull/48976) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix aggregation by empty nullable strings [#48999](https://github.com/ClickHouse/ClickHouse/pull/48999) ([LiuNeng](https://github.com/liuneng1994)). - -### ClickHouse release 23.3 LTS, 2023-03-30 - -#### Upgrade Notes -* Lightweight DELETEs are production ready and enabled by default. The `DELETE` query for MergeTree tables is now available by default. -* The behavior of `*domain*RFC` and `netloc` functions is slightly changed: relaxed the set of symbols that are allowed in the URL authority for better conformance. [#46841](https://github.com/ClickHouse/ClickHouse/pull/46841) ([Azat Khuzhin](https://github.com/azat)). -* Prohibited creating tables based on KafkaEngine with DEFAULT/EPHEMERAL/ALIAS/MATERIALIZED statements for columns. [#47138](https://github.com/ClickHouse/ClickHouse/pull/47138) ([Aleksandr Musorin](https://github.com/AVMusorin)). -* An "asynchronous connection drain" feature is removed. Related settings and metrics are removed as well. It was an internal feature, so the removal should not affect users who had never heard about that feature. [#47486](https://github.com/ClickHouse/ClickHouse/pull/47486) ([Alexander Tokmakov](https://github.com/tavplubix)). -* Support 256-bit Decimal data type (more than 38 digits) in `arraySum`/`Min`/`Max`/`Avg`/`Product`, `arrayCumSum`/`CumSumNonNegative`, `arrayDifference`, array construction, IN operator, query parameters, `groupArrayMovingSum`, statistical functions, `min`/`max`/`any`/`argMin`/`argMax`, PostgreSQL wire protocol, MySQL table engine and function, `sumMap`, `mapAdd`, `mapSubtract`, `arrayIntersect`. Add support for big integers in `arrayIntersect`. Statistical aggregate functions involving moments (such as `corr` or various `TTest`s) will use `Float64` as their internal representation (they were using `Decimal128` before this change, but it was pointless), and these functions can return `nan` instead of `inf` in case of infinite variance. Some functions were allowed on `Decimal256` data types but returned `Decimal128` in previous versions - now it is fixed. This closes [#47569](https://github.com/ClickHouse/ClickHouse/issues/47569). This closes [#44864](https://github.com/ClickHouse/ClickHouse/issues/44864). This closes [#28335](https://github.com/ClickHouse/ClickHouse/issues/28335). [#47594](https://github.com/ClickHouse/ClickHouse/pull/47594) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Make backup_threads/restore_threads server settings (instead of user settings). 
[#47881](https://github.com/ClickHouse/ClickHouse/pull/47881) ([Azat Khuzhin](https://github.com/azat)). -* Do not allow const and non-deterministic secondary indices [#46839](https://github.com/ClickHouse/ClickHouse/pull/46839) ([Anton Popov](https://github.com/CurtizJ)). - -#### New Feature -* Add a new mode for splitting the work on replicas using settings `parallel_replicas_custom_key` and `parallel_replicas_custom_key_filter_type`. If the cluster consists of a single shard with multiple replicas, up to `max_parallel_replicas` will be randomly picked and turned into shards. For each shard, a corresponding filter is added to the query on the initiator before being sent to the shard. If the cluster consists of multiple shards, it will behave the same as `sample_key` but with the possibility to define an arbitrary key. [#45108](https://github.com/ClickHouse/ClickHouse/pull/45108) ([Antonio Andelic](https://github.com/antonio2368)). -* An option to display partial result on cancel: Added query setting `partial_result_on_first_cancel` allowing the canceled query (e.g. due to Ctrl-C) to return a partial result. [#45689](https://github.com/ClickHouse/ClickHouse/pull/45689) ([Alexey Perevyshin](https://github.com/alexX512)). -* Added support of arbitrary tables engines for temporary tables (except for Replicated and KeeperMap engines). Close [#31497](https://github.com/ClickHouse/ClickHouse/issues/31497). [#46071](https://github.com/ClickHouse/ClickHouse/pull/46071) ([Roman Vasin](https://github.com/rvasin)). -* Add support for replication of user-defined SQL functions using centralized storage in Keeper. [#46085](https://github.com/ClickHouse/ClickHouse/pull/46085) ([Aleksei Filatov](https://github.com/aalexfvk)). -* Implement `system.server_settings` (similar to `system.settings`), which will contain server configurations. [#46550](https://github.com/ClickHouse/ClickHouse/pull/46550) ([pufit](https://github.com/pufit)). -* Support for `UNDROP TABLE` query. Closes [#46811](https://github.com/ClickHouse/ClickHouse/issues/46811). [#47241](https://github.com/ClickHouse/ClickHouse/pull/47241) ([chen](https://github.com/xiedeyantu)). -* Allow separate grants for named collections (e.g. to be able to give `SHOW/CREATE/ALTER/DROP named collection` access only to certain collections, instead of all at once). Closes [#40894](https://github.com/ClickHouse/ClickHouse/issues/40894). Add new access type `NAMED_COLLECTION_CONTROL` which is not given to user default unless explicitly added to the user config (is required to be able to do `GRANT ALL`), also `show_named_collections` is no longer obligatory to be manually specified for user default to be able to have full access rights as was in 23.2. [#46241](https://github.com/ClickHouse/ClickHouse/pull/46241) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Allow nested custom disks. Previously custom disks supported only flat disk structure. [#47106](https://github.com/ClickHouse/ClickHouse/pull/47106) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Introduce a function `widthBucket` (with a `WIDTH_BUCKET` alias for compatibility). [#42974](https://github.com/ClickHouse/ClickHouse/issues/42974). [#46790](https://github.com/ClickHouse/ClickHouse/pull/46790) ([avoiderboi](https://github.com/avoiderboi)). -* Add new function `parseDateTime`/`parseDateTimeInJodaSyntax` according to the specified format string. parseDateTime parses String to DateTime in MySQL syntax, parseDateTimeInJodaSyntax parses in Joda syntax. 
[#46815](https://github.com/ClickHouse/ClickHouse/pull/46815) ([李扬](https://github.com/taiyang-li)). -* Use `dummy UInt8` for the default structure of table function `null`. Closes [#46930](https://github.com/ClickHouse/ClickHouse/issues/46930). [#47006](https://github.com/ClickHouse/ClickHouse/pull/47006) ([flynn](https://github.com/ucasfl)). -* Support for date format with a comma, like `Dec 15, 2021` in the `parseDateTimeBestEffort` function. Closes [#46816](https://github.com/ClickHouse/ClickHouse/issues/46816). [#47071](https://github.com/ClickHouse/ClickHouse/pull/47071) ([chen](https://github.com/xiedeyantu)). -* Add settings `http_wait_end_of_query` and `http_response_buffer_size` that corresponds to URL params `wait_end_of_query` and `buffer_size` for the HTTP interface. This allows changing these settings in the profiles. [#47108](https://github.com/ClickHouse/ClickHouse/pull/47108) ([Vladimir C](https://github.com/vdimir)). -* Add `system.dropped_tables` table that shows tables that were dropped from `Atomic` databases but were not completely removed yet. [#47364](https://github.com/ClickHouse/ClickHouse/pull/47364) ([chen](https://github.com/xiedeyantu)). -* Add `INSTR` as alias of `positionCaseInsensitive` for MySQL compatibility. Closes [#47529](https://github.com/ClickHouse/ClickHouse/issues/47529). [#47535](https://github.com/ClickHouse/ClickHouse/pull/47535) ([flynn](https://github.com/ucasfl)). -* Added `toDecimalString` function allowing to convert numbers to string with fixed precision. [#47838](https://github.com/ClickHouse/ClickHouse/pull/47838) ([Andrey Zvonov](https://github.com/zvonand)). -* Add a merge tree setting `max_number_of_mutations_for_replica`. It limits the number of part mutations per replica to the specified amount. Zero means no limit on the number of mutations per replica (the execution can still be constrained by other settings). [#48047](https://github.com/ClickHouse/ClickHouse/pull/48047) ([Vladimir C](https://github.com/vdimir)). -* Add the Map-related function `mapFromArrays`, which allows the creation of a map from a pair of arrays. [#31125](https://github.com/ClickHouse/ClickHouse/pull/31125) ([李扬](https://github.com/taiyang-li)). -* Allow control of compression in Parquet/ORC/Arrow output formats, adds support for more compression input formats. This closes [#13541](https://github.com/ClickHouse/ClickHouse/issues/13541). [#47114](https://github.com/ClickHouse/ClickHouse/pull/47114) ([Kruglov Pavel](https://github.com/Avogar)). -* Add SSL User Certificate authentication to the native protocol. Closes [#47077](https://github.com/ClickHouse/ClickHouse/issues/47077). [#47596](https://github.com/ClickHouse/ClickHouse/pull/47596) ([Nikolay Degterinsky](https://github.com/evillique)). -* Add *OrNull() and *OrZero() variants for `parseDateTime`, add alias `str_to_date` for MySQL parity. [#48000](https://github.com/ClickHouse/ClickHouse/pull/48000) ([Robert Schulze](https://github.com/rschu1ze)). -* Added operator `REGEXP` (similar to operators "LIKE", "IN", "MOD" etc.) for better compatibility with MySQL [#47869](https://github.com/ClickHouse/ClickHouse/pull/47869) ([Robert Schulze](https://github.com/rschu1ze)). - -#### Performance Improvement -* Marks in memory are now compressed, using 3-6x less memory. [#47290](https://github.com/ClickHouse/ClickHouse/pull/47290) ([Michael Kolupaev](https://github.com/al13n321)). -* Backups for large numbers of files were unbelievably slow in previous versions. Not anymore. Now they are unbelievably fast. 
[#47251](https://github.com/ClickHouse/ClickHouse/pull/47251) ([Alexey Milovidov](https://github.com/alexey-milovidov)). Introduced a separate thread pool for backup's IO operations. This will allow scaling it independently of other pools and increase performance. [#47174](https://github.com/ClickHouse/ClickHouse/pull/47174) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). Use MultiRead request and retries for collecting metadata at the final stage of backup processing. [#47243](https://github.com/ClickHouse/ClickHouse/pull/47243) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). If a backup and restoring data are both in S3 then server-side copy should be used from now on. [#47546](https://github.com/ClickHouse/ClickHouse/pull/47546) ([Vitaly Baranov](https://github.com/vitlibar)). -* Fixed excessive reading in queries with `FINAL`. [#47801](https://github.com/ClickHouse/ClickHouse/pull/47801) ([Nikita Taranov](https://github.com/nickitat)). -* Setting `max_final_threads` would be set to the number of cores at server startup (by the same algorithm as used for `max_threads`). This improves the concurrency of `final` execution on servers with high number of CPUs. [#47915](https://github.com/ClickHouse/ClickHouse/pull/47915) ([Nikita Taranov](https://github.com/nickitat)). -* Allow executing reading pipeline for DIRECT dictionary with CLICKHOUSE source in multiple threads. To enable set `dictionary_use_async_executor=1` in `SETTINGS` section for source in `CREATE DICTIONARY` statement. [#47986](https://github.com/ClickHouse/ClickHouse/pull/47986) ([Vladimir C](https://github.com/vdimir)). -* Optimize one nullable key aggregate performance. [#45772](https://github.com/ClickHouse/ClickHouse/pull/45772) ([LiuNeng](https://github.com/liuneng1994)). -* Implemented lowercase `tokenbf_v1` index utilization for `hasTokenOrNull`, `hasTokenCaseInsensitive` and `hasTokenCaseInsensitiveOrNull`. [#46252](https://github.com/ClickHouse/ClickHouse/pull/46252) ([ltrk2](https://github.com/ltrk2)). -* Optimize functions `position` and `LIKE` by searching the first two chars using SIMD. [#46289](https://github.com/ClickHouse/ClickHouse/pull/46289) ([Jiebin Sun](https://github.com/jiebinn)). -* Optimize queries from the `system.detached_parts`, which could be significantly large. Added several sources with respect to the block size limitation; in each block, an IO thread pool is used to calculate the part size, i.e. to make syscalls in parallel. [#46624](https://github.com/ClickHouse/ClickHouse/pull/46624) ([Sema Checherinda](https://github.com/CheSema)). -* Increase the default value of `max_replicated_merges_in_queue` for ReplicatedMergeTree tables from 16 to 1000. It allows faster background merge operation on clusters with a very large number of replicas, such as clusters with shared storage in ClickHouse Cloud. [#47050](https://github.com/ClickHouse/ClickHouse/pull/47050) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Updated `clickhouse-copier` to use `GROUP BY` instead of `DISTINCT` to get the list of partitions. For large tables, this reduced the select time from over 500s to under 1s. [#47386](https://github.com/ClickHouse/ClickHouse/pull/47386) ([Clayton McClure](https://github.com/cmcclure-twilio)). -* Fix performance degradation in `ASOF JOIN`. [#47544](https://github.com/ClickHouse/ClickHouse/pull/47544) ([Ongkong](https://github.com/ongkong)). -* Even more batching in Keeper. Improve performance by avoiding breaking batches on read requests. 
[#47978](https://github.com/ClickHouse/ClickHouse/pull/47978) ([Antonio Andelic](https://github.com/antonio2368)). -* Allow PREWHERE for Merge with different DEFAULT expressions for columns. [#46831](https://github.com/ClickHouse/ClickHouse/pull/46831) ([Azat Khuzhin](https://github.com/azat)). - -#### Experimental Feature -* Parallel replicas: Improved the overall performance by better utilizing the local replica, and forbid the reading with parallel replicas from non-replicated MergeTree by default. [#47858](https://github.com/ClickHouse/ClickHouse/pull/47858) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). -* Support filter push down to left table for JOIN with `Join`, `Dictionary` and `EmbeddedRocksDB` tables if the experimental Analyzer is enabled. [#47280](https://github.com/ClickHouse/ClickHouse/pull/47280) ([Maksim Kita](https://github.com/kitaisreal)). -* Now ReplicatedMergeTree with zero copy replication has less load to Keeper. [#47676](https://github.com/ClickHouse/ClickHouse/pull/47676) ([alesapin](https://github.com/alesapin)). -* Fix create materialized view with MaterializedPostgreSQL [#40807](https://github.com/ClickHouse/ClickHouse/pull/40807) ([Maksim Buren](https://github.com/maks-buren630501)). - -#### Improvement -* Enable `input_format_json_ignore_unknown_keys_in_named_tuple` by default. [#46742](https://github.com/ClickHouse/ClickHouse/pull/46742) ([Kruglov Pavel](https://github.com/Avogar)). -* Allow errors to be ignored while pushing to MATERIALIZED VIEW (add new setting `materialized_views_ignore_errors`, by default to `false`, but it is set to `true` for flushing logs to `system.*_log` tables unconditionally). [#46658](https://github.com/ClickHouse/ClickHouse/pull/46658) ([Azat Khuzhin](https://github.com/azat)). -* Track the file queue of distributed sends in memory. [#45491](https://github.com/ClickHouse/ClickHouse/pull/45491) ([Azat Khuzhin](https://github.com/azat)). -* Now `X-ClickHouse-Query-Id` and `X-ClickHouse-Timezone` headers are added to responses in all queries via HTTP protocol. Previously it was done only for `SELECT` queries. [#46364](https://github.com/ClickHouse/ClickHouse/pull/46364) ([Anton Popov](https://github.com/CurtizJ)). -* External tables from `MongoDB`: support for connection to a replica set via a URI with a host:port enum and support for the readPreference option in MongoDB dictionaries. Example URI: mongodb://db0.example.com:27017,db1.example.com:27017,db2.example.com:27017/?replicaSet=myRepl&readPreference=primary. [#46524](https://github.com/ClickHouse/ClickHouse/pull/46524) ([artem-yadr](https://github.com/artem-yadr)). -* This improvement should be invisible for users. Re-implement projection analysis on top of query plan. Added setting `query_plan_optimize_projection=1` to switch between old and new version. Fixes [#44963](https://github.com/ClickHouse/ClickHouse/issues/44963). [#46537](https://github.com/ClickHouse/ClickHouse/pull/46537) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Use Parquet format v2 instead of v1 in output format by default. Add setting `output_format_parquet_version` to control parquet version, possible values `1.0`, `2.4`, `2.6`, `2.latest` (default). [#46617](https://github.com/ClickHouse/ClickHouse/pull/46617) ([Kruglov Pavel](https://github.com/Avogar)). -* It is now possible to use the new configuration syntax to configure Kafka topics with periods (`.`) in their name. 
[#46752](https://github.com/ClickHouse/ClickHouse/pull/46752) ([Robert Schulze](https://github.com/rschu1ze)). -* Fix heuristics that check hyperscan patterns for problematic repeats. [#46819](https://github.com/ClickHouse/ClickHouse/pull/46819) ([Robert Schulze](https://github.com/rschu1ze)). -* Don't report ZK node exists to system.errors when a block was created concurrently by a different replica. [#46820](https://github.com/ClickHouse/ClickHouse/pull/46820) ([Raúl Marín](https://github.com/Algunenano)). -* Increase the limit for opened files in `clickhouse-local`. It will be able to read from `web` tables on servers with a huge number of CPU cores. Do not back off reading from the URL table engine in case of too many opened files. This closes [#46852](https://github.com/ClickHouse/ClickHouse/issues/46852). [#46853](https://github.com/ClickHouse/ClickHouse/pull/46853) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Exceptions thrown when numbers cannot be parsed now have an easier-to-read exception message. [#46917](https://github.com/ClickHouse/ClickHouse/pull/46917) ([Robert Schulze](https://github.com/rschu1ze)). -* Added update `system.backups` after every processed task to track the progress of backups. [#46989](https://github.com/ClickHouse/ClickHouse/pull/46989) ([Aleksandr Musorin](https://github.com/AVMusorin)). -* Allow types conversion in Native input format. Add settings `input_format_native_allow_types_conversion` that controls it (enabled by default). [#46990](https://github.com/ClickHouse/ClickHouse/pull/46990) ([Kruglov Pavel](https://github.com/Avogar)). -* Allow IPv4 in the `range` function to generate IP ranges. [#46995](https://github.com/ClickHouse/ClickHouse/pull/46995) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). -* Improve exception message when it's impossible to move a part from one volume/disk to another. [#47032](https://github.com/ClickHouse/ClickHouse/pull/47032) ([alesapin](https://github.com/alesapin)). -* Support `Bool` type in `JSONType` function. Previously `Null` type was mistakenly returned for bool values. [#47046](https://github.com/ClickHouse/ClickHouse/pull/47046) ([Anton Popov](https://github.com/CurtizJ)). -* Use `_request_body` parameter to configure predefined HTTP queries. [#47086](https://github.com/ClickHouse/ClickHouse/pull/47086) ([Constantine Peresypkin](https://github.com/pkit)). -* Automatic indentation in the built-in UI SQL editor when Enter is pressed. [#47113](https://github.com/ClickHouse/ClickHouse/pull/47113) ([Alexey Korepanov](https://github.com/alexkorep)). -* Self-extraction with 'sudo' will attempt to set uid and gid of extracted files to running user. [#47116](https://github.com/ClickHouse/ClickHouse/pull/47116) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). -* Previously, the `repeat` function's second argument only accepted an unsigned integer type, which meant it could not accept values such as -1. This behavior differed from that of the Spark function. In this update, the repeat function has been modified to match the behavior of the Spark function. It now accepts the same types of inputs, including negative integers. Extensive testing has been performed to verify the correctness of the updated implementation. [#47134](https://github.com/ClickHouse/ClickHouse/pull/47134) ([KevinyhZou](https://github.com/KevinyhZou)). Note: the changelog entry was rewritten by ChatGPT. -* Remove `::__1` part from stacktraces. 
Display `std::basic_string` […]
-
-### ClickHouse release 23.2, 2023-02-23
-
-#### Backward Incompatible Change
-* Extend function "toDayOfWeek()" (alias: "DAYOFWEEK") with a mode argument that encodes whether the week starts on Monday or Sunday and whether counting starts at 0 or 1. For consistency with other date-time functions, the mode argument was inserted between the time and the time zone arguments. This breaks existing usage of the (previously undocumented) 2-argument syntax "toDayOfWeek(time, time_zone)". A fix is to rewrite the function into "toDayOfWeek(time, 0, time_zone)". [#45233](https://github.com/ClickHouse/ClickHouse/pull/45233) ([Robert Schulze](https://github.com/rschu1ze)).
-* Rename setting `max_query_cache_size` to `filesystem_cache_max_download_size`. [#45614](https://github.com/ClickHouse/ClickHouse/pull/45614) ([Kseniia Sumarokova](https://github.com/kssenii)).
-* The `default` user will not have permissions for access type `SHOW NAMED COLLECTION` by default (e.g. the `default` user will no longer be able to grant ALL to other users as it could before, therefore this PR is backward incompatible). [#46010](https://github.com/ClickHouse/ClickHouse/pull/46010) ([Kseniia Sumarokova](https://github.com/kssenii)).
-* If the SETTINGS clause is specified before the FORMAT clause, the settings will be applied to formatting as well. [#46003](https://github.com/ClickHouse/ClickHouse/pull/46003) ([Azat Khuzhin](https://github.com/azat)).
-* Remove support for setting `materialized_postgresql_allow_automatic_update` (which was turned off by default). [#46106](https://github.com/ClickHouse/ClickHouse/pull/46106) ([Kseniia Sumarokova](https://github.com/kssenii)).
-* Slightly improve performance of `countDigits` on realistic datasets. This closed [#44518](https://github.com/ClickHouse/ClickHouse/issues/44518). In previous versions, `countDigits(0)` returned `0`; now it returns `1`, which is more correct and follows the existing documentation. [#46187](https://github.com/ClickHouse/ClickHouse/pull/46187) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
-* Disallow creation of new columns compressed by a combination of codecs "Delta" or "DoubleDelta" followed by codecs "Gorilla" or "FPC". This can be bypassed using the setting "allow_suspicious_codecs = true". [#45652](https://github.com/ClickHouse/ClickHouse/pull/45652) ([Robert Schulze](https://github.com/rschu1ze)).
-
-#### New Feature
-* Add `StorageIceberg` and table function `iceberg` to access Iceberg tables stored on S3. [#45384](https://github.com/ClickHouse/ClickHouse/pull/45384) ([flynn](https://github.com/ucasfl)).
-* Allow configuring storage as `SETTINGS disk = ''` (instead of `storage_policy`) and with explicit disk creation `SETTINGS disk = disk(type=s3, ...)`. [#41976](https://github.com/ClickHouse/ClickHouse/pull/41976) ([Kseniia Sumarokova](https://github.com/kssenii)).
-* Expose `ProfileEvents` counters in `system.part_log`. [#38614](https://github.com/ClickHouse/ClickHouse/pull/38614) ([Bharat Nallan](https://github.com/bharatnc)).
-* Enrichment of the existing `ReplacingMergeTree` engine to allow duplicate insertions. It leverages the power of both `ReplacingMergeTree` and `CollapsingMergeTree` in one MergeTree engine. Deleted data are not returned when queried, but are not removed from disk either. [#41005](https://github.com/ClickHouse/ClickHouse/pull/41005) ([youennL-cs](https://github.com/youennL-cs)).
-* Add `generateULID` function. Closes [#36536](https://github.com/ClickHouse/ClickHouse/issues/36536).
[#44662](https://github.com/ClickHouse/ClickHouse/pull/44662) ([Nikolay Degterinsky](https://github.com/evillique)). -* Add `corrMatrix` aggregate function, calculating each two columns. In addition, since Aggregatefunctions `covarSamp` and `covarPop` are similar to `corr`, I add `covarSampMatrix`, `covarPopMatrix` by the way. @alexey-milovidov closes [#44587](https://github.com/ClickHouse/ClickHouse/issues/44587). [#44680](https://github.com/ClickHouse/ClickHouse/pull/44680) ([FFFFFFFHHHHHHH](https://github.com/FFFFFFFHHHHHHH)). -* Introduce arrayShuffle function for random array permutations. [#45271](https://github.com/ClickHouse/ClickHouse/pull/45271) ([Joanna Hulboj](https://github.com/jh0x)). -* Support types `FIXED_SIZE_BINARY` type in Arrow, `FIXED_LENGTH_BYTE_ARRAY` in `Parquet` and match them to `FixedString`. Add settings `output_format_parquet_fixed_string_as_fixed_byte_array/output_format_arrow_fixed_string_as_fixed_byte_array` to control default output type for FixedString. Closes [#45326](https://github.com/ClickHouse/ClickHouse/issues/45326). [#45340](https://github.com/ClickHouse/ClickHouse/pull/45340) ([Kruglov Pavel](https://github.com/Avogar)). -* Add a new column `last_exception_time` to system.replication_queue. [#45457](https://github.com/ClickHouse/ClickHouse/pull/45457) ([Frank Chen](https://github.com/FrankChen021)). -* Add two new functions which allow for user-defined keys/seeds with SipHash{64,128}. [#45513](https://github.com/ClickHouse/ClickHouse/pull/45513) ([Salvatore Mesoraca](https://github.com/aiven-sal)). -* Allow a three-argument version for table function `format`. close [#45808](https://github.com/ClickHouse/ClickHouse/issues/45808). [#45873](https://github.com/ClickHouse/ClickHouse/pull/45873) ([FFFFFFFHHHHHHH](https://github.com/FFFFFFFHHHHHHH)). -* Add `JodaTime` format support for 'x','w','S'. Refer to https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html. [#46073](https://github.com/ClickHouse/ClickHouse/pull/46073) ([zk_kiger](https://github.com/zk-kiger)). -* Support window function `ntile`. ([lgbo](https://github.com/lgbo-ustc)). -* Add setting `final` to implicitly apply the `FINAL` modifier to every table. [#40945](https://github.com/ClickHouse/ClickHouse/pull/40945) ([Arthur Passos](https://github.com/arthurpassos)). -* Added `arrayPartialSort` and `arrayPartialReverseSort` functions. [#46296](https://github.com/ClickHouse/ClickHouse/pull/46296) ([Joanna Hulboj](https://github.com/jh0x)). -* The new http parameter `client_protocol_version` allows setting a client protocol version for HTTP responses using the Native format. [#40397](https://github.com/ClickHouse/ClickHouse/issues/40397). [#46360](https://github.com/ClickHouse/ClickHouse/pull/46360) ([Geoff Genz](https://github.com/genzgd)). -* Add new function `regexpExtract`, like spark function `REGEXP_EXTRACT` for compatibility. It is similar to the existing function `extract`. [#46469](https://github.com/ClickHouse/ClickHouse/pull/46469) ([李扬](https://github.com/taiyang-li)). -* Add new function `JSONArrayLength`, which returns the number of elements in the outermost JSON array. The function returns NULL if the input JSON string is invalid. [#46631](https://github.com/ClickHouse/ClickHouse/pull/46631) ([李扬](https://github.com/taiyang-li)). - -#### Performance Improvement -* The introduced logic works if PREWHERE condition is a conjunction of multiple conditions (cond1 AND cond2 AND ... ). 
It groups those conditions that require reading the same columns into steps. After each step the corresponding part of the full condition is computed and the result rows might be filtered. This allows to read fewer rows in the next steps thus saving IO bandwidth and doing less computation. This logic is disabled by default for now. It will be enabled by default in one of the future releases once it is known to not have any regressions, so it is highly encouraged to be used for testing. It can be controlled by 2 settings: "enable_multiple_prewhere_read_steps" and "move_all_conditions_to_prewhere". [#46140](https://github.com/ClickHouse/ClickHouse/pull/46140) ([Alexander Gololobov](https://github.com/davenger)). -* An option added to aggregate partitions independently if table partition key and group by key are compatible. Controlled by the setting `allow_aggregate_partitions_independently`. Disabled by default because of limited applicability (please refer to the docs). [#45364](https://github.com/ClickHouse/ClickHouse/pull/45364) ([Nikita Taranov](https://github.com/nickitat)). -* Allow using Vertical merge algorithm with parts in Compact format. This will allow ClickHouse server to use much less memory for background operations. This closes [#46084](https://github.com/ClickHouse/ClickHouse/issues/46084). [#45681](https://github.com/ClickHouse/ClickHouse/pull/45681) [#46282](https://github.com/ClickHouse/ClickHouse/pull/46282) ([Anton Popov](https://github.com/CurtizJ)). -* Optimize `Parquet` reader by using batch reader. [#45878](https://github.com/ClickHouse/ClickHouse/pull/45878) ([LiuNeng](https://github.com/liuneng1994)). -* Add new `local_filesystem_read_method` method `io_uring` based on the asynchronous Linux [io_uring](https://kernel.dk/io_uring.pdf) subsystem, improving read performance almost universally compared to the default `pread` method. [#38456](https://github.com/ClickHouse/ClickHouse/pull/38456) ([Saulius Valatka](https://github.com/sauliusvl)). -* Rewrite aggregate functions with `if` expression as argument when logically equivalent. For example, `avg(if(cond, col, null))` can be rewritten to avgIf(cond, col). It is helpful in performance. [#44730](https://github.com/ClickHouse/ClickHouse/pull/44730) ([李扬](https://github.com/taiyang-li)). -* Improve lower/upper function performance with avx512 instructions. [#37894](https://github.com/ClickHouse/ClickHouse/pull/37894) ([yaqi-zhao](https://github.com/yaqi-zhao)). -* Remove the limitation that on systems with >=32 cores and SMT disabled ClickHouse uses only half of the cores (the case when you disable Hyper Threading in BIOS). [#44973](https://github.com/ClickHouse/ClickHouse/pull/44973) ([Robert Schulze](https://github.com/rschu1ze)). -* Improve performance of function `multiIf` by columnar executing, speed up by 2.3x. [#45296](https://github.com/ClickHouse/ClickHouse/pull/45296) ([李扬](https://github.com/taiyang-li)). -* Add fast path for function `position` when the needle is empty. [#45382](https://github.com/ClickHouse/ClickHouse/pull/45382) ([李扬](https://github.com/taiyang-li)). -* Enable `query_plan_remove_redundant_sorting` optimization by default. Optimization implemented in [#45420](https://github.com/ClickHouse/ClickHouse/issues/45420). [#45567](https://github.com/ClickHouse/ClickHouse/pull/45567) ([Igor Nikonov](https://github.com/devcrafter)). -* Increased HTTP Transfer Encoding chunk size to improve performance of large queries using the HTTP interface. 
[#45593](https://github.com/ClickHouse/ClickHouse/pull/45593) ([Geoff Genz](https://github.com/genzgd)). -* Fixed performance of short `SELECT` queries that read from tables with large number of `Array`/`Map`/`Nested` columns. [#45630](https://github.com/ClickHouse/ClickHouse/pull/45630) ([Anton Popov](https://github.com/CurtizJ)). -* Improve performance of filtering for big integers and decimal types. [#45949](https://github.com/ClickHouse/ClickHouse/pull/45949) ([李扬](https://github.com/taiyang-li)). -* This change could effectively reduce the overhead of obtaining the filter from ColumnNullable(UInt8) and improve the overall query performance. To evaluate the impact of this change, we adopted TPC-H benchmark but revised the column types from non-nullable to nullable, and we measured the QPS of its queries as the performance indicator. [#45962](https://github.com/ClickHouse/ClickHouse/pull/45962) ([Zhiguo Zhou](https://github.com/ZhiguoZh)). -* Make the `_part` and `_partition_id` virtual column be `LowCardinality(String)` type. Closes [#45964](https://github.com/ClickHouse/ClickHouse/issues/45964). [#45975](https://github.com/ClickHouse/ClickHouse/pull/45975) ([flynn](https://github.com/ucasfl)). -* Improve the performance of Decimal conversion when the scale does not change. [#46095](https://github.com/ClickHouse/ClickHouse/pull/46095) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Allow to increase prefetching for read data. [#46168](https://github.com/ClickHouse/ClickHouse/pull/46168) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Rewrite `arrayExists(x -> x = 1, arr)` -> `has(arr, 1)`, which improve performance by 1.34x. [#46188](https://github.com/ClickHouse/ClickHouse/pull/46188) ([李扬](https://github.com/taiyang-li)). -* Fix too big memory usage for vertical merges on non-remote disk. Respect `max_insert_delayed_streams_for_parallel_write` for the remote disk. [#46275](https://github.com/ClickHouse/ClickHouse/pull/46275) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Update zstd to v1.5.4. It has some minor improvements in performance and compression ratio. If you run replicas with different versions of ClickHouse you may see reasonable error messages `Data after merge/mutation is not byte-identical to data on another replicas.` with explanation. These messages are Ok and you should not worry. [#46280](https://github.com/ClickHouse/ClickHouse/pull/46280) ([Raúl Marín](https://github.com/Algunenano)). -* Fix performance degradation caused by [#39737](https://github.com/ClickHouse/ClickHouse/issues/39737). [#46309](https://github.com/ClickHouse/ClickHouse/pull/46309) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* The `replicas_status` handle will answer quickly even in case of a large replication queue. [#46310](https://github.com/ClickHouse/ClickHouse/pull/46310) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Add avx512 support for aggregate function `sum`, function unary arithmetic, function comparison. [#37870](https://github.com/ClickHouse/ClickHouse/pull/37870) ([zhao zhou](https://github.com/zzachimed)). -* Rewrote the code around marks distribution and the overall coordination of the reading in order to achieve the maximum performance improvement. This closes [#34527](https://github.com/ClickHouse/ClickHouse/issues/34527). [#43772](https://github.com/ClickHouse/ClickHouse/pull/43772) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). -* Remove redundant DISTINCT clauses in query (subqueries). 
Implemented on top of query plan. It does similar optimization as `optimize_duplicate_order_by_and_distinct` regarding DISTINCT clauses. Can be enabled via `query_plan_remove_redundant_distinct` setting. Related to [#42648](https://github.com/ClickHouse/ClickHouse/issues/42648). [#44176](https://github.com/ClickHouse/ClickHouse/pull/44176) ([Igor Nikonov](https://github.com/devcrafter)). -* A few query rewrite optimizations: `sumIf(123, cond) -> 123 * countIf(1, cond)`, `sum(if(cond, 123, 0)) -> 123 * countIf(cond)`, `sum(if(cond, 0, 123)) -> 123 * countIf(not(cond))` [#44728](https://github.com/ClickHouse/ClickHouse/pull/44728) ([李扬](https://github.com/taiyang-li)). -* Improved how memory bound merging and aggregation in order on top query plan interact. Previously we fell back to explicit sorting for AIO in some cases when it wasn't actually needed. [#45892](https://github.com/ClickHouse/ClickHouse/pull/45892) ([Nikita Taranov](https://github.com/nickitat)). -* Concurrent merges are scheduled using round-robin by default to ensure fair and starvation-free operation. Previously in heavily overloaded shards, big merges could possibly be starved by smaller merges due to the use of strict priority scheduling. Added `background_merges_mutations_scheduling_policy` server config option to select scheduling algorithm (`round_robin` or `shortest_task_first`). [#46247](https://github.com/ClickHouse/ClickHouse/pull/46247) ([Sergei Trifonov](https://github.com/serxa)). - -#### Improvement -* Enable retries for INSERT by default in case of ZooKeeper session loss. We already use it in production. [#46308](https://github.com/ClickHouse/ClickHouse/pull/46308) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Add ability to ignore unknown keys in JSON object for named tuples (`input_format_json_ignore_unknown_keys_in_named_tuple`). [#45678](https://github.com/ClickHouse/ClickHouse/pull/45678) ([Azat Khuzhin](https://github.com/azat)). -* Support optimizing the `where` clause with sorting key expression move to `prewhere` for query with `final`. [#38893](https://github.com/ClickHouse/ClickHouse/issues/38893). [#38950](https://github.com/ClickHouse/ClickHouse/pull/38950) ([hexiaoting](https://github.com/hexiaoting)). -* Add new metrics for backups: num_processed_files and processed_files_size described actual number of processed files. [#42244](https://github.com/ClickHouse/ClickHouse/pull/42244) ([Aleksandr](https://github.com/AVMusorin)). -* Added retries on interserver DNS errors. [#43179](https://github.com/ClickHouse/ClickHouse/pull/43179) ([Anton Kozlov](https://github.com/tonickkozlov)). -* Keeper improvement: try preallocating space on the disk to avoid undefined out-of-space issues. Introduce setting `max_log_file_size` for the maximum size of Keeper's Raft log files. [#44370](https://github.com/ClickHouse/ClickHouse/pull/44370) ([Antonio Andelic](https://github.com/antonio2368)). -* Optimize behavior for a replica delay api logic in case the replica is read-only. [#45148](https://github.com/ClickHouse/ClickHouse/pull/45148) ([mateng915](https://github.com/mateng0915)). -* Ask for the password in clickhouse-client interactively in a case when the empty password is wrong. Closes [#46702](https://github.com/ClickHouse/ClickHouse/issues/46702). [#46730](https://github.com/ClickHouse/ClickHouse/pull/46730) ([Nikolay Degterinsky](https://github.com/evillique)). -* Mark `Gorilla` compression on columns of non-Float* type as suspicious. 
[#45376](https://github.com/ClickHouse/ClickHouse/pull/45376) ([Robert Schulze](https://github.com/rschu1ze)). -* Show replica name that is executing a merge in the `postpone_reason` column. [#45458](https://github.com/ClickHouse/ClickHouse/pull/45458) ([Frank Chen](https://github.com/FrankChen021)). -* Save exception stack trace in part_log. [#45459](https://github.com/ClickHouse/ClickHouse/pull/45459) ([Frank Chen](https://github.com/FrankChen021)). -* The `regexp_tree` dictionary is polished and now it is compatible with https://github.com/ua-parser/uap-core. [#45631](https://github.com/ClickHouse/ClickHouse/pull/45631) ([Han Fei](https://github.com/hanfei1991)). -* Updated checking of `SYSTEM SYNC REPLICA`, resolves [#45508](https://github.com/ClickHouse/ClickHouse/issues/45508) [#45648](https://github.com/ClickHouse/ClickHouse/pull/45648) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). -* Rename setting `replication_alter_partitions_sync` to `alter_sync`. [#45659](https://github.com/ClickHouse/ClickHouse/pull/45659) ([Antonio Andelic](https://github.com/antonio2368)). -* The `generateRandom` table function and the engine now support `LowCardinality` data types. This is useful for testing, for example you can write `INSERT INTO table SELECT * FROM generateRandom() LIMIT 1000`. This is needed to debug [#45590](https://github.com/ClickHouse/ClickHouse/issues/45590). [#45661](https://github.com/ClickHouse/ClickHouse/pull/45661) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* The experimental query result cache now provides more modular configuration settings. [#45679](https://github.com/ClickHouse/ClickHouse/pull/45679) ([Robert Schulze](https://github.com/rschu1ze)). -* Renamed "query result cache" to "query cache". [#45682](https://github.com/ClickHouse/ClickHouse/pull/45682) ([Robert Schulze](https://github.com/rschu1ze)). -* add `SYSTEM SYNC FILE CACHE` command. It will do the `sync` syscall. [#8921](https://github.com/ClickHouse/ClickHouse/issues/8921). [#45685](https://github.com/ClickHouse/ClickHouse/pull/45685) ([DR](https://github.com/freedomDR)). -* Add a new S3 setting `allow_head_object_request`. This PR makes usage of `GetObjectAttributes` request instead of `HeadObject` introduced in https://github.com/ClickHouse/ClickHouse/pull/45288 optional (and disabled by default). [#45701](https://github.com/ClickHouse/ClickHouse/pull/45701) ([Vitaly Baranov](https://github.com/vitlibar)). -* Add ability to override connection settings based on connection names (that said that now you can forget about storing password for each connection, you can simply put everything into `~/.clickhouse-client/config.xml` and even use different history files for them, which can be also useful). [#45715](https://github.com/ClickHouse/ClickHouse/pull/45715) ([Azat Khuzhin](https://github.com/azat)). -* Arrow format: support the duration type. Closes [#45669](https://github.com/ClickHouse/ClickHouse/issues/45669). [#45750](https://github.com/ClickHouse/ClickHouse/pull/45750) ([flynn](https://github.com/ucasfl)). -* Extend the logging in the Query Cache to improve investigations of the caching behavior. [#45751](https://github.com/ClickHouse/ClickHouse/pull/45751) ([Robert Schulze](https://github.com/rschu1ze)). -* The query cache's server-level settings are now reconfigurable at runtime. [#45758](https://github.com/ClickHouse/ClickHouse/pull/45758) ([Robert Schulze](https://github.com/rschu1ze)). 
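For illustration, a minimal sketch of a few operational items from the entries above; the table name is hypothetical and the statements only exercise the commands and settings named in the entries (the `generateRandom` call is the one quoted above):

```sql
-- New command from the entry above: performs the sync syscall for the filesystem cache.
SYSTEM SYNC FILE CACHE;

-- The renamed setting; it is assumed to keep the same 0/1/2 values as the old
-- replication_alter_partitions_sync.
SET alter_sync = 2;

-- generateRandom now supports LowCardinality data types in the target table
-- (test_table is a hypothetical table).
INSERT INTO test_table SELECT * FROM generateRandom() LIMIT 1000;
```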
-* Hide password in logs when a table function's arguments are specified with a named collection. [#45774](https://github.com/ClickHouse/ClickHouse/pull/45774) ([Vitaly Baranov](https://github.com/vitlibar)). -* Improve internal S3 client to correctly deduce regions and redirections for different types of URLs. [#45783](https://github.com/ClickHouse/ClickHouse/pull/45783) ([Antonio Andelic](https://github.com/antonio2368)). -* Add support for Map, IPv4 and IPv6 types in generateRandom. Mostly useful for testing. [#45785](https://github.com/ClickHouse/ClickHouse/pull/45785) ([Raúl Marín](https://github.com/Algunenano)). -* Support empty/notEmpty for IP types. [#45799](https://github.com/ClickHouse/ClickHouse/pull/45799) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). -* The column `num_processed_files` was split into two columns: `num_files` (for BACKUP) and `files_read` (for RESTORE). The column `processed_files_size` was split into two columns: `total_size` (for BACKUP) and `bytes_read` (for RESTORE). [#45800](https://github.com/ClickHouse/ClickHouse/pull/45800) ([Vitaly Baranov](https://github.com/vitlibar)). -* Add support for `SHOW ENGINES` query for MySQL compatibility. [#45859](https://github.com/ClickHouse/ClickHouse/pull/45859) ([Filatenkov Artur](https://github.com/FArthur-cmd)). -* Improved how the obfuscator deals with queries. [#45867](https://github.com/ClickHouse/ClickHouse/pull/45867) ([Raúl Marín](https://github.com/Algunenano)). -* Improve behaviour of conversion into Date for boundary value 65535 (2149-06-06). [#46042](https://github.com/ClickHouse/ClickHouse/pull/46042) [#45914](https://github.com/ClickHouse/ClickHouse/pull/45914) ([Joanna Hulboj](https://github.com/jh0x)). -* Add setting `check_referential_table_dependencies` to check referential dependencies on `DROP TABLE`. This PR solves [#38326](https://github.com/ClickHouse/ClickHouse/issues/38326). [#45936](https://github.com/ClickHouse/ClickHouse/pull/45936) ([Vitaly Baranov](https://github.com/vitlibar)). -* Fix `tupleElement` to return `Null` when having `Null` argument. Closes [#45894](https://github.com/ClickHouse/ClickHouse/issues/45894). [#45952](https://github.com/ClickHouse/ClickHouse/pull/45952) ([flynn](https://github.com/ucasfl)). -* Throw an error on no files satisfying the S3 wildcard. Closes [#45587](https://github.com/ClickHouse/ClickHouse/issues/45587). [#45957](https://github.com/ClickHouse/ClickHouse/pull/45957) ([chen](https://github.com/xiedeyantu)). -* Use cluster state data to check concurrent backup/restore. [#45982](https://github.com/ClickHouse/ClickHouse/pull/45982) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). -* ClickHouse Client: Use "exact" matching for fuzzy search, which has correct case ignorance and more appropriate algorithm for matching SQL queries. [#46000](https://github.com/ClickHouse/ClickHouse/pull/46000) ([Azat Khuzhin](https://github.com/azat)). -* Forbid wrong create View syntax `CREATE View X TO Y AS SELECT`. Closes [#4331](https://github.com/ClickHouse/ClickHouse/issues/4331). [#46043](https://github.com/ClickHouse/ClickHouse/pull/46043) ([flynn](https://github.com/ucasfl)). -* Storage `Log` family support setting the `storage_policy`. Closes [#43421](https://github.com/ClickHouse/ClickHouse/issues/43421). [#46044](https://github.com/ClickHouse/ClickHouse/pull/46044) ([flynn](https://github.com/ucasfl)). -* Improve `JSONColumns` format when the result is empty. Closes [#46024](https://github.com/ClickHouse/ClickHouse/issues/46024). 
[#46053](https://github.com/ClickHouse/ClickHouse/pull/46053) ([flynn](https://github.com/ucasfl)).
-* Add reference implementation for SipHash128. [#46065](https://github.com/ClickHouse/ClickHouse/pull/46065) ([Salvatore Mesoraca](https://github.com/aiven-sal)).
-* Add a new metric to record allocation times and bytes using mmap. [#46068](https://github.com/ClickHouse/ClickHouse/pull/46068) ([李扬](https://github.com/taiyang-li)).
-* Currently, for functions like `leftPad`, `rightPad`, `leftPadUTF8`, `rightPadUTF8`, the second argument `length` must be UInt8|16|32|64|128|256, which is too strict for ClickHouse users and not consistent with other similar functions like `arrayResize`, `substring` and so on. [#46103](https://github.com/ClickHouse/ClickHouse/pull/46103) ([李扬](https://github.com/taiyang-li)).
-* Fix assertion in the `welchTTest` function in debug build when the resulting statistic is NaN. Unified the behavior with other similar functions. Change the behavior of `studentTTest` to return NaN instead of throwing an exception because the previous behavior was inconvenient. This closes [#41176](https://github.com/ClickHouse/ClickHouse/issues/41176). This closes [#42162](https://github.com/ClickHouse/ClickHouse/issues/42162). [#46141](https://github.com/ClickHouse/ClickHouse/pull/46141) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
-* More convenient usage of big integers and ORDER BY WITH FILL. Allow using plain integers for start and end points in WITH FILL when ORDER BY uses big (128-bit and 256-bit) integers. Fix the wrong result for big integers with negative start or end points. This closes [#16733](https://github.com/ClickHouse/ClickHouse/issues/16733). [#46152](https://github.com/ClickHouse/ClickHouse/pull/46152) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
-* Add `parts`, `active_parts` and `total_marks` columns to `system.tables`, as requested in [this issue](https://github.com/ClickHouse/ClickHouse/issues/44336). [#46161](https://github.com/ClickHouse/ClickHouse/pull/46161) ([attack204](https://github.com/attack204)).
-* Functions "multi[Fuzzy]Match(Any|AnyIndex|AllIndices)" now reject regexes which will likely evaluate very slowly in vectorscan. [#46167](https://github.com/ClickHouse/ClickHouse/pull/46167) ([Robert Schulze](https://github.com/rschu1ze)).
-* When `insert_null_as_default` is enabled and a column doesn't have a defined default value, the default of the column type will be used. Also this PR fixes using default values on nulls in case of LowCardinality columns. [#46171](https://github.com/ClickHouse/ClickHouse/pull/46171) ([Kruglov Pavel](https://github.com/Avogar)).
-* Prefer explicitly defined access keys for S3 clients. If `use_environment_credentials` is set to `true`, and the user has provided the access key through query or config, they will be used instead of the ones from the environment variable. [#46191](https://github.com/ClickHouse/ClickHouse/pull/46191) ([Antonio Andelic](https://github.com/antonio2368)).
-* Add an alias "DATE_FORMAT()" for function "formatDateTime()" to improve compatibility with MySQL's SQL dialect, and extend function `formatDateTime` with substitutions "a", "b", "c", "h", "i", "k", "l", "r", "s", "W". `DATE_FORMAT` formats a time according to the given format string; the format is a constant expression, so you cannot have multiple formats for a single result column (see [formatDateTime](https://clickhouse.com/docs/en/sql-reference/functions/date-time-functions/#formatdatetime)). [#46302](https://github.com/ClickHouse/ClickHouse/pull/46302) ([Jake Bamrah](https://github.com/JakeBamrah)).
-* Add `ProfileEvents` and `CurrentMetrics` about the callback tasks for parallel replicas (`s3Cluster` and `MergeTree` tables). [#46313](https://github.com/ClickHouse/ClickHouse/pull/46313) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
-* Add support for `DELETE` and `UPDATE` for tables using the `KeeperMap` storage engine. [#46330](https://github.com/ClickHouse/ClickHouse/pull/46330) ([Antonio Andelic](https://github.com/antonio2368)).
-* Allow writing RENAME queries with query parameters. Resolves [#45778](https://github.com/ClickHouse/ClickHouse/issues/45778). [#46407](https://github.com/ClickHouse/ClickHouse/pull/46407) ([Nikolay Degterinsky](https://github.com/evillique)).
-* Fix parameterized SELECT queries with REPLACE transformer. Resolves [#33002](https://github.com/ClickHouse/ClickHouse/issues/33002). [#46420](https://github.com/ClickHouse/ClickHouse/pull/46420) ([Nikolay Degterinsky](https://github.com/evillique)).
-* Exclude the internal database used for temporary/external tables from the calculation of the asynchronous metric "NumberOfDatabases". This makes the behavior consistent with the system table "system.databases". [#46435](https://github.com/ClickHouse/ClickHouse/pull/46435) ([Robert Schulze](https://github.com/rschu1ze)).
-* Added a `last_exception_time` column to the `distribution_queue` table. [#46564](https://github.com/ClickHouse/ClickHouse/pull/46564) ([Aleksandr](https://github.com/AVMusorin)).
-* Support for IN clause with parameter in parameterized views. [#46583](https://github.com/ClickHouse/ClickHouse/pull/46583) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
-* Do not load named collections on server startup (load them on first access instead). [#46607](https://github.com/ClickHouse/ClickHouse/pull/46607) ([Kseniia Sumarokova](https://github.com/kssenii)).
-
-
-#### Build/Testing/Packaging Improvement
-* Introduce GWP-ASan implemented by the LLVM runtime. This closes [#27039](https://github.com/ClickHouse/ClickHouse/issues/27039). [#45226](https://github.com/ClickHouse/ClickHouse/pull/45226) ([Han Fei](https://github.com/hanfei1991)).
-* We want to make our tests less stable and more flaky: add randomization for merge tree settings in tests. [#38983](https://github.com/ClickHouse/ClickHouse/pull/38983) ([Anton Popov](https://github.com/CurtizJ)).
-* Enable HDFS support on PowerPC, which helps to fix the functional tests 02113_hdfs_assert.sh, 02244_hdfs_cluster.sql and 02368_cancel_write_into_hdfs.sh. [#44949](https://github.com/ClickHouse/ClickHouse/pull/44949) ([MeenaRenganathan22](https://github.com/MeenaRenganathan22)).
-* Add a systemd.service file for clickhouse-keeper. Fixes [#44293](https://github.com/ClickHouse/ClickHouse/issues/44293). [#45568](https://github.com/ClickHouse/ClickHouse/pull/45568) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
-* ClickHouse's fork of poco was moved from "contrib/" to "base/poco/". [#46075](https://github.com/ClickHouse/ClickHouse/pull/46075) ([Robert Schulze](https://github.com/rschu1ze)).
-* Add an option for `clickhouse-watchdog` to restart the child process. This does not make a lot of use. [#46312](https://github.com/ClickHouse/ClickHouse/pull/46312) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
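A small, hedged illustration of two user-facing items above (the `DATE_FORMAT` alias and `DELETE`/`UPDATE` on `KeeperMap` tables); `keeper_map_table` is hypothetical and the mutation syntax is assumed to be the usual `ALTER TABLE` form:

```sql
-- DATE_FORMAT is the MySQL-compatible alias of formatDateTime.
SELECT DATE_FORMAT(now(), '%Y-%m-%d');

-- Assuming the standard mutation syntax applies to the KeeperMap engine:
ALTER TABLE keeper_map_table DELETE WHERE key = 'k1';
ALTER TABLE keeper_map_table UPDATE value = 'v2' WHERE key = 'k2';
```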
-* If the environment variable `CLICKHOUSE_DOCKER_RESTART_ON_EXIT` is set to 1, the Docker container will run `clickhouse-server` as a child instead of the first process, and restart it when it exited. [#46391](https://github.com/ClickHouse/ClickHouse/pull/46391) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Fix Systemd service file. [#46461](https://github.com/ClickHouse/ClickHouse/pull/46461) ([SuperDJY](https://github.com/cmsxbc)). -* Raised the minimum Clang version needed to build ClickHouse from 12 to 15. [#46710](https://github.com/ClickHouse/ClickHouse/pull/46710) ([Robert Schulze](https://github.com/rschu1ze)). -* Upgrade Intel QPL from v0.3.0 to v1.0.0 2. Build libaccel-config and link it statically to QPL library instead of dynamically. [#45809](https://github.com/ClickHouse/ClickHouse/pull/45809) ([jasperzhu](https://github.com/jinjunzh)). - - -#### Bug Fix (user-visible misbehavior in official stable release) - -* Flush data exactly by `rabbitmq_flush_interval_ms` or by `rabbitmq_max_block_size` in `StorageRabbitMQ`. Closes [#42389](https://github.com/ClickHouse/ClickHouse/issues/42389). Closes [#45160](https://github.com/ClickHouse/ClickHouse/issues/45160). [#44404](https://github.com/ClickHouse/ClickHouse/pull/44404) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Use PODArray to render in sparkBar function, so we can control the memory usage. Close [#44467](https://github.com/ClickHouse/ClickHouse/issues/44467). [#44489](https://github.com/ClickHouse/ClickHouse/pull/44489) ([Duc Canh Le](https://github.com/canhld94)). -* Fix functions (quantilesExactExclusive, quantilesExactInclusive) return unsorted array element. [#45379](https://github.com/ClickHouse/ClickHouse/pull/45379) ([wujunfu](https://github.com/wujunfu)). -* Fix uncaught exception in HTTPHandler when open telemetry is enabled. [#45456](https://github.com/ClickHouse/ClickHouse/pull/45456) ([Frank Chen](https://github.com/FrankChen021)). -* Don't infer Dates from 8 digit numbers. It could lead to wrong data to be read. [#45581](https://github.com/ClickHouse/ClickHouse/pull/45581) ([Kruglov Pavel](https://github.com/Avogar)). -* Fixes to correctly use `odbc_bridge_use_connection_pooling` setting. [#45591](https://github.com/ClickHouse/ClickHouse/pull/45591) ([Bharat Nallan](https://github.com/bharatnc)). -* When the callback in the cache is called, it is possible that this cache is destructed. To keep it safe, we capture members by value. It's also safe for task schedule because it will be deactivated before storage is destroyed. Resolve [#45548](https://github.com/ClickHouse/ClickHouse/issues/45548). [#45601](https://github.com/ClickHouse/ClickHouse/pull/45601) ([Han Fei](https://github.com/hanfei1991)). -* Fix data corruption when codecs Delta or DoubleDelta are combined with codec Gorilla. [#45615](https://github.com/ClickHouse/ClickHouse/pull/45615) ([Robert Schulze](https://github.com/rschu1ze)). -* Correctly check types when using N-gram bloom filter index to avoid invalid reads. [#45617](https://github.com/ClickHouse/ClickHouse/pull/45617) ([Antonio Andelic](https://github.com/antonio2368)). -* A couple of segfaults have been reported around `c-ares`. They were introduced in my previous pull requests. I have fixed them with the help of Alexander Tokmakov. [#45629](https://github.com/ClickHouse/ClickHouse/pull/45629) ([Arthur Passos](https://github.com/arthurpassos)). -* Fix key description when encountering duplicate primary keys. This can happen in projections. 
See [#45590](https://github.com/ClickHouse/ClickHouse/issues/45590) for details. [#45686](https://github.com/ClickHouse/ClickHouse/pull/45686) ([Amos Bird](https://github.com/amosbird)). -* Set compression method and level for backup Closes [#45690](https://github.com/ClickHouse/ClickHouse/issues/45690). [#45737](https://github.com/ClickHouse/ClickHouse/pull/45737) ([Pradeep Chhetri](https://github.com/chhetripradeep)). -* Should use `select_query_typed.limitByOffset` instead of `select_query_typed.limitOffset`. [#45817](https://github.com/ClickHouse/ClickHouse/pull/45817) ([刘陶峰](https://github.com/taofengliu)). -* When use experimental analyzer, queries like `SELECT number FROM numbers(100) LIMIT 10 OFFSET 10;` get wrong results (empty result for this sql). That is caused by an unnecessary offset step added by planner. [#45822](https://github.com/ClickHouse/ClickHouse/pull/45822) ([刘陶峰](https://github.com/taofengliu)). -* Backward compatibility - allow implicit narrowing conversion from UInt64 to IPv4 - required for "INSERT ... VALUES ..." expression. [#45865](https://github.com/ClickHouse/ClickHouse/pull/45865) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). -* Bugfix IPv6 parser for mixed ip4 address with missed first octet (like `::.1.2.3`). [#45871](https://github.com/ClickHouse/ClickHouse/pull/45871) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). -* Add the `query_kind` column to the `system.processes` table and the `SHOW PROCESSLIST` query. Remove duplicate code. It fixes a bug: the global configuration parameter `max_concurrent_select_queries` was not respected to queries with `INTERSECT` or `EXCEPT` chains. [#45872](https://github.com/ClickHouse/ClickHouse/pull/45872) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Fix crash in a function `stochasticLinearRegression`. Found by WingFuzz. [#45985](https://github.com/ClickHouse/ClickHouse/pull/45985) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Fix crash in `SELECT` queries with `INTERSECT` and `EXCEPT` modifiers that read data from tables with enabled sparse columns (controlled by setting `ratio_of_defaults_for_sparse_serialization`). [#45987](https://github.com/ClickHouse/ClickHouse/pull/45987) ([Anton Popov](https://github.com/CurtizJ)). -* Fix read in order optimization for DESC sorting with FINAL, close [#45815](https://github.com/ClickHouse/ClickHouse/issues/45815). [#46009](https://github.com/ClickHouse/ClickHouse/pull/46009) ([Vladimir C](https://github.com/vdimir)). -* Fix reading of non existing nested columns with multiple level in compact parts. [#46045](https://github.com/ClickHouse/ClickHouse/pull/46045) ([Azat Khuzhin](https://github.com/azat)). -* Fix elapsed column in system.processes (10x error). [#46047](https://github.com/ClickHouse/ClickHouse/pull/46047) ([Azat Khuzhin](https://github.com/azat)). -* Follow-up fix for Replace domain IP types (IPv4, IPv6) with native https://github.com/ClickHouse/ClickHouse/pull/43221. [#46087](https://github.com/ClickHouse/ClickHouse/pull/46087) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). -* Fix environment variable substitution in the configuration when a parameter already has a value. This closes [#46131](https://github.com/ClickHouse/ClickHouse/issues/46131). This closes [#9547](https://github.com/ClickHouse/ClickHouse/issues/9547). [#46144](https://github.com/ClickHouse/ClickHouse/pull/46144) ([pufit](https://github.com/pufit)). -* Fix incorrect predicate push down with grouping sets. 
Closes [#45947](https://github.com/ClickHouse/ClickHouse/issues/45947). [#46151](https://github.com/ClickHouse/ClickHouse/pull/46151) ([flynn](https://github.com/ucasfl)). -* Fix possible pipeline stuck error on `fulls_sorting_join` with constant keys. [#46175](https://github.com/ClickHouse/ClickHouse/pull/46175) ([Vladimir C](https://github.com/vdimir)). -* Never rewrite tuple functions as literals during formatting to avoid incorrect results. [#46232](https://github.com/ClickHouse/ClickHouse/pull/46232) ([Salvatore Mesoraca](https://github.com/aiven-sal)). -* Fix possible out of bounds error while reading LowCardinality(Nullable) in Arrow format. [#46270](https://github.com/ClickHouse/ClickHouse/pull/46270) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix `SYSTEM UNFREEZE` queries failing with the exception `CANNOT_PARSE_INPUT_ASSERTION_FAILED`. [#46325](https://github.com/ClickHouse/ClickHouse/pull/46325) ([Aleksei Filatov](https://github.com/aalexfvk)). -* Fix possible crash which can be caused by an integer overflow while deserializing aggregating state of a function that stores HashTable. [#46349](https://github.com/ClickHouse/ClickHouse/pull/46349) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Fix possible `LOGICAL_ERROR` in asynchronous inserts with invalid data sent in format `VALUES`. [#46350](https://github.com/ClickHouse/ClickHouse/pull/46350) ([Anton Popov](https://github.com/CurtizJ)). -* Fixed a LOGICAL_ERROR on an attempt to execute `ALTER ... MOVE PART ... TO TABLE`. This type of query was never actually supported. [#46359](https://github.com/ClickHouse/ClickHouse/pull/46359) ([Alexander Tokmakov](https://github.com/tavplubix)). -* Fix s3Cluster schema inference in parallel distributed insert select when `parallel_distributed_insert_select` is enabled. [#46381](https://github.com/ClickHouse/ClickHouse/pull/46381) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix queries like `ALTER TABLE ... UPDATE nested.arr1 = nested.arr2 ...`, where `arr1` and `arr2` are fields of the same `Nested` column. [#46387](https://github.com/ClickHouse/ClickHouse/pull/46387) ([Anton Popov](https://github.com/CurtizJ)). -* Scheduler may fail to schedule a task. If it happens, the whole MulityPartUpload should be aborted and `UploadHelper` must wait for already scheduled tasks. [#46451](https://github.com/ClickHouse/ClickHouse/pull/46451) ([Dmitry Novik](https://github.com/novikd)). -* Fix PREWHERE for Merge with different default types (fixes some `NOT_FOUND_COLUMN_IN_BLOCK` when the default type for the column differs, also allow `PREWHERE` when the type of column is the same across tables, and prohibit it, only if it differs). [#46454](https://github.com/ClickHouse/ClickHouse/pull/46454) ([Azat Khuzhin](https://github.com/azat)). -* Fix a crash that could happen when constant values are used in `ORDER BY`. Fixes [#46466](https://github.com/ClickHouse/ClickHouse/issues/46466). [#46493](https://github.com/ClickHouse/ClickHouse/pull/46493) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Do not throw exception if `disk` setting was specified on query level, but `storage_policy` was specified in config merge tree settings section. `disk` will override setting from config. [#46533](https://github.com/ClickHouse/ClickHouse/pull/46533) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Fix an invalid processing of constant `LowCardinality` argument in function `arrayMap`. This bug could lead to a segfault in release, and logical error `Bad cast` in debug build. 
[#46569](https://github.com/ClickHouse/ClickHouse/pull/46569) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* fixes [#46557](https://github.com/ClickHouse/ClickHouse/issues/46557). [#46611](https://github.com/ClickHouse/ClickHouse/pull/46611) ([Alexander Gololobov](https://github.com/davenger)). -* Fix endless restarts of clickhouse-server systemd unit if server cannot start within 1m30sec (Disable timeout logic for starting clickhouse-server from systemd service). [#46613](https://github.com/ClickHouse/ClickHouse/pull/46613) ([Azat Khuzhin](https://github.com/azat)). -* Allocated during asynchronous inserts memory buffers were deallocated in the global context and MemoryTracker counters for corresponding user and query were not updated correctly. That led to false positive OOM exceptions. [#46622](https://github.com/ClickHouse/ClickHouse/pull/46622) ([Dmitry Novik](https://github.com/novikd)). -* Updated to not clear on_expression from table_join as its used by future analyze runs resolves [#45185](https://github.com/ClickHouse/ClickHouse/issues/45185). [#46487](https://github.com/ClickHouse/ClickHouse/pull/46487) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). - - -### ClickHouse release 23.1, 2023-01-26 - -### ClickHouse release 23.1 - -#### Upgrade Notes -* The `SYSTEM RESTART DISK` query becomes a no-op. [#44647](https://github.com/ClickHouse/ClickHouse/pull/44647) ([alesapin](https://github.com/alesapin)). -* The `PREALLOCATE` option for `HASHED`/`SPARSE_HASHED` dictionaries becomes a no-op. [#45388](https://github.com/ClickHouse/ClickHouse/pull/45388) ([Azat Khuzhin](https://github.com/azat)). It does not give significant advantages anymore. -* Disallow `Gorilla` codec on columns of non-Float32 or non-Float64 type. [#45252](https://github.com/ClickHouse/ClickHouse/pull/45252) ([Robert Schulze](https://github.com/rschu1ze)). It was pointless and led to inconsistencies. -* Parallel quorum inserts might work incorrectly with `*MergeTree` tables created with the deprecated syntax. Therefore, parallel quorum inserts support is completely disabled for such tables. It does not affect tables created with a new syntax. [#45430](https://github.com/ClickHouse/ClickHouse/pull/45430) ([Alexander Tokmakov](https://github.com/tavplubix)). -* Use the `GetObjectAttributes` request instead of the `HeadObject` request to get the size of an object in AWS S3. This change fixes handling endpoints without explicit regions after updating the AWS SDK, for example. [#45288](https://github.com/ClickHouse/ClickHouse/pull/45288) ([Vitaly Baranov](https://github.com/vitlibar)). AWS S3 and Minio are tested, but keep in mind that various S3-compatible services (GCS, R2, B2) may have subtle incompatibilities. This change also may require you to adjust the ACL to allow the `GetObjectAttributes` request. -* Forbid paths in timezone names. For example, a timezone name like `/usr/share/zoneinfo/Asia/Aden` is not allowed; the IANA timezone database name like `Asia/Aden` should be used. [#44225](https://github.com/ClickHouse/ClickHouse/pull/44225) ([Kruglov Pavel](https://github.com/Avogar)). -* Queries combining equijoin and constant expressions (e.g., `JOIN ON t1.x = t2.x AND 1 = 1`) are forbidden due to incorrect results. [#44016](https://github.com/ClickHouse/ClickHouse/pull/44016) ([Vladimir C](https://github.com/vdimir)). - - -#### New Feature -* Dictionary source for extracting keys by traversing regular expressions tree. It can be used for User-Agent parsing. 
[#40878](https://github.com/ClickHouse/ClickHouse/pull/40878) ([Vage Ogannisian](https://github.com/nooblose)). [#43858](https://github.com/ClickHouse/ClickHouse/pull/43858) ([Han Fei](https://github.com/hanfei1991)). -* Added parametrized view functionality, now it's possible to specify query parameters for the View table engine. resolves [#40907](https://github.com/ClickHouse/ClickHouse/issues/40907). [#41687](https://github.com/ClickHouse/ClickHouse/pull/41687) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). -* Add `quantileInterpolatedWeighted`/`quantilesInterpolatedWeighted` functions. [#38252](https://github.com/ClickHouse/ClickHouse/pull/38252) ([Bharat Nallan](https://github.com/bharatnc)). -* Array join support for the `Map` type, like the function "explode" in Spark. [#43239](https://github.com/ClickHouse/ClickHouse/pull/43239) ([李扬](https://github.com/taiyang-li)). -* Support SQL standard binary and hex string literals. [#43785](https://github.com/ClickHouse/ClickHouse/pull/43785) ([Mo Xuan](https://github.com/mo-avatar)). -* Allow formatting `DateTime` in Joda-Time style. Refer to [the Joda-Time docs](https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html). [#43818](https://github.com/ClickHouse/ClickHouse/pull/43818) ([李扬](https://github.com/taiyang-li)). -* Implemented a fractional second formatter (`%f`) for `formatDateTime`. [#44060](https://github.com/ClickHouse/ClickHouse/pull/44060) ([ltrk2](https://github.com/ltrk2)). [#44497](https://github.com/ClickHouse/ClickHouse/pull/44497) ([Alexander Gololobov](https://github.com/davenger)). -* Added `age` function to calculate the difference between two dates or dates with time values expressed as the number of full units. Closes [#41115](https://github.com/ClickHouse/ClickHouse/issues/41115). [#44421](https://github.com/ClickHouse/ClickHouse/pull/44421) ([Robert Schulze](https://github.com/rschu1ze)). -* Add `Null` source for dictionaries. Closes [#44240](https://github.com/ClickHouse/ClickHouse/issues/44240). [#44502](https://github.com/ClickHouse/ClickHouse/pull/44502) ([mayamika](https://github.com/mayamika)). -* Allow configuring the S3 storage class with the `s3_storage_class` configuration option. Such as `STANDARD/INTELLIGENT_TIERING` Closes [#44443](https://github.com/ClickHouse/ClickHouse/issues/44443). [#44707](https://github.com/ClickHouse/ClickHouse/pull/44707) ([chen](https://github.com/xiedeyantu)). -* Insert default values in case of missing elements in JSON object while parsing named tuple. Add setting `input_format_json_defaults_for_missing_elements_in_named_tuple` that controls this behaviour. Closes [#45142](https://github.com/ClickHouse/ClickHouse/issues/45142)#issuecomment-1380153217. [#45231](https://github.com/ClickHouse/ClickHouse/pull/45231) ([Kruglov Pavel](https://github.com/Avogar)). -* Record server startup time in ProfileEvents (`ServerStartupMilliseconds`). Resolves [#43188](https://github.com/ClickHouse/ClickHouse/issues/43188). [#45250](https://github.com/ClickHouse/ClickHouse/pull/45250) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). -* Refactor and Improve streaming engines Kafka/RabbitMQ/NATS and add support for all formats, also refactor formats a bit: - Fix producing messages in row-based formats with suffixes/prefixes. Now every message is formatted completely with all delimiters and can be parsed back using input format. - Support block-based formats like Native, Parquet, ORC, etc. Every block is formatted as a separate message. 
The number of rows in one message depends on the block size, so you can control it via the setting `max_block_size`. - Add new engine settings `kafka_max_rows_per_message/rabbitmq_max_rows_per_message/nats_max_rows_per_message`. They control the number of rows formatted in one message in row-based formats. Default value: 1. - Fix high memory consumption in the NATS table engine. - Support arbitrary binary data in NATS producer (previously it worked only with strings contained \0 at the end) - Add missing Kafka/RabbitMQ/NATS engine settings in the documentation. - Refactor producing and consuming in Kafka/RabbitMQ/NATS, separate it from WriteBuffers/ReadBuffers semantic. - Refactor output formats: remove callbacks on each row used in Kafka/RabbitMQ/NATS (now we don't use callbacks there), allow to use IRowOutputFormat directly, clarify row end and row between delimiters, make it possible to reset output format to start formatting again - Add proper implementation in formatRow function (bonus after formats refactoring). [#42777](https://github.com/ClickHouse/ClickHouse/pull/42777) ([Kruglov Pavel](https://github.com/Avogar)). -* Support reading/writing `Nested` tables as `List` of `Struct` in `CapnProto` format. Read/write `Decimal32/64` as `Int32/64`. Closes [#43319](https://github.com/ClickHouse/ClickHouse/issues/43319). [#43379](https://github.com/ClickHouse/ClickHouse/pull/43379) ([Kruglov Pavel](https://github.com/Avogar)). -* Added a `message_format_string` column to `system.text_log`. The column contains a pattern that was used to format the message. [#44543](https://github.com/ClickHouse/ClickHouse/pull/44543) ([Alexander Tokmakov](https://github.com/tavplubix)). This allows various analytics over the ClickHouse logs. -* Try to autodetect headers with column names (and maybe types) for CSV/TSV/CustomSeparated input formats. -Add settings input_format_tsv/csv/custom_detect_header that enable this behaviour (enabled by default). Closes [#44640](https://github.com/ClickHouse/ClickHouse/issues/44640). [#44953](https://github.com/ClickHouse/ClickHouse/pull/44953) ([Kruglov Pavel](https://github.com/Avogar)). - -#### Experimental Feature -* Add an experimental inverted index as a new secondary index type for efficient text search. [#38667](https://github.com/ClickHouse/ClickHouse/pull/38667) ([larryluogit](https://github.com/larryluogit)). -* Add experimental query result cache. [#43797](https://github.com/ClickHouse/ClickHouse/pull/43797) ([Robert Schulze](https://github.com/rschu1ze)). -* Added extendable and configurable scheduling subsystem for IO requests (not yet integrated with IO code itself). [#41840](https://github.com/ClickHouse/ClickHouse/pull/41840) ([Sergei Trifonov](https://github.com/serxa)). This feature does nothing at all, enjoy. -* Added `SYSTEM DROP DATABASE REPLICA` that removes metadata of a dead replica of a `Replicated` database. Resolves [#41794](https://github.com/ClickHouse/ClickHouse/issues/41794). [#42807](https://github.com/ClickHouse/ClickHouse/pull/42807) ([Alexander Tokmakov](https://github.com/tavplubix)). - -#### Performance Improvement -* Do not load inactive parts at startup of `MergeTree` tables. [#42181](https://github.com/ClickHouse/ClickHouse/pull/42181) ([Anton Popov](https://github.com/CurtizJ)). -* Improved latency of reading from storage `S3` and table function `s3` with large numbers of small files. Now settings `remote_filesystem_read_method` and `remote_filesystem_read_prefetch` take effect while reading from storage `S3`. 
[#43726](https://github.com/ClickHouse/ClickHouse/pull/43726) ([Anton Popov](https://github.com/CurtizJ)). -* Optimization for reading struct fields in Parquet/ORC files. Only the required fields are loaded. [#44484](https://github.com/ClickHouse/ClickHouse/pull/44484) ([lgbo](https://github.com/lgbo-ustc)). -* Two-level aggregation algorithm was mistakenly disabled for queries over the HTTP interface. It was enabled back, and it leads to a major performance improvement. [#45450](https://github.com/ClickHouse/ClickHouse/pull/45450) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Added mmap support for StorageFile, which should improve the performance of clickhouse-local. [#43927](https://github.com/ClickHouse/ClickHouse/pull/43927) ([pufit](https://github.com/pufit)). -* Added sharding support in HashedDictionary to allow parallel load (almost linear scaling based on number of shards). [#40003](https://github.com/ClickHouse/ClickHouse/pull/40003) ([Azat Khuzhin](https://github.com/azat)). -* Speed up query parsing. [#42284](https://github.com/ClickHouse/ClickHouse/pull/42284) ([Raúl Marín](https://github.com/Algunenano)). -* Always replace OR chain `expr = x1 OR ... OR expr = xN` to `expr IN (x1, ..., xN)` in the case where `expr` is a `LowCardinality` column. Setting `optimize_min_equality_disjunction_chain_length` is ignored in this case. [#42889](https://github.com/ClickHouse/ClickHouse/pull/42889) ([Guo Wangyang](https://github.com/guowangy)). -* Slightly improve performance by optimizing the code around ThreadStatus. [#43586](https://github.com/ClickHouse/ClickHouse/pull/43586) ([Zhiguo Zhou](https://github.com/ZhiguoZh)). -* Optimize the column-wise ternary logic evaluation by achieving auto-vectorization. In the performance test of this [microbenchmark](https://github.com/ZhiguoZh/ClickHouse/blob/20221123-ternary-logic-opt-example/src/Functions/examples/associative_applier_perf.cpp), we've observed a peak **performance gain** of **21x** on the ICX device (Intel Xeon Platinum 8380 CPU). [#43669](https://github.com/ClickHouse/ClickHouse/pull/43669) ([Zhiguo Zhou](https://github.com/ZhiguoZh)). -* Avoid acquiring read locks in the `system.tables` table if possible. [#43840](https://github.com/ClickHouse/ClickHouse/pull/43840) ([Raúl Marín](https://github.com/Algunenano)). -* Optimize ThreadPool. The performance experiments of SSB (Star Schema Benchmark) on the ICX device (Intel Xeon Platinum 8380 CPU, 80 cores, 160 threads) shows that this change could effectively decrease the lock contention for ThreadPoolImpl::mutex by **75%**, increasing the CPU utilization and improving the overall performance by **2.4%**. [#44308](https://github.com/ClickHouse/ClickHouse/pull/44308) ([Zhiguo Zhou](https://github.com/ZhiguoZh)). -* Now the optimisation for predicting the hash table size is applied only if the cached hash table size is sufficiently large (thresholds were determined empirically and hardcoded). [#44455](https://github.com/ClickHouse/ClickHouse/pull/44455) ([Nikita Taranov](https://github.com/nickitat)). -* Small performance improvement for asynchronous reading from remote filesystems. [#44868](https://github.com/ClickHouse/ClickHouse/pull/44868) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Add fast path for: - `col like '%%'`; - `col like '%'`; - `col not like '%'`; - `col not like '%'`; - `match(col, '.*')`. [#45244](https://github.com/ClickHouse/ClickHouse/pull/45244) ([李扬](https://github.com/taiyang-li)). 
-* Slightly improve happy path optimisation in filtering (WHERE clause). [#45289](https://github.com/ClickHouse/ClickHouse/pull/45289) ([Nikita Taranov](https://github.com/nickitat)). -* Provide monotonicity info for `toUnixTimestamp64*` to enable more algebraic optimizations for index analysis. [#44116](https://github.com/ClickHouse/ClickHouse/pull/44116) ([Nikita Taranov](https://github.com/nickitat)). -* Allow the configuration of temporary data for query processing (spilling to disk) to cooperate with the filesystem cache (taking up the space from the cache disk) [#43972](https://github.com/ClickHouse/ClickHouse/pull/43972) ([Vladimir C](https://github.com/vdimir)). This mainly improves [ClickHouse Cloud](https://clickhouse.cloud/), but can be used for self-managed setups as well, if you know what to do. -* Make `system.replicas` table do parallel fetches of replicas statuses. Closes [#43918](https://github.com/ClickHouse/ClickHouse/issues/43918). [#43998](https://github.com/ClickHouse/ClickHouse/pull/43998) ([Nikolay Degterinsky](https://github.com/evillique)). -* Optimize memory consumption during backup to S3: files to S3 now will be copied directly without using `WriteBufferFromS3` (which could use a lot of memory). [#45188](https://github.com/ClickHouse/ClickHouse/pull/45188) ([Vitaly Baranov](https://github.com/vitlibar)). -* Add a cache for async block ids. This will reduce the number of requests of ZooKeeper when we enable async inserts deduplication. [#45106](https://github.com/ClickHouse/ClickHouse/pull/45106) ([Han Fei](https://github.com/hanfei1991)). - -#### Improvement - -* Use structure from insertion table in generateRandom without arguments. [#45239](https://github.com/ClickHouse/ClickHouse/pull/45239) ([Kruglov Pavel](https://github.com/Avogar)). -* Allow to implicitly convert floats stored in string fields of JSON to integers in `JSONExtract` functions. E.g. `JSONExtract('{"a": "1000.111"}', 'a', 'UInt64')` -> `1000`, previously it returned 0. [#45432](https://github.com/ClickHouse/ClickHouse/pull/45432) ([Anton Popov](https://github.com/CurtizJ)). -* Added fields `supports_parallel_parsing` and `supports_parallel_formatting` to table `system.formats` for better introspection. [#45499](https://github.com/ClickHouse/ClickHouse/pull/45499) ([Anton Popov](https://github.com/CurtizJ)). -* Improve reading CSV field in CustomSeparated/Template format. Closes [#42352](https://github.com/ClickHouse/ClickHouse/issues/42352) Closes [#39620](https://github.com/ClickHouse/ClickHouse/issues/39620). [#43332](https://github.com/ClickHouse/ClickHouse/pull/43332) ([Kruglov Pavel](https://github.com/Avogar)). -* Unify query elapsed time measurements. [#43455](https://github.com/ClickHouse/ClickHouse/pull/43455) ([Raúl Marín](https://github.com/Algunenano)). -* Improve automatic usage of structure from insertion table in table functions file/hdfs/s3 when virtual columns are present in a select query, it fixes the possible error `Block structure mismatch` or `number of columns mismatch`. [#43695](https://github.com/ClickHouse/ClickHouse/pull/43695) ([Kruglov Pavel](https://github.com/Avogar)). -* Add support for signed arguments in the function `range`. Fixes [#43333](https://github.com/ClickHouse/ClickHouse/issues/43333). [#43733](https://github.com/ClickHouse/ClickHouse/pull/43733) ([sanyu](https://github.com/wineternity)). -* Remove redundant sorting, for example, sorting related ORDER BY clauses in subqueries. Implemented on top of query plan. 
It does similar optimization as `optimize_duplicate_order_by_and_distinct` regarding `ORDER BY` clauses, but more generic, since it's applied to any redundant sorting steps (not only caused by ORDER BY clause) and applied to subqueries of any depth. Related to [#42648](https://github.com/ClickHouse/ClickHouse/issues/42648). [#43905](https://github.com/ClickHouse/ClickHouse/pull/43905) ([Igor Nikonov](https://github.com/devcrafter)). -* Add the ability to disable deduplication of files for BACKUP (for backups without deduplication ATTACH can be used instead of full RESTORE). For example `BACKUP foo TO S3(...) SETTINGS deduplicate_files=0` (default `deduplicate_files=1`). [#43947](https://github.com/ClickHouse/ClickHouse/pull/43947) ([Azat Khuzhin](https://github.com/azat)). -* Refactor and improve schema inference for text formats. Add new setting `schema_inference_make_columns_nullable` that controls making result types `Nullable` (enabled by default);. [#44019](https://github.com/ClickHouse/ClickHouse/pull/44019) ([Kruglov Pavel](https://github.com/Avogar)). -* Better support for `PROXYv1` protocol. [#44135](https://github.com/ClickHouse/ClickHouse/pull/44135) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). -* Add information about the latest part check by cleanup threads into `system.parts` table. [#44244](https://github.com/ClickHouse/ClickHouse/pull/44244) ([Dmitry Novik](https://github.com/novikd)). -* Disable table functions in readonly mode for inserts. [#44290](https://github.com/ClickHouse/ClickHouse/pull/44290) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). -* Add a setting `simultaneous_parts_removal_limit` to allow limiting the number of parts being processed by one iteration of CleanupThread. [#44461](https://github.com/ClickHouse/ClickHouse/pull/44461) ([Dmitry Novik](https://github.com/novikd)). -* Do not initialize ReadBufferFromS3 when only virtual columns are needed in a query. This may be helpful to [#44246](https://github.com/ClickHouse/ClickHouse/issues/44246). [#44493](https://github.com/ClickHouse/ClickHouse/pull/44493) ([chen](https://github.com/xiedeyantu)). -* Prevent duplicate column names hints. Closes [#44130](https://github.com/ClickHouse/ClickHouse/issues/44130). [#44519](https://github.com/ClickHouse/ClickHouse/pull/44519) ([Joanna Hulboj](https://github.com/jh0x)). -* Allow macro substitution in endpoint of disks. Resolve [#40951](https://github.com/ClickHouse/ClickHouse/issues/40951). [#44533](https://github.com/ClickHouse/ClickHouse/pull/44533) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). -* Improve schema inference when `input_format_json_read_object_as_string` is enabled. [#44546](https://github.com/ClickHouse/ClickHouse/pull/44546) ([Kruglov Pavel](https://github.com/Avogar)). -* Add a user-level setting `database_replicated_allow_replicated_engine_arguments` which allows banning the creation of `ReplicatedMergeTree` tables with arguments in `DatabaseReplicated`. [#44566](https://github.com/ClickHouse/ClickHouse/pull/44566) ([alesapin](https://github.com/alesapin)). -* Prevent users from mistakenly specifying zero (invalid) value for `index_granularity`. This closes [#44536](https://github.com/ClickHouse/ClickHouse/issues/44536). [#44578](https://github.com/ClickHouse/ClickHouse/pull/44578) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Added possibility to set path to service keytab file in `keytab` parameter in `kerberos` section of config.xml. 
[#44594](https://github.com/ClickHouse/ClickHouse/pull/44594) ([Roman Vasin](https://github.com/rvasin)). -* Use already written part of the query for fuzzy search (pass to the `skim` library, which is written in Rust and linked statically to ClickHouse). [#44600](https://github.com/ClickHouse/ClickHouse/pull/44600) ([Azat Khuzhin](https://github.com/azat)). -* Enable `input_format_json_read_objects_as_strings` by default to be able to read nested JSON objects while JSON Object type is experimental. [#44657](https://github.com/ClickHouse/ClickHouse/pull/44657) ([Kruglov Pavel](https://github.com/Avogar)). -* Improvement for deduplication of async inserts: when users do duplicate async inserts, we should deduplicate inside the memory before we query Keeper. [#44682](https://github.com/ClickHouse/ClickHouse/pull/44682) ([Han Fei](https://github.com/hanfei1991)). -* Input/output `Avro` format will parse bool type as ClickHouse bool type. [#44684](https://github.com/ClickHouse/ClickHouse/pull/44684) ([Kruglov Pavel](https://github.com/Avogar)). -* Support Bool type in Arrow/Parquet/ORC. Closes [#43970](https://github.com/ClickHouse/ClickHouse/issues/43970). [#44698](https://github.com/ClickHouse/ClickHouse/pull/44698) ([Kruglov Pavel](https://github.com/Avogar)). -* Don't greedily parse beyond the quotes when reading UUIDs - it may lead to mistakenly successful parsing of incorrect data. [#44686](https://github.com/ClickHouse/ClickHouse/pull/44686) ([Raúl Marín](https://github.com/Algunenano)). -* Infer UInt64 in case of Int64 overflow and fix some transforms in schema inference. [#44696](https://github.com/ClickHouse/ClickHouse/pull/44696) ([Kruglov Pavel](https://github.com/Avogar)). -* Previously dependency resolving inside `Replicated` database was done in a hacky way, and now it's done right using an explicit graph. [#44697](https://github.com/ClickHouse/ClickHouse/pull/44697) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). -* Fix `output_format_pretty_row_numbers` does not preserve the counter across the blocks. Closes [#44815](https://github.com/ClickHouse/ClickHouse/issues/44815). [#44832](https://github.com/ClickHouse/ClickHouse/pull/44832) ([flynn](https://github.com/ucasfl)). -* Don't report errors in `system.errors` due to parts being merged concurrently with the background cleanup process. [#44874](https://github.com/ClickHouse/ClickHouse/pull/44874) ([Raúl Marín](https://github.com/Algunenano)). -* Optimize and fix metrics for Distributed async INSERT. [#44922](https://github.com/ClickHouse/ClickHouse/pull/44922) ([Azat Khuzhin](https://github.com/azat)). -* Added settings to disallow concurrent backups and restores resolves [#43891](https://github.com/ClickHouse/ClickHouse/issues/43891) Implementation: * Added server-level settings to disallow concurrent backups and restores, which are read and set when BackupWorker is created in Context. * Settings are set to true by default. * Before starting backup or restores, added a check to see if any other backups/restores are running. For internal requests, it checks if it is from the self node using backup_uuid. [#45072](https://github.com/ClickHouse/ClickHouse/pull/45072) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). -* Add `` config parameter for system logs. [#45320](https://github.com/ClickHouse/ClickHouse/pull/45320) ([Stig Bakken](https://github.com/stigsb)). 
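A minimal sketch of the integer-inference idea in the "Infer UInt64 in case of Int64 overflow" entry above: try the signed type first, and only when the literal overflows it, fall back to the unsigned one. The names below are invented for illustration; this is not the actual schema-inference code.

```cpp
#include <charconv>
#include <cstdint>
#include <string_view>
#include <variant>

/// Either an Int64 value, a UInt64 value (when the literal overflows Int64),
/// or monostate when the token is not an integer at all.
using InferredInteger = std::variant<std::monostate, int64_t, uint64_t>;

InferredInteger inferIntegerType(std::string_view token)
{
    const char * first = token.data();
    const char * last = token.data() + token.size();

    int64_t as_int64 = 0;
    auto res = std::from_chars(first, last, as_int64);
    if (res.ec == std::errc() && res.ptr == last)
        return as_int64;                      /// fits into Int64

    uint64_t as_uint64 = 0;
    res = std::from_chars(first, last, as_uint64);
    if (res.ec == std::errc() && res.ptr == last)
        return as_uint64;                     /// overflowed Int64 but fits UInt64

    return std::monostate{};                  /// not an integer
}
```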
- -#### Build/Testing/Packaging Improvement -* Statically link with the `skim` library (it is written in Rust) for fuzzy search in clickhouse client/local history. [#44239](https://github.com/ClickHouse/ClickHouse/pull/44239) ([Azat Khuzhin](https://github.com/azat)). -* We removed support for shared linking because of Rust. Actually, Rust is only an excuse for this removal, and we wanted to remove it nevertheless. [#44828](https://github.com/ClickHouse/ClickHouse/pull/44828) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Remove the dependency on the `adduser` tool from the packages, because we don't use it. This fixes [#44934](https://github.com/ClickHouse/ClickHouse/issues/44934). [#45011](https://github.com/ClickHouse/ClickHouse/pull/45011) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* The `SQLite` library is updated to the latest. It is used for the SQLite database and table integration engines. Also, fixed a false-positive TSan report. This closes [#45027](https://github.com/ClickHouse/ClickHouse/issues/45027). [#45031](https://github.com/ClickHouse/ClickHouse/pull/45031) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* CRC-32 changes to address the WeakHash collision issue in PowerPC. [#45144](https://github.com/ClickHouse/ClickHouse/pull/45144) ([MeenaRenganathan22](https://github.com/MeenaRenganathan22)). -* Update aws-c* submodules [#43020](https://github.com/ClickHouse/ClickHouse/pull/43020) ([Vitaly Baranov](https://github.com/vitlibar)). -* Automatically merge green backport PRs and green approved PRs [#41110](https://github.com/ClickHouse/ClickHouse/pull/41110) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). -* Introduce a [website](https://aretestsgreenyet.com/) for the status of ClickHouse CI. [Source](https://github.com/ClickHouse/aretestsgreenyet). - -#### Bug Fix - -* Replace domain IP types (IPv4, IPv6) with native. [#43221](https://github.com/ClickHouse/ClickHouse/pull/43221) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). It automatically fixes some missing implementations in the code. -* Fix the backup process if mutations get killed during the backup process. [#45351](https://github.com/ClickHouse/ClickHouse/pull/45351) ([Vitaly Baranov](https://github.com/vitlibar)). -* Fix the `Invalid number of rows in Chunk` exception message. [#41404](https://github.com/ClickHouse/ClickHouse/issues/41404). [#42126](https://github.com/ClickHouse/ClickHouse/pull/42126) ([Alexander Gololobov](https://github.com/davenger)). -* Fix possible use of an uninitialized value after executing expressions after sorting. Closes [#43386](https://github.com/ClickHouse/ClickHouse/issues/43386) [#43635](https://github.com/ClickHouse/ClickHouse/pull/43635) ([Kruglov Pavel](https://github.com/Avogar)). -* Better handling of NULL in aggregate combinators, fix possible segfault/logical error while using an obscure optimization `optimize_rewrite_sum_if_to_count_if`. Closes [#43758](https://github.com/ClickHouse/ClickHouse/issues/43758). [#43813](https://github.com/ClickHouse/ClickHouse/pull/43813) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix CREATE USER/ROLE query settings constraints. [#43993](https://github.com/ClickHouse/ClickHouse/pull/43993) ([Nikolay Degterinsky](https://github.com/evillique)). -* Fixed bug with non-parsable default value for `EPHEMERAL` column in table metadata. [#44026](https://github.com/ClickHouse/ClickHouse/pull/44026) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). 
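The "Replace domain IP types (IPv4, IPv6) with native" entry above refers to IP addresses becoming plain value types rather than domain wrappers over other types. A rough sketch of that idea (type and member names are invented; this is not the real ClickHouse definition):

```cpp
#include <compare>
#include <cstdint>

/// Hypothetical sketch: a "native" IPv4 is just a strongly typed 32-bit integer,
/// so comparisons, hashing and vectorized filters work on it like on any UInt32,
/// while the type system still keeps it distinct from plain numbers.
struct IPv4Value
{
    uint32_t raw = 0;

    constexpr explicit IPv4Value(uint32_t value = 0) : raw(value) {}
    constexpr auto operator<=>(const IPv4Value &) const = default;
};

static_assert(sizeof(IPv4Value) == sizeof(uint32_t), "no storage overhead over UInt32");
```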
-* Fix parsing of bad version from compatibility setting. [#44224](https://github.com/ClickHouse/ClickHouse/pull/44224) ([Kruglov Pavel](https://github.com/Avogar)). -* Bring interval subtraction from datetime in line with addition. [#44241](https://github.com/ClickHouse/ClickHouse/pull/44241) ([ltrk2](https://github.com/ltrk2)). -* Remove limits on the maximum size of the result for view. [#44261](https://github.com/ClickHouse/ClickHouse/pull/44261) ([lizhuoyu5](https://github.com/lzydmxy)). -* Fix possible logical error in cache if `do_not_evict_index_and_mrk_files=1`. Closes [#42142](https://github.com/ClickHouse/ClickHouse/issues/42142). [#44268](https://github.com/ClickHouse/ClickHouse/pull/44268) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Fix possible too early cache write interruption in write-through cache (caching could be stopped due to false assumption when it shouldn't have). [#44289](https://github.com/ClickHouse/ClickHouse/pull/44289) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Fix possible crash in the case function `IN` with constant arguments was used as a constant argument together with `LowCardinality`. Fixes [#44221](https://github.com/ClickHouse/ClickHouse/issues/44221). [#44346](https://github.com/ClickHouse/ClickHouse/pull/44346) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Fix support for complex parameters (like arrays) of parametric aggregate functions. This closes [#30975](https://github.com/ClickHouse/ClickHouse/issues/30975). The aggregate function `sumMapFiltered` was unusable in distributed queries before this change. [#44358](https://github.com/ClickHouse/ClickHouse/pull/44358) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Fix reading ObjectId in BSON schema inference. [#44382](https://github.com/ClickHouse/ClickHouse/pull/44382) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix race which can lead to premature temp parts removal before merge finishes in ReplicatedMergeTree. This issue could lead to errors like `No such file or directory: xxx`. Fixes [#43983](https://github.com/ClickHouse/ClickHouse/issues/43983). [#44383](https://github.com/ClickHouse/ClickHouse/pull/44383) ([alesapin](https://github.com/alesapin)). -* Some invalid `SYSTEM ... ON CLUSTER` queries worked in an unexpected way if a cluster name was not specified. It's fixed, now invalid queries throw `SYNTAX_ERROR` as they should. Fixes [#44264](https://github.com/ClickHouse/ClickHouse/issues/44264). [#44387](https://github.com/ClickHouse/ClickHouse/pull/44387) ([Alexander Tokmakov](https://github.com/tavplubix)). -* Fix reading Map type in ORC format. [#44400](https://github.com/ClickHouse/ClickHouse/pull/44400) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix reading columns that are not presented in input data in Parquet/ORC formats. Previously it could lead to error `INCORRECT_NUMBER_OF_COLUMNS`. Closes [#44333](https://github.com/ClickHouse/ClickHouse/issues/44333). [#44405](https://github.com/ClickHouse/ClickHouse/pull/44405) ([Kruglov Pavel](https://github.com/Avogar)). -* Previously the `bar` function used the same '▋' (U+258B "Left five eighths block") character to display both 5/8 and 6/8 bars. This change corrects this behavior by using '▊' (U+258A "Left three quarters block") for displaying 6/8 bar. [#44410](https://github.com/ClickHouse/ClickHouse/pull/44410) ([Alexander Gololobov](https://github.com/davenger)). 
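The `bar` fix above is easiest to see with the Unicode "left block" characters laid out explicitly. In this purely illustrative sketch, index k holds the character for k/8 of a cell, so 6/8 maps to U+258A '▊' rather than U+258B '▋':

```cpp
#include <array>
#include <cstddef>
#include <iostream>
#include <string_view>

/// eighths[k] is the block element that fills k/8 of a character cell.
constexpr std::array<std::string_view, 9> eighths = {
    "",        /// 0/8 - nothing
    "\u258F",  /// 1/8 ▏
    "\u258E",  /// 2/8 ▎
    "\u258D",  /// 3/8 ▍
    "\u258C",  /// 4/8 ▌
    "\u258B",  /// 5/8 ▋
    "\u258A",  /// 6/8 ▊
    "\u2589",  /// 7/8 ▉
    "\u2588",  /// 8/8 █
};

int main()
{
    double fraction = 6.0 / 8.0;
    std::cout << eighths[static_cast<std::size_t>(fraction * 8 + 0.5)] << '\n';  /// prints ▊
}
```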
-* Placing profile settings after profile settings constraints in the configuration file made constraints ineffective. [#44411](https://github.com/ClickHouse/ClickHouse/pull/44411) ([Konstantin Bogdanov](https://github.com/thevar1able)). -* Fix `SYNTAX_ERROR` while running `EXPLAIN AST INSERT` queries with data. Closes [#44207](https://github.com/ClickHouse/ClickHouse/issues/44207). [#44413](https://github.com/ClickHouse/ClickHouse/pull/44413) ([save-my-heart](https://github.com/save-my-heart)). -* Fix reading bool value with CRLF in CSV format. Closes [#44401](https://github.com/ClickHouse/ClickHouse/issues/44401). [#44442](https://github.com/ClickHouse/ClickHouse/pull/44442) ([Kruglov Pavel](https://github.com/Avogar)). -* Don't execute and/or/if/multiIf on a LowCardinality dictionary, so the result type cannot be LowCardinality. It could lead to the error `Illegal column ColumnLowCardinality` in some cases. Fixes [#43603](https://github.com/ClickHouse/ClickHouse/issues/43603). [#44469](https://github.com/ClickHouse/ClickHouse/pull/44469) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix mutations with the setting `max_streams_for_merge_tree_reading`. [#44472](https://github.com/ClickHouse/ClickHouse/pull/44472) ([Anton Popov](https://github.com/CurtizJ)). -* Fix potential null pointer dereference with GROUPING SETS in ASTSelectQuery::formatImpl ([#43049](https://github.com/ClickHouse/ClickHouse/issues/43049)). [#44479](https://github.com/ClickHouse/ClickHouse/pull/44479) ([Robert Schulze](https://github.com/rschu1ze)). -* Validate types in table function arguments, CAST function arguments, JSONAsObject schema inference according to settings. [#44501](https://github.com/ClickHouse/ClickHouse/pull/44501) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix IN function with LowCardinality and const column, close [#44503](https://github.com/ClickHouse/ClickHouse/issues/44503). [#44506](https://github.com/ClickHouse/ClickHouse/pull/44506) ([Duc Canh Le](https://github.com/canhld94)). -* Fixed a bug in the normalization of a `DEFAULT` expression in `CREATE TABLE` statement. The second argument of the function `in` (or the right argument of operator `IN`) might be replaced with the result of its evaluation during CREATE query execution. Fixes [#44496](https://github.com/ClickHouse/ClickHouse/issues/44496). [#44547](https://github.com/ClickHouse/ClickHouse/pull/44547) ([Alexander Tokmakov](https://github.com/tavplubix)). -* Projections do not work in presence of WITH ROLLUP, WITH CUBE and WITH TOTALS. In previous versions, a query produced an exception instead of skipping the usage of projections. This closes [#44614](https://github.com/ClickHouse/ClickHouse/issues/44614). This closes [#42772](https://github.com/ClickHouse/ClickHouse/issues/42772). [#44615](https://github.com/ClickHouse/ClickHouse/pull/44615) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Async blocks were not cleaned because the function `get all blocks sorted by time` didn't get async blocks. [#44651](https://github.com/ClickHouse/ClickHouse/pull/44651) ([Han Fei](https://github.com/hanfei1991)). -* Fix `LOGICAL_ERROR` `The top step of the right pipeline should be ExpressionStep` for JOIN with subquery, UNION, and TOTALS. Fixes [#43687](https://github.com/ClickHouse/ClickHouse/issues/43687). [#44673](https://github.com/ClickHouse/ClickHouse/pull/44673) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Avoid `std::out_of_range` exception in the Executable table engine. 
[#44681](https://github.com/ClickHouse/ClickHouse/pull/44681) ([Kruglov Pavel](https://github.com/Avogar)). -* Do not apply `optimize_syntax_fuse_functions` to quantiles on AST, close [#44712](https://github.com/ClickHouse/ClickHouse/issues/44712). [#44713](https://github.com/ClickHouse/ClickHouse/pull/44713) ([Vladimir C](https://github.com/vdimir)). -* Fix bug with wrong type in Merge table and PREWHERE, close [#43324](https://github.com/ClickHouse/ClickHouse/issues/43324). [#44716](https://github.com/ClickHouse/ClickHouse/pull/44716) ([Vladimir C](https://github.com/vdimir)). -* Fix a possible crash during shutdown (while destroying TraceCollector). Fixes [#44757](https://github.com/ClickHouse/ClickHouse/issues/44757). [#44758](https://github.com/ClickHouse/ClickHouse/pull/44758) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Fix a possible crash in distributed query processing. The crash could happen if a query with totals or extremes returned an empty result and there are mismatched types in the Distributed and the local tables. Fixes [#44738](https://github.com/ClickHouse/ClickHouse/issues/44738). [#44760](https://github.com/ClickHouse/ClickHouse/pull/44760) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Fix fsync for fetches (`min_compressed_bytes_to_fsync_after_fetch`)/small files (ttl.txt, columns.txt) in mutations (`min_rows_to_fsync_after_merge`/`min_compressed_bytes_to_fsync_after_merge`). [#44781](https://github.com/ClickHouse/ClickHouse/pull/44781) ([Azat Khuzhin](https://github.com/azat)). -* A rare race condition was possible when querying the `system.parts` or `system.parts_columns` tables in the presence of parts being moved between disks. Introduced in [#41145](https://github.com/ClickHouse/ClickHouse/issues/41145). [#44809](https://github.com/ClickHouse/ClickHouse/pull/44809) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Fix the error `Context has expired` which could appear with enabled projections optimization. Can be reproduced for queries with specific functions, like `dictHas/dictGet` which use context in runtime. Fixes [#44844](https://github.com/ClickHouse/ClickHouse/issues/44844). [#44850](https://github.com/ClickHouse/ClickHouse/pull/44850) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* A fix for `Cannot read all data` error which could happen while reading `LowCardinality` dictionary from remote fs. Fixes [#44709](https://github.com/ClickHouse/ClickHouse/issues/44709). [#44875](https://github.com/ClickHouse/ClickHouse/pull/44875) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Ignore cases when hardware monitor sensors cannot be read instead of showing a full exception message in logs. [#44895](https://github.com/ClickHouse/ClickHouse/pull/44895) ([Raúl Marín](https://github.com/Algunenano)). -* Use `max_delay_to_insert` value in case the calculated time to delay INSERT exceeds the setting value. Related to [#44902](https://github.com/ClickHouse/ClickHouse/issues/44902). [#44916](https://github.com/ClickHouse/ClickHouse/pull/44916) ([Igor Nikonov](https://github.com/devcrafter)). -* Fix error `Different order of columns in UNION subquery` for queries with `UNION`. Fixes [#44866](https://github.com/ClickHouse/ClickHouse/issues/44866). [#44920](https://github.com/ClickHouse/ClickHouse/pull/44920) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). 
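A schematic version of the INSERT-delay logic referenced by the `max_delay_to_insert` entries in this list (parameter names are assumed; this is a sketch, not the actual implementation): the delay grows in proportion to how far the parts count is over the threshold and is clamped so it never exceeds the configured maximum.

```cpp
#include <algorithm>
#include <cstdint>

/// Schematic sketch of delaying INSERTs when a table has too many active parts:
/// delay = max_delay_to_insert * (parts_over_threshold / max_allowed_parts_over_threshold),
/// clamped to max_delay_to_insert.
double computeInsertDelaySeconds(
    uint64_t parts_count,
    uint64_t parts_to_delay_insert,      /// start delaying above this many parts
    uint64_t parts_to_throw_insert,      /// reject INSERTs above this many parts
    double max_delay_to_insert_seconds)
{
    if (parts_count <= parts_to_delay_insert || parts_to_throw_insert <= parts_to_delay_insert)
        return 0.0;

    const double parts_over_threshold =
        static_cast<double>(parts_count - parts_to_delay_insert);
    const double max_allowed_parts_over_threshold =
        static_cast<double>(parts_to_throw_insert - parts_to_delay_insert);

    const double delay = max_delay_to_insert_seconds
        * (parts_over_threshold / max_allowed_parts_over_threshold);
    return std::min(delay, max_delay_to_insert_seconds);   /// clamp to the setting
}
```

For example, with `max_delay_to_insert` of 1 second and a parts count halfway between the two thresholds, this yields a 0.5 second delay.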
-* Delay for INSERT can be calculated incorrectly, which can lead to always using `max_delay_to_insert` setting as delay instead of a correct value. Using simple formula `max_delay_to_insert * (parts_over_threshold/max_allowed_parts_over_threshold)` i.e. delay grows proportionally to parts over threshold. Closes [#44902](https://github.com/ClickHouse/ClickHouse/issues/44902). [#44954](https://github.com/ClickHouse/ClickHouse/pull/44954) ([Igor Nikonov](https://github.com/devcrafter)). -* Fix alter table TTL error when a wide part has the lightweight delete mask. [#44959](https://github.com/ClickHouse/ClickHouse/pull/44959) ([Mingliang Pan](https://github.com/liangliangpan)). -* Follow-up fix for Replace domain IP types (IPv4, IPv6) with native [#43221](https://github.com/ClickHouse/ClickHouse/issues/43221). [#45024](https://github.com/ClickHouse/ClickHouse/pull/45024) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). -* Follow-up fix for Replace domain IP types (IPv4, IPv6) with native https://github.com/ClickHouse/ClickHouse/pull/43221. [#45043](https://github.com/ClickHouse/ClickHouse/pull/45043) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). -* A buffer overflow was possible in the parser. Found by fuzzer. [#45047](https://github.com/ClickHouse/ClickHouse/pull/45047) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Fix possible cannot-read-all-data error in storage FileLog. Closes [#45051](https://github.com/ClickHouse/ClickHouse/issues/45051), [#38257](https://github.com/ClickHouse/ClickHouse/issues/38257). [#45057](https://github.com/ClickHouse/ClickHouse/pull/45057) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Memory efficient aggregation (setting `distributed_aggregation_memory_efficient`) is disabled when grouping sets are present in the query. [#45058](https://github.com/ClickHouse/ClickHouse/pull/45058) ([Nikita Taranov](https://github.com/nickitat)). -* Fix `RANGE_HASHED` dictionary to count range columns as part of the primary key during updates when `update_field` is specified. Closes [#44588](https://github.com/ClickHouse/ClickHouse/issues/44588). [#45061](https://github.com/ClickHouse/ClickHouse/pull/45061) ([Maksim Kita](https://github.com/kitaisreal)). -* Fix error `Cannot capture column` for `LowCardinality` captured argument of nested lambda. Fixes [#45028](https://github.com/ClickHouse/ClickHouse/issues/45028). [#45065](https://github.com/ClickHouse/ClickHouse/pull/45065) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Fix the wrong query result of `additional_table_filters` (additional filter was not applied) in case the minmax/count projection is used. [#45133](https://github.com/ClickHouse/ClickHouse/pull/45133) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Fixed bug in `histogram` function accepting negative values. [#45147](https://github.com/ClickHouse/ClickHouse/pull/45147) ([simpleton](https://github.com/rgzntrade)). -* Fix wrong column nullability in StoreageJoin, close [#44940](https://github.com/ClickHouse/ClickHouse/issues/44940). [#45184](https://github.com/ClickHouse/ClickHouse/pull/45184) ([Vladimir C](https://github.com/vdimir)). -* Fix `background_fetches_pool_size` settings reload (increase at runtime). [#45189](https://github.com/ClickHouse/ClickHouse/pull/45189) ([Raúl Marín](https://github.com/Algunenano)). -* Correctly process `SELECT` queries on KV engines (e.g. KeeperMap, EmbeddedRocksDB) using `IN` on the key with subquery producing different type. 
[#45215](https://github.com/ClickHouse/ClickHouse/pull/45215) ([Antonio Andelic](https://github.com/antonio2368)). -* Fix logical error in SEMI JOIN & join_use_nulls in some cases, close [#45163](https://github.com/ClickHouse/ClickHouse/issues/45163), close [#45209](https://github.com/ClickHouse/ClickHouse/issues/45209). [#45230](https://github.com/ClickHouse/ClickHouse/pull/45230) ([Vladimir C](https://github.com/vdimir)). -* Fix heap-use-after-free in reading from s3. [#45253](https://github.com/ClickHouse/ClickHouse/pull/45253) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix bug when the Avro Union type is ['null', Nested type], closes [#45275](https://github.com/ClickHouse/ClickHouse/issues/45275). Fix bug that incorrectly infers `bytes` type to `Float`. [#45276](https://github.com/ClickHouse/ClickHouse/pull/45276) ([flynn](https://github.com/ucasfl)). -* Throw a correct exception when explicit PREWHERE cannot be used with a table using the storage engine `Merge`. [#45319](https://github.com/ClickHouse/ClickHouse/pull/45319) ([Antonio Andelic](https://github.com/antonio2368)). -* Under WSL1 Ubuntu self-extracting ClickHouse fails to decompress due to inconsistency - /proc/self/maps reporting 32bit file's inode, while stat reporting 64bit inode. [#45339](https://github.com/ClickHouse/ClickHouse/pull/45339) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). -* Fix race in Distributed table startup (that could lead to processing file of async INSERT multiple times). [#45360](https://github.com/ClickHouse/ClickHouse/pull/45360) ([Azat Khuzhin](https://github.com/azat)). -* Fix a possible crash while reading from storage `S3` and table function `s3` in the case when `ListObject` request has failed. [#45371](https://github.com/ClickHouse/ClickHouse/pull/45371) ([Anton Popov](https://github.com/CurtizJ)). -* Fix `SELECT ... FROM system.dictionaries` exception when there is a dictionary with a bad structure (e.g. incorrect type in XML config). [#45399](https://github.com/ClickHouse/ClickHouse/pull/45399) ([Aleksei Filatov](https://github.com/aalexfvk)). -* Fix s3Cluster schema inference when structure from insertion table is used in `INSERT INTO ... SELECT * FROM s3Cluster` queries. [#45422](https://github.com/ClickHouse/ClickHouse/pull/45422) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix bug in JSON/BSONEachRow parsing with HTTP that could lead to using default values for some columns instead of values from data. [#45424](https://github.com/ClickHouse/ClickHouse/pull/45424) ([Kruglov Pavel](https://github.com/Avogar)). -* Fixed bug (Code: 632. DB::Exception: Unexpected data ... after parsed IPv6 value ...) with typed parsing of IP types from text source. [#45425](https://github.com/ClickHouse/ClickHouse/pull/45425) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). -* close [#45297](https://github.com/ClickHouse/ClickHouse/issues/45297) Add check for empty regular expressions. [#45428](https://github.com/ClickHouse/ClickHouse/pull/45428) ([Han Fei](https://github.com/hanfei1991)). -* Fix possible (likely distributed) query hung. [#45448](https://github.com/ClickHouse/ClickHouse/pull/45448) ([Azat Khuzhin](https://github.com/azat)). -* Fix possible deadlock with `allow_asynchronous_read_from_io_pool_for_merge_tree` enabled in case of exception from `ThreadPool::schedule`. [#45481](https://github.com/ClickHouse/ClickHouse/pull/45481) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Fix possible in-use table after DETACH. 
[#45493](https://github.com/ClickHouse/ClickHouse/pull/45493) ([Azat Khuzhin](https://github.com/azat)). -* Fix rare abort in the case when a query is canceled and parallel parsing was used during its execution. [#45498](https://github.com/ClickHouse/ClickHouse/pull/45498) ([Anton Popov](https://github.com/CurtizJ)). -* Fix a race between Distributed table creation and INSERT into it (could lead to CANNOT_LINK during INSERT into the table). [#45502](https://github.com/ClickHouse/ClickHouse/pull/45502) ([Azat Khuzhin](https://github.com/azat)). -* Add proper default (SLRU) to cache policy getter. Closes [#45514](https://github.com/ClickHouse/ClickHouse/issues/45514). [#45524](https://github.com/ClickHouse/ClickHouse/pull/45524) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Disallow array join in mutations closes [#42637](https://github.com/ClickHouse/ClickHouse/issues/42637) [#44447](https://github.com/ClickHouse/ClickHouse/pull/44447) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). -* Fix for qualified asterisks with alias table name and column transformer. Resolves [#44736](https://github.com/ClickHouse/ClickHouse/issues/44736). [#44755](https://github.com/ClickHouse/ClickHouse/pull/44755) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). - -## [Changelog for 2022](https://clickhouse.com/docs/en/whats-new/changelog/2022) +## [Changelog for 2023](https://clickhouse.com/docs/en/whats-new/changelog/2023) diff --git a/base/base/Decimal.h b/base/base/Decimal.h index 2405ba9ca0d..afa186faf5b 100644 --- a/base/base/Decimal.h +++ b/base/base/Decimal.h @@ -99,7 +99,7 @@ public: }; } -constexpr DB::UInt64 max_uint_mask = std::numeric_limits::max(); +constexpr UInt64 max_uint_mask = std::numeric_limits::max(); namespace std { @@ -114,8 +114,8 @@ namespace std { size_t operator()(const DB::Decimal128 & x) const { - return std::hash()(x.value >> 64) - ^ std::hash()(x.value & max_uint_mask); + return std::hash()(x.value >> 64) + ^ std::hash()(x.value & max_uint_mask); } }; @@ -134,8 +134,8 @@ namespace std size_t operator()(const DB::Decimal256 & x) const { // FIXME temp solution - return std::hash()(static_cast(x.value >> 64 & max_uint_mask)) - ^ std::hash()(static_cast(x.value & max_uint_mask)); + return std::hash()(static_cast(x.value >> 64 & max_uint_mask)) + ^ std::hash()(static_cast(x.value & max_uint_mask)); } }; } diff --git a/base/base/types.h b/base/base/types.h index 5825c8ae7ad..3a7760eae91 100644 --- a/base/base/types.h +++ b/base/base/types.h @@ -3,15 +3,6 @@ #include #include -using Int8 = int8_t; -using Int16 = int16_t; -using Int32 = int32_t; -using Int64 = int64_t; - -#ifndef __cpp_char8_t -using char8_t = unsigned char; -#endif - /// This is needed for more strict aliasing. 
https://godbolt.org/z/xpJBSb https://stackoverflow.com/a/57453713 using UInt8 = char8_t; @@ -19,24 +10,12 @@ using UInt16 = uint16_t; using UInt32 = uint32_t; using UInt64 = uint64_t; -using String = std::string; - -namespace DB -{ - -using UInt8 = ::UInt8; -using UInt16 = ::UInt16; -using UInt32 = ::UInt32; -using UInt64 = ::UInt64; - -using Int8 = ::Int8; -using Int16 = ::Int16; -using Int32 = ::Int32; -using Int64 = ::Int64; +using Int8 = int8_t; +using Int16 = int16_t; +using Int32 = int32_t; +using Int64 = int64_t; using Float32 = float; using Float64 = double; using String = std::string; - -} diff --git a/base/poco/Foundation/include/Poco/BufferedStreamBuf.h b/base/poco/Foundation/include/Poco/BufferedStreamBuf.h index 9f4cbd4e4d8..d97e37eedf3 100644 --- a/base/poco/Foundation/include/Poco/BufferedStreamBuf.h +++ b/base/poco/Foundation/include/Poco/BufferedStreamBuf.h @@ -26,6 +26,11 @@ #include "Poco/StreamUtil.h" +namespace DB +{ +class ReadBufferFromIStream; +} + namespace Poco { @@ -120,6 +125,8 @@ protected: openmode getMode() const { return _mode; } private: + friend class DB::ReadBufferFromIStream; + virtual int readFromDevice(char_type * /*buffer*/, std::streamsize /*length*/) { return 0; } virtual int writeToDevice(const char_type * /*buffer*/, std::streamsize /*length*/) { return 0; } diff --git a/base/poco/Foundation/include/Poco/Logger.h b/base/poco/Foundation/include/Poco/Logger.h index ffe3766dfec..cf202718662 100644 --- a/base/poco/Foundation/include/Poco/Logger.h +++ b/base/poco/Foundation/include/Poco/Logger.h @@ -33,7 +33,8 @@ namespace Poco class Exception; - +class Logger; +using LoggerPtr = std::shared_ptr; class Foundation_API Logger : public Channel /// Logger is a special Channel that acts as the main @@ -870,6 +871,11 @@ public: /// If the Logger does not yet exist, it is created, based /// on its parent logger. + static LoggerPtr getShared(const std::string & name); + /// Returns a shared pointer to the Logger with the given name. + /// If the Logger does not yet exist, it is created, based + /// on its parent logger. + static Logger & unsafeGet(const std::string & name); /// Returns a reference to the Logger with the given name. /// If the Logger does not yet exist, it is created, based @@ -885,6 +891,11 @@ public: /// given name. The Logger's Channel and log level as set as /// specified. + static LoggerPtr createShared(const std::string & name, Channel * pChannel, int level = Message::PRIO_INFORMATION); + /// Creates and returns a shared pointer to a Logger with the + /// given name. The Logger's Channel and log level as set as + /// specified. + static Logger & root(); /// Returns a reference to the root logger, which is the ultimate /// ancestor of all Loggers. @@ -893,13 +904,6 @@ public: /// Returns a pointer to the Logger with the given name if it /// exists, or a null pointer otherwise. - static void destroy(const std::string & name); - /// Destroys the logger with the specified name. Does nothing - /// if the logger is not found. - /// - /// After a logger has been destroyed, all references to it - /// become invalid. - static void shutdown(); /// Shuts down the logging framework and releases all /// Loggers. @@ -929,8 +933,6 @@ public: static const std::string ROOT; /// The name of the root logger (""). 
protected: - typedef std::map LoggerMap; - Logger(const std::string & name, Channel * pChannel, int level); ~Logger(); @@ -938,6 +940,7 @@ protected: void log(const std::string & text, Message::Priority prio, const char * file, int line); static std::string format(const std::string & fmt, int argc, std::string argv[]); + static Logger & unsafeCreate(const std::string & name, Channel * pChannel, int level = Message::PRIO_INFORMATION); static Logger & parent(const std::string & name); static void add(Logger * pLogger); static Logger * find(const std::string & name); @@ -950,9 +953,6 @@ private: std::string _name; Channel * _pChannel; std::atomic_int _level; - - static LoggerMap * _pLoggerMap; - static Mutex _mapMtx; }; diff --git a/base/poco/Foundation/include/Poco/RefCountedObject.h b/base/poco/Foundation/include/Poco/RefCountedObject.h index 4ad32e30cad..db966089e00 100644 --- a/base/poco/Foundation/include/Poco/RefCountedObject.h +++ b/base/poco/Foundation/include/Poco/RefCountedObject.h @@ -38,15 +38,15 @@ public: /// Creates the RefCountedObject. /// The initial reference count is one. - void duplicate() const; - /// Increments the object's reference count. + size_t duplicate() const; + /// Increments the object's reference count, returns reference count before call. - void release() const throw(); + size_t release() const throw(); /// Decrements the object's reference count /// and deletes the object if the count - /// reaches zero. + /// reaches zero, returns reference count before call. - int referenceCount() const; + size_t referenceCount() const; /// Returns the reference count. protected: @@ -57,36 +57,40 @@ private: RefCountedObject(const RefCountedObject &); RefCountedObject & operator=(const RefCountedObject &); - mutable AtomicCounter _counter; + mutable std::atomic _counter; }; // // inlines // -inline int RefCountedObject::referenceCount() const +inline size_t RefCountedObject::referenceCount() const { - return _counter.value(); + return _counter.load(std::memory_order_acquire); } -inline void RefCountedObject::duplicate() const +inline size_t RefCountedObject::duplicate() const { - ++_counter; + return _counter.fetch_add(1, std::memory_order_acq_rel); } -inline void RefCountedObject::release() const throw() +inline size_t RefCountedObject::release() const throw() { + size_t reference_count_before = _counter.fetch_sub(1, std::memory_order_acq_rel); + try { - if (--_counter == 0) + if (reference_count_before == 1) delete this; } catch (...) 
{ poco_unexpected(); } + + return reference_count_before; } diff --git a/base/poco/Foundation/src/Logger.cpp b/base/poco/Foundation/src/Logger.cpp index 3d5de585b4f..cfc063c8979 100644 --- a/base/poco/Foundation/src/Logger.cpp +++ b/base/poco/Foundation/src/Logger.cpp @@ -20,12 +20,38 @@ #include "Poco/NumberParser.h" #include "Poco/String.h" +#include +#include + +namespace +{ + +std::mutex & getLoggerMutex() +{ + auto get_logger_mutex_placeholder_memory = []() + { + static char buffer[sizeof(std::mutex)]{}; + return buffer; + }; + + static std::mutex * logger_mutex = new (get_logger_mutex_placeholder_memory()) std::mutex(); + return *logger_mutex; +} + +struct LoggerEntry +{ + Poco::Logger * logger; + bool owned_by_shared_ptr = false; +}; + +using LoggerMap = std::unordered_map; +LoggerMap * _pLoggerMap = nullptr; + +} namespace Poco { -Logger::LoggerMap* Logger::_pLoggerMap = 0; -Mutex Logger::_mapMtx; const std::string Logger::ROOT; @@ -73,7 +99,7 @@ void Logger::setProperty(const std::string& name, const std::string& value) setChannel(LoggingRegistry::defaultRegistry().channelForName(value)); else if (name == "level") setLevel(value); - else + else Channel::setProperty(name, value); } @@ -112,17 +138,17 @@ void Logger::dump(const std::string& msg, const void* buffer, std::size_t length void Logger::setLevel(const std::string& name, int level) { - Mutex::ScopedLock lock(_mapMtx); + std::lock_guard lock(getLoggerMutex()); if (_pLoggerMap) { std::string::size_type len = name.length(); - for (LoggerMap::iterator it = _pLoggerMap->begin(); it != _pLoggerMap->end(); ++it) + for (auto & it : *_pLoggerMap) { - if (len == 0 || - (it->first.compare(0, len, name) == 0 && (it->first.length() == len || it->first[len] == '.'))) + if (len == 0 || + (it.first.compare(0, len, name) == 0 && (it.first.length() == len || it.first[len] == '.'))) { - it->second->setLevel(level); + it.second.logger->setLevel(level); } } } @@ -131,17 +157,17 @@ void Logger::setLevel(const std::string& name, int level) void Logger::setChannel(const std::string& name, Channel* pChannel) { - Mutex::ScopedLock lock(_mapMtx); + std::lock_guard lock(getLoggerMutex()); if (_pLoggerMap) { std::string::size_type len = name.length(); - for (LoggerMap::iterator it = _pLoggerMap->begin(); it != _pLoggerMap->end(); ++it) + for (auto & it : *_pLoggerMap) { if (len == 0 || - (it->first.compare(0, len, name) == 0 && (it->first.length() == len || it->first[len] == '.'))) + (it.first.compare(0, len, name) == 0 && (it.first.length() == len || it.first[len] == '.'))) { - it->second->setChannel(pChannel); + it.second.logger->setChannel(pChannel); } } } @@ -150,17 +176,17 @@ void Logger::setChannel(const std::string& name, Channel* pChannel) void Logger::setProperty(const std::string& loggerName, const std::string& propertyName, const std::string& value) { - Mutex::ScopedLock lock(_mapMtx); + std::lock_guard lock(getLoggerMutex()); if (_pLoggerMap) { std::string::size_type len = loggerName.length(); - for (LoggerMap::iterator it = _pLoggerMap->begin(); it != _pLoggerMap->end(); ++it) + for (auto & it : *_pLoggerMap) { if (len == 0 || - (it->first.compare(0, len, loggerName) == 0 && (it->first.length() == len || it->first[len] == '.'))) + (it.first.compare(0, len, loggerName) == 0 && (it.first.length() == len || it.first[len] == '.'))) { - it->second->setProperty(propertyName, value); + it.second.logger->setProperty(propertyName, value); } } } @@ -280,11 +306,88 @@ void Logger::formatDump(std::string& message, const void* buffer, std::size_t le } 
+namespace +{ + +struct LoggerDeleter +{ + void operator()(Poco::Logger * logger) + { + std::lock_guard lock(getLoggerMutex()); + + /// If logger infrastructure is destroyed just decrement logger reference count + if (!_pLoggerMap) + { + logger->release(); + return; + } + + auto it = _pLoggerMap->find(logger->name()); + assert(it != _pLoggerMap->end()); + + /** If reference count is 1, this means this shared pointer owns logger + * and need destroy it. + */ + size_t reference_count_before_release = logger->release(); + if (reference_count_before_release == 1) + { + assert(it->second.owned_by_shared_ptr); + _pLoggerMap->erase(it); + } + } +}; + + +inline LoggerPtr makeLoggerPtr(Logger & logger) +{ + return std::shared_ptr(&logger, LoggerDeleter()); +} + +} + + Logger& Logger::get(const std::string& name) { - Mutex::ScopedLock lock(_mapMtx); + std::lock_guard lock(getLoggerMutex()); - return unsafeGet(name); + Logger & logger = unsafeGet(name); + + /** If there are already shared pointer created for this logger + * we need to increment Logger reference count and now logger + * is owned by logger infrastructure. + */ + auto it = _pLoggerMap->find(name); + if (it->second.owned_by_shared_ptr) + { + it->second.logger->duplicate(); + it->second.owned_by_shared_ptr = false; + } + + return logger; +} + + +LoggerPtr Logger::getShared(const std::string & name) +{ + std::lock_guard lock(getLoggerMutex()); + bool logger_exists = _pLoggerMap && _pLoggerMap->contains(name); + + Logger & logger = unsafeGet(name); + + /** If logger already exists, then this shared pointer does not own it. + * If logger does not exists, logger infrastructure could be already destroyed + * or logger was created. + */ + if (logger_exists) + { + logger.duplicate(); + } + else if (_pLoggerMap) + { + _pLoggerMap->find(name)->second.owned_by_shared_ptr = true; + } + + return makeLoggerPtr(logger); } @@ -310,18 +413,24 @@ Logger& Logger::unsafeGet(const std::string& name) Logger& Logger::create(const std::string& name, Channel* pChannel, int level) { - Mutex::ScopedLock lock(_mapMtx); + std::lock_guard lock(getLoggerMutex()); - if (find(name)) throw ExistsException(); - Logger* pLogger = new Logger(name, pChannel, level); - add(pLogger); - return *pLogger; + return unsafeCreate(name, pChannel, level); } +LoggerPtr Logger::createShared(const std::string & name, Channel * pChannel, int level) +{ + std::lock_guard lock(getLoggerMutex()); + + Logger & logger = unsafeCreate(name, pChannel, level); + _pLoggerMap->find(name)->second.owned_by_shared_ptr = true; + + return makeLoggerPtr(logger); +} Logger& Logger::root() { - Mutex::ScopedLock lock(_mapMtx); + std::lock_guard lock(getLoggerMutex()); return unsafeGet(ROOT); } @@ -329,7 +438,7 @@ Logger& Logger::root() Logger* Logger::has(const std::string& name) { - Mutex::ScopedLock lock(_mapMtx); + std::lock_guard lock(getLoggerMutex()); return find(name); } @@ -337,14 +446,18 @@ Logger* Logger::has(const std::string& name) void Logger::shutdown() { - Mutex::ScopedLock lock(_mapMtx); + std::lock_guard lock(getLoggerMutex()); if (_pLoggerMap) { - for (LoggerMap::iterator it = _pLoggerMap->begin(); it != _pLoggerMap->end(); ++it) + for (auto & it : *_pLoggerMap) { - it->second->release(); + if (it.second.owned_by_shared_ptr) + continue; + + it.second.logger->release(); } + delete _pLoggerMap; _pLoggerMap = 0; } @@ -357,31 +470,15 @@ Logger* Logger::find(const std::string& name) { LoggerMap::iterator it = _pLoggerMap->find(name); if (it != _pLoggerMap->end()) - return it->second; + return 
it->second.logger; } return 0; } -void Logger::destroy(const std::string& name) -{ - Mutex::ScopedLock lock(_mapMtx); - - if (_pLoggerMap) - { - LoggerMap::iterator it = _pLoggerMap->find(name); - if (it != _pLoggerMap->end()) - { - it->second->release(); - _pLoggerMap->erase(it); - } - } -} - - void Logger::names(std::vector& names) { - Mutex::ScopedLock lock(_mapMtx); + std::lock_guard lock(getLoggerMutex()); names.clear(); if (_pLoggerMap) @@ -393,6 +490,14 @@ void Logger::names(std::vector& names) } } +Logger& Logger::unsafeCreate(const std::string & name, Channel * pChannel, int level) +{ + if (find(name)) throw ExistsException(); + Logger* pLogger = new Logger(name, pChannel, level); + add(pLogger); + + return *pLogger; +} Logger& Logger::parent(const std::string& name) { @@ -478,7 +583,8 @@ void Logger::add(Logger* pLogger) { if (!_pLoggerMap) _pLoggerMap = new LoggerMap; - _pLoggerMap->insert(LoggerMap::value_type(pLogger->name(), pLogger)); + + _pLoggerMap->emplace(pLogger->name(), LoggerEntry{pLogger, false /*owned_by_shared_ptr*/}); } diff --git a/cmake/sanitize.cmake b/cmake/sanitize.cmake index 3f7a8498059..bc4a029721d 100644 --- a/cmake/sanitize.cmake +++ b/cmake/sanitize.cmake @@ -79,6 +79,10 @@ if (SANITIZE_COVERAGE) # But the actual coverage will be enabled on per-library basis: for ClickHouse code, but not for 3rd-party. set (COVERAGE_FLAGS "-fsanitize-coverage=trace-pc-guard,pc-table") -endif() -set (WITHOUT_COVERAGE_FLAGS "-fno-profile-instr-generate -fno-coverage-mapping -fno-sanitize-coverage=trace-pc-guard,pc-table") + set (WITHOUT_COVERAGE_FLAGS "-fno-profile-instr-generate -fno-coverage-mapping -fno-sanitize-coverage=trace-pc-guard,pc-table") + set (WITHOUT_COVERAGE_FLAGS_LIST -fno-profile-instr-generate -fno-coverage-mapping -fno-sanitize-coverage=trace-pc-guard,pc-table) +else() + set (WITHOUT_COVERAGE_FLAGS "") + set (WITHOUT_COVERAGE_FLAGS_LIST "") +endif() diff --git a/contrib/avro b/contrib/avro index 2fb8a8a6ec0..d43acc84d3d 160000 --- a/contrib/avro +++ b/contrib/avro @@ -1 +1 @@ -Subproject commit 2fb8a8a6ec0eab9109b68abf3b4857e8c476b918 +Subproject commit d43acc84d3d455b016f847d6666fbc3cd27f16a9 diff --git a/contrib/boost-cmake/CMakeLists.txt b/contrib/boost-cmake/CMakeLists.txt index 343e863e496..2c60fc0e552 100644 --- a/contrib/boost-cmake/CMakeLists.txt +++ b/contrib/boost-cmake/CMakeLists.txt @@ -44,12 +44,14 @@ set (SRCS_IOSTREAMS "${LIBRARY_DIR}/libs/iostreams/src/gzip.cpp" "${LIBRARY_DIR}/libs/iostreams/src/mapped_file.cpp" "${LIBRARY_DIR}/libs/iostreams/src/zlib.cpp" + "${LIBRARY_DIR}/libs/iostreams/src/zstd.cpp" ) add_library (_boost_iostreams ${SRCS_IOSTREAMS}) add_library (boost::iostreams ALIAS _boost_iostreams) target_include_directories (_boost_iostreams PRIVATE ${LIBRARY_DIR}) target_link_libraries (_boost_iostreams PRIVATE ch_contrib::zlib) +target_link_libraries (_boost_iostreams PRIVATE ch_contrib::zstd) # program_options diff --git a/contrib/corrosion-cmake/CMakeLists.txt b/contrib/corrosion-cmake/CMakeLists.txt index 8adc2c0b23a..9b98ed6efb3 100644 --- a/contrib/corrosion-cmake/CMakeLists.txt +++ b/contrib/corrosion-cmake/CMakeLists.txt @@ -1,8 +1,5 @@ if (NOT ENABLE_LIBRARIES) set(DEFAULT_ENABLE_RUST FALSE) -elseif((CMAKE_TOOLCHAIN_FILE MATCHES "darwin") AND (CMAKE_TOOLCHAIN_FILE MATCHES "aarch64")) - message(STATUS "Rust is not available on aarch64-apple-darwin") - set(DEFAULT_ENABLE_RUST FALSE) else() list (APPEND CMAKE_MODULE_PATH "${ClickHouse_SOURCE_DIR}/contrib/corrosion/cmake") find_package(Rust) @@ -19,7 +16,9 @@ 
message(STATUS "Checking Rust toolchain for current target") # See https://doc.rust-lang.org/nightly/rustc/platform-support.html -if((CMAKE_TOOLCHAIN_FILE MATCHES "linux/toolchain-x86_64") AND (CMAKE_TOOLCHAIN_FILE MATCHES "musl")) +if(CMAKE_TOOLCHAIN_FILE MATCHES "ppc64le") + set(Rust_CARGO_TARGET "powerpc64le-unknown-linux-gnu") +elseif((CMAKE_TOOLCHAIN_FILE MATCHES "linux/toolchain-x86_64") AND (CMAKE_TOOLCHAIN_FILE MATCHES "musl")) set(Rust_CARGO_TARGET "x86_64-unknown-linux-musl") elseif(CMAKE_TOOLCHAIN_FILE MATCHES "linux/toolchain-x86_64") set(Rust_CARGO_TARGET "x86_64-unknown-linux-gnu") @@ -29,14 +28,14 @@ elseif(CMAKE_TOOLCHAIN_FILE MATCHES "linux/toolchain-aarch64") set(Rust_CARGO_TARGET "aarch64-unknown-linux-gnu") elseif((CMAKE_TOOLCHAIN_FILE MATCHES "darwin") AND (CMAKE_TOOLCHAIN_FILE MATCHES "x86_64")) set(Rust_CARGO_TARGET "x86_64-apple-darwin") +elseif((CMAKE_TOOLCHAIN_FILE MATCHES "darwin") AND (CMAKE_TOOLCHAIN_FILE MATCHES "aarch64")) + set(Rust_CARGO_TARGET "aarch64-apple-darwin") elseif((CMAKE_TOOLCHAIN_FILE MATCHES "freebsd") AND (CMAKE_TOOLCHAIN_FILE MATCHES "x86_64")) set(Rust_CARGO_TARGET "x86_64-unknown-freebsd") elseif(CMAKE_TOOLCHAIN_FILE MATCHES "linux/toolchain-riscv64") set(Rust_CARGO_TARGET "riscv64gc-unknown-linux-gnu") -endif() - -if(CMAKE_TOOLCHAIN_FILE MATCHES "ppc64le") - set(Rust_CARGO_TARGET "powerpc64le-unknown-linux-gnu") +else() + message(FATAL_ERROR "Unsupported rust target") endif() message(STATUS "Switched Rust target to ${Rust_CARGO_TARGET}") diff --git a/contrib/jemalloc-cmake/CMakeLists.txt b/contrib/jemalloc-cmake/CMakeLists.txt index 15e965ed841..b633f0fda50 100644 --- a/contrib/jemalloc-cmake/CMakeLists.txt +++ b/contrib/jemalloc-cmake/CMakeLists.txt @@ -34,9 +34,9 @@ if (OS_LINUX) # avoid spurious latencies and additional work associated with # MADV_DONTNEED. See # https://github.com/ClickHouse/ClickHouse/issues/11121 for motivation. - set (JEMALLOC_CONFIG_MALLOC_CONF "percpu_arena:percpu,oversize_threshold:0,muzzy_decay_ms:5000,dirty_decay_ms:5000") + set (JEMALLOC_CONFIG_MALLOC_CONF "percpu_arena:percpu,oversize_threshold:0,muzzy_decay_ms:0,dirty_decay_ms:5000") else() - set (JEMALLOC_CONFIG_MALLOC_CONF "oversize_threshold:0,muzzy_decay_ms:5000,dirty_decay_ms:5000") + set (JEMALLOC_CONFIG_MALLOC_CONF "oversize_threshold:0,muzzy_decay_ms:0,dirty_decay_ms:5000") endif() # CACHE variable is empty to allow changing defaults without the necessity # to purge cache @@ -161,6 +161,9 @@ target_include_directories(_jemalloc SYSTEM PRIVATE target_compile_definitions(_jemalloc PRIVATE -DJEMALLOC_NO_PRIVATE_NAMESPACE) +# Because our coverage callbacks call malloc, and recursive call of malloc could not work. 
+target_compile_options(_jemalloc PRIVATE ${WITHOUT_COVERAGE_FLAGS_LIST}) + if (CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG") target_compile_definitions(_jemalloc PRIVATE -DJEMALLOC_DEBUG=1 diff --git a/contrib/libssh-cmake/CMakeLists.txt b/contrib/libssh-cmake/CMakeLists.txt index 7a3816d4dce..eee3df832fa 100644 --- a/contrib/libssh-cmake/CMakeLists.txt +++ b/contrib/libssh-cmake/CMakeLists.txt @@ -1,4 +1,4 @@ -option (ENABLE_SSH "Enable support for SSH keys and protocol" ON) +option (ENABLE_SSH "Enable support for SSH keys and protocol" ${ENABLE_LIBRARIES}) if (NOT ENABLE_SSH) message(STATUS "Not using SSH") diff --git a/contrib/simdjson b/contrib/simdjson index 1075e8609c4..6060be2fdf6 160000 --- a/contrib/simdjson +++ b/contrib/simdjson @@ -1 +1 @@ -Subproject commit 1075e8609c4afa253162d441437af929c29e31bb +Subproject commit 6060be2fdf62edf4a8f51a8b0883d57d09397b30 diff --git a/contrib/update-submodules.sh b/contrib/update-submodules.sh index b12f3f924dc..7195de020bd 100755 --- a/contrib/update-submodules.sh +++ b/contrib/update-submodules.sh @@ -6,9 +6,15 @@ SCRIPT_DIR=$(dirname "${SCRIPT_PATH}") GIT_DIR=$(git -C "$SCRIPT_DIR" rev-parse --show-toplevel) cd $GIT_DIR +# Exclude from contribs some garbage subdirs that we don't need. +# It reduces the checked out files size about 3 times and therefore speeds up indexing in IDEs and searching. +# NOTE .git/ still contains everything that we don't check out (although, it's compressed) +# See also https://git-scm.com/docs/git-sparse-checkout contrib/sparse-checkout/setup-sparse-checkout.sh + git submodule init git submodule sync + # NOTE: do not use --remote for `git submodule update`[1] command, since the submodule references to the specific commit SHA1 in the subproject. # It may cause unexpected behavior. Instead you need to commit a new SHA1 for a submodule. # diff --git a/docker/server/entrypoint.sh b/docker/server/entrypoint.sh index b9c7ea34a36..78f18f376f4 100755 --- a/docker/server/entrypoint.sh +++ b/docker/server/entrypoint.sh @@ -49,17 +49,10 @@ CLICKHOUSE_PASSWORD="${CLICKHOUSE_PASSWORD:-}" CLICKHOUSE_DB="${CLICKHOUSE_DB:-}" CLICKHOUSE_ACCESS_MANAGEMENT="${CLICKHOUSE_DEFAULT_ACCESS_MANAGEMENT:-0}" -for dir in "$DATA_DIR" \ - "$ERROR_LOG_DIR" \ - "$LOG_DIR" \ - "$TMP_DIR" \ - "$USER_PATH" \ - "$FORMAT_SCHEMA_PATH" \ - "${DISKS_PATHS[@]}" \ - "${DISKS_METADATA_PATHS[@]}" -do +function create_directory_and_do_chown() { + local dir=$1 # check if variable not empty - [ -z "$dir" ] && continue + [ -z "$dir" ] && return # ensure directories exist if [ "$DO_CHOWN" = "1" ]; then mkdir="mkdir" @@ -81,6 +74,23 @@ do chown -R "$USER:$GROUP" "$dir" fi fi +} + +create_directory_and_do_chown "$DATA_DIR" + +# Change working directory to $DATA_DIR in case there're paths relative to $DATA_DIR, also avoids running +# clickhouse-server at root directory. 
+cd "$DATA_DIR" + +for dir in "$ERROR_LOG_DIR" \ + "$LOG_DIR" \ + "$TMP_DIR" \ + "$USER_PATH" \ + "$FORMAT_SCHEMA_PATH" \ + "${DISKS_PATHS[@]}" \ + "${DISKS_METADATA_PATHS[@]}" +do + create_directory_and_do_chown "$dir" done # if clickhouse user is defined - create it (user "default" already exists out of box) diff --git a/docker/test/fasttest/Dockerfile b/docker/test/fasttest/Dockerfile index a38f59dacac..56ec0199849 100644 --- a/docker/test/fasttest/Dockerfile +++ b/docker/test/fasttest/Dockerfile @@ -22,7 +22,7 @@ RUN apt-get update \ zstd \ --yes --no-install-recommends -RUN pip3 install numpy scipy pandas Jinja2 +RUN pip3 install numpy==1.26.3 scipy==1.12.0 pandas==1.5.3 Jinja2==3.1.3 ARG odbc_driver_url="https://github.com/ClickHouse/clickhouse-odbc/releases/download/v1.1.4.20200302/clickhouse-odbc-1.1.4-Linux.tar.gz" diff --git a/docker/test/fuzzer/run-fuzzer.sh b/docker/test/fuzzer/run-fuzzer.sh index 8aeb06ec27b..050d4b68628 100755 --- a/docker/test/fuzzer/run-fuzzer.sh +++ b/docker/test/fuzzer/run-fuzzer.sh @@ -242,7 +242,7 @@ quit --create-query-fuzzer-runs=50 \ --queries-file $(ls -1 ch/tests/queries/0_stateless/*.sql | sort -R) \ $NEW_TESTS_OPT \ - > >(tail -n 100000 > fuzzer.log) \ + > fuzzer.log \ 2>&1 & fuzzer_pid=$! echo "Fuzzer pid is $fuzzer_pid" @@ -390,6 +390,7 @@ rg --text -F '' server.log > fatal.log ||: dmesg -T > dmesg.log ||: zstd --threads=0 server.log +zstd --threads=0 fuzzer.log cat > report.html < @@ -413,7 +414,7 @@ p.links a { padding: 5px; margin: 3px; background: #FFF; line-height: 2; white-s

AST Fuzzer for PR #${PR_TO_TEST} @ ${SHA_TO_TEST}

- - - - - - - + + localhost + 5432 + postgres + **** + schema1 + ``` Some parameters can be overridden by key value arguments: ``` sql -SELECT * FROM postgresql(postgres1, schema='schema1', table='table1'); +SELECT * FROM postgresql(postgres_creds, table='table1'); ``` ## Implementation Details {#implementation-details} diff --git a/docs/en/engines/table-engines/integrations/redis.md b/docs/en/engines/table-engines/integrations/redis.md index 8086a6503b8..3a07d150835 100644 --- a/docs/en/engines/table-engines/integrations/redis.md +++ b/docs/en/engines/table-engines/integrations/redis.md @@ -16,30 +16,32 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name name1 [type1], name2 [type2], ... -) ENGINE = Redis(host:port[, db_index[, password[, pool_size]]]) PRIMARY KEY(primary_key_name); +) ENGINE = Redis({host:port[, db_index[, password[, pool_size]]] | named_collection[, option=value [,..]] }) +PRIMARY KEY(primary_key_name); ``` **Engine Parameters** - `host:port` — Redis server address, you can ignore port and default Redis port 6379 will be used. - - `db_index` — Redis db index range from 0 to 15, default is 0. - - `password` — User password, default is blank string. - - `pool_size` — Redis max connection pool size, default is 16. - - `primary_key_name` - any column name in the column list. -- `primary` must be specified, it supports only one column in the primary key. The primary key will be serialized in binary as a Redis key. +:::note Serialization +`PRIMARY KEY` supports only one column. The primary key will be serialized in binary as a Redis key. +Columns other than the primary key will be serialized in binary as Redis value in corresponding order. +::: -- columns other than the primary key will be serialized in binary as Redis value in corresponding order. +Arguments also can be passed using [named collections](/docs/en/operations/named-collections.md). In this case `host` and `port` should be specified separately. This approach is recommended for production environment. At this moment, all parameters passed using named collections to redis are required. -- queries with key equals or in filtering will be optimized to multi keys lookup from Redis. If queries without filtering key full table scan will happen which is a heavy operation. +:::note Filtering +Queries with `key equals` or `in filtering` will be optimized to multi keys lookup from Redis. If queries without filtering key full table scan will happen which is a heavy operation. 
+::: ## Usage Example {#usage-example} -Create a table in ClickHouse which allows to read data from Redis: +Create a table in ClickHouse using `Redis` engine with plain arguments: ``` sql CREATE TABLE redis_table @@ -52,6 +54,31 @@ CREATE TABLE redis_table ENGINE = Redis('redis1:6379') PRIMARY KEY(key); ``` +Or using [named collections](/docs/en/operations/named-collections.md): + +``` + + + localhost + 6379 + **** + 16 + s0 + + +``` + +```sql +CREATE TABLE redis_table +( + `key` String, + `v1` UInt32, + `v2` String, + `v3` Float32 +) +ENGINE = Redis(redis_creds) PRIMARY KEY(key); +``` + Insert: ```sql diff --git a/docs/en/engines/table-engines/mergetree-family/mergetree.md b/docs/en/engines/table-engines/mergetree-family/mergetree.md index 6d60611ae4b..f185c11bab3 100644 --- a/docs/en/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md @@ -39,8 +39,8 @@ If you need to update rows frequently, we recommend using the [`ReplacingMergeTr ``` sql CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] ( - name1 [type1] [[NOT] NULL] [DEFAULT|MATERIALIZED|ALIAS|EPHEMERAL expr1] [COMMENT ...] [CODEC(codec1)] [STATISTIC(stat1)] [TTL expr1] [PRIMARY KEY], - name2 [type2] [[NOT] NULL] [DEFAULT|MATERIALIZED|ALIAS|EPHEMERAL expr2] [COMMENT ...] [CODEC(codec2)] [STATISTIC(stat2)] [TTL expr2] [PRIMARY KEY], + name1 [type1] [[NOT] NULL] [DEFAULT|MATERIALIZED|ALIAS|EPHEMERAL expr1] [COMMENT ...] [CODEC(codec1)] [STATISTIC(stat1)] [TTL expr1] [PRIMARY KEY] [SETTINGS (name = value, ...)], + name2 [type2] [[NOT] NULL] [DEFAULT|MATERIALIZED|ALIAS|EPHEMERAL expr2] [COMMENT ...] [CODEC(codec2)] [STATISTIC(stat2)] [TTL expr2] [PRIMARY KEY] [SETTINGS (name = value, ...)], ... INDEX index_name1 expr1 TYPE type1(...) [GRANULARITY value1], INDEX index_name2 expr2 TYPE type2(...) [GRANULARITY value2], @@ -56,7 +56,7 @@ ORDER BY expr [DELETE|TO DISK 'xxx'|TO VOLUME 'xxx' [, ...] ] [WHERE conditions] [GROUP BY key_expr [SET v1 = aggr_func(v1) [, v2 = aggr_func(v2) ...]] ] ] -[SETTINGS name=value, ...] +[SETTINGS name = value, ...] ``` For a description of parameters, see the [CREATE query description](/docs/en/sql-reference/statements/create/table.md). @@ -508,7 +508,7 @@ Indexes of type `set` can be utilized by all functions. The other index types ar | [notEquals(!=, <>)](/docs/en/sql-reference/functions/comparison-functions.md/#notequals) | ✔ | ✔ | ✔ | ✔ | ✔ | ✔ | | [like](/docs/en/sql-reference/functions/string-search-functions.md/#like) | ✔ | ✔ | ✔ | ✔ | ✗ | ✔ | | [notLike](/docs/en/sql-reference/functions/string-search-functions.md/#notlike) | ✔ | ✔ | ✔ | ✔ | ✗ | ✔ | -| [match](/docs/en/sql-reference/functions/string-search-functions.md/#match) | ✗ | ✗ | ✔ | ✔ | ✗ | ✗ | +| [match](/docs/en/sql-reference/functions/string-search-functions.md/#match) | ✗ | ✗ | ✔ | ✔ | ✗ | ✔ | | [startsWith](/docs/en/sql-reference/functions/string-functions.md/#startswith) | ✔ | ✔ | ✔ | ✔ | ✗ | ✔ | | [endsWith](/docs/en/sql-reference/functions/string-functions.md/#endswith) | ✗ | ✗ | ✔ | ✔ | ✗ | ✔ | | [multiSearchAny](/docs/en/sql-reference/functions/string-search-functions.md/#multisearchany) | ✗ | ✗ | ✔ | ✗ | ✗ | ✔ | @@ -620,7 +620,7 @@ The `TTL` clause can’t be used for key columns. 
#### Creating a table with `TTL`:

``` sql
-CREATE TABLE example_table
+CREATE TABLE tab
(
    d DateTime,
    a Int TTL d + INTERVAL 1 MONTH,
@@ -635,7 +635,7 @@ ORDER BY d;
#### Adding TTL to a column of an existing table

``` sql
-ALTER TABLE example_table
+ALTER TABLE tab
    MODIFY COLUMN
    c String TTL d + INTERVAL 1 DAY;
```

@@ -643,7 +643,7 @@ ALTER TABLE example_table
#### Altering TTL of the column

``` sql
-ALTER TABLE example_table
+ALTER TABLE tab
    MODIFY COLUMN
    c String TTL d + INTERVAL 1 MONTH;
```

@@ -681,7 +681,7 @@ If a column is not part of the `GROUP BY` expression and is not set explicitly i
#### Creating a table with `TTL`:

``` sql
-CREATE TABLE example_table
+CREATE TABLE tab
(
    d DateTime,
    a Int
@@ -697,7 +697,7 @@ TTL d + INTERVAL 1 MONTH DELETE,
#### Altering `TTL` of the table:

``` sql
-ALTER TABLE example_table
+ALTER TABLE tab
    MODIFY TTL d + INTERVAL 1 DAY;
```

@@ -1366,7 +1366,7 @@ In this sample configuration:
The statistic declaration is in the columns section of the `CREATE` query for tables from the `*MergeTree*` Family when we enable `set allow_experimental_statistic = 1`.

``` sql
-CREATE TABLE example_table
+CREATE TABLE tab
(
    a Int64 STATISTIC(tdigest),
    b Float64
@@ -1378,8 +1378,8 @@ ORDER BY a
We can also manipulate statistics with `ALTER` statements.

```sql
-ALTER TABLE example_table ADD STATISTIC b TYPE tdigest;
-ALTER TABLE example_table DROP STATISTIC a TYPE tdigest;
+ALTER TABLE tab ADD STATISTIC b TYPE tdigest;
+ALTER TABLE tab DROP STATISTIC a TYPE tdigest;
```

These lightweight statistics aggregate information about distribution of values in columns.
@@ -1390,3 +1390,42 @@ They can be used for query optimization when we enable `set allow_statistic_opti
- `tdigest`
    Stores distribution of values from numeric columns in [TDigest](https://github.com/tdunning/t-digest) sketch.
+
+## Column-level Settings {#column-level-settings}
+
+Certain MergeTree settings can be overridden at the column level:
+
+- `max_compress_block_size` — Maximum size of blocks of uncompressed data before compressing for writing to a table.
+- `min_compress_block_size` — Minimum size of blocks of uncompressed data required for compression when writing the next mark.
+
+Example:
+
+```sql
+CREATE TABLE tab
+(
+    id Int64,
+    document String SETTINGS (min_compress_block_size = 16777216, max_compress_block_size = 16777216)
+)
+ENGINE = MergeTree
+ORDER BY id
+```
+
+Column-level settings can be modified or removed using [ALTER MODIFY COLUMN](/docs/en/sql-reference/statements/alter/column.md), for example:
+
+- Remove `SETTINGS` from column declaration:
+
+```sql
+ALTER TABLE tab MODIFY COLUMN document REMOVE SETTINGS;
+```
+
+- Modify a setting:
+
+```sql
+ALTER TABLE tab MODIFY COLUMN document MODIFY SETTING min_compress_block_size = 8192;
+```
+
+- Reset one or more settings; this also removes the setting declaration in the column expression of the table's CREATE query:
+ +```sql +ALTER TABLE tab MODIFY COLUMN document RESET SETTING min_compress_block_size; +``` diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index ed67af48af7..a11c3e5ef19 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -2356,6 +2356,8 @@ $ clickhouse-client --query="SELECT * FROM {some_table} FORMAT Arrow" > {filenam ### Arrow format settings {#parquet-format-settings} - [output_format_arrow_low_cardinality_as_dictionary](/docs/en/operations/settings/settings-formats.md/#output_format_arrow_low_cardinality_as_dictionary) - enable output ClickHouse LowCardinality type as Dictionary Arrow type. Default value - `false`. +- [output_format_arrow_use_64_bit_indexes_for_dictionary](/docs/en/operations/settings/settings-formats.md/#output_format_arrow_use_64_bit_indexes_for_dictionary) - use 64-bit integer type for Dictionary indexes. Default value - `false`. +- [output_format_arrow_use_signed_indexes_for_dictionary](/docs/en/operations/settings/settings-formats.md/#output_format_arrow_use_signed_indexes_for_dictionary) - use signed integer type for Dictionary indexes. Default value - `true`. - [output_format_arrow_string_as_string](/docs/en/operations/settings/settings-formats.md/#output_format_arrow_string_as_string) - use Arrow String type instead of Binary for String columns. Default value - `false`. - [input_format_arrow_case_insensitive_column_matching](/docs/en/operations/settings/settings-formats.md/#input_format_arrow_case_insensitive_column_matching) - ignore case when matching Arrow columns with ClickHouse columns. Default value - `false`. - [input_format_arrow_allow_missing_columns](/docs/en/operations/settings/settings-formats.md/#input_format_arrow_allow_missing_columns) - allow missing columns while reading Arrow data. Default value - `false`. diff --git a/docs/en/operations/allocation-profiling.md b/docs/en/operations/allocation-profiling.md new file mode 100644 index 00000000000..64b4106a7e1 --- /dev/null +++ b/docs/en/operations/allocation-profiling.md @@ -0,0 +1,207 @@ +--- +slug: /en/operations/allocation-profiling +sidebar_label: "Allocation profiling" +title: "Allocation profiling" +--- + +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + +# Allocation profiling + +ClickHouse uses [jemalloc](https://github.com/jemalloc/jemalloc) as its global allocator that comes with some tools for allocation sampling and profiling. +To make allocation profiling more convenient, `SYSTEM` commands are provided along 4LW commands in Keeper. + +## Sampling allocations and flushing heap profiles + +If we want to sample and profile allocations in `jemalloc`, we need to start ClickHouse/Keeper with profiling enabled using environment variable `MALLOC_CONF`. + +```sh +MALLOC_CONF=background_thread:true,prof:true +``` + +`jemalloc` will sample allocation and store the information internally. + +We can tell `jemalloc` to flush current profile by running: + + + + + SYSTEM JEMALLOC FLUSH PROFILE + + + + + echo jmfp | nc localhost 9181 + + + + +By default, heap profile file will be generated in `/tmp/jemalloc_clickhouse._pid_._seqnum_.heap` where `_pid_` is the PID of ClickHouse and `_seqnum_` is the global sequence number for the current heap profile. +For Keeper, the default file is `/tmp/jemalloc_keeper._pid_._seqnum_.heap` following the same rules. + +A different location can be defined by appending the `MALLOC_CONF` environment variable with `prof_prefix` option. 
+For example, if we want to generate profiles in `/data` folder where the prefix for filename will be `my_current_profile` we can run ClickHouse/Keeper with following environment variable: +```sh +MALLOC_CONF=background_thread:true,prof:true,prof_prefix:/data/my_current_profile +``` +Generated file will append to prefix PID and sequence number. + +## Analyzing heap profiles + +After we generated heap profiles, we need to analyze them. +For that, we need to use `jemalloc`'s tool called [jeprof](https://github.com/jemalloc/jemalloc/blob/dev/bin/jeprof.in) which can be installed in multiple ways: +- installing `jemalloc` using system's package manager +- cloning [jemalloc repo](https://github.com/jemalloc/jemalloc) and running autogen.sh from the root folder that will provide you with `jeprof` script inside the `bin` folder + +:::note +`jeprof` uses `addr2line` to generate stacktraces which can be really slow. +If that’s the case, we recommend installing an [alternative implementation](https://github.com/gimli-rs/addr2line) of the tool. + +``` +git clone https://github.com/gimli-rs/addr2line +cd addr2line +cargo b --examples -r +cp ./target/release/examples/addr2line path/to/current/addr2line +``` +::: + +There are many different formats to generate from the heap profile using `jeprof`. +We recommend to run `jeprof --help` to check usage and many different options the tool provides. + +In general, `jeprof` command will look like this: + +```sh +jeprof path/to/binary path/to/heap/profile --output_format [ > output_file] +``` + +If we want to compare which allocations happened between 2 profiles we can set the base argument: + +```sh +jeprof path/to/binary --base path/to/first/heap/profile path/to/second/heap/profile --output_format [ > output_file] +``` + +For example: + +- if we want to generate a text file with each procedure written per line: + +```sh +jeprof path/to/binary path/to/heap/profile --text > result.txt +``` + +- if we want to generate a PDF file with call-graph: + +```sh +jeprof path/to/binary path/to/heap/profile --pdf > result.pdf +``` + +### Generating flame graph + +`jeprof` allows us to generate collapsed stacks for building flame graphs. + +We need to use `--collapsed` argument: + +```sh +jeprof path/to/binary path/to/heap/profile --collapsed > result.collapsed +``` + +After that, we can use many different tools to visualize collapsed stacks. + +Most popular would be [FlameGraph](https://github.com/brendangregg/FlameGraph) which contains a script called `flamegraph.pl`: + +```sh +cat result.collapsed | /path/to/FlameGraph/flamegraph.pl --color=mem --title="Allocation Flame Graph" --width 2400 > result.svg +``` + +Another interesting tool is [speedscope](https://www.speedscope.app/) that allows you to analyze collected stacks in a more interactive way. + +## Controlling allocation profiler during runtime + +If ClickHouse/Keeper were started with enabled profiler, they support additional commands for disabling/enabling allocation profiling during runtime. +Using those commands, it's easier to profile only specific intervals. + +Disable profiler: + + + + + SYSTEM JEMALLOC DISABLE PROFILE + + + + + echo jmdp | nc localhost 9181 + + + + +Enable profiler: + + + + + SYSTEM JEMALLOC ENABLE PROFILE + + + + + echo jmep | nc localhost 9181 + + + + +It's also possible to control the initial state of the profiler by setting `prof_active` option which is enabled by default. 
+For example, if we don't want to sample allocations during startup but only after we enable the profiler, we can start ClickHouse/Keeper with following environment variable: +```sh +MALLOC_CONF=background_thread:true,prof:true,prof_active:false +``` + +and enable profiler at a later point. + +## Additional options for profiler + +`jemalloc` has many different options available related to profiler which can be controlled by modifying `MALLOC_CONF` environment variable. +For example, interval between allocation samples can be controlled with `lg_prof_sample`. +If you want to dump heap profile every N bytes you can enable it using `lg_prof_interval`. + +We recommend to check `jemalloc`s [reference page](https://jemalloc.net/jemalloc.3.html) for such options. + +## Other resources + +ClickHouse/Keeper expose `jemalloc` related metrics in many different ways. + +:::warning Warning +It's important to be aware that none of these metrics are synchronized with each other and values may drift. +::: + +### System table `asynchronous_metrics` + +```sql +SELECT * +FROM system.asynchronous_metrics +WHERE metric ILIKE '%jemalloc%' +FORMAT Vertical +``` + +[Reference](/en/operations/system-tables/asynchronous_metrics) + +### System table `jemalloc_bins` + +Contains information about memory allocations done via jemalloc allocator in different size classes (bins) aggregated from all arenas. + +[Reference](/en/operations/system-tables/jemalloc_bins) + +### Prometheus + +All `jemalloc` related metrics from `asynchronous_metrics` are also exposed using Prometheus endpoint in both ClickHouse and Keeper. + +[Reference](/en/operations/server-configuration-parameters/settings#prometheus) + +### `jmst` 4LW command in Keeper + +Keeper supports `jmst` 4LW command which returns [basic allocator statistics](https://github.com/jemalloc/jemalloc/wiki/Use-Case%3A-Basic-Allocator-Statistics). + +Example: +```sh +echo jmst | nc localhost 9181 +``` diff --git a/docs/en/operations/cluster-discovery.md b/docs/en/operations/cluster-discovery.md index a925afac916..d3a89d6887d 100644 --- a/docs/en/operations/cluster-discovery.md +++ b/docs/en/operations/cluster-discovery.md @@ -65,6 +65,20 @@ With Cluster Discovery, rather than defining each node explicitly, you simply sp /clickhouse/discovery/cluster_name + + + + + + + + + + + + + + diff --git a/docs/en/operations/named-collections.md b/docs/en/operations/named-collections.md index 06c05929ffa..c9d94dd95ee 100644 --- a/docs/en/operations/named-collections.md +++ b/docs/en/operations/named-collections.md @@ -5,9 +5,9 @@ sidebar_label: "Named collections" title: "Named collections" --- -Named collections provide a way to store collections of key-value pairs to be +Named collections provide a way to store collections of key-value pairs to be used to configure integrations with external sources. You can use named collections with -dictionaries, tables, table functions, and object storage. +dictionaries, tables, table functions, and object storage. Named collections can be configured with DDL or in configuration files and are applied when ClickHouse starts. They simplify the creation of objects and the hiding of credentials @@ -64,7 +64,7 @@ To manage named collections with DDL a user must have the `named_control_collect ``` :::tip -In the above example the `password_sha256_hex` value is the hexadecimal representation of the SHA256 hash of the password. 
This configuration for the user `default` has the attribute `replace=true` as in the default configuration has a plain text `password` set, and it is not possible to have both plain text and sha256 hex passwords set for a user. +In the above example the `password_sha256_hex` value is the hexadecimal representation of the SHA256 hash of the password. This configuration for the user `default` has the attribute `replace=true` as in the default configuration has a plain text `password` set, and it is not possible to have both plain text and sha256 hex passwords set for a user. ::: ## Storing named collections in configuration files @@ -296,7 +296,6 @@ host = '127.0.0.1', port = 5432, database = 'test', schema = 'test_schema', -connection_pool_size = 8 ``` Example of configuration: @@ -310,7 +309,6 @@ Example of configuration: 5432 test test_schema - 8 @@ -445,4 +443,3 @@ SELECT dictGet('dict', 'b', 1); │ a │ └─────────────────────────┘ ``` - diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md index 48434d992e2..1cbf9deccc6 100644 --- a/docs/en/operations/server-configuration-parameters/settings.md +++ b/docs/en/operations/server-configuration-parameters/settings.md @@ -2866,3 +2866,10 @@ This also allows a mix of resolver types can be used. ### disable_tunneling_for_https_requests_over_http_proxy {#disable_tunneling_for_https_requests_over_http_proxy} By default, tunneling (i.e, `HTTP CONNECT`) is used to make `HTTPS` requests over `HTTP` proxy. This setting can be used to disable it. + +## max_materialized_views_count_for_table {#max_materialized_views_count_for_table} + +A limit on the number of materialized views attached to a table. +Note that only directly dependent views are considered here, and the creation of one view on top of another view is not considered. + +Default value: `0`. diff --git a/docs/en/operations/settings/query-complexity.md b/docs/en/operations/settings/query-complexity.md index 1cb7ec9dced..9a80f977ed1 100644 --- a/docs/en/operations/settings/query-complexity.md +++ b/docs/en/operations/settings/query-complexity.md @@ -172,7 +172,7 @@ If you set `timeout_before_checking_execution_speed `to 0, ClickHouse will use c ## timeout_overflow_mode {#timeout-overflow-mode} -What to do if the query is run longer than `max_execution_time`: `throw` or `break`. By default, `throw`. +What to do if the query is run longer than `max_execution_time` or the estimated running time is longer than `max_estimated_execution_time`: `throw` or `break`. By default, `throw`. # max_execution_time_leaf @@ -214,6 +214,10 @@ A maximum number of execution bytes per second. Checked on every data block when Checks that execution speed is not too slow (no less than ‘min_execution_speed’), after the specified time in seconds has expired. +## max_estimated_execution_time {#max_estimated_execution_time} + +Maximum query estimate execution time in seconds. Checked on every data block when ‘timeout_before_checking_execution_speed’ expires. + ## max_columns_to_read {#max-columns-to-read} A maximum number of columns that can be read from a table in a single query. If a query requires reading a greater number of columns, it throws an exception. 
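To make the interaction between these limits concrete, here is a hedged sketch; the query, the `numbers()` source and the threshold values are illustrative and not part of this change:

```sql
-- Illustrative values: check the speed estimate after 5 seconds and abort the
-- query once the estimated total runtime exceeds 60 seconds.
SELECT sum(number)
FROM numbers(100000000000)
SETTINGS
    timeout_before_checking_execution_speed = 5,
    max_estimated_execution_time = 60,
    timeout_overflow_mode = 'throw';
```

Setting `timeout_overflow_mode = 'break'` instead would return the partial result accumulated so far rather than throwing an exception.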
diff --git a/docs/en/operations/settings/settings-formats.md b/docs/en/operations/settings/settings-formats.md index 43a73844b79..eb09af44efd 100644 --- a/docs/en/operations/settings/settings-formats.md +++ b/docs/en/operations/settings/settings-formats.md @@ -1269,6 +1269,28 @@ Possible values: Default value: `0`. +### output_format_arrow_use_signed_indexes_for_dictionary {#output_format_arrow_use_signed_indexes_for_dictionary} + +Use signed integer types instead of unsigned in `DICTIONARY` type of the [Arrow](../../interfaces/formats.md/#data-format-arrow) format during [LowCardinality](../../sql-reference/data-types/lowcardinality.md) output when `output_format_arrow_low_cardinality_as_dictionary` is enabled. + +Possible values: + +- 0 — Unsigned integer types are used for indexes in `DICTIONARY` type. +- 1 — Signed integer types are used for indexes in `DICTIONARY` type. + +Default value: `1`. + +### output_format_arrow_use_64_bit_indexes_for_dictionary {#output_format_arrow_use_64_bit_indexes_for_dictionary} + +Use 64-bit integer type in `DICTIONARY` type of the [Arrow](../../interfaces/formats.md/#data-format-arrow) format during [LowCardinality](../../sql-reference/data-types/lowcardinality.md) output when `output_format_arrow_low_cardinality_as_dictionary` is enabled. + +Possible values: + +- 0 — Type for indexes in `DICTIONARY` type is determined automatically. +- 1 — 64-bit integer type is used for indexes in `DICTIONARY` type. + +Default value: `0`. + ### output_format_arrow_string_as_string {#output_format_arrow_string_as_string} Use Arrow String type instead of Binary for String columns. @@ -1575,7 +1597,13 @@ Result: Use ANSI escape sequences to paint colors in Pretty formats. -Enabled by default. +possible values: + +- `0` — Disabled. Pretty formats do not use ANSI escape sequences. +- `1` — Enabled. Pretty formats will use ANSI escape sequences except for `NoEscapes` formats. +- `auto` - Enabled if `stdout` is a terminal except for `NoEscapes` formats. + +Default value is `auto`. ### output_format_pretty_grid_charset {#output_format_pretty_grid_charset} diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index f085fe1abcd..c673464b23d 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -2040,6 +2040,32 @@ SELECT * FROM test_table └───┘ ``` +## update_insert_deduplication_token_in_dependent_materialized_views {#update-insert-deduplication-token-in-dependent-materialized-views} + +Allows to update `insert_deduplication_token` with table identifier during insert in dependent materialized views, if setting `deduplicate_blocks_in_dependent_materialized_views` is enabled and `insert_deduplication_token` is set. + +Possible values: + + 0 — Disabled. + 1 — Enabled. + +Default value: 0. + +Usage: + +If setting `deduplicate_blocks_in_dependent_materialized_views` is enabled, `insert_deduplication_token` is passed to dependent materialized views. But in complex INSERT flows it is possible that we want to avoid deduplication for dependent materialized views. + +Example: +``` +landing -┬--> mv_1_1 ---> ds_1_1 ---> mv_2_1 --┬-> ds_2_1 ---> mv_3_1 ---> ds_3_1 + | | + └--> mv_1_2 ---> ds_1_2 ---> mv_2_2 --┘ +``` + +In this example we want to avoid deduplication for two different blocks generated from `mv_2_1` and `mv_2_2` that will be inserted into `ds_2_1`. 
Without `update_insert_deduplication_token_in_dependent_materialized_views` setting enabled, those two different blocks will be deduplicated, because different blocks from `mv_2_1` and `mv_2_2` will have the same `insert_deduplication_token`. + +If setting `update_insert_deduplication_token_in_dependent_materialized_views` is enabled, during each insert into dependent materialized views `insert_deduplication_token` is updated with table identifier, so block from `mv_2_1` and block from `mv_2_2` will have different `insert_deduplication_token` and will not be deduplicated. + ## insert_keeper_max_retries The setting sets the maximum number of retries for ClickHouse Keeper (or ZooKeeper) requests during insert into replicated MergeTree. Only Keeper requests which failed due to network error, Keeper session timeout, or request timeout are considered for retries. @@ -5165,7 +5191,7 @@ SETTINGS(dictionary_use_async_executor=1, max_threads=8); ## storage_metadata_write_full_object_key {#storage_metadata_write_full_object_key} When set to `true` the metadata files are written with `VERSION_FULL_OBJECT_KEY` format version. With that format full object storage key names are written to the metadata files. -When set to `false` the metadata files are written with the previous format version, `VERSION_INLINE_DATA`. With that format only suffixes of object storage key names are are written to the metadata files. The prefix for all of object storage key names is set in configurations files at `storage_configuration.disks` section. +When set to `false` the metadata files are written with the previous format version, `VERSION_INLINE_DATA`. With that format only suffixes of object storage key names are are written to the metadata files. The prefix for all of object storage key names is set in configurations files at `storage_configuration.disks` section. Default value: `false`. @@ -5176,6 +5202,95 @@ When set to `false` than all attempts are made with identical timeouts. Default value: `true`. +## allow_experimental_variant_type {#allow_experimental_variant_type} + +Allows creation of experimental [Variant](../../sql-reference/data-types/variant.md). + +Default value: `false`. + +## use_variant_as_common_type {#use_variant_as_common_type} + +Allows to use `Variant` type as a result type for [if](../../sql-reference/functions/conditional-functions.md/#if)/[multiIf](../../sql-reference/functions/conditional-functions.md/#multiif)/[array](../../sql-reference/functions/array-functions.md)/[map](../../sql-reference/functions/tuple-map-functions.md) functions when there is no common type for argument types. 
+ +Example: + +```sql +SET use_variant_as_common_type = 1; +SELECT toTypeName(if(number % 2, number, range(number))) as variant_type FROM numbers(1); +SELECT if(number % 2, number, range(number)) as variant FROM numbers(5); +``` + +```text +┌─variant_type───────────────────┐ +│ Variant(Array(UInt64), UInt64) │ +└────────────────────────────────┘ +┌─variant───┐ +│ [] │ +│ 1 │ +│ [0,1] │ +│ 3 │ +│ [0,1,2,3] │ +└───────────┘ +``` + +```sql +SET use_variant_as_common_type = 1; +SELECT toTypeName(multiIf((number % 4) = 0, 42, (number % 4) = 1, [1, 2, 3], (number % 4) = 2, 'Hello, World!', NULL)) AS variant_type FROM numbers(1); +SELECT multiIf((number % 4) = 0, 42, (number % 4) = 1, [1, 2, 3], (number % 4) = 2, 'Hello, World!', NULL) AS variant FROM numbers(4); +``` + +```text +─variant_type─────────────────────────┐ +│ Variant(Array(UInt8), String, UInt8) │ +└──────────────────────────────────────┘ + +┌─variant───────┐ +│ 42 │ +│ [1,2,3] │ +│ Hello, World! │ +│ ᴺᵁᴸᴸ │ +└───────────────┘ +``` + +```sql +SET use_variant_as_common_type = 1; +SELECT toTypeName(array(range(number), number, 'str_' || toString(number))) as array_of_variants_type from numbers(1); +SELECT array(range(number), number, 'str_' || toString(number)) as array_of_variants FROM numbers(3); +``` + +```text +┌─array_of_variants_type────────────────────────┐ +│ Array(Variant(Array(UInt64), String, UInt64)) │ +└───────────────────────────────────────────────┘ + +┌─array_of_variants─┐ +│ [[],0,'str_0'] │ +│ [[0],1,'str_1'] │ +│ [[0,1],2,'str_2'] │ +└───────────────────┘ +``` + +```sql +SET use_variant_as_common_type = 1; +SELECT toTypeName(map('a', range(number), 'b', number, 'c', 'str_' || toString(number))) as map_of_variants_type from numbers(1); +SELECT map('a', range(number), 'b', number, 'c', 'str_' || toString(number)) as map_of_variants FROM numbers(3); +``` + +```text +┌─map_of_variants_type────────────────────────────────┐ +│ Map(String, Variant(Array(UInt64), String, UInt64)) │ +└─────────────────────────────────────────────────────┘ + +┌─map_of_variants───────────────┐ +│ {'a':[],'b':0,'c':'str_0'} │ +│ {'a':[0],'b':1,'c':'str_1'} │ +│ {'a':[0,1],'b':2,'c':'str_2'} │ +└───────────────────────────────┘ +``` + + +Default value: `false`. + ## max_partition_size_to_drop Restriction on dropping partitions in query time. @@ -5197,3 +5312,13 @@ The value 0 means that you can delete all tables without any restrictions. :::note This query setting overwrites its server setting equivalent, see [max_table_size_to_drop](/docs/en/operations/server-configuration-parameters/settings.md/#max-table-size-to-drop) ::: + +## iceberg_engine_ignore_schema_evolution {#iceberg_engine_ignore_schema_evolution} + +Allow to ignore schema evolution in Iceberg table engine and read all data using schema specified by the user on table creation or latest schema parsed from metadata on table creation. + +:::note +Enabling this setting can lead to incorrect result as in case of evolved schema all data files will be read using the same schema. +::: + +Default value: 'false'. \ No newline at end of file diff --git a/docs/en/operations/system-tables/asynchronous_metric_log.md b/docs/en/operations/system-tables/asynchronous_metric_log.md index 65b2e349707..e63ab65ba07 100644 --- a/docs/en/operations/system-tables/asynchronous_metric_log.md +++ b/docs/en/operations/system-tables/asynchronous_metric_log.md @@ -10,7 +10,7 @@ Columns: - `hostname` ([LowCardinality(String)](../../sql-reference/data-types/string.md)) — Hostname of the server executing the query. 
- `event_date` ([Date](../../sql-reference/data-types/date.md)) — Event date. - `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Event time. -- `name` ([String](../../sql-reference/data-types/string.md)) — Metric name. +- `metric` ([String](../../sql-reference/data-types/string.md)) — Metric name. - `value` ([Float64](../../sql-reference/data-types/float.md)) — Metric value. **Example** diff --git a/docs/en/operations/system-tables/metrics.md b/docs/en/operations/system-tables/metrics.md index 3dec6345eb6..898e6ae2e2c 100644 --- a/docs/en/operations/system-tables/metrics.md +++ b/docs/en/operations/system-tables/metrics.md @@ -287,7 +287,7 @@ Number of threads in the HashedDictionary thread pool running a task. ### IOPrefetchThreads -Number of threads in the IO prefertch thread pool. +Number of threads in the IO prefetch thread pool. ### IOPrefetchThreadsActive diff --git a/docs/en/operations/system-tables/replication_queue.md b/docs/en/operations/system-tables/replication_queue.md index dd8f6328688..d63517291a4 100644 --- a/docs/en/operations/system-tables/replication_queue.md +++ b/docs/en/operations/system-tables/replication_queue.md @@ -49,7 +49,7 @@ Columns: - `last_attempt_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Date and time when the task was last attempted. -- `num_postponed` ([UInt32](../../sql-reference/data-types/int-uint.md)) — The number of postponed tasks. +- `num_postponed` ([UInt32](../../sql-reference/data-types/int-uint.md)) — The number of times the action was postponed. - `postpone_reason` ([String](../../sql-reference/data-types/string.md)) — The reason why the task was postponed. diff --git a/docs/en/sql-reference/aggregate-functions/reference/index.md b/docs/en/sql-reference/aggregate-functions/reference/index.md index b1f2c5bacbb..1922672bee9 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/index.md +++ b/docs/en/sql-reference/aggregate-functions/reference/index.md @@ -88,6 +88,7 @@ ClickHouse-specific aggregate functions: - [quantileTDigestWeighted](/docs/en/sql-reference/aggregate-functions/reference/quantiletdigestweighted.md) - [quantileBFloat16](/docs/en/sql-reference/aggregate-functions/reference/quantilebfloat16.md#quantilebfloat16) - [quantileBFloat16Weighted](/docs/en/sql-reference/aggregate-functions/reference/quantilebfloat16.md#quantilebfloat16weighted) +- [quantileDD](/docs/en/sql-reference/aggregate-functions/reference/quantileddsketch.md#quantileddsketch) - [simpleLinearRegression](/docs/en/sql-reference/aggregate-functions/reference/simplelinearregression.md) - [stochasticLinearRegression](/docs/en/sql-reference/aggregate-functions/reference/stochasticlinearregression.md) - [stochasticLogisticRegression](/docs/en/sql-reference/aggregate-functions/reference/stochasticlogisticregression.md) @@ -104,4 +105,3 @@ ClickHouse-specific aggregate functions: - [sparkBar](./sparkbar.md) - [sumCount](./sumcount.md) - [largestTriangleThreeBuckets](./largestTriangleThreeBuckets.md) - diff --git a/docs/en/sql-reference/aggregate-functions/reference/median.md b/docs/en/sql-reference/aggregate-functions/reference/median.md index f20b23a0c8b..2a166c83dad 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/median.md +++ b/docs/en/sql-reference/aggregate-functions/reference/median.md @@ -18,6 +18,7 @@ Functions: - `medianTDigest` — Alias for [quantileTDigest](../../../sql-reference/aggregate-functions/reference/quantiletdigest.md#quantiletdigest). 
- `medianTDigestWeighted` — Alias for [quantileTDigestWeighted](../../../sql-reference/aggregate-functions/reference/quantiletdigestweighted.md#quantiletdigestweighted). - `medianBFloat16` — Alias for [quantileBFloat16](../../../sql-reference/aggregate-functions/reference/quantilebfloat16.md#quantilebfloat16). +- `medianDD` — Alias for [quantileDD](../../../sql-reference/aggregate-functions/reference/quantileddsketch.md#quantileddsketch). **Example** diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantileddsketch.md b/docs/en/sql-reference/aggregate-functions/reference/quantileddsketch.md new file mode 100644 index 00000000000..f9acd2e20cb --- /dev/null +++ b/docs/en/sql-reference/aggregate-functions/reference/quantileddsketch.md @@ -0,0 +1,61 @@ +--- +slug: /en/sql-reference/aggregate-functions/reference/quantileddsketch +sidebar_position: 211 +title: quantileDD +--- + +Computes an approximate [quantile](https://en.wikipedia.org/wiki/Quantile) of a sample with relative-error guarantees. It works by building a [DD](https://www.vldb.org/pvldb/vol12/p2195-masson.pdf). + +**Syntax** + +``` sql +quantileDDsketch[relative_accuracy, (level)](expr) +``` + +**Arguments** + +- `expr` — Column with numeric data. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md). + +**Parameters** + +- `relative_accuracy` — Relative accuracy of the quantile. Possible values are in the range from 0 to 1. [Float](../../../sql-reference/data-types/float.md). The size of the sketch depends on the range of the data and the relative accuracy. The larger the range and the smaller the relative accuracy, the larger the sketch. The rough memory size of the of the sketch is `log(max_value/min_value)/relative_accuracy`. The recommended value is 0.001 or higher. + +- `level` — Level of quantile. Optional. Possible values are in the range from 0 to 1. Default value: 0.5. [Float](../../../sql-reference/data-types/float.md). + +**Returned value** + +- Approximate quantile of the specified level. + +Type: [Float64](../../../sql-reference/data-types/float.md#float32-float64). + +**Example** + +Input table has an integer and a float columns: + +``` text +┌─a─┬─────b─┐ +│ 1 │ 1.001 │ +│ 2 │ 1.002 │ +│ 3 │ 1.003 │ +│ 4 │ 1.004 │ +└───┴───────┘ +``` + +Query to calculate 0.75-quantile (third quartile): + +``` sql +SELECT quantileDD(0.01, 0.75)(a), quantileDD(0.01, 0.75)(b) FROM example_table; +``` + +Result: + +``` text +┌─quantileDD(0.01, 0.75)(a)─┬─quantileDD(0.01, 0.75)(b)─┐ +│ 2.974233423476717 │ 1.01 │ +└─────────────────────────────────┴─────────────────────────────────┘ +``` + +**See Also** + +- [median](../../../sql-reference/aggregate-functions/reference/median.md#median) +- [quantiles](../../../sql-reference/aggregate-functions/reference/quantiles.md#quantiles) diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantiles.md b/docs/en/sql-reference/aggregate-functions/reference/quantiles.md index 38db39d2eec..e2a5bc53e32 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/quantiles.md +++ b/docs/en/sql-reference/aggregate-functions/reference/quantiles.md @@ -9,7 +9,7 @@ sidebar_position: 201 Syntax: `quantiles(level1, level2, …)(x)` -All the quantile functions also have corresponding quantiles functions: `quantiles`, `quantilesDeterministic`, `quantilesTiming`, `quantilesTimingWeighted`, `quantilesExact`, `quantilesExactWeighted`, `quantileInterpolatedWeighted`, `quantilesTDigest`, `quantilesBFloat16`. 
These functions calculate all the quantiles of the listed levels in one pass, and return an array of the resulting values. +All the quantile functions also have corresponding quantiles functions: `quantiles`, `quantilesDeterministic`, `quantilesTiming`, `quantilesTimingWeighted`, `quantilesExact`, `quantilesExactWeighted`, `quantileInterpolatedWeighted`, `quantilesTDigest`, `quantilesBFloat16`, `quantilesDD`. These functions calculate all the quantiles of the listed levels in one pass, and return an array of the resulting values. ## quantilesExactExclusive diff --git a/docs/en/sql-reference/data-types/float.md b/docs/en/sql-reference/data-types/float.md index f1b99153b41..be7b2a7fcd8 100644 --- a/docs/en/sql-reference/data-types/float.md +++ b/docs/en/sql-reference/data-types/float.md @@ -35,8 +35,8 @@ Types are equivalent to types of C: Aliases: -- `Float32` — `FLOAT`. -- `Float64` — `DOUBLE`. +- `Float32` — `FLOAT`, `REAL`, `SINGLE`. +- `Float64` — `DOUBLE`, `DOUBLE PRECISION`. When creating tables, numeric parameters for floating point numbers can be set (e.g. `FLOAT(12)`, `FLOAT(15, 22)`, `DOUBLE(12)`, `DOUBLE(4, 18)`), but ClickHouse ignores them. diff --git a/docs/en/sql-reference/data-types/int-uint.md b/docs/en/sql-reference/data-types/int-uint.md index b551143d92f..520454a859f 100644 --- a/docs/en/sql-reference/data-types/int-uint.md +++ b/docs/en/sql-reference/data-types/int-uint.md @@ -21,10 +21,10 @@ When creating tables, numeric parameters for integer numbers can be set (e.g. `T Aliases: -- `Int8` — `TINYINT`, `BOOL`, `BOOLEAN`, `INT1`. -- `Int16` — `SMALLINT`, `INT2`. -- `Int32` — `INT`, `INT4`, `INTEGER`. -- `Int64` — `BIGINT`. +- `Int8` — `TINYINT`, `INT1`, `BYTE`, `TINYINT SIGNED`, `INT1 SIGNED`. +- `Int16` — `SMALLINT`, `SMALLINT SIGNED`. +- `Int32` — `INT`, `INTEGER`, `MEDIUMINT`, `MEDIUMINT SIGNED`, `INT SIGNED`, `INTEGER SIGNED`. +- `Int64` — `BIGINT`, `SIGNED`, `BIGINT SIGNED`, `TIME`. ## UInt Ranges @@ -34,3 +34,11 @@ Aliases: - `UInt64` — \[0 : 18446744073709551615\] - `UInt128` — \[0 : 340282366920938463463374607431768211455\] - `UInt256` — \[0 : 115792089237316195423570985008687907853269984665640564039457584007913129639935\] + +Aliases: + +- `UInt8` — `TINYINT UNSIGNED`, `INT1 UNSIGNED`. +- `UInt16` — `SMALLINT UNSIGNED`. +- `UInt32` — `MEDIUMINT UNSIGNED`, `INT UNSIGNED`, `INTEGER UNSIGNED` +- `UInt64` — `UNSIGNED`, `BIGINT UNSIGNED`, `BIT`, `SET` + diff --git a/docs/en/sql-reference/data-types/json.md b/docs/en/sql-reference/data-types/json.md index f727f0d75f7..fd548a0d5a2 100644 --- a/docs/en/sql-reference/data-types/json.md +++ b/docs/en/sql-reference/data-types/json.md @@ -7,7 +7,7 @@ sidebar_label: JSON # JSON :::note -This feature is experimental and is not production ready. If you need to work with JSON documents, consider using [this guide](/docs/en/integrations/data-ingestion/data-formats/json.md) instead. +This feature is experimental and is not production-ready. If you need to work with JSON documents, consider using [this guide](/docs/en/integrations/data-ingestion/data-formats/json.md) instead. ::: Stores JavaScript Object Notation (JSON) documents in a single column. @@ -15,7 +15,8 @@ Stores JavaScript Object Notation (JSON) documents in a single column. `JSON` is an alias for `Object('json')`. :::note -The JSON data type is an experimental feature. To use it, set `allow_experimental_object_type = 1`. +The JSON data type is an obsolete feature. Do not use it. +If you want to use it, set `allow_experimental_object_type = 1`. 
::: ## Example diff --git a/docs/en/sql-reference/data-types/nullable.md b/docs/en/sql-reference/data-types/nullable.md index 28180f7f991..5504765e4a0 100644 --- a/docs/en/sql-reference/data-types/nullable.md +++ b/docs/en/sql-reference/data-types/nullable.md @@ -4,11 +4,11 @@ sidebar_position: 55 sidebar_label: Nullable --- -# Nullable(typename) +# Nullable(T) -Allows to store special marker ([NULL](../../sql-reference/syntax.md)) that denotes “missing value” alongside normal values allowed by `TypeName`. For example, a `Nullable(Int8)` type column can store `Int8` type values, and the rows that do not have a value will store `NULL`. +Allows to store special marker ([NULL](../../sql-reference/syntax.md)) that denotes “missing value” alongside normal values allowed by `T`. For example, a `Nullable(Int8)` type column can store `Int8` type values, and the rows that do not have a value will store `NULL`. -For a `TypeName`, you can’t use composite data types [Array](../../sql-reference/data-types/array.md), [Map](../../sql-reference/data-types/map.md) and [Tuple](../../sql-reference/data-types/tuple.md). Composite data types can contain `Nullable` type values, such as `Array(Nullable(Int8))`. +`T` can’t be any of the composite data types [Array](../../sql-reference/data-types/array.md), [Map](../../sql-reference/data-types/map.md) and [Tuple](../../sql-reference/data-types/tuple.md) but composite data types can contain `Nullable` type values, e.g. `Array(Nullable(Int8))`. A `Nullable` type field can’t be included in table indexes. diff --git a/docs/en/sql-reference/data-types/variant.md b/docs/en/sql-reference/data-types/variant.md new file mode 100644 index 00000000000..0058e13b4ca --- /dev/null +++ b/docs/en/sql-reference/data-types/variant.md @@ -0,0 +1,245 @@ +--- +slug: /en/sql-reference/data-types/json +sidebar_position: 55 +sidebar_label: Variant +--- + +# Variant(T1, T2, T3, ...) + +This type represents a union of other data types. Type `Variant(T1, T2, ..., TN)` means that each row of this type +has a value of either type `T1` or `T2` or ... or `TN` or none of them (`NULL` value). + +The order of nested types doesn't matter: Variant(T1, T2) = Variant(T2, T1). +Nested types can be arbitrary types except Nullable(...), LowCardinality(Nullable(...)) and Variant(...) types. + +:::note +The Variant data type is an experimental feature. To use it, set `allow_experimental_variant_type = 1`. +::: + +## Creating Variant + +Using `Variant` type in table column definition: + +```sql +CREATE TABLE test (v Variant(UInt64, String, Array(UInt64))) ENGINE = Memory; +INSERT INTO test VALUES (NULL), (42), ('Hello, World!'), ([1, 2, 3]); +SELECT v FROM test; +``` + +```text +┌─v─────────────┐ +│ ᴺᵁᴸᴸ │ +│ 42 │ +│ Hello, World! │ +│ [1,2,3] │ +└───────────────┘ +``` + +Using CAST from ordinary columns: + +```sql +SELECT toTypeName(variant) as type_name, 'Hello, World!'::Variant(UInt64, String, Array(UInt64)) as variant; +``` + +```text +┌─type_name──────────────────────────────┬─variant───────┐ +│ Variant(Array(UInt64), String, UInt64) │ Hello, World! 
│ +└────────────────────────────────────────┴───────────────┘ +``` + +Using functions `if/multiIf` when arguments don't have common type (setting `use_variant_as_common_type` should be enabled for it): + +```sql +SET use_variant_as_common_type = 1; +SELECT if(number % 2, number, range(number)) as variant FROM numbers(5); +``` + +```text +┌─variant───┐ +│ [] │ +│ 1 │ +│ [0,1] │ +│ 3 │ +│ [0,1,2,3] │ +└───────────┘ +``` + +```sql +SET use_variant_as_common_type = 1; +SELECT multiIf((number % 4) = 0, 42, (number % 4) = 1, [1, 2, 3], (number % 4) = 2, 'Hello, World!', NULL) AS variant FROM numbers(4); +``` + +```text +┌─variant───────┐ +│ 42 │ +│ [1,2,3] │ +│ Hello, World! │ +│ ᴺᵁᴸᴸ │ +└───────────────┘ +``` + +Using functions 'array/map' if array elements/map values don't have common type (setting `use_variant_as_common_type` should be enabled for it): + +```sql +SET use_variant_as_common_type = 1; +SELECT array(range(number), number, 'str_' || toString(number)) as array_of_variants FROM numbers(3); +``` + +```text +┌─array_of_variants─┐ +│ [[],0,'str_0'] │ +│ [[0],1,'str_1'] │ +│ [[0,1],2,'str_2'] │ +└───────────────────┘ +``` + +```sql +SET use_variant_as_common_type = 1; +SELECT map('a', range(number), 'b', number, 'c', 'str_' || toString(number)) as map_of_variants FROM numbers(3); +``` + +```text +┌─map_of_variants───────────────┐ +│ {'a':[],'b':0,'c':'str_0'} │ +│ {'a':[0],'b':1,'c':'str_1'} │ +│ {'a':[0,1],'b':2,'c':'str_2'} │ +└───────────────────────────────┘ +``` + +## Reading Variant nested types as subcolumns + +Variant type supports reading a single nested type from a Variant column using the type name as a subcolumn. +So, if you have column `variant Variant(T1, T2, T3)` you can read a subcolumn of type `T2` using syntax `variant.T2`, +this subcolumn will have type `Nullable(T2)` if `T2` can be inside `Nullable` and `T2` otherwise. This subcolumn will +be the same size as original `Variant` column and will contain `NULL` values (or empty values if `T2` cannot be inside `Nullable`) +in all rows in which original `Variant` column doesn't have type `T2`. + +Variant subcolumns can be also read using function `variantElement(variant_column, type_name)`. + +Examples: + +```sql +CREATE TABLE test (v Variant(UInt64, String, Array(UInt64))) ENGINE = Memory; +INSERT INTO test VALUES (NULL), (42), ('Hello, World!'), ([1, 2, 3]); +SELECT v, v.String, v.UInt64, v.`Array(UInt64)` FROM test; +``` + +```text +┌─v─────────────┬─v.String──────┬─v.UInt64─┬─v.Array(UInt64)─┐ +│ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ [] │ +│ 42 │ ᴺᵁᴸᴸ │ 42 │ [] │ +│ Hello, World! │ Hello, World! │ ᴺᵁᴸᴸ │ [] │ +│ [1,2,3] │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ [1,2,3] │ +└───────────────┴───────────────┴──────────┴─────────────────┘ +``` + +```sql +SELECT toTypeName(v.String), toTypeName(v.UInt64), toTypeName(v.`Array(UInt64)`) FROM test LIMIT 1; +``` + +```text +┌─toTypeName(v.String)─┬─toTypeName(v.UInt64)─┬─toTypeName(v.Array(UInt64))─┐ +│ Nullable(String) │ Nullable(UInt64) │ Array(UInt64) │ +└──────────────────────┴──────────────────────┴─────────────────────────────┘ +``` + +```sql +SELECT v, variantElement(v, 'String'), variantElement(v, 'UInt64'), variantElement(v, 'Array(UInt64)') FROM test; +``` + +```text +┌─v─────────────┬─variantElement(v, 'String')─┬─variantElement(v, 'UInt64')─┬─variantElement(v, 'Array(UInt64)')─┐ +│ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ [] │ +│ 42 │ ᴺᵁᴸᴸ │ 42 │ [] │ +│ Hello, World! │ Hello, World! 
│ ᴺᵁᴸᴸ │ [] │ +│ [1,2,3] │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ [1,2,3] │ +└───────────────┴─────────────────────────────┴─────────────────────────────┴────────────────────────────────────┘ +``` + +## Conversion between Variant column and other columns + +There are 3 possible conversions that can be performed with Variant column. + +### Converting an ordinary column to a Variant column + +It is possible to convert ordinary column with type `T` to a `Variant` column containing this type: + +```sql +SELECT toTypeName(variant) as type_name, 'Hello, World!'::Variant(UInt64, String, Array(UInt64)) as variant; +``` + +```text +┌─type_name──────────────────────────────┬─variant───────┐ +│ Variant(Array(UInt64), String, UInt64) │ Hello, World! │ +└────────────────────────────────────────┴───────────────┘ +``` + +### Converting a Variant column to an ordinary column + +It is possible to convert a `Variant` column to an ordinary column. In this case all nested variants will be converted to a destination type: + +```sql +CREATE TABLE test (v Variant(UInt64, String)) ENGINE = Memory; +INSERT INTO test VALUES (NULL), (42), ('42.42'); +SELECT v::Nullable(Float64) FROM test; +``` + +```text +┌─CAST(v, 'Nullable(Float64)')─┐ +│ ᴺᵁᴸᴸ │ +│ 42 │ +│ 42.42 │ +└──────────────────────────────┘ +``` + +### Converting a Variant to another Variant + +It is possible to convert a `Variant` column to another `Variant` column, but only if the destination `Variant` column contains all nested types from the original `Variant`: + +```sql +CREATE TABLE test (v Variant(UInt64, String)) ENGINE = Memory; +INSERT INTO test VALUES (NULL), (42), ('String'); +SELECT v::Variant(UInt64, String, Array(UInt64)) FROM test; +``` + +```text +┌─CAST(v, 'Variant(UInt64, String, Array(UInt64))')─┐ +│ ᴺᵁᴸᴸ │ +│ 42 │ +│ String │ +└───────────────────────────────────────────────────┘ +``` + + +## Reading Variant type from the data + +All text formats (TSV, CSV, CustomSeparated, Values, JSONEachRow, etc) supports reading `Variant` type. During data parsing ClickHouse tries to insert value into most appropriate variant type. + +Example: + +```sql +SELECT + v, + variantElement(v, 'String') AS str, + variantElement(v, 'UInt64') AS num, + variantElement(v, 'Float64') AS float, + variantElement(v, 'DateTime') AS date, + variantElement(v, 'Array(UInt64)') AS arr +FROM format(JSONEachRow, 'v Variant(String, UInt64, Float64, DateTime, Array(UInt64))', $$ +{"v" : "Hello, World!"}, +{"v" : 42}, +{"v" : 42.42}, +{"v" : "2020-01-01 00:00:00"}, +{"v" : [1, 2, 3]} +$$) +``` + +```text +┌─v───────────────────┬─str───────────┬──num─┬─float─┬────────────────date─┬─arr─────┐ +│ Hello, World! │ Hello, World! 
│ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ [] │ +│ 42 │ ᴺᵁᴸᴸ │ 42 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ [] │ +│ 42.42 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 42.42 │ ᴺᵁᴸᴸ │ [] │ +│ 2020-01-01 00:00:00 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 2020-01-01 00:00:00 │ [] │ +│ [1,2,3] │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ [1,2,3] │ +└─────────────────────┴───────────────┴──────┴───────┴─────────────────────┴─────────┘ +``` diff --git a/docs/en/sql-reference/dictionaries/index.md b/docs/en/sql-reference/dictionaries/index.md index 9f86aaf2502..080de94f8b7 100644 --- a/docs/en/sql-reference/dictionaries/index.md +++ b/docs/en/sql-reference/dictionaries/index.md @@ -1805,6 +1805,7 @@ Example of settings: ``` xml + postgresql-hostname 5432 clickhouse qwerty diff --git a/docs/en/sql-reference/functions/array-functions.md b/docs/en/sql-reference/functions/array-functions.md index 1639f45e66c..2120b675c73 100644 --- a/docs/en/sql-reference/functions/array-functions.md +++ b/docs/en/sql-reference/functions/array-functions.md @@ -657,6 +657,43 @@ SELECT arraySlice([1, 2, NULL, 4, 5], 2, 3) AS res; Array elements set to `NULL` are handled as normal values. +## arrayShingles + +Generates an array of "shingles", i.e. consecutive sub-arrays with specified length of the input array. + +**Syntax** + +``` sql +arrayShingles(array, length) +``` + +**Arguments** + +- `array` — Input array [Array](../../sql-reference/data-types/array.md). +- `length` — The length of each shingle. + +**Returned value** + +- An array of generated shingles. + +Type: [Array](../../sql-reference/data-types/array.md). + +**Examples** + +Query: + +``` sql +SELECT arrayShingles([1,2,3,4], 3) as res; +``` + +Result: + +``` text +┌─res───────────────┐ +│ [[1,2,3],[2,3,4]] │ +└───────────────────┘ +``` + ## arraySort(\[func,\] arr, …) {#sort} Sorts the elements of the `arr` array in ascending order. If the `func` function is specified, sorting order is determined by the result of the `func` function applied to the elements of the array. If `func` accepts multiple arguments, the `arraySort` function is passed several arrays that the arguments of `func` will correspond to. Detailed examples are shown at the end of `arraySort` description. diff --git a/docs/en/sql-reference/functions/other-functions.md b/docs/en/sql-reference/functions/other-functions.md index 35f9c7af2ce..ebc80e4d308 100644 --- a/docs/en/sql-reference/functions/other-functions.md +++ b/docs/en/sql-reference/functions/other-functions.md @@ -2832,6 +2832,43 @@ Result: └─────────────────────────────────────────────────────────────────────────┘ ``` +## variantElement + +Extracts a column with specified type from a `Variant` column. + +**Syntax** + +```sql +variantElement(variant, type_name, [, default_value]) +``` + +**Arguments** + +- `variant` — Variant column. [Variant](../../sql-reference/data-types/variant.md). +- `type_name` — The name of the variant type to extract. [String](../../sql-reference/data-types/string.md). +- `default_value` - The default value that will be used if variant doesn't have variant with specified type. Can be any type. Optional. + +**Returned value** + +- Subcolumn of a `Variant` column with specified type. 
+ +**Example** + +```sql +CREATE TABLE test (v Variant(UInt64, String, Array(UInt64))) ENGINE = Memory; +INSERT INTO test VALUES (NULL), (42), ('Hello, World!'), ([1, 2, 3]); +SELECT v, variantElement(v, 'String'), variantElement(v, 'UInt64'), variantElement(v, 'Array(UInt64)') FROM test; +``` + +```text +┌─v─────────────┬─variantElement(v, 'String')─┬─variantElement(v, 'UInt64')─┬─variantElement(v, 'Array(UInt64)')─┐ +│ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ [] │ +│ 42 │ ᴺᵁᴸᴸ │ 42 │ [] │ +│ Hello, World! │ Hello, World! │ ᴺᵁᴸᴸ │ [] │ +│ [1,2,3] │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ [1,2,3] │ +└───────────────┴─────────────────────────────┴─────────────────────────────┴────────────────────────────────────┘ +``` + ## minSampleSizeConversion Calculates minimum required sample size for an A/B test comparing conversions (proportions) in two samples. diff --git a/docs/en/sql-reference/functions/string-functions.md b/docs/en/sql-reference/functions/string-functions.md index a2f1b0d7752..60cb3ac4ac4 100644 --- a/docs/en/sql-reference/functions/string-functions.md +++ b/docs/en/sql-reference/functions/string-functions.md @@ -515,7 +515,7 @@ Alias: `concat_ws` **Arguments** - sep — separator. Const [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md). -- exprN — expression to be concatenated. [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md). +- exprN — expression to be concatenated. Arguments which are not of types [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md) are converted to strings using their default serialization. As this decreases performance, it is not recommended to use non-String/FixedString arguments. **Returned values** diff --git a/docs/en/sql-reference/functions/time-series-functions.md b/docs/en/sql-reference/functions/time-series-functions.md index 144d832b36a..016c3410944 100644 --- a/docs/en/sql-reference/functions/time-series-functions.md +++ b/docs/en/sql-reference/functions/time-series-functions.md @@ -77,8 +77,8 @@ The number of data points in `series` should be at least twice the value of `per **Returned value** -- An array of three arrays where the first array include seasonal components, the second array - trend, -and the third array - residue component. +- An array of four arrays where the first array include seasonal components, the second array - trend, +the third array - residue component, and the fourth array - baseline(seasonal + trend) component. Type: [Array](../../sql-reference/data-types/array.md). @@ -107,6 +107,10 @@ Result: [ 0, 0.0000019073486, -0.0000019073486, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0.0000019073486, 0, 0 + ], + [ + 10.1, 20.449999, 40.340004, 10.100001, 20.45, 40.34, 10.100001, 20.45, 40.34, 10.1, 20.45, 40.34, + 10.1, 20.45, 40.34, 10.1, 20.45, 40.34, 10.1, 20.45, 40.34, 10.100002, 20.45, 40.34 ]] │ └────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ ``` diff --git a/docs/en/sql-reference/statements/alter/column.md b/docs/en/sql-reference/statements/alter/column.md index 2cb802c863b..676d30f5e44 100644 --- a/docs/en/sql-reference/statements/alter/column.md +++ b/docs/en/sql-reference/statements/alter/column.md @@ -23,10 +23,11 @@ The following actions are supported: - [RENAME COLUMN](#rename-column) — Renames an existing column. - [CLEAR COLUMN](#clear-column) — Resets column values. 
- [COMMENT COLUMN](#comment-column) — Adds a text comment to the column. -- [MODIFY COLUMN](#modify-column) — Changes column’s type, default expression and TTL. +- [MODIFY COLUMN](#modify-column) — Changes column’s type, default expression, TTL, and column settings. - [MODIFY COLUMN REMOVE](#modify-column-remove) — Removes one of the column properties. +- [MODIFY COLUMN MODIFY SETTING](#modify-column-modify-setting) - Changes column settings. +- [MODIFY COLUMN RESET SETTING](#modify-column-reset-setting) - Resets column settings. - [MATERIALIZE COLUMN](#materialize-column) — Materializes the column in the parts where the column is missing. - These actions are described in detail below. ## ADD COLUMN @@ -75,7 +76,7 @@ Deletes the column with the name `name`. If the `IF EXISTS` clause is specified, Deletes data from the file system. Since this deletes entire files, the query is completed almost instantly. -:::tip +:::tip You can’t delete a column if it is referenced by [materialized view](/docs/en/sql-reference/statements/create/view.md/#materialized). Otherwise, it returns an error. ::: @@ -208,7 +209,7 @@ The `ALTER` query for changing columns is replicated. The instructions are saved ## MODIFY COLUMN REMOVE -Removes one of the column properties: `DEFAULT`, `ALIAS`, `MATERIALIZED`, `CODEC`, `COMMENT`, `TTL`. +Removes one of the column properties: `DEFAULT`, `ALIAS`, `MATERIALIZED`, `CODEC`, `COMMENT`, `TTL`, `SETTING`. Syntax: @@ -228,6 +229,43 @@ ALTER TABLE table_with_ttl MODIFY COLUMN column_ttl REMOVE TTL; - [REMOVE TTL](ttl.md). + +## MODIFY COLUMN MODIFY SETTING + +Modifies a column setting. + +Syntax: + +```sql +ALTER TABLE table_name MODIFY COLUMN MODIFY SETTING name=value,...; +``` + +**Example** + +Modify the column's `max_compress_block_size` to `1MB`: + +```sql +ALTER TABLE table_name MODIFY COLUMN MODIFY SETTING max_compress_block_size = 1048576; +``` + +## MODIFY COLUMN RESET SETTING + +Resets a column setting and removes the setting declaration from the column expression of the table's CREATE query. + +Syntax: + +```sql +ALTER TABLE table_name MODIFY COLUMN RESET SETTING name,...; +``` + +**Example** + +Reset the column setting `max_compress_block_size` to its default value: + +```sql +ALTER TABLE table_name MODIFY COLUMN RESET SETTING max_compress_block_size; +``` + ## MATERIALIZE COLUMN Materializes or updates a column with an expression for a default value (`DEFAULT` or `MATERIALIZED`). diff --git a/docs/en/sql-reference/statements/alter/partition.md b/docs/en/sql-reference/statements/alter/partition.md index 114b8d5ffe3..5659a0565c5 100644 --- a/docs/en/sql-reference/statements/alter/partition.md +++ b/docs/en/sql-reference/statements/alter/partition.md @@ -112,7 +112,7 @@ Note that: For the query to run successfully, the following conditions must be met: - Both tables must have the same structure. -- Both tables must have the same partition key, the same order by key and the same primary key. +- Both tables must have the same order by key and the same primary key. - Both tables must have the same indices and projections. - Both tables must have the same storage policy.
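For reference, here is a minimal sketch of a `REPLACE PARTITION` query that these conditions apply to. The table names and the `202401` partition value are hypothetical and assume a `toYYYYMM`-style partition key; the query copies the partition from the source table and replaces the corresponding partition in the destination, leaving the source table's data in place.

```sql
-- Replace partition 202401 of `visits` with the same partition from `visits_staging`.
-- Both tables are assumed to satisfy the conditions listed above.
ALTER TABLE visits REPLACE PARTITION 202401 FROM visits_staging;
```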
diff --git a/docs/en/sql-reference/statements/alter/view.md b/docs/en/sql-reference/statements/alter/view.md index 517e64e3e5b..59045afdeb6 100644 --- a/docs/en/sql-reference/statements/alter/view.md +++ b/docs/en/sql-reference/statements/alter/view.md @@ -8,8 +8,6 @@ sidebar_label: VIEW You can modify `SELECT` query that was specified when a [materialized view](../create/view.md#materialized) was created with the `ALTER TABLE … MODIFY QUERY` statement without interrupting ingestion process. -The `allow_experimental_alter_materialized_view_structure` setting must be enabled. - This command is created to change materialized view created with `TO [db.]name` clause. It does not change the structure of the underling storage table and it does not change the columns' definition of the materialized view, because of this the application of this command is very limited for materialized views are created without `TO [db.]name` clause. **Example with TO table** diff --git a/docs/en/sql-reference/statements/create/view.md b/docs/en/sql-reference/statements/create/view.md index f6158acd9a4..028d0b09a1a 100644 --- a/docs/en/sql-reference/statements/create/view.md +++ b/docs/en/sql-reference/statements/create/view.md @@ -97,7 +97,7 @@ This feature is deprecated and will be removed in the future. For your convenience, the old documentation is located [here](https://pastila.nl/?00f32652/fdf07272a7b54bda7e13b919264e449f.md) -## Refreshable Materialized View {#refreshable-materialized-view} +## Refreshable Materialized View [Experimental] {#refreshable-materialized-view} ```sql CREATE MATERIALIZED VIEW [IF NOT EXISTS] [db.]table_name @@ -120,7 +120,8 @@ Differences from regular non-refreshable materialized views: :::note Refreshable materialized views are a work in progress. Setting `allow_experimental_refreshable_materialized_view = 1` is required for creating one. Current limitations: - * not compatible with Replicated database or table engines, + * not compatible with Replicated database or table engines + * It is not supported in ClickHouse Cloud * require [Atomic database engine](../../../engines/database-engines/atomic.md), * no retries for failed refresh - we just skip to the next scheduled refresh time, * no limit on number of concurrent refreshes. diff --git a/docs/en/sql-reference/statements/rename.md b/docs/en/sql-reference/statements/rename.md index bb62cc3af1c..667ccbc6c93 100644 --- a/docs/en/sql-reference/statements/rename.md +++ b/docs/en/sql-reference/statements/rename.md @@ -9,10 +9,6 @@ sidebar_label: RENAME Renames databases, tables, or dictionaries. Several entities can be renamed in a single query. Note that the `RENAME` query with several entities is non-atomic operation. To swap entities names atomically, use the [EXCHANGE](./exchange.md) statement. -:::note -The `RENAME` query is supported by the [Atomic](../../engines/database-engines/atomic.md) database engine only. -::: - **Syntax** ```sql diff --git a/docs/en/sql-reference/statements/system.md b/docs/en/sql-reference/statements/system.md index 0fdbbeac235..5d416dfffb3 100644 --- a/docs/en/sql-reference/statements/system.md +++ b/docs/en/sql-reference/statements/system.md @@ -343,13 +343,14 @@ SYSTEM START PULLING REPLICATION LOG [ON CLUSTER cluster_name] [[db.]replicated_ Wait until a `ReplicatedMergeTree` table will be synced with other replicas in a cluster, but no more than `receive_timeout` seconds. 
``` sql -SYSTEM SYNC REPLICA [ON CLUSTER cluster_name] [db.]replicated_merge_tree_family_table_name [STRICT | LIGHTWEIGHT | PULL] +SYSTEM SYNC REPLICA [ON CLUSTER cluster_name] [db.]replicated_merge_tree_family_table_name [STRICT | LIGHTWEIGHT [FROM 'srcReplica1'[, 'srcReplica2'[, ...]]] | PULL] ``` After running this statement the `[db.]replicated_merge_tree_family_table_name` fetches commands from the common replicated log into its own replication queue, and then the query waits till the replica processes all of the fetched commands. The following modifiers are supported: - If a `STRICT` modifier was specified then the query waits for the replication queue to become empty. The `STRICT` version may never succeed if new entries constantly appear in the replication queue. - - If a `LIGHTWEIGHT` modifier was specified then the query waits only for `GET_PART`, `ATTACH_PART`, `DROP_RANGE`, `REPLACE_RANGE` and `DROP_PART` entries to be processed. + - If a `LIGHTWEIGHT` modifier was specified then the query waits only for `GET_PART`, `ATTACH_PART`, `DROP_RANGE`, `REPLACE_RANGE` and `DROP_PART` entries to be processed. + Additionally, the LIGHTWEIGHT modifier supports an optional FROM 'srcReplicas' clause, where 'srcReplicas' is a comma-separated list of source replica names. This extension allows for more targeted synchronization by focusing only on replication tasks originating from the specified source replicas. - If a `PULL` modifier was specified then the query pulls new replication queue entries from ZooKeeper, but does not wait for anything to be processed. ### SYNC DATABASE REPLICA diff --git a/docs/en/sql-reference/table-functions/executable.md b/docs/en/sql-reference/table-functions/executable.md index d377c5d4d0c..512dc1a9f13 100644 --- a/docs/en/sql-reference/table-functions/executable.md +++ b/docs/en/sql-reference/table-functions/executable.md @@ -7,7 +7,7 @@ keywords: [udf, user defined function, clickhouse, executable, table, function] # executable Table Function for UDFs -The `executable` table function creates a table based on the output of a user-defined function (UDF) that you define in a script that outputs rows to **stdout**. The executable script is stored in the `users_scripts` directory and can read data from any source. +The `executable` table function creates a table based on the output of a user-defined function (UDF) that you define in a script that outputs rows to **stdout**. The executable script is stored in the `users_scripts` directory and can read data from any source. Make sure your ClickHouse server has all the required packages to run the executable script. For example, if it is a Python script, ensure that the server has the necessary Python packages installed. You can optionally include one or more input queries that stream their results to **stdin** for the script to read. @@ -63,7 +63,7 @@ if __name__ == "__main__": Let's invoke the script and have it generate 10 random strings: ```sql -SELECT * FROM executable('my_script.py', TabSeparated, 'id UInt32, random String', (SELECT 10)) +SELECT * FROM executable('generate_random.py', TabSeparated, 'id UInt32, random String', (SELECT 10)) ``` The response looks like: diff --git a/docs/en/sql-reference/table-functions/fuzzJSON.md b/docs/en/sql-reference/table-functions/fuzzJSON.md index a64f35691f6..ab7bd7f9f1b 100644 --- a/docs/en/sql-reference/table-functions/fuzzJSON.md +++ b/docs/en/sql-reference/table-functions/fuzzJSON.md @@ -9,7 +9,7 @@ sidebar_label: fuzzJSON Perturbs a JSON string with random variations. 
``` sql -fuzzJSON({ named_collection [option=value [,..]] | json_str[, random_seed] }) +fuzzJSON({ named_collection [, option=value [,..]] | json_str[, random_seed] }) ``` **Arguments** diff --git a/docs/en/sql-reference/table-functions/gcs.md b/docs/en/sql-reference/table-functions/gcs.md index 5ffc20189da..80077ecdb33 100644 --- a/docs/en/sql-reference/table-functions/gcs.md +++ b/docs/en/sql-reference/table-functions/gcs.md @@ -16,7 +16,8 @@ If you have multiple replicas in your cluster, you can use the [s3Cluster functi **Syntax** ``` sql -gcs(path [,hmac_key, hmac_secret] [,format] [,structure] [,compression]) +gcs(url [, NOSIGN | hmac_key, hmac_secret] [,format] [,structure] [,compression_method]) +gcs(named_collection[, option=value [,..]]) ``` :::tip GCS @@ -24,10 +25,9 @@ The GCS Table Function integrates with Google Cloud Storage by using the GCS XML ::: -**Arguments** - -- `path` — Bucket url with path to file. Supports following wildcards in readonly mode: `*`, `**`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc'`, `'def'` — strings. +**Parameters** +- `url` — Bucket path to file. Supports following wildcards in readonly mode: `*`, `**`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc'`, `'def'` — strings. :::note GCS The GCS path is in this format as the endpoint for the Google XML API is different than the JSON API: ``` @@ -35,10 +35,21 @@ The GCS Table Function integrates with Google Cloud Storage by using the GCS XML ``` and not ~~https://storage.cloud.google.com~~. ::: +- `NOSIGN` — If this keyword is provided in place of credentials, all the requests will not be signed. +- `hmac_key` and `hmac_secret` — Keys that specify credentials to use with given endpoint. Optional. +- `format` — The [format](../../interfaces/formats.md#formats) of the file. +- `structure` — Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`. +- `compression_method` — Parameter is optional. Supported values: `none`, `gzip/gz`, `brotli/br`, `xz/LZMA`, `zstd/zst`. By default, it will autodetect compression method by file extension. + +Arguments can also be passed using [named collections](/docs/en/operations/named-collections.md). In this case `url`, `format`, `structure`, `compression_method` work in the same way, and some extra parameters are supported: + + - `access_key_id` — `hmac_key`, optional. + - `secret_access_key` — `hmac_secret`, optional. + - `filename` — appended to the url if specified. + - `use_environment_credentials` — enabled by default, allows passing extra parameters using environment variables `AWS_CONTAINER_CREDENTIALS_RELATIVE_URI`, `AWS_CONTAINER_CREDENTIALS_FULL_URI`, `AWS_CONTAINER_AUTHORIZATION_TOKEN`, `AWS_EC2_METADATA_DISABLED`. + - `no_sign_request` — disabled by default. + - `expiration_window_seconds` — default value is 120. -- `format` — The [format](../../interfaces/formats.md#formats) of the file. -- `structure` — Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`. -- `compression` — Parameter is optional. Supported values: `none`, `gzip/gz`, `brotli/br`, `xz/LZMA`, `zstd/zst`. By default, it will autodetect compression by file extension. 
**Returned value** @@ -61,7 +72,7 @@ LIMIT 2; └─────────┴─────────┴─────────┘ ``` -The similar but from file with `gzip` compression: +The similar but from file with `gzip` compression method: ``` sql SELECT * @@ -158,6 +169,16 @@ The below get data from all `test-data.csv.gz` files from any folder inside `my- SELECT * FROM gcs('https://storage.googleapis.com/my-test-bucket-768/**/test-data.csv.gz', 'CSV', 'name String, value UInt32', 'gzip'); ``` +For production use cases it is recommended to use [named collections](/docs/en/operations/named-collections.md). Here is the example: +``` sql + +CREATE NAMED COLLECTION creds AS + access_key_id = '***', + secret_access_key = '***'; +SELECT count(*) +FROM gcs(creds, url='https://s3-object-url.csv') +``` + ## Partitioned Write If you specify `PARTITION BY` expression when inserting data into `GCS` table, a separate file is created for each partition value. Splitting the data into separate files helps to improve reading operations efficiency. diff --git a/docs/en/sql-reference/table-functions/mysql.md b/docs/en/sql-reference/table-functions/mysql.md index 0e5b0f54d1c..5fd9708317c 100644 --- a/docs/en/sql-reference/table-functions/mysql.md +++ b/docs/en/sql-reference/table-functions/mysql.md @@ -11,31 +11,25 @@ Allows `SELECT` and `INSERT` queries to be performed on data that is stored on a **Syntax** ``` sql -mysql('host:port', 'database', 'table', 'user', 'password'[, replace_query, 'on_duplicate_clause']) +mysql({host:port, database, table, user, password[, replace_query, on_duplicate_clause] | named_collection[, option=value [,..]]}) ``` -**Arguments** +**Parameters** - `host:port` — MySQL server address. - - `database` — Remote database name. - - `table` — Remote table name. - - `user` — MySQL user. - - `password` — User password. - - `replace_query` — Flag that converts `INSERT INTO` queries to `REPLACE INTO`. Possible values: - `0` - The query is executed as `INSERT INTO`. - `1` - The query is executed as `REPLACE INTO`. - - `on_duplicate_clause` — The `ON DUPLICATE KEY on_duplicate_clause` expression that is added to the `INSERT` query. Can be specified only with `replace_query = 0` (if you simultaneously pass `replace_query = 1` and `on_duplicate_clause`, ClickHouse generates an exception). - Example: `INSERT INTO t (c1,c2) VALUES ('a', 2) ON DUPLICATE KEY UPDATE c2 = c2 + 1;` - `on_duplicate_clause` here is `UPDATE c2 = c2 + 1`. See the MySQL documentation to find which `on_duplicate_clause` you can use with the `ON DUPLICATE KEY` clause. +Arguments also can be passed using [named collections](/docs/en/operations/named-collections.md). In this case `host` and `port` should be specified separately. This approach is recommended for production environment. + Simple `WHERE` clauses such as `=, !=, >, >=, <, <=` are currently executed on the MySQL server. The rest of the conditions and the `LIMIT` sampling constraint are executed in ClickHouse only after the query to MySQL finishes. 
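To illustrate that split, here is a sketch that reuses the connection parameters from the example below rather than introducing new ones:

```sql
-- `int_id = 1` is a simple WHERE clause, so it is executed by the MySQL server;
-- the LIMIT is applied on the ClickHouse side after the MySQL query finishes.
SELECT *
FROM mysql('localhost:3306', 'test', 'test', 'bayonet', '123')
WHERE int_id = 1
LIMIT 5;
```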
@@ -86,6 +80,18 @@ Selecting data from ClickHouse: SELECT * FROM mysql('localhost:3306', 'test', 'test', 'bayonet', '123'); ``` +Or using [named collections](/docs/en/operations/named-collections.md): + +```sql +CREATE NAMED COLLECTION creds AS + host = 'localhost', + port = 3306, + database = 'test', + user = 'bayonet', + password = '123'; +SELECT * FROM mysql(creds, table='test'); +``` + ``` text ┌─int_id─┬─float─┐ │ 1 │ 2 │ diff --git a/docs/en/sql-reference/table-functions/postgresql.md b/docs/en/sql-reference/table-functions/postgresql.md index b9211d70cdb..3fd0e5805e7 100644 --- a/docs/en/sql-reference/table-functions/postgresql.md +++ b/docs/en/sql-reference/table-functions/postgresql.md @@ -11,10 +11,10 @@ Allows `SELECT` and `INSERT` queries to be performed on data that is stored on a **Syntax** ``` sql -postgresql('host:port', 'database', 'table', 'user', 'password'[, `schema`]) +postgresql({host:port, database, table, user, password[, schema, [, on_conflict]] | named_collection[, option=value [,..]]}) ``` -**Arguments** +**Parameters** - `host:port` — PostgreSQL server address. - `database` — Remote database name. @@ -22,6 +22,9 @@ postgresql('host:port', 'database', 'table', 'user', 'password'[, `schema`]) - `user` — PostgreSQL user. - `password` — User password. - `schema` — Non-default table schema. Optional. +- `on_conflict` — Conflict resolution strategy. Example: `ON CONFLICT DO NOTHING`. Optional. + +Arguments also can be passed using [named collections](/docs/en/operations/named-collections.md). In this case `host` and `port` should be specified separately. This approach is recommended for production environment. **Returned Value** @@ -86,12 +89,24 @@ postgresql> SELECT * FROM test; (1 row) ``` -Selecting data from ClickHouse: +Selecting data from ClickHouse using plain arguments: ```sql SELECT * FROM postgresql('localhost:5432', 'test', 'test', 'postgresql_user', 'password') WHERE str IN ('test'); ``` +Or using [named collections](/docs/en/operations/named-collections.md): + +```sql +CREATE NAMED COLLECTION mypg AS + host = 'localhost', + port = 5432, + database = 'test', + user = 'postgresql_user', + password = 'password'; +SELECT * FROM postgresql(mypg, table='test') WHERE str IN ('test'); +``` + ``` text ┌─int_id─┬─int_nullable─┬─float─┬─str──┬─float_nullable─┐ │ 1 │ ᴺᵁᴸᴸ │ 2 │ test │ ᴺᵁᴸᴸ │ diff --git a/docs/en/sql-reference/table-functions/redis.md b/docs/en/sql-reference/table-functions/redis.md index 98d9a647cee..09841642210 100644 --- a/docs/en/sql-reference/table-functions/redis.md +++ b/docs/en/sql-reference/table-functions/redis.md @@ -34,6 +34,7 @@ redis(host:port, key, structure[, db_index[, password[, pool_size]]]) - queries with key equals or in filtering will be optimized to multi keys lookup from Redis. If queries without filtering key full table scan will happen which is a heavy operation. +[Named collections](/docs/en/operations/named-collections.md) are not supported for `redis` table function at the moment. 
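The usage examples below pass only the first three arguments. For completeness, a hedged sketch with the optional arguments spelled out; the values for `db_index`, `password` and `pool_size` are illustrative, not documented defaults:

```sql
SELECT *
FROM redis(
    'redis1:6379',
    'key',
    'key String, v1 String, v2 UInt32',
    0,    -- db_index (illustrative)
    '',   -- password (empty when the server requires no auth)
    16    -- pool_size (illustrative)
)
```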
**Returned Value** @@ -41,17 +42,7 @@ A table object with key as Redis key, other columns packaged together as Redis v ## Usage Example {#usage-example} -Create a table in ClickHouse which allows to read data from Redis: - -``` sql -CREATE TABLE redis_table -( - `k` String, - `m` String, - `n` UInt32 -) -ENGINE = Redis('redis1:6379') PRIMARY KEY(k); -``` +Read from Redis: ```sql SELECT * FROM redis( @@ -61,6 +52,15 @@ SELECT * FROM redis( ) ``` +Insert into Redis: + +```sql +INSERT INTO TABLE FUNCTION redis( + 'redis1:6379', + 'key', + 'key String, v1 String, v2 UInt32') values ('1', '1', 1); +``` + **See Also** - [The `Redis` table engine](/docs/en/engines/table-engines/integrations/redis.md) diff --git a/docs/en/sql-reference/table-functions/remote.md b/docs/en/sql-reference/table-functions/remote.md index 228f4a4c7e1..f6e49099d99 100644 --- a/docs/en/sql-reference/table-functions/remote.md +++ b/docs/en/sql-reference/table-functions/remote.md @@ -13,10 +13,12 @@ Both functions can be used in `SELECT` and `INSERT` queries. ## Syntax ``` sql -remote('addresses_expr', [db, table, 'user'[, 'password'], sharding_key]) -remote('addresses_expr', [db.table, 'user'[, 'password'], sharding_key]) -remoteSecure('addresses_expr', [db, table, 'user'[, 'password'], sharding_key]) -remoteSecure('addresses_expr', [db.table, 'user'[, 'password'], sharding_key]) +remote(addresses_expr, [db, table, user [, password], sharding_key]) +remote(addresses_expr, [db.table, user [, password], sharding_key]) +remote(named_collection[, option=value [,..]]) +remoteSecure(addresses_expr, [db, table, user [, password], sharding_key]) +remoteSecure(addresses_expr, [db.table, user [, password], sharding_key]) +remoteSecure(named_collection[, option=value [,..]]) ``` ## Parameters @@ -39,6 +41,8 @@ remoteSecure('addresses_expr', [db.table, 'user'[, 'password'], sharding_key]) - `password` — User password. If not specified, an empty password is used. Type: [String](../../sql-reference/data-types/string.md). - `sharding_key` — Sharding key to support distributing data across nodes. For example: `insert into remote('127.0.0.1:9000,127.0.0.2', db, table, 'default', rand())`. Type: [UInt32](../../sql-reference/data-types/int-uint.md). +Arguments also can be passed using [named collections](/docs/en/operations/named-collections.md). + ## Returned value A table located on a remote server. 
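The examples that follow use `remote`; for completeness, a hedged companion sketch for `remoteSecure`, assuming the server exposes the TLS-enabled native protocol port (9440 by default) and the same `db.remote_engine_table` as in the examples below:

```sql
-- Same query as the plain `remote` example, but over an encrypted connection.
SELECT * FROM remoteSecure('127.0.0.1:9440', db.remote_engine_table) LIMIT 3;
```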
@@ -82,7 +86,16 @@ example01-01-1,example01-02-1 SELECT * FROM remote('127.0.0.1', db.remote_engine_table) LIMIT 3; ``` -### Inserting data from a remote server into a table: +Or using [named collections](/docs/en/operations/named-collections.md): + +```sql +CREATE NAMED COLLECTION creds AS + host = '127.0.0.1', + database = 'db'; +SELECT * FROM remote(creds, table='remote_engine_table') LIMIT 3; +``` + +### Inserting data into a table on a remote server: ``` sql CREATE TABLE remote_table (name String, value UInt32) ENGINE=Memory; diff --git a/docs/en/sql-reference/table-functions/s3.md b/docs/en/sql-reference/table-functions/s3.md index 8065f066666..970b3e52882 100644 --- a/docs/en/sql-reference/table-functions/s3.md +++ b/docs/en/sql-reference/table-functions/s3.md @@ -16,33 +16,41 @@ When using the `s3 table function` with [`INSERT INTO...SELECT`](../../sql-refer **Syntax** ``` sql -s3(path [, NOSIGN | aws_access_key_id, aws_secret_access_key [,session_token]] [,format] [,structure] [,compression]) +s3(url [, NOSIGN | access_key_id, secret_access_key, [session_token]] [,format] [,structure] [,compression_method]) +s3(named_collection[, option=value [,..]]) ``` :::tip GCS The S3 Table Function integrates with Google Cloud Storage by using the GCS XML API and HMAC keys. See the [Google interoperability docs]( https://cloud.google.com/storage/docs/interoperability) for more details about the endpoint and HMAC. -For GCS, substitute your HMAC key and HMAC secret where you see `aws_access_key_id` and `aws_secret_access_key`. +For GCS, substitute your HMAC key and HMAC secret where you see `access_key_id` and `secret_access_key`. ::: -**Arguments** +**Parameters** -- `path` — Bucket url with path to file. Supports following wildcards in readonly mode: `*`, `**`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc'`, `'def'` — strings. For more information see [here](../../engines/table-engines/integrations/s3.md#wildcards-in-path). +`s3` table function supports the following plain parameters: +- `url` — Bucket url with path to file. Supports following wildcards in readonly mode: `*`, `**`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc'`, `'def'` — strings. For more information see [here](../../engines/table-engines/integrations/s3.md#wildcards-in-path). :::note GCS - The GCS path is in this format as the endpoint for the Google XML API is different than the JSON API: + The GCS url is in this format as the endpoint for the Google XML API is different than the JSON API: ``` https://storage.googleapis.com/// ``` and not ~~https://storage.cloud.google.com~~. ::: - -- `NOSIGN` - If this keyword is provided in place of credentials, all the requests will not be signed. -- `access_key_id`, `secret_access_key` — Keys that specify credentials to use with given endpoint. Optional. +- `NOSIGN` — If this keyword is provided in place of credentials, all the requests will not be signed. +- `access_key_id` and `secret_access_key` — Keys that specify credentials to use with given endpoint. Optional. - `session_token` - Session token to use with the given keys. Optional when passing keys. - `format` — The [format](../../interfaces/formats.md#formats) of the file. - `structure` — Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`. -- `compression` — Parameter is optional. Supported values: `none`, `gzip/gz`, `brotli/br`, `xz/LZMA`, `zstd/zst`. By default, it will autodetect compression by file extension. 
+- `compression_method` — Parameter is optional. Supported values: `none`, `gzip/gz`, `brotli/br`, `xz/LZMA`, `zstd/zst`. By default, it will autodetect compression method by file extension. + +Arguments can also be passed using [named collections](/docs/en/operations/named-collections.md). In this case `url`, `access_key_id`, `secret_access_key`, `format`, `structure`, `compression_method` work in the same way, and some extra parameters are supported: + + - `filename` — appended to the url if specified. + - `use_environment_credentials` — enabled by default, allows passing extra parameters using environment variables `AWS_CONTAINER_CREDENTIALS_RELATIVE_URI`, `AWS_CONTAINER_CREDENTIALS_FULL_URI`, `AWS_CONTAINER_AUTHORIZATION_TOKEN`, `AWS_EC2_METADATA_DISABLED`. + - `no_sign_request` — disabled by default. + - `expiration_window_seconds` — default value is 120. **Returned value** @@ -82,7 +90,7 @@ FROM s3( LIMIT 5; ``` -ClickHouse also can determine the compression of the file. For example, if the file was zipped up with a `.csv.gz` extension, ClickHouse would decompress the file automatically. +ClickHouse also can determine the compression method of the file. For example, if the file was zipped up with a `.csv.gz` extension, ClickHouse would decompress the file automatically. ::: @@ -168,7 +176,7 @@ The below get data from all `test-data.csv.gz` files from any folder inside `my- SELECT * FROM s3('https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/**/test-data.csv.gz', 'CSV', 'name String, value UInt32', 'gzip'); ``` -Note. It is possible to specify custom URL mappers in the server configuration file. Example: +Note. It is possible to specify custom URL mappers in the server configuration file. Example: ``` sql SELECT * FROM s3('s3://clickhouse-public-datasets/my-test-bucket-768/**/test-data.csv.gz', 'CSV', 'name String, value UInt32', 'gzip'); ``` @@ -190,6 +198,16 @@ Custom mapper can be added into `config.xml`: ``` +For production use cases it is recommended to use [named collections](/docs/en/operations/named-collections.md). Here is the example: +``` sql + +CREATE NAMED COLLECTION creds AS + access_key_id = '***', + secret_access_key = '***'; +SELECT count(*) +FROM s3(creds, url='https://s3-object-url.csv') +``` + ## Partitioned Write If you specify `PARTITION BY` expression when inserting data into `S3` table, a separate file is created for each partition value. Splitting the data into separate files helps to improve reading operations efficiency. diff --git a/docs/en/sql-reference/table-functions/s3Cluster.md b/docs/en/sql-reference/table-functions/s3Cluster.md index 080c9860519..92d9527df82 100644 --- a/docs/en/sql-reference/table-functions/s3Cluster.md +++ b/docs/en/sql-reference/table-functions/s3Cluster.md @@ -4,23 +4,34 @@ sidebar_position: 181 sidebar_label: s3Cluster title: "s3Cluster Table Function" --- +This is an extension to the [s3](/docs/en/sql-reference/table-functions/s3.md) table function. Allows processing files from [Amazon S3](https://aws.amazon.com/s3/) and Google Cloud Storage [Google Cloud Storage](https://cloud.google.com/storage/) in parallel from many nodes in a specified cluster. On initiator it creates a connection to all nodes in the cluster, discloses asterisks in S3 file path, and dispatches each file dynamically. On the worker node it asks the initiator about the next task to process and processes it. This is repeated until all tasks are finished. 
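To make the dispatch behaviour described above concrete, here is a sketch that reuses the public-dataset URL from the `s3` examples and the `cluster_simple` cluster referenced later on this page; the initiator expands the `**` glob and hands each matched file to a worker node:

```sql
SELECT count(*)
FROM s3Cluster(
    'cluster_simple',
    'https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/**/test-data.csv.gz',
    'CSV',
    'name String, value UInt32',
    'gzip'
);
```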
**Syntax** ``` sql -s3Cluster(cluster_name, source, [,access_key_id, secret_access_key, [session_token]] [,format] [,structure]) +s3Cluster(cluster_name, url [, NOSIGN | access_key_id, secret_access_key, [session_token]] [,format] [,structure] [,compression_method]) +s3Cluster(cluster_name, named_collection[, option=value [,..]]) ``` **Arguments** - `cluster_name` — Name of a cluster that is used to build a set of addresses and connection parameters to remote and local servers. -- `source` — URL to a file or a bunch of files. Supports following wildcards in readonly mode: `*`, `**`, `?`, `{'abc','def'}` and `{N..M}` where `N`, `M` — numbers, `abc`, `def` — strings. For more information see [Wildcards In Path](../../engines/table-engines/integrations/s3.md#wildcards-in-path). -- `access_key_id`, `secret_access_key` — Keys that specify credentials to use with given endpoint. Optional. +- `url` — path to a file or a bunch of files. Supports following wildcards in readonly mode: `*`, `**`, `?`, `{'abc','def'}` and `{N..M}` where `N`, `M` — numbers, `abc`, `def` — strings. For more information see [Wildcards In Path](../../engines/table-engines/integrations/s3.md#wildcards-in-path). +- `NOSIGN` — If this keyword is provided in place of credentials, all the requests will not be signed. +- `access_key_id` and `secret_access_key` — Keys that specify credentials to use with given endpoint. Optional. - `session_token` - Session token to use with the given keys. Optional when passing keys. - `format` — The [format](../../interfaces/formats.md#formats) of the file. - `structure` — Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`. +- `compression_method` — Parameter is optional. Supported values: `none`, `gzip/gz`, `brotli/br`, `xz/LZMA`, `zstd/zst`. By default, it will autodetect compression method by file extension. + +Arguments can also be passed using [named collections](/docs/en/operations/named-collections.md). In this case `url`, `access_key_id`, `secret_access_key`, `format`, `structure`, `compression_method` work in the same way, and some extra parameters are supported: + + - `filename` — appended to the url if specified. + - `use_environment_credentials` — enabled by default, allows passing extra parameters using environment variables `AWS_CONTAINER_CREDENTIALS_RELATIVE_URI`, `AWS_CONTAINER_CREDENTIALS_FULL_URI`, `AWS_CONTAINER_AUTHORIZATION_TOKEN`, `AWS_EC2_METADATA_DISABLED`. + - `no_sign_request` — disabled by default. + - `expiration_window_seconds` — default value is 120. **Returned value** @@ -47,6 +58,18 @@ Count the total amount of rows in all files in the cluster `cluster_simple`: If your listing of files contains number ranges with leading zeros, use the construction with braces for each digit separately or use `?`. ::: +For production use cases it is recommended to use [named collections](/docs/en/operations/named-collections.md). 
Here is the example: +``` sql + +CREATE NAMED COLLECTION creds AS + access_key_id = 'minio' + secret_access_key = 'minio123'; +SELECT count(*) FROM s3Cluster( + 'cluster_simple', creds, url='https://s3-object-url.csv', + format='CSV', structure='name String, value UInt32, polygon Array(Array(Tuple(Float64, Float64)))' +) +``` + **See Also** - [S3 engine](../../engines/table-engines/integrations/s3.md) diff --git a/docs/ru/getting-started/tutorial.md b/docs/ru/getting-started/tutorial.md index a2ddb103bc3..34064b6cf2f 100644 --- a/docs/ru/getting-started/tutorial.md +++ b/docs/ru/getting-started/tutorial.md @@ -670,4 +670,4 @@ ENGINE = ReplicatedMergeTree( INSERT INTO tutorial.hits_replica SELECT * FROM tutorial.hits_local; ``` -Репликация работает в режиме мультимастера. Это означает, что данные могут быть загружены на любую из реплик и система автоматически синхронизирует данные между остальными репликами. Репликация асинхронна, то есть в конкретный момент времнени не все реплики могут содержать недавно добавленные данные. Как минимум одна реплика должна быть в строю для приёма данных. Прочие реплики синхронизируются и восстановят согласованное состояния как только снова станут активными. Заметим, что при таком подходе есть вероятность утраты недавно добавленных данных. +Репликация работает в режиме мультимастера. Это означает, что данные могут быть загружены на любую из реплик и система автоматически синхронизирует данные между остальными репликами. Репликация асинхронна, то есть в конкретный момент времени не все реплики могут содержать недавно добавленные данные. Как минимум одна реплика должна быть в строю для приёма данных. Прочие реплики синхронизируются и восстановят согласованное состояния как только снова станут активными. Заметим, что при таком подходе есть вероятность утраты недавно добавленных данных. diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md index 2081dcc59b6..cd949e9e6b1 100644 --- a/docs/ru/operations/settings/settings.md +++ b/docs/ru/operations/settings/settings.md @@ -2796,6 +2796,17 @@ SELECT TOP 3 name, value FROM system.settings; 3. │ max_block_size │ 65505 │ └─────────────────────────┴─────────┘ ``` +### output_format_pretty_color {#output_format_pretty_color} + +Включает/выключает управляющие последовательности ANSI в форматах Pretty. + +Возможные значения: + +- `0` — выключена. Не исползует ANSI последовательности в форматах Pretty. +- `1` — включена. Исползует ANSI последовательности с исключением форматов `NoEscapes`. +- `auto` - включена если `stdout` является терминалом с исключением форматов `NoEscapes`. + +Значение по умолчанию: `auto` ## system_events_show_zero_values {#system_events_show_zero_values} diff --git a/docs/ru/operations/system-tables/replication_queue.md b/docs/ru/operations/system-tables/replication_queue.md index 60d42133153..31bd0bf50fd 100644 --- a/docs/ru/operations/system-tables/replication_queue.md +++ b/docs/ru/operations/system-tables/replication_queue.md @@ -49,7 +49,7 @@ slug: /ru/operations/system-tables/replication_queue - `last_attempt_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — дата и время последней попытки выполнить задачу. -- `num_postponed` ([UInt32](../../sql-reference/data-types/int-uint.md)) — количество отложенных задач. +- `num_postponed` ([UInt32](../../sql-reference/data-types/int-uint.md)) — количество откладываний запуска задачи. - `postpone_reason` ([String](../../sql-reference/data-types/string.md)) — причина, по которой была отложена задача. 
diff --git a/docs/ru/sql-reference/statements/system.md b/docs/ru/sql-reference/statements/system.md index b3d2eff5364..3e7d67d90ff 100644 --- a/docs/ru/sql-reference/statements/system.md +++ b/docs/ru/sql-reference/statements/system.md @@ -280,7 +280,7 @@ SYSTEM START REPLICATION QUEUES [ON CLUSTER cluster_name] [[db.]replicated_merge Ждет когда таблица семейства `ReplicatedMergeTree` будет синхронизирована с другими репликами в кластере, но не более `receive_timeout` секунд: ``` sql -SYSTEM SYNC REPLICA [db.]replicated_merge_tree_family_table_name [STRICT | LIGHTWEIGHT | PULL] +SYSTEM SYNC REPLICA [db.]replicated_merge_tree_family_table_name [STRICT | LIGHTWEIGHT [FROM 'srcReplica1'[, 'srcReplica2'[, ...]]] | PULL] ``` После выполнения этого запроса таблица `[db.]replicated_merge_tree_family_table_name` загружает команды из общего реплицированного лога в свою собственную очередь репликации. Затем запрос ждет, пока реплика не обработает все загруженные команды. Поддерживаются следующие модификаторы: diff --git a/docs/zh/faq/general/ne-tormozit.md b/docs/zh/faq/general/ne-tormozit.md index c4149655108..f397f6bb1d6 100644 --- a/docs/zh/faq/general/ne-tormozit.md +++ b/docs/zh/faq/general/ne-tormozit.md @@ -1,27 +1,27 @@ --- slug: /zh/faq/general/ne-tormozit -title: "What does \u201C\u043D\u0435 \u0442\u043E\u0440\u043C\u043E\u0437\u0438\u0442\ - \u201D mean?" +title: "\u201C\u043D\u0435 \u0442\u043E\u0440\u043C\u043E\u0437\u0438\u0442\ + \u201D 是什么意思?" toc_hidden: true sidebar_position: 11 --- -# What Does “Не тормозит” Mean? {#what-does-ne-tormozit-mean} +# “Не тормозит” 是什么意思? {#what-does-ne-tormozit-mean} -This question usually arises when people see official ClickHouse t-shirts. They have large words **“ClickHouse не тормозит”** on the front. +这个问题通常出现在人们看到官方 ClickHouse T恤时。它们的正面印有大字**“ClickHouse не тормозит”**。 -Before ClickHouse became open-source, it has been developed as an in-house storage system by the largest Russian IT company, [Yandex](https://yandex.com/company/). That’s why it initially got its slogan in Russian, which is “не тормозит” (pronounced as “ne tormozit”). After the open-source release we first produced some of those t-shirts for events in Russia and it was a no-brainer to use the slogan as-is. +在 ClickHouse 开源之前,它作为俄罗斯最大的 IT 公司 [Yandex](https://yandex.com/company/) 的内部存储系统而开发。这就是为什么它最初获得了俄文口号“не тормозит”(发音为“ne tormozit”)。在开源发布后,我们首先为俄罗斯的活动制作了一些这样的T恤,使用原汁原味的口号是理所当然的。 -One of the following batches of those t-shirts was supposed to be given away on events outside of Russia and we tried to make the English version of the slogan. Unfortunately, the Russian language is kind of elegant in terms of expressing stuff and there was a restriction of limited space on a t-shirt, so we failed to come up with good enough translation (most options appeared to be either long or inaccurate) and decided to keep the slogan in Russian even on t-shirts produced for international events. It appeared to be a great decision because people all over the world get positively surprised and curious when they see it. +其中一批这样的T恤原本打算在俄罗斯之外的活动中赠送,我们尝试制作口号的英文版本。不幸的是,俄语在表达方面有些优雅,而且T恤上的空间有限,所以我们未能提出足够好的翻译(大多数选项要么太长,要么不够准确),并决定即使在为国际活动制作的T恤上也保留俄文口号。这被证明是一个绝妙的决定,因为全世界的人们看到它时都会感到惊喜和好奇。 -So, what does it mean? Here are some ways to translate *“не тормозит”*: +那么,它是什么意思呢?以下是翻译“не тормозит”的一些方式: -- If you translate it literally, it’d be something like *“ClickHouse does not press the brake pedal”*. 
-- If you’d want to express it as close to how it sounds to a Russian person with IT background, it’d be something like *“If your larger system lags, it’s not because it uses ClickHouse”*. -- Shorter, but not so precise versions could be *“ClickHouse is not slow”*, *“ClickHouse does not lag”* or just *“ClickHouse is fast”*. +- 如果你直译,那就是“ClickHouse 不踩刹车”。 +- 如果你想尽可能接近一个有 IT 背景的俄罗斯人的听觉感受,那就是“如果你的大型系统延迟,不是因为它使用了 ClickHouse”。 +- 更短,但不那么精确的版本可能是“ClickHouse 不慢”,“ClickHouse 不卡顿”或仅仅“ClickHouse 很快”。 -If you haven’t seen one of those t-shirts in person, you can check them out online in many ClickHouse-related videos. For example, this one: +如果您还没有亲眼见过这些 T恤,可以在许多与 ClickHouse 相关的视频中在线查看。例如,这个: ![iframe](https://www.youtube.com/embed/bSyQahMVZ7w) -P.S. These t-shirts are not for sale, they are given away for free on most [ClickHouse Meetups](https://clickhouse.com/#meet), usually for best questions or other forms of active participation. +附言:这些 T恤不出售,它们在大多数 [ClickHouse 聚会](https://clickhouse.com/#meet)上免费赠送,通常是给出最佳问题或其他形式的积极参与者。 diff --git a/docs/zh/faq/general/why-clickhouse-is-so-fast.md b/docs/zh/faq/general/why-clickhouse-is-so-fast.md index a7df6aec207..ddfda87abb4 100644 --- a/docs/zh/faq/general/why-clickhouse-is-so-fast.md +++ b/docs/zh/faq/general/why-clickhouse-is-so-fast.md @@ -1,63 +1,63 @@ --- slug: /zh/faq/general/why-clickhouse-is-so-fast -title: Why is ClickHouse so fast? +title: 为什么 ClickHouse 如此快速? toc_hidden: true sidebar_position: 8 --- -# Why ClickHouse Is So Fast? {#why-clickhouse-is-so-fast} +# 为什么 ClickHouse 如此快速? {#why-clickhouse-is-so-fast} -It was designed to be fast. Query execution performance has always been a top priority during the development process, but other important characteristics like user-friendliness, scalability, and security were also considered so ClickHouse could become a real production system. +它被设计成一个快速的系统。在开发过程中,查询执行性能一直是首要考虑的优先级,但也考虑了其他重要特性,如用户友好性、可扩展性和安全性,使 ClickHouse 成为一个真正的生产系统。 -ClickHouse was initially built as a prototype to do just a single task well: to filter and aggregate data as fast as possible. That’s what needs to be done to build a typical analytical report and that’s what a typical [GROUP BY](../../sql-reference/statements/select/group-by.md) query does. ClickHouse team has made several high-level decisions that combined made achieving this task possible: +ClickHouse 最初是作为一个原型构建的,它的单一任务就是尽可能快速地过滤和聚合数据。这正是构建典型分析报告所需做的,也是典型 [GROUP BY](../../sql-reference/statements/select/group-by.md) 查询所做的。ClickHouse 团队做出了几个高层次的决策,这些决策组合在一起使得实现这一任务成为可能: -Column-oriented storage -: Source data often contain hundreds or even thousands of columns, while a report can use just a few of them. The system needs to avoid reading unnecessary columns, or most expensive disk read operations would be wasted. +列式存储 +: 源数据通常包含数百甚至数千列,而报告可能只使用其中的几列。系统需要避免读取不必要的列,否则大部分昂贵的磁盘读取操作将被浪费。 -Indexes -: ClickHouse keeps data structures in memory that allows reading not only used columns but only necessary row ranges of those columns. +索引 +: ClickHouse 在内存中保留数据结构,允许不仅读取使用的列,而且只读取这些列的必要行范围。 -Data compression -: Storing different values of the same column together often leads to better compression ratios (compared to row-oriented systems) because in real data column often has the same or not so many different values for neighboring rows. In addition to general-purpose compression, ClickHouse supports [specialized codecs](../../sql-reference/statements/create/table.mdx/#create-query-specialized-codecs) that can make data even more compact. 
+数据压缩 +: 将同一列的不同值存储在一起通常会导致更好的压缩比(与行式系统相比),因为在实际数据中列通常对相邻行有相同或不太多的不同值。除了通用压缩之外,ClickHouse 还支持 [专用编解码器](../../sql-reference/statements/create/table.mdx/#create-query-specialized-codecs),可以使数据更加紧凑。 -Vectorized query execution -: ClickHouse not only stores data in columns but also processes data in columns. It leads to better CPU cache utilization and allows for [SIMD](https://en.wikipedia.org/wiki/SIMD) CPU instructions usage. +向量化查询执行 +: ClickHouse 不仅以列的形式存储数据,而且以列的形式处理数据。这导致更好的 CPU 缓存利用率,并允许使用 [SIMD](https://en.wikipedia.org/wiki/SIMD) CPU 指令。 -Scalability -: ClickHouse can leverage all available CPU cores and disks to execute even a single query. Not only on a single server but all CPU cores and disks of a cluster as well. +可扩展性 +: ClickHouse 可以利用所有可用的 CPU 核心和磁盘来执行甚至是单个查询。不仅在单个服务器上,而且在集群的所有 CPU 核心和磁盘上。 -But many other database management systems use similar techniques. What really makes ClickHouse stand out is **attention to low-level details**. Most programming languages provide implementations for most common algorithms and data structures, but they tend to be too generic to be effective. Every task can be considered as a landscape with various characteristics, instead of just throwing in random implementation. For example, if you need a hash table, here are some key questions to consider: +但许多其他数据库管理系统也使用类似的技术。真正使 ClickHouse 脱颖而出的是 **对底层细节的关注**。大多数编程语言为最常见的算法和数据结构提供了实现,但它们往往过于通用而无法高效。每个任务都可以被视为具有各种特征的景观,而不是仅仅随意投入某个实现。例如,如果您需要一个哈希表,这里有一些关键问题需要考虑: -- Which hash function to choose? -- Collision resolution algorithm: [open addressing](https://en.wikipedia.org/wiki/Open_addressing) vs [chaining](https://en.wikipedia.org/wiki/Hash_table#Separate_chaining)? -- Memory layout: one array for keys and values or separate arrays? Will it store small or large values? -- Fill factor: when and how to resize? How to move values around on resize? -- Will values be removed and which algorithm will work better if they will? -- Will we need fast probing with bitmaps, inline placement of string keys, support for non-movable values, prefetch, and batching? +- 选择哪种哈希函数? +- 冲突解决算法:[开放寻址](https://en.wikipedia.org/wiki/Open_addressing)还是[链接](https://en.wikipedia.org/wiki/Hash_table#Separate_chaining)? +- 内存布局:一个数组用于键和值还是分开的数组?它会存储小值还是大值? +- 填充因子:何时以及如何调整大小?在调整大小时如何移动值? +- 是否会移除值,如果会,哪种算法会更好? +- 我们是否需要使用位图进行快速探测,字符串键的内联放置,对不可移动值的支持,预取和批处理? -Hash table is a key data structure for `GROUP BY` implementation and ClickHouse automatically chooses one of [30+ variations](https://github.com/ClickHouse/ClickHouse/blob/master/src/Interpreters/Aggregator.h) for each specific query. +哈希表是 `GROUP BY` 实现的关键数据结构,ClickHouse 会根据每个特定查询自动选择 [30 多种变体](https://github.com/ClickHouse/ClickHouse/blob/master/src/Interpreters/Aggregator.h) 中的一种。 -The same goes for algorithms, for example, in sorting you might consider: +算法也是如此,例如,在排序中,您可能会考虑: -- What will be sorted: an array of numbers, tuples, strings, or structures? -- Is all data available completely in RAM? -- Do we need a stable sort? -- Do we need a full sort? Maybe partial sort or n-th element will suffice? -- How to implement comparisons? -- Are we sorting data that has already been partially sorted? +- 将要排序的是数字数组、元组、字符串还是结构? +- 所有数据是否完全可用于 RAM? +- 我们需要稳定排序吗? +- 我们需要完全排序吗?也许部分排序或第 n 个元素就足够了? +- 如何实现比较? +- 我们正在对已经部分排序的数据进行排序吗? -Algorithms that they rely on characteristics of data they are working with can often do better than their generic counterparts. If it is not really known in advance, the system can try various implementations and choose the one that works best in runtime. 
For example, see an [article on how LZ4 decompression is implemented in ClickHouse](https://habr.com/en/company/yandex/blog/457612/). +他们所依赖的算法根据其所处理的数据特性,往往可以比通用算法做得更好。如果事先真的不知道,系统可以尝试各种实现,并在运行时选择最佳的一种。例如,看一篇关于 [ClickHouse 中 LZ4 解压缩是如何实现的文章](https://habr.com/en/company/yandex/blog/457612/)。 -Last but not least, the ClickHouse team always monitors the Internet on people claiming that they came up with the best implementation, algorithm, or data structure to do something and tries it out. Those claims mostly appear to be false, but from time to time you’ll indeed find a gem. +最后但同样重要的是,ClickHouse 团队始终关注互联网上人们声称他们提出了最佳的实现、算法或数据结构来做某事,并尝试它。这些声称大多是虚假的,但有时你确实会找到一颗宝石。 -:::info Tips for building your own high-performance software -- Keep in mind low-level details when designing your system. -- Design based on hardware capabilities. -- Choose data structures and abstractions based on the needs of the task. -- Provide specializations for special cases. -- Try new, “best” algorithms, that you read about yesterday. -- Choose an algorithm in runtime based on statistics. -- Benchmark on real datasets. -- Test for performance regressions in CI. -- Measure and observe everything. +:::info 构建高性能软件的提示 +- 设计系统时要考虑到底层细节。 +- 基于硬件能力进行设计。 +- 根据任务的需求选择数据结构和抽象。 +- 为特殊情况提供专门化。 +- 尝试您昨天阅读的关于新的“最佳”算法。 +- 根据统计数据在运行时选择算法。 +- 在真实数据集上进行基准测试。 +- 在 CI 中测试性能回归。 +- 测量并观察一切。 ::: diff --git a/docs/zh/faq/integration/json-import.md b/docs/zh/faq/integration/json-import.md index 2d5c687316d..730af8cc6da 100644 --- a/docs/zh/faq/integration/json-import.md +++ b/docs/zh/faq/integration/json-import.md @@ -1,35 +1,35 @@ --- slug: /zh/faq/integration/json-import -title: How to import JSON into ClickHouse? +title: 如何将 JSON 导入到 ClickHouse? toc_hidden: true sidebar_position: 11 --- -# How to Import JSON Into ClickHouse? {#how-to-import-json-into-clickhouse} +# 如何将 JSON 导入到 ClickHouse? {#how-to-import-json-into-clickhouse} -ClickHouse supports a wide range of [data formats for input and output](../../interfaces/formats.md). There are multiple JSON variations among them, but the most commonly used for data ingestion is [JSONEachRow](../../interfaces/formats.md#jsoneachrow). It expects one JSON object per row, each object separated by a newline. +ClickHouse 支持多种[输入和输出的数据格式](../../interfaces/formats.md)。其中包括多种 JSON 变体,但最常用于数据导入的是 [JSONEachRow](../../interfaces/formats.md#jsoneachrow)。它期望每行一个 JSON 对象,每个对象由一个新行分隔。 -## Examples {#examples} +## 示例 {#examples} -Using [HTTP interface](../../interfaces/http.md): +使用 [HTTP 接口](../../interfaces/http.md): ``` bash $ echo '{"foo":"bar"}' | curl 'http://localhost:8123/?query=INSERT%20INTO%20test%20FORMAT%20JSONEachRow' --data-binary @- ``` -Using [CLI interface](../../interfaces/cli.md): +使用 [CLI接口](../../interfaces/cli.md): ``` bash $ echo '{"foo":"bar"}' | clickhouse-client --query="INSERT INTO test FORMAT JSONEachRow" ``` -Instead of inserting data manually, you might consider to use one of [client libraries](../../interfaces/index.md) instead. +除了手动插入数据外,您可能会考虑使用 [客户端库](../../interfaces/index.md) 之一。 -## Useful Settings {#useful-settings} +## 实用设置 {#useful-settings} -- `input_format_skip_unknown_fields` allows to insert JSON even if there were additional fields not present in table schema (by discarding them). -- `input_format_import_nested_json` allows to insert nested JSON objects into columns of [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) type. 
+- `input_format_skip_unknown_fields` 允许插入 JSON,即使存在表格架构中未出现的额外字段(通过丢弃它们)。 +- `input_format_import_nested_json` 允许将嵌套 JSON 对象插入到 [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) 类型的列中。 :::note -Settings are specified as `GET` parameters for the HTTP interface or as additional command-line arguments prefixed with `--` for the `CLI` interface. +对于 HTTP 接口,设置作为 `GET` 参数指定;对于 `CLI` 接口,则作为前缀为 -- 的附加命令行参数。 ::: \ No newline at end of file diff --git a/docs/zh/faq/integration/oracle-odbc.md b/docs/zh/faq/integration/oracle-odbc.md index e22db1d8960..ca65f08686c 100644 --- a/docs/zh/faq/integration/oracle-odbc.md +++ b/docs/zh/faq/integration/oracle-odbc.md @@ -1,16 +1,16 @@ --- slug: /zh/faq/integration/oracle-odbc -title: What if I have a problem with encodings when using Oracle via ODBC? +title: 使用 Oracle ODBC 时遇到编码问题怎么办? toc_hidden: true sidebar_position: 20 --- -# What If I Have a Problem with Encodings When Using Oracle Via ODBC? {#oracle-odbc-encodings} +# 使用 Oracle ODBC 时遇到编码问题怎么办? {#oracle-odbc-encodings} -If you use Oracle as a source of ClickHouse external dictionaries via Oracle ODBC driver, you need to set the correct value for the `NLS_LANG` environment variable in `/etc/default/clickhouse`. For more information, see the [Oracle NLS_LANG FAQ](https://www.oracle.com/technetwork/products/globalization/nls-lang-099431.html). +如果您使用 Oracle 作为 ClickHouse 外部字典的数据源,并通过 Oracle ODBC 驱动程序,您需要在 `/etc/default/clickhouse` 中为 `NLS_LANG` 环境变量设置正确的值。更多信息,请参阅 [Oracle NLS_LANG FAQ](https://www.oracle.com/technetwork/products/globalization/nls-lang-099431.html)。 -**Example** +**示例** ``` sql NLS_LANG=RUSSIAN_RUSSIA.UTF8 -``` +``` \ No newline at end of file diff --git a/docs/zh/faq/operations/delete-old-data.md b/docs/zh/faq/operations/delete-old-data.md index 24181116bab..293ba8069fa 100644 --- a/docs/zh/faq/operations/delete-old-data.md +++ b/docs/zh/faq/operations/delete-old-data.md @@ -1,44 +1,44 @@ --- slug: /zh/faq/operations/delete-old-data -title: Is it possible to delete old records from a ClickHouse table? +title: 是否可以从ClickHouse表中删除旧记录? toc_hidden: true sidebar_position: 20 --- -# Is It Possible to Delete Old Records from a ClickHouse Table? {#is-it-possible-to-delete-old-records-from-a-clickhouse-table} +# 是否可以从ClickHouse表中删除旧记录? {#is-it-possible-to-delete-old-records-from-a-clickhouse-table} -The short answer is “yes”. ClickHouse has multiple mechanisms that allow freeing up disk space by removing old data. Each mechanism is aimed for different scenarios. +简短的答案是“可以”。ClickHouse具有多种机制,允许通过删除旧数据来释放磁盘空间。每种机制都针对不同的场景。 ## TTL {#ttl} -ClickHouse allows to automatically drop values when some condition happens. This condition is configured as an expression based on any columns, usually just static offset for any timestamp column. +ClickHouse 允许在某些条件发生时自动删除值。这个条件被配置为基于任何列的表达式,通常只是针对任何时间戳列的静态偏移量。 -The key advantage of this approach is that it does not need any external system to trigger, once TTL is configured, data removal happens automatically in background. +这种方法的主要优势是它不需要任何外部系统来触发,一旦配置了 TTL,数据删除就会自动在后台发生。 :::note -TTL can also be used to move data not only to [/dev/null](https://en.wikipedia.org/wiki/Null_device), but also between different storage systems, like from SSD to HDD. +TTL 也可以用来将数据移动到非 [/dev/null](https://en.wikipedia.org/wiki/Null_device) 的不同存储系统,例如从 SSD 到 HDD。 ::: -More details on [configuring TTL](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-ttl). 
+有关 [配置 TTL](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-ttl) 的更多详细信息。 ## ALTER DELETE {#alter-delete} -ClickHouse does not have real-time point deletes like in [OLTP](https://en.wikipedia.org/wiki/Online_transaction_processing) databases. The closest thing to them are mutations. They are issued as `ALTER ... DELETE` or `ALTER ... UPDATE` queries to distinguish from normal `DELETE` or `UPDATE` as they are asynchronous batch operations, not immediate modifications. The rest of syntax after `ALTER TABLE` prefix is similar. +ClickHouse没有像[OLTP](https://en.wikipedia.org/wiki/Online_transaction_processing)数据库那样的实时点删除。最接近的东西是 `Mutation`,执行 `ALTER ... DELETE` 或 `ALTER ... UPDATE` 查询,以区别于普通的`DELETE`或`UPDATE`。因为它们是异步批处理操作,而不是立即修改。`ALTER TABLE`前缀后的其余语法相似。 -`ALTER DELETE` can be issued to flexibly remove old data. If you need to do it regularly, the main downside will be the need to have an external system to submit the query. There are also some performance considerations since mutation rewrite complete parts even there’s only a single row to be deleted. +`ALTER DELETE`可以灵活地用来删除旧数据。如果你需要定期这样做,主要缺点将是需要有一个外部系统来提交查询。还有一些性能方面的考虑,因为即使只有一行要被删除,突变也会重写完整部分。 -This is the most common approach to make your system based on ClickHouse [GDPR](https://gdpr-info.eu)-compliant. +这是使基于ClickHouse的系统符合[GDPR](https://gdpr-info.eu)的最常见方法。 -More details on [mutations](../../sql-reference/statements/alter.md/#alter-mutations). +有关[mutations](../../sql-reference/statements/alter.md/#alter-mutations)的更多详细信息。 ## DROP PARTITION {#drop-partition} -`ALTER TABLE ... DROP PARTITION` provides a cost-efficient way to drop a whole partition. It’s not that flexible and needs proper partitioning scheme configured on table creation, but still covers most common cases. Like mutations need to be executed from an external system for regular use. +`ALTER TABLE ... DROP PARTITION`提供了一种成本效率高的方式来删除整个分区。它不是那么灵活,需要在创建表时配置适当的分区方案,但仍然涵盖了大多数常见情况。像 mutations 一样,需要从外部系统执行以进行常规使用。 -More details on [manipulating partitions](../../sql-reference/statements/alter/partition.mdx/#alter_drop-partition). +有关[操作分区](../../sql-reference/statements/alter/partition.mdx/#alter_drop-partition)的更多详细信息。 ## TRUNCATE {#truncate} -It’s rather radical to drop all data from a table, but in some cases it might be exactly what you need. +从表中删除所有数据是相当激进的,但在某些情况下可能正是您所需要的。 -More details on [table truncation](../../sql-reference/statements/truncate.md). 
\ No newline at end of file +有关[truncate](../../sql-reference/statements/truncate.md)的更多详细信息。 diff --git a/docs/zh/sql-reference/statements/system.md b/docs/zh/sql-reference/statements/system.md index 87d077fcdb9..5b5f101ebc4 100644 --- a/docs/zh/sql-reference/statements/system.md +++ b/docs/zh/sql-reference/statements/system.md @@ -248,7 +248,7 @@ SYSTEM START REPLICATION QUEUES [ON CLUSTER cluster_name] [[db.]replicated_merge ``` sql -SYSTEM SYNC REPLICA [db.]replicated_merge_tree_family_table_name [STRICT | LIGHTWEIGHT | PULL] +SYSTEM SYNC REPLICA [db.]replicated_merge_tree_family_table_name [STRICT | LIGHTWEIGHT [FROM 'srcReplica1'[, 'srcReplica2'[, ...]]] | PULL] ``` ### RESTART REPLICA {#query_language-system-restart-replica} diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index 4e81c0a75f6..0988e1eb4a1 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -1010,6 +1010,7 @@ void Client::addOptions(OptionsDescription & options_description) ("config,c", po::value(), "config-file path (another shorthand)") ("connection", po::value(), "connection to use (from the client config), by default connection name is hostname") ("secure,s", "Use TLS connection") + ("no-secure,s", "Don't use TLS connection") ("user,u", po::value()->default_value("default"), "user") ("password", po::value(), "password") ("ask-password", "ask-password") @@ -1151,6 +1152,8 @@ void Client::processOptions(const OptionsDescription & options_description, interleave_queries_files = options["interleave-queries-file"].as>(); if (options.count("secure")) config().setBool("secure", true); + if (options.count("no-secure")) + config().setBool("no-secure", true); if (options.count("user") && !options["user"].defaulted()) config().setString("user", options["user"].as()); if (options.count("password")) diff --git a/programs/copier/ClusterCopier.h b/programs/copier/ClusterCopier.h index 063b13e9078..01f8b30f546 100644 --- a/programs/copier/ClusterCopier.h +++ b/programs/copier/ClusterCopier.h @@ -20,7 +20,7 @@ public: const String & host_id_, const String & proxy_database_name_, ContextMutablePtr context_, - Poco::Logger * log_) + LoggerRawPtr log_) : WithMutableContext(context_), task_zookeeper_path(task_path_), host_id(host_id_), @@ -230,7 +230,7 @@ private: bool experimental_use_sample_offset{false}; - Poco::Logger * log; + LoggerRawPtr log; UInt64 max_table_tries = 3; UInt64 max_shard_partition_tries = 3; diff --git a/programs/copier/ZooKeeperStaff.h b/programs/copier/ZooKeeperStaff.h index 36dcfa50842..bbdec230d2d 100644 --- a/programs/copier/ZooKeeperStaff.h +++ b/programs/copier/ZooKeeperStaff.h @@ -177,7 +177,7 @@ public: auto watch_callback = [my_stale = stale] (const Coordination::WatchResponse & rsp) { - auto logger = &Poco::Logger::get("ClusterCopier"); + auto logger = getLogger("ClusterCopier"); if (rsp.error == Coordination::Error::ZOK) { switch (rsp.type) diff --git a/programs/disks/DisksApp.cpp b/programs/disks/DisksApp.cpp index ded324fd0da..4b3b83238a0 100644 --- a/programs/disks/DisksApp.cpp +++ b/programs/disks/DisksApp.cpp @@ -160,7 +160,7 @@ int DisksApp::main(const std::vector & /*args*/) } else { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "No config-file specifiged"); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "No config-file specified"); } if (config().has("save-logs")) diff --git a/programs/keeper-client/Commands.cpp b/programs/keeper-client/Commands.cpp index ab9252dd62e..d19e2ffe00f 100644 --- a/programs/keeper-client/Commands.cpp +++ 
b/programs/keeper-client/Commands.cpp @@ -413,13 +413,13 @@ void ReconfigCommand::execute(const DB::ASTKeeperQuery * query, DB::KeeperClient switch (operation) { case static_cast(ReconfigCommand::Operation::ADD): - joining = query->args[1].safeGet(); + joining = query->args[1].safeGet(); break; case static_cast(ReconfigCommand::Operation::REMOVE): - leaving = query->args[1].safeGet(); + leaving = query->args[1].safeGet(); break; case static_cast(ReconfigCommand::Operation::SET): - new_members = query->args[1].safeGet(); + new_members = query->args[1].safeGet(); break; default: UNREACHABLE(); diff --git a/programs/keeper-client/KeeperClient.cpp b/programs/keeper-client/KeeperClient.cpp index 7ed4499efbd..fa66a69687c 100644 --- a/programs/keeper-client/KeeperClient.cpp +++ b/programs/keeper-client/KeeperClient.cpp @@ -375,7 +375,7 @@ int KeeperClient::main(const std::vector & /* args */) if (!config().has("host") && !config().has("port") && !keys.empty()) { - LOG_INFO(&Poco::Logger::get("KeeperClient"), "Found keeper node in the config.xml, will use it for connection"); + LOG_INFO(getLogger("KeeperClient"), "Found keeper node in the config.xml, will use it for connection"); for (const auto & key : keys) { diff --git a/programs/keeper-converter/KeeperConverter.cpp b/programs/keeper-converter/KeeperConverter.cpp index 20448aafa2f..2b2759412ab 100644 --- a/programs/keeper-converter/KeeperConverter.cpp +++ b/programs/keeper-converter/KeeperConverter.cpp @@ -28,7 +28,7 @@ int mainEntryClickHouseKeeperConverter(int argc, char ** argv) po::store(po::command_line_parser(argc, argv).options(desc).run(), options); Poco::AutoPtr console_channel(new Poco::ConsoleChannel); - Poco::Logger * logger = &Poco::Logger::get("KeeperConverter"); + LoggerPtr logger = getLogger("KeeperConverter"); logger->setChannel(console_channel); if (options.count("help")) diff --git a/programs/keeper/CMakeLists.txt b/programs/keeper/CMakeLists.txt index b8f538f821c..143ded0ee85 100644 --- a/programs/keeper/CMakeLists.txt +++ b/programs/keeper/CMakeLists.txt @@ -95,6 +95,7 @@ if (BUILD_STANDALONE_KEEPER) ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Common/CurrentThread.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Common/NamedCollections/NamedCollections.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Common/NamedCollections/NamedCollectionConfiguration.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Common/Jemalloc.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Common/ZooKeeper/IKeeper.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Common/ZooKeeper/TestKeeper.cpp @@ -126,15 +127,17 @@ if (BUILD_STANDALONE_KEEPER) ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/DiskObjectStorage.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/DiskObjectStorageRemoteMetadataRestoreHelper.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/DiskObjectStorageCommon.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/ObjectStorageIteratorAsync.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/ObjectStorageIterator.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/StoredObject.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/S3/registerDiskS3.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/S3/S3Capabilities.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/S3/diskSettings.cpp + 
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/S3/DiskS3Utils.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/ObjectStorageFactory.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/MetadataStorageFactory.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/RegisterDiskObjectStorage.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/createReadBufferFromFileBase.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/ReadBufferFromRemoteFSGather.cpp diff --git a/programs/keeper/Keeper.cpp b/programs/keeper/Keeper.cpp index 109884ec899..c751702dc6f 100644 --- a/programs/keeper/Keeper.cpp +++ b/programs/keeper/Keeper.cpp @@ -624,7 +624,7 @@ catch (...) void Keeper::logRevision() const { - LOG_INFO(&Poco::Logger::get("Application"), + LOG_INFO(getLogger("Application"), "Starting ClickHouse Keeper {} (revision: {}, git hash: {}, build id: {}), PID {}", VERSION_STRING, ClickHouseRevision::getVersionRevision(), diff --git a/programs/library-bridge/CatBoostLibraryHandlerFactory.cpp b/programs/library-bridge/CatBoostLibraryHandlerFactory.cpp index 6ee078f6c5c..7ce896636e7 100644 --- a/programs/library-bridge/CatBoostLibraryHandlerFactory.cpp +++ b/programs/library-bridge/CatBoostLibraryHandlerFactory.cpp @@ -13,7 +13,7 @@ CatBoostLibraryHandlerFactory & CatBoostLibraryHandlerFactory::instance() } CatBoostLibraryHandlerFactory::CatBoostLibraryHandlerFactory() - : log(&Poco::Logger::get("CatBoostLibraryHandlerFactory")) + : log(getLogger("CatBoostLibraryHandlerFactory")) { } diff --git a/programs/library-bridge/CatBoostLibraryHandlerFactory.h b/programs/library-bridge/CatBoostLibraryHandlerFactory.h index 6ba3fe84ec9..e29834cbe79 100644 --- a/programs/library-bridge/CatBoostLibraryHandlerFactory.h +++ b/programs/library-bridge/CatBoostLibraryHandlerFactory.h @@ -31,7 +31,7 @@ private: /// map: model path --> catboost library handler std::unordered_map library_handlers TSA_GUARDED_BY(mutex); std::mutex mutex; - Poco::Logger * log; + LoggerPtr log; }; } diff --git a/programs/library-bridge/ExternalDictionaryLibraryAPI.cpp b/programs/library-bridge/ExternalDictionaryLibraryAPI.cpp index 70cd6fca375..4fa5c991f0f 100644 --- a/programs/library-bridge/ExternalDictionaryLibraryAPI.cpp +++ b/programs/library-bridge/ExternalDictionaryLibraryAPI.cpp @@ -9,40 +9,40 @@ const char DICT_LOGGER_NAME[] = "LibraryDictionarySourceExternal"; void ExternalDictionaryLibraryAPI::log(LogLevel level, CString msg) { - auto & logger = Poco::Logger::get(DICT_LOGGER_NAME); + auto logger = getLogger(DICT_LOGGER_NAME); switch (level) { case LogLevel::TRACE: - if (logger.trace()) - logger.trace(msg); + if (logger->trace()) + logger->trace(msg); break; case LogLevel::DEBUG: - if (logger.debug()) - logger.debug(msg); + if (logger->debug()) + logger->debug(msg); break; case LogLevel::INFORMATION: - if (logger.information()) - logger.information(msg); + if (logger->information()) + logger->information(msg); break; case LogLevel::NOTICE: - if (logger.notice()) - logger.notice(msg); + if (logger->notice()) + logger->notice(msg); break; case LogLevel::WARNING: - if (logger.warning()) - logger.warning(msg); + if (logger->warning()) + logger->warning(msg); break; case LogLevel::ERROR: - if (logger.error()) - logger.error(msg); + if (logger->error()) + logger->error(msg); break; case LogLevel::CRITICAL: - if (logger.critical()) - logger.critical(msg); + if (logger->critical()) + logger->critical(msg); break; case LogLevel::FATAL: - if (logger.fatal()) - logger.fatal(msg); + if 
(logger->fatal()) + logger->fatal(msg); break; } } diff --git a/programs/library-bridge/ExternalDictionaryLibraryHandlerFactory.cpp b/programs/library-bridge/ExternalDictionaryLibraryHandlerFactory.cpp index 6acd9af20ed..1b2b57beeb1 100644 --- a/programs/library-bridge/ExternalDictionaryLibraryHandlerFactory.cpp +++ b/programs/library-bridge/ExternalDictionaryLibraryHandlerFactory.cpp @@ -26,7 +26,7 @@ void ExternalDictionaryLibraryHandlerFactory::create( if (library_handlers.contains(dictionary_id)) { - LOG_WARNING(&Poco::Logger::get("ExternalDictionaryLibraryHandlerFactory"), "Library handler with dictionary id {} already exists", dictionary_id); + LOG_WARNING(getLogger("ExternalDictionaryLibraryHandlerFactory"), "Library handler with dictionary id {} already exists", dictionary_id); return; } diff --git a/programs/library-bridge/LibraryBridgeHandlerFactory.cpp b/programs/library-bridge/LibraryBridgeHandlerFactory.cpp index 4af1f8355e8..e5ab22f2d40 100644 --- a/programs/library-bridge/LibraryBridgeHandlerFactory.cpp +++ b/programs/library-bridge/LibraryBridgeHandlerFactory.cpp @@ -12,7 +12,7 @@ LibraryBridgeHandlerFactory::LibraryBridgeHandlerFactory( size_t keep_alive_timeout_, ContextPtr context_) : WithContext(context_) - , log(&Poco::Logger::get(name_)) + , log(getLogger(name_)) , name(name_) , keep_alive_timeout(keep_alive_timeout_) { diff --git a/programs/library-bridge/LibraryBridgeHandlerFactory.h b/programs/library-bridge/LibraryBridgeHandlerFactory.h index 7565052c4cb..5b0f088bc29 100644 --- a/programs/library-bridge/LibraryBridgeHandlerFactory.h +++ b/programs/library-bridge/LibraryBridgeHandlerFactory.h @@ -19,7 +19,7 @@ public: std::unique_ptr createRequestHandler(const HTTPServerRequest & request) override; private: - Poco::Logger * log; + LoggerPtr log; const std::string name; const size_t keep_alive_timeout; }; diff --git a/programs/library-bridge/LibraryBridgeHandlers.cpp b/programs/library-bridge/LibraryBridgeHandlers.cpp index b0b465460e0..ab146f458df 100644 --- a/programs/library-bridge/LibraryBridgeHandlers.cpp +++ b/programs/library-bridge/LibraryBridgeHandlers.cpp @@ -47,7 +47,7 @@ namespace if (!response.sent()) *response.send() << message << '\n'; - LOG_WARNING(&Poco::Logger::get("LibraryBridge"), fmt::runtime(message)); + LOG_WARNING(getLogger("LibraryBridge"), fmt::runtime(message)); } std::shared_ptr parseColumns(String && column_string) @@ -92,7 +92,7 @@ static void writeData(Block data, OutputFormatPtr format) ExternalDictionaryLibraryBridgeRequestHandler::ExternalDictionaryLibraryBridgeRequestHandler(size_t keep_alive_timeout_, ContextPtr context_) : WithContext(context_) , keep_alive_timeout(keep_alive_timeout_) - , log(&Poco::Logger::get("ExternalDictionaryLibraryBridgeRequestHandler")) + , log(getLogger("ExternalDictionaryLibraryBridgeRequestHandler")) { } @@ -380,7 +380,7 @@ void ExternalDictionaryLibraryBridgeRequestHandler::handleRequest(HTTPServerRequ ExternalDictionaryLibraryBridgeExistsHandler::ExternalDictionaryLibraryBridgeExistsHandler(size_t keep_alive_timeout_, ContextPtr context_) : WithContext(context_) , keep_alive_timeout(keep_alive_timeout_) - , log(&Poco::Logger::get("ExternalDictionaryLibraryBridgeExistsHandler")) + , log(getLogger("ExternalDictionaryLibraryBridgeExistsHandler")) { } @@ -419,7 +419,7 @@ CatBoostLibraryBridgeRequestHandler::CatBoostLibraryBridgeRequestHandler( size_t keep_alive_timeout_, ContextPtr context_) : WithContext(context_) , keep_alive_timeout(keep_alive_timeout_) - , 
log(&Poco::Logger::get("CatBoostLibraryBridgeRequestHandler")) + , log(getLogger("CatBoostLibraryBridgeRequestHandler")) { } @@ -623,7 +623,7 @@ void CatBoostLibraryBridgeRequestHandler::handleRequest(HTTPServerRequest & requ CatBoostLibraryBridgeExistsHandler::CatBoostLibraryBridgeExistsHandler(size_t keep_alive_timeout_, ContextPtr context_) : WithContext(context_) , keep_alive_timeout(keep_alive_timeout_) - , log(&Poco::Logger::get("CatBoostLibraryBridgeExistsHandler")) + , log(getLogger("CatBoostLibraryBridgeExistsHandler")) { } diff --git a/programs/library-bridge/LibraryBridgeHandlers.h b/programs/library-bridge/LibraryBridgeHandlers.h index 4f08d7a6084..1db71eb24cb 100644 --- a/programs/library-bridge/LibraryBridgeHandlers.h +++ b/programs/library-bridge/LibraryBridgeHandlers.h @@ -26,7 +26,7 @@ private: static constexpr inline auto FORMAT = "RowBinary"; const size_t keep_alive_timeout; - Poco::Logger * log; + LoggerPtr log; }; @@ -40,7 +40,7 @@ public: private: const size_t keep_alive_timeout; - Poco::Logger * log; + LoggerPtr log; }; @@ -69,7 +69,7 @@ public: private: const size_t keep_alive_timeout; - Poco::Logger * log; + LoggerPtr log; }; @@ -83,7 +83,7 @@ public: private: const size_t keep_alive_timeout; - Poco::Logger * log; + LoggerPtr log; }; } diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index c9841277b6d..443d4a52fa3 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -221,7 +221,7 @@ void LocalServer::tryInitPath() { // The path is not provided explicitly - use a unique path in the system temporary directory // (or in the current dir if temporary don't exist) - Poco::Logger * log = &logger(); + LoggerRawPtr log = &logger(); std::filesystem::path parent_folder; std::filesystem::path default_path; @@ -290,6 +290,11 @@ void LocalServer::cleanup() { connection.reset(); + /// Suggestions are loaded async in a separate thread and it can use global context. + /// We should reset it before resetting global_context. 
+ if (suggest) + suggest.reset(); + if (global_context) { global_context->shutdown(); @@ -626,7 +631,7 @@ void LocalServer::processConfig() tryInitPath(); - Poco::Logger * log = &logger(); + LoggerRawPtr log = &logger(); /// Maybe useless if (config().has("macros")) diff --git a/programs/odbc-bridge/ColumnInfoHandler.h b/programs/odbc-bridge/ColumnInfoHandler.h index e3087701182..ca7044fdf32 100644 --- a/programs/odbc-bridge/ColumnInfoHandler.h +++ b/programs/odbc-bridge/ColumnInfoHandler.h @@ -18,7 +18,7 @@ class ODBCColumnsInfoHandler : public HTTPRequestHandler, WithContext public: ODBCColumnsInfoHandler(size_t keep_alive_timeout_, ContextPtr context_) : WithContext(context_) - , log(&Poco::Logger::get("ODBCColumnsInfoHandler")) + , log(getLogger("ODBCColumnsInfoHandler")) , keep_alive_timeout(keep_alive_timeout_) { } @@ -26,7 +26,7 @@ public: void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) override; private: - Poco::Logger * log; + LoggerPtr log; size_t keep_alive_timeout; }; diff --git a/programs/odbc-bridge/IdentifierQuoteHandler.h b/programs/odbc-bridge/IdentifierQuoteHandler.h index ff5c02ca07b..7b78c5b4b93 100644 --- a/programs/odbc-bridge/IdentifierQuoteHandler.h +++ b/programs/odbc-bridge/IdentifierQuoteHandler.h @@ -16,7 +16,7 @@ class IdentifierQuoteHandler : public HTTPRequestHandler, WithContext public: IdentifierQuoteHandler(size_t keep_alive_timeout_, ContextPtr context_) : WithContext(context_) - , log(&Poco::Logger::get("IdentifierQuoteHandler")) + , log(getLogger("IdentifierQuoteHandler")) , keep_alive_timeout(keep_alive_timeout_) { } @@ -24,7 +24,7 @@ public: void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) override; private: - Poco::Logger * log; + LoggerPtr log; size_t keep_alive_timeout; }; diff --git a/programs/odbc-bridge/MainHandler.h b/programs/odbc-bridge/MainHandler.h index 7977245ff82..ed0c6b2e28c 100644 --- a/programs/odbc-bridge/MainHandler.h +++ b/programs/odbc-bridge/MainHandler.h @@ -24,7 +24,7 @@ public: ContextPtr context_, const String & mode_) : WithContext(context_) - , log(&Poco::Logger::get("ODBCHandler")) + , log(getLogger("ODBCHandler")) , keep_alive_timeout(keep_alive_timeout_) , mode(mode_) { @@ -33,7 +33,7 @@ public: void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) override; private: - Poco::Logger * log; + LoggerPtr log; size_t keep_alive_timeout; String mode; diff --git a/programs/odbc-bridge/ODBCBlockInputStream.cpp b/programs/odbc-bridge/ODBCBlockInputStream.cpp index 3aa3d9a652b..c46144c3dc8 100644 --- a/programs/odbc-bridge/ODBCBlockInputStream.cpp +++ b/programs/odbc-bridge/ODBCBlockInputStream.cpp @@ -23,7 +23,7 @@ namespace ErrorCodes ODBCSource::ODBCSource( nanodbc::ConnectionHolderPtr connection_holder, const std::string & query_str, const Block & sample_block, const UInt64 max_block_size_) : ISource(sample_block) - , log(&Poco::Logger::get("ODBCSource")) + , log(getLogger("ODBCSource")) , max_block_size{max_block_size_} , query(query_str) { diff --git a/programs/odbc-bridge/ODBCBlockInputStream.h b/programs/odbc-bridge/ODBCBlockInputStream.h index 79d5816ad01..dedd98f930f 100644 --- a/programs/odbc-bridge/ODBCBlockInputStream.h +++ b/programs/odbc-bridge/ODBCBlockInputStream.h @@ -30,7 +30,7 @@ private: column.insertFrom(sample_column, 0); } - Poco::Logger * log; + LoggerPtr log; const UInt64 max_block_size; 
ExternalResultDescription description; diff --git a/programs/odbc-bridge/ODBCBlockOutputStream.cpp b/programs/odbc-bridge/ODBCBlockOutputStream.cpp index eb5901ad3e1..87c09d1e757 100644 --- a/programs/odbc-bridge/ODBCBlockOutputStream.cpp +++ b/programs/odbc-bridge/ODBCBlockOutputStream.cpp @@ -19,7 +19,7 @@ ODBCSink::ODBCSink( ContextPtr local_context_, IdentifierQuotingStyle quoting_) : ISink(sample_block_) - , log(&Poco::Logger::get("ODBCSink")) + , log(getLogger("ODBCSink")) , connection_holder(std::move(connection_holder_)) , db_name(remote_database_name_) , table_name(remote_table_name_) diff --git a/programs/odbc-bridge/ODBCBlockOutputStream.h b/programs/odbc-bridge/ODBCBlockOutputStream.h index f5e7b4e3a2d..06edce92e1a 100644 --- a/programs/odbc-bridge/ODBCBlockOutputStream.h +++ b/programs/odbc-bridge/ODBCBlockOutputStream.h @@ -30,7 +30,7 @@ protected: void consume(Chunk chunk) override; private: - Poco::Logger * log; + LoggerPtr log; nanodbc::ConnectionHolderPtr connection_holder; std::string db_name; diff --git a/programs/odbc-bridge/ODBCHandlerFactory.cpp b/programs/odbc-bridge/ODBCHandlerFactory.cpp index dd21358df8c..eebb0c24c7a 100644 --- a/programs/odbc-bridge/ODBCHandlerFactory.cpp +++ b/programs/odbc-bridge/ODBCHandlerFactory.cpp @@ -11,7 +11,7 @@ namespace DB ODBCBridgeHandlerFactory::ODBCBridgeHandlerFactory(const std::string & name_, size_t keep_alive_timeout_, ContextPtr context_) : WithContext(context_) - , log(&Poco::Logger::get(name_)) + , log(getLogger(name_)) , name(name_) , keep_alive_timeout(keep_alive_timeout_) { diff --git a/programs/odbc-bridge/ODBCHandlerFactory.h b/programs/odbc-bridge/ODBCHandlerFactory.h index 3e3da7c9f24..4aaf1b55453 100644 --- a/programs/odbc-bridge/ODBCHandlerFactory.h +++ b/programs/odbc-bridge/ODBCHandlerFactory.h @@ -22,7 +22,7 @@ public: std::unique_ptr createRequestHandler(const HTTPServerRequest & request) override; private: - Poco::Logger * log; + LoggerPtr log; std::string name; size_t keep_alive_timeout; }; diff --git a/programs/odbc-bridge/ODBCPooledConnectionFactory.h b/programs/odbc-bridge/ODBCPooledConnectionFactory.h index a10055c6659..b70e45f2b9d 100644 --- a/programs/odbc-bridge/ODBCPooledConnectionFactory.h +++ b/programs/odbc-bridge/ODBCPooledConnectionFactory.h @@ -97,7 +97,7 @@ T execute(nanodbc::ConnectionHolderPtr connection_holder, std::function #include #include -#include -#include +#include +#include #include #include #include "MetricsTransmitter.h" @@ -365,7 +365,7 @@ void Server::createServer( namespace { -void setOOMScore(int value, Poco::Logger * log) +void setOOMScore(int value, LoggerRawPtr log) { try { @@ -450,7 +450,7 @@ void checkForUsersNotInMainConfig( const Poco::Util::AbstractConfiguration & config, const std::string & config_path, const std::string & users_config_path, - Poco::Logger * log) + LoggerPtr log) { if (config.getBool("skip_check_for_incorrect_settings", false)) return; @@ -1467,6 +1467,8 @@ try global_context->reloadAuxiliaryZooKeepersConfigIfChanged(config); + global_context->reloadQueryMaskingRulesIfChanged(config); + std::lock_guard lock(servers_lock); updateServers(*config, server_pool, async_metrics, servers, servers_to_start_before_tables); } @@ -1738,6 +1740,17 @@ try LOG_INFO(log, "Loading metadata from {}", path_str); LoadTaskPtrs load_metadata_tasks; + + // Make sure that if exception is thrown during startup async, new async loading jobs are not going to be called. 
+ // This is important for the case when exception is thrown from loading of metadata with `async_load_databases = false` + // to avoid simultaneously running table startups and destructing databases. + SCOPE_EXIT_SAFE( + LOG_INFO(log, "Stopping AsyncLoader."); + + // Waits for all currently running jobs to finish and do not run any other pending jobs. + global_context->getAsyncLoader().stop(); + ); + try { auto & database_catalog = DatabaseCatalog::instance(); @@ -1886,6 +1899,7 @@ try /// Must be done after initialization of `servers`, because async_metrics will access `servers` variable from its thread. async_metrics.start(); + global_context->setAsynchronousMetrics(&async_metrics); main_config_reloader->start(); access_control.startPeriodicReloading(); @@ -2002,6 +2016,12 @@ try else LOG_INFO(log, "Closed all listening sockets."); + /// Wait for unfinished backups and restores. + /// This must be done after closing listening sockets (no more backups/restores) but before ProcessList::killAllQueries + /// (because killAllQueries() will cancel all running backups/restores). + if (server_settings.shutdown_wait_backups_and_restores) + global_context->waitAllBackupsAndRestores(); + /// Killing remaining queries. if (!server_settings.shutdown_wait_unfinished_queries) global_context->getProcessList().killAllQueries(); @@ -2470,7 +2490,7 @@ void Server::stopServers( const ServerType & server_type ) const { - Poco::Logger * log = &logger(); + LoggerRawPtr log = &logger(); /// Remove servers once all their connections are closed auto check_server = [&log](const char prefix[], auto & server) @@ -2509,7 +2529,7 @@ void Server::updateServers( std::vector & servers, std::vector & servers_to_start_before_tables) { - Poco::Logger * log = &logger(); + LoggerRawPtr log = &logger(); const auto listen_hosts = getListenHosts(config); const auto interserver_listen_hosts = getInterserverListenHosts(config); diff --git a/programs/server/binary.html b/programs/server/binary.html index 988dd33a72a..74095dff537 100644 --- a/programs/server/binary.html +++ b/programs/server/binary.html @@ -60,10 +60,16 @@ /// If it is hosted on server, assume that it is the address of ClickHouse. if (location.protocol != 'file:') { host = location.origin; - user = 'default'; add_http_cors_header = false; } + if (window.location.search) { + const params = new URLSearchParams(window.location.search); + if (params.has('host')) { host = params.get('host'); } + if (params.has('user')) { user = params.get('user'); } + if (params.has('password')) { password = params.get('password'); } + } + let map = L.map('space', { crs: L.CRS.Simple, center: [-512, 512], diff --git a/programs/server/config.xml b/programs/server/config.xml index e1428b17084..6a40818332b 100644 --- a/programs/server/config.xml +++ b/programs/server/config.xml @@ -440,6 +440,9 @@ 10000 + + /var/lib/clickhouse/caches/ + false @@ -1559,6 +1562,10 @@ 30000000 + + backups + + mv_1_1 ---> ds_1_1 ---> mv_2_1 --┬-> ds_2_1 ---> mv_3_1 ---> ds_3_1 + * | | + * └--> mv_1_2 ---> ds_1_2 ---> mv_2_2 --┘ + * + * Here we want to avoid deduplication for two different blocks generated from `mv_2_1` and `mv_2_2` that will + * be inserted into `ds_2_1`. 
+ */ + auto insert_deduplication_token = insert_settings.insert_deduplication_token.value; + + if (table_id.hasUUID()) + insert_deduplication_token += "_" + toString(table_id.uuid); + else + insert_deduplication_token += "_" + table_id.getFullNameNotQuoted(); + + insert_context->setSetting("insert_deduplication_token", insert_deduplication_token); + } // Processing of blocks for MVs is done block by block, and there will // be no parallel reading after (plus it is not a costless operation) @@ -267,7 +293,7 @@ Chain buildPushingToViewsChain( if (view == nullptr) { LOG_WARNING( - &Poco::Logger::get("PushingToViews"), "Trying to access table {} but it doesn't exist", view_id.getFullTableName()); + getLogger("PushingToViews"), "Trying to access table {} but it doesn't exist", view_id.getFullTableName()); continue; } @@ -310,7 +336,7 @@ Chain buildPushingToViewsChain( // In case the materialized view is dropped/detached at this point, we register a warning and ignore it assert(materialized_view->is_dropped || materialized_view->is_detached); LOG_WARNING( - &Poco::Logger::get("PushingToViews"), "Trying to access table {} but it doesn't exist", view_id.getFullTableName()); + getLogger("PushingToViews"), "Trying to access table {} but it doesn't exist", view_id.getFullTableName()); continue; } @@ -341,7 +367,7 @@ Chain buildPushingToViewsChain( /// It may happen if materialize view query was changed and it doesn't depend on this source table anymore. /// See setting `allow_experimental_alter_materialized_view_structure` LOG_DEBUG( - &Poco::Logger::get("PushingToViews"), "Table '{}' is not a source for view '{}' anymore, current source is '{}'", + getLogger("PushingToViews"), "Table '{}' is not a source for view '{}' anymore, current source is '{}'", select_query.select_table_id.getFullTableName(), view_id.getFullTableName(), table_id); continue; } @@ -835,14 +861,14 @@ void FinalizingViewsTransform::work() /// Exception will be ignored, it is saved here for the system.query_views_log if (materialized_views_ignore_errors) - tryLogException(view.exception, &Poco::Logger::get("PushingToViews"), "Cannot push to the storage, ignoring the error"); + tryLogException(view.exception, getLogger("PushingToViews"), "Cannot push to the storage, ignoring the error"); } else { view.runtime_stats->setStatus(QueryViewsLogElement::ViewStatus::QUERY_FINISH); LOG_TRACE( - &Poco::Logger::get("PushingToViews"), + getLogger("PushingToViews"), "Pushing ({}) from {} to {} took {} ms.", views_data->max_threads <= 1 ? "sequentially" : ("parallel " + std::to_string(views_data->max_threads)), views_data->source_storage_id.getNameForLogs(), diff --git a/src/QueryPipeline/ExecutionSpeedLimits.cpp b/src/QueryPipeline/ExecutionSpeedLimits.cpp index 9ceaa4921c7..f8ae4c76d0f 100644 --- a/src/QueryPipeline/ExecutionSpeedLimits.cpp +++ b/src/QueryPipeline/ExecutionSpeedLimits.cpp @@ -78,17 +78,17 @@ void ExecutionSpeedLimits::throttle( read_bytes / elapsed_seconds, min_execution_bps); - /// If the predicted execution time is longer than `max_execution_time`. - if (max_execution_time != 0 && total_rows_to_read && read_rows) + /// If the predicted execution time is longer than `max_estimated_execution_time`. 
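For context on the deduplication-token handling added above: the per-table suffixing is an internal detail, but the setting itself is user-facing. A rough usage sketch follows; the `landing` table is hypothetical, and block-level deduplication applies to Replicated*MergeTree tables (or plain MergeTree with a non-zero `non_replicated_deduplication_window`).

``` sql
-- Retrying an insert with the same token lets the server drop the duplicate block
INSERT INTO landing SETTINGS insert_deduplication_token = 'batch-42' VALUES (1, 'a');
INSERT INTO landing SETTINGS insert_deduplication_token = 'batch-42' VALUES (1, 'a'); -- deduplicated

-- Internally the token is extended with the target table UUID (or full name), so blocks
-- generated for different materialized-view destinations no longer collide.
```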
+ if (max_estimated_execution_time != 0 && total_rows_to_read && read_rows) { double estimated_execution_time_seconds = elapsed_seconds * (static_cast(total_rows_to_read) / read_rows); - if (timeout_overflow_mode == OverflowMode::THROW && estimated_execution_time_seconds > max_execution_time.totalSeconds()) + if (timeout_overflow_mode == OverflowMode::THROW && estimated_execution_time_seconds > max_estimated_execution_time.totalSeconds()) throw Exception( ErrorCodes::TOO_SLOW, "Estimated query execution time ({} seconds) is too long. Maximum: {}. Estimated rows to process: {}", estimated_execution_time_seconds, - max_execution_time.totalSeconds(), + max_estimated_execution_time.totalSeconds(), total_rows_to_read); } diff --git a/src/QueryPipeline/ExecutionSpeedLimits.h b/src/QueryPipeline/ExecutionSpeedLimits.h index eed8b5c3248..0def483123a 100644 --- a/src/QueryPipeline/ExecutionSpeedLimits.h +++ b/src/QueryPipeline/ExecutionSpeedLimits.h @@ -21,6 +21,7 @@ public: size_t max_execution_bps = 0; Poco::Timespan max_execution_time = 0; + Poco::Timespan max_estimated_execution_time = 0; /// Verify that the speed is not too low after the specified time has elapsed. Poco::Timespan timeout_before_checking_execution_speed = 0; diff --git a/src/QueryPipeline/RemoteQueryExecutor.cpp b/src/QueryPipeline/RemoteQueryExecutor.cpp index a43571c8114..136a3bb09c6 100644 --- a/src/QueryPipeline/RemoteQueryExecutor.cpp +++ b/src/QueryPipeline/RemoteQueryExecutor.cpp @@ -716,6 +716,7 @@ void RemoteQueryExecutor::sendExternalTables() limits.mode = LimitsMode::LIMITS_TOTAL; limits.speed_limits.max_execution_time = settings.max_execution_time; limits.timeout_overflow_mode = settings.timeout_overflow_mode; + limits.speed_limits.max_estimated_execution_time = settings.max_estimated_execution_time; for (size_t i = 0; i < count; ++i) { diff --git a/src/QueryPipeline/RemoteQueryExecutor.h b/src/QueryPipeline/RemoteQueryExecutor.h index 5a8ccc2592b..444f1258f3e 100644 --- a/src/QueryPipeline/RemoteQueryExecutor.h +++ b/src/QueryPipeline/RemoteQueryExecutor.h @@ -186,7 +186,7 @@ public: void setMainTable(StorageID main_table_) { main_table = std::move(main_table_); } - void setLogger(Poco::Logger * logger) { log = logger; } + void setLogger(LoggerPtr logger) { log = logger; } const Block & getHeader() const { return header; } @@ -283,7 +283,7 @@ private: PoolMode pool_mode = PoolMode::GET_MANY; StorageID main_table = StorageID::createEmpty(); - Poco::Logger * log = nullptr; + LoggerPtr log = nullptr; GetPriorityForLoadBalancing::Func priority_func; diff --git a/src/Server/CertificateReloader.cpp b/src/Server/CertificateReloader.cpp index 8795d4807de..c974f450c9a 100644 --- a/src/Server/CertificateReloader.cpp +++ b/src/Server/CertificateReloader.cpp @@ -105,7 +105,7 @@ CertificateReloader::Data::Data(std::string cert_path, std::string key_path, std } -bool CertificateReloader::File::changeIfModified(std::string new_path, Poco::Logger * logger) +bool CertificateReloader::File::changeIfModified(std::string new_path, LoggerPtr logger) { std::error_code ec; std::filesystem::file_time_type new_modification_time = std::filesystem::last_write_time(new_path, ec); diff --git a/src/Server/CertificateReloader.h b/src/Server/CertificateReloader.h index 9f04179b8d6..028914e682f 100644 --- a/src/Server/CertificateReloader.h +++ b/src/Server/CertificateReloader.h @@ -14,6 +14,7 @@ #include #include #include +#include namespace DB @@ -51,7 +52,7 @@ public: private: CertificateReloader() = default; - Poco::Logger * log = 
&Poco::Logger::get("CertificateReloader"); + LoggerPtr log = getLogger("CertificateReloader"); struct File { @@ -61,7 +62,7 @@ private: std::string path; std::filesystem::file_time_type modification_time; - bool changeIfModified(std::string new_path, Poco::Logger * logger); + bool changeIfModified(std::string new_path, LoggerPtr logger); }; File cert_file{"certificate"}; diff --git a/src/Server/GRPCServer.cpp b/src/Server/GRPCServer.cpp index 6bb6ba139ad..f31a8d6feb5 100644 --- a/src/Server/GRPCServer.cpp +++ b/src/Server/GRPCServer.cpp @@ -76,7 +76,7 @@ namespace static std::once_flag once_flag; std::call_once(once_flag, [&config] { - static Poco::Logger * logger = &Poco::Logger::get("grpc"); + static LoggerPtr logger = getLogger("grpc"); gpr_set_log_function([](gpr_log_func_args* args) { if (args->severity == GPR_LOG_SEVERITY_DEBUG) @@ -614,7 +614,7 @@ namespace class Call { public: - Call(CallType call_type_, std::unique_ptr responder_, IServer & iserver_, Poco::Logger * log_); + Call(CallType call_type_, std::unique_ptr responder_, IServer & iserver_, LoggerPtr log_); ~Call(); void start(const std::function & on_finish_call_callback); @@ -656,7 +656,7 @@ namespace const CallType call_type; std::unique_ptr responder; IServer & iserver; - Poco::Logger * log = nullptr; + LoggerPtr log = nullptr; std::optional session; ContextMutablePtr query_context; @@ -718,7 +718,7 @@ namespace }; // NOLINTEND(clang-analyzer-optin.performance.Padding) - Call::Call(CallType call_type_, std::unique_ptr responder_, IServer & iserver_, Poco::Logger * log_) + Call::Call(CallType call_type_, std::unique_ptr responder_, IServer & iserver_, LoggerPtr log_) : call_type(call_type_), responder(std::move(responder_)), iserver(iserver_), log(log_) { } @@ -1843,7 +1843,7 @@ private: GRPCServer::GRPCServer(IServer & iserver_, const Poco::Net::SocketAddress & address_to_listen_) : iserver(iserver_) , address_to_listen(address_to_listen_) - , log(&Poco::Logger::get("GRPCServer")) + , log(getLogger("GRPCServer")) , runner(std::make_unique(*this)) {} diff --git a/src/Server/GRPCServer.h b/src/Server/GRPCServer.h index 359a2506e95..a9c8161298f 100644 --- a/src/Server/GRPCServer.h +++ b/src/Server/GRPCServer.h @@ -5,6 +5,7 @@ #if USE_GRPC #include #include +#include #include "clickhouse_grpc.grpc.pb.h" namespace Poco { class Logger; } @@ -46,7 +47,7 @@ private: IServer & iserver; const Poco::Net::SocketAddress address_to_listen; - Poco::Logger * log; + LoggerPtr log; GRPCService grpc_service; std::unique_ptr grpc_server; std::unique_ptr queue; diff --git a/src/Server/HTTP/HTTPServerRequest.cpp b/src/Server/HTTP/HTTPServerRequest.cpp index 4a6e85ba0fb..9db02eac220 100644 --- a/src/Server/HTTP/HTTPServerRequest.cpp +++ b/src/Server/HTTP/HTTPServerRequest.cpp @@ -65,7 +65,7 @@ HTTPServerRequest::HTTPServerRequest(HTTPContextPtr context, HTTPServerResponse { stream = std::move(in); if (!startsWith(getContentType(), "multipart/form-data")) - LOG_WARNING(LogFrequencyLimiter(&Poco::Logger::get("HTTPServerRequest"), 10), "Got an HTTP request with no content length " + LOG_WARNING(LogFrequencyLimiter(getLogger("HTTPServerRequest"), 10), "Got an HTTP request with no content length " "and no chunked/multipart encoding, it may be impossible to distinguish graceful EOF from abnormal connection loss"); } else diff --git a/src/Server/HTTP/WriteBufferFromHTTPServerResponse.cpp b/src/Server/HTTP/WriteBufferFromHTTPServerResponse.cpp index a3e7c28c8dc..8098671a903 100644 --- a/src/Server/HTTP/WriteBufferFromHTTPServerResponse.cpp +++ 
b/src/Server/HTTP/WriteBufferFromHTTPServerResponse.cpp @@ -1,4 +1,3 @@ -#include "Common/StackTrace.h" #include #include #include @@ -20,6 +19,13 @@ void WriteBufferFromHTTPServerResponse::startSendHeaders() if (response.getChunkedTransferEncoding()) setChunked(); + else if (response.getContentLength() == Poco::Net::HTTPMessage::UNKNOWN_CONTENT_LENGTH) + { + /// In case there is no Content-Length we cannot use keep-alive, + /// since there is no way to know when the server send all the + /// data, so "Connection: close" should be sent. + response.setKeepAlive(false); + } if (add_cors_header) response.set("Access-Control-Allow-Origin", "*"); diff --git a/src/Server/HTTPHandler.cpp b/src/Server/HTTPHandler.cpp index bdc8e7d59c9..72e7c5552f8 100644 --- a/src/Server/HTTPHandler.cpp +++ b/src/Server/HTTPHandler.cpp @@ -137,7 +137,7 @@ bool tryAddHttpOptionHeadersFromConfig(HTTPServerResponse & response, const Poco { /// If there is empty header name, it will not be processed and message about it will be in logs if (config.getString("http_options_response." + config_key + ".name", "").empty()) - LOG_WARNING(&Poco::Logger::get("processOptionsRequest"), "Empty header was found in config. It will not be processed."); + LOG_WARNING(getLogger("processOptionsRequest"), "Empty header was found in config. It will not be processed."); else response.add(config.getString("http_options_response." + config_key + ".name", ""), config.getString("http_options_response." + config_key + ".value", "")); @@ -328,7 +328,7 @@ void HTTPHandler::pushDelayedResults(Output & used_output) HTTPHandler::HTTPHandler(IServer & server_, const std::string & name, const std::optional & content_type_override_) : server(server_) - , log(&Poco::Logger::get(name)) + , log(getLogger(name)) , default_settings(server.context()->getSettingsRef()) , content_type_override(content_type_override_) { diff --git a/src/Server/HTTPHandler.h b/src/Server/HTTPHandler.h index 026bda43d14..fa2d0dae199 100644 --- a/src/Server/HTTPHandler.h +++ b/src/Server/HTTPHandler.h @@ -87,9 +87,9 @@ private: return; finalized = true; - if (out_maybe_compressed) - out_maybe_compressed->finalize(); - else if (out) + if (out_compressed_holder) + out_compressed_holder->finalize(); + if (out) out->finalize(); } @@ -100,7 +100,7 @@ private: }; IServer & server; - Poco::Logger * log; + LoggerPtr log; /// It is the name of the server that will be sent in an http-header X-ClickHouse-Server-Display-Name. 
String server_display_name; diff --git a/src/Server/HTTPRequestHandlerFactoryMain.cpp b/src/Server/HTTPRequestHandlerFactoryMain.cpp index 5481bcd5083..48c2ab21468 100644 --- a/src/Server/HTTPRequestHandlerFactoryMain.cpp +++ b/src/Server/HTTPRequestHandlerFactoryMain.cpp @@ -7,7 +7,7 @@ namespace DB { HTTPRequestHandlerFactoryMain::HTTPRequestHandlerFactoryMain(const std::string & name_) - : log(&Poco::Logger::get(name_)), name(name_) + : log(getLogger(name_)), name(name_) { } diff --git a/src/Server/HTTPRequestHandlerFactoryMain.h b/src/Server/HTTPRequestHandlerFactoryMain.h index 07b278d831c..1075b7d1d60 100644 --- a/src/Server/HTTPRequestHandlerFactoryMain.h +++ b/src/Server/HTTPRequestHandlerFactoryMain.h @@ -21,7 +21,7 @@ public: std::unique_ptr createRequestHandler(const HTTPServerRequest & request) override; private: - Poco::Logger * log; + LoggerPtr log; std::string name; HTTPPathHints hints; diff --git a/src/Server/InterserverIOHTTPHandler.h b/src/Server/InterserverIOHTTPHandler.h index 66042ad3d1d..226a06f5a45 100644 --- a/src/Server/InterserverIOHTTPHandler.h +++ b/src/Server/InterserverIOHTTPHandler.h @@ -26,7 +26,7 @@ class InterserverIOHTTPHandler : public HTTPRequestHandler public: explicit InterserverIOHTTPHandler(IServer & server_) : server(server_) - , log(&Poco::Logger::get("InterserverIOHTTPHandler")) + , log(getLogger("InterserverIOHTTPHandler")) { } @@ -39,7 +39,7 @@ private: }; IServer & server; - Poco::Logger * log; + LoggerPtr log; CurrentMetrics::Increment metric_increment{CurrentMetrics::InterserverConnection}; diff --git a/src/Server/KeeperReadinessHandler.cpp b/src/Server/KeeperReadinessHandler.cpp index de6edd199d7..c973be040c8 100644 --- a/src/Server/KeeperReadinessHandler.cpp +++ b/src/Server/KeeperReadinessHandler.cpp @@ -63,7 +63,7 @@ void KeeperReadinessHandler::handleRequest(HTTPServerRequest & /*request*/, HTTP } catch (...) 
{ - LOG_ERROR((&Poco::Logger::get("KeeperReadinessHandler")), "Cannot send exception to client"); + LOG_ERROR((getLogger("KeeperReadinessHandler")), "Cannot send exception to client"); } } } diff --git a/src/Server/KeeperTCPHandler.cpp b/src/Server/KeeperTCPHandler.cpp index 76b84f0ce6e..6709cd298e5 100644 --- a/src/Server/KeeperTCPHandler.cpp +++ b/src/Server/KeeperTCPHandler.cpp @@ -220,7 +220,7 @@ KeeperTCPHandler::KeeperTCPHandler( Poco::Timespan send_timeout_, const Poco::Net::StreamSocket & socket_) : Poco::Net::TCPServerConnection(socket_) - , log(&Poco::Logger::get("KeeperTCPHandler")) + , log(getLogger("KeeperTCPHandler")) , keeper_dispatcher(keeper_dispatcher_) , operation_timeout( 0, diff --git a/src/Server/KeeperTCPHandler.h b/src/Server/KeeperTCPHandler.h index adb1baa084f..c1c522eee89 100644 --- a/src/Server/KeeperTCPHandler.h +++ b/src/Server/KeeperTCPHandler.h @@ -63,7 +63,7 @@ public: ~KeeperTCPHandler() override; private: - Poco::Logger * log; + LoggerPtr log; std::shared_ptr keeper_dispatcher; Poco::Timespan operation_timeout; Poco::Timespan min_session_timeout; diff --git a/src/Server/KeeperTCPHandlerFactory.h b/src/Server/KeeperTCPHandlerFactory.h index 36f284442c6..239bf8b5524 100644 --- a/src/Server/KeeperTCPHandlerFactory.h +++ b/src/Server/KeeperTCPHandlerFactory.h @@ -17,7 +17,7 @@ class KeeperTCPHandlerFactory : public TCPServerConnectionFactory private: ConfigGetter config_getter; std::shared_ptr keeper_dispatcher; - Poco::Logger * log; + LoggerPtr log; Poco::Timespan receive_timeout; Poco::Timespan send_timeout; @@ -37,7 +37,7 @@ public: bool secure) : config_getter(config_getter_) , keeper_dispatcher(keeper_dispatcher_) - , log(&Poco::Logger::get(std::string{"KeeperTCP"} + (secure ? "S" : "") + "HandlerFactory")) + , log(getLogger(std::string{"KeeperTCP"} + (secure ? 
"S" : "") + "HandlerFactory")) , receive_timeout(/* seconds = */ receive_timeout_seconds, /* microseconds = */ 0) , send_timeout(/* seconds = */ send_timeout_seconds, /* microseconds = */ 0) { diff --git a/src/Server/MySQLHandler.cpp b/src/Server/MySQLHandler.cpp index cb912e000e5..72fe3b7cea9 100644 --- a/src/Server/MySQLHandler.cpp +++ b/src/Server/MySQLHandler.cpp @@ -57,14 +57,109 @@ namespace ErrorCodes extern const int UNSUPPORTED_METHOD; } - static const size_t PACKET_HEADER_SIZE = 4; static const size_t SSL_REQUEST_PAYLOAD_SIZE = 32; -static String selectEmptyReplacementQuery(const String & query); -static String showTableStatusReplacementQuery(const String & query); -static String killConnectionIdReplacementQuery(const String & query); -static String selectLimitReplacementQuery(const String & query); +static bool checkShouldReplaceQuery(const String & query, const String & prefix) +{ + return query.length() >= prefix.length() + && std::equal(prefix.begin(), prefix.end(), query.begin(), [](char a, char b) { return std::tolower(a) == std::tolower(b); }); +} + +static bool isFederatedServerSetupSetCommand(const String & query) +{ + re2::RE2::Options regexp_options; + regexp_options.set_case_sensitive(false); + static const re2::RE2 expr( + "(^(SET NAMES(.*)))" + "|(^(SET character_set_results(.*)))" + "|(^(SET FOREIGN_KEY_CHECKS(.*)))" + "|(^(SET AUTOCOMMIT(.*)))" + "|(^(SET sql_mode(.*)))" + "|(^(SET @@(.*)))" + "|(^(SET SESSION TRANSACTION ISOLATION LEVEL(.*)))", regexp_options); + assert(expr.ok()); + return re2::RE2::FullMatch(query, expr); +} + +/// Always return an empty set with appropriate column definitions for SHOW WARNINGS queries +/// See also: https://dev.mysql.com/doc/refman/8.0/en/show-warnings.html +static String showWarningsReplacementQuery([[maybe_unused]] const String & query) +{ + return "SELECT '' AS Level, 0::UInt32 AS Code, '' AS Message WHERE false"; +} + +static String showCountWarningsReplacementQuery([[maybe_unused]] const String & query) +{ + return "SELECT 0::UInt64 AS `@@session.warning_count`"; +} + +/// Replace "[query(such as SHOW VARIABLES...)]" into "". +static String selectEmptyReplacementQuery(const String & query) +{ + std::ignore = query; + return "select ''"; +} + +/// Replace "SHOW TABLE STATUS LIKE 'xx'" into "SELECT ... FROM system.tables WHERE name LIKE 'xx'". 
+static String showTableStatusReplacementQuery(const String & query) +{ + const String prefix = "SHOW TABLE STATUS LIKE "; + if (query.size() > prefix.size()) + { + String suffix = query.data() + prefix.length(); + return ( + "SELECT" + " name AS Name," + " engine AS Engine," + " '10' AS Version," + " 'Dynamic' AS Row_format," + " 0 AS Rows," + " 0 AS Avg_row_length," + " 0 AS Data_length," + " 0 AS Max_data_length," + " 0 AS Index_length," + " 0 AS Data_free," + " 'NULL' AS Auto_increment," + " metadata_modification_time AS Create_time," + " metadata_modification_time AS Update_time," + " metadata_modification_time AS Check_time," + " 'utf8_bin' AS Collation," + " 'NULL' AS Checksum," + " '' AS Create_options," + " '' AS Comment" + " FROM system.tables" + " WHERE name LIKE " + + suffix); + } + return query; +} + +static std::optional setSettingReplacementQuery(const String & query, const String & mysql_setting, const String & clickhouse_setting) +{ + const String prefix = "SET " + mysql_setting; + // if (query.length() >= prefix.length() && boost::iequals(std::string_view(prefix), std::string_view(query.data(), 3))) + if (checkShouldReplaceQuery(query, prefix)) + return "SET " + clickhouse_setting + String(query.data() + prefix.length()); + return std::nullopt; +} + +/// Replace "KILL QUERY [connection_id]" into "KILL QUERY WHERE query_id LIKE 'mysql:[connection_id]:xxx'". +static String killConnectionIdReplacementQuery(const String & query) +{ + const String prefix = "KILL QUERY "; + if (query.size() > prefix.size()) + { + String suffix = query.data() + prefix.length(); + static const re2::RE2 expr("^[0-9]"); + if (re2::RE2::FullMatch(suffix, expr)) + { + String replacement = fmt::format("KILL QUERY WHERE query_id LIKE 'mysql:{}:%'", suffix); + return replacement; + } + } + return query; +} MySQLHandler::MySQLHandler( IServer & server_, @@ -76,7 +171,7 @@ MySQLHandler::MySQLHandler( : Poco::Net::TCPServerConnection(socket_) , server(server_) , tcp_server(tcp_server_) - , log(&Poco::Logger::get("MySQLHandler")) + , log(getLogger("MySQLHandler")) , connection_id(connection_id_) , auth_plugin(new MySQLProtocol::Authentication::Native41()) , read_event(read_event_) @@ -86,10 +181,14 @@ MySQLHandler::MySQLHandler( if (ssl_enabled) server_capabilities |= CLIENT_SSL; - replacements.emplace("KILL QUERY", killConnectionIdReplacementQuery); - replacements.emplace("SHOW TABLE STATUS LIKE", showTableStatusReplacementQuery); - replacements.emplace("SHOW VARIABLES", selectEmptyReplacementQuery); - replacements.emplace("SET SQL_SELECT_LIMIT", selectLimitReplacementQuery); + queries_replacements.emplace("SHOW WARNINGS", showWarningsReplacementQuery); + queries_replacements.emplace("SHOW COUNT(*) WARNINGS", showCountWarningsReplacementQuery); + queries_replacements.emplace("KILL QUERY", killConnectionIdReplacementQuery); + queries_replacements.emplace("SHOW TABLE STATUS LIKE", showTableStatusReplacementQuery); + queries_replacements.emplace("SHOW VARIABLES", selectEmptyReplacementQuery); + settings_replacements.emplace("SQL_SELECT_LIMIT", "limit"); + settings_replacements.emplace("NET_WRITE_TIMEOUT", "send_timeout"); + settings_replacements.emplace("NET_READ_TIMEOUT", "receive_timeout"); } void MySQLHandler::run() @@ -320,8 +419,6 @@ void MySQLHandler::comPing() packet_endpoint->sendPacket(OKPacket(0x0, client_capabilities, 0, 0, 0), true); } -static bool isFederatedServerSetupSetCommand(const String & query); - void MySQLHandler::comQuery(ReadBuffer & payload, bool binary_protocol) { String query = 
String(payload.position(), payload.buffer().end()); @@ -338,17 +435,29 @@ void MySQLHandler::comQuery(ReadBuffer & payload, bool binary_protocol) bool should_replace = false; bool with_output = false; - for (auto const & x : replacements) + // Queries replacements + for (auto const & [query_to_replace, replacement_fn] : queries_replacements) { - if (0 == strncasecmp(x.first.c_str(), query.c_str(), x.first.size())) + if (checkShouldReplaceQuery(query, query_to_replace)) { should_replace = true; - replacement_query = x.second(query); + replacement_query = replacement_fn(query); break; } } - ReadBufferFromString replacement(replacement_query); + // Settings replacements + if (!should_replace) + for (auto const & [mysql_setting, clickhouse_setting] : settings_replacements) + { + const auto replacement_query_opt = setSettingReplacementQuery(query, mysql_setting, clickhouse_setting); + if (replacement_query_opt.has_value()) + { + should_replace = true; + replacement_query = replacement_query_opt.value(); + break; + } + } auto query_context = session->makeQueryContext(); query_context->setCurrentQueryId(fmt::format("mysql:{}:{}", connection_id, toString(UUIDHelpers::generateV4()))); @@ -381,7 +490,14 @@ void MySQLHandler::comQuery(ReadBuffer & payload, bool binary_protocol) } }; - executeQuery(should_replace ? replacement : payload, *out, false, query_context, set_result_details, QueryFlags{}, format_settings); + if (should_replace) + { + ReadBufferFromString replacement(replacement_query); + executeQuery(replacement, *out, false, query_context, set_result_details, QueryFlags{}, format_settings); + } + else + executeQuery(payload, *out, false, query_context, set_result_details, QueryFlags{}, format_settings); + if (!with_output) packet_endpoint->sendPacket(OKPacket(0x00, client_capabilities, affected_rows, 0, 0), true); @@ -527,87 +643,4 @@ void MySQLHandlerSSL::finishHandshakeSSL( } #endif - -static bool isFederatedServerSetupSetCommand(const String & query) -{ - re2::RE2::Options regexp_options; - regexp_options.set_case_sensitive(false); - static const re2::RE2 expr( - "(^(SET NAMES(.*)))" - "|(^(SET character_set_results(.*)))" - "|(^(SET FOREIGN_KEY_CHECKS(.*)))" - "|(^(SET AUTOCOMMIT(.*)))" - "|(^(SET sql_mode(.*)))" - "|(^(SET @@(.*)))" - "|(^(SET SESSION TRANSACTION ISOLATION LEVEL(.*)))", regexp_options); - assert(expr.ok()); - return re2::RE2::FullMatch(query, expr); -} - -/// Replace "[query(such as SHOW VARIABLES...)]" into "". -static String selectEmptyReplacementQuery(const String & query) -{ - std::ignore = query; - return "select ''"; -} - -/// Replace "SHOW TABLE STATUS LIKE 'xx'" into "SELECT ... FROM system.tables WHERE name LIKE 'xx'". 
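Taken together, the replacement tables registered in this handler translate common MySQL-client housekeeping statements into ClickHouse equivalents. A few representative pairs, as implied by the maps above (the left side is what a MySQL-protocol client sends, the comment shows the rewritten query actually executed):

``` sql
SHOW WARNINGS;               -- becomes: SELECT '' AS Level, 0::UInt32 AS Code, '' AS Message WHERE false
SHOW COUNT(*) WARNINGS;      -- becomes: SELECT 0::UInt64 AS `@@session.warning_count`
SET SQL_SELECT_LIMIT = 10;   -- becomes: SET limit = 10
SET NET_WRITE_TIMEOUT = 300; -- becomes: SET send_timeout = 300
KILL QUERY 7;                -- becomes: KILL QUERY WHERE query_id LIKE 'mysql:7:%'
```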
-static String showTableStatusReplacementQuery(const String & query) -{ - const String prefix = "SHOW TABLE STATUS LIKE "; - if (query.size() > prefix.size()) - { - String suffix = query.data() + prefix.length(); - return ( - "SELECT" - " name AS Name," - " engine AS Engine," - " '10' AS Version," - " 'Dynamic' AS Row_format," - " 0 AS Rows," - " 0 AS Avg_row_length," - " 0 AS Data_length," - " 0 AS Max_data_length," - " 0 AS Index_length," - " 0 AS Data_free," - " 'NULL' AS Auto_increment," - " metadata_modification_time AS Create_time," - " metadata_modification_time AS Update_time," - " metadata_modification_time AS Check_time," - " 'utf8_bin' AS Collation," - " 'NULL' AS Checksum," - " '' AS Create_options," - " '' AS Comment" - " FROM system.tables" - " WHERE name LIKE " - + suffix); - } - return query; -} - -static String selectLimitReplacementQuery(const String & query) -{ - const String prefix = "SET SQL_SELECT_LIMIT"; - if (query.starts_with(prefix)) - return "SET limit" + std::string(query.data() + prefix.length()); - return query; -} - -/// Replace "KILL QUERY [connection_id]" into "KILL QUERY WHERE query_id LIKE 'mysql:[connection_id]:xxx'". -static String killConnectionIdReplacementQuery(const String & query) -{ - const String prefix = "KILL QUERY "; - if (query.size() > prefix.size()) - { - String suffix = query.data() + prefix.length(); - static const re2::RE2 expr("^[0-9]"); - if (re2::RE2::FullMatch(suffix, expr)) - { - String replacement = fmt::format("KILL QUERY WHERE query_id LIKE 'mysql:{}:%'", suffix); - return replacement; - } - } - return query; -} - } diff --git a/src/Server/MySQLHandler.h b/src/Server/MySQLHandler.h index 36d63ebca84..2deb2b8f435 100644 --- a/src/Server/MySQLHandler.h +++ b/src/Server/MySQLHandler.h @@ -81,7 +81,7 @@ protected: IServer & server; TCPServer & tcp_server; - Poco::Logger * log; + LoggerPtr log; uint32_t connection_id = 0; uint32_t server_capabilities = 0; @@ -92,9 +92,13 @@ protected: MySQLProtocol::PacketEndpointPtr packet_endpoint; std::unique_ptr session; - using ReplacementFn = std::function; - using Replacements = std::unordered_map; - Replacements replacements; + using QueryReplacementFn = std::function; + using QueriesReplacements = std::unordered_map; + QueriesReplacements queries_replacements; + + /// MySQL setting name --> ClickHouse setting name + using SettingsReplacements = std::unordered_map; + SettingsReplacements settings_replacements; std::mutex prepared_statements_mutex; UInt32 current_prepared_statement_id TSA_GUARDED_BY(prepared_statements_mutex) = 0; diff --git a/src/Server/MySQLHandlerFactory.cpp b/src/Server/MySQLHandlerFactory.cpp index 79234c647aa..1dd43e6dab2 100644 --- a/src/Server/MySQLHandlerFactory.cpp +++ b/src/Server/MySQLHandlerFactory.cpp @@ -23,7 +23,7 @@ namespace ErrorCodes MySQLHandlerFactory::MySQLHandlerFactory(IServer & server_, const ProfileEvents::Event & read_event_, const ProfileEvents::Event & write_event_) : server(server_) - , log(&Poco::Logger::get("MySQLHandlerFactory")) + , log(getLogger("MySQLHandlerFactory")) , read_event(read_event_) , write_event(write_event_) { diff --git a/src/Server/MySQLHandlerFactory.h b/src/Server/MySQLHandlerFactory.h index 307ee3b2f0d..4108269d738 100644 --- a/src/Server/MySQLHandlerFactory.h +++ b/src/Server/MySQLHandlerFactory.h @@ -20,7 +20,7 @@ class MySQLHandlerFactory : public TCPServerConnectionFactory { private: IServer & server; - Poco::Logger * log; + LoggerPtr log; #if USE_SSL struct RSADeleter diff --git a/src/Server/PostgreSQLHandler.h 
b/src/Server/PostgreSQLHandler.h index 57b91a0ad04..1c23d896415 100644 --- a/src/Server/PostgreSQLHandler.h +++ b/src/Server/PostgreSQLHandler.h @@ -40,7 +40,7 @@ public: void run() final; private: - Poco::Logger * log = &Poco::Logger::get("PostgreSQLHandler"); + LoggerPtr log = getLogger("PostgreSQLHandler"); IServer & server; TCPServer & tcp_server; diff --git a/src/Server/PostgreSQLHandlerFactory.cpp b/src/Server/PostgreSQLHandlerFactory.cpp index 096bbbdcda9..29eb7567976 100644 --- a/src/Server/PostgreSQLHandlerFactory.cpp +++ b/src/Server/PostgreSQLHandlerFactory.cpp @@ -7,7 +7,7 @@ namespace DB PostgreSQLHandlerFactory::PostgreSQLHandlerFactory(IServer & server_, const ProfileEvents::Event & read_event_, const ProfileEvents::Event & write_event_) : server(server_) - , log(&Poco::Logger::get("PostgreSQLHandlerFactory")) + , log(getLogger("PostgreSQLHandlerFactory")) , read_event(read_event_) , write_event(write_event_) { diff --git a/src/Server/PostgreSQLHandlerFactory.h b/src/Server/PostgreSQLHandlerFactory.h index e5f762fca6d..43674306ff6 100644 --- a/src/Server/PostgreSQLHandlerFactory.h +++ b/src/Server/PostgreSQLHandlerFactory.h @@ -14,7 +14,7 @@ class PostgreSQLHandlerFactory : public TCPServerConnectionFactory { private: IServer & server; - Poco::Logger * log; + LoggerPtr log; ProfileEvents::Event read_event; ProfileEvents::Event write_event; diff --git a/src/Server/PrometheusRequestHandler.cpp b/src/Server/PrometheusRequestHandler.cpp index 12caad5eea1..8690ec9121e 100644 --- a/src/Server/PrometheusRequestHandler.cpp +++ b/src/Server/PrometheusRequestHandler.cpp @@ -23,6 +23,10 @@ void PrometheusRequestHandler::handleRequest(HTTPServerRequest & request, HTTPSe const auto & config = server.config(); unsigned keep_alive_timeout = config.getUInt("keep_alive_timeout", DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT); + /// In order to make keep-alive works. 
+ if (request.getVersion() == HTTPServerRequest::HTTP_1_1) + response.setChunkedTransferEncoding(true); + setResponseDefaultHeaders(response, keep_alive_timeout); response.setContentType("text/plain; version=0.0.4; charset=UTF-8"); diff --git a/src/Server/ProxyV1Handler.h b/src/Server/ProxyV1Handler.h index b50c2acbc55..a044b9a966b 100644 --- a/src/Server/ProxyV1Handler.h +++ b/src/Server/ProxyV1Handler.h @@ -13,7 +13,7 @@ class ProxyV1Handler : public Poco::Net::TCPServerConnection using StreamSocket = Poco::Net::StreamSocket; public: explicit ProxyV1Handler(const StreamSocket & socket, IServer & server_, const std::string & conf_name_, TCPProtocolStackData & stack_data_) - : Poco::Net::TCPServerConnection(socket), log(&Poco::Logger::get("ProxyV1Handler")), server(server_), conf_name(conf_name_), stack_data(stack_data_) {} + : Poco::Net::TCPServerConnection(socket), log(getLogger("ProxyV1Handler")), server(server_), conf_name(conf_name_), stack_data(stack_data_) {} void run() override; @@ -21,7 +21,7 @@ protected: bool readWord(int max_len, std::string & word, bool & eol); private: - Poco::Logger * log; + LoggerPtr log; IServer & server; std::string conf_name; TCPProtocolStackData & stack_data; diff --git a/src/Server/ProxyV1HandlerFactory.h b/src/Server/ProxyV1HandlerFactory.h index 028596d745d..0398c8c1ccf 100644 --- a/src/Server/ProxyV1HandlerFactory.h +++ b/src/Server/ProxyV1HandlerFactory.h @@ -16,7 +16,7 @@ class ProxyV1HandlerFactory : public TCPServerConnectionFactory { private: IServer & server; - Poco::Logger * log; + LoggerPtr log; std::string conf_name; class DummyTCPHandler : public Poco::Net::TCPServerConnection @@ -28,7 +28,7 @@ private: public: explicit ProxyV1HandlerFactory(IServer & server_, const std::string & conf_name_) - : server(server_), log(&Poco::Logger::get("ProxyV1HandlerFactory")), conf_name(conf_name_) + : server(server_), log(getLogger("ProxyV1HandlerFactory")), conf_name(conf_name_) { } diff --git a/src/Server/ReplicasStatusHandler.cpp b/src/Server/ReplicasStatusHandler.cpp index 07f3b67b6a7..91c6bd722d3 100644 --- a/src/Server/ReplicasStatusHandler.cpp +++ b/src/Server/ReplicasStatusHandler.cpp @@ -118,7 +118,7 @@ void ReplicasStatusHandler::handleRequest(HTTPServerRequest & request, HTTPServe } catch (...) 
{ - LOG_ERROR((&Poco::Logger::get("ReplicasStatusHandler")), "Cannot send exception to client"); + LOG_ERROR((getLogger("ReplicasStatusHandler")), "Cannot send exception to client"); } } } diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index fa7206eeaac..ec6b374518d 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -189,7 +189,7 @@ TCPHandler::TCPHandler(IServer & server_, TCPServer & tcp_server_, const Poco::N , server(server_) , tcp_server(tcp_server_) , parse_proxy_protocol(parse_proxy_protocol_) - , log(&Poco::Logger::get("TCPHandler")) + , log(getLogger("TCPHandler")) , read_event(read_event_) , write_event(write_event_) , server_display_name(std::move(server_display_name_)) @@ -200,7 +200,7 @@ TCPHandler::TCPHandler(IServer & server_, TCPServer & tcp_server_, const Poco::N : Poco::Net::TCPServerConnection(socket_) , server(server_) , tcp_server(tcp_server_) - , log(&Poco::Logger::get("TCPHandler")) + , log(getLogger("TCPHandler")) , forwarded_for(stack_data.forwarded_for) , certificate(stack_data.certificate) , read_event(read_event_) diff --git a/src/Server/TCPHandler.h b/src/Server/TCPHandler.h index 4eb84ee5eee..26cecf46662 100644 --- a/src/Server/TCPHandler.h +++ b/src/Server/TCPHandler.h @@ -160,7 +160,7 @@ private: IServer & server; TCPServer & tcp_server; bool parse_proxy_protocol = false; - Poco::Logger * log; + LoggerPtr log; String forwarded_for; String certificate; diff --git a/src/Server/TCPHandlerFactory.h b/src/Server/TCPHandlerFactory.h index 3eb032f4250..d65c9898b23 100644 --- a/src/Server/TCPHandlerFactory.h +++ b/src/Server/TCPHandlerFactory.h @@ -18,7 +18,7 @@ class TCPHandlerFactory : public TCPServerConnectionFactory private: IServer & server; bool parse_proxy_protocol = false; - Poco::Logger * log; + LoggerPtr log; std::string server_display_name; ProfileEvents::Event read_event; @@ -38,7 +38,7 @@ public: */ TCPHandlerFactory(IServer & server_, bool secure_, bool parse_proxy_protocol_, const ProfileEvents::Event & read_event_ = ProfileEvents::end(), const ProfileEvents::Event & write_event_ = ProfileEvents::end()) : server(server_), parse_proxy_protocol(parse_proxy_protocol_) - , log(&Poco::Logger::get(std::string("TCP") + (secure_ ? "S" : "") + "HandlerFactory")) + , log(getLogger(std::string("TCP") + (secure_ ? "S" : "") + "HandlerFactory")) , read_event(read_event_) , write_event(write_event_) { diff --git a/src/Server/TCPProtocolStackFactory.h b/src/Server/TCPProtocolStackFactory.h index 7373e6e1c4e..b76bb8d72fd 100644 --- a/src/Server/TCPProtocolStackFactory.h +++ b/src/Server/TCPProtocolStackFactory.h @@ -23,7 +23,7 @@ class TCPProtocolStackFactory : public TCPServerConnectionFactory { private: IServer & server [[maybe_unused]]; - Poco::Logger * log; + LoggerPtr log; std::string conf_name; std::vector stack; AllowedClientHosts allowed_client_hosts; @@ -38,7 +38,7 @@ private: public: template explicit TCPProtocolStackFactory(IServer & server_, const std::string & conf_name_, T... factory) - : server(server_), log(&Poco::Logger::get("TCPProtocolStackFactory")), conf_name(conf_name_), stack({factory...}) + : server(server_), log(getLogger("TCPProtocolStackFactory")), conf_name(conf_name_), stack({factory...}) { const auto & config = server.config(); /// Fill list of allowed hosts. 
diff --git a/src/Server/TLSHandlerFactory.h b/src/Server/TLSHandlerFactory.h index 9e3002d2971..19602c7d25e 100644 --- a/src/Server/TLSHandlerFactory.h +++ b/src/Server/TLSHandlerFactory.h @@ -19,7 +19,7 @@ class TLSHandlerFactory : public TCPServerConnectionFactory { private: IServer & server; - Poco::Logger * log; + LoggerPtr log; std::string conf_name; class DummyTCPHandler : public Poco::Net::TCPServerConnection @@ -31,7 +31,7 @@ private: public: explicit TLSHandlerFactory(IServer & server_, const std::string & conf_name_) - : server(server_), log(&Poco::Logger::get("TLSHandlerFactory")), conf_name(conf_name_) + : server(server_), log(getLogger("TLSHandlerFactory")), conf_name(conf_name_) { } diff --git a/src/Server/WebUIRequestHandler.cpp b/src/Server/WebUIRequestHandler.cpp index ac7a3bfccf3..e45d2a55acb 100644 --- a/src/Server/WebUIRequestHandler.cpp +++ b/src/Server/WebUIRequestHandler.cpp @@ -17,6 +17,7 @@ INCBIN(resource_play_html, SOURCE_DIR "/programs/server/play.html"); INCBIN(resource_dashboard_html, SOURCE_DIR "/programs/server/dashboard.html"); INCBIN(resource_uplot_js, SOURCE_DIR "/programs/server/js/uplot.js"); +INCBIN(resource_lz_string_js, SOURCE_DIR "/programs/server/js/lz-string.js"); INCBIN(resource_binary_html, SOURCE_DIR "/programs/server/binary.html"); @@ -59,6 +60,9 @@ void WebUIRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServerR static re2::RE2 uplot_url = R"(https://[^\s"'`]+u[Pp]lot[^\s"'`]*\.js)"; RE2::Replace(&html, uplot_url, "/js/uplot.js"); + static re2::RE2 lz_string_url = R"(https://[^\s"'`]+lz-string[^\s"'`]*\.js)"; + RE2::Replace(&html, lz_string_url, "/js/lz-string.js"); + WriteBufferFromHTTPServerResponse(response, request.getMethod() == HTTPRequest::HTTP_HEAD, keep_alive_timeout).write(html); } else if (request.getURI().starts_with("/binary")) @@ -71,6 +75,11 @@ void WebUIRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServerR response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_OK); WriteBufferFromHTTPServerResponse(response, request.getMethod() == HTTPRequest::HTTP_HEAD, keep_alive_timeout).write(reinterpret_cast(gresource_uplot_jsData), gresource_uplot_jsSize); } + else if (request.getURI() == "/js/lz-string.js") + { + response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_OK); + WriteBufferFromHTTPServerResponse(response, request.getMethod() == HTTPRequest::HTTP_HEAD, keep_alive_timeout).write(reinterpret_cast(gresource_lz_string_jsData), gresource_lz_string_jsSize); + } else { response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_NOT_FOUND); diff --git a/src/Storages/AlterCommands.cpp b/src/Storages/AlterCommands.cpp index 1fb53475801..766863ed9f9 100644 --- a/src/Storages/AlterCommands.cpp +++ b/src/Storages/AlterCommands.cpp @@ -36,6 +36,7 @@ #include #include #include +#include #include @@ -74,6 +75,8 @@ AlterCommand::RemoveProperty removePropertyFromString(const String & property) return AlterCommand::RemoveProperty::CODEC; else if (property == "TTL") return AlterCommand::RemoveProperty::TTL; + else if (property == "SETTINGS") + return AlterCommand::RemoveProperty::SETTINGS; throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot remove unknown property '{}'", property); } @@ -137,7 +140,7 @@ std::optional AlterCommand::parse(const ASTAlterCommand * command_ command.clear = true; if (command_ast->partition) - command.partition = command_ast->partition; + command.partition = command_ast->partition->clone(); return command; } else if (command_ast->type == ASTAlterCommand::MODIFY_COLUMN) @@ -173,6 
+176,25 @@ std::optional AlterCommand::parse(const ASTAlterCommand * command_ if (ast_col_decl.codec) command.codec = ast_col_decl.codec; + if (ast_col_decl.settings) + command.settings_changes = ast_col_decl.settings->as().changes; + + /// At most only one of ast_col_decl.settings or command_ast->settings_changes is non-null + if (command_ast->settings_changes) + { + command.settings_changes = command_ast->settings_changes->as().changes; + command.append_column_setting = true; + } + + if (command_ast->settings_resets) + { + for (const ASTPtr & identifier_ast : command_ast->settings_resets->children) + { + const auto & identifier = identifier_ast->as(); + command.settings_resets.emplace(identifier.name()); + } + } + if (command_ast->column) command.after_column = getIdentifierName(command_ast->column); @@ -206,7 +228,7 @@ std::optional AlterCommand::parse(const ASTAlterCommand * command_ AlterCommand command; command.ast = command_ast->clone(); command.type = AlterCommand::MODIFY_ORDER_BY; - command.order_by = command_ast->order_by; + command.order_by = command_ast->order_by->clone(); return command; } else if (command_ast->type == ASTAlterCommand::MODIFY_SAMPLE_BY) @@ -214,7 +236,7 @@ std::optional AlterCommand::parse(const ASTAlterCommand * command_ AlterCommand command; command.ast = command_ast->clone(); command.type = AlterCommand::MODIFY_SAMPLE_BY; - command.sample_by = command_ast->sample_by; + command.sample_by = command_ast->sample_by->clone(); return command; } else if (command_ast->type == ASTAlterCommand::REMOVE_SAMPLE_BY) @@ -228,7 +250,7 @@ std::optional AlterCommand::parse(const ASTAlterCommand * command_ { AlterCommand command; command.ast = command_ast->clone(); - command.index_decl = command_ast->index_decl; + command.index_decl = command_ast->index_decl->clone(); command.type = AlterCommand::ADD_INDEX; const auto & ast_index_decl = command_ast->index_decl->as(); @@ -247,7 +269,7 @@ std::optional AlterCommand::parse(const ASTAlterCommand * command_ { AlterCommand command; command.ast = command_ast->clone(); - command.statistic_decl = command_ast->statistic_decl; + command.statistic_decl = command_ast->statistic_decl->clone(); command.type = AlterCommand::ADD_STATISTIC; const auto & ast_stat_decl = command_ast->statistic_decl->as(); @@ -262,7 +284,7 @@ std::optional AlterCommand::parse(const ASTAlterCommand * command_ { AlterCommand command; command.ast = command_ast->clone(); - command.constraint_decl = command_ast->constraint_decl; + command.constraint_decl = command_ast->constraint_decl->clone(); command.type = AlterCommand::ADD_CONSTRAINT; const auto & ast_constraint_decl = command_ast->constraint_decl->as(); @@ -277,7 +299,7 @@ std::optional AlterCommand::parse(const ASTAlterCommand * command_ { AlterCommand command; command.ast = command_ast->clone(); - command.projection_decl = command_ast->projection_decl; + command.projection_decl = command_ast->projection_decl->clone(); command.type = AlterCommand::ADD_PROJECTION; const auto & ast_projection_decl = command_ast->projection_decl->as(); @@ -313,7 +335,7 @@ std::optional AlterCommand::parse(const ASTAlterCommand * command_ command.clear = true; if (command_ast->partition) - command.partition = command_ast->partition; + command.partition = command_ast->partition->clone(); return command; } @@ -330,7 +352,7 @@ std::optional AlterCommand::parse(const ASTAlterCommand * command_ command.clear = command_ast->clear_statistic; if (command_ast->partition) - command.partition = command_ast->partition; + command.partition = 
command_ast->partition->clone(); return command; } @@ -345,7 +367,7 @@ std::optional AlterCommand::parse(const ASTAlterCommand * command_ command.clear = true; if (command_ast->partition) - command.partition = command_ast->partition; + command.partition = command_ast->partition->clone(); return command; } @@ -354,7 +376,7 @@ std::optional AlterCommand::parse(const ASTAlterCommand * command_ AlterCommand command; command.ast = command_ast->clone(); command.type = AlterCommand::MODIFY_TTL; - command.ttl = command_ast->ttl; + command.ttl = command_ast->ttl->clone(); return command; } else if (command_ast->type == ASTAlterCommand::REMOVE_TTL) @@ -399,7 +421,7 @@ std::optional AlterCommand::parse(const ASTAlterCommand * command_ AlterCommand command; command.ast = command_ast->clone(); command.type = AlterCommand::MODIFY_QUERY; - command.select = command_ast->select; + command.select = command_ast->select->clone(); return command; } else if (command_ast->type == ASTAlterCommand::MODIFY_REFRESH) @@ -501,6 +523,10 @@ void AlterCommand::apply(StorageInMemoryMetadata & metadata, ContextPtr context) { column.ttl.reset(); } + else if (to_remove == RemoveProperty::SETTINGS) + { + column.settings.clear(); + } else { if (codec) @@ -515,6 +541,22 @@ void AlterCommand::apply(StorageInMemoryMetadata & metadata, ContextPtr context) if (data_type) column.type = data_type; + if (!settings_changes.empty()) + { + MergeTreeColumnSettings::validate(settings_changes); + if (append_column_setting) + for (const auto & change : settings_changes) + column.settings.setSetting(change.name, change.value); + else + column.settings = settings_changes; + } + + if (!settings_resets.empty()) + { + for (const auto & setting : settings_resets) + column.settings.removeSetting(setting); + } + /// User specified default expression or changed /// datatype. We have to replace default. 
if (default_expression || data_type) @@ -1357,7 +1399,6 @@ void AlterCommands::validate(const StoragePtr & table, ContextPtr context) const ErrorCodes::BAD_ARGUMENTS, "Column {} doesn't have COMMENT, cannot remove it", backQuote(column_name)); - } modified_columns.emplace(column_name); diff --git a/src/Storages/AlterCommands.h b/src/Storages/AlterCommands.h index f40fdc954ec..d0d5d02b5f7 100644 --- a/src/Storages/AlterCommands.h +++ b/src/Storages/AlterCommands.h @@ -64,7 +64,8 @@ struct AlterCommand /// Other properties COMMENT, CODEC, - TTL + TTL, + SETTINGS }; Type type = UNKNOWN; @@ -137,10 +138,10 @@ struct AlterCommand /// For ADD and MODIFY ASTPtr codec = nullptr; - /// For MODIFY SETTING + /// For MODIFY SETTING or MODIFY COLUMN MODIFY SETTING SettingsChanges settings_changes; - /// For RESET SETTING + /// For RESET SETTING or MODIFY COLUMN RESET SETTING std::set settings_resets; /// For MODIFY_QUERY @@ -155,6 +156,9 @@ struct AlterCommand /// What to remove from column (or TTL) RemoveProperty to_remove = RemoveProperty::NO_PROPERTY; + /// Is this MODIFY COLUMN MODIFY SETTING or MODIFY COLUMN column with settings declaration) + bool append_column_setting = false; + static std::optional parse(const ASTAlterCommand * command); void apply(StorageInMemoryMetadata & metadata, ContextPtr context) const; diff --git a/src/Storages/Cache/ExternalDataSourceCache.h b/src/Storages/Cache/ExternalDataSourceCache.h index 937801c4767..a5dea2f63db 100644 --- a/src/Storages/Cache/ExternalDataSourceCache.h +++ b/src/Storages/Cache/ExternalDataSourceCache.h @@ -91,7 +91,7 @@ private: std::mutex mutex; std::unique_ptr lru_caches; - Poco::Logger * log = &Poco::Logger::get("ExternalDataSourceCache"); + LoggerPtr log = getLogger("ExternalDataSourceCache"); String calculateLocalPath(IRemoteFileMetadataPtr meta) const; diff --git a/src/Storages/Cache/RemoteCacheController.cpp b/src/Storages/Cache/RemoteCacheController.cpp index b72f5336ea4..403d0c8e43b 100644 --- a/src/Storages/Cache/RemoteCacheController.cpp +++ b/src/Storages/Cache/RemoteCacheController.cpp @@ -20,7 +20,7 @@ namespace ErrorCodes std::shared_ptr RemoteCacheController::recover(const std::filesystem::path & local_path_) { - auto * log = &Poco::Logger::get("RemoteCacheController"); + auto log = getLogger("RemoteCacheController"); if (!std::filesystem::exists(local_path_ / "data.bin")) { diff --git a/src/Storages/Cache/RemoteCacheController.h b/src/Storages/Cache/RemoteCacheController.h index fafe363bbd4..782a6b89519 100644 --- a/src/Storages/Cache/RemoteCacheController.h +++ b/src/Storages/Cache/RemoteCacheController.h @@ -116,7 +116,7 @@ private: //std::shared_ptr remote_read_buffer; std::unique_ptr data_file_writer; - Poco::Logger * log = &Poco::Logger::get("RemoteCacheController"); + LoggerPtr log = getLogger("RemoteCacheController"); }; using RemoteCacheControllerPtr = std::shared_ptr; diff --git a/src/Storages/ColumnsDescription.cpp b/src/Storages/ColumnsDescription.cpp index 72047b3033a..d6a241da032 100644 --- a/src/Storages/ColumnsDescription.cpp +++ b/src/Storages/ColumnsDescription.cpp @@ -24,6 +24,7 @@ #include #include #include +#include "Parsers/ASTSetQuery.h" #include #include #include @@ -53,6 +54,16 @@ ColumnDescription::ColumnDescription(String name_, DataTypePtr type_) { } +ColumnDescription::ColumnDescription(String name_, DataTypePtr type_, String comment_) + : name(std::move(name_)), type(std::move(type_)), comment(comment_) +{ +} + +ColumnDescription::ColumnDescription(String name_, DataTypePtr type_, ASTPtr codec_, 
String comment_) + : name(std::move(name_)), type(std::move(type_)), comment(comment_), codec(codec_) +{ +} + bool ColumnDescription::operator==(const ColumnDescription & other) const { auto ast_to_str = [](const ASTPtr & ast) { return ast ? queryToString(ast) : String{}; }; @@ -62,6 +73,7 @@ bool ColumnDescription::operator==(const ColumnDescription & other) const && default_desc == other.default_desc && stat == other.stat && ast_to_str(codec) == ast_to_str(other.codec) + && settings == other.settings && ast_to_str(ttl) == ast_to_str(other.ttl); } @@ -94,6 +106,18 @@ void ColumnDescription::writeText(WriteBuffer & buf) const writeEscapedString(queryToString(codec), buf); } + if (!settings.empty()) + { + writeChar('\t', buf); + DB::writeText("SETTINGS ", buf); + DB::writeText("(", buf); + ASTSetQuery ast; + ast.is_standalone = false; + ast.changes = settings; + writeEscapedString(queryToString(ast), buf); + DB::writeText(")", buf); + } + if (stat) { writeChar('\t', buf); @@ -144,22 +168,27 @@ void ColumnDescription::readText(ReadBuffer & buf) if (col_ast->ttl) ttl = col_ast->ttl; + + if (col_ast->settings) + settings = col_ast->settings->as().changes; } else throw Exception(ErrorCodes::CANNOT_PARSE_TEXT, "Cannot parse column description"); } } -ColumnsDescription::ColumnsDescription(std::initializer_list ordinary) +ColumnsDescription::ColumnsDescription(std::initializer_list ordinary) { - for (const auto & elem : ordinary) - add(ColumnDescription(elem.name, elem.type)); + for (auto && elem : ordinary) + add(elem); } -ColumnsDescription::ColumnsDescription(NamesAndTypes ordinary) +ColumnsDescription ColumnsDescription::fromNamesAndTypes(NamesAndTypes ordinary) { + ColumnsDescription result; for (auto & elem : ordinary) - add(ColumnDescription(std::move(elem.name), std::move(elem.type))); + result.add(ColumnDescription(std::move(elem.name), std::move(elem.type))); + return result; } ColumnsDescription::ColumnsDescription(NamesAndTypesList ordinary) @@ -173,6 +202,11 @@ ColumnsDescription::ColumnsDescription(NamesAndTypesList ordinary, NamesAndAlias for (auto & elem : ordinary) add(ColumnDescription(std::move(elem.name), std::move(elem.type))); + setAliases(std::move(aliases)); +} + +void ColumnsDescription::setAliases(NamesAndAliases aliases) +{ for (auto & alias : aliases) { ColumnDescription description(std::move(alias.name), std::move(alias.type)); diff --git a/src/Storages/ColumnsDescription.h b/src/Storages/ColumnsDescription.h index 4de8aa11de3..59179aac17a 100644 --- a/src/Storages/ColumnsDescription.h +++ b/src/Storages/ColumnsDescription.h @@ -7,6 +7,7 @@ #include #include #include +#include #include #include @@ -83,6 +84,7 @@ struct ColumnDescription ColumnDefault default_desc; String comment; ASTPtr codec; + SettingsChanges settings; ASTPtr ttl; std::optional stat; @@ -90,6 +92,8 @@ struct ColumnDescription ColumnDescription(ColumnDescription &&) = default; ColumnDescription(const ColumnDescription &) = default; ColumnDescription(String name_, DataTypePtr type_); + ColumnDescription(String name_, DataTypePtr type_, String comment_); + ColumnDescription(String name_, DataTypePtr type_, ASTPtr codec_, String comment_); bool operator==(const ColumnDescription & other) const; bool operator!=(const ColumnDescription & other) const { return !(*this == other); } @@ -105,14 +109,16 @@ class ColumnsDescription : public IHints<> public: ColumnsDescription() = default; - ColumnsDescription(std::initializer_list ordinary); - - explicit ColumnsDescription(NamesAndTypes ordinary); + static 
ColumnsDescription fromNamesAndTypes(NamesAndTypes ordinary); explicit ColumnsDescription(NamesAndTypesList ordinary); + explicit ColumnsDescription(std::initializer_list ordinary); + explicit ColumnsDescription(NamesAndTypesList ordinary, NamesAndAliases aliases); + void setAliases(NamesAndAliases aliases); + /// `after_column` can be a Nested column name; void add(ColumnDescription column, const String & after_column = String(), bool first = false, bool add_subcolumns = true); /// `column_name` can be a Nested column name; diff --git a/src/Storages/DataLakes/DeltaLakeMetadataParser.cpp b/src/Storages/DataLakes/DeltaLakeMetadataParser.cpp index b8bffb267e5..3584f137225 100644 --- a/src/Storages/DataLakes/DeltaLakeMetadataParser.cpp +++ b/src/Storages/DataLakes/DeltaLakeMetadataParser.cpp @@ -314,7 +314,7 @@ struct DeltaLakeMetadataParser::Impl return version; } - Poco::Logger * log = &Poco::Logger::get("DeltaLakeMetadataParser"); + LoggerPtr log = getLogger("DeltaLakeMetadataParser"); }; diff --git a/src/Storages/DataLakes/HudiMetadataParser.cpp b/src/Storages/DataLakes/HudiMetadataParser.cpp index 78d69c83989..699dfe8fda0 100644 --- a/src/Storages/DataLakes/HudiMetadataParser.cpp +++ b/src/Storages/DataLakes/HudiMetadataParser.cpp @@ -50,7 +50,7 @@ struct HudiMetadataParser::Impl */ Strings processMetadataFiles(const Configuration & configuration) { - auto * log = &Poco::Logger::get("HudiMetadataParser"); + auto log = getLogger("HudiMetadataParser"); const auto keys = MetadataReadHelper::listFiles(configuration, "", Poco::toLower(configuration.format)); diff --git a/src/Storages/DataLakes/IStorageDataLake.h b/src/Storages/DataLakes/IStorageDataLake.h index 77a22cd00fc..db3f835494f 100644 --- a/src/Storages/DataLakes/IStorageDataLake.h +++ b/src/Storages/DataLakes/IStorageDataLake.h @@ -22,15 +22,15 @@ public: using Configuration = typename Storage::Configuration; template - explicit IStorageDataLake(const Configuration & configuration_, ContextPtr context_, Args && ...args) - : Storage(getConfigurationForDataRead(configuration_, context_), context_, std::forward(args)...) + explicit IStorageDataLake(const Configuration & configuration_, ContextPtr context_, bool attach, Args && ...args) + : Storage(getConfigurationForDataRead(configuration_, context_, {}, attach), context_, std::forward(args)...) 
, base_configuration(configuration_) - , log(&Poco::Logger::get(getName())) {} // NOLINT(clang-analyzer-optin.cplusplus.VirtualCall) + , log(getLogger(getName())) {} // NOLINT(clang-analyzer-optin.cplusplus.VirtualCall) template - static StoragePtr create(const Configuration & configuration_, ContextPtr context_, Args && ...args) + static StoragePtr create(const Configuration & configuration_, ContextPtr context_, bool attach, Args && ...args) { - return std::make_shared>(configuration_, context_, std::forward(args)...); + return std::make_shared>(configuration_, context_, attach, std::forward(args)...); } String getName() const override { return name; } @@ -64,24 +64,34 @@ public: private: static Configuration getConfigurationForDataRead( - const Configuration & base_configuration, ContextPtr local_context, const Strings & keys = {}) + const Configuration & base_configuration, ContextPtr local_context, const Strings & keys = {}, bool attach = false) { auto configuration{base_configuration}; configuration.update(local_context); configuration.static_configuration = true; - if (keys.empty()) - configuration.keys = getDataFiles(configuration, local_context); - else - configuration.keys = keys; + try + { + if (keys.empty()) + configuration.keys = getDataFiles(configuration, local_context); + else + configuration.keys = keys; - LOG_TRACE( - &Poco::Logger::get("DataLake"), - "New configuration path: {}, keys: {}", - configuration.getPath(), fmt::join(configuration.keys, ", ")); + LOG_TRACE( + getLogger("DataLake"), + "New configuration path: {}, keys: {}", + configuration.getPath(), fmt::join(configuration.keys, ", ")); - configuration.connect(local_context); - return configuration; + configuration.connect(local_context); + return configuration; + } + catch (...) 
+ { + if (!attach) + throw; + tryLogCurrentException(__PRETTY_FUNCTION__); + return configuration; + } } static Strings getDataFiles(const Configuration & configuration, ContextPtr local_context) @@ -102,7 +112,7 @@ private: Configuration base_configuration; std::mutex configuration_update_mutex; - Poco::Logger * log; + LoggerPtr log; }; @@ -115,7 +125,7 @@ static StoragePtr createDataLakeStorage(const StorageFactory::Arguments & args) if (configuration.format == "auto") configuration.format = "Parquet"; - return DataLake::create(configuration, args.getContext(), args.table_id, args.columns, args.constraints, + return DataLake::create(configuration, args.getContext(), args.attach, args.table_id, args.columns, args.constraints, args.comment, getFormatSettings(args.getContext())); } diff --git a/src/Storages/DataLakes/Iceberg/IcebergMetadata.cpp b/src/Storages/DataLakes/Iceberg/IcebergMetadata.cpp index 40a81b59c36..df1536f53fc 100644 --- a/src/Storages/DataLakes/Iceberg/IcebergMetadata.cpp +++ b/src/Storages/DataLakes/Iceberg/IcebergMetadata.cpp @@ -46,10 +46,10 @@ namespace ErrorCodes IcebergMetadata::IcebergMetadata( const StorageS3::Configuration & configuration_, DB::ContextPtr context_, - DB::Int32 metadata_version_, - DB::Int32 format_version_, - DB::String manifest_list_file_, - DB::Int32 current_schema_id_, + Int32 metadata_version_, + Int32 format_version_, + String manifest_list_file_, + Int32 current_schema_id_, DB::NamesAndTypesList schema_) : WithContext(context_) , configuration(configuration_) @@ -58,7 +58,7 @@ IcebergMetadata::IcebergMetadata( , manifest_list_file(std::move(manifest_list_file_)) , current_schema_id(current_schema_id_) , schema(std::move(schema_)) - , log(&Poco::Logger::get("IcebergMetadata")) + , log(getLogger("IcebergMetadata")) { } @@ -240,7 +240,7 @@ DataTypePtr getFieldType(const Poco::JSON::Object::Ptr & field, const String & t } -std::pair parseTableSchema(const Poco::JSON::Object::Ptr & metadata_object, int format_version) +std::pair parseTableSchema(const Poco::JSON::Object::Ptr & metadata_object, int format_version, bool ignore_schema_evolution) { Poco::JSON::Object::Ptr schema; Int32 current_schema_id; @@ -253,13 +253,39 @@ std::pair parseTableSchema(const Poco::JSON::Object::P { current_schema_id = metadata_object->getValue("current-schema-id"); auto schemas = metadata_object->get("schemas").extract(); - if (schemas->size() != 1) - throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Cannot read Iceberg table: the table schema has been changed at least 1 time, reading tables with evolved schema is not supported"); + if (schemas->size() == 0) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot parse Iceberg table schema: schemas field is empty"); - /// Now we sure that there is only one schema. - schema = schemas->getObject(0); - if (schema->getValue("schema-id") != current_schema_id) - throw Exception(ErrorCodes::BAD_ARGUMENTS, R"(Field "schema-id" of the schema doesn't match "current-schema-id" in metadata)"); + if (ignore_schema_evolution) + { + /// If we ignore schema evolution, we will just use latest schema for all data files. + /// Find schema with 'schema-id' equal to 'current_schema_id'. 
+ for (uint32_t i = 0; i != schemas->size(); ++i) + { + auto current_schema = schemas->getObject(i); + if (current_schema->getValue("schema-id") == current_schema_id) + { + schema = current_schema; + break; + } + } + + if (!schema) + throw Exception(ErrorCodes::BAD_ARGUMENTS, R"(There is no schema with "schema-id" that matches "current-schema-id" in metadata)"); + } + else + { + if (schemas->size() != 1) + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, + "Cannot read Iceberg table: the table schema has been changed at least 1 time, reading tables with evolved schema is not " + "supported. If you want to ignore schema evolution and read all files using latest schema saved on table creation, enable setting " + "iceberg_engine_ignore_schema_evolution (Note: enabling this setting can lead to incorrect result)"); + + /// Now we are sure that there is only one schema. + schema = schemas->getObject(0); + if (schema->getValue("schema-id") != current_schema_id) + throw Exception(ErrorCodes::BAD_ARGUMENTS, R"(Field "schema-id" of the schema doesn't match "current-schema-id" in metadata)"); + } } else { @@ -267,8 +293,11 @@ std::pair parseTableSchema(const Poco::JSON::Object::P current_schema_id = schema->getValue("schema-id"); /// Field "schemas" is optional for version 1, but after version 2 was introduced, /// in most cases this field is added for new tables in version 1 as well. - if (metadata_object->has("schemas") && metadata_object->get("schemas").extract()->size() > 1) - throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Cannot read Iceberg table: the table schema has been changed at least 1 time, reading tables with evolved schema is not supported"); + if (!ignore_schema_evolution && metadata_object->has("schemas") && metadata_object->get("schemas").extract()->size() > 1) + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, + "Cannot read Iceberg table: the table schema has been changed at least 1 time, reading tables with evolved schema is not " + "supported. 
If you want to ignore schema evolution and read all files using latest schema saved on table creation, enable setting " + "iceberg_engine_ignore_schema_evolution (Note: enabling this setting can lead to incorrect result)"); } NamesAndTypesList names_and_types; @@ -346,7 +375,7 @@ std::pair getMetadataFileAndVersion(const StorageS3::Configuratio std::unique_ptr parseIcebergMetadata(const StorageS3::Configuration & configuration, ContextPtr context_) { const auto [metadata_version, metadata_file_path] = getMetadataFileAndVersion(configuration); - LOG_DEBUG(&Poco::Logger::get("IcebergMetadata"), "Parse metadata {}", metadata_file_path); + LOG_DEBUG(getLogger("IcebergMetadata"), "Parse metadata {}", metadata_file_path); auto buf = S3DataLakeMetadataReadHelper::createReadBuffer(metadata_file_path, context_, configuration); String json_str; readJSONObjectPossiblyInvalid(json_str, *buf); @@ -356,7 +385,7 @@ std::unique_ptr parseIcebergMetadata(const StorageS3::Configura Poco::JSON::Object::Ptr object = json.extract(); auto format_version = object->getValue("format-version"); - auto [schema, schema_id] = parseTableSchema(object, format_version); + auto [schema, schema_id] = parseTableSchema(object, format_version, context_->getSettingsRef().iceberg_engine_ignore_schema_evolution); auto current_snapshot_id = object->getValue("current-snapshot-id"); auto snapshots = object->get("snapshots").extract(); @@ -453,8 +482,12 @@ Strings IcebergMetadata::getDataFiles() Poco::JSON::Parser parser; Poco::Dynamic::Var json = parser.parse(schema_json_string); Poco::JSON::Object::Ptr schema_object = json.extract(); - if (schema_object->getValue("schema-id") != current_schema_id) - throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Cannot read Iceberg table: the table schema has been changed at least 1 time, reading tables with evolved schema is not supported"); + if (!getContext()->getSettingsRef().iceberg_engine_ignore_schema_evolution && schema_object->getValue("schema-id") != current_schema_id) + throw Exception( + ErrorCodes::UNSUPPORTED_METHOD, + "Cannot read Iceberg table: the table schema has been changed at least 1 time, reading tables with evolved schema is not " + "supported. 
If you want to ignore schema evolution and read all files using latest schema saved on table creation, enable setting " + "iceberg_engine_ignore_schema_evolution (Note: enabling this setting can lead to incorrect result)"); avro::NodePtr root_node = manifest_file_reader->dataSchema().root(); size_t leaves_num = root_node->leaves(); @@ -563,10 +596,11 @@ Strings IcebergMetadata::getDataFiles() const auto status = status_int_column->getInt(i); const auto data_path = std::string(file_path_string_column->getDataAt(i).toView()); const auto pos = data_path.find(configuration.url.key); - const auto file_path = data_path.substr(pos); if (pos == std::string::npos) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected to find {} in data path: {}", configuration.url.key, data_path); + const auto file_path = data_path.substr(pos); + if (ManifestEntryStatus(status) == ManifestEntryStatus::DELETED) { LOG_TEST(log, "Processing delete file for path: {}", file_path); diff --git a/src/Storages/DataLakes/Iceberg/IcebergMetadata.h b/src/Storages/DataLakes/Iceberg/IcebergMetadata.h index d42ad84f472..3e6a2ec3415 100644 --- a/src/Storages/DataLakes/Iceberg/IcebergMetadata.h +++ b/src/Storages/DataLakes/Iceberg/IcebergMetadata.h @@ -84,7 +84,7 @@ private: Int32 current_schema_id; NamesAndTypesList schema; Strings data_files; - Poco::Logger * log; + LoggerPtr log; }; diff --git a/src/Storages/DataLakes/Iceberg/StorageIceberg.cpp b/src/Storages/DataLakes/Iceberg/StorageIceberg.cpp index 165ecce5142..8a1a2cdbd8f 100644 --- a/src/Storages/DataLakes/Iceberg/StorageIceberg.cpp +++ b/src/Storages/DataLakes/Iceberg/StorageIceberg.cpp @@ -8,18 +8,39 @@ namespace DB StoragePtr StorageIceberg::create( const DB::StorageIceberg::Configuration & base_configuration, DB::ContextPtr context_, + bool attach, const DB::StorageID & table_id_, const DB::ColumnsDescription & columns_, const DB::ConstraintsDescription & constraints_, - const DB::String & comment, + const String & comment, std::optional format_settings_) { auto configuration{base_configuration}; configuration.update(context_); - auto metadata = parseIcebergMetadata(configuration, context_); - auto schema_from_metadata = metadata->getTableSchema(); - configuration.keys = metadata->getDataFiles(); - return std::make_shared(std::move(metadata), configuration, context_, table_id_, columns_.empty() ? ColumnsDescription(schema_from_metadata) : columns_, constraints_, comment, format_settings_); + std::unique_ptr metadata; + NamesAndTypesList schema_from_metadata; + try + { + metadata = parseIcebergMetadata(configuration, context_); + schema_from_metadata = metadata->getTableSchema(); + configuration.keys = metadata->getDataFiles(); + } + catch (...) + { + if (!attach) + throw; + tryLogCurrentException(__PRETTY_FUNCTION__); + } + + return std::make_shared( + std::move(metadata), + configuration, + context_, + table_id_, + columns_.empty() ? ColumnsDescription(schema_from_metadata) : columns_, + constraints_, + comment, + format_settings_); } StorageIceberg::StorageIceberg( @@ -52,12 +73,11 @@ void StorageIceberg::updateConfigurationImpl(ContextPtr local_context) { const bool updated = base_configuration.update(local_context); auto new_metadata = parseIcebergMetadata(base_configuration, local_context); - /// Check if nothing was changed. 
- if (updated && new_metadata->getVersion() == current_metadata->getVersion()) - return; - if (new_metadata->getVersion() != current_metadata->getVersion()) + if (!current_metadata || new_metadata->getVersion() != current_metadata->getVersion()) current_metadata = std::move(new_metadata); + else if (!updated) + return; auto updated_configuration{base_configuration}; /// If metadata wasn't changed, we won't list data files again. diff --git a/src/Storages/DataLakes/Iceberg/StorageIceberg.h b/src/Storages/DataLakes/Iceberg/StorageIceberg.h index a18865b5a54..4e63da5508a 100644 --- a/src/Storages/DataLakes/Iceberg/StorageIceberg.h +++ b/src/Storages/DataLakes/Iceberg/StorageIceberg.h @@ -30,6 +30,7 @@ public: static StoragePtr create(const Configuration & base_configuration, ContextPtr context_, + bool attach, const StorageID & table_id_, const ColumnsDescription & columns_, const ConstraintsDescription & constraints_, diff --git a/src/Storages/DataLakes/S3MetadataReader.cpp b/src/Storages/DataLakes/S3MetadataReader.cpp index ac472c190e4..d66e21550a3 100644 --- a/src/Storages/DataLakes/S3MetadataReader.cpp +++ b/src/Storages/DataLakes/S3MetadataReader.cpp @@ -77,7 +77,7 @@ std::vector S3DataLakeMetadataReadHelper::listFiles( is_finished = !outcome.GetResult().GetIsTruncated(); } - LOG_TRACE(&Poco::Logger::get("S3DataLakeMetadataReadHelper"), "Listed {} files", res.size()); + LOG_TRACE(getLogger("S3DataLakeMetadataReadHelper"), "Listed {} files", res.size()); return res; } diff --git a/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.cpp b/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.cpp index 26fa489a63d..4e01cb2c6cf 100644 --- a/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.cpp +++ b/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.cpp @@ -60,7 +60,7 @@ namespace { template -ConnectionPoolPtrs createPoolsForAddresses(const Cluster::Addresses & addresses, PoolFactory && factory, Poco::Logger * log) +ConnectionPoolPtrs createPoolsForAddresses(const Cluster::Addresses & addresses, PoolFactory && factory, LoggerPtr log) { ConnectionPoolPtrs pools; @@ -121,7 +121,7 @@ DistributedAsyncInsertDirectoryQueue::DistributedAsyncInsertDirectoryQueue( , default_sleep_time(storage.getDistributedSettingsRef().background_insert_sleep_time_ms.totalMilliseconds()) , sleep_time(default_sleep_time) , max_sleep_time(storage.getDistributedSettingsRef().background_insert_max_sleep_time_ms.totalMilliseconds()) - , log(&Poco::Logger::get(getLoggerName())) + , log(getLogger(getLoggerName())) , monitor_blocker(monitor_blocker_) , metric_pending_bytes(CurrentMetrics::DistributedBytesToInsert, 0) , metric_pending_files(CurrentMetrics::DistributedFilesToInsert, 0) diff --git a/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.h b/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.h index 8bbd99c786a..f7d7553851a 100644 --- a/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.h +++ b/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.h @@ -145,7 +145,7 @@ private: const std::chrono::milliseconds max_sleep_time; std::chrono::time_point last_decrease_time {std::chrono::system_clock::now()}; std::mutex mutex; - Poco::Logger * log; + LoggerPtr log; ActionBlocker & monitor_blocker; BackgroundSchedulePoolTaskHolder task_handle; diff --git a/src/Storages/Distributed/DistributedAsyncInsertHeader.cpp b/src/Storages/Distributed/DistributedAsyncInsertHeader.cpp index a8ed89e66f1..cfcee4dc8a2 100644 --- 
a/src/Storages/Distributed/DistributedAsyncInsertHeader.cpp +++ b/src/Storages/Distributed/DistributedAsyncInsertHeader.cpp @@ -18,7 +18,7 @@ namespace ErrorCodes extern const int CHECKSUM_DOESNT_MATCH; } -DistributedAsyncInsertHeader DistributedAsyncInsertHeader::read(ReadBufferFromFile & in, Poco::Logger * log) +DistributedAsyncInsertHeader DistributedAsyncInsertHeader::read(ReadBufferFromFile & in, LoggerPtr log) { DistributedAsyncInsertHeader distributed_header; diff --git a/src/Storages/Distributed/DistributedAsyncInsertHeader.h b/src/Storages/Distributed/DistributedAsyncInsertHeader.h index a7330fa5ef1..fb4b4696463 100644 --- a/src/Storages/Distributed/DistributedAsyncInsertHeader.h +++ b/src/Storages/Distributed/DistributedAsyncInsertHeader.h @@ -38,7 +38,7 @@ struct DistributedAsyncInsertHeader std::string block_header_string; Block block_header; - static DistributedAsyncInsertHeader read(ReadBufferFromFile & in, Poco::Logger * log); + static DistributedAsyncInsertHeader read(ReadBufferFromFile & in, LoggerPtr log); OpenTelemetry::TracingContextHolderPtr createTracingContextHolder(const char * function, std::shared_ptr open_telemetry_span_log) const; }; diff --git a/src/Storages/Distributed/DistributedAsyncInsertHelpers.cpp b/src/Storages/Distributed/DistributedAsyncInsertHelpers.cpp index 98073ba1e08..a9bdef31711 100644 --- a/src/Storages/Distributed/DistributedAsyncInsertHelpers.cpp +++ b/src/Storages/Distributed/DistributedAsyncInsertHelpers.cpp @@ -72,7 +72,7 @@ void writeRemoteConvert( RemoteInserter & remote, bool compression_expected, ReadBufferFromFile & in, - Poco::Logger * log) + LoggerPtr log) { if (!remote.getHeader()) { diff --git a/src/Storages/Distributed/DistributedAsyncInsertHelpers.h b/src/Storages/Distributed/DistributedAsyncInsertHelpers.h index 9543450418c..202d9ff6fff 100644 --- a/src/Storages/Distributed/DistributedAsyncInsertHelpers.h +++ b/src/Storages/Distributed/DistributedAsyncInsertHelpers.h @@ -1,9 +1,7 @@ #pragma once -namespace Poco -{ -class Logger; -} +#include + namespace DB { @@ -30,6 +28,6 @@ void writeRemoteConvert( RemoteInserter & remote, bool compression_expected, ReadBufferFromFile & in, - Poco::Logger * log); + LoggerPtr log); } diff --git a/src/Storages/Distributed/DistributedAsyncInsertSource.cpp b/src/Storages/Distributed/DistributedAsyncInsertSource.cpp index 7992636ac11..33e53da2857 100644 --- a/src/Storages/Distributed/DistributedAsyncInsertSource.cpp +++ b/src/Storages/Distributed/DistributedAsyncInsertSource.cpp @@ -10,7 +10,7 @@ namespace DB struct DistributedAsyncInsertSource::Data { - Poco::Logger * log = nullptr; + LoggerPtr log = nullptr; ReadBufferFromFile in; CompressedReadBuffer decompressing_in; @@ -19,7 +19,7 @@ struct DistributedAsyncInsertSource::Data Block first_block; explicit Data(const String & file_name) - : log(&Poco::Logger::get("DistributedAsyncInsertSource")) + : log(getLogger("DistributedAsyncInsertSource")) , in(file_name) , decompressing_in(in) , block_in(decompressing_in, DistributedAsyncInsertHeader::read(in, log).revision) diff --git a/src/Storages/Distributed/DistributedSink.cpp b/src/Storages/Distributed/DistributedSink.cpp index 650539ef1e9..1efa98d0c13 100644 --- a/src/Storages/Distributed/DistributedSink.cpp +++ b/src/Storages/Distributed/DistributedSink.cpp @@ -62,7 +62,7 @@ namespace ErrorCodes extern const int ABORTED; } -static Block adoptBlock(const Block & header, const Block & block, Poco::Logger * log) +static Block adoptBlock(const Block & header, const Block & block, LoggerPtr log) { if 
(blocksHaveEqualStructure(header, block)) return block; @@ -84,7 +84,7 @@ static Block adoptBlock(const Block & header, const Block & block, Poco::Logger } -static void writeBlockConvert(PushingPipelineExecutor & executor, const Block & block, size_t repeats, Poco::Logger * log) +static void writeBlockConvert(PushingPipelineExecutor & executor, const Block & block, size_t repeats, LoggerPtr log) { Block adopted_block = adoptBlock(executor.getHeader(), block, log); for (size_t i = 0; i < repeats; ++i) @@ -126,7 +126,7 @@ DistributedSink::DistributedSink( , insert_timeout(insert_timeout_) , main_table(main_table_) , columns_to_send(columns_to_send_.begin(), columns_to_send_.end()) - , log(&Poco::Logger::get("DistributedSink")) + , log(getLogger("DistributedSink")) { const auto & settings = context->getSettingsRef(); if (settings.max_distributed_depth && context->getClientInfo().distributed_depth >= settings.max_distributed_depth) diff --git a/src/Storages/Distributed/DistributedSink.h b/src/Storages/Distributed/DistributedSink.h index 1bb4419e1a5..654c1db354f 100644 --- a/src/Storages/Distributed/DistributedSink.h +++ b/src/Storages/Distributed/DistributedSink.h @@ -152,7 +152,7 @@ private: std::atomic finished_jobs_count{0}; - Poco::Logger * log; + LoggerPtr log; }; } diff --git a/src/Storages/FileLog/FileLogConsumer.cpp b/src/Storages/FileLog/FileLogConsumer.cpp index bfe081c7bad..1bd3026ab8c 100644 --- a/src/Storages/FileLog/FileLogConsumer.cpp +++ b/src/Storages/FileLog/FileLogConsumer.cpp @@ -22,7 +22,7 @@ FileLogConsumer::FileLogConsumer( ContextPtr context_, size_t stream_number_, size_t max_streams_number_) - : log(&Poco::Logger::get("FileLogConsumer " + toString(stream_number_))) + : log(getLogger("FileLogConsumer " + toString(stream_number_))) , storage(storage_) , batch_size(max_batch_size) , poll_timeout(poll_timeout_) diff --git a/src/Storages/FileLog/FileLogConsumer.h b/src/Storages/FileLog/FileLogConsumer.h index b19f3a9350b..e44bfeb1806 100644 --- a/src/Storages/FileLog/FileLogConsumer.h +++ b/src/Storages/FileLog/FileLogConsumer.h @@ -42,7 +42,7 @@ private: BufferStatus buffer_status = BufferStatus::INIT; - Poco::Logger * log; + LoggerPtr log; StorageFileLog & storage; diff --git a/src/Storages/FileLog/FileLogDirectoryWatcher.cpp b/src/Storages/FileLog/FileLogDirectoryWatcher.cpp index 9d488616e85..844b31fd7c9 100644 --- a/src/Storages/FileLog/FileLogDirectoryWatcher.cpp +++ b/src/Storages/FileLog/FileLogDirectoryWatcher.cpp @@ -6,7 +6,7 @@ namespace DB FileLogDirectoryWatcher::FileLogDirectoryWatcher(const std::string & path_, StorageFileLog & storage_, ContextPtr context_) : path(path_) , storage(storage_) - , log(&Poco::Logger::get("FileLogDirectoryWatcher(" + path + ")")) + , log(getLogger("FileLogDirectoryWatcher(" + path + ")")) , dw(std::make_unique(*this, path, context_)) { } diff --git a/src/Storages/FileLog/FileLogDirectoryWatcher.h b/src/Storages/FileLog/FileLogDirectoryWatcher.h index 9b7afcf8e12..1cf3697c7c0 100644 --- a/src/Storages/FileLog/FileLogDirectoryWatcher.h +++ b/src/Storages/FileLog/FileLogDirectoryWatcher.h @@ -65,7 +65,7 @@ private: /// accessed in thread created by dw. 
Events events; - Poco::Logger * log; + LoggerPtr log; std::mutex mutex; diff --git a/src/Storages/FileLog/StorageFileLog.cpp b/src/Storages/FileLog/StorageFileLog.cpp index 2eea619d654..9c7648ef658 100644 --- a/src/Storages/FileLog/StorageFileLog.cpp +++ b/src/Storages/FileLog/StorageFileLog.cpp @@ -1,7 +1,7 @@ #include +#include #include #include -#include #include #include #include @@ -13,9 +13,12 @@ #include #include #include +#include +#include #include #include #include +#include #include #include #include @@ -50,6 +53,76 @@ namespace static constexpr auto TMP_SUFFIX = ".tmp"; + +class ReadFromStorageFileLog final : public ReadFromStreamLikeEngine +{ +public: + ReadFromStorageFileLog( + const Names & column_names_, + StoragePtr storage_, + const StorageSnapshotPtr & storage_snapshot_, + SelectQueryInfo & query_info, + ContextPtr context_) + : ReadFromStreamLikeEngine{column_names_, storage_snapshot_, query_info.storage_limits, context_} + , column_names{column_names_} + , storage{storage_} + , storage_snapshot{storage_snapshot_} + { + } + + String getName() const override { return "ReadFromStorageFileLog"; } + +private: + Pipe makePipe() final + { + auto & file_log = storage->as(); + if (file_log.mv_attached) + throw Exception(ErrorCodes::QUERY_NOT_ALLOWED, "Cannot read from StorageFileLog with attached materialized views"); + + std::lock_guard lock(file_log.file_infos_mutex); + if (file_log.running_streams) + throw Exception(ErrorCodes::CANNOT_SELECT, "Another select query is running on this table, need to wait it finish."); + + file_log.updateFileInfos(); + + /// No files to parse + if (file_log.file_infos.file_names.empty()) + { + LOG_WARNING(file_log.log, "There is a idle table named {}, no files need to parse.", getName()); + return Pipe{}; + } + + auto modified_context = Context::createCopy(getContext()); + + auto max_streams_number = std::min(file_log.filelog_settings->max_threads, file_log.file_infos.file_names.size()); + + /// Each stream responsible for closing it's files and store meta + file_log.openFilesAndSetPos(); + + Pipes pipes; + pipes.reserve(max_streams_number); + for (size_t stream_number = 0; stream_number < max_streams_number; ++stream_number) + { + pipes.emplace_back(std::make_shared( + file_log, + storage_snapshot, + modified_context, + column_names, + file_log.getMaxBlockSize(), + file_log.getPollTimeoutMillisecond(), + stream_number, + max_streams_number, + file_log.filelog_settings->handle_error_mode)); + } + + return Pipe::unitePipes(std::move(pipes)); + } + + const Names column_names; + StoragePtr storage; + StorageSnapshotPtr storage_snapshot; +}; + StorageFileLog::StorageFileLog( const StorageID & table_id_, ContextPtr context_, @@ -66,7 +139,7 @@ StorageFileLog::StorageFileLog( , path(path_) , metadata_base_path(std::filesystem::path(metadata_base_path_) / "metadata") , format_name(format_name_) - , log(&Poco::Logger::get("StorageFileLog (" + table_id_.table_name + ")")) + , log(getLogger("StorageFileLog (" + table_id_.table_name + ")")) , disk(getContext()->getStoragePolicy("default")->getDisks().at(0)) , milliseconds_to_wait(filelog_settings->poll_directory_watch_events_backoff_init.totalMilliseconds()) { @@ -296,62 +369,19 @@ UInt64 StorageFileLog::getInode(const String & file_name) return file_stat.st_ino; } -Pipe StorageFileLog::read( +void StorageFileLog::read( + QueryPlan & query_plan, const Names & column_names, const StorageSnapshotPtr & storage_snapshot, - SelectQueryInfo & /* query_info */, - ContextPtr local_context, + SelectQueryInfo & 
query_info, + ContextPtr query_context, QueryProcessingStage::Enum /* processed_stage */, size_t /* max_block_size */, size_t /* num_streams */) + { - /// If there are MVs depended on this table, we just forbid reading - if (!local_context->getSettingsRef().stream_like_engine_allow_direct_select) - throw Exception(ErrorCodes::QUERY_NOT_ALLOWED, - "Direct select is not allowed. To enable use setting `stream_like_engine_allow_direct_select`"); - - if (mv_attached) - throw Exception(ErrorCodes::QUERY_NOT_ALLOWED, "Cannot read from StorageFileLog with attached materialized views"); - - std::lock_guard lock(file_infos_mutex); - if (running_streams) - { - throw Exception(ErrorCodes::CANNOT_SELECT, "Another select query is running on this table, need to wait it finish."); - } - - updateFileInfos(); - - /// No files to parse - if (file_infos.file_names.empty()) - { - LOG_WARNING(log, "There is a idle table named {}, no files need to parse.", getName()); - return Pipe{}; - } - - auto modified_context = Context::createCopy(local_context); - - auto max_streams_number = std::min(filelog_settings->max_threads, file_infos.file_names.size()); - - /// Each stream responsible for closing it's files and store meta - openFilesAndSetPos(); - - Pipes pipes; - pipes.reserve(max_streams_number); - for (size_t stream_number = 0; stream_number < max_streams_number; ++stream_number) - { - pipes.emplace_back(std::make_shared( - *this, - storage_snapshot, - modified_context, - column_names, - getMaxBlockSize(), - getPollTimeoutMillisecond(), - stream_number, - max_streams_number, - filelog_settings->handle_error_mode)); - } - - return Pipe::unitePipes(std::move(pipes)); + query_plan.addStep( + std::make_unique(column_names, shared_from_this(), storage_snapshot, query_info, std::move(query_context))); } void StorageFileLog::increaseStreams() diff --git a/src/Storages/FileLog/StorageFileLog.h b/src/Storages/FileLog/StorageFileLog.h index 3cb6ac1fbbf..cc5815a1cef 100644 --- a/src/Storages/FileLog/StorageFileLog.h +++ b/src/Storages/FileLog/StorageFileLog.h @@ -49,7 +49,8 @@ public: void startup() override; void shutdown(bool is_drop) override; - Pipe read( + void read( + QueryPlan & query_plan, const Names & column_names, const StorageSnapshotPtr & storage_snapshot, SelectQueryInfo & query_info, @@ -133,6 +134,8 @@ public: const auto & getFileLogSettings() const { return filelog_settings; } private: + friend class ReadFromStorageFileLog; + std::unique_ptr filelog_settings; const String path; @@ -146,7 +149,7 @@ private: FileInfos file_infos; const String format_name; - Poco::Logger * log; + LoggerPtr log; DiskPtr disk; diff --git a/src/Storages/Freeze.cpp b/src/Storages/Freeze.cpp index b9642ec7907..a5a5a07c9a1 100644 --- a/src/Storages/Freeze.cpp +++ b/src/Storages/Freeze.cpp @@ -76,7 +76,7 @@ bool FreezeMetaData::load(DiskPtr data_disk, const String & path) readIntText(version, buffer); if (version < 1 || version > 2) { - LOG_ERROR(&Poco::Logger::get("FreezeMetaData"), "Unknown frozen metadata version: {}", version); + LOG_ERROR(getLogger("FreezeMetaData"), "Unknown frozen metadata version: {}", version); return false; } DB::assertChar('\n', buffer); diff --git a/src/Storages/Freeze.h b/src/Storages/Freeze.h index a64be7465dd..5775653aaea 100644 --- a/src/Storages/Freeze.h +++ b/src/Storages/Freeze.h @@ -38,7 +38,7 @@ public: private: ContextPtr local_context; zkutil::ZooKeeperPtr zookeeper; - Poco::Logger * log = &Poco::Logger::get("Unfreezer"); + LoggerPtr log = getLogger("Unfreezer"); static constexpr 
std::string_view backup_directory_prefix = "shadow"; static bool removeFreezedPart(DiskPtr disk, const String & path, const String & part_name, ContextPtr local_context, zkutil::ZooKeeperPtr zookeeper); }; diff --git a/src/Storages/HDFS/AsynchronousReadBufferFromHDFS.cpp b/src/Storages/HDFS/AsynchronousReadBufferFromHDFS.cpp index b490c5cac63..65df2c020ba 100644 --- a/src/Storages/HDFS/AsynchronousReadBufferFromHDFS.cpp +++ b/src/Storages/HDFS/AsynchronousReadBufferFromHDFS.cpp @@ -37,14 +37,14 @@ namespace ErrorCodes AsynchronousReadBufferFromHDFS::AsynchronousReadBufferFromHDFS( IAsynchronousReader & reader_, const ReadSettings & settings_, std::shared_ptr impl_) - : BufferWithOwnMemory(settings_.remote_fs_buffer_size) + : ReadBufferFromFileBase(settings_.remote_fs_buffer_size, nullptr, 0) , reader(reader_) , base_priority(settings_.priority) , impl(std::move(impl_)) , prefetch_buffer(settings_.remote_fs_buffer_size) , read_until_position(impl->getFileSize()) , use_prefetch(settings_.remote_fs_prefetch) - , log(&Poco::Logger::get("AsynchronousReadBufferFromHDFS")) + , log(getLogger("AsynchronousReadBufferFromHDFS")) { ProfileEvents::increment(ProfileEvents::RemoteFSBuffers); } diff --git a/src/Storages/HDFS/AsynchronousReadBufferFromHDFS.h b/src/Storages/HDFS/AsynchronousReadBufferFromHDFS.h index d89aa60ab71..1d3e8b8e3e9 100644 --- a/src/Storages/HDFS/AsynchronousReadBufferFromHDFS.h +++ b/src/Storages/HDFS/AsynchronousReadBufferFromHDFS.h @@ -21,7 +21,7 @@ namespace DB class IAsynchronousReader; -class AsynchronousReadBufferFromHDFS : public BufferWithOwnMemory, public WithFileName, public WithFileSize +class AsynchronousReadBufferFromHDFS : public ReadBufferFromFileBase { public: AsynchronousReadBufferFromHDFS( @@ -62,7 +62,7 @@ private: std::optional read_until_position; bool use_prefetch; - Poco::Logger * log; + LoggerPtr log; /// Metrics to profile prefetch Stopwatch interval_watch; diff --git a/src/Storages/HDFS/HDFSCommon.cpp b/src/Storages/HDFS/HDFSCommon.cpp index 12b32b740de..f9a55a1285a 100644 --- a/src/Storages/HDFS/HDFSCommon.cpp +++ b/src/Storages/HDFS/HDFSCommon.cpp @@ -55,7 +55,7 @@ void HDFSBuilderWrapper::loadFromConfig( need_kinit = true; hadoop_kerberos_keytab = config.getString(key_path); #else // USE_KRB5 - LOG_WARNING(&Poco::Logger::get("HDFSClient"), "hadoop_kerberos_keytab parameter is ignored because ClickHouse was built without support of krb5 library."); + LOG_WARNING(getLogger("HDFSClient"), "hadoop_kerberos_keytab parameter is ignored because ClickHouse was built without support of krb5 library."); #endif // USE_KRB5 continue; } @@ -66,7 +66,7 @@ void HDFSBuilderWrapper::loadFromConfig( hadoop_kerberos_principal = config.getString(key_path); hdfsBuilderSetPrincipal(hdfs_builder, hadoop_kerberos_principal.c_str()); #else // USE_KRB5 - LOG_WARNING(&Poco::Logger::get("HDFSClient"), "hadoop_kerberos_principal parameter is ignored because ClickHouse was built without support of krb5 library."); + LOG_WARNING(getLogger("HDFSClient"), "hadoop_kerberos_principal parameter is ignored because ClickHouse was built without support of krb5 library."); #endif // USE_KRB5 continue; } @@ -81,7 +81,7 @@ void HDFSBuilderWrapper::loadFromConfig( hadoop_security_kerberos_ticket_cache_path = config.getString(key_path); // standard param - pass further #else // USE_KRB5 - LOG_WARNING(&Poco::Logger::get("HDFSClient"), "hadoop.security.kerberos.ticket.cache.path parameter is ignored because ClickHouse was built without support of krb5 library."); + 
LOG_WARNING(getLogger("HDFSClient"), "hadoop.security.kerberos.ticket.cache.path parameter is ignored because ClickHouse was built without support of krb5 library."); #endif // USE_KRB5 } @@ -95,7 +95,7 @@ void HDFSBuilderWrapper::loadFromConfig( #if USE_KRB5 void HDFSBuilderWrapper::runKinit() { - LOG_DEBUG(&Poco::Logger::get("HDFSClient"), "Running KerberosInit"); + LOG_DEBUG(getLogger("HDFSClient"), "Running KerberosInit"); try { kerberosInit(hadoop_kerberos_keytab,hadoop_kerberos_principal,hadoop_security_kerberos_ticket_cache_path); @@ -104,7 +104,7 @@ void HDFSBuilderWrapper::runKinit() { throw Exception(ErrorCodes::KERBEROS_ERROR, "KerberosInit failure: {}", getExceptionMessage(e, false)); } - LOG_DEBUG(&Poco::Logger::get("HDFSClient"), "Finished KerberosInit"); + LOG_DEBUG(getLogger("HDFSClient"), "Finished KerberosInit"); } #endif // USE_KRB5 diff --git a/src/Storages/HDFS/ReadBufferFromHDFS.cpp b/src/Storages/HDFS/ReadBufferFromHDFS.cpp index ca84719b793..4df05d47003 100644 --- a/src/Storages/HDFS/ReadBufferFromHDFS.cpp +++ b/src/Storages/HDFS/ReadBufferFromHDFS.cpp @@ -2,7 +2,7 @@ #if USE_HDFS #include -#include +#include #include #include #include diff --git a/src/Storages/HDFS/StorageHDFS.cpp b/src/Storages/HDFS/StorageHDFS.cpp index c1c327de74a..ab21c4946e4 100644 --- a/src/Storages/HDFS/StorageHDFS.cpp +++ b/src/Storages/HDFS/StorageHDFS.cpp @@ -687,7 +687,7 @@ Chunk HDFSSource::generate() return {}; } -void HDFSSource::addNumRowsToCache(const DB::String & path, size_t num_rows) +void HDFSSource::addNumRowsToCache(const String & path, size_t num_rows) { auto cache_key = getKeyForSchemaCache(path, storage->format_name, std::nullopt, getContext()); StorageHDFS::getSchemaCache(getContext()).addNumRows(cache_key, num_rows); @@ -877,7 +877,7 @@ private: void ReadFromHDFS::applyFilters() { - auto filter_actions_dag = ActionsDAG::buildFilterActionsDAG(filter_nodes.nodes, {}, context); + auto filter_actions_dag = ActionsDAG::buildFilterActionsDAG(filter_nodes.nodes); const ActionsDAG::Node * predicate = nullptr; if (filter_actions_dag) predicate = filter_actions_dag->getOutputs().at(0); diff --git a/src/Storages/HDFS/StorageHDFS.h b/src/Storages/HDFS/StorageHDFS.h index f1f0019d3e0..7170763c959 100644 --- a/src/Storages/HDFS/StorageHDFS.h +++ b/src/Storages/HDFS/StorageHDFS.h @@ -105,7 +105,7 @@ private: bool is_path_with_globs; NamesAndTypesList virtual_columns; - Poco::Logger * log = &Poco::Logger::get("StorageHDFS"); + LoggerPtr log = getLogger("StorageHDFS"); }; class PullingPipelineExecutor; diff --git a/src/Storages/HDFS/StorageHDFSCluster.cpp b/src/Storages/HDFS/StorageHDFSCluster.cpp index 2e8129b9845..fad29436102 100644 --- a/src/Storages/HDFS/StorageHDFSCluster.cpp +++ b/src/Storages/HDFS/StorageHDFSCluster.cpp @@ -45,7 +45,7 @@ StorageHDFSCluster::StorageHDFSCluster( const ConstraintsDescription & constraints_, const String & compression_method_, bool structure_argument_was_provided_) - : IStorageCluster(cluster_name_, table_id_, &Poco::Logger::get("StorageHDFSCluster (" + table_id_.table_name + ")"), structure_argument_was_provided_) + : IStorageCluster(cluster_name_, table_id_, getLogger("StorageHDFSCluster (" + table_id_.table_name + ")"), structure_argument_was_provided_) , uri(uri_) , format_name(format_name_) , compression_method(compression_method_) diff --git a/src/Storages/HDFS/WriteBufferFromHDFS.cpp b/src/Storages/HDFS/WriteBufferFromHDFS.cpp index 6360bb2a3d5..173dd899ada 100644 --- a/src/Storages/HDFS/WriteBufferFromHDFS.cpp +++ 
b/src/Storages/HDFS/WriteBufferFromHDFS.cpp @@ -4,7 +4,7 @@ #include #include -#include +#include #include #include #include diff --git a/src/Storages/Hive/HiveCommon.cpp b/src/Storages/Hive/HiveCommon.cpp index 609adcf65c9..b58302f262e 100644 --- a/src/Storages/Hive/HiveCommon.cpp +++ b/src/Storages/Hive/HiveCommon.cpp @@ -25,7 +25,7 @@ static const int hive_metastore_client_recv_timeout_ms = 10000; static const int hive_metastore_client_send_timeout_ms = 10000; ThriftHiveMetastoreClientPool::ThriftHiveMetastoreClientPool(ThriftHiveMetastoreClientBuilder builder_) - : PoolBase(max_hive_metastore_client_connections, &Poco::Logger::get("ThriftHiveMetastoreClientPool")), builder(builder_) + : PoolBase(max_hive_metastore_client_connections, getLogger("ThriftHiveMetastoreClientPool")), builder(builder_) { } diff --git a/src/Storages/Hive/HiveCommon.h b/src/Storages/Hive/HiveCommon.h index e2c19fb1684..0f9d3364ffd 100644 --- a/src/Storages/Hive/HiveCommon.h +++ b/src/Storages/Hive/HiveCommon.h @@ -115,7 +115,7 @@ public: const bool empty_partition_keys; const HiveFilesCachePtr hive_files_cache; - Poco::Logger * log = &Poco::Logger::get("HiveMetastoreClient"); + LoggerPtr log = getLogger("HiveMetastoreClient"); }; @@ -138,7 +138,7 @@ private: CacheBase table_metadata_cache; ThriftHiveMetastoreClientPool client_pool; - Poco::Logger * log = &Poco::Logger::get("HiveMetastoreClient"); + LoggerPtr log = getLogger("HiveMetastoreClient"); }; using HiveMetastoreClientPtr = std::shared_ptr; diff --git a/src/Storages/Hive/StorageHive.cpp b/src/Storages/Hive/StorageHive.cpp index 0c4e4f956a0..6766ecd6b4f 100644 --- a/src/Storages/Hive/StorageHive.cpp +++ b/src/Storages/Hive/StorageHive.cpp @@ -228,21 +228,27 @@ public: { auto get_raw_read_buf = [&]() -> std::unique_ptr { - auto buf = std::make_unique( - hdfs_namenode_url, - current_path, - getContext()->getGlobalContext()->getConfigRef(), - getContext()->getReadSettings()); - bool thread_pool_read = read_settings.remote_fs_method == RemoteFSReadMethod::threadpool; if (thread_pool_read) { + auto buf = std::make_unique( + hdfs_namenode_url, + current_path, + getContext()->getGlobalContext()->getConfigRef(), + getContext()->getReadSettings(), + /* read_until_position */0, + /* use_external_buffer */true); + return std::make_unique( getThreadPoolReader(FilesystemReaderType::ASYNCHRONOUS_REMOTE_FS_READER), read_settings, std::move(buf)); } else { - return buf; + return std::make_unique( + hdfs_namenode_url, + current_path, + getContext()->getGlobalContext()->getConfigRef(), + getContext()->getReadSettings()); } }; @@ -405,7 +411,7 @@ private: bool generate_chunk_from_metadata{false}; UInt64 current_file_remained_rows = 0; - Poco::Logger * log = &Poco::Logger::get("StorageHive"); + LoggerPtr log = getLogger("StorageHive"); }; @@ -774,7 +780,7 @@ public: HDFSFSPtr fs_, HiveMetastoreClient::HiveTableMetadataPtr hive_table_metadata_, Block sample_block_, - Poco::Logger * log_, + LoggerPtr log_, ContextPtr context_, size_t max_block_size_, size_t num_streams_) @@ -799,7 +805,7 @@ private: HDFSFSPtr fs; HiveMetastoreClient::HiveTableMetadataPtr hive_table_metadata; Block sample_block; - Poco::Logger * log; + LoggerPtr log; ContextPtr context; size_t max_block_size; @@ -812,7 +818,7 @@ private: void ReadFromHive::applyFilters() { - auto filter_actions_dag = ActionsDAG::buildFilterActionsDAG(filter_nodes.nodes, {}, context); + auto filter_actions_dag = ActionsDAG::buildFilterActionsDAG(filter_nodes.nodes); createFiles(filter_actions_dag); } diff --git 
a/src/Storages/Hive/StorageHive.h b/src/Storages/Hive/StorageHive.h index b0ec96604cc..07440097f7a 100644 --- a/src/Storages/Hive/StorageHive.h +++ b/src/Storages/Hive/StorageHive.h @@ -149,7 +149,7 @@ private: std::shared_ptr storage_settings; - Poco::Logger * log = &Poco::Logger::get("StorageHive"); + LoggerPtr log = getLogger("StorageHive"); }; } diff --git a/src/Storages/IMessageProducer.cpp b/src/Storages/IMessageProducer.cpp index cf314696041..20c47f6f0b4 100644 --- a/src/Storages/IMessageProducer.cpp +++ b/src/Storages/IMessageProducer.cpp @@ -4,7 +4,7 @@ namespace DB { -IMessageProducer::IMessageProducer(Poco::Logger * log_) : log(log_) +IMessageProducer::IMessageProducer(LoggerPtr log_) : log(log_) { } @@ -12,7 +12,16 @@ void AsynchronousMessageProducer::start(const ContextPtr & context) { LOG_TEST(log, "Executing startup"); - initialize(); + try + { + initialize(); + } + catch (...) + { + finished = true; + throw; + } + producing_task = context->getSchedulePool().createTask(getProducingTaskName(), [this] { LOG_TEST(log, "Starting producing task loop"); diff --git a/src/Storages/IMessageProducer.h b/src/Storages/IMessageProducer.h index 12580d5f94a..c769c325191 100644 --- a/src/Storages/IMessageProducer.h +++ b/src/Storages/IMessageProducer.h @@ -16,7 +16,7 @@ namespace DB class IMessageProducer { public: - explicit IMessageProducer(Poco::Logger * log_); + explicit IMessageProducer(LoggerPtr log_); /// Do some preparations. virtual void start(const ContextPtr & context) = 0; @@ -30,14 +30,14 @@ public: virtual ~IMessageProducer() = default; protected: - Poco::Logger * log; + LoggerPtr log; }; /// Implements interface for concurrent message producing. class AsynchronousMessageProducer : public IMessageProducer { public: - explicit AsynchronousMessageProducer(Poco::Logger * log_) : IMessageProducer(log_) {} + explicit AsynchronousMessageProducer(LoggerPtr log_) : IMessageProducer(log_) {} /// Create and schedule task in BackgroundSchedulePool that will produce messages. 
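The IMessageProducer hunk wraps `initialize()` in a try/catch so that a failure during startup marks the producer as `finished` before the exception propagates; without that, nothing would ever set the flag, because the producing task is only scheduled after successful initialization. A small self-contained model of that guard (class and method names below are illustrative, not the real interface):

#include <atomic>
#include <iostream>
#include <stdexcept>

// Minimal model of the new AsynchronousMessageProducer::start(): if initialize() throws
// before the producing task is ever scheduled, mark the producer as finished so that any
// later wait-for-finish logic cannot block forever.
class Producer
{
public:
    void start()
    {
        try
        {
            initialize();           // may throw, e.g. when the broker is unreachable
        }
        catch (...)
        {
            finished = true;        // the diff sets this flag before rethrowing
            throw;
        }
        scheduleProducingTask();    // only reached after successful initialization
    }

    bool isFinished() const { return finished; }

private:
    void initialize() { throw std::runtime_error("cannot connect"); }
    void scheduleProducingTask() { /* the real task sets finished when it stops */ }

    std::atomic<bool> finished{false};
};

int main()
{
    Producer producer;
    try
    {
        producer.start();
    }
    catch (const std::exception & e)
    {
        std::cout << "start failed: " << e.what() << ", finished=" << std::boolalpha << producer.isFinished() << '\n';
    }
}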
void start(const ContextPtr & context) override; diff --git a/src/Storages/IStorageCluster.cpp b/src/Storages/IStorageCluster.cpp index 6f42d8f855c..812b213cf33 100644 --- a/src/Storages/IStorageCluster.cpp +++ b/src/Storages/IStorageCluster.cpp @@ -32,7 +32,7 @@ namespace DB IStorageCluster::IStorageCluster( const String & cluster_name_, const StorageID & table_id_, - Poco::Logger * log_, + LoggerPtr log_, bool structure_argument_was_provided_) : IStorage(table_id_) , log(log_) @@ -54,7 +54,7 @@ public: ASTPtr query_to_send_, QueryProcessingStage::Enum processed_stage_, ClusterPtr cluster_, - Poco::Logger * log_, + LoggerPtr log_, ContextPtr context_) : SourceStepWithFilter(DataStream{.header = std::move(sample_block)}) , storage(std::move(storage_)) @@ -71,7 +71,7 @@ private: ASTPtr query_to_send; QueryProcessingStage::Enum processed_stage; ClusterPtr cluster; - Poco::Logger * log; + LoggerPtr log; ContextPtr context; std::optional extension; @@ -82,7 +82,7 @@ private: void ReadFromCluster::applyFilters() { - auto filter_actions_dag = ActionsDAG::buildFilterActionsDAG(filter_nodes.nodes, {}, context); + auto filter_actions_dag = ActionsDAG::buildFilterActionsDAG(filter_nodes.nodes); const ActionsDAG::Node * predicate = nullptr; if (filter_actions_dag) predicate = filter_actions_dag->getOutputs().at(0); diff --git a/src/Storages/IStorageCluster.h b/src/Storages/IStorageCluster.h index b233f20103d..8d93e94be9a 100644 --- a/src/Storages/IStorageCluster.h +++ b/src/Storages/IStorageCluster.h @@ -19,7 +19,7 @@ public: IStorageCluster( const String & cluster_name_, const StorageID & table_id_, - Poco::Logger * log_, + LoggerPtr log_, bool structure_argument_was_provided_); void read( @@ -46,7 +46,7 @@ protected: virtual void addColumnsStructureToQuery(ASTPtr & query, const String & structure, const ContextPtr & context) = 0; private: - Poco::Logger * log; + LoggerPtr log; String cluster_name; bool structure_argument_was_provided; }; diff --git a/src/Storages/KVStorageUtils.cpp b/src/Storages/KVStorageUtils.cpp index 3031fc6bf9d..5175c93041b 100644 --- a/src/Storages/KVStorageUtils.cpp +++ b/src/Storages/KVStorageUtils.cpp @@ -236,7 +236,7 @@ std::pair getFilterKeys( if (filter_nodes.nodes.empty()) return {{}, true}; - auto filter_actions_dag = ActionsDAG::buildFilterActionsDAG(filter_nodes.nodes, {}, context); + auto filter_actions_dag = ActionsDAG::buildFilterActionsDAG(filter_nodes.nodes); const auto * predicate = filter_actions_dag->getOutputs().at(0); FieldVectorPtr res = std::make_shared(); diff --git a/src/Storages/Kafka/KafkaConsumer.cpp b/src/Storages/Kafka/KafkaConsumer.cpp index 40f2897322d..47167e19a38 100644 --- a/src/Storages/Kafka/KafkaConsumer.cpp +++ b/src/Storages/Kafka/KafkaConsumer.cpp @@ -47,7 +47,7 @@ const auto DRAIN_TIMEOUT_MS = 5000ms; KafkaConsumer::KafkaConsumer( - Poco::Logger * log_, + LoggerPtr log_, size_t max_batch_size, size_t poll_timeout_, bool intermediate_commit_, diff --git a/src/Storages/Kafka/KafkaConsumer.h b/src/Storages/Kafka/KafkaConsumer.h index c4dfc56312f..9cc78d42856 100644 --- a/src/Storages/Kafka/KafkaConsumer.h +++ b/src/Storages/Kafka/KafkaConsumer.h @@ -62,7 +62,7 @@ public: }; KafkaConsumer( - Poco::Logger * log_, + LoggerPtr log_, size_t max_batch_size, size_t poll_timeout_, bool intermediate_commit_, @@ -150,7 +150,7 @@ private: std::string rdkafka_stat; ConsumerPtr consumer; - Poco::Logger * log; + LoggerPtr log; const size_t batch_size = 1; const size_t poll_timeout = 0; size_t offsets_stored = 0; diff --git 
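Several `applyFilters()` implementations above (ReadFromHDFS, ReadFromCluster, and `getFilterKeys` in KVStorageUtils) now call `ActionsDAG::buildFilterActionsDAG` with only the filter nodes, dropping the empty map and context arguments, and then guard against a null result before taking the first output as the predicate. The toy types below only illustrate that guarded lookup; they are not the real ActionsDAG API.

#include <iostream>
#include <memory>
#include <string>
#include <vector>

// Toy stand-ins, only to show the guarded predicate lookup; not the real ActionsDAG API.
struct Node { std::string name; };

struct FilterDAG
{
    std::vector<const Node *> outputs;
    const std::vector<const Node *> & getOutputs() const { return outputs; }
};

// Models buildFilterActionsDAG(filter_nodes) after the change: no context argument,
// and the result may be null when there is nothing to filter on.
std::shared_ptr<FilterDAG> buildFilterDAG(const std::vector<Node> & filter_nodes)
{
    if (filter_nodes.empty())
        return nullptr;

    auto dag = std::make_shared<FilterDAG>();
    for (const auto & node : filter_nodes)
        dag->outputs.push_back(&node);
    return dag;
}

int main()
{
    std::vector<Node> filter_nodes{{"a > 1"}};
    auto dag = buildFilterDAG(filter_nodes);

    const Node * predicate = nullptr;
    if (dag)                                    // the same null check as in every applyFilters() hunk
        predicate = dag->getOutputs().at(0);

    std::cout << (predicate ? predicate->name : "no predicate") << '\n';
}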
a/src/Storages/Kafka/KafkaProducer.cpp b/src/Storages/Kafka/KafkaProducer.cpp index edbfc76ef93..77676fb010b 100644 --- a/src/Storages/Kafka/KafkaProducer.cpp +++ b/src/Storages/Kafka/KafkaProducer.cpp @@ -18,7 +18,7 @@ namespace DB KafkaProducer::KafkaProducer( ProducerPtr producer_, const std::string & topic_, std::chrono::milliseconds poll_timeout, std::atomic & shutdown_called_, const Block & header) - : IMessageProducer(&Poco::Logger::get("KafkaProducer")) + : IMessageProducer(getLogger("KafkaProducer")) , producer(producer_) , topic(topic_) , timeout(poll_timeout) diff --git a/src/Storages/Kafka/KafkaSource.cpp b/src/Storages/Kafka/KafkaSource.cpp index 1fbd7e2d705..dc62c13f633 100644 --- a/src/Storages/Kafka/KafkaSource.cpp +++ b/src/Storages/Kafka/KafkaSource.cpp @@ -33,7 +33,7 @@ KafkaSource::KafkaSource( const StorageSnapshotPtr & storage_snapshot_, const ContextPtr & context_, const Names & columns, - Poco::Logger * log_, + LoggerPtr log_, size_t max_block_size_, bool commit_in_suffix_) : ISource(storage_snapshot_->getSampleBlockForColumns(columns)) diff --git a/src/Storages/Kafka/KafkaSource.h b/src/Storages/Kafka/KafkaSource.h index 485a8e55b6a..a1b94b15a19 100644 --- a/src/Storages/Kafka/KafkaSource.h +++ b/src/Storages/Kafka/KafkaSource.h @@ -22,7 +22,7 @@ public: const StorageSnapshotPtr & storage_snapshot_, const ContextPtr & context_, const Names & columns, - Poco::Logger * log_, + LoggerPtr log_, size_t max_block_size_, bool commit_in_suffix = false); ~KafkaSource() override; @@ -41,7 +41,7 @@ private: StorageSnapshotPtr storage_snapshot; ContextPtr context; Names column_names; - Poco::Logger * log; + LoggerPtr log; UInt64 max_block_size; KafkaConsumerPtr consumer; diff --git a/src/Storages/Kafka/StorageKafka.cpp b/src/Storages/Kafka/StorageKafka.cpp index 1cb810cf8ad..aa347fc719d 100644 --- a/src/Storages/Kafka/StorageKafka.cpp +++ b/src/Storages/Kafka/StorageKafka.cpp @@ -8,8 +8,10 @@ #include #include #include +#include #include #include +#include #include #include #include @@ -17,18 +19,19 @@ #include #include #include -#include +#include +#include #include -#include +#include +#include #include #include #include +#include +#include #include #include -#include #include -#include -#include #include #include #include @@ -37,11 +40,13 @@ #include #include #include +#include #include #include +#include +#include #include #include -#include #include #include @@ -174,6 +179,67 @@ struct StorageKafkaInterceptors } }; +class ReadFromStorageKafka final : public ReadFromStreamLikeEngine +{ +public: + ReadFromStorageKafka( + const Names & column_names_, + StoragePtr storage_, + const StorageSnapshotPtr & storage_snapshot_, + SelectQueryInfo & query_info, + ContextPtr context_) + : ReadFromStreamLikeEngine{column_names_, storage_snapshot_, query_info.storage_limits, context_} + , column_names{column_names_} + , storage{storage_} + , storage_snapshot{storage_snapshot_} + { + } + + String getName() const override { return "ReadFromStorageKafka"; } + +private: + Pipe makePipe() final + { + auto & kafka_storage = storage->as(); + if (kafka_storage.shutdown_called) + throw Exception(ErrorCodes::ABORTED, "Table is detached"); + + if (kafka_storage.mv_attached) + throw Exception(ErrorCodes::QUERY_NOT_ALLOWED, "Cannot read from StorageKafka with attached materialized views"); + + ProfileEvents::increment(ProfileEvents::KafkaDirectReads); + + /// Always use all consumers at once, otherwise SELECT may not read messages from all partitions. 
+ Pipes pipes; + pipes.reserve(kafka_storage.num_consumers); + auto modified_context = Context::createCopy(getContext()); + modified_context->applySettingsChanges(kafka_storage.settings_adjustments); + + // Claim as many consumers as requested, but don't block + for (size_t i = 0; i < kafka_storage.num_consumers; ++i) + { + /// Use block size of 1, otherwise LIMIT won't work properly as it will buffer excess messages in the last block + /// TODO: probably that leads to awful performance. + /// FIXME: seems that doesn't help with extra reading and committing unprocessed messages. + pipes.emplace_back(std::make_shared( + kafka_storage, + storage_snapshot, + modified_context, + column_names, + kafka_storage.log, + 1, + kafka_storage.kafka_settings->kafka_commit_on_select)); + } + + LOG_DEBUG(kafka_storage.log, "Starting reading {} streams", pipes.size()); + return Pipe::unitePipes(std::move(pipes)); + } + + const Names column_names; + StoragePtr storage; + StorageSnapshotPtr storage_snapshot; +}; + namespace { const String CONFIG_KAFKA_TAG = "kafka"; @@ -261,7 +327,7 @@ StorageKafka::StorageKafka( , max_rows_per_message(kafka_settings->kafka_max_rows_per_message.value) , schema_name(getContext()->getMacros()->expand(kafka_settings->kafka_schema.value, macros_info)) , num_consumers(kafka_settings->kafka_num_consumers.value) - , log(&Poco::Logger::get("StorageKafka (" + table_id_.table_name + ")")) + , log(getLogger("StorageKafka (" + table_id_.table_name + ")")) , intermediate_commit(kafka_settings->kafka_commit_every_batch.value) , settings_adjustments(createSettingsAdjustments()) , thread_per_consumer(kafka_settings->kafka_thread_per_consumer.value) @@ -347,45 +413,18 @@ String StorageKafka::getDefaultClientId(const StorageID & table_id_) return fmt::format("{}-{}-{}-{}", VERSION_NAME, getFQDNOrHostName(), table_id_.database_name, table_id_.table_name); } - -Pipe StorageKafka::read( +void StorageKafka::read( + QueryPlan & query_plan, const Names & column_names, const StorageSnapshotPtr & storage_snapshot, - SelectQueryInfo & /* query_info */, - ContextPtr local_context, + SelectQueryInfo & query_info, + ContextPtr query_context, QueryProcessingStage::Enum /* processed_stage */, size_t /* max_block_size */, size_t /* num_streams */) { - if (shutdown_called) - throw Exception(ErrorCodes::ABORTED, "Table is detached"); - - if (!local_context->getSettingsRef().stream_like_engine_allow_direct_select) - throw Exception(ErrorCodes::QUERY_NOT_ALLOWED, - "Direct select is not allowed. To enable use setting `stream_like_engine_allow_direct_select`"); - - if (mv_attached) - throw Exception(ErrorCodes::QUERY_NOT_ALLOWED, "Cannot read from StorageKafka with attached materialized views"); - - ProfileEvents::increment(ProfileEvents::KafkaDirectReads); - - /// Always use all consumers at once, otherwise SELECT may not read messages from all partitions. - Pipes pipes; - pipes.reserve(num_consumers); - auto modified_context = Context::createCopy(local_context); - modified_context->applySettingsChanges(settings_adjustments); - - // Claim as many consumers as requested, but don't block - for (size_t i = 0; i < num_consumers; ++i) - { - /// Use block size of 1, otherwise LIMIT won't work properly as it will buffer excess messages in the last block - /// TODO: probably that leads to awful performance. - /// FIXME: seems that doesn't help with extra reading and committing unprocessed messages. 
- pipes.emplace_back(std::make_shared(*this, storage_snapshot, modified_context, column_names, log, 1, kafka_settings->kafka_commit_on_select)); - } - - LOG_DEBUG(log, "Starting reading {} streams", pipes.size()); - return Pipe::unitePipes(std::move(pipes)); + query_plan.addStep(std::make_unique( + column_names, shared_from_this(), storage_snapshot, query_info, std::move(query_context))); } diff --git a/src/Storages/Kafka/StorageKafka.h b/src/Storages/Kafka/StorageKafka.h index f60719538cf..f9a1e3ff6f3 100644 --- a/src/Storages/Kafka/StorageKafka.h +++ b/src/Storages/Kafka/StorageKafka.h @@ -20,6 +20,7 @@ namespace DB { class StorageSystemKafkaConsumers; +class ReadFromStorageKafka; struct StorageKafkaInterceptors; @@ -48,7 +49,8 @@ public: void startup() override; void shutdown(bool is_drop) override; - Pipe read( + void read( + QueryPlan & query_plan, const Names & column_names, const StorageSnapshotPtr & storage_snapshot, SelectQueryInfo & query_info, @@ -86,6 +88,8 @@ public: SafeConsumers getSafeConsumers() { return {shared_from_this(), std::unique_lock(mutex), consumers}; } private: + friend class ReadFromStorageKafka; + // Configuration and state std::unique_ptr kafka_settings; Macros::MacroExpansionInfo macros_info; @@ -97,7 +101,7 @@ private: const size_t max_rows_per_message; const String schema_name; const size_t num_consumers; /// total number of consumers - Poco::Logger * log; + LoggerPtr log; const bool intermediate_commit; const SettingsChanges settings_adjustments; diff --git a/src/Storages/LiveView/StorageLiveView.cpp b/src/Storages/LiveView/StorageLiveView.cpp index 3c116321083..f81225bbee3 100644 --- a/src/Storages/LiveView/StorageLiveView.cpp +++ b/src/Storages/LiveView/StorageLiveView.cpp @@ -209,7 +209,7 @@ StorageLiveView::StorageLiveView( live_view_context = Context::createCopy(getContext()); live_view_context->makeQueryContext(); - log = &Poco::Logger::get("StorageLiveView (" + table_id_.database_name + "." + table_id_.table_name + ")"); + log = getLogger("StorageLiveView (" + table_id_.database_name + "." + table_id_.table_name + ")"); StorageInMemoryMetadata storage_metadata; storage_metadata.setColumns(columns_); diff --git a/src/Storages/LiveView/StorageLiveView.h b/src/Storages/LiveView/StorageLiveView.h index e0566d586ee..6b8780cb81b 100644 --- a/src/Storages/LiveView/StorageLiveView.h +++ b/src/Storages/LiveView/StorageLiveView.h @@ -184,7 +184,7 @@ private: ContextMutablePtr live_view_context; - Poco::Logger * log; + LoggerPtr log; bool is_periodically_refreshed = false; Seconds periodic_live_view_refresh; diff --git a/src/Storages/MaterializedView/RefreshTask.cpp b/src/Storages/MaterializedView/RefreshTask.cpp index bc26301e3b9..daf7bd65784 100644 --- a/src/Storages/MaterializedView/RefreshTask.cpp +++ b/src/Storages/MaterializedView/RefreshTask.cpp @@ -27,7 +27,7 @@ namespace ErrorCodes RefreshTask::RefreshTask( const ASTRefreshStrategy & strategy) - : log(&Poco::Logger::get("RefreshTask")) + : log(getLogger("RefreshTask")) , refresh_schedule(strategy) {} diff --git a/src/Storages/MaterializedView/RefreshTask.h b/src/Storages/MaterializedView/RefreshTask.h index 8a062f6f359..78599f4f4b4 100644 --- a/src/Storages/MaterializedView/RefreshTask.h +++ b/src/Storages/MaterializedView/RefreshTask.h @@ -62,7 +62,7 @@ public: void setFakeTime(std::optional t); private: - Poco::Logger * log = nullptr; + LoggerPtr log = nullptr; std::weak_ptr view_to_refresh; /// Protects interrupt_execution and running_executor. 
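The StorageKafka hunks move direct SELECT handling out of `read()`, which used to build and return a `Pipe`, into a dedicated plan step, `ReadFromStorageKafka`, whose `makePipe()` performs the shutdown and materialized-view checks and creates one source per consumer; `read()` itself now only adds the step to the `QueryPlan`. A simplified stand-alone model of that split is below; `Pipe`, `IStep`, `QueryPlan` and the storage class are toy stand-ins, not the real interfaces.

#include <iostream>
#include <memory>
#include <stdexcept>
#include <string>
#include <vector>

// Toy stand-ins for Pipe, a query-plan step and QueryPlan.
struct Pipe { std::vector<std::string> sources; };

struct IStep
{
    virtual ~IStep() = default;
    virtual Pipe makePipe() = 0;        // the real step builds its pipe when the plan is assembled
};

struct QueryPlan
{
    void addStep(std::unique_ptr<IStep> step_) { step = std::move(step_); }
    Pipe execute() { return step->makePipe(); }
    std::unique_ptr<IStep> step;
};

struct KafkaLikeStorage
{
    size_t num_consumers = 3;
    bool shutdown_called = false;

    void read(QueryPlan & plan);        // mirrors the new StorageKafka::read(): no pipe is built here
};

struct ReadFromKafkaLikeStorage : IStep
{
    explicit ReadFromKafkaLikeStorage(KafkaLikeStorage & storage_) : storage(storage_) {}

    Pipe makePipe() override
    {
        if (storage.shutdown_called)
            throw std::runtime_error("Table is detached");

        Pipe pipe;
        for (size_t i = 0; i < storage.num_consumers; ++i)     // one source per consumer, as in the diff
            pipe.sources.push_back("consumer-" + std::to_string(i));
        return pipe;
    }

    KafkaLikeStorage & storage;
};

void KafkaLikeStorage::read(QueryPlan & plan)
{
    plan.addStep(std::make_unique<ReadFromKafkaLikeStorage>(*this));
}

int main()
{
    KafkaLikeStorage storage;
    QueryPlan plan;
    storage.read(plan);
    std::cout << "sources: " << plan.execute().sources.size() << '\n';
}

Deferring pipe construction to the step keeps the storage's `read()` cheap and gives the planner a proper step for stream-like engines, which appears to be the point of the `ReadFromStreamLikeEngine` base used above.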
diff --git a/src/Storages/MergeTree/AsyncBlockIDsCache.cpp b/src/Storages/MergeTree/AsyncBlockIDsCache.cpp index cc3bc8fc2a8..9d64592ed64 100644 --- a/src/Storages/MergeTree/AsyncBlockIDsCache.cpp +++ b/src/Storages/MergeTree/AsyncBlockIDsCache.cpp @@ -60,7 +60,7 @@ AsyncBlockIDsCache::AsyncBlockIDsCache(TStorage & storage_) , update_wait(storage.getSettings()->async_block_ids_cache_update_wait_ms) , path(storage.getZooKeeperPath() + "/async_blocks") , log_name(storage.getStorageID().getFullTableName() + " (AsyncBlockIDsCache)") - , log(&Poco::Logger::get(log_name)) + , log(getLogger(log_name)) { task = storage.getContext()->getSchedulePool().createTask(log_name, [this]{ update(); }); } diff --git a/src/Storages/MergeTree/AsyncBlockIDsCache.h b/src/Storages/MergeTree/AsyncBlockIDsCache.h index 38c38da0033..bea012f1d32 100644 --- a/src/Storages/MergeTree/AsyncBlockIDsCache.h +++ b/src/Storages/MergeTree/AsyncBlockIDsCache.h @@ -43,7 +43,7 @@ private: BackgroundSchedulePool::TaskHolder task; const String log_name; - Poco::Logger * log; + LoggerPtr log; }; } diff --git a/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp b/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp index 6c1377505d5..0cb9eb84bf8 100644 --- a/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp +++ b/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp @@ -57,7 +57,7 @@ std::string DataPartStorageOnDiskBase::getRelativePath() const return fs::path(root_path) / part_dir / ""; } -std::optional DataPartStorageOnDiskBase::getRelativePathForPrefix(Poco::Logger * log, const String & prefix, bool detached, bool broken) const +std::optional DataPartStorageOnDiskBase::getRelativePathForPrefix(LoggerPtr log, const String & prefix, bool detached, bool broken) const { assert(!broken || detached); String res; @@ -195,7 +195,7 @@ std::string DataPartStorageOnDiskBase::getDiskName() const std::string DataPartStorageOnDiskBase::getDiskType() const { - return toString(volume->getDisk()->getDataSourceDescription().type); + return volume->getDisk()->getDataSourceDescription().toString(); } bool DataPartStorageOnDiskBase::isStoredOnRemoteDisk() const @@ -471,7 +471,7 @@ MutableDataPartStoragePtr DataPartStorageOnDiskBase::clonePart( const DiskPtr & dst_disk, const ReadSettings & read_settings, const WriteSettings & write_settings, - Poco::Logger * log, + LoggerPtr log, const std::function & cancellation_hook) const { String path_to_clone = fs::path(to) / dir_path / ""; @@ -505,7 +505,7 @@ MutableDataPartStoragePtr DataPartStorageOnDiskBase::clonePart( void DataPartStorageOnDiskBase::rename( std::string new_root_path, std::string new_part_dir, - Poco::Logger * log, + LoggerPtr log, bool remove_new_dir_if_exists, bool fsync_part_dir) { @@ -564,7 +564,7 @@ void DataPartStorageOnDiskBase::remove( const MergeTreeDataPartChecksums & checksums, std::list projections, bool is_temp, - Poco::Logger * log) + LoggerPtr log) { /// NOTE We rename part to delete_tmp_ instead of delete_tmp_ to avoid race condition /// when we try to remove two parts with the same name, but different relative paths, @@ -722,7 +722,7 @@ void DataPartStorageOnDiskBase::clearDirectory( const CanRemoveDescription & can_remove_description, const MergeTreeDataPartChecksums & checksums, bool is_temp, - Poco::Logger * log) + LoggerPtr log) { auto disk = volume->getDisk(); auto [can_remove_shared_data, names_not_to_remove] = can_remove_description; diff --git a/src/Storages/MergeTree/DataPartStorageOnDiskBase.h b/src/Storages/MergeTree/DataPartStorageOnDiskBase.h index 
339acce5953..52dc850c7fd 100644 --- a/src/Storages/MergeTree/DataPartStorageOnDiskBase.h +++ b/src/Storages/MergeTree/DataPartStorageOnDiskBase.h @@ -25,7 +25,7 @@ public: UInt64 calculateTotalSizeOnDisk() const override; /// Returns path to place detached part in or nullopt if we don't need to detach part (if it already exists and has the same content) - std::optional getRelativePathForPrefix(Poco::Logger * log, const String & prefix, bool detached, bool broken) const override; + std::optional getRelativePathForPrefix(LoggerPtr log, const String & prefix, bool detached, bool broken) const override; /// Returns true if detached part already exists and has the same content (compares checksums.txt and the list of files) bool looksLikeBrokenDetachedPartHasTheSameContent(const String & detached_part_path, std::optional & original_checksums_content, @@ -74,14 +74,14 @@ public: const DiskPtr & dst_disk, const ReadSettings & read_settings, const WriteSettings & write_settings, - Poco::Logger * log, + LoggerPtr log, const std::function & cancellation_hook ) const override; void rename( std::string new_root_path, std::string new_part_dir, - Poco::Logger * log, + LoggerPtr log, bool remove_new_dir_if_exists, bool fsync_part_dir) override; @@ -90,7 +90,7 @@ public: const MergeTreeDataPartChecksums & checksums, std::list projections, bool is_temp, - Poco::Logger * log) override; + LoggerPtr log) override; void changeRootPath(const std::string & from_root, const std::string & to_root) override; void createDirectories() override; @@ -130,7 +130,7 @@ private: const CanRemoveDescription & can_remove_description, const MergeTreeDataPartChecksums & checksums, bool is_temp, - Poco::Logger * log); + LoggerPtr log); /// For names of expected data part files returns the actual names /// of files in filesystem to which data of these files is written. 
diff --git a/src/Storages/MergeTree/DataPartsExchange.cpp b/src/Storages/MergeTree/DataPartsExchange.cpp index 95b7c17ae78..ce70fbe18e5 100644 --- a/src/Storages/MergeTree/DataPartsExchange.cpp +++ b/src/Storages/MergeTree/DataPartsExchange.cpp @@ -99,7 +99,7 @@ struct ReplicatedFetchReadCallback Service::Service(StorageReplicatedMergeTree & data_) : data(data_) - , log(&Poco::Logger::get(data.getStorageID().getNameForLogs() + " (Replicated PartsService)")) + , log(getLogger(data.getStorageID().getNameForLogs() + " (Replicated PartsService)")) {} std::string Service::getId(const std::string & node_id) const @@ -415,7 +415,7 @@ MergeTreeData::DataPartPtr Service::findPart(const String & name) Fetcher::Fetcher(StorageReplicatedMergeTree & data_) : data(data_) - , log(&Poco::Logger::get(data.getStorageID().getNameForLogs() + " (Fetcher)")) + , log(getLogger(data.getStorageID().getNameForLogs() + " (Fetcher)")) {} std::pair Fetcher::fetchSelectedPart( @@ -474,7 +474,7 @@ std::pair Fetcher::fetchSelected if (disk) { - LOG_TRACE(log, "Will fetch to disk {} with type {}", disk->getName(), toString(disk->getDataSourceDescription().type)); + LOG_TRACE(log, "Will fetch to disk {} with type {}", disk->getName(), disk->getDataSourceDescription().toString()); UInt64 revision = disk->getRevision(); if (revision) uri.addQueryParameter("disk_revision", toString(revision)); @@ -489,18 +489,18 @@ std::pair Fetcher::fetchSelected Disks disks = data.getDisks(); for (const auto & data_disk : disks) { - LOG_TRACE(log, "Checking disk {} with type {}", data_disk->getName(), toString(data_disk->getDataSourceDescription().type)); + LOG_TRACE(log, "Checking disk {} with type {}", data_disk->getName(), data_disk->getDataSourceDescription().toString()); if (data_disk->supportZeroCopyReplication()) { - LOG_TRACE(log, "Disk {} (with type {}) supports zero-copy replication", data_disk->getName(), toString(data_disk->getDataSourceDescription().type)); - capability.push_back(toString(data_disk->getDataSourceDescription().type)); + LOG_TRACE(log, "Disk {} (with type {}) supports zero-copy replication", data_disk->getName(), data_disk->getDataSourceDescription().toString()); + capability.push_back(data_disk->getDataSourceDescription().toString()); } } } else if (disk->supportZeroCopyReplication()) { - LOG_TRACE(log, "Trying to fetch with zero copy replication, provided disk {} with type {}", disk->getName(), toString(disk->getDataSourceDescription().type)); - capability.push_back(toString(disk->getDataSourceDescription().type)); + LOG_TRACE(log, "Trying to fetch with zero copy replication, provided disk {} with type {}", disk->getName(), disk->getDataSourceDescription().toString()); + capability.push_back(disk->getDataSourceDescription().toString()); } } @@ -544,7 +544,7 @@ std::pair Fetcher::fetchSelected { for (const auto & disk_candidate : data.getDisks()) { - if (toString(disk_candidate->getDataSourceDescription().type) == remote_fs_metadata) + if (disk_candidate->getDataSourceDescription().toString() == remote_fs_metadata) { preffered_disk = disk_candidate; break; @@ -601,11 +601,11 @@ std::pair Fetcher::fetchSelected if (!disk) { disk = reservation->getDisk(); - LOG_TRACE(log, "Disk for fetch is not provided, getting disk from reservation {} with type '{}'", disk->getName(), toString(disk->getDataSourceDescription().type)); + LOG_TRACE(log, "Disk for fetch is not provided, getting disk from reservation {} with type '{}'", disk->getName(), disk->getDataSourceDescription().toString()); } else { - LOG_TEST(log, "Disk for 
fetch is disk {} with type {}", disk->getName(), toString(disk->getDataSourceDescription().type)); + LOG_TEST(log, "Disk for fetch is disk {} with type {}", disk->getName(), disk->getDataSourceDescription().toString()); } UInt64 revision = parse(in->getResponseCookie("disk_revision", "0")); @@ -888,7 +888,7 @@ MergeTreeData::MutableDataPartPtr Fetcher::downloadPartToDisk( readStringBinary(part_id, in); if (!disk->supportZeroCopyReplication() || !disk->checkUniqueId(part_id)) - throw Exception(ErrorCodes::ZERO_COPY_REPLICATION_ERROR, "Part {} unique id {} doesn't exist on {} (with type {}).", part_name, part_id, disk->getName(), toString(disk->getDataSourceDescription().type)); + throw Exception(ErrorCodes::ZERO_COPY_REPLICATION_ERROR, "Part {} unique id {} doesn't exist on {} (with type {}).", part_name, part_id, disk->getName(), disk->getDataSourceDescription().toString()); LOG_DEBUG(log, "Downloading part {} unique id {} metadata onto disk {}.", part_name, part_id, disk->getName()); zero_copy_temporary_lock_holder = data.lockSharedDataTemporary(part_name, part_id, disk); diff --git a/src/Storages/MergeTree/DataPartsExchange.h b/src/Storages/MergeTree/DataPartsExchange.h index 07939a660a8..8c15dc3cfdb 100644 --- a/src/Storages/MergeTree/DataPartsExchange.h +++ b/src/Storages/MergeTree/DataPartsExchange.h @@ -55,7 +55,7 @@ private: /// StorageReplicatedMergeTree::shutdown() waits for all parts exchange handlers to finish, /// so Service will never access dangling reference to storage StorageReplicatedMergeTree & data; - Poco::Logger * log; + LoggerPtr log; }; /** Client for getting the parts from the table *MergeTree. @@ -137,7 +137,7 @@ private: ThrottlerPtr throttler); StorageReplicatedMergeTree & data; - Poco::Logger * log; + LoggerPtr log; }; } diff --git a/src/Storages/MergeTree/EphemeralLockInZooKeeper.cpp b/src/Storages/MergeTree/EphemeralLockInZooKeeper.cpp index 5741e11aa22..1ffb5177430 100644 --- a/src/Storages/MergeTree/EphemeralLockInZooKeeper.cpp +++ b/src/Storages/MergeTree/EphemeralLockInZooKeeper.cpp @@ -64,7 +64,7 @@ std::optional createEphemeralLockInZooKeeper( { const String & failed_op_path = ops[failed_idx]->getPath(); LOG_DEBUG( - &Poco::Logger::get("createEphemeralLockInZooKeeper"), + getLogger("createEphemeralLockInZooKeeper"), "Deduplication path already exists: deduplication_path={}", failed_op_path); return EphemeralLockInZooKeeper{"", nullptr, "", failed_op_path}; @@ -73,7 +73,7 @@ std::optional createEphemeralLockInZooKeeper( else if (responses[0]->error == Coordination::Error::ZNODEEXISTS) { LOG_DEBUG( - &Poco::Logger::get("createEphemeralLockInZooKeeper"), + getLogger("createEphemeralLockInZooKeeper"), "Deduplication path already exists: deduplication_path={}", deduplication_path); return {}; @@ -119,7 +119,7 @@ EphemeralLockInZooKeeper::~EphemeralLockInZooKeeper() { if (Coordination::isHardwareError(e.code)) LOG_DEBUG( - &Poco::Logger::get("EphemeralLockInZooKeeper"), + getLogger("EphemeralLockInZooKeeper"), "ZooKeeper communication error during unlock: code={} message='{}'", e.code, e.message()); @@ -130,7 +130,7 @@ EphemeralLockInZooKeeper::~EphemeralLockInZooKeeper() /// But it's possible that the multi op request can be executed on server side, and client will not get response due to network issue. 
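The DataPartsExchange changes replace `toString(disk->getDataSourceDescription().type)` with `disk->getDataSourceDescription().toString()` wherever zero-copy capability strings are built or logged, so the string can describe the whole data source rather than only its type enum. What the real `toString()` appends beyond the type is not visible in this diff; the sketch below only shows the shape of the change, with an invented extra field.

#include <iostream>
#include <string>
#include <vector>

enum class DataSourceType { Local, S3, HDFS };

std::string toString(DataSourceType type)
{
    switch (type)
    {
        case DataSourceType::Local: return "local";
        case DataSourceType::S3:    return "s3";
        case DataSourceType::HDFS:  return "hdfs";
    }
    return "unknown";
}

// Stand-in for DataSourceDescription: the real struct carries more than the type enum,
// so a member toString() can describe the whole source. The extra field is an assumption.
struct DataSourceDescription
{
    DataSourceType type = DataSourceType::Local;
    std::string extra;

    std::string toString() const { return extra.empty() ? ::toString(type) : ::toString(type) + "_" + extra; }
};

int main()
{
    std::vector<std::string> capability;            // zero-copy capability strings, as in fetchSelectedPart()

    DataSourceDescription description{DataSourceType::S3, "plain"};
    capability.push_back(description.toString());   // was: toString(description.type)

    for (const auto & entry : capability)
        std::cout << entry << '\n';
}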
/// In such case, assumeUnlocked() will not be called, so we'll get ZNONODE error here since the noded is already deleted LOG_DEBUG( - &Poco::Logger::get("EphemeralLockInZooKeeper"), + getLogger("EphemeralLockInZooKeeper"), "ZooKeeper node was already deleted: code={} message={}", e.code, e.message()); @@ -168,7 +168,7 @@ EphemeralLocksInAllPartitions::EphemeralLocksInAllPartitions( Coordination::Error rc = zookeeper->tryMulti(lock_ops, lock_responses); if (rc == Coordination::Error::ZBADVERSION) { - LOG_TRACE(&Poco::Logger::get("EphemeralLocksInAllPartitions"), "Someone has inserted a block in a new partition while we were creating locks. Retry."); + LOG_TRACE(getLogger("EphemeralLocksInAllPartitions"), "Someone has inserted a block in a new partition while we were creating locks. Retry."); continue; } else if (rc != Coordination::Error::ZOK) diff --git a/src/Storages/MergeTree/IDataPartStorage.h b/src/Storages/MergeTree/IDataPartStorage.h index afbe91a8a6d..5899ef58cd5 100644 --- a/src/Storages/MergeTree/IDataPartStorage.h +++ b/src/Storages/MergeTree/IDataPartStorage.h @@ -151,12 +151,12 @@ public: const MergeTreeDataPartChecksums & checksums, std::list projections, bool is_temp, - Poco::Logger * log) = 0; + LoggerPtr log) = 0; /// Get a name like 'prefix_partdir_tryN' which does not exist in a root dir. /// TODO: remove it. virtual std::optional getRelativePathForPrefix( - Poco::Logger * log, const String & prefix, bool detached, bool broken) const = 0; + LoggerPtr log, const String & prefix, bool detached, bool broken) const = 0; /// Reset part directory, used for in-memory parts. /// TODO: remove it. @@ -263,7 +263,7 @@ public: const DiskPtr & disk, const ReadSettings & read_settings, const WriteSettings & write_settings, - Poco::Logger * log, + LoggerPtr log, const std::function & cancellation_hook ) const = 0; @@ -314,7 +314,7 @@ public: virtual void rename( std::string new_root_path, std::string new_part_dir, - Poco::Logger * log, + LoggerPtr log, bool remove_new_dir_if_exists, bool fsync_part_dir) = 0; diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index 87f23b0da2a..686b2683770 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -81,6 +81,7 @@ void IMergeTreeDataPart::MinMaxIndex::load(const MergeTreeData & data, const Par auto minmax_column_types = data.getMinMaxColumnsTypes(partition_key); size_t minmax_idx_size = minmax_column_types.size(); + hyperrectangle.clear(); hyperrectangle.reserve(minmax_idx_size); for (size_t i = 0; i < minmax_idx_size; ++i) { @@ -104,6 +105,39 @@ void IMergeTreeDataPart::MinMaxIndex::load(const MergeTreeData & data, const Par initialized = true; } +Block IMergeTreeDataPart::MinMaxIndex::getBlock(const MergeTreeData & data) const +{ + if (!initialized) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Attempt to get block from uninitialized MinMax index."); + + Block block; + + const auto metadata_snapshot = data.getInMemoryMetadataPtr(); + const auto & partition_key = metadata_snapshot->getPartitionKey(); + + const auto minmax_column_names = data.getMinMaxColumnsNames(partition_key); + const auto minmax_column_types = data.getMinMaxColumnsTypes(partition_key); + const auto minmax_idx_size = minmax_column_types.size(); + + for (size_t i = 0; i < minmax_idx_size; ++i) + { + const auto & data_type = minmax_column_types[i]; + const auto & column_name = minmax_column_names[i]; + + const auto column = data_type->createColumn(); + + const 
auto min_val = hyperrectangle.at(i).left; + const auto max_val = hyperrectangle.at(i).right; + + column->insert(min_val); + column->insert(max_val); + + block.insert(ColumnWithTypeAndName(column->getPtr(), data_type, column_name)); + } + + return block; +} + IMergeTreeDataPart::MinMaxIndex::WrittenFiles IMergeTreeDataPart::MinMaxIndex::store( const MergeTreeData & data, IDataPartStorage & part_storage, Checksums & out_checksums) const { @@ -185,8 +219,7 @@ void IMergeTreeDataPart::MinMaxIndex::merge(const MinMaxIndex & other) if (!initialized) { - hyperrectangle = other.hyperrectangle; - initialized = true; + *this = other; } else { @@ -1663,7 +1696,7 @@ try metadata_manager->deleteAll(true); metadata_manager->assertAllDeleted(true); - getDataPartStorage().rename(to.parent_path(), to.filename(), storage.log, remove_new_dir_if_exists, fsync_dir); + getDataPartStorage().rename(to.parent_path(), to.filename(), storage.log.load(), remove_new_dir_if_exists, fsync_dir); metadata_manager->updateAll(true); auto new_projection_root_path = to.string(); @@ -1758,7 +1791,7 @@ void IMergeTreeDataPart::remove() } bool is_temporary_part = is_temp || state == MergeTreeDataPartState::Temporary; - getDataPartStorage().remove(std::move(can_remove_callback), checksums, projection_checksums, is_temporary_part, storage.log); + getDataPartStorage().remove(std::move(can_remove_callback), checksums, projection_checksums, is_temporary_part, storage.log.load()); } std::optional IMergeTreeDataPart::getRelativePathForPrefix(const String & prefix, bool detached, bool broken) const @@ -1775,7 +1808,7 @@ std::optional IMergeTreeDataPart::getRelativePathForPrefix(const String if (detached && parent_part) throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot detach projection"); - return getDataPartStorage().getRelativePathForPrefix(storage.log, prefix, detached, broken); + return getDataPartStorage().getRelativePathForPrefix(storage.log.load(), prefix, detached, broken); } std::optional IMergeTreeDataPart::getRelativePathForDetachedPart(const String & prefix, bool broken) const @@ -1841,7 +1874,7 @@ MutableDataPartStoragePtr IMergeTreeDataPart::makeCloneOnDisk( throw Exception(ErrorCodes::LOGICAL_ERROR, "Can not clone data part {} to empty directory.", name); String path_to_clone = fs::path(storage.relative_data_path) / directory_name / ""; - return getDataPartStorage().clonePart(path_to_clone, getDataPartStorage().getPartDirectory(), disk, read_settings, write_settings, storage.log, cancellation_hook); + return getDataPartStorage().clonePart(path_to_clone, getDataPartStorage().getPartDirectory(), disk, read_settings, write_settings, storage.log.load(), cancellation_hook); } UInt64 IMergeTreeDataPart::getIndexSizeFromFile() const diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.h b/src/Storages/MergeTree/IMergeTreeDataPart.h index 640a1f1d0a3..29f0f54d419 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.h +++ b/src/Storages/MergeTree/IMergeTreeDataPart.h @@ -336,6 +336,7 @@ public: } void load(const MergeTreeData & data, const PartMetadataManagerPtr & manager); + Block getBlock(const MergeTreeData & data) const; using WrittenFiles = std::vector>; diff --git a/src/Storages/MergeTree/IMergeTreeReader.cpp b/src/Storages/MergeTree/IMergeTreeReader.cpp index 120edd81e30..63ed8021f58 100644 --- a/src/Storages/MergeTree/IMergeTreeReader.cpp +++ b/src/Storages/MergeTree/IMergeTreeReader.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #include #include #include @@ -140,16 +141,29 @@ void 
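The new `IMergeTreeDataPart::MinMaxIndex::getBlock()` above converts the stored hyperrectangle (one `[min, max]` range per partition min-max column) into a two-row block, min in the first row and max in the second, which the partition-recalculation path later in this diff feeds into `MergeTreePartition::create()`. The related `hyperrectangle.clear()` added to `load()` makes re-loading an already initialized index safe. A stand-alone sketch of the ranges-to-rows conversion with toy `Column`/`Block` types:

#include <cstddef>
#include <iostream>
#include <string>
#include <utility>
#include <vector>

// Toy stand-ins: each min-max column stores one [left, right] interval, and getBlock()
// emits a column with exactly two rows (min first, max second).
using Range = std::pair<long, long>;

struct Column
{
    std::string name;
    std::vector<long> values;
};

using Block = std::vector<Column>;

Block minMaxIndexToBlock(const std::vector<std::string> & column_names, const std::vector<Range> & hyperrectangle)
{
    Block block;
    for (size_t i = 0; i < hyperrectangle.size(); ++i)
    {
        Column column;
        column.name = column_names.at(i);
        column.values.push_back(hyperrectangle[i].first);    // column->insert(min_val)
        column.values.push_back(hyperrectangle[i].second);   // column->insert(max_val)
        block.push_back(std::move(column));
    }
    return block;
}

int main()
{
    // e.g. a part covering day numbers [19700, 19731] and ids [1, 42]
    Block block = minMaxIndexToBlock({"date", "id"}, {{19700, 19731}, {1, 42}});
    for (const auto & column : block)
        std::cout << column.name << ": " << column.values[0] << " .. " << column.values[1] << '\n';
}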
IMergeTreeReader::evaluateMissingDefaults(Block additional_columns, Columns } } +bool IMergeTreeReader::isSubcolumnOffsetsOfNested(const String & name_in_storage, const String & subcolumn_name) const +{ + /// We cannot read separate subcolumn with offsets from compact parts. + if (!data_part_info_for_read->isWidePart() || subcolumn_name != "size0") + return false; + + return Nested::isSubcolumnOfNested(name_in_storage, part_columns); +} + String IMergeTreeReader::getColumnNameInPart(const NameAndTypePair & required_column) const { auto name_in_storage = required_column.getNameInStorage(); - if (alter_conversions->isColumnRenamed(name_in_storage)) - { - name_in_storage = alter_conversions->getColumnOldName(name_in_storage); - return Nested::concatenateName(name_in_storage, required_column.getSubcolumnName()); - } + auto subcolumn_name = required_column.getSubcolumnName(); - return required_column.name; + if (alter_conversions->isColumnRenamed(name_in_storage)) + name_in_storage = alter_conversions->getColumnOldName(name_in_storage); + + /// A special case when we read subcolumn of shared offsets of Nested. + /// E.g. instead of requested column "n.arr1.size0" we must read column "n.size0" from disk. + if (isSubcolumnOffsetsOfNested(name_in_storage, subcolumn_name)) + name_in_storage = Nested::splitName(name_in_storage).first; + + return Nested::concatenateName(name_in_storage, subcolumn_name); } NameAndTypePair IMergeTreeReader::getColumnInPart(const NameAndTypePair & required_column) const diff --git a/src/Storages/MergeTree/IMergeTreeReader.h b/src/Storages/MergeTree/IMergeTreeReader.h index f3ea0c6c361..997be064f28 100644 --- a/src/Storages/MergeTree/IMergeTreeReader.h +++ b/src/Storages/MergeTree/IMergeTreeReader.h @@ -65,14 +65,14 @@ public: protected: /// Returns actual column name in part, which can differ from table metadata. String getColumnNameInPart(const NameAndTypePair & required_column) const; - /// Returns actual column name and type in part, which can differ from table metadata. NameAndTypePair getColumnInPart(const NameAndTypePair & required_column) const; /// Returns actual serialization in part, which can differ from table metadata. SerializationPtr getSerializationInPart(const NameAndTypePair & required_column) const; + /// Returns true if requested column is a subcolumn with offsets of Array which is part of Nested column. + bool isSubcolumnOffsetsOfNested(const String & name_in_storage, const String & subcolumn_name) const; void checkNumberOfColumns(size_t num_columns_to_read) const; - String getMessageForDiagnosticOfBrokenPart(size_t from_mark, size_t max_rows_to_read) const; /// avg_value_size_hints are used to reduce the number of reallocations when creating columns of variable size. 
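`IMergeTreeReader::getColumnNameInPart()` now recognises a request for the shared offsets of a Nested column in a wide part: asking for `n.arr1.size0` must be satisfied from the single on-disk column `n.size0`, while compact parts keep the old behaviour. The sketch below reproduces only the name rewrite; the string helpers stand in for `Nested::splitName`/`Nested::concatenateName`, and the boolean flag stands in for the `Nested::isSubcolumnOfNested` check over the part's columns.

#include <iostream>
#include <string>
#include <utility>

// String helpers standing in for Nested::splitName / Nested::concatenateName.
static std::pair<std::string, std::string> splitName(const std::string & name)
{
    auto pos = name.find('.');
    if (pos == std::string::npos)
        return {name, ""};
    return {name.substr(0, pos), name.substr(pos + 1)};
}

static std::string concatenateName(const std::string & name_in_storage, const std::string & subcolumn)
{
    return subcolumn.empty() ? name_in_storage : name_in_storage + "." + subcolumn;
}

// Models the new getColumnNameInPart() logic: in a wide part the offsets of a Nested
// column are stored once, so "n.arr1.size0" must be read as "n.size0" from disk.
static std::string getColumnNameInPart(const std::string & name_in_storage, const std::string & subcolumn_name, bool is_wide_part, bool is_subcolumn_of_nested)
{
    std::string storage_name = name_in_storage;

    if (is_wide_part && subcolumn_name == "size0" && is_subcolumn_of_nested)
        storage_name = splitName(storage_name).first;       // "n.arr1" -> "n"

    return concatenateName(storage_name, subcolumn_name);
}

int main()
{
    std::cout << getColumnNameInPart("n.arr1", "size0", /*wide*/ true,  /*nested*/ true) << '\n';   // n.size0
    std::cout << getColumnNameInPart("n.arr1", "size0", /*wide*/ false, /*nested*/ true) << '\n';   // n.arr1.size0
}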
diff --git a/src/Storages/MergeTree/InsertBlockInfo.cpp b/src/Storages/MergeTree/InsertBlockInfo.cpp index ac900f8cf09..2de3ae8996a 100644 --- a/src/Storages/MergeTree/InsertBlockInfo.cpp +++ b/src/Storages/MergeTree/InsertBlockInfo.cpp @@ -9,7 +9,7 @@ namespace ErrorCodes } AsyncInsertBlockInfo::AsyncInsertBlockInfo( - Poco::Logger * log_, + LoggerPtr log_, std::vector && block_id_, BlockWithPartition && block_, std::optional && unmerged_block_with_partition_) diff --git a/src/Storages/MergeTree/InsertBlockInfo.h b/src/Storages/MergeTree/InsertBlockInfo.h index 3882373c0fa..7d7ec0c9f29 100644 --- a/src/Storages/MergeTree/InsertBlockInfo.h +++ b/src/Storages/MergeTree/InsertBlockInfo.h @@ -8,7 +8,7 @@ namespace DB struct SyncInsertBlockInfo { SyncInsertBlockInfo( - Poco::Logger * /*log_*/, + LoggerPtr /*log_*/, std::string && block_id_, BlockWithPartition && /*block_*/, std::optional && /*unmerged_block_with_partition_*/) @@ -25,7 +25,7 @@ struct SyncInsertBlockInfo struct AsyncInsertBlockInfo { - Poco::Logger * log; + LoggerPtr log; std::vector block_id; BlockWithPartition block_with_partition; /// Some merging algorithms can mofidy the block which loses the information about the async insert offsets @@ -34,7 +34,7 @@ struct AsyncInsertBlockInfo std::unordered_map> block_id_to_offset_idx; AsyncInsertBlockInfo( - Poco::Logger * log_, + LoggerPtr log_, std::vector && block_id_, BlockWithPartition && block_, std::optional && unmerged_block_with_partition_); diff --git a/src/Storages/MergeTree/KeyCondition.cpp b/src/Storages/MergeTree/KeyCondition.cpp index d5922ae1bc2..e5bcb11091f 100644 --- a/src/Storages/MergeTree/KeyCondition.cpp +++ b/src/Storages/MergeTree/KeyCondition.cpp @@ -1,36 +1,37 @@ -#include -#include -#include +#include +#include #include #include #include #include +#include #include -#include #include -#include -#include -#include -#include -#include -#include -#include +#include #include +#include #include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include #include -#include +#include +#include +#include +#include +#include #include #include #include -#include -#include +#include +#include +#include #include +#include +#include +#include #include #include @@ -836,21 +837,6 @@ bool KeyCondition::getConstant(const ASTPtr & expr, Block & block_with_constants return node.tryGetConstant(out_value, out_type); } - -static Field applyFunctionForField( - const FunctionBasePtr & func, - const DataTypePtr & arg_type, - const Field & arg_value) -{ - ColumnsWithTypeAndName columns - { - { arg_type->createColumnConst(1, arg_value), arg_type, "x" }, - }; - - auto col = func->execute(columns, func->getResultType(), 1); - return (*col)[0]; -} - /// The case when arguments may have types different than in the primary key. static std::pair applyFunctionForFieldOfUnknownType( const FunctionBasePtr & func, @@ -890,33 +876,6 @@ static std::pair applyBinaryFunctionForFieldOfUnknownType( return {std::move(result), std::move(return_type)}; } - -static FieldRef applyFunction(const FunctionBasePtr & func, const DataTypePtr & current_type, const FieldRef & field) -{ - /// Fallback for fields without block reference. 
- if (field.isExplicit()) - return applyFunctionForField(func, current_type, field); - - String result_name = "_" + func->getName() + "_" + toString(field.column_idx); - const auto & columns = field.columns; - size_t result_idx = columns->size(); - - for (size_t i = 0; i < result_idx; ++i) - { - if ((*columns)[i].name == result_name) - result_idx = i; - } - - if (result_idx == columns->size()) - { - ColumnsWithTypeAndName args{(*columns)[field.column_idx]}; - field.columns->emplace_back(ColumnWithTypeAndName {nullptr, func->getResultType(), result_name}); - (*columns)[result_idx].column = func->execute(args, (*columns)[result_idx].type, columns->front().column->size()); - } - - return {field.columns, field.row_idx, result_idx}; -} - /** When table's key has expression with these functions from a column, * and when a column in a query is compared with a constant, such as: * CREATE TABLE (x String) ORDER BY toDate(x) diff --git a/src/Storages/MergeTree/LeaderElection.h b/src/Storages/MergeTree/LeaderElection.h index 2e48892563b..3bd486fd54a 100644 --- a/src/Storages/MergeTree/LeaderElection.h +++ b/src/Storages/MergeTree/LeaderElection.h @@ -19,7 +19,7 @@ namespace zkutil * For now, every replica can become leader if there is no leader among replicas with old version. */ -void checkNoOldLeaders(Poco::Logger * log, ZooKeeper & zookeeper, const String path) +void checkNoOldLeaders(LoggerPtr log, ZooKeeper & zookeeper, const String path) { /// Previous versions (before 21.12) used to create ephemeral sequential node path/leader_election- /// Replica with the lexicographically smallest node name becomes leader (before 20.6) or enables multi-leader mode (since 20.6) diff --git a/src/Storages/MergeTree/MergeFromLogEntryTask.cpp b/src/Storages/MergeTree/MergeFromLogEntryTask.cpp index 23037b1ee7a..ae6e398026d 100644 --- a/src/Storages/MergeTree/MergeFromLogEntryTask.cpp +++ b/src/Storages/MergeTree/MergeFromLogEntryTask.cpp @@ -28,7 +28,7 @@ MergeFromLogEntryTask::MergeFromLogEntryTask( StorageReplicatedMergeTree & storage_, IExecutableTask::TaskResultCallback & task_result_callback_) : ReplicatedMergeMutateTaskBase( - &Poco::Logger::get( + getLogger( storage_.getStorageID().getShortName() + "::" + selected_entry_->log_entry->new_part_name + " (MergeFromLogEntryTask)"), storage_, selected_entry_, diff --git a/src/Storages/MergeTree/MergeTask.cpp b/src/Storages/MergeTree/MergeTask.cpp index 4b5b7ca8018..59bdb7006b3 100644 --- a/src/Storages/MergeTree/MergeTask.cpp +++ b/src/Storages/MergeTree/MergeTask.cpp @@ -588,7 +588,15 @@ void MergeTask::VerticalMergeStage::prepareVerticalMergeForOneColumn() const auto pipe = Pipe::unitePipes(std::move(pipes)); ctx->rows_sources_read_buf->seek(0, 0); - auto transform = std::make_unique(pipe.getHeader(), pipe.numOutputPorts(), *ctx->rows_sources_read_buf); + + const auto data_settings = global_ctx->data->getSettings(); + auto transform = std::make_unique( + pipe.getHeader(), + pipe.numOutputPorts(), + *ctx->rows_sources_read_buf, + data_settings->merge_max_block_size, + data_settings->merge_max_block_size_bytes); + pipe.addTransform(std::move(transform)); ctx->column_parts_pipeline = QueryPipeline(std::move(pipe)); diff --git a/src/Storages/MergeTree/MergeTask.h b/src/Storages/MergeTree/MergeTask.h index b2a5796737d..6f5336baaad 100644 --- a/src/Storages/MergeTree/MergeTask.h +++ b/src/Storages/MergeTree/MergeTask.h @@ -228,7 +228,7 @@ private: size_t sum_compressed_bytes_upper_bound{0}; bool blocks_are_granules_size{false}; - Poco::Logger * 
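In the vertical merge stage above, the column-gathering transform (its class name was stripped by the extraction of this diff) is now constructed with `merge_max_block_size` and `merge_max_block_size_bytes` from the MergeTree settings, so gathered blocks are capped both by row count and by bytes instead of rows only. A toy illustration of that double limit, not the real gatherer, which works from a row-sources buffer:

#include <cstddef>
#include <iostream>
#include <vector>

struct Block
{
    size_t rows = 0;
    size_t bytes = 0;
};

// Emits a new block whenever either the row or the byte limit would be exceeded,
// mirroring the two new constructor arguments in the diff.
std::vector<Block> gather(const std::vector<size_t> & row_sizes, size_t max_block_size_rows, size_t max_block_size_bytes)
{
    std::vector<Block> blocks;
    Block current;

    for (size_t row_bytes : row_sizes)
    {
        if (current.rows > 0
            && (current.rows + 1 > max_block_size_rows || current.bytes + row_bytes > max_block_size_bytes))
        {
            blocks.push_back(current);
            current = {};
        }
        current.rows += 1;
        current.bytes += row_bytes;
    }
    if (current.rows > 0)
        blocks.push_back(current);
    return blocks;
}

int main()
{
    std::vector<size_t> row_sizes(10, 100);                 // ten rows of 100 bytes each
    for (const auto & block : gather(row_sizes, /*max rows*/ 4, /*max bytes*/ 350))
        std::cout << block.rows << " rows, " << block.bytes << " bytes\n";
}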
log{&Poco::Logger::get("MergeTask::PrepareStage")}; + LoggerPtr log{getLogger("MergeTask::PrepareStage")}; /// Dependencies for next stages std::list::const_iterator it_name_and_type; @@ -354,7 +354,7 @@ private: MergeTasks tasks_for_projections; MergeTasks::iterator projections_iterator; - Poco::Logger * log{&Poco::Logger::get("MergeTask::MergeProjectionsStage")}; + LoggerPtr log{getLogger("MergeTask::MergeProjectionsStage")}; }; using MergeProjectionsRuntimeContextPtr = std::shared_ptr; diff --git a/src/Storages/MergeTree/MergeTreeBackgroundExecutor.cpp b/src/Storages/MergeTree/MergeTreeBackgroundExecutor.cpp index a3f8e02f5eb..8cb0badc19b 100644 --- a/src/Storages/MergeTree/MergeTreeBackgroundExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeBackgroundExecutor.cpp @@ -144,7 +144,7 @@ bool MergeTreeBackgroundExecutor::trySchedule(ExecutableTaskPtr task) return true; } -void printExceptionWithRespectToAbort(Poco::Logger * log, const String & query_id) +void printExceptionWithRespectToAbort(LoggerPtr log, const String & query_id) { std::exception_ptr ex = std::current_exception(); diff --git a/src/Storages/MergeTree/MergeTreeBackgroundExecutor.h b/src/Storages/MergeTree/MergeTreeBackgroundExecutor.h index 63f75ffc8d9..0ed03293589 100644 --- a/src/Storages/MergeTree/MergeTreeBackgroundExecutor.h +++ b/src/Storages/MergeTree/MergeTreeBackgroundExecutor.h @@ -307,7 +307,7 @@ private: std::condition_variable has_tasks TSA_GUARDED_BY(mutex); bool shutdown TSA_GUARDED_BY(mutex) = false; std::unique_ptr pool; - Poco::Logger * log = &Poco::Logger::get("MergeTreeBackgroundExecutor"); + LoggerPtr log = getLogger("MergeTreeBackgroundExecutor"); }; extern template class MergeTreeBackgroundExecutor; diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index e3de926570b..4b3012d68e0 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -8,21 +8,6 @@ #include #include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include #include #include #include @@ -43,19 +28,20 @@ #include #include #include -#include -#include #include #include #include #include #include #include +#include +#include #include +#include #include -#include -#include #include +#include +#include #include #include #include @@ -64,32 +50,48 @@ #include #include #include -#include #include #include #include #include #include #include +#include #include #include +#include #include #include #include #include #include +#include #include #include -#include #include #include #include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include #include #include #include +#include #include #include @@ -197,6 +199,50 @@ namespace ErrorCodes extern const int LIMIT_EXCEEDED; } +static size_t getPartitionAstFieldsCount(const ASTPartition & partition_ast, ASTPtr partition_value_ast) +{ + if (partition_ast.fields_count.has_value()) + return *partition_ast.fields_count; + + if (partition_value_ast->as()) + return 1; + + const auto * tuple_ast = partition_value_ast->as(); + + if (!tuple_ast) + { + throw Exception( + ErrorCodes::INVALID_PARTITION_VALUE, "Expected literal or tuple for partition key, got {}", partition_value_ast->getID()); + } + + if (tuple_ast->name != "tuple") + { + if (!isFunctionCast(tuple_ast)) + throw Exception(ErrorCodes::INVALID_PARTITION_VALUE, 
"Expected tuple for complex partition key, got {}", tuple_ast->name); + + if (tuple_ast->arguments->as()->children.empty()) + throw Exception(ErrorCodes::INVALID_PARTITION_VALUE, "Expected tuple for complex partition key, got {}", tuple_ast->name); + + auto first_arg = tuple_ast->arguments->as()->children.at(0); + if (const auto * inner_tuple = first_arg->as(); inner_tuple && inner_tuple->name == "tuple") + { + const auto * arguments_ast = tuple_ast->arguments->as(); + return arguments_ast ? arguments_ast->children.size() : 0; + } + else if (const auto * inner_literal_tuple = first_arg->as(); inner_literal_tuple) + { + return inner_literal_tuple->value.getType() == Field::Types::Tuple ? inner_literal_tuple->value.safeGet().size() : 1; + } + + throw Exception(ErrorCodes::INVALID_PARTITION_VALUE, "Expected tuple for complex partition key, got {}", tuple_ast->name); + } + else + { + const auto * arguments_ast = tuple_ast->arguments->as(); + return arguments_ast ? arguments_ast->children.size() : 0; + } +} + static void checkSuspiciousIndices(const ASTFunction * index_function) { std::unordered_set unique_index_expression_hashes; @@ -300,7 +346,11 @@ void MergeTreeData::initializeDirectoriesAndFormatVersion(const std::string & re if (disk->isBroken()) continue; - if (!disk->isReadOnly()) + /// Write once disk is almost the same as read-only for MergeTree, + /// since it does not support move, that is required for any + /// operation over MergeTree, so avoid writing format_version.txt + /// into it as well, to avoid leaving it after DROP. + if (!disk->isReadOnly() && !disk->isWriteOnce()) { auto buf = disk->writeFile(format_version_path, DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Rewrite, getContext()->getWriteSettings()); writeIntText(format_version.toUnderType(), *buf); @@ -354,8 +404,7 @@ MergeTreeData::MergeTreeData( , merging_params(merging_params_) , require_part_metadata(require_part_metadata_) , broken_part_callback(broken_part_callback_) - , log_name(std::make_shared(table_id_.getNameForLogs())) - , log(&Poco::Logger::get(*log_name)) + , log(table_id_.getNameForLogs()) , storage_settings(std::move(storage_settings_)) , pinned_part_uuids(std::make_shared()) , data_parts_by_info(data_parts_indexes.get()) @@ -1222,7 +1271,7 @@ MergeTreeData::PartLoadingTree::build(PartLoadingInfos nodes) } static std::optional calculatePartSizeSafe( - const MergeTreeData::DataPartPtr & part, Poco::Logger * log) + const MergeTreeData::DataPartPtr & part, const LoggerPtr & log) { try { @@ -1296,7 +1345,7 @@ MergeTreeData::LoadPartResult MergeTreeData::loadDataPart( res.is_broken = true; tryLogCurrentException(log, fmt::format("while loading part {} on path {}", part_name, part_path)); - res.size_of_part = calculatePartSizeSafe(res.part, log); + res.size_of_part = calculatePartSizeSafe(res.part, log.load()); auto part_size_str = res.size_of_part ? formatReadableSizeWithBinarySuffix(*res.size_of_part) : "failed to calculate size"; LOG_ERROR(log, @@ -1327,7 +1376,7 @@ MergeTreeData::LoadPartResult MergeTreeData::loadDataPart( if (part_disk_ptr->exists(marker_path)) { /// NOTE: getBytesOnDisk() cannot be used here, since it may be zero if checksums.txt does not exist. - res.size_of_part = calculatePartSizeSafe(res.part, log); + res.size_of_part = calculatePartSizeSafe(res.part, log.load()); res.is_broken = true; auto part_size_str = res.size_of_part ? 
formatReadableSizeWithBinarySuffix(*res.size_of_part) : "failed to calculate size"; @@ -2114,7 +2163,7 @@ size_t MergeTreeData::clearOldTemporaryDirectories(const String & root_path, siz { /// Actually we don't rely on temporary_directories_lifetime when removing old temporaries directories, /// it's just an extra level of protection just in case we have a bug. - LOG_INFO(LogFrequencyLimiter(log, 10), "{} is in use (by merge/mutation/INSERT) (consider increasing temporary_directories_lifetime setting)", full_path); + LOG_INFO(LogFrequencyLimiter(log.load(), 10), "{} is in use (by merge/mutation/INSERT) (consider increasing temporary_directories_lifetime setting)", full_path); continue; } else if (!disk->exists(it->path())) @@ -2734,12 +2783,20 @@ void MergeTreeData::rename(const String & new_table_path, const StorageID & new_ void MergeTreeData::renameInMemory(const StorageID & new_table_id) { IStorage::renameInMemory(new_table_id); - std::atomic_store(&log_name, std::make_shared(new_table_id.getNameForLogs())); - log = &Poco::Logger::get(*log_name); + log.store(new_table_id.getNameForLogs()); } void MergeTreeData::dropAllData() { + /// In case there is read-only/write-once disk we cannot allow to call dropAllData(), but dropping tables is allowed. + /// + /// Note, that one may think that drop on write-once disk should be + /// supported, since it is pretty trivial to implement + /// MetadataStorageFromPlainObjectStorageTransaction::removeDirectory(), + /// however removing part requires moveDirectory() as well. + if (isStaticStorage()) + return; + LOG_TRACE(log, "dropAllData: waiting for locks."); auto settings_ptr = getSettings(); @@ -4854,7 +4911,7 @@ void MergeTreeData::removePartContributionToColumnAndSecondaryIndexSizes(const D } void MergeTreeData::checkAlterPartitionIsPossible( - const PartitionCommands & commands, const StorageMetadataPtr & /*metadata_snapshot*/, const Settings & settings, ContextPtr local_context) const + const PartitionCommands & commands, const StorageMetadataPtr & /*metadata_snapshot*/, const Settings & settings, ContextPtr) const { for (const auto & command : commands) { @@ -4882,7 +4939,15 @@ void MergeTreeData::checkAlterPartitionIsPossible( throw DB::Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Only support DROP/DETACH PARTITION ALL currently"); } else - getPartitionIDFromQuery(command.partition, local_context); + { + // The below `getPartitionIDFromQuery` call will not work for attach / replace because it assumes the partition expressions + // are the same and deliberately uses this storage. Later on, `MergeTreeData::replaceFrom` is called, and it makes the right + // call to `getPartitionIDFromQuery` using source storage. + // Note: `PartitionCommand::REPLACE_PARTITION` is used both for `REPLACE PARTITION` and `ATTACH PARTITION FROM` queries. + // But not for `ATTACH PARTITION` queries. 
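In MergeTreeData the separate `log_name` member (an atomically swapped shared string) and the `log` pointer collapse into a single member constructed from the table name and used as `log.load()`, `log.loadName()` and `log.store(new_name)` in the hunks around here. Its declaration is outside the visible diff, so the wrapper below is only an assumed minimal equivalent inferred from that usage, not the real ClickHouse class:

#include <iostream>
#include <memory>
#include <string>

struct Logger
{
    explicit Logger(std::string name_) : name(std::move(name_)) {}
    std::string name;
};

using LoggerPtr = std::shared_ptr<Logger>;

// Assumed minimal wrapper matching the usage in the diff (load(), loadName(), store()):
// renaming a table swaps the whole logger atomically while readers keep a valid LoggerPtr.
class AtomicLoggerLike
{
public:
    explicit AtomicLoggerLike(const std::string & name) { store(name); }

    void store(const std::string & name) { std::atomic_store(&logger, std::make_shared<Logger>(name)); }
    LoggerPtr load() const { return std::atomic_load(&logger); }
    std::string loadName() const { return load()->name; }

private:
    LoggerPtr logger;
};

int main()
{
    AtomicLoggerLike log("db.table (MergeTreeData)");
    std::cout << log.loadName() << '\n';

    log.store("db.renamed (MergeTreeData)");        // what renameInMemory() does via log.store(...)
    std::cout << log.load()->name << '\n';
}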
+ if (command.type != PartitionCommand::REPLACE_PARTITION) + getPartitionIDFromQuery(command.partition, getContext()); + } } } } @@ -5616,69 +5681,8 @@ String MergeTreeData::getPartitionIDFromQuery(const ASTPtr & ast, ContextPtr loc MergeTreePartInfo::validatePartitionID(partition_ast.id->clone(), format_version); return partition_ast.id->as()->value.safeGet(); } - size_t partition_ast_fields_count = 0; ASTPtr partition_value_ast = partition_ast.value->clone(); - if (!partition_ast.fields_count.has_value()) - { - if (partition_value_ast->as()) - { - partition_ast_fields_count = 1; - } - else if (const auto * tuple_ast = partition_value_ast->as()) - { - if (tuple_ast->name != "tuple") - { - if (isFunctionCast(tuple_ast)) - { - if (tuple_ast->arguments->as()->children.empty()) - { - throw Exception( - ErrorCodes::INVALID_PARTITION_VALUE, "Expected tuple for complex partition key, got {}", tuple_ast->name); - } - auto first_arg = tuple_ast->arguments->as()->children.at(0); - if (const auto * inner_tuple = first_arg->as(); inner_tuple && inner_tuple->name == "tuple") - { - const auto * arguments_ast = tuple_ast->arguments->as(); - if (arguments_ast) - partition_ast_fields_count = arguments_ast->children.size(); - else - partition_ast_fields_count = 0; - } - else if (const auto * inner_literal_tuple = first_arg->as(); inner_literal_tuple) - { - if (inner_literal_tuple->value.getType() == Field::Types::Tuple) - partition_ast_fields_count = inner_literal_tuple->value.safeGet().size(); - else - partition_ast_fields_count = 1; - } - else - { - throw Exception( - ErrorCodes::INVALID_PARTITION_VALUE, "Expected tuple for complex partition key, got {}", tuple_ast->name); - } - } - else - throw Exception(ErrorCodes::INVALID_PARTITION_VALUE, "Expected tuple for complex partition key, got {}", tuple_ast->name); - } - else - { - const auto * arguments_ast = tuple_ast->arguments->as(); - if (arguments_ast) - partition_ast_fields_count = arguments_ast->children.size(); - else - partition_ast_fields_count = 0; - } - } - else - { - throw Exception( - ErrorCodes::INVALID_PARTITION_VALUE, "Expected literal or tuple for partition key, got {}", partition_value_ast->getID()); - } - } - else - { - partition_ast_fields_count = *partition_ast.fields_count; - } + auto partition_ast_fields_count = getPartitionAstFieldsCount(partition_ast, partition_value_ast); if (format_version < MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING) { @@ -6249,13 +6253,13 @@ ReservationPtr MergeTreeData::tryReserveSpacePreferringTTLRules( log, "Would like to reserve space on volume '{}' by TTL rule of table '{}' but volume was not found", move_ttl_entry->destination_name, - *std::atomic_load(&log_name)); + log.loadName()); else if (move_ttl_entry->destination_type == DataDestinationType::DISK && !move_ttl_entry->if_exists) LOG_WARNING( log, "Would like to reserve space on disk '{}' by TTL rule of table '{}' but disk was not found", move_ttl_entry->destination_name, - *std::atomic_load(&log_name)); + log.loadName()); } else if (is_insert && !perform_ttl_move_on_insert) { @@ -6264,7 +6268,7 @@ ReservationPtr MergeTreeData::tryReserveSpacePreferringTTLRules( "TTL move on insert to {} {} for table {} is disabled", (move_ttl_entry->destination_type == DataDestinationType::VOLUME ? 
"volume" : "disk"), move_ttl_entry->destination_name, - *std::atomic_load(&log_name)); + log.loadName()); } else { @@ -6280,13 +6284,13 @@ ReservationPtr MergeTreeData::tryReserveSpacePreferringTTLRules( log, "Would like to reserve space on volume '{}' by TTL rule of table '{}' but there is not enough space", move_ttl_entry->destination_name, - *std::atomic_load(&log_name)); + log.loadName()); else if (move_ttl_entry->destination_type == DataDestinationType::DISK) LOG_WARNING( log, "Would like to reserve space on disk '{}' by TTL rule of table '{}' but there is not enough space", move_ttl_entry->destination_name, - *std::atomic_load(&log_name)); + log.loadName()); } } } @@ -6299,7 +6303,7 @@ ReservationPtr MergeTreeData::tryReserveSpacePreferringTTLRules( "Trying to reserve {} on the selected disk: {} (with type {})", ReadableSize(expected_size), selected_disk->getName(), - toString(selected_disk->getDataSourceDescription().type)); + selected_disk->getDataSourceDescription().toString()); reservation = selected_disk->reserve(expected_size); } @@ -7014,23 +7018,35 @@ MergeTreeData & MergeTreeData::checkStructureAndGetMergeTreeData(IStorage & sour if (my_snapshot->getColumns().getAllPhysical().sizeOfDifference(src_snapshot->getColumns().getAllPhysical())) throw Exception(ErrorCodes::INCOMPATIBLE_COLUMNS, "Tables have different structure"); - auto query_to_string = [] (const ASTPtr & ast) - { - return ast ? queryToString(ast) : ""; - }; - - if (query_to_string(my_snapshot->getSortingKeyAST()) != query_to_string(src_snapshot->getSortingKeyAST())) + if (queryToStringNullable(my_snapshot->getSortingKeyAST()) != queryToStringNullable(src_snapshot->getSortingKeyAST())) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Tables have different ordering"); - if (query_to_string(my_snapshot->getPartitionKeyAST()) != query_to_string(src_snapshot->getPartitionKeyAST())) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Tables have different partition key"); - if (format_version != src_data->format_version) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Tables have different format_version"); - if (query_to_string(my_snapshot->getPrimaryKeyAST()) != query_to_string(src_snapshot->getPrimaryKeyAST())) + if (queryToStringNullable(my_snapshot->getPrimaryKeyAST()) != queryToStringNullable(src_snapshot->getPrimaryKeyAST())) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Tables have different primary key"); + const auto is_a_subset_of = [](const auto & lhs, const auto & rhs) + { + if (lhs.size() > rhs.size()) + return false; + + const auto rhs_set = NameSet(rhs.begin(), rhs.end()); + for (const auto & lhs_element : lhs) + if (!rhs_set.contains(lhs_element)) + return false; + + return true; + }; + + if (!is_a_subset_of(my_snapshot->getColumnsRequiredForPartitionKey(), src_snapshot->getColumnsRequiredForPartitionKey())) + { + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Destination table partition expression columns must be a subset of source table partition expression columns"); + } + const auto check_definitions = [](const auto & my_descriptions, const auto & src_descriptions) { if (my_descriptions.size() != src_descriptions.size()) @@ -7071,128 +7087,56 @@ std::pair MergeTreeData::cloneAn const ReadSettings & read_settings, const WriteSettings & write_settings) { - /// Check that the storage policy contains the disk where the src_part is located. 
- bool does_storage_policy_allow_same_disk = false; - for (const DiskPtr & disk : getStoragePolicy()->getDisks()) - { - if (disk->getName() == src_part->getDataPartStorage().getDiskName()) - { - does_storage_policy_allow_same_disk = true; - break; - } - } - if (!does_storage_policy_allow_same_disk) - throw Exception( - ErrorCodes::BAD_ARGUMENTS, - "Could not clone and load part {} because disk does not belong to storage policy", - quoteString(src_part->getDataPartStorage().getFullPath())); + return MergeTreeDataPartCloner::clone( + this, src_part, metadata_snapshot, dst_part_info, tmp_part_prefix, require_part_metadata, params, read_settings, write_settings); +} - String dst_part_name = src_part->getNewName(dst_part_info); - String tmp_dst_part_name = tmp_part_prefix + dst_part_name; - auto temporary_directory_lock = getTemporaryPartDirectoryHolder(tmp_dst_part_name); +std::pair MergeTreeData::cloneAndLoadPartOnSameDiskWithDifferentPartitionKey( + const MergeTreeData::DataPartPtr & src_part, + const MergeTreePartition & new_partition, + const String & partition_id, + const IMergeTreeDataPart::MinMaxIndex & min_max_index, + const String & tmp_part_prefix, + const StorageMetadataPtr & my_metadata_snapshot, + const IDataPartStorage::ClonePartParams & clone_params, + ContextPtr local_context, + Int64 min_block, + Int64 max_block +) +{ + MergeTreePartInfo dst_part_info(partition_id, min_block, max_block, src_part->info.level); - /// Why it is needed if we only hardlink files? - auto reservation = src_part->getDataPartStorage().reserve(src_part->getBytesOnDisk()); - auto src_part_storage = src_part->getDataPartStoragePtr(); + return MergeTreeDataPartCloner::cloneWithDistinctPartitionExpression( + this, + src_part, + my_metadata_snapshot, + dst_part_info, + tmp_part_prefix, + local_context->getReadSettings(), + local_context->getWriteSettings(), + new_partition, + min_max_index, + false, + clone_params); +} - scope_guard src_flushed_tmp_dir_lock; - MergeTreeData::MutableDataPartPtr src_flushed_tmp_part; +std::pair MergeTreeData::createPartitionAndMinMaxIndexFromSourcePart( + const MergeTreeData::DataPartPtr & src_part, + const StorageMetadataPtr & metadata_snapshot, + ContextPtr local_context) +{ + const auto & src_data = src_part->storage; - /// If source part is in memory, flush it to disk and clone it already in on-disk format - /// Protect tmp dir from removing by cleanup thread with src_flushed_tmp_dir_lock - /// Construct src_flushed_tmp_part in order to delete part with its directory at destructor - if (auto src_part_in_memory = asInMemoryPart(src_part)) - { - auto flushed_part_path = *src_part_in_memory->getRelativePathForPrefix(tmp_part_prefix); + auto metadata_manager = std::make_shared(src_part.get()); + IMergeTreeDataPart::MinMaxIndex min_max_index; - auto tmp_src_part_file_name = fs::path(tmp_dst_part_name).filename(); - src_flushed_tmp_dir_lock = src_part->storage.getTemporaryPartDirectoryHolder(tmp_src_part_file_name); + min_max_index.load(src_data, metadata_manager); - auto flushed_part_storage = src_part_in_memory->flushToDisk(flushed_part_path, metadata_snapshot); + MergeTreePartition new_partition; - src_flushed_tmp_part = MergeTreeDataPartBuilder(*this, src_part->name, flushed_part_storage) - .withPartInfo(src_part->info) - .withPartFormatFromDisk() - .build(); + new_partition.create(metadata_snapshot, min_max_index.getBlock(src_data), 0u, local_context); - src_flushed_tmp_part->is_temp = true; - src_part_storage = flushed_part_storage; - } - - String with_copy; - if 
(params.copy_instead_of_hardlink) - with_copy = " (copying data)"; - - auto dst_part_storage = src_part_storage->freeze( - relative_data_path, - tmp_dst_part_name, - read_settings, - write_settings, - /* save_metadata_callback= */ {}, - params); - - if (params.metadata_version_to_write.has_value()) - { - chassert(!params.keep_metadata_version); - auto out_metadata = dst_part_storage->writeFile(IMergeTreeDataPart::METADATA_VERSION_FILE_NAME, 4096, getContext()->getWriteSettings()); - writeText(metadata_snapshot->getMetadataVersion(), *out_metadata); - out_metadata->finalize(); - if (getSettings()->fsync_after_insert) - out_metadata->sync(); - } - - LOG_DEBUG(log, "Clone{} part {} to {}{}", - src_flushed_tmp_part ? " flushed" : "", - src_part_storage->getFullPath(), - std::string(fs::path(dst_part_storage->getFullRootPath()) / tmp_dst_part_name), - with_copy); - - auto dst_data_part = MergeTreeDataPartBuilder(*this, dst_part_name, dst_part_storage) - .withPartFormatFromDisk() - .build(); - - if (!params.copy_instead_of_hardlink && params.hardlinked_files) - { - params.hardlinked_files->source_part_name = src_part->name; - params.hardlinked_files->source_table_shared_id = src_part->storage.getTableSharedID(); - - for (auto it = src_part->getDataPartStorage().iterate(); it->isValid(); it->next()) - { - if (!params.files_to_copy_instead_of_hardlinks.contains(it->name()) - && it->name() != IMergeTreeDataPart::DELETE_ON_DESTROY_MARKER_FILE_NAME_DEPRECATED - && it->name() != IMergeTreeDataPart::TXN_VERSION_METADATA_FILE_NAME) - { - params.hardlinked_files->hardlinks_from_source_part.insert(it->name()); - } - } - - auto projections = src_part->getProjectionParts(); - for (const auto & [name, projection_part] : projections) - { - const auto & projection_storage = projection_part->getDataPartStorage(); - for (auto it = projection_storage.iterate(); it->isValid(); it->next()) - { - auto file_name_with_projection_prefix = fs::path(projection_storage.getPartDirectory()) / it->name(); - if (!params.files_to_copy_instead_of_hardlinks.contains(file_name_with_projection_prefix) - && it->name() != IMergeTreeDataPart::DELETE_ON_DESTROY_MARKER_FILE_NAME_DEPRECATED - && it->name() != IMergeTreeDataPart::TXN_VERSION_METADATA_FILE_NAME) - { - params.hardlinked_files->hardlinks_from_source_part.insert(file_name_with_projection_prefix); - } - } - } - } - - /// We should write version metadata on part creation to distinguish it from parts that were created without transaction. - TransactionID tid = params.txn ? params.txn->tid : Tx::PrehistoricTID; - dst_data_part->version.setCreationTID(tid, nullptr); - dst_data_part->storeVersionMetadata(); - - dst_data_part->is_temp = true; - - dst_data_part->loadColumnsChecksumsIndexes(require_part_metadata, true); - dst_data_part->modification_time = dst_part_storage->getLastModified().epochTime(); - return std::make_pair(dst_data_part, std::move(temporary_directory_lock)); + return {new_partition, min_max_index}; } String MergeTreeData::getFullPathOnDisk(const DiskPtr & disk) const @@ -7989,7 +7933,7 @@ bool MergeTreeData::insertQueryIdOrThrowNoLock(const String & query_id, size_t m throw Exception( ErrorCodes::TOO_MANY_SIMULTANEOUS_QUERIES, "Too many simultaneous queries for table {}. Maximum is: {}", - *std::atomic_load(&log_name), + log.loadName(), max_queries); query_id_set.insert(query_id); return true; @@ -8181,7 +8125,7 @@ ReservationPtr MergeTreeData::balancedReservation( } // Record submerging big parts in the tagger to clean them up. 
- tagger_ptr->emplace(*this, part_name, std::move(covered_parts), log); + tagger_ptr->emplace(*this, part_name, std::move(covered_parts), log.load()); } } } diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index f0dbaf0e307..6a3c21018dc 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -231,6 +232,7 @@ public: } }; + using DataParts = std::set; using MutableDataParts = std::set; using DataPartsVector = std::vector; @@ -461,7 +463,7 @@ public: /// Load the set of data parts from disk. Call once - immediately after the object is created. void loadDataParts(bool skip_sanity_checks, std::optional> expected_parts); - String getLogName() const { return *std::atomic_load(&log_name); } + String getLogName() const { return log.loadName(); } Int64 getMaxBlockNumber() const; @@ -848,6 +850,23 @@ public: const ReadSettings & read_settings, const WriteSettings & write_settings); + std::pair cloneAndLoadPartOnSameDiskWithDifferentPartitionKey( + const MergeTreeData::DataPartPtr & src_part, + const MergeTreePartition & new_partition, + const String & partition_id, + const IMergeTreeDataPart::MinMaxIndex & min_max_index, + const String & tmp_part_prefix, + const StorageMetadataPtr & my_metadata_snapshot, + const IDataPartStorage::ClonePartParams & clone_params, + ContextPtr local_context, + Int64 min_block, + Int64 max_block); + + static std::pair createPartitionAndMinMaxIndexFromSourcePart( + const MergeTreeData::DataPartPtr & src_part, + const StorageMetadataPtr & metadata_snapshot, + ContextPtr local_context); + virtual std::vector getMutationsStatus() const = 0; /// Returns true if table can create new parts with adaptive granularity @@ -1114,10 +1133,7 @@ protected: /// Engine-specific methods BrokenPartCallback broken_part_callback; - /// log_name will change during table RENAME. Use atomic_shared_ptr to allow concurrent RW. - /// NOTE clang-14 doesn't have atomic_shared_ptr yet. Use std::atomic* operations for now. - std::shared_ptr log_name; - std::atomic log; + AtomicLogger log; /// Storage settings. /// Use get and set to receive readonly versions. 
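The `AtomicLogger` member above replaces the old pair of `log_name` and a raw logger pointer, so that RENAME can swap the logger and its name in one step while concurrent readers keep a consistent view. The actual ClickHouse type is not part of this diff; the following is only a minimal sketch of the idea, with a stand-in `Logger` struct and illustrative method names chosen to match the `load()` / `store()` / `loadName()` calls used above:

#include <memory>
#include <string>

/// Stand-in for the real logger type; in ClickHouse this would be a Poco-based LoggerPtr.
struct Logger
{
    std::string name;
};
using LoggerPtr = std::shared_ptr<Logger>;

/// Illustrative sketch only: one shared object owns both the logger and its name,
/// and RENAME publishes a new one atomically while readers take snapshots.
class AtomicLogger
{
public:
    explicit AtomicLogger(std::string name) { store(std::move(name)); }

    /// Called on table RENAME: publish a logger with the new name.
    void store(std::string new_name)
    {
        auto new_logger = std::make_shared<Logger>(Logger{std::move(new_name)});
        std::atomic_store(&logger, std::move(new_logger));
    }

    /// Readers get a consistent snapshot: the pointer and the name come from the same object.
    LoggerPtr load() const { return std::atomic_load(&logger); }
    std::string loadName() const { return std::atomic_load(&logger)->name; }

private:
    LoggerPtr logger;
};

Because readers receive a shared_ptr snapshot, a logger obtained via `load()` stays valid even if the table is renamed concurrently, which is what the `log.load()` call sites in this patch rely on.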
@@ -1601,10 +1617,10 @@ struct CurrentlySubmergingEmergingTagger MergeTreeData & storage; String emerging_part_name; MergeTreeData::DataPartsVector submerging_parts; - Poco::Logger * log; + LoggerPtr log; CurrentlySubmergingEmergingTagger( - MergeTreeData & storage_, const String & name_, MergeTreeData::DataPartsVector && parts_, Poco::Logger * log_) + MergeTreeData & storage_, const String & name_, MergeTreeData::DataPartsVector && parts_, LoggerPtr log_) : storage(storage_), emerging_part_name(name_), submerging_parts(std::move(parts_)), log(log_) { } diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp index 8c03aef6f99..58fddde7b54 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp @@ -66,7 +66,7 @@ static const double DISK_USAGE_COEFFICIENT_TO_SELECT = 2; static const double DISK_USAGE_COEFFICIENT_TO_RESERVE = 1.1; MergeTreeDataMergerMutator::MergeTreeDataMergerMutator(MergeTreeData & data_) - : data(data_), log(&Poco::Logger::get(data.getLogName() + " (MergerMutator)")) + : data(data_), log(getLogger(data.getLogName() + " (MergerMutator)")) { } diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.h b/src/Storages/MergeTree/MergeTreeDataMergerMutator.h index 6eab0ee0c37..f3a3f51b6c3 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.h +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.h @@ -213,7 +213,7 @@ public : private: MergeTreeData & data; - Poco::Logger * log; + LoggerPtr log; /// When the last time you wrote to the log that the disk space was running out (not to write about this too often). time_t disk_space_warning_time = 0; diff --git a/src/Storages/MergeTree/MergeTreeDataPartCloner.cpp b/src/Storages/MergeTree/MergeTreeDataPartCloner.cpp new file mode 100644 index 00000000000..04019d2c665 --- /dev/null +++ b/src/Storages/MergeTree/MergeTreeDataPartCloner.cpp @@ -0,0 +1,319 @@ +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ +extern const int BAD_ARGUMENTS; +} + +namespace DistinctPartitionExpression +{ +std::unique_ptr updatePartitionFile( + const MergeTreeData & merge_tree_data, + const MergeTreePartition & partition, + const MergeTreeData::MutableDataPartPtr & dst_part, + IDataPartStorage & storage) +{ + storage.removeFile("partition.dat"); + // Leverage already implemented MergeTreePartition::store to create & store partition.dat. + // Checksum is re-calculated later. 
+ return partition.store(merge_tree_data, storage, dst_part->checksums); +} + +IMergeTreeDataPart::MinMaxIndex::WrittenFiles updateMinMaxFiles( + const MergeTreeData & merge_tree_data, + const MergeTreeData::MutableDataPartPtr & dst_part, + IDataPartStorage & storage, + const StorageMetadataPtr & metadata_snapshot) +{ + for (const auto & column_name : MergeTreeData::getMinMaxColumnsNames(metadata_snapshot->partition_key)) + { + auto file = "minmax_" + escapeForFileName(column_name) + ".idx"; + storage.removeFile(file); + } + + return dst_part->minmax_idx->store(merge_tree_data, storage, dst_part->checksums); +} + +void finalizeNewFiles(const std::vector> & files, bool sync_new_files) +{ + for (const auto & file : files) + { + file->finalize(); + if (sync_new_files) + file->sync(); + } +} + +void updateNewPartFiles( + const MergeTreeData & merge_tree_data, + const MergeTreeData::MutableDataPartPtr & dst_part, + const MergeTreePartition & new_partition, + const IMergeTreeDataPart::MinMaxIndex & new_min_max_index, + const StorageMetadataPtr & src_metadata_snapshot, + bool sync_new_files) +{ + auto & storage = dst_part->getDataPartStorage(); + + *dst_part->minmax_idx = new_min_max_index; + + auto partition_file = updatePartitionFile(merge_tree_data, new_partition, dst_part, storage); + + auto min_max_files = updateMinMaxFiles(merge_tree_data, dst_part, storage, src_metadata_snapshot); + + IMergeTreeDataPart::MinMaxIndex::WrittenFiles written_files; + + if (partition_file) + written_files.emplace_back(std::move(partition_file)); + + written_files.insert(written_files.end(), std::make_move_iterator(min_max_files.begin()), std::make_move_iterator(min_max_files.end())); + + finalizeNewFiles(written_files, sync_new_files); + + // MergeTreeDataPartCloner::finalize_part calls IMergeTreeDataPart::loadColumnsChecksumsIndexes, which will re-create + // the checksum file if it doesn't exist. Relying on that is cumbersome, but this refactoring is simply a code extraction + // with small improvements. It can be further improved in the future. 
+ storage.removeFile("checksums.txt"); +} +} + +namespace +{ +bool doesStoragePolicyAllowSameDisk(MergeTreeData * merge_tree_data, const MergeTreeData::DataPartPtr & src_part) +{ + for (const DiskPtr & disk : merge_tree_data->getStoragePolicy()->getDisks()) + if (disk->getName() == src_part->getDataPartStorage().getDiskName()) + return true; + return false; +} + +DataPartStoragePtr flushPartStorageToDiskIfInMemory( + MergeTreeData * merge_tree_data, + const MergeTreeData::DataPartPtr & src_part, + const StorageMetadataPtr & metadata_snapshot, + const String & tmp_part_prefix, + const String & tmp_dst_part_name, + scope_guard & src_flushed_tmp_dir_lock, + MergeTreeData::MutableDataPartPtr src_flushed_tmp_part) +{ + if (auto src_part_in_memory = asInMemoryPart(src_part)) + { + auto flushed_part_path = src_part_in_memory->getRelativePathForPrefix(tmp_part_prefix); + auto tmp_src_part_file_name = fs::path(tmp_dst_part_name).filename(); + + src_flushed_tmp_dir_lock = src_part->storage.getTemporaryPartDirectoryHolder(tmp_src_part_file_name); + + auto flushed_part_storage = src_part_in_memory->flushToDisk(*flushed_part_path, metadata_snapshot); + + src_flushed_tmp_part = MergeTreeDataPartBuilder(*merge_tree_data, src_part->name, flushed_part_storage) + .withPartInfo(src_part->info) + .withPartFormatFromDisk() + .build(); + + src_flushed_tmp_part->is_temp = true; + + return flushed_part_storage; + } + + return src_part->getDataPartStoragePtr(); +} + +std::shared_ptr hardlinkAllFiles( + MergeTreeData * merge_tree_data, + const DB::ReadSettings & read_settings, + const DB::WriteSettings & write_settings, + const DataPartStoragePtr & storage, + const String & path, + const DB::IDataPartStorage::ClonePartParams & params) +{ + return storage->freeze( + merge_tree_data->getRelativeDataPath(), + path, + read_settings, + write_settings, + /*save_metadata_callback=*/{}, + params); +} + +std::pair cloneSourcePart( + MergeTreeData * merge_tree_data, + const MergeTreeData::DataPartPtr & src_part, + const StorageMetadataPtr & metadata_snapshot, + const MergeTreePartInfo & dst_part_info, + const String & tmp_part_prefix, + const ReadSettings & read_settings, + const WriteSettings & write_settings, + const DB::IDataPartStorage::ClonePartParams & params) +{ + const auto dst_part_name = src_part->getNewName(dst_part_info); + + const auto tmp_dst_part_name = tmp_part_prefix + dst_part_name; + + auto temporary_directory_lock = merge_tree_data->getTemporaryPartDirectoryHolder(tmp_dst_part_name); + + src_part->getDataPartStorage().reserve(src_part->getBytesOnDisk()); + + scope_guard src_flushed_tmp_dir_lock; + MergeTreeData::MutableDataPartPtr src_flushed_tmp_part; + + auto src_part_storage = flushPartStorageToDiskIfInMemory( + merge_tree_data, src_part, metadata_snapshot, tmp_part_prefix, tmp_dst_part_name, src_flushed_tmp_dir_lock, src_flushed_tmp_part); + + auto dst_part_storage = hardlinkAllFiles(merge_tree_data, read_settings, write_settings, src_part_storage, tmp_dst_part_name, params); + + if (params.metadata_version_to_write.has_value()) + { + chassert(!params.keep_metadata_version); + auto out_metadata = dst_part_storage->writeFile( + IMergeTreeDataPart::METADATA_VERSION_FILE_NAME, 4096, merge_tree_data->getContext()->getWriteSettings()); + writeText(metadata_snapshot->getMetadataVersion(), *out_metadata); + out_metadata->finalize(); + if (merge_tree_data->getSettings()->fsync_after_insert) + out_metadata->sync(); + } + + LOG_DEBUG( + &Poco::Logger::get("MergeTreeDataPartCloner"), + "Clone {} part {} to 
{}{}", + src_flushed_tmp_part ? "flushed" : "", + src_part_storage->getFullPath(), + std::string(fs::path(dst_part_storage->getFullRootPath()) / tmp_dst_part_name), + false); + + + auto part = MergeTreeDataPartBuilder(*merge_tree_data, dst_part_name, dst_part_storage).withPartFormatFromDisk().build(); + + return std::make_pair(part, std::move(temporary_directory_lock)); +} + +void handleHardLinkedParameterFiles(const MergeTreeData::DataPartPtr & src_part, const DB::IDataPartStorage::ClonePartParams & params) +{ + const auto & hardlinked_files = params.hardlinked_files; + + hardlinked_files->source_part_name = src_part->name; + hardlinked_files->source_table_shared_id = src_part->storage.getTableSharedID(); + + for (auto it = src_part->getDataPartStorage().iterate(); it->isValid(); it->next()) + { + if (!params.files_to_copy_instead_of_hardlinks.contains(it->name()) + && it->name() != IMergeTreeDataPart::DELETE_ON_DESTROY_MARKER_FILE_NAME_DEPRECATED + && it->name() != IMergeTreeDataPart::TXN_VERSION_METADATA_FILE_NAME) + { + hardlinked_files->hardlinks_from_source_part.insert(it->name()); + } + } +} + +void handleProjections(const MergeTreeData::DataPartPtr & src_part, const DB::IDataPartStorage::ClonePartParams & params) +{ + auto projections = src_part->getProjectionParts(); + for (const auto & [name, projection_part] : projections) + { + const auto & projection_storage = projection_part->getDataPartStorage(); + for (auto it = projection_storage.iterate(); it->isValid(); it->next()) + { + auto file_name_with_projection_prefix = fs::path(projection_storage.getPartDirectory()) / it->name(); + if (!params.files_to_copy_instead_of_hardlinks.contains(file_name_with_projection_prefix) + && it->name() != IMergeTreeDataPart::DELETE_ON_DESTROY_MARKER_FILE_NAME_DEPRECATED + && it->name() != IMergeTreeDataPart::TXN_VERSION_METADATA_FILE_NAME) + { + params.hardlinked_files->hardlinks_from_source_part.insert(file_name_with_projection_prefix); + } + } + } +} + +MergeTreeData::MutableDataPartPtr finalizePart( + const MergeTreeData::MutableDataPartPtr & dst_part, const DB::IDataPartStorage::ClonePartParams & params, bool require_part_metadata) +{ + /// We should write version metadata on part creation to distinguish it from parts that were created without transaction. + TransactionID tid = params.txn ? 
params.txn->tid : Tx::PrehistoricTID; + dst_part->version.setCreationTID(tid, nullptr); + dst_part->storeVersionMetadata(); + + dst_part->is_temp = true; + + dst_part->loadColumnsChecksumsIndexes(require_part_metadata, true); + + dst_part->modification_time = dst_part->getDataPartStorage().getLastModified().epochTime(); + + return dst_part; +} + +std::pair cloneAndHandleHardlinksAndProjections( + MergeTreeData * merge_tree_data, + const DataPartPtr & src_part, + const StorageMetadataPtr & metadata_snapshot, + const MergeTreePartInfo & dst_part_info, + const String & tmp_part_prefix, + const ReadSettings & read_settings, + const WriteSettings & write_settings, + const IDataPartStorage::ClonePartParams & params) +{ + chassert(!merge_tree_data->isStaticStorage()); + if (!doesStoragePolicyAllowSameDisk(merge_tree_data, src_part)) + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Could not clone and load part {} because disk does not belong to storage policy", + quoteString(src_part->getDataPartStorage().getFullPath())); + + auto [destination_part, temporary_directory_lock] = cloneSourcePart( + merge_tree_data, src_part, metadata_snapshot, dst_part_info, tmp_part_prefix, read_settings, write_settings, params); + + if (!params.copy_instead_of_hardlink && params.hardlinked_files) + { + handleHardLinkedParameterFiles(src_part, params); + handleProjections(src_part, params); + } + + return std::make_pair(destination_part, std::move(temporary_directory_lock)); +} +} + +std::pair MergeTreeDataPartCloner::clone( + MergeTreeData * merge_tree_data, + const DataPartPtr & src_part, + const StorageMetadataPtr & metadata_snapshot, + const MergeTreePartInfo & dst_part_info, + const String & tmp_part_prefix, + bool require_part_metadata, + const IDataPartStorage::ClonePartParams & params, + const ReadSettings & read_settings, + const WriteSettings & write_settings) +{ + auto [destination_part, temporary_directory_lock] = cloneAndHandleHardlinksAndProjections( + merge_tree_data, src_part, metadata_snapshot, dst_part_info, tmp_part_prefix, read_settings, write_settings, params); + + return std::make_pair(finalizePart(destination_part, params, require_part_metadata), std::move(temporary_directory_lock)); +} + +std::pair MergeTreeDataPartCloner::cloneWithDistinctPartitionExpression( + MergeTreeData * merge_tree_data, + const DataPartPtr & src_part, + const StorageMetadataPtr & metadata_snapshot, + const MergeTreePartInfo & dst_part_info, + const String & tmp_part_prefix, + const ReadSettings & read_settings, + const WriteSettings & write_settings, + const MergeTreePartition & new_partition, + const IMergeTreeDataPart::MinMaxIndex & new_min_max_index, + bool sync_new_files, + const IDataPartStorage::ClonePartParams & params) +{ + auto [destination_part, temporary_directory_lock] = cloneAndHandleHardlinksAndProjections( + merge_tree_data, src_part, metadata_snapshot, dst_part_info, tmp_part_prefix, read_settings, write_settings, params); + + DistinctPartitionExpression::updateNewPartFiles( + *merge_tree_data, destination_part, new_partition, new_min_max_index, src_part->storage.getInMemoryMetadataPtr(), sync_new_files); + + return std::make_pair(finalizePart(destination_part, params, false), std::move(temporary_directory_lock)); +} + +} diff --git a/src/Storages/MergeTree/MergeTreeDataPartCloner.h b/src/Storages/MergeTree/MergeTreeDataPartCloner.h new file mode 100644 index 00000000000..53585f20b7f --- /dev/null +++ b/src/Storages/MergeTree/MergeTreeDataPartCloner.h @@ -0,0 +1,43 @@ +#pragma once + +namespace DB 
+{ + +struct StorageInMemoryMetadata; +using StorageMetadataPtr = std::shared_ptr; +struct MergeTreePartition; +class IMergeTreeDataPart; + +class MergeTreeDataPartCloner +{ +public: + using DataPart = IMergeTreeDataPart; + using MutableDataPartPtr = std::shared_ptr; + using DataPartPtr = std::shared_ptr; + + static std::pair clone( + MergeTreeData * merge_tree_data, + const DataPartPtr & src_part, + const StorageMetadataPtr & metadata_snapshot, + const MergeTreePartInfo & dst_part_info, + const String & tmp_part_prefix, + bool require_part_metadata, + const IDataPartStorage::ClonePartParams & params, + const ReadSettings & read_settings, + const WriteSettings & write_settings); + + static std::pair cloneWithDistinctPartitionExpression( + MergeTreeData * merge_tree_data, + const DataPartPtr & src_part, + const StorageMetadataPtr & metadata_snapshot, + const MergeTreePartInfo & dst_part_info, + const String & tmp_part_prefix, + const ReadSettings & read_settings, + const WriteSettings & write_settings, + const MergeTreePartition & new_partition, + const IMergeTreeDataPart::MinMaxIndex & new_min_max_index, + bool sync_new_files, + const IDataPartStorage::ClonePartParams & params); +}; + +} diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp index d86ff3a17ff..9d373504473 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp @@ -7,6 +7,7 @@ #include #include #include +#include namespace DB { @@ -143,13 +144,22 @@ void MergeTreeDataPartWriterWide::addStreams( auto ast = parseQuery(codec_parser, "(" + Poco::toUpper(settings.marks_compression_codec) + ")", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); CompressionCodecPtr marks_compression_codec = CompressionCodecFactory::instance().get(ast, nullptr); + const auto column_desc = metadata_snapshot->columns.tryGetColumnDescription(GetColumnsOptions(GetColumnsOptions::AllPhysical), column.getNameInStorage()); + + UInt64 max_compress_block_size = 0; + if (column_desc) + if (const auto * value = column_desc->settings.tryGet("max_compress_block_size")) + max_compress_block_size = value->safeGet(); + if (!max_compress_block_size) + max_compress_block_size = settings.max_compress_block_size; + column_streams[stream_name] = std::make_unique>( stream_name, data_part->getDataPartStoragePtr(), stream_name, DATA_FILE_EXTENSION, stream_name, marks_file_extension, compression_codec, - settings.max_compress_block_size, + max_compress_block_size, marks_compression_codec, settings.marks_compress_block_size, settings.query_write_settings); @@ -323,6 +333,13 @@ StreamsWithMarks MergeTreeDataPartWriterWide::getCurrentMarksForColumn( WrittenOffsetColumns & offset_columns) { StreamsWithMarks result; + const auto column_desc = metadata_snapshot->columns.tryGetColumnDescription(GetColumnsOptions(GetColumnsOptions::AllPhysical), column.getNameInStorage()); + UInt64 min_compress_block_size = 0; + if (column_desc) + if (const auto * value = column_desc->settings.tryGet("min_compress_block_size")) + min_compress_block_size = value->safeGet(); + if (!min_compress_block_size) + min_compress_block_size = settings.min_compress_block_size; data_part->getSerialization(column.name)->enumerateStreams([&] (const ISerialization::SubstreamPath & substream_path) { bool is_offsets = !substream_path.empty() && substream_path.back().type == ISerialization::Substream::ArraySizes; @@ -335,7 +352,7 @@ StreamsWithMarks 
MergeTreeDataPartWriterWide::getCurrentMarksForColumn( auto & stream = *column_streams[stream_name]; /// There could already be enough data to compress into the new block. - if (stream.compressed_hashing.offset() >= settings.min_compress_block_size) + if (stream.compressed_hashing.offset() >= min_compress_block_size) stream.compressed_hashing.next(); StreamNameAndMark stream_with_mark; diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index 66f593bbf33..a76d370d057 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -74,7 +74,7 @@ namespace ErrorCodes MergeTreeDataSelectExecutor::MergeTreeDataSelectExecutor(const MergeTreeData & data_) - : data(data_), log(&Poco::Logger::get(data.getLogName() + " (SelectExecutor)")) + : data(data_), log(getLogger(data.getLogName() + " (SelectExecutor)")) { } @@ -83,7 +83,7 @@ size_t MergeTreeDataSelectExecutor::getApproximateTotalRowsToRead( const StorageMetadataPtr & metadata_snapshot, const KeyCondition & key_condition, const Settings & settings, - Poco::Logger * log) + LoggerPtr log) { size_t rows_count = 0; @@ -167,7 +167,7 @@ MergeTreeDataSelectSamplingData MergeTreeDataSelectExecutor::getSampling( const StorageMetadataPtr & metadata_snapshot, ContextPtr context, bool sample_factor_column_queried, - Poco::Logger * log) + LoggerPtr log) { const Settings & settings = context->getSettingsRef(); /// Sampling. @@ -503,7 +503,7 @@ void MergeTreeDataSelectExecutor::filterPartsByPartition( const MergeTreeData & data, const ContextPtr & context, const PartitionIdToMaxBlock * max_block_numbers_to_read, - Poco::Logger * log, + LoggerPtr log, ReadFromMergeTree::IndexStats & index_stats) { chassert(alter_conversions.empty() || parts.size() == alter_conversions.size()); @@ -590,7 +590,7 @@ RangesInDataParts MergeTreeDataSelectExecutor::filterPartsByPrimaryKeyAndSkipInd const std::optional & part_offset_condition, const UsefulSkipIndexes & skip_indexes, const MergeTreeReaderSettings & reader_settings, - Poco::Logger * log, + LoggerPtr log, size_t num_streams, ReadFromMergeTree::IndexStats & index_stats, bool use_skip_indexes) @@ -1082,7 +1082,7 @@ MarkRanges MergeTreeDataSelectExecutor::markRangesFromPKRange( const KeyCondition & key_condition, const std::optional & part_offset_condition, const Settings & settings, - Poco::Logger * log) + LoggerPtr log) { MarkRanges res; @@ -1322,7 +1322,7 @@ MarkRanges MergeTreeDataSelectExecutor::filterMarksUsingIndex( const MergeTreeReaderSettings & reader_settings, MarkCache * mark_cache, UncompressedCache * uncompressed_cache, - Poco::Logger * log) + LoggerPtr log) { if (!index_helper->getDeserializedFormat(part->getDataPartStorage(), index_helper->getFileName())) { @@ -1440,7 +1440,7 @@ MarkRanges MergeTreeDataSelectExecutor::filterMarksUsingMergedIndex( const MergeTreeReaderSettings & reader_settings, MarkCache * mark_cache, UncompressedCache * uncompressed_cache, - Poco::Logger * log) + LoggerPtr log) { for (const auto & index_helper : indices) { @@ -1596,7 +1596,7 @@ void MergeTreeDataSelectExecutor::selectPartsToReadWithUUIDFilter( const PartitionIdToMaxBlock * max_block_numbers_to_read, ContextPtr query_context, PartFilterCounters & counters, - Poco::Logger * log) + LoggerPtr log) { /// process_parts prepare parts that have to be read for the query, /// returns false if duplicated parts' UUID have been met diff --git 
a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h index ba1f20054f0..17975354187 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h @@ -71,11 +71,11 @@ public: const KeyCondition & key_condition, const std::optional & part_offset_condition, const Settings & settings, - Poco::Logger * log); + LoggerPtr log); private: const MergeTreeData & data; - Poco::Logger * log; + LoggerPtr log; /// Get the approximate value (bottom estimate - only by full marks) of the number of rows falling under the index. static size_t getApproximateTotalRowsToRead( @@ -83,7 +83,7 @@ private: const StorageMetadataPtr & metadata_snapshot, const KeyCondition & key_condition, const Settings & settings, - Poco::Logger * log); + LoggerPtr log); static MarkRanges filterMarksUsingIndex( MergeTreeIndexPtr index_helper, @@ -94,7 +94,7 @@ private: const MergeTreeReaderSettings & reader_settings, MarkCache * mark_cache, UncompressedCache * uncompressed_cache, - Poco::Logger * log); + LoggerPtr log); static MarkRanges filterMarksUsingMergedIndex( MergeTreeIndices indices, @@ -105,7 +105,7 @@ private: const MergeTreeReaderSettings & reader_settings, MarkCache * mark_cache, UncompressedCache * uncompressed_cache, - Poco::Logger * log); + LoggerPtr log); struct PartFilterCounters { @@ -141,7 +141,7 @@ private: const PartitionIdToMaxBlock * max_block_numbers_to_read, ContextPtr query_context, PartFilterCounters & counters, - Poco::Logger * log); + LoggerPtr log); public: /// For given number rows and bytes, get the number of marks to read. @@ -184,7 +184,7 @@ public: const MergeTreeData & data, const ContextPtr & context, const PartitionIdToMaxBlock * max_block_numbers_to_read, - Poco::Logger * log, + LoggerPtr log, ReadFromMergeTree::IndexStats & index_stats); /// Filter parts using primary key and secondary indexes. @@ -199,7 +199,7 @@ public: const std::optional & part_offset_condition, const UsefulSkipIndexes & skip_indexes, const MergeTreeReaderSettings & reader_settings, - Poco::Logger * log, + LoggerPtr log, size_t num_streams, ReadFromMergeTree::IndexStats & index_stats, bool use_skip_indexes); @@ -216,7 +216,7 @@ public: const StorageMetadataPtr & metadata_snapshot, ContextPtr context, bool sample_factor_column_queried, - Poco::Logger * log); + LoggerPtr log); /// Check query limits: max_partitions_to_read, max_concurrent_queries. /// Also, return QueryIdHolder. If not null, we should keep it until query finishes. diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/src/Storages/MergeTree/MergeTreeDataWriter.cpp index 2a381afa805..ce3015c5dcb 100644 --- a/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include @@ -114,7 +115,7 @@ void buildScatterSelector( if (max_parts && partitions_count >= max_parts && !throw_on_limit) { const auto & client_info = context->getClientInfo(); - Poco::Logger * log = &Poco::Logger::get("MergeTreeDataWriter"); + LoggerPtr log = getLogger("MergeTreeDataWriter"); LOG_WARNING(log, "INSERT query from initial_user {} (query ID: {}) inserted a block " "that created parts in {} partitions. 
This is being logged " @@ -314,8 +315,13 @@ Block MergeTreeDataWriter::mergeBlock( IColumn::Permutation *& permutation, const MergeTreeData::MergingParams & merging_params) { + OpenTelemetry::SpanHolder span("MergeTreeDataWriter::mergeBlock"); + size_t block_size = block.rows(); + span.addAttribute("clickhouse.rows", block_size); + span.addAttribute("clickhouse.columns", block.columns()); + auto get_merging_algorithm = [&]() -> std::shared_ptr { switch (merging_params.mode) @@ -329,7 +335,7 @@ Block MergeTreeDataWriter::mergeBlock( case MergeTreeData::MergingParams::Collapsing: return std::make_shared( block, 1, sort_description, merging_params.sign_column, - false, block_size + 1, /*block_size_bytes=*/0, &Poco::Logger::get("MergeTreeDataWriter")); + false, block_size + 1, /*block_size_bytes=*/0, getLogger("MergeTreeDataWriter")); case MergeTreeData::MergingParams::Summing: return std::make_shared( block, 1, sort_description, merging_params.columns_to_sum, @@ -351,6 +357,8 @@ Block MergeTreeDataWriter::mergeBlock( if (!merging_algorithm) return block; + span.addAttribute("clickhouse.merging_algorithm", merging_algorithm->getName()); + Chunk chunk(block.getColumns(), block_size); IMergingAlgorithm::Input input; @@ -610,7 +618,7 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeProjectionPartImpl( bool is_temp, IMergeTreeDataPart * parent_part, const MergeTreeData & data, - Poco::Logger * log, + LoggerPtr log, Block block, const ProjectionDescription & projection) { @@ -721,7 +729,7 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeProjectionPartImpl( MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeProjectionPart( const MergeTreeData & data, - Poco::Logger * log, + LoggerPtr log, Block block, const ProjectionDescription & projection, IMergeTreeDataPart * parent_part) @@ -740,7 +748,7 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeProjectionPart( /// projection part merges. MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeTempProjectionPart( const MergeTreeData & data, - Poco::Logger * log, + LoggerPtr log, Block block, const ProjectionDescription & projection, IMergeTreeDataPart * parent_part, diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.h b/src/Storages/MergeTree/MergeTreeDataWriter.h index 2fb6b1f22d4..8fb8b82dbe6 100644 --- a/src/Storages/MergeTree/MergeTreeDataWriter.h +++ b/src/Storages/MergeTree/MergeTreeDataWriter.h @@ -45,8 +45,9 @@ class MergeTreeDataWriter public: explicit MergeTreeDataWriter(MergeTreeData & data_) : data(data_) - , log(&Poco::Logger::get(data.getLogName() + " (Writer)")) - {} + , log(getLogger(data.getLogName() + " (Writer)")) + { + } /** Split the block to blocks, each of them must be written as separate part. * (split rows by partition) @@ -91,7 +92,7 @@ public: /// For insertion. static TemporaryPart writeProjectionPart( const MergeTreeData & data, - Poco::Logger * log, + LoggerPtr log, Block block, const ProjectionDescription & projection, IMergeTreeDataPart * parent_part); @@ -99,7 +100,7 @@ public: /// For mutation: MATERIALIZE PROJECTION. 
static TemporaryPart writeTempProjectionPart( const MergeTreeData & data, - Poco::Logger * log, + LoggerPtr log, Block block, const ProjectionDescription & projection, IMergeTreeDataPart * parent_part, @@ -126,12 +127,12 @@ private: bool is_temp, IMergeTreeDataPart * parent_part, const MergeTreeData & data, - Poco::Logger * log, + LoggerPtr log, Block block, const ProjectionDescription & projection); MergeTreeData & data; - Poco::Logger * log; + LoggerPtr log; }; } diff --git a/src/Storages/MergeTree/MergeTreeIndexInverted.cpp b/src/Storages/MergeTree/MergeTreeIndexInverted.cpp index 4c28fe8f00b..baf5b0bf4de 100644 --- a/src/Storages/MergeTree/MergeTreeIndexInverted.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexInverted.cpp @@ -22,6 +22,7 @@ #include #include #include +#include #include @@ -227,6 +228,7 @@ bool MergeTreeConditionInverted::alwaysUnknownOrTrue() const || element.function == RPNElement::FUNCTION_IN || element.function == RPNElement::FUNCTION_NOT_IN || element.function == RPNElement::FUNCTION_MULTI_SEARCH + || element.function == RPNElement::FUNCTION_MATCH || element.function == RPNElement::ALWAYS_FALSE) { rpn_stack.push_back(false); @@ -294,8 +296,7 @@ bool MergeTreeConditionInverted::mayBeTrueOnGranuleInPart(MergeTreeIndexGranuleP result[row] = result[row] && granule->gin_filters[key_idx].contains(gin_filters[row], cache_store); } - rpn_stack.emplace_back( - std::find(std::cbegin(result), std::cend(result), true) != std::end(result), true); + rpn_stack.emplace_back(std::find(std::cbegin(result), std::cend(result), true) != std::end(result), true); if (element.function == RPNElement::FUNCTION_NOT_IN) rpn_stack.back() = !rpn_stack.back(); } @@ -308,8 +309,27 @@ bool MergeTreeConditionInverted::mayBeTrueOnGranuleInPart(MergeTreeIndexGranuleP for (size_t row = 0; row < gin_filters.size(); ++row) result[row] = result[row] && granule->gin_filters[element.key_column].contains(gin_filters[row], cache_store); - rpn_stack.emplace_back( - std::find(std::cbegin(result), std::cend(result), true) != std::end(result), true); + rpn_stack.emplace_back(std::find(std::cbegin(result), std::cend(result), true) != std::end(result), true); + } + else if (element.function == RPNElement::FUNCTION_MATCH) + { + if (!element.set_gin_filters.empty()) + { + /// Alternative substrings + std::vector result(element.set_gin_filters.back().size(), true); + + const auto & gin_filters = element.set_gin_filters[0]; + + for (size_t row = 0; row < gin_filters.size(); ++row) + result[row] = result[row] && granule->gin_filters[element.key_column].contains(gin_filters[row], cache_store); + + rpn_stack.emplace_back(std::find(std::cbegin(result), std::cend(result), true) != std::end(result), true); + } + else if (element.gin_filter) + { + rpn_stack.emplace_back(granule->gin_filters[element.key_column].contains(*element.gin_filter, cache_store), true); + } + } else if (element.function == RPNElement::FUNCTION_NOT) { @@ -414,7 +434,8 @@ bool MergeTreeConditionInverted::traverseAtomAST(const RPNBuilderTreeNode & node function_name == "hasTokenOrNull" || function_name == "startsWith" || function_name == "endsWith" || - function_name == "multiSearchAny") + function_name == "multiSearchAny" || + function_name == "match") { Field const_value; DataTypePtr const_type; @@ -605,6 +626,42 @@ bool MergeTreeConditionInverted::traverseASTEquals( out.set_gin_filters = std::move(gin_filters); return true; } + else if (function_name == "match") + { + out.key_column = key_column_num; + out.function = RPNElement::FUNCTION_MATCH; + + auto 
& value = const_value.get(); + String required_substring; + bool dummy_is_trivial, dummy_required_substring_is_prefix; + std::vector alternatives; + OptimizedRegularExpression::analyze(value, required_substring, dummy_is_trivial, dummy_required_substring_is_prefix, alternatives); + + if (required_substring.empty() && alternatives.empty()) + return false; + + /// out.set_gin_filters means alternatives exist + /// out.gin_filter means required_substring exists + if (!alternatives.empty()) + { + std::vector gin_filters; + gin_filters.emplace_back(); + for (const auto & alternative : alternatives) + { + gin_filters.back().emplace_back(params); + token_extractor->stringToGinFilter(alternative.data(), alternative.size(), gin_filters.back().back()); + } + out.set_gin_filters = std::move(gin_filters); + } + else + { + out.gin_filter = std::make_unique(params); + token_extractor->stringToGinFilter(required_substring.data(), required_substring.size(), *out.gin_filter); + } + + return true; + + } return false; } diff --git a/src/Storages/MergeTree/MergeTreeIndexInverted.h b/src/Storages/MergeTree/MergeTreeIndexInverted.h index 807651d0c26..f3c1f37e364 100644 --- a/src/Storages/MergeTree/MergeTreeIndexInverted.h +++ b/src/Storages/MergeTree/MergeTreeIndexInverted.h @@ -101,6 +101,7 @@ private: FUNCTION_IN, FUNCTION_NOT_IN, FUNCTION_MULTI_SEARCH, + FUNCTION_MATCH, FUNCTION_UNKNOWN, /// Can take any value. /// Operators of the logical expression. FUNCTION_NOT, @@ -116,6 +117,7 @@ private: : function(function_), key_column(key_column_), gin_filter(std::move(const_gin_filter_)) {} Function function = FUNCTION_UNKNOWN; + /// For FUNCTION_EQUALS, FUNCTION_NOT_EQUALS and FUNCTION_MULTI_SEARCH size_t key_column; diff --git a/src/Storages/MergeTree/MergeTreeIndexSet.cpp b/src/Storages/MergeTree/MergeTreeIndexSet.cpp index 831856f8085..dba2bc1e56c 100644 --- a/src/Storages/MergeTree/MergeTreeIndexSet.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexSet.cpp @@ -1,5 +1,7 @@ #include +#include + #include #include #include @@ -490,7 +492,8 @@ bool MergeTreeIndexConditionSet::checkDAGUseless(const ActionsDAG::Node & node, RPNBuilderTreeContext tree_context(context); RPNBuilderTreeNode tree_node(node_to_check, tree_context); - if (node.column && isColumnConst(*node.column)) + if (node.column && isColumnConst(*node.column) + && !WhichDataType(node.result_type).isSet()) { Field literal; node.column->get(0, literal); diff --git a/src/Storages/MergeTree/MergeTreePartition.cpp b/src/Storages/MergeTree/MergeTreePartition.cpp index ddeaf69136a..76ef3be25b3 100644 --- a/src/Storages/MergeTree/MergeTreePartition.cpp +++ b/src/Storages/MergeTree/MergeTreePartition.cpp @@ -467,6 +467,45 @@ void MergeTreePartition::create(const StorageMetadataPtr & metadata_snapshot, Bl } } +void MergeTreePartition::createAndValidateMinMaxPartitionIds( + const StorageMetadataPtr & metadata_snapshot, Block block_with_min_max_partition_ids, ContextPtr context) +{ + if (!metadata_snapshot->hasPartitionKey()) + return; + + auto partition_key_names_and_types = executePartitionByExpression(metadata_snapshot, block_with_min_max_partition_ids, context); + value.resize(partition_key_names_and_types.size()); + + /// Executing partition_by expression adds new columns to passed block according to partition functions. + /// The block is passed by reference and is used afterwards. `moduloLegacy` needs to be substituted back + /// with just `modulo`, because it was a temporary substitution. 
+ static constexpr std::string_view modulo_legacy_function_name = "moduloLegacy"; + + size_t i = 0; + for (const auto & element : partition_key_names_and_types) + { + auto & partition_column = block_with_min_max_partition_ids.getByName(element.name); + + if (element.name.starts_with(modulo_legacy_function_name)) + partition_column.name.replace(0, modulo_legacy_function_name.size(), "modulo"); + + Field extracted_min_partition_id_field; + Field extracted_max_partition_id_field; + + partition_column.column->get(0, extracted_min_partition_id_field); + partition_column.column->get(1, extracted_max_partition_id_field); + + if (extracted_min_partition_id_field != extracted_max_partition_id_field) + { + throw Exception( + ErrorCodes::INVALID_PARTITION_VALUE, + "Can not create the partition. A partition can not contain values that have different partition ids"); + } + + partition_column.column->get(0u, value[i++]); + } +} + NamesAndTypesList MergeTreePartition::executePartitionByExpression(const StorageMetadataPtr & metadata_snapshot, Block & block, ContextPtr context) { auto adjusted_partition_key = adjustPartitionKey(metadata_snapshot, context); diff --git a/src/Storages/MergeTree/MergeTreePartition.h b/src/Storages/MergeTree/MergeTreePartition.h index 78b141f26ec..fd7ae02cde4 100644 --- a/src/Storages/MergeTree/MergeTreePartition.h +++ b/src/Storages/MergeTree/MergeTreePartition.h @@ -1,11 +1,12 @@ #pragma once -#include +#include #include #include #include #include -#include +#include +#include namespace DB { @@ -51,6 +52,11 @@ public: void create(const StorageMetadataPtr & metadata_snapshot, Block block, size_t row, ContextPtr context); + /// Copy of MergeTreePartition::create, but also validates if min max partition keys are equal. If they are different, + /// it means the partition can't be created because the data doesn't belong to the same partition. + void createAndValidateMinMaxPartitionIds( + const StorageMetadataPtr & metadata_snapshot, Block block_with_min_max_partition_ids, ContextPtr context); + static void appendFiles(const MergeTreeData & storage, Strings & files); /// Adjust partition key and execute its expression on block. Return sample block according to used expression. 
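`createAndValidateMinMaxPartitionIds` evaluates the destination table's partition expression over a two-row block built from the source part's min and max values and rejects the part if the two rows land in different destination partitions. A self-contained toy illustration of that rule follows; the `to_yyyymm` helper and the integer date encoding are invented for the example, while the real code operates on the block produced by the partition expression:

#include <iostream>
#include <stdexcept>

// Toy destination partition expression: toYYYYMM(date), with dates encoded as YYYYMMDD integers.
static int to_yyyymm(int yyyymmdd) { return yyyymmdd / 100; }

// Mirrors the rule: the partition id computed for the part's min value must equal
// the one computed for its max value, otherwise the part would span two partitions.
static int partition_id_for_part(int min_date, int max_date)
{
    int min_partition_id = to_yyyymm(min_date);
    int max_partition_id = to_yyyymm(max_date);
    if (min_partition_id != max_partition_id)
        throw std::runtime_error("A partition cannot contain values that have different partition ids");
    return min_partition_id;
}

int main()
{
    std::cout << partition_id_for_part(20240105, 20240120) << '\n'; // OK: both rows map to 202401
    try
    {
        partition_id_for_part(20240130, 20240202);                  // throws: 202401 vs 202402
    }
    catch (const std::exception & e)
    {
        std::cout << e.what() << '\n';
    }
}

In other words, a source part may only be attached or replaced if every row it contains maps to one and the same destination partition.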
diff --git a/src/Storages/MergeTree/MergeTreePartitionCompatibilityVerifier.cpp b/src/Storages/MergeTree/MergeTreePartitionCompatibilityVerifier.cpp new file mode 100644 index 00000000000..21bcdb84a96 --- /dev/null +++ b/src/Storages/MergeTree/MergeTreePartitionCompatibilityVerifier.cpp @@ -0,0 +1,91 @@ +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ +extern const int BAD_ARGUMENTS; +} + +namespace +{ +bool isDestinationPartitionExpressionMonotonicallyIncreasing( + const std::vector & hyperrectangle, const MergeTreeData & destination_storage) +{ + auto destination_table_metadata = destination_storage.getInMemoryMetadataPtr(); + + auto key_description = destination_table_metadata->getPartitionKey(); + auto definition_ast = key_description.definition_ast->clone(); + + auto table_identifier = std::make_shared(destination_storage.getStorageID().getTableName()); + auto table_with_columns + = TableWithColumnNamesAndTypes{DatabaseAndTableWithAlias(table_identifier), destination_table_metadata->getColumns().getOrdinary()}; + + auto expression_list = extractKeyExpressionList(definition_ast); + + MonotonicityCheckVisitor::Data data{{table_with_columns}, destination_storage.getContext(), /*group_by_function_hashes*/ {}}; + + for (auto i = 0u; i < expression_list->children.size(); i++) + { + data.range = hyperrectangle[i]; + + MonotonicityCheckVisitor(data).visit(expression_list->children[i]); + + if (!data.monotonicity.is_monotonic || !data.monotonicity.is_positive) + return false; + } + + return true; +} + +bool isExpressionDirectSubsetOf(const ASTPtr source, const ASTPtr destination) +{ + auto source_expression_list = extractKeyExpressionList(source); + auto destination_expression_list = extractKeyExpressionList(destination); + + std::unordered_set source_columns; + + for (auto i = 0u; i < source_expression_list->children.size(); ++i) + source_columns.insert(source_expression_list->children[i]->getColumnName()); + + for (auto i = 0u; i < destination_expression_list->children.size(); ++i) + if (!source_columns.contains(destination_expression_list->children[i]->getColumnName())) + return false; + + return true; +} +} + +void MergeTreePartitionCompatibilityVerifier::verify( + const MergeTreeData & source_storage, const MergeTreeData & destination_storage, const DataPartsVector & source_parts) +{ + const auto source_metadata = source_storage.getInMemoryMetadataPtr(); + const auto destination_metadata = destination_storage.getInMemoryMetadataPtr(); + + const auto source_partition_key_ast = source_metadata->getPartitionKeyAST(); + const auto destination_partition_key_ast = destination_metadata->getPartitionKeyAST(); + + // If destination partition expression columns are a subset of source partition expression columns, + // there is no need to check for monotonicity. 
+ if (isExpressionDirectSubsetOf(source_partition_key_ast, destination_partition_key_ast)) + return; + + const auto src_global_min_max_indexes = MergeTreePartitionGlobalMinMaxIdxCalculator::calculate(source_parts, destination_storage); + + assert(!src_global_min_max_indexes.hyperrectangle.empty()); + + if (!isDestinationPartitionExpressionMonotonicallyIncreasing(src_global_min_max_indexes.hyperrectangle, destination_storage)) + throw DB::Exception(ErrorCodes::BAD_ARGUMENTS, "Destination table partition expression is not monotonically increasing"); + + MergeTreePartition().createAndValidateMinMaxPartitionIds( + destination_storage.getInMemoryMetadataPtr(), + src_global_min_max_indexes.getBlock(destination_storage), + destination_storage.getContext()); +} + +} diff --git a/src/Storages/MergeTree/MergeTreePartitionCompatibilityVerifier.h b/src/Storages/MergeTree/MergeTreePartitionCompatibilityVerifier.h new file mode 100644 index 00000000000..1682add3ebd --- /dev/null +++ b/src/Storages/MergeTree/MergeTreePartitionCompatibilityVerifier.h @@ -0,0 +1,30 @@ +#pragma once + +#include +#include + +namespace DB +{ + +/* + * Verifies that source and destination partitions are compatible. + * To be compatible, one of the following criteria must be met: + * 1. Destination partition expression columns are a subset of source partition expression columns; or + * 2. Destination partition expression is monotonic on the source global min_max idx range AND the computed partition id for + * the source global min_max idx range is the same. + * + * If not, an exception is thrown. + * */ + +class MergeTreePartitionCompatibilityVerifier +{ +public: + using DataPart = IMergeTreeDataPart; + using DataPartPtr = std::shared_ptr; + using DataPartsVector = std::vector; + + static void + verify(const MergeTreeData & source_storage, const MergeTreeData & destination_storage, const DataPartsVector & source_parts); +}; + +} diff --git a/src/Storages/MergeTree/MergeTreePartitionGlobalMinMaxIdxCalculator.cpp b/src/Storages/MergeTree/MergeTreePartitionGlobalMinMaxIdxCalculator.cpp new file mode 100644 index 00000000000..0871efadf0c --- /dev/null +++ b/src/Storages/MergeTree/MergeTreePartitionGlobalMinMaxIdxCalculator.cpp @@ -0,0 +1,25 @@ +#include + +namespace DB +{ + +IMergeTreeDataPart::MinMaxIndex +MergeTreePartitionGlobalMinMaxIdxCalculator::calculate(const DataPartsVector & parts, const MergeTreeData & storage) +{ + IMergeTreeDataPart::MinMaxIndex global_min_max_indexes; + + for (const auto & part : parts) + { + auto metadata_manager = std::make_shared(part.get()); + + auto local_min_max_index = MergeTreeData::DataPart::MinMaxIndex(); + + local_min_max_index.load(storage, metadata_manager); + + global_min_max_indexes.merge(local_min_max_index); + } + + return global_min_max_indexes; +} + +} diff --git a/src/Storages/MergeTree/MergeTreePartitionGlobalMinMaxIdxCalculator.h b/src/Storages/MergeTree/MergeTreePartitionGlobalMinMaxIdxCalculator.h new file mode 100644 index 00000000000..4f271177246 --- /dev/null +++ b/src/Storages/MergeTree/MergeTreePartitionGlobalMinMaxIdxCalculator.h @@ -0,0 +1,24 @@ +#pragma once + +#include + +#include +#include + +namespace DB +{ + +/* + * Calculates global min max indexes for a given set of parts on a given storage. 
+ * */ +class MergeTreePartitionGlobalMinMaxIdxCalculator +{ + using DataPart = IMergeTreeDataPart; + using DataPartPtr = std::shared_ptr; + using DataPartsVector = std::vector; + +public: + static IMergeTreeDataPart::MinMaxIndex calculate(const DataPartsVector & parts, const MergeTreeData & storage); +}; + +} diff --git a/src/Storages/MergeTree/MergeTreePartsMover.h b/src/Storages/MergeTree/MergeTreePartsMover.h index b9109e51309..43d8ebdd6d3 100644 --- a/src/Storages/MergeTree/MergeTreePartsMover.h +++ b/src/Storages/MergeTree/MergeTreePartsMover.h @@ -48,7 +48,7 @@ public: explicit MergeTreePartsMover(MergeTreeData * data_) : data(data_) - , log(&Poco::Logger::get("MergeTreePartsMover")) + , log(getLogger("MergeTreePartsMover")) { } @@ -81,7 +81,7 @@ public: private: MergeTreeData * data; - Poco::Logger * log; + LoggerPtr log; }; diff --git a/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp b/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp index 3f9632637b6..47c2fe07bb4 100644 --- a/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp +++ b/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp @@ -128,7 +128,7 @@ MergeTreePrefetchedReadPool::MergeTreePrefetchedReadPool( context_) , WithContext(context_) , prefetch_threadpool(getContext()->getPrefetchThreadpool()) - , log(&Poco::Logger::get("MergeTreePrefetchedReadPool(" + (parts_ranges.empty() ? "" : parts_ranges.front().data_part->storage.getStorageID().getNameForLogs()) + ")")) + , log(getLogger("MergeTreePrefetchedReadPool(" + (parts_ranges.empty() ? "" : parts_ranges.front().data_part->storage.getStorageID().getNameForLogs()) + ")")) { /// Tasks creation might also create a lost of readers - check they do not /// do any time consuming operations in ctor. diff --git a/src/Storages/MergeTree/MergeTreePrefetchedReadPool.h b/src/Storages/MergeTree/MergeTreePrefetchedReadPool.h index 9925d4e2fa4..378034c5eae 100644 --- a/src/Storages/MergeTree/MergeTreePrefetchedReadPool.h +++ b/src/Storages/MergeTree/MergeTreePrefetchedReadPool.h @@ -122,7 +122,7 @@ private: TasksPerThread per_thread_tasks; std::priority_queue prefetch_queue; /// the smallest on top bool started_prefetches = false; - Poco::Logger * log; + LoggerPtr log; /// A struct which allows to track max number of tasks which were in the /// threadpool simultaneously (similar to CurrentMetrics, but the result diff --git a/src/Storages/MergeTree/MergeTreeRangeReader.cpp b/src/Storages/MergeTree/MergeTreeRangeReader.cpp index 84e3c10eace..cce7e56dda9 100644 --- a/src/Storages/MergeTree/MergeTreeRangeReader.cpp +++ b/src/Storages/MergeTree/MergeTreeRangeReader.cpp @@ -1283,6 +1283,81 @@ inline void combineFiltersImpl(UInt8 * first_begin, const UInt8 * first_end, con } ) +/* The BMI2 intrinsic, _pdep_u64 (unsigned __int64 a, unsigned __int64 mask), works + * by copying contiguous low-order bits from unsigned 64-bit integer a to destination + * at the corresponding bit locations specified by mask. To implement the column + * combination with the intrinsic, 8 contiguous bytes would be loaded from second_begin + * as a UInt64 and act the first operand, meanwhile the mask should be constructed from + * first_begin so that the bytes to be replaced (non-zero elements) are mapped to 0xFF + * at the exact bit locations and 0x00 otherwise. + * + * The construction of mask employs the SSE intrinsic, mm_cmpeq_epi8(__m128i a, __m128i + * b), which compares packed 8-bit integers in first_begin and packed 0s and outputs + * 0xFF for equality and 0x00 for inequality. 
The result's negation then creates the + * desired bit masks for _pdep_u64. + * + * The below example visualizes how this optimization applies to the combination of + * two quadwords from first_begin and second_begin. + * + * Addr high low + * <---------------------------------------- + * first_begin............................0x00 0x11 0x12 0x00 0x00 0x13 0x14 0x15 + * | mm_cmpeq_epi8(src, 0) | | | | | | | | + * v v v v v v v v v + * inv_mask..............................0xFF 0x00 0x00 0xFF 0xFF 0x00 0x00 0x00 + * | (negation) | | | | | | | | + * v v v v v v v v v + * mask-------------------------+......0x00 0xFF 0xFF 0x00 0x00 0xFF 0xFF 0xFF + * | | | | | | + * v v v v v v + * dst = pdep_u64(second_begin, mask)..0x00 0x05 0x04 0x00 0x00 0x03 0x02 0x01 + * ^ ^ ^ ^ ^ ^ + * | | | | | | + * | | +---------+ | | | + * +------------------+ +---------+ | | | | + * | | | | | | + * second_begin...........................0x00 0x00 0x00 0x05 0x04 0x03 0x02 0x01 + * + * References: + * 1. https://www.felixcloutier.com/x86/pdep + * 2. https://www.felixcloutier.com/x86/pcmpeqb:pcmpeqw:pcmpeqd + */ +DECLARE_AVX2_SPECIFIC_CODE( +inline void combineFiltersImpl(UInt8 * first_begin, const UInt8 * first_end, const UInt8 * second_begin) +{ + constexpr size_t XMM_VEC_SIZE_IN_BYTES = 16; + const __m128i zero16 = _mm_setzero_si128(); + + while (first_begin + XMM_VEC_SIZE_IN_BYTES <= first_end) + { + __m128i src = _mm_loadu_si128(reinterpret_cast<__m128i *>(first_begin)); + __m128i inv_mask = _mm_cmpeq_epi8(src, zero16); + + UInt64 masks[] = { + ~static_cast(_mm_extract_epi64(inv_mask, 0)), + ~static_cast(_mm_extract_epi64(inv_mask, 1)), + }; + + for (const auto & mask: masks) + { + UInt64 dst = _pdep_u64(unalignedLoad(second_begin), mask); + unalignedStore(first_begin, dst); + + first_begin += sizeof(UInt64); + second_begin += std::popcount(mask) / 8; + } + } + + for (/* empty */; first_begin < first_end; ++first_begin) + { + if (*first_begin) + { + *first_begin = *second_begin++; + } + } +} +) + /// Second filter size must be equal to number of 1s in the first filter. /// The result has size equal to first filter size and contains 1s only where both filters contain 1s. 
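/* A minimal standalone sketch (hedged; the function name below is illustrative, not part of this
 * diff) of the semantics every combineFiltersImpl specialization above must preserve: walk the
 * first filter and, wherever it is non-zero, consume the next byte of the second filter. The
 * AVX512VBMI2 and AVX2/PDEP paths are vectorized forms of this loop.
 */
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <vector>

static std::vector<uint8_t> referenceCombineFilters(const std::vector<uint8_t> & first, const std::vector<uint8_t> & second)
{
    std::vector<uint8_t> result(first.size(), 0);
    std::size_t second_pos = 0;
    for (std::size_t i = 0; i < first.size(); ++i)
        if (first[i])
            result[i] = second[second_pos++]; /// one byte of `second` is consumed per non-zero byte of `first`
    assert(second_pos == second.size());      /// second filter size == number of 1s in the first filter
    return result;
}

/// Example: first = {1, 0, 1, 1, 0}, second = {1, 0, 1}  ->  result = {1, 0, 0, 1, 0}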
static ColumnPtr combineFilters(ColumnPtr first, ColumnPtr second) @@ -1330,6 +1405,10 @@ static ColumnPtr combineFilters(ColumnPtr first, ColumnPtr second) { TargetSpecific::AVX512VBMI2::combineFiltersImpl(first_data.begin(), first_data.end(), second_data); } + else if (isArchSupported(TargetArch::AVX2)) + { + TargetSpecific::AVX2::combineFiltersImpl(first_data.begin(), first_data.end(), second_data); + } else #endif { diff --git a/src/Storages/MergeTree/MergeTreeRangeReader.h b/src/Storages/MergeTree/MergeTreeRangeReader.h index 04d42138963..79ed18f4d1f 100644 --- a/src/Storages/MergeTree/MergeTreeRangeReader.h +++ b/src/Storages/MergeTree/MergeTreeRangeReader.h @@ -231,7 +231,7 @@ public: using RangesInfo = std::vector; - explicit ReadResult(Poco::Logger * log_) : log(log_) {} + explicit ReadResult(LoggerPtr log_) : log(log_) {} static size_t getLastMark(const MergeTreeRangeReader::ReadResult::RangesInfo & ranges); @@ -298,7 +298,7 @@ public: size_t countZeroTails(const IColumn::Filter & filter, NumRows & zero_tails, bool can_read_incomplete_granules) const; static size_t numZerosInTail(const UInt8 * begin, const UInt8 * end); - Poco::Logger * log; + LoggerPtr log; }; ReadResult read(size_t max_rows, MarkRanges & ranges); @@ -325,7 +325,7 @@ private: bool is_initialized = false; Names non_const_virtual_column_names; - Poco::Logger * log = &Poco::Logger::get("MergeTreeRangeReader"); + LoggerPtr log = getLogger("MergeTreeRangeReader"); }; } diff --git a/src/Storages/MergeTree/MergeTreeReadPool.h b/src/Storages/MergeTree/MergeTreeReadPool.h index 3a1af947cae..e45ccad912f 100644 --- a/src/Storages/MergeTree/MergeTreeReadPool.h +++ b/src/Storages/MergeTree/MergeTreeReadPool.h @@ -108,7 +108,7 @@ private: std::vector threads_tasks; std::set remaining_thread_tasks; - Poco::Logger * log = &Poco::Logger::get("MergeTreeReadPool"); + LoggerPtr log = getLogger("MergeTreeReadPool"); }; } diff --git a/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.cpp b/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.cpp index 69e64d5ea98..47436ed1407 100644 --- a/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.cpp +++ b/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.cpp @@ -1,3 +1,4 @@ +#include #include @@ -67,15 +68,11 @@ MergeTreeReadTaskPtr MergeTreeReadPoolParallelReplicas::getTask(size_t /*task_id auto & current_task = buffered_ranges.front(); - size_t part_idx = 0; - for (size_t index = 0; index < per_part_infos.size(); ++index) - { - if (per_part_infos[index]->data_part->info == current_task.info) - { - part_idx = index; - break; - } - } + auto part_it + = std::ranges::find_if(per_part_infos, [¤t_task](const auto & part) { return part->data_part->info == current_task.info; }); + if (part_it == per_part_infos.end()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Assignment contains an unknown part (current_task: {})", current_task.describe()); + const size_t part_idx = std::distance(per_part_infos.begin(), part_it); MarkRanges ranges_to_read; size_t current_sum_marks = 0; diff --git a/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.h b/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.h index 7579a892b67..6a548dffe37 100644 --- a/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.h +++ b/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.h @@ -34,7 +34,7 @@ private: const CoordinationMode coordination_mode; RangesInDataPartsDescription buffered_ranges; bool no_more_tasks_available{false}; - Poco::Logger * log = 
&Poco::Logger::get("MergeTreeReadPoolParallelReplicas"); + LoggerPtr log = getLogger("MergeTreeReadPoolParallelReplicas"); }; } diff --git a/src/Storages/MergeTree/MergeTreeReadTask.cpp b/src/Storages/MergeTree/MergeTreeReadTask.cpp index dcfed700fac..41c7531b6a6 100644 --- a/src/Storages/MergeTree/MergeTreeReadTask.cpp +++ b/src/Storages/MergeTree/MergeTreeReadTask.cpp @@ -184,7 +184,11 @@ MergeTreeReadTask::BlockAndProgress MergeTreeReadTask::read(const BlockSizeParam Block block; if (read_result.num_rows != 0) + { + for (const auto & column : read_result.columns) + column->assumeMutableRef().shrinkToFit(); block = sample_block.cloneWithColumns(read_result.columns); + } BlockAndProgress res = { .block = std::move(block), diff --git a/src/Storages/MergeTree/MergeTreeSelectProcessor.h b/src/Storages/MergeTree/MergeTreeSelectProcessor.h index cf1a6313b51..b06ae788e91 100644 --- a/src/Storages/MergeTree/MergeTreeSelectProcessor.h +++ b/src/Storages/MergeTree/MergeTreeSelectProcessor.h @@ -114,7 +114,7 @@ private: /// Should we add part level to produced chunk. Part level is useful for next steps if query has FINAL bool add_part_level = false; - Poco::Logger * log = &Poco::Logger::get("MergeTreeSelectProcessor"); + LoggerPtr log = getLogger("MergeTreeSelectProcessor"); std::atomic is_cancelled{false}; }; diff --git a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp index 82e9f8fd2db..d0fbc316024 100644 --- a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp +++ b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp @@ -68,7 +68,7 @@ private: /// Should read using direct IO bool read_with_direct_io; - Poco::Logger * log = &Poco::Logger::get("MergeTreeSequentialSource"); + LoggerPtr log = getLogger("MergeTreeSequentialSource"); std::optional mark_ranges; @@ -140,6 +140,7 @@ MergeTreeSequentialSource::MergeTreeSequentialSource( if (storage.supportsSubcolumns()) options.withSubcolumns(); + columns_for_reader = storage_snapshot->getColumnsByNames(options, columns_to_read); } else @@ -156,6 +157,7 @@ MergeTreeSequentialSource::MergeTreeSequentialSource( read_settings.local_fs_method = LocalFSReadMethod::pread; if (read_with_direct_io) read_settings.direct_io_threshold = 1; + /// Configure throttling switch (type) { @@ -224,7 +226,10 @@ try for (size_t i = 0; i < num_columns; ++i) { if (header.has(it->name)) + { + columns[i]->assumeMutableRef().shrinkToFit(); res_columns.emplace_back(std::move(columns[i])); + } ++it; } @@ -318,7 +323,7 @@ public: bool apply_deleted_mask_, ActionsDAGPtr filter_, ContextPtr context_, - Poco::Logger * log_) + LoggerPtr log_) : ISourceStep(DataStream{.header = storage_snapshot_->getSampleBlockForColumns(columns_to_read_)}) , type(type_) , storage(storage_) @@ -381,7 +386,7 @@ private: bool apply_deleted_mask; ActionsDAGPtr filter; ContextPtr context; - Poco::Logger * log; + LoggerPtr log; }; void createReadFromPartStep( @@ -394,7 +399,7 @@ void createReadFromPartStep( bool apply_deleted_mask, ActionsDAGPtr filter, ContextPtr context, - Poco::Logger * log) + LoggerPtr log) { auto reading = std::make_unique(type, storage, storage_snapshot, std::move(data_part), diff --git a/src/Storages/MergeTree/MergeTreeSequentialSource.h b/src/Storages/MergeTree/MergeTreeSequentialSource.h index 41def48aab6..a5e36a7726f 100644 --- a/src/Storages/MergeTree/MergeTreeSequentialSource.h +++ b/src/Storages/MergeTree/MergeTreeSequentialSource.h @@ -41,6 +41,6 @@ void createReadFromPartStep( bool apply_deleted_mask, ActionsDAGPtr 
filter, ContextPtr context, - Poco::Logger * log); + LoggerPtr log); } diff --git a/src/Storages/MergeTree/MergeTreeSettings.cpp b/src/Storages/MergeTree/MergeTreeSettings.cpp index e0015cdeb40..b42da22239e 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.cpp +++ b/src/Storages/MergeTree/MergeTreeSettings.cpp @@ -44,7 +44,7 @@ void MergeTreeSettings::loadFromConfig(const String & config_elem, const Poco::U } } -void MergeTreeSettings::loadFromQuery(ASTStorage & storage_def, ContextPtr context) +void MergeTreeSettings::loadFromQuery(ASTStorage & storage_def, ContextPtr context, bool is_attach) { if (storage_def.settings) { @@ -64,8 +64,8 @@ void MergeTreeSettings::loadFromQuery(ASTStorage & storage_def, ContextPtr conte auto ast = dynamic_cast(custom.getImpl()).ast; if (ast && isDiskFunction(ast)) { - auto disk_name = getOrCreateDiskFromDiskAST(ast, context); - LOG_TRACE(&Poco::Logger::get("MergeTreeSettings"), "Created custom disk {}", disk_name); + auto disk_name = getOrCreateDiskFromDiskAST(ast, context, is_attach); + LOG_TRACE(getLogger("MergeTreeSettings"), "Created custom disk {}", disk_name); value = disk_name; } } @@ -213,6 +213,27 @@ void MergeTreeSettings::sanityCheck(size_t background_pool_tasks) const } } +void MergeTreeColumnSettings::validate(const SettingsChanges & changes) +{ + static const MergeTreeSettings merge_tree_settings; + static const std::set allowed_column_level_settings = + { + "min_compress_block_size", + "max_compress_block_size" + }; + + for (const auto & change : changes) + { + if (!allowed_column_level_settings.contains(change.name)) + throw Exception( + ErrorCodes::UNKNOWN_SETTING, + "Setting {} is unknown or not supported at column level, supported settings: {}", + change.name, + fmt::join(allowed_column_level_settings, ", ")); + merge_tree_settings.checkCanSet(change.name, change.value); + } +} + std::vector MergeTreeSettings::getAllRegisteredNames() const { diff --git a/src/Storages/MergeTree/MergeTreeSettings.h b/src/Storages/MergeTree/MergeTreeSettings.h index eb6c14d7754..96cab9c0293 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.h +++ b/src/Storages/MergeTree/MergeTreeSettings.h @@ -255,7 +255,7 @@ struct MergeTreeSettings : public BaseSettings, public void loadFromConfig(const String & config_elem, const Poco::Util::AbstractConfiguration & config); /// NOTE: will rewrite the AST to add immutable settings. 
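/* A simplified, self-contained model (hedged; not the ClickHouse SettingsChanges API) of the
 * column-level allowlist introduced in MergeTreeColumnSettings::validate above: per-column
 * SETTINGS may only override the two compression block sizes, and every other MergeTree setting
 * is rejected before it can reach the column declaration.
 */
#include <set>
#include <stdexcept>
#include <string>

static void validateColumnLevelSettingSketch(const std::string & name)
{
    static const std::set<std::string> allowed = {"min_compress_block_size", "max_compress_block_size"};
    if (!allowed.contains(name))
        throw std::invalid_argument("Setting " + name + " is unknown or not supported at column level");
}

/// validateColumnLevelSettingSketch("max_compress_block_size"); // accepted (the real code then also calls checkCanSet)
/// validateColumnLevelSettingSketch("index_granularity");       // rejected at column level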
- void loadFromQuery(ASTStorage & storage_def, ContextPtr context); + void loadFromQuery(ASTStorage & storage_def, ContextPtr context, bool is_attach); /// We check settings after storage creation static bool isReadonlySetting(const String & name) @@ -277,4 +277,11 @@ struct MergeTreeSettings : public BaseSettings, public using MergeTreeSettingsPtr = std::shared_ptr; + +/// Column-level Merge-Tree settings which overwrite MergeTree settings +namespace MergeTreeColumnSettings +{ + void validate(const SettingsChanges & changes); +} + } diff --git a/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp b/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp index 0cac051bb2c..4aecf85ac2a 100644 --- a/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp +++ b/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp @@ -56,7 +56,7 @@ MergeTreeWhereOptimizer::MergeTreeWhereOptimizer( const ConditionEstimator & estimator_, const Names & queried_columns_, const std::optional & supported_columns_, - Poco::Logger * log_) + LoggerPtr log_) : estimator(estimator_) , table_columns{collections::map( metadata_snapshot->getColumns().getAllPhysical(), [](const NameAndTypePair & col) { return col.name; })} @@ -132,8 +132,8 @@ std::optional MergeTreeWhe if (!optimize_result) return {}; - auto filter_actions = reconstructDAG(optimize_result->where_conditions, context); - auto prewhere_filter_actions = reconstructDAG(optimize_result->prewhere_conditions, context); + auto filter_actions = reconstructDAG(optimize_result->where_conditions); + auto prewhere_filter_actions = reconstructDAG(optimize_result->prewhere_conditions); FilterActionsOptimizeResult result = { std::move(filter_actions), std::move(prewhere_filter_actions) }; return result; @@ -343,7 +343,7 @@ ASTPtr MergeTreeWhereOptimizer::reconstructAST(const Conditions & conditions) return function; } -ActionsDAGPtr MergeTreeWhereOptimizer::reconstructDAG(const Conditions & conditions, const ContextPtr & context) +ActionsDAGPtr MergeTreeWhereOptimizer::reconstructDAG(const Conditions & conditions) { if (conditions.empty()) return {}; @@ -354,7 +354,7 @@ ActionsDAGPtr MergeTreeWhereOptimizer::reconstructDAG(const Conditions & conditi for (const auto & condition : conditions) filter_nodes.push_back(condition.node.getDAGNode()); - return ActionsDAG::buildFilterActionsDAG(filter_nodes, {} /*node_name_to_input_node_column*/, context); + return ActionsDAG::buildFilterActionsDAG(filter_nodes); } std::optional MergeTreeWhereOptimizer::optimizeImpl(const RPNBuilderTreeNode & node, diff --git a/src/Storages/MergeTree/MergeTreeWhereOptimizer.h b/src/Storages/MergeTree/MergeTreeWhereOptimizer.h index 0ef7ac9efff..b56219e3c59 100644 --- a/src/Storages/MergeTree/MergeTreeWhereOptimizer.h +++ b/src/Storages/MergeTree/MergeTreeWhereOptimizer.h @@ -41,7 +41,7 @@ public: const ConditionEstimator & estimator_, const Names & queried_columns_, const std::optional & supported_columns_, - Poco::Logger * log_); + LoggerPtr log_); void optimize(SelectQueryInfo & select_query_info, const ContextPtr & context) const; @@ -123,7 +123,7 @@ private: static ASTPtr reconstructAST(const Conditions & conditions); /// Reconstruct DAG from conditions - static ActionsDAGPtr reconstructDAG(const Conditions & conditions, const ContextPtr & context); + static ActionsDAGPtr reconstructDAG(const Conditions & conditions); void optimizeArbitrary(ASTSelectQuery & select) const; @@ -156,7 +156,7 @@ private: const std::optional supported_columns; const NameSet sorting_key_names; const NameToIndexMap 
primary_key_names_positions; - Poco::Logger * log; + LoggerPtr log; std::unordered_map column_sizes; UInt64 total_size_of_queried_columns = 0; }; diff --git a/src/Storages/MergeTree/MergeTreeWriteAheadLog.cpp b/src/Storages/MergeTree/MergeTreeWriteAheadLog.cpp index a8b3df483ed..2236c1a9380 100644 --- a/src/Storages/MergeTree/MergeTreeWriteAheadLog.cpp +++ b/src/Storages/MergeTree/MergeTreeWriteAheadLog.cpp @@ -36,7 +36,7 @@ MergeTreeWriteAheadLog::MergeTreeWriteAheadLog( , name(name_) , path(storage.getRelativeDataPath() + name_) , pool(storage.getContext()->getSchedulePool()) - , log(&Poco::Logger::get(storage.getLogName() + " (WriteAheadLog)")) + , log(getLogger(storage.getLogName() + " (WriteAheadLog)")) { init(); sync_task = pool.createTask("MergeTreeWriteAheadLog::sync", [this] diff --git a/src/Storages/MergeTree/MergeTreeWriteAheadLog.h b/src/Storages/MergeTree/MergeTreeWriteAheadLog.h index 5fb9dd907a1..9550fa6ecee 100644 --- a/src/Storages/MergeTree/MergeTreeWriteAheadLog.h +++ b/src/Storages/MergeTree/MergeTreeWriteAheadLog.h @@ -99,7 +99,7 @@ private: mutable std::mutex write_mutex; - Poco::Logger * log; + LoggerPtr log; }; } diff --git a/src/Storages/MergeTree/MergedBlockOutputStream.cpp b/src/Storages/MergeTree/MergedBlockOutputStream.cpp index 8b34c221eec..1d10a1433ef 100644 --- a/src/Storages/MergeTree/MergedBlockOutputStream.cpp +++ b/src/Storages/MergeTree/MergedBlockOutputStream.cpp @@ -155,7 +155,7 @@ MergedBlockOutputStream::Finalizer MergedBlockOutputStream::finalizePartAsync( for (const auto & name : checksums_to_remove) checksums.files.erase(name); - LOG_TRACE(&Poco::Logger::get("MergedBlockOutputStream"), "filled checksums {}", new_part->getNameWithState()); + LOG_TRACE(getLogger("MergedBlockOutputStream"), "filled checksums {}", new_part->getNameWithState()); for (const auto & [projection_name, projection_part] : new_part->getProjectionParts()) checksums.addFile( diff --git a/src/Storages/MergeTree/MutateFromLogEntryTask.h b/src/Storages/MergeTree/MutateFromLogEntryTask.h index 42d8307e948..68c7f464214 100644 --- a/src/Storages/MergeTree/MutateFromLogEntryTask.h +++ b/src/Storages/MergeTree/MutateFromLogEntryTask.h @@ -23,7 +23,7 @@ public: StorageReplicatedMergeTree & storage_, Callback && task_result_callback_) : ReplicatedMergeMutateTaskBase( - &Poco::Logger::get(storage_.getStorageID().getShortName() + "::" + selected_entry_->log_entry->new_part_name + " (MutateFromLogEntryTask)"), + getLogger(storage_.getStorageID().getShortName() + "::" + selected_entry_->log_entry->new_part_name + " (MutateFromLogEntryTask)"), storage_, selected_entry_, task_result_callback_) diff --git a/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp b/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp index bf8e879e3d0..0b19aebe36d 100644 --- a/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp +++ b/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp @@ -111,7 +111,7 @@ bool MutatePlainMergeTreeTask::executeStep() if (merge_mutate_entry->txn) merge_mutate_entry->txn->onException(); PreformattedMessage exception_message = getCurrentExceptionMessageAndPattern(/* with_stacktrace */ false); - LOG_ERROR(&Poco::Logger::get("MutatePlainMergeTreeTask"), exception_message); + LOG_ERROR(getLogger("MutatePlainMergeTreeTask"), exception_message); storage.updateMutationEntriesErrors(future_part, false, exception_message.text); write_part_log(ExecutionStatus::fromCurrentException("", true)); tryLogCurrentException(__PRETTY_FUNCTION__); diff --git a/src/Storages/MergeTree/MutateTask.cpp 
b/src/Storages/MergeTree/MutateTask.cpp index e4070aa8262..e325bb5d720 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -21,7 +21,9 @@ #include #include #include +#include #include +#include #include #include @@ -61,7 +63,7 @@ static void splitAndModifyMutationCommands( const MutationCommands & commands, MutationCommands & for_interpreter, MutationCommands & for_file_renames, - Poco::Logger * log) + LoggerPtr log) { auto part_columns = part->getColumnsDescription(); @@ -308,6 +310,15 @@ getColumnsForNewDataPart( } } + if (!storage_columns_set.contains(BlockNumberColumn::name)) + { + if (source_part->tryGetSerialization(BlockNumberColumn::name) != nullptr) + { + storage_columns.push_back({BlockNumberColumn::name, BlockNumberColumn::type}); + storage_columns_set.insert(BlockNumberColumn::name); + } + } + SerializationInfoByName new_serialization_infos; for (const auto & [name, old_info] : serialization_infos) { @@ -674,15 +685,25 @@ static NameToNameVector collectFilesForRenames( { if (command.type == MutationCommand::Type::DROP_INDEX) { - if (source_part->checksums.has(INDEX_FILE_PREFIX + command.column_name + ".idx2")) + static const std::array suffixes = {".idx2", ".idx"}; + static const std::array gin_suffixes = {".gin_dict", ".gin_post", ".gin_seg", ".gin_sid"}; /// .gin_* is inverted index + + for (const auto & suffix : suffixes) { - add_rename(INDEX_FILE_PREFIX + command.column_name + ".idx2", ""); - add_rename(INDEX_FILE_PREFIX + command.column_name + mrk_extension, ""); + const String filename = INDEX_FILE_PREFIX + command.column_name + suffix; + const String filename_mrk = INDEX_FILE_PREFIX + command.column_name + mrk_extension; + + if (source_part->checksums.has(filename)) + { + add_rename(filename, ""); + add_rename(filename_mrk, ""); + } } - else if (source_part->checksums.has(INDEX_FILE_PREFIX + command.column_name + ".idx")) + for (const auto & gin_suffix : gin_suffixes) { - add_rename(INDEX_FILE_PREFIX + command.column_name + ".idx", ""); - add_rename(INDEX_FILE_PREFIX + command.column_name + mrk_extension, ""); + const String filename = INDEX_FILE_PREFIX + command.column_name + gin_suffix; + if (source_part->checksums.has(filename)) + add_rename(filename, ""); } } else if (command.type == MutationCommand::Type::DROP_PROJECTION) @@ -896,7 +917,7 @@ struct MutationContext TableLockHolder * holder; MergeListEntry * mutate_entry; - Poco::Logger * log{&Poco::Logger::get("MutateTask")}; + LoggerPtr log{getLogger("MutateTask")}; FutureMergedMutatedPartPtr future_part; MergeTreeData::DataPartPtr source_part; @@ -975,7 +996,7 @@ public: , projection(projection_) , block_num(block_num_) , ctx(ctx_) - , log(&Poco::Logger::get("MergeProjectionPartsTask")) + , log(getLogger("MergeProjectionPartsTask")) { LOG_DEBUG(log, "Selected {} projection_parts from {} to {}", parts.size(), parts.front()->name, parts.back()->name); level_parts[current_level] = std::move(parts); @@ -1079,7 +1100,7 @@ private: size_t & block_num; MutationContextPtr ctx; - Poco::Logger * log; + LoggerPtr log; std::map level_parts; size_t current_level = 0; @@ -1921,7 +1942,7 @@ static bool canSkipConversionToNullable(const MergeTreeDataPartPtr & part, const if (!part_column) return false; - /// For ALTER MODIFY COLUMN from 'Type' to 'Nullable(Type)' we can skip mutatation and + /// For ALTER MODIFY COLUMN from 'Type' to 'Nullable(Type)' we can skip mutation and /// apply only metadata conversion. But it doesn't work for custom serialization. 
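/* A hedged, standalone illustration (enum and function names are invented for this sketch) of the
 * "metadata only" rule that canSkipConversionToNullable here and canSkipConversionToVariant below
 * implement: a MODIFY COLUMN can skip rewriting part data when the new type merely extends the old
 * one, e.g. T -> Nullable(T) (adds a null map) or Variant(T1, T2) -> Variant(T1, T2, T3) (adds
 * alternatives), because the existing column files remain a valid encoding of the data. As the
 * comment above notes, custom serializations are the exception and still require a rewrite.
 */
enum class ConversionKindSketch { AddsNullability, ExtendsVariant, ChangesRepresentation };

constexpr bool canSkipMutationSketch(ConversionKindSketch kind)
{
    return kind == ConversionKindSketch::AddsNullability || kind == ConversionKindSketch::ExtendsVariant;
}

static_assert(canSkipMutationSketch(ConversionKindSketch::AddsNullability));
static_assert(canSkipMutationSketch(ConversionKindSketch::ExtendsVariant));
static_assert(!canSkipMutationSketch(ConversionKindSketch::ChangesRepresentation));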
const auto * to_nullable = typeid_cast(command.data_type.get()); if (!to_nullable) @@ -1937,6 +1958,20 @@ static bool canSkipConversionToNullable(const MergeTreeDataPartPtr & part, const return true; } +static bool canSkipConversionToVariant(const MergeTreeDataPartPtr & part, const MutationCommand & command) +{ + if (command.type != MutationCommand::READ_COLUMN) + return false; + + auto part_column = part->tryGetColumn(command.column_name); + if (!part_column) + return false; + + /// For ALTER MODIFY COLUMN with Variant extension (like 'Variant(T1, T2)' to 'Variant(T1, T2, T3, ...)') + /// we can skip mutation and apply only metadata conversion. + return isVariantExtension(part_column->type, command.data_type); +} + static bool canSkipMutationCommandForPart(const MergeTreeDataPartPtr & part, const MutationCommand & command, const ContextPtr & context) { if (command.partition) @@ -1952,6 +1987,9 @@ static bool canSkipMutationCommandForPart(const MergeTreeDataPartPtr & part, con if (canSkipConversionToNullable(part, command)) return true; + if (canSkipConversionToVariant(part, command)) + return true; + return false; } diff --git a/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp b/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp index bbe8c30a5c0..abc51bde3fb 100644 --- a/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp +++ b/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp @@ -56,6 +56,32 @@ takeFromRange(const MarkRange & range, size_t min_number_of_marks, size_t & curr current_marks_amount += range_we_take.getNumberOfMarks(); return range_we_take.getNumberOfMarks(); } + +void sortResponseRanges(RangesInDataPartsDescription & result) +{ + std::ranges::sort(result, [](const auto & lhs, const auto & rhs) { return lhs.info < rhs.info; }); + + RangesInDataPartsDescription new_result; + + /// Aggregate ranges for each part within a single entry + for (auto & ranges_in_part : result) + { + if (new_result.empty() || new_result.back().info != ranges_in_part.info) + new_result.push_back(RangesInDataPartDescription{.info = ranges_in_part.info}); + + new_result.back().ranges.insert( + new_result.back().ranges.end(), + std::make_move_iterator(ranges_in_part.ranges.begin()), + std::make_move_iterator(ranges_in_part.ranges.end())); + ranges_in_part.ranges.clear(); + } + + /// Sort ranges for each part + for (auto & ranges_in_part : new_result) + std::sort(ranges_in_part.ranges.begin(), ranges_in_part.ranges.end()); + + result = std::move(new_result); +} } namespace ProfileEvents @@ -219,7 +245,7 @@ private: }; std::vector replica_status; - Poco::Logger * log = &Poco::Logger::get("DefaultCoordinator"); + LoggerPtr log = getLogger("DefaultCoordinator"); /// Workflow of a segment: /// 0. 
`all_parts_to_read` contains all the parts and thus all the segments initially present there (virtually) @@ -429,12 +455,17 @@ void DefaultCoordinator::handleInitialAllRangesAnnouncement(InitialAllRangesAnno setProgressCallback(); /// Sift the queue to move out all invisible segments - for (const auto & segment : distribution_by_hash_queue[replica_num]) + for (auto segment_it = distribution_by_hash_queue[replica_num].begin(); segment_it != distribution_by_hash_queue[replica_num].end();) { - if (!part_visibility[segment.info.getPartNameV1()].contains(replica_num)) + if (!part_visibility[segment_it->info.getPartNameV1()].contains(replica_num)) { - chassert(segment.ranges.size() == 1); - enqueueToStealerOrStealingQueue(segment.info, segment.ranges.front()); + chassert(segment_it->ranges.size() == 1); + enqueueToStealerOrStealingQueue(segment_it->info, segment_it->ranges.front()); + segment_it = distribution_by_hash_queue[replica_num].erase(segment_it); + } + else + { + ++segment_it; } } } @@ -599,6 +630,8 @@ void DefaultCoordinator::processPartsFurther( { ProfileEventTimeIncrement watch(ProfileEvents::ParallelReplicasProcessingPartsMicroseconds); + auto replica_can_read_part = [&](auto replica, const auto & part) { return part_visibility[part.getPartNameV1()].contains(replica); }; + for (const auto & part : all_parts_to_read) { if (current_marks_amount >= min_number_of_marks) @@ -622,7 +655,7 @@ void DefaultCoordinator::processPartsFurther( = MarkRange{std::max(range.begin, segment_begin), std::min(range.end, segment_begin + mark_segment_size)}; const auto owner = computeConsistentHash(part.description.info.getPartNameV1(), segment_begin, scan_mode); - if (owner == replica_num) + if (owner == replica_num && replica_can_read_part(replica_num, part.description.info)) { const auto taken = takeFromRange(cur_segment, min_number_of_marks, current_marks_amount, result); if (taken == range.getNumberOfMarks()) @@ -681,7 +714,7 @@ void DefaultCoordinator::enqueueSegment(const MergeTreePartInfo & info, const Ma { /// TODO: optimize me (maybe we can store something lighter than RangesInDataPartDescription) distribution_by_hash_queue[owner].insert(RangesInDataPartDescription{.info = info, .ranges = {segment}}); - LOG_TEST(log, "Segment {} is added to its owner's ({}) queue", segment, owner); + LOG_TEST(log, "Segment {} of {} is added to its owner's ({}) queue", segment, info.getPartNameV1(), owner); } else enqueueToStealerOrStealingQueue(info, segment); @@ -695,12 +728,12 @@ void DefaultCoordinator::enqueueToStealerOrStealingQueue(const MergeTreePartInfo if (possiblyCanReadPart(stealer_by_hash, info)) { distribution_by_hash_queue[stealer_by_hash].insert(std::move(range)); - LOG_TEST(log, "Segment {} is added to its stealer's ({}) queue", segment, stealer_by_hash); + LOG_TEST(log, "Segment {} of {} is added to its stealer's ({}) queue", segment, info.getPartNameV1(), stealer_by_hash); } else { ranges_for_stealing_queue.push_back(std::move(range)); - LOG_TEST(log, "Segment {} is added to stealing queue", segment); + LOG_TEST(log, "Segment {} of {} is added to stealing queue", segment, info.getPartNameV1()); } } @@ -768,6 +801,8 @@ ParallelReadResponse DefaultCoordinator::handleRequest(ParallelReadRequest reque } } + sortResponseRanges(response.description); + LOG_DEBUG( log, "Going to respond to replica {} with {}; mine_marks={}, stolen_by_hash={}, stolen_rest={}", @@ -800,7 +835,7 @@ public: Parts all_parts_to_read; size_t total_rows_to_read = 0; - Poco::Logger * log = 
&Poco::Logger::get(fmt::format("{}{}", magic_enum::enum_name(mode), "Coordinator")); + LoggerPtr log = getLogger(fmt::format("{}{}", magic_enum::enum_name(mode), "Coordinator")); }; template diff --git a/src/Storages/MergeTree/PartMovesBetweenShardsOrchestrator.cpp b/src/Storages/MergeTree/PartMovesBetweenShardsOrchestrator.cpp index 76b8080f64c..78fcfabb704 100644 --- a/src/Storages/MergeTree/PartMovesBetweenShardsOrchestrator.cpp +++ b/src/Storages/MergeTree/PartMovesBetweenShardsOrchestrator.cpp @@ -20,7 +20,7 @@ PartMovesBetweenShardsOrchestrator::PartMovesBetweenShardsOrchestrator(StorageRe : storage(storage_) , zookeeper_path(storage.zookeeper_path) , logger_name(storage.getStorageID().getFullTableName() + " (PartMovesBetweenShardsOrchestrator)") - , log(&Poco::Logger::get(logger_name)) + , log(getLogger(logger_name)) , entries_znode_path(zookeeper_path + "/part_moves_shard") { /// Schedule pool is not designed for long-running tasks. TODO replace with a separate thread? diff --git a/src/Storages/MergeTree/PartMovesBetweenShardsOrchestrator.h b/src/Storages/MergeTree/PartMovesBetweenShardsOrchestrator.h index af21022953c..abe259c77ab 100644 --- a/src/Storages/MergeTree/PartMovesBetweenShardsOrchestrator.h +++ b/src/Storages/MergeTree/PartMovesBetweenShardsOrchestrator.h @@ -176,7 +176,7 @@ private: String zookeeper_path; String logger_name; - Poco::Logger * log = nullptr; + LoggerPtr log = nullptr; std::atomic need_stop{false}; BackgroundSchedulePool::TaskHolder task; diff --git a/src/Storages/MergeTree/PartitionPruner.cpp b/src/Storages/MergeTree/PartitionPruner.cpp index 668576f9021..eb51d600da3 100644 --- a/src/Storages/MergeTree/PartitionPruner.cpp +++ b/src/Storages/MergeTree/PartitionPruner.cpp @@ -59,7 +59,7 @@ bool PartitionPruner::canBePruned(const IMergeTreeDataPart & part) const { WriteBufferFromOwnString buf; part.partition.serializeText(part.storage, buf, FormatSettings{}); - LOG_TRACE(&Poco::Logger::get("PartitionPruner"), "Partition {} gets pruned", buf.str()); + LOG_TRACE(getLogger("PartitionPruner"), "Partition {} gets pruned", buf.str()); } } diff --git a/src/Storages/MergeTree/RPNBuilder.cpp b/src/Storages/MergeTree/RPNBuilder.cpp index 3ffef1de718..dc8c6b0c230 100644 --- a/src/Storages/MergeTree/RPNBuilder.cpp +++ b/src/Storages/MergeTree/RPNBuilder.cpp @@ -14,7 +14,9 @@ #include #include +#include #include +#include #include @@ -390,6 +392,15 @@ size_t RPNBuilderFunctionTreeNode::getArgumentsSize() const } else { + // indexHint arguments are stored inside of `FunctionIndexHint` class, + // because they are used only for index analysis. + if (dag_node->function_base->getName() == "indexHint") + { + const auto * adaptor = typeid_cast(dag_node->function_base.get()); + const auto * index_hint = typeid_cast(adaptor->getFunction().get()); + return index_hint->getActions()->getOutputs().size(); + } + return dag_node->children.size(); } } @@ -409,6 +420,15 @@ RPNBuilderTreeNode RPNBuilderFunctionTreeNode::getArgumentAt(size_t index) const } else { + // indexHint arguments are stored inside of `FunctionIndexHint` class, + // because they are used only for index analysis. 
+ if (dag_node->function_base->getName() == "indexHint") + { + const auto * adaptor = typeid_cast(dag_node->function_base.get()); + const auto * index_hint = typeid_cast(adaptor->getFunction().get()); + return RPNBuilderTreeNode(index_hint->getActions()->getOutputs()[index], tree_context); + } + return RPNBuilderTreeNode(dag_node->children[index], tree_context); } } diff --git a/src/Storages/MergeTree/RPNBuilder.h b/src/Storages/MergeTree/RPNBuilder.h index b0755ccd3ca..d750c02d3e1 100644 --- a/src/Storages/MergeTree/RPNBuilder.h +++ b/src/Storages/MergeTree/RPNBuilder.h @@ -229,6 +229,12 @@ private: rpn_elements.emplace_back(std::move(element)); } + if (arguments_size == 0 && function_node.getFunctionName() == "indexHint") + { + element.function = RPNElement::ALWAYS_TRUE; + rpn_elements.emplace_back(std::move(element)); + } + return; } } diff --git a/src/Storages/MergeTree/ReplicatedMergeMutateTaskBase.h b/src/Storages/MergeTree/ReplicatedMergeMutateTaskBase.h index 18fcacecc9e..2b1fcec62a8 100644 --- a/src/Storages/MergeTree/ReplicatedMergeMutateTaskBase.h +++ b/src/Storages/MergeTree/ReplicatedMergeMutateTaskBase.h @@ -17,7 +17,7 @@ class ReplicatedMergeMutateTaskBase : public IExecutableTask { public: ReplicatedMergeMutateTaskBase( - Poco::Logger * log_, + LoggerPtr log_, StorageReplicatedMergeTree & storage_, ReplicatedMergeTreeQueue::SelectedEntryPtr & selected_entry_, IExecutableTask::TaskResultCallback & task_result_callback_) @@ -66,7 +66,7 @@ protected: ReplicatedMergeTreeQueue::SelectedEntryPtr selected_entry; ReplicatedMergeTreeLogEntry & entry; MergeList::EntryPtr merge_mutate_entry{nullptr}; - Poco::Logger * log; + LoggerPtr log; /// ProfileEvents for current part will be stored here ProfileEvents::Counters profile_counters; ContextMutablePtr task_context; diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeAttachThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeAttachThread.cpp index a544ac908a4..336d19692d4 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeAttachThread.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeAttachThread.cpp @@ -19,7 +19,7 @@ namespace ErrorCodes ReplicatedMergeTreeAttachThread::ReplicatedMergeTreeAttachThread(StorageReplicatedMergeTree & storage_) : storage(storage_) , log_name(storage.getStorageID().getFullTableName() + " (ReplicatedMergeTreeAttachThread)") - , log(&Poco::Logger::get(log_name)) + , log(getLogger(log_name)) { task = storage.getContext()->getSchedulePool().createTask(log_name, [this] { run(); }); const auto storage_settings = storage.getSettings(); diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeAttachThread.h b/src/Storages/MergeTree/ReplicatedMergeTreeAttachThread.h index 222b30b519b..250a5ed34d1 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeAttachThread.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeAttachThread.h @@ -34,7 +34,7 @@ private: BackgroundSchedulePool::TaskHolder task; std::string log_name; - Poco::Logger * log; + LoggerPtr log; std::atomic first_try_done{false}; diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp index 8daee661c75..67942491ae2 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp @@ -24,7 +24,7 @@ namespace ErrorCodes ReplicatedMergeTreeCleanupThread::ReplicatedMergeTreeCleanupThread(StorageReplicatedMergeTree & storage_) : storage(storage_) , log_name(storage.getStorageID().getFullTableName() + " 
(ReplicatedMergeTreeCleanupThread)") - , log(&Poco::Logger::get(log_name)) + , log(getLogger(log_name)) , sleep_ms(storage.getSettings()->cleanup_delay_period * 1000) { task = storage.getContext()->getSchedulePool().createTask(log_name, [this]{ run(); }); diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.h b/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.h index ae9aabdb4e7..5beaef56995 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.h @@ -40,7 +40,7 @@ public: private: StorageReplicatedMergeTree & storage; String log_name; - Poco::Logger * log; + LoggerPtr log; BackgroundSchedulePool::TaskHolder task; pcg64 rng{randomSeed()}; diff --git a/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp index b1875464725..156c41563ec 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp @@ -28,7 +28,7 @@ static const auto PART_CHECK_ERROR_SLEEP_MS = 5 * 1000; ReplicatedMergeTreePartCheckThread::ReplicatedMergeTreePartCheckThread(StorageReplicatedMergeTree & storage_) : storage(storage_) , log_name(storage.getStorageID().getFullTableName() + " (ReplicatedMergeTreePartCheckThread)") - , log(&Poco::Logger::get(log_name)) + , log(getLogger(log_name)) { task = storage.getContext()->getSchedulePool().createTask(log_name, [this] { run(); }); task->schedule(); diff --git a/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.h b/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.h index 68dc6ca3d1d..f2e26b3d324 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.h @@ -87,7 +87,7 @@ private: StorageReplicatedMergeTree & storage; String log_name; - Poco::Logger * log; + LoggerPtr log; using StringSet = std::set; struct PartToCheck diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp index 2d0617e5826..8d921bdcb1c 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp @@ -36,7 +36,7 @@ ReplicatedMergeTreeQueue::ReplicatedMergeTreeQueue(StorageReplicatedMergeTree & zookeeper_path = storage.zookeeper_path; replica_path = storage.replica_path; logger_name = storage.getStorageID().getFullTableName() + " (ReplicatedMergeTreeQueue)"; - log = &Poco::Logger::get(logger_name); + log = getLogger(logger_name); } @@ -2149,7 +2149,7 @@ LocalMergePredicate::LocalMergePredicate(ReplicatedMergeTreeQueue & queue_) template CommittingBlocks BaseMergePredicate::getCommittingBlocks( - zkutil::ZooKeeperPtr & zookeeper, const std::string & zookeeper_path, Poco::Logger * log_) + zkutil::ZooKeeperPtr & zookeeper, const std::string & zookeeper_path, LoggerPtr log_) { CommittingBlocks committing_blocks; @@ -2622,7 +2622,8 @@ String ReplicatedMergeTreeMergePredicate::getCoveringVirtualPart(const String & ReplicatedMergeTreeQueue::SubscriberHandler ReplicatedMergeTreeQueue::addSubscriber(ReplicatedMergeTreeQueue::SubscriberCallBack && callback, - std::unordered_set & out_entry_names, SyncReplicaMode sync_mode) + std::unordered_set & out_entry_names, SyncReplicaMode sync_mode, + std::unordered_set src_replicas) { std::lock_guard lock(state_mutex); std::lock_guard lock_subscribers(subscribers_mutex); @@ -2639,14 +2640,57 @@ 
ReplicatedMergeTreeQueue::addSubscriber(ReplicatedMergeTreeQueue::SubscriberCall LogEntry::REPLACE_RANGE, LogEntry::DROP_PART }; + + std::unordered_set existing_replicas; + if (!src_replicas.empty()) + { + Strings unfiltered_hosts; + unfiltered_hosts = storage.getZooKeeper()->getChildren(zookeeper_path + "/replicas"); + for (const auto & host : unfiltered_hosts) + existing_replicas.insert(host); + } + out_entry_names.reserve(queue.size()); + for (const auto & entry : queue) { - if (!lightweight_entries_only - || std::find(lightweight_entries.begin(), lightweight_entries.end(), entry->type) != lightweight_entries.end()) + bool entry_matches = !lightweight_entries_only || std::find(lightweight_entries.begin(), lightweight_entries.end(), entry->type) != lightweight_entries.end(); + if (!entry_matches) + continue; + + // `src_replicas` is used for specified sets of replicas; however, we also account for + // entries from removed or unknown replicas. This is necessary because the `source_replica` + // field in a replication queue entry doesn't always indicate the current existence or state + // of the part in that replica. Therefore, we include entries from replicas not listed in zookeeper. + // The `need_wait_for_entry` condition ensures: + // 1. Waiting for entries from both specified (`src_replicas`) and potentially removed + // or unknown replicas, as `source_replica` may not reflect the current part status. + // 2. Handling cases where parts become broken (e.g., due to a hard restart) leading to + // changes in the source replica or empty `source_replica` fields. + + // Example Scenario: + // - A part is added on replica1 and fetched by replica2. If the part on replica1 breaks and + // replica1 schedules a re-fetch from another source, a GET_PART entry with an empty + // `source_replica` may be created. + // - If replica3 is added and replica2 (with the intact part) is removed, SYNC .. FROM replica2 + // might not account for the re-fetch need from replica1, risking data inconsistencies. + // - Therefore, `need_wait_for_entry` considers entries with specified sources, those not in + // zookeeper->getChildren(zookeeper_path + "/replicas"), and entries with empty `source_replica`. + + bool is_entry_from_specified_replica = src_replicas.contains(entry->source_replica); + + chassert(!existing_replicas.contains("")); + bool is_entry_from_removed_or_unknown_replica = !existing_replicas.contains(entry->source_replica) || entry->source_replica.empty(); + + bool need_wait_for_entry = src_replicas.empty() || is_entry_from_specified_replica || is_entry_from_removed_or_unknown_replica; + + if (need_wait_for_entry) + { out_entry_names.insert(entry->znode_name); + } } - LOG_TEST(log, "Waiting for {} entries to be processed: {}", out_entry_names.size(), fmt::join(out_entry_names, ", ")); + + LOG_TRACE(log, "Waiting for {} entries to be processed: {}", out_entry_names.size(), fmt::join(out_entry_names, ", ")); } auto it = subscribers.emplace(subscribers.end(), std::move(callback)); diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h index ae8ae623a30..84106565dff 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h @@ -71,7 +71,7 @@ private: String zookeeper_path; String replica_path; String logger_name; - Poco::Logger * log = nullptr; + LoggerPtr log = nullptr; /// Protects the queue, future_parts and other queue state variables. 
mutable std::mutex state_mutex; @@ -430,7 +430,7 @@ public: ActionBlocker pull_log_blocker; /// Adds a subscriber - SubscriberHandler addSubscriber(SubscriberCallBack && callback, std::unordered_set & out_entry_names, SyncReplicaMode sync_mode); + SubscriberHandler addSubscriber(SubscriberCallBack && callback, std::unordered_set & out_entry_names, SyncReplicaMode sync_mode, std::unordered_set src_replicas); void notifySubscribersOnPartialShutdown(); @@ -519,7 +519,7 @@ public: /// This predicate is checked for the first part of each range. bool canMergeSinglePart(const MergeTreeData::DataPartPtr & part, String & out_reason) const; - CommittingBlocks getCommittingBlocks(zkutil::ZooKeeperPtr & zookeeper, const std::string & zookeeper_path, Poco::Logger * log_); + CommittingBlocks getCommittingBlocks(zkutil::ZooKeeperPtr & zookeeper, const std::string & zookeeper_path, LoggerPtr log_); protected: /// A list of partitions that can be used in the merge predicate diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp index 579592b0b3e..b79418da791 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp @@ -33,7 +33,7 @@ static String generateActiveNodeIdentifier() ReplicatedMergeTreeRestartingThread::ReplicatedMergeTreeRestartingThread(StorageReplicatedMergeTree & storage_) : storage(storage_) , log_name(storage.getStorageID().getFullTableName() + " (ReplicatedMergeTreeRestartingThread)") - , log(&Poco::Logger::get(log_name)) + , log(getLogger(log_name)) , active_node_identifier(generateActiveNodeIdentifier()) { const auto storage_settings = storage.getSettings(); diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.h b/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.h index 02103272a1f..01071d80e8b 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.h @@ -42,7 +42,7 @@ public: private: StorageReplicatedMergeTree & storage; String log_name; - Poco::Logger * log; + LoggerPtr log; std::atomic need_stop {false}; /// The random data we wrote into `/replicas/me/is_active`. 
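/* A hedged standalone sketch of the wait/skip decision described in addSubscriber above (container
 * types simplified, function name invented): with an explicit source-replica filter ("SYNC .. FROM"),
 * an entry is still waited on when its source replica is in the filter, is unknown or removed in
 * ZooKeeper, or is empty (e.g. a re-fetch scheduled after a broken part).
 */
#include <string>
#include <unordered_set>

static bool needWaitForEntrySketch(
    const std::unordered_set<std::string> & src_replicas,      /// replicas named in SYNC .. FROM; empty means "wait for all"
    const std::unordered_set<std::string> & existing_replicas, /// children of <zookeeper_path>/replicas
    const std::string & entry_source_replica)
{
    if (src_replicas.empty())
        return true;
    const bool from_specified = src_replicas.contains(entry_source_replica);
    const bool from_removed_or_unknown = entry_source_replica.empty() || !existing_replicas.contains(entry_source_replica);
    return from_specified || from_removed_or_unknown;
}

/// needWaitForEntrySketch({"replica2"}, {"replica1", "replica3"}, "")         -> true  (empty source replica)
/// needWaitForEntrySketch({"replica2"}, {"replica1", "replica3"}, "replica1") -> false (known replica, not requested)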
diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp index 218ed3bff12..1fb2393948a 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp @@ -58,7 +58,7 @@ struct ReplicatedMergeTreeSinkImpl::DelayedChunk ProfileEvents::Counters part_counters; Partition() = default; - Partition(Poco::Logger * log_, + Partition(LoggerPtr log_, MergeTreeDataWriter::TemporaryPart && temp_part_, UInt64 elapsed_ns_, BlockIDsType && block_id_, @@ -92,7 +92,7 @@ std::vector testSelfDeduplicate(std::vector data, std::vector::DelayedChunk::Partition part( - &Poco::Logger::get("testSelfDeduplicate"), MergeTreeDataWriter::TemporaryPart(), 0, std::move(hashes), std::move(block1), std::nullopt, std::move(profile_counters)); + getLogger("testSelfDeduplicate"), MergeTreeDataWriter::TemporaryPart(), 0, std::move(hashes), std::move(block1), std::nullopt, std::move(profile_counters)); part.filterSelfDuplicate(); @@ -138,7 +138,7 @@ ReplicatedMergeTreeSinkImpl::ReplicatedMergeTreeSinkImpl( , is_attach(is_attach_) , quorum_parallel(quorum_parallel_) , deduplicate(deduplicate_) - , log(&Poco::Logger::get(storage.getLogName() + " (Replicated OutputStream)")) + , log(getLogger(storage.getLogName() + " (Replicated OutputStream)")) , context(context_) , storage_snapshot(storage.getStorageSnapshotWithoutData(metadata_snapshot, context_)) { @@ -1089,6 +1089,7 @@ std::pair, bool> ReplicatedMergeTreeSinkImpl:: retry_context.actual_part_name, quorum_info.status_path, quorum_info.is_active_node_version, + quorum_info.host_node_version, replicas_num); }); } @@ -1117,7 +1118,8 @@ void ReplicatedMergeTreeSinkImpl::waitForQuorum( const ZooKeeperWithFaultInjectionPtr & zookeeper, const std::string & part_name, const std::string & quorum_path, - Int32 is_active_node_version, + int is_active_node_version, + int host_node_version, size_t replicas_num) const { /// We are waiting for quorum to be satisfied. @@ -1153,12 +1155,16 @@ void ReplicatedMergeTreeSinkImpl::waitForQuorum( /// And what if it is possible that the current replica at this time has ceased to be active /// and the quorum is marked as failed and deleted? - Coordination::Stat stat; - String value; - if (!zookeeper->tryGet(storage.replica_path + "/is_active", value, &stat) || stat.version != is_active_node_version) + /// Note: checking is_active is not enough since it's ephemeral, and the version can be the same after recreation, + /// so need to check host node as well + auto get_results = zookeeper->tryGet(Strings{storage.replica_path + "/is_active", storage.replica_path + "/host"}); + const auto & is_active = get_results[0]; + const auto & host = get_results[1]; + if ((is_active.error == Coordination::Error::ZNONODE || is_active.stat.version != is_active_node_version) + || (host.error == Coordination::Error::ZNONODE || host.stat.version != host_node_version)) throw Exception( ErrorCodes::UNKNOWN_STATUS_OF_INSERT, - "Unknown quorum status. The data was inserted in the local replica but we could not verify quorum. Reason: " + "Unknown quorum status. The data was inserted in the local replica, but we could not verify the quorum. 
Reason: " "Replica became inactive while waiting for quorum"); LOG_TRACE(log, "Quorum '{}' for part {} satisfied", quorum_path, part_name); diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.h b/src/Storages/MergeTree/ReplicatedMergeTreeSink.h index 5c70d0c76e1..bc23204e7d3 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.h @@ -104,6 +104,7 @@ private: const std::string & part_name, const std::string & quorum_path, int is_active_node_version, + int host_node_version, size_t replicas_num) const; StorageReplicatedMergeTree & storage; @@ -127,7 +128,7 @@ private: bool last_block_is_duplicate = false; UInt64 num_blocks_processed = 0; - Poco::Logger * log; + LoggerPtr log; ContextPtr context; StorageSnapshotPtr storage_snapshot; diff --git a/src/Storages/MergeTree/ZooKeeperRetries.h b/src/Storages/MergeTree/ZooKeeperRetries.h index 22282345220..ecef174c6c7 100644 --- a/src/Storages/MergeTree/ZooKeeperRetries.h +++ b/src/Storages/MergeTree/ZooKeeperRetries.h @@ -30,7 +30,7 @@ struct ZooKeeperRetriesInfo class ZooKeeperRetriesControl { public: - ZooKeeperRetriesControl(std::string name_, Poco::Logger * logger_, ZooKeeperRetriesInfo retries_info_, QueryStatusPtr elem) + ZooKeeperRetriesControl(std::string name_, LoggerPtr logger_, ZooKeeperRetriesInfo retries_info_, QueryStatusPtr elem) : name(std::move(name_)), logger(logger_), retries_info(retries_info_), process_list_element(elem) { } @@ -151,7 +151,7 @@ public: bool isLastRetry() const { return total_failures >= retries_info.max_retries; } - bool isRetry() const { return current_iteration > 1; } + bool isRetry() const { return current_iteration > 0; } const std::string & getLastKeeperErrorMessage() const { return keeper_error.message; } @@ -160,7 +160,7 @@ public: const std::string & getName() const { return name; } - Poco::Logger * getLogger() const { return logger; } + LoggerPtr getLogger() const { return logger; } private: struct KeeperError @@ -263,7 +263,7 @@ private: std::string name; - Poco::Logger * logger = nullptr; + LoggerPtr logger = nullptr; ZooKeeperRetriesInfo retries_info; UInt64 total_failures = 0; UserError user_error; diff --git a/src/Storages/MergeTree/checkDataPart.cpp b/src/Storages/MergeTree/checkDataPart.cpp index 7e0b69c7638..8ae9b54b6e9 100644 --- a/src/Storages/MergeTree/checkDataPart.cpp +++ b/src/Storages/MergeTree/checkDataPart.cpp @@ -338,7 +338,7 @@ IMergeTreeDataPart::Checksums checkDataPart( throw; LOG_DEBUG( - &Poco::Logger::get("checkDataPart"), + getLogger("checkDataPart"), "Will drop cache for data part {} and will check it once again", data_part->name); auto & cache = *FileCacheFactory::instance().getByName(*cache_name)->cache; @@ -348,7 +348,7 @@ IMergeTreeDataPart::Checksums checkDataPart( if (!data_part_storage.isDirectory(file_name)) { auto remote_path = data_part_storage.getRemotePath(file_name); - cache.removePathIfExists(remote_path); + cache.removePathIfExists(remote_path, FileCache::getCommonUser().user_id); } } diff --git a/src/Storages/MergeTree/registerStorageMergeTree.cpp b/src/Storages/MergeTree/registerStorageMergeTree.cpp index 9a5af77d57c..8e646e48f16 100644 --- a/src/Storages/MergeTree/registerStorageMergeTree.cpp +++ b/src/Storages/MergeTree/registerStorageMergeTree.cpp @@ -608,7 +608,7 @@ static StoragePtr create(const StorageFactory::Arguments & args) metadata.column_ttls_by_name[name] = new_ttl_entry; } - storage_settings->loadFromQuery(*args.storage_def, context); + 
storage_settings->loadFromQuery(*args.storage_def, context, args.attach); // updates the default storage_settings with settings specified via SETTINGS arg in a query if (args.storage_def->settings) diff --git a/src/Storages/MessageQueueSink.cpp b/src/Storages/MessageQueueSink.cpp index 1aa19c9ccde..4fb81d69070 100644 --- a/src/Storages/MessageQueueSink.cpp +++ b/src/Storages/MessageQueueSink.cpp @@ -20,7 +20,7 @@ MessageQueueSink::MessageQueueSink( void MessageQueueSink::onStart() { LOG_TEST( - &Poco::Logger::get("MessageQueueSink"), + getLogger("MessageQueueSink"), "Executing startup for MessageQueueSink"); initialize(); diff --git a/src/Storages/MutationCommands.cpp b/src/Storages/MutationCommands.cpp index 36388a32b41..f6ec277c270 100644 --- a/src/Storages/MutationCommands.cpp +++ b/src/Storages/MutationCommands.cpp @@ -37,8 +37,9 @@ std::optional MutationCommand::parse(ASTAlterCommand * command, MutationCommand res; res.ast = command->ptr(); res.type = DELETE; - res.predicate = command->predicate; - res.partition = command->partition; + res.predicate = command->predicate->clone(); + if (command->partition) + res.partition = command->partition->clone(); return res; } else if (command->type == ASTAlterCommand::UPDATE) @@ -46,8 +47,9 @@ std::optional MutationCommand::parse(ASTAlterCommand * command, MutationCommand res; res.ast = command->ptr(); res.type = UPDATE; - res.predicate = command->predicate; - res.partition = command->partition; + res.predicate = command->predicate->clone(); + if (command->partition) + res.partition = command->partition->clone(); for (const ASTPtr & assignment_ast : command->update_assignments->children) { const auto & assignment = assignment_ast->as(); @@ -64,8 +66,10 @@ std::optional MutationCommand::parse(ASTAlterCommand * command, MutationCommand res; res.ast = command->ptr(); res.type = APPLY_DELETED_MASK; - res.predicate = command->predicate; - res.partition = command->partition; + if (command->predicate) + res.predicate = command->predicate->clone(); + if (command->partition) + res.partition = command->partition->clone(); return res; } else if (command->type == ASTAlterCommand::MATERIALIZE_INDEX) @@ -73,7 +77,8 @@ std::optional MutationCommand::parse(ASTAlterCommand * command, MutationCommand res; res.ast = command->ptr(); res.type = MATERIALIZE_INDEX; - res.partition = command->partition; + if (command->partition) + res.partition = command->partition->clone(); res.predicate = nullptr; res.index_name = command->index->as().name(); return res; @@ -83,7 +88,8 @@ std::optional MutationCommand::parse(ASTAlterCommand * command, MutationCommand res; res.ast = command->ptr(); res.type = MATERIALIZE_STATISTIC; - res.partition = command->partition; + if (command->partition) + res.partition = command->partition->clone(); res.predicate = nullptr; res.statistic_columns = command->statistic_decl->as().getColumnNames(); return res; @@ -93,7 +99,8 @@ std::optional MutationCommand::parse(ASTAlterCommand * command, MutationCommand res; res.ast = command->ptr(); res.type = MATERIALIZE_PROJECTION; - res.partition = command->partition; + if (command->partition) + res.partition = command->partition->clone(); res.predicate = nullptr; res.projection_name = command->projection->as().name(); return res; @@ -103,7 +110,8 @@ std::optional MutationCommand::parse(ASTAlterCommand * command, MutationCommand res; res.ast = command->ptr(); res.type = MATERIALIZE_COLUMN; - res.partition = command->partition; + if (command->partition) + res.partition = command->partition->clone(); 
res.column_name = getIdentifierName(command->column); return res; } @@ -124,7 +132,7 @@ std::optional MutationCommand::parse(ASTAlterCommand * command, res.type = MutationCommand::Type::DROP_COLUMN; res.column_name = getIdentifierName(command->column); if (command->partition) - res.partition = command->partition; + res.partition = command->partition->clone(); if (command->clear_column) res.clear = true; @@ -137,7 +145,7 @@ std::optional MutationCommand::parse(ASTAlterCommand * command, res.type = MutationCommand::Type::DROP_INDEX; res.column_name = command->index->as().name(); if (command->partition) - res.partition = command->partition; + res.partition = command->partition->clone(); if (command->clear_index) res.clear = true; return res; @@ -148,7 +156,7 @@ std::optional MutationCommand::parse(ASTAlterCommand * command, res.ast = command->ptr(); res.type = MutationCommand::Type::DROP_STATISTIC; if (command->partition) - res.partition = command->partition; + res.partition = command->partition->clone(); if (command->clear_index) res.clear = true; res.statistic_columns = command->statistic_decl->as().getColumnNames(); @@ -161,7 +169,7 @@ std::optional MutationCommand::parse(ASTAlterCommand * command, res.type = MutationCommand::Type::DROP_PROJECTION; res.column_name = command->projection->as().name(); if (command->partition) - res.partition = command->partition; + res.partition = command->partition->clone(); if (command->clear_projection) res.clear = true; return res; @@ -180,7 +188,8 @@ std::optional MutationCommand::parse(ASTAlterCommand * command, MutationCommand res; res.ast = command->ptr(); res.type = MATERIALIZE_TTL; - res.partition = command->partition; + if (command->partition) + res.partition = command->partition->clone(); return res; } else diff --git a/src/Storages/NATS/NATSConnection.cpp b/src/Storages/NATS/NATSConnection.cpp index 70b3599aa09..d7ad0cf8219 100644 --- a/src/Storages/NATS/NATSConnection.cpp +++ b/src/Storages/NATS/NATSConnection.cpp @@ -13,7 +13,7 @@ static const auto RETRIES_MAX = 20; static const auto CONNECTED_TO_BUFFER_SIZE = 256; -NATSConnectionManager::NATSConnectionManager(const NATSConfiguration & configuration_, Poco::Logger * log_) +NATSConnectionManager::NATSConnectionManager(const NATSConfiguration & configuration_, LoggerPtr log_) : configuration(configuration_) , log(log_) , event_handler(loop.getLoop(), log) @@ -115,8 +115,8 @@ void NATSConnectionManager::connectImpl() } natsOptions_SetMaxReconnect(options, configuration.max_reconnect); natsOptions_SetReconnectWait(options, configuration.reconnect_wait); - natsOptions_SetDisconnectedCB(options, disconnectedCallback, log); - natsOptions_SetReconnectedCB(options, reconnectedCallback, log); + natsOptions_SetDisconnectedCB(options, disconnectedCallback, log.get()); + natsOptions_SetReconnectedCB(options, reconnectedCallback, log.get()); natsStatus status; { auto lock = event_handler.setThreadLocalLoop(); diff --git a/src/Storages/NATS/NATSConnection.h b/src/Storages/NATS/NATSConnection.h index b49070473b2..c350f395a92 100644 --- a/src/Storages/NATS/NATSConnection.h +++ b/src/Storages/NATS/NATSConnection.h @@ -24,7 +24,7 @@ struct NATSConfiguration class NATSConnectionManager { public: - NATSConnectionManager(const NATSConfiguration & configuration_, Poco::Logger * log_); + NATSConnectionManager(const NATSConfiguration & configuration_, LoggerPtr log_); ~NATSConnectionManager(); bool isConnected(); @@ -54,7 +54,7 @@ private: static void reconnectedCallback(natsConnection * nc, void * log); 
NATSConfiguration configuration; - Poco::Logger * log; + LoggerPtr log; UVLoop loop; NATSHandler event_handler; diff --git a/src/Storages/NATS/NATSConsumer.cpp b/src/Storages/NATS/NATSConsumer.cpp index c7b40973b72..136cb13ddfa 100644 --- a/src/Storages/NATS/NATSConsumer.cpp +++ b/src/Storages/NATS/NATSConsumer.cpp @@ -21,7 +21,7 @@ NATSConsumer::NATSConsumer( StorageNATS & storage_, std::vector & subjects_, const String & subscribe_queue_name, - Poco::Logger * log_, + LoggerPtr log_, uint32_t queue_size_, const std::atomic & stopped_) : connection(connection_) diff --git a/src/Storages/NATS/NATSConsumer.h b/src/Storages/NATS/NATSConsumer.h index a5470433303..e8d3a849c2a 100644 --- a/src/Storages/NATS/NATSConsumer.h +++ b/src/Storages/NATS/NATSConsumer.h @@ -24,7 +24,7 @@ public: StorageNATS & storage_, std::vector & subjects_, const String & subscribe_queue_name, - Poco::Logger * log_, + LoggerPtr log_, uint32_t queue_size_, const std::atomic & stopped_); @@ -58,7 +58,7 @@ private: StorageNATS & storage; std::vector subscriptions; std::vector subjects; - Poco::Logger * log; + LoggerPtr log; const std::atomic & stopped; bool subscribed = false; diff --git a/src/Storages/NATS/NATSHandler.cpp b/src/Storages/NATS/NATSHandler.cpp index 7006e5633a9..03f1fc1a495 100644 --- a/src/Storages/NATS/NATSHandler.cpp +++ b/src/Storages/NATS/NATSHandler.cpp @@ -12,7 +12,7 @@ namespace DB static const auto MAX_THREAD_WORK_DURATION_MS = 60000; -NATSHandler::NATSHandler(uv_loop_t * loop_, Poco::Logger * log_) : +NATSHandler::NATSHandler(uv_loop_t * loop_, LoggerPtr log_) : loop(loop_), log(log_), loop_running(false), diff --git a/src/Storages/NATS/NATSHandler.h b/src/Storages/NATS/NATSHandler.h index e3894c888a3..6f9ec398cfa 100644 --- a/src/Storages/NATS/NATSHandler.h +++ b/src/Storages/NATS/NATSHandler.h @@ -6,7 +6,7 @@ #include #include #include -#include +#include namespace DB { @@ -23,7 +23,7 @@ using LockPtr = std::unique_ptr>; class NATSHandler { public: - NATSHandler(uv_loop_t * loop_, Poco::Logger * log_); + NATSHandler(uv_loop_t * loop_, LoggerPtr log_); ~NATSHandler(); @@ -47,7 +47,7 @@ public: private: uv_loop_t * loop; natsOptions * opts = nullptr; - Poco::Logger * log; + LoggerPtr log; std::atomic loop_running; std::atomic loop_state; diff --git a/src/Storages/NATS/NATSProducer.cpp b/src/Storages/NATS/NATSProducer.cpp index a8510149baf..fb8abb016f8 100644 --- a/src/Storages/NATS/NATSProducer.cpp +++ b/src/Storages/NATS/NATSProducer.cpp @@ -23,7 +23,7 @@ NATSProducer::NATSProducer( const NATSConfiguration & configuration_, const String & subject_, std::atomic & shutdown_called_, - Poco::Logger * log_) + LoggerPtr log_) : AsynchronousMessageProducer(log_) , connection(configuration_, log_) , subject(subject_) diff --git a/src/Storages/NATS/NATSProducer.h b/src/Storages/NATS/NATSProducer.h index 0303d05969b..6923553a551 100644 --- a/src/Storages/NATS/NATSProducer.h +++ b/src/Storages/NATS/NATSProducer.h @@ -20,7 +20,7 @@ public: const NATSConfiguration & configuration_, const String & subject_, std::atomic & shutdown_called_, - Poco::Logger * log_); + LoggerPtr log_); void produce(const String & message, size_t rows_in_message, const Columns & columns, size_t last_row) override; diff --git a/src/Storages/NATS/StorageNATS.cpp b/src/Storages/NATS/StorageNATS.cpp index 9cb1fbd8506..2af9a9f974f 100644 --- a/src/Storages/NATS/StorageNATS.cpp +++ b/src/Storages/NATS/StorageNATS.cpp @@ -59,7 +59,7 @@ StorageNATS::StorageNATS( , schema_name(getContext()->getMacros()->expand(nats_settings->nats_schema)) , 
num_consumers(nats_settings->nats_num_consumers.value) , max_rows_per_message(nats_settings->nats_max_rows_per_message) - , log(&Poco::Logger::get("StorageNATS (" + table_id_.table_name + ")")) + , log(getLogger("StorageNATS (" + table_id_.table_name + ")")) , semaphore(0, static_cast(num_consumers)) , queue_size(std::max(QUEUE_SIZE, static_cast(getMaxBlockSize()))) , is_attach(is_attach_) diff --git a/src/Storages/NATS/StorageNATS.h b/src/Storages/NATS/StorageNATS.h index 16a162b8500..882119f5cdb 100644 --- a/src/Storages/NATS/StorageNATS.h +++ b/src/Storages/NATS/StorageNATS.h @@ -78,7 +78,7 @@ private: size_t num_consumers; size_t max_rows_per_message; - Poco::Logger * log; + LoggerPtr log; NATSConnectionManagerPtr connection; /// Connection for all consumers NATSConfiguration configuration; diff --git a/src/Storages/PartitionCommands.cpp b/src/Storages/PartitionCommands.cpp index 92aea597ab3..6ce66d85ddc 100644 --- a/src/Storages/PartitionCommands.cpp +++ b/src/Storages/PartitionCommands.cpp @@ -23,7 +23,7 @@ std::optional PartitionCommand::parse(const ASTAlterCommand * { PartitionCommand res; res.type = DROP_PARTITION; - res.partition = command_ast->partition; + res.partition = command_ast->partition->clone(); res.detach = command_ast->detach; res.part = command_ast->part; return res; @@ -32,7 +32,7 @@ std::optional PartitionCommand::parse(const ASTAlterCommand * { PartitionCommand res; res.type = DROP_DETACHED_PARTITION; - res.partition = command_ast->partition; + res.partition = command_ast->partition->clone(); res.part = command_ast->part; return res; } @@ -40,7 +40,7 @@ std::optional PartitionCommand::parse(const ASTAlterCommand * { PartitionCommand res; res.type = ATTACH_PARTITION; - res.partition = command_ast->partition; + res.partition = command_ast->partition->clone(); res.part = command_ast->part; return res; } @@ -48,7 +48,7 @@ std::optional PartitionCommand::parse(const ASTAlterCommand * { PartitionCommand res; res.type = MOVE_PARTITION; - res.partition = command_ast->partition; + res.partition = command_ast->partition->clone(); res.part = command_ast->part; switch (command_ast->move_destination_type) { @@ -77,7 +77,7 @@ std::optional PartitionCommand::parse(const ASTAlterCommand * { PartitionCommand res; res.type = REPLACE_PARTITION; - res.partition = command_ast->partition; + res.partition = command_ast->partition->clone(); res.replace = command_ast->replace; res.from_database = command_ast->from_database; res.from_table = command_ast->from_table; @@ -87,7 +87,7 @@ std::optional PartitionCommand::parse(const ASTAlterCommand * { PartitionCommand res; res.type = FETCH_PARTITION; - res.partition = command_ast->partition; + res.partition = command_ast->partition->clone(); res.from_zookeeper_path = command_ast->from; res.part = command_ast->part; return res; @@ -96,7 +96,7 @@ std::optional PartitionCommand::parse(const ASTAlterCommand * { PartitionCommand res; res.type = FREEZE_PARTITION; - res.partition = command_ast->partition; + res.partition = command_ast->partition->clone(); res.with_name = command_ast->with_name; return res; } @@ -111,7 +111,7 @@ std::optional PartitionCommand::parse(const ASTAlterCommand * { PartitionCommand res; res.type = PartitionCommand::UNFREEZE_PARTITION; - res.partition = command_ast->partition; + res.partition = command_ast->partition->clone(); res.with_name = command_ast->with_name; return res; } diff --git a/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp b/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp index 
b2442109409..f99ebf51792 100644 --- a/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp +++ b/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp @@ -51,7 +51,7 @@ MaterializedPostgreSQLConsumer::MaterializedPostgreSQLConsumer( bool schema_as_a_part_of_table_name_, StorageInfos storages_info_, const String & name_for_logger) - : log(&Poco::Logger::get("PostgreSQLReplicaConsumer(" + name_for_logger + ")")) + : log(getLogger("PostgreSQLReplicaConsumer(" + name_for_logger + ")")) , context(context_) , replication_slot_name(replication_slot_name_) , publication_name(publication_name_) @@ -76,7 +76,7 @@ MaterializedPostgreSQLConsumer::MaterializedPostgreSQLConsumer( } -MaterializedPostgreSQLConsumer::StorageData::StorageData(const StorageInfo & storage_info, Poco::Logger * log_) +MaterializedPostgreSQLConsumer::StorageData::StorageData(const StorageInfo & storage_info, LoggerPtr log_) : storage(storage_info.storage) , table_description(storage_info.storage->getInMemoryMetadataPtr()->getSampleBlock()) , columns_attributes(storage_info.attributes) diff --git a/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.h b/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.h index 3e95c1cd7de..972c03e50d8 100644 --- a/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.h +++ b/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.h @@ -32,7 +32,7 @@ class MaterializedPostgreSQLConsumer private: struct StorageData { - explicit StorageData(const StorageInfo & storage_info, Poco::Logger * log_); + explicit StorageData(const StorageInfo & storage_info, LoggerPtr log_); size_t getColumnsNum() const { return table_description.sample_block.columns(); } @@ -137,7 +137,7 @@ private: return (static_cast(upper_half) << 32) + lower_half; } - Poco::Logger * log; + LoggerPtr log; ContextPtr context; const std::string replication_slot_name, publication_name; diff --git a/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp b/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp index 43de2069b19..2bb1e2dde0d 100644 --- a/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp +++ b/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp @@ -128,7 +128,7 @@ PostgreSQLReplicationHandler::PostgreSQLReplicationHandler( const MaterializedPostgreSQLSettings & replication_settings, bool is_materialized_postgresql_database_) : WithContext(context_->getGlobalContext()) - , log(&Poco::Logger::get("PostgreSQLReplicationHandler")) + , log(getLogger("PostgreSQLReplicationHandler")) , is_attach(is_attach_) , postgres_database(postgres_database_) , postgres_schema(replication_settings.materialized_postgresql_schema) diff --git a/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.h b/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.h index 5d426b3c512..5c519053d84 100644 --- a/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.h +++ b/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.h @@ -102,7 +102,7 @@ private: void assertInitialized() const; - Poco::Logger * log; + LoggerPtr log; /// If it is not attach, i.e. a create query, then if publication already exists - always drop it. 
bool is_attach; diff --git a/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp b/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp index 0faf553797a..f13cb820ec3 100644 --- a/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp +++ b/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp @@ -60,7 +60,7 @@ StorageMaterializedPostgreSQL::StorageMaterializedPostgreSQL( std::unique_ptr replication_settings) : IStorage(table_id_) , WithContext(context_->getGlobalContext()) - , log(&Poco::Logger::get("StorageMaterializedPostgreSQL(" + postgres::formatNameForLogs(remote_database_name, remote_table_name_) + ")")) + , log(getLogger("StorageMaterializedPostgreSQL(" + postgres::formatNameForLogs(remote_database_name, remote_table_name_) + ")")) , is_materialized_postgresql_database(false) , has_nested(false) , nested_context(makeNestedTableContext(context_->getGlobalContext())) @@ -101,7 +101,7 @@ StorageMaterializedPostgreSQL::StorageMaterializedPostgreSQL( const String & postgres_table_name) : IStorage(table_id_) , WithContext(context_->getGlobalContext()) - , log(&Poco::Logger::get("StorageMaterializedPostgreSQL(" + postgres::formatNameForLogs(postgres_database_name, postgres_table_name) + ")")) + , log(getLogger("StorageMaterializedPostgreSQL(" + postgres::formatNameForLogs(postgres_database_name, postgres_table_name) + ")")) , is_materialized_postgresql_database(true) , has_nested(false) , nested_context(makeNestedTableContext(context_->getGlobalContext())) @@ -120,7 +120,7 @@ StorageMaterializedPostgreSQL::StorageMaterializedPostgreSQL( const String & postgres_table_name) : IStorage(StorageID(nested_storage_->getStorageID().database_name, nested_storage_->getStorageID().table_name)) , WithContext(context_->getGlobalContext()) - , log(&Poco::Logger::get("StorageMaterializedPostgreSQL(" + postgres::formatNameForLogs(postgres_database_name, postgres_table_name) + ")")) + , log(getLogger("StorageMaterializedPostgreSQL(" + postgres::formatNameForLogs(postgres_database_name, postgres_table_name) + ")")) , is_materialized_postgresql_database(true) , has_nested(true) , nested_context(makeNestedTableContext(context_->getGlobalContext())) @@ -141,7 +141,7 @@ StoragePtr StorageMaterializedPostgreSQL::createTemporary() const auto tmp_storage = DatabaseCatalog::instance().tryGetTable(tmp_table_id, nested_context); if (tmp_storage) { - LOG_TRACE(&Poco::Logger::get("MaterializedPostgreSQLStorage"), "Temporary table {} already exists, dropping", tmp_table_id.getNameForLogs()); + LOG_TRACE(getLogger("MaterializedPostgreSQLStorage"), "Temporary table {} already exists, dropping", tmp_table_id.getNameForLogs()); InterpreterDropQuery::executeDropQuery(ASTDropQuery::Kind::Drop, getContext(), getContext(), tmp_table_id, /* sync */true); } diff --git a/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.h b/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.h index bebbb74ddd1..9c9418a8caa 100644 --- a/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.h +++ b/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.h @@ -142,7 +142,7 @@ private: String getNestedTableName() const; - Poco::Logger * log; + LoggerPtr log; /// Not nullptr only for single MaterializedPostgreSQL storage, because for MaterializedPostgreSQL /// database engine there is one replication handler for all tables. 
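Most hunks in this part of the diff are the same mechanical migration: an owning LoggerPtr obtained from getLogger(...) replaces a raw Poco::Logger * obtained from &Poco::Logger::get(...). A minimal sketch of the storage-side pattern, assuming <Common/logger_useful.h> provides getLogger/LoggerPtr as it does after this migration (the class name here is illustrative):

#include <string>
#include <Common/logger_useful.h>

class ExampleStorageSketch
{
public:
    explicit ExampleStorageSketch(const std::string & table_name)
        : log(getLogger("ExampleStorage (" + table_name + ")"))   /// per-instance logger name, as in StorageNATS/StorageRabbitMQ
    {
        LOG_TRACE(log, "Initialized");   /// LOG_* macros accept the shared handle directly
    }

private:
    LoggerPtr log;   /// was: Poco::Logger * log = &Poco::Logger::get("ExampleStorage (...)");
};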
diff --git a/src/Storages/RabbitMQ/RabbitMQConnection.cpp b/src/Storages/RabbitMQ/RabbitMQConnection.cpp index 13d065774a2..98ceba42676 100644 --- a/src/Storages/RabbitMQ/RabbitMQConnection.cpp +++ b/src/Storages/RabbitMQ/RabbitMQConnection.cpp @@ -11,7 +11,7 @@ static const auto CONNECT_SLEEP = 200; static const auto RETRIES_MAX = 20; -RabbitMQConnection::RabbitMQConnection(const RabbitMQConfiguration & configuration_, Poco::Logger * log_) +RabbitMQConnection::RabbitMQConnection(const RabbitMQConfiguration & configuration_, LoggerPtr log_) : configuration(configuration_) , log(log_) , event_handler(loop.getLoop(), log) diff --git a/src/Storages/RabbitMQ/RabbitMQConnection.h b/src/Storages/RabbitMQ/RabbitMQConnection.h index 698230b16f4..5adb6456194 100644 --- a/src/Storages/RabbitMQ/RabbitMQConnection.h +++ b/src/Storages/RabbitMQ/RabbitMQConnection.h @@ -22,7 +22,7 @@ struct RabbitMQConfiguration class RabbitMQConnection { public: - RabbitMQConnection(const RabbitMQConfiguration & configuration_, Poco::Logger * log_); + RabbitMQConnection(const RabbitMQConfiguration & configuration_, LoggerPtr log_); bool isConnected(); @@ -51,7 +51,7 @@ private: void disconnectImpl(bool immediately = false); RabbitMQConfiguration configuration; - Poco::Logger * log; + LoggerPtr log; UVLoop loop; /// Preserve order of destruction here: diff --git a/src/Storages/RabbitMQ/RabbitMQConsumer.cpp b/src/Storages/RabbitMQ/RabbitMQConsumer.cpp index f6facc04212..1843bebe3c7 100644 --- a/src/Storages/RabbitMQ/RabbitMQConsumer.cpp +++ b/src/Storages/RabbitMQ/RabbitMQConsumer.cpp @@ -24,7 +24,7 @@ RabbitMQConsumer::RabbitMQConsumer( std::vector & queues_, size_t channel_id_base_, const String & channel_base_, - Poco::Logger * log_, + LoggerPtr log_, uint32_t queue_size_) : event_handler(event_handler_) , queues(queues_) diff --git a/src/Storages/RabbitMQ/RabbitMQConsumer.h b/src/Storages/RabbitMQ/RabbitMQConsumer.h index 89dfa060eec..c78b33bfc7c 100644 --- a/src/Storages/RabbitMQ/RabbitMQConsumer.h +++ b/src/Storages/RabbitMQ/RabbitMQConsumer.h @@ -32,7 +32,7 @@ public: std::vector & queues_, size_t channel_id_base_, const String & channel_base_, - Poco::Logger * log_, + LoggerPtr log_, uint32_t queue_size_); struct CommitInfo @@ -88,7 +88,7 @@ private: const String channel_base; const size_t channel_id_base; - Poco::Logger * log; + LoggerPtr log; std::atomic stopped; String channel_id; diff --git a/src/Storages/RabbitMQ/RabbitMQHandler.cpp b/src/Storages/RabbitMQ/RabbitMQHandler.cpp index 745af0d20e3..be352f26f7b 100644 --- a/src/Storages/RabbitMQ/RabbitMQHandler.cpp +++ b/src/Storages/RabbitMQ/RabbitMQHandler.cpp @@ -8,7 +8,7 @@ namespace DB /* The object of this class is shared between concurrent consumers (who share the same connection == share the same * event loop and handler). 
*/ -RabbitMQHandler::RabbitMQHandler(uv_loop_t * loop_, Poco::Logger * log_) : +RabbitMQHandler::RabbitMQHandler(uv_loop_t * loop_, LoggerPtr log_) : AMQP::LibUvHandler(loop_), loop(loop_), log(log_), diff --git a/src/Storages/RabbitMQ/RabbitMQHandler.h b/src/Storages/RabbitMQ/RabbitMQHandler.h index 4223732a4a0..244692cf800 100644 --- a/src/Storages/RabbitMQ/RabbitMQHandler.h +++ b/src/Storages/RabbitMQ/RabbitMQHandler.h @@ -24,7 +24,7 @@ class RabbitMQHandler : public AMQP::LibUvHandler { public: - RabbitMQHandler(uv_loop_t * loop_, Poco::Logger * log_); + RabbitMQHandler(uv_loop_t * loop_, LoggerPtr log_); void onError(AMQP::TcpConnection * connection, const char * message) override; void onReady(AMQP::TcpConnection * connection) override; @@ -50,7 +50,7 @@ public: private: uv_loop_t * loop; - Poco::Logger * log; + LoggerPtr log; std::atomic connection_running, loop_running; std::atomic loop_state; diff --git a/src/Storages/RabbitMQ/RabbitMQProducer.cpp b/src/Storages/RabbitMQ/RabbitMQProducer.cpp index 246569060d0..7ad83213b9b 100644 --- a/src/Storages/RabbitMQ/RabbitMQProducer.cpp +++ b/src/Storages/RabbitMQ/RabbitMQProducer.cpp @@ -31,7 +31,7 @@ RabbitMQProducer::RabbitMQProducer( const size_t channel_id_base_, const bool persistent_, std::atomic & shutdown_called_, - Poco::Logger * log_) + LoggerPtr log_) : AsynchronousMessageProducer(log_) , connection(configuration_, log_) , routing_keys(routing_keys_) diff --git a/src/Storages/RabbitMQ/RabbitMQProducer.h b/src/Storages/RabbitMQ/RabbitMQProducer.h index 70afbbb9b90..a790eda0d08 100644 --- a/src/Storages/RabbitMQ/RabbitMQProducer.h +++ b/src/Storages/RabbitMQ/RabbitMQProducer.h @@ -24,7 +24,7 @@ public: const size_t channel_id_base_, const bool persistent_, std::atomic & shutdown_called_, - Poco::Logger * log_); + LoggerPtr log_); void produce(const String & message, size_t rows_in_message, const Columns & columns, size_t last_row) override; diff --git a/src/Storages/RabbitMQ/RabbitMQSource.cpp b/src/Storages/RabbitMQ/RabbitMQSource.cpp index 793064c10f8..3cec448fc11 100644 --- a/src/Storages/RabbitMQ/RabbitMQSource.cpp +++ b/src/Storages/RabbitMQ/RabbitMQSource.cpp @@ -70,7 +70,7 @@ RabbitMQSource::RabbitMQSource( , ack_in_suffix(ack_in_suffix_) , non_virtual_header(std::move(headers.first)) , virtual_header(std::move(headers.second)) - , log(&Poco::Logger::get("RabbitMQSource")) + , log(getLogger("RabbitMQSource")) , max_execution_time_ms(max_execution_time_) { storage.incrementReader(); diff --git a/src/Storages/RabbitMQ/RabbitMQSource.h b/src/Storages/RabbitMQ/RabbitMQSource.h index a25b3d50222..21d059bfae2 100644 --- a/src/Storages/RabbitMQ/RabbitMQSource.h +++ b/src/Storages/RabbitMQ/RabbitMQSource.h @@ -47,7 +47,7 @@ private: const Block non_virtual_header; const Block virtual_header; - Poco::Logger * log; + LoggerPtr log; RabbitMQConsumerPtr consumer; uint64_t max_execution_time_ms = 0; diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp index fce2d775b15..868f48d0b7d 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp @@ -69,7 +69,7 @@ StorageRabbitMQ::StorageRabbitMQ( ContextPtr context_, const ColumnsDescription & columns_, std::unique_ptr rabbitmq_settings_, - bool is_attach_) + bool is_attach) : IStorage(table_id_) , WithContext(context_->getGlobalContext()) , rabbitmq_settings(std::move(rabbitmq_settings_)) @@ -86,12 +86,11 @@ StorageRabbitMQ::StorageRabbitMQ( , persistent(rabbitmq_settings->rabbitmq_persistent.value) , 
use_user_setup(rabbitmq_settings->rabbitmq_queue_consume.value) , hash_exchange(num_consumers > 1 || num_queues > 1) - , log(&Poco::Logger::get("StorageRabbitMQ (" + table_id_.table_name + ")")) + , log(getLogger("StorageRabbitMQ (" + table_id_.table_name + ")")) , semaphore(0, static_cast(num_consumers)) , unique_strbase(getRandomName()) , queue_size(std::max(QUEUE_SIZE, static_cast(getMaxBlockSize()))) , milliseconds_to_wait(rabbitmq_settings->rabbitmq_empty_queue_backoff_start_ms) - , is_attach(is_attach_) { const auto & config = getContext()->getConfigRef(); @@ -318,10 +317,11 @@ void StorageRabbitMQ::connectionFunc() try { if (connection->reconnect()) + { initRabbitMQ(); - - streaming_task->scheduleAfter(RESCHEDULE_MS); - return; + streaming_task->scheduleAfter(RESCHEDULE_MS); + return; + } } catch (...) { @@ -373,57 +373,37 @@ void StorageRabbitMQ::initRabbitMQ() } else { - try + auto rabbit_channel = connection->createChannel(); + + /// Main exchange -> Bridge exchange -> ( Sharding exchange ) -> Queues -> Consumers + + initExchange(*rabbit_channel); + bindExchange(*rabbit_channel); + + for (const auto i : collections::range(0, num_queues)) + bindQueue(i + 1, *rabbit_channel); + + if (queues.size() != num_queues) { - auto rabbit_channel = connection->createChannel(); - - /// Main exchange -> Bridge exchange -> ( Sharding exchange ) -> Queues -> Consumers - - initExchange(*rabbit_channel); - bindExchange(*rabbit_channel); - - for (const auto i : collections::range(0, num_queues)) - bindQueue(i + 1, *rabbit_channel); - - if (queues.size() != num_queues) - { - throw Exception( - ErrorCodes::LOGICAL_ERROR, - "Expected all queues to be initialized (but having {}/{})", - queues.size(), num_queues); - } - - LOG_TRACE(log, "RabbitMQ setup completed"); - rabbit_channel->close(); - } - catch (...) - { - tryLogCurrentException(log); - if (is_attach) - return; /// A user will have to reattach the table. - throw; + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "Expected all queues to be initialized (but having {}/{})", + queues.size(), num_queues); } + + LOG_TRACE(log, "RabbitMQ setup completed"); + rabbit_channel->close(); } LOG_TRACE(log, "Registering {} conumers", num_consumers); for (size_t i = 0; i < num_consumers; ++i) { - try - { - auto consumer = createConsumer(); - consumer->updateChannel(*connection); - consumers_ref.push_back(consumer); - pushConsumer(consumer); - ++num_created_consumers; - } - catch (...) 
- { - if (!is_attach) - throw; - - tryLogCurrentException(log); - } + auto consumer = createConsumer(); + consumer->updateChannel(*connection); + consumers_ref.push_back(consumer); + pushConsumer(consumer); + ++num_created_consumers; } LOG_TRACE(log, "Registered {}/{} conumers", num_created_consumers, num_consumers); diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.h b/src/Storages/RabbitMQ/StorageRabbitMQ.h index 120930cf01d..696734617be 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.h +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.h @@ -27,7 +27,7 @@ public: ContextPtr context_, const ColumnsDescription & columns_, std::unique_ptr rabbitmq_settings_, - bool is_attach_); + bool is_attach); std::string getName() const override { return "RabbitMQ"; } @@ -102,7 +102,7 @@ private: bool use_user_setup; bool hash_exchange; - Poco::Logger * log; + LoggerPtr log; RabbitMQConnectionPtr connection; /// Connection for all consumers RabbitMQConfiguration configuration; @@ -158,10 +158,9 @@ private: size_t read_attempts = 0; mutable bool drop_table = false; - bool is_attach; RabbitMQConsumerPtr createConsumer(); - bool initialized = false; + std::atomic initialized = false; /// Functions working in the background void streamingToViewsFunc(); diff --git a/src/Storages/RocksDB/StorageSystemRocksDB.cpp b/src/Storages/RocksDB/StorageSystemRocksDB.cpp index cbb96ed4001..d0533b5ba0c 100644 --- a/src/Storages/RocksDB/StorageSystemRocksDB.cpp +++ b/src/Storages/RocksDB/StorageSystemRocksDB.cpp @@ -27,13 +27,14 @@ namespace DB { -NamesAndTypesList StorageSystemRocksDB::getNamesAndTypes() +ColumnsDescription StorageSystemRocksDB::getColumnsDescription() { - return { - { "database", std::make_shared() }, - { "table", std::make_shared() }, - { "name", std::make_shared() }, - { "value", std::make_shared() }, + return ColumnsDescription + { + {"database", std::make_shared(), "Database name."}, + {"table", std::make_shared(), "Name of the table with StorageEmbeddedRocksDB engine."}, + {"name", std::make_shared(), "Metric name."}, + {"value", std::make_shared(), "Metric value."}, }; } diff --git a/src/Storages/RocksDB/StorageSystemRocksDB.h b/src/Storages/RocksDB/StorageSystemRocksDB.h index deafba069f4..c1f10a7722d 100644 --- a/src/Storages/RocksDB/StorageSystemRocksDB.h +++ b/src/Storages/RocksDB/StorageSystemRocksDB.h @@ -16,7 +16,7 @@ class StorageSystemRocksDB final : public IStorageSystemOneBlock & table_is_being_dropped_, std::shared_ptr s3_queue_log_, const StorageID & storage_id_, - Poco::Logger * log_) + LoggerPtr log_) : ISource(header_) , WithContext(context_) , name(std::move(name_)) diff --git a/src/Storages/S3Queue/S3QueueSource.h b/src/Storages/S3Queue/S3QueueSource.h index 542f8e8fd8c..82e75020efb 100644 --- a/src/Storages/S3Queue/S3QueueSource.h +++ b/src/Storages/S3Queue/S3QueueSource.h @@ -67,7 +67,7 @@ public: const std::atomic & table_is_being_dropped_, std::shared_ptr s3_queue_log_, const StorageID & storage_id_, - Poco::Logger * log_); + LoggerPtr log_); ~StorageS3QueueSource() override; @@ -89,7 +89,7 @@ private: const StorageID storage_id; RemoveFileFunc remove_file_func; - Poco::Logger * log; + LoggerPtr log; using ReaderHolder = StorageS3Source::ReaderHolder; ReaderHolder reader; diff --git a/src/Storages/S3Queue/StorageS3Queue.cpp b/src/Storages/S3Queue/StorageS3Queue.cpp index bc33e8cf2a9..a8741aed3c5 100644 --- a/src/Storages/S3Queue/StorageS3Queue.cpp +++ b/src/Storages/S3Queue/StorageS3Queue.cpp @@ -75,7 +75,7 @@ namespace return zkutil::extractZooKeeperPath(result_zk_path, true); 
} - void checkAndAdjustSettings(S3QueueSettings & s3queue_settings, const Settings & settings, Poco::Logger * log) + void checkAndAdjustSettings(S3QueueSettings & s3queue_settings, const Settings & settings, LoggerPtr log) { if (s3queue_settings.mode == S3QueueMode::ORDERED && s3queue_settings.s3queue_processing_threads_num > 1) { @@ -119,7 +119,7 @@ StorageS3Queue::StorageS3Queue( , configuration{configuration_} , format_settings(format_settings_) , reschedule_processing_interval_ms(s3queue_settings->s3queue_polling_min_timeout_ms) - , log(&Poco::Logger::get("StorageS3Queue (" + table_id_.table_name + ")")) + , log(getLogger("StorageS3Queue (" + table_id_.table_name + ")")) { if (configuration.url.key.empty()) { @@ -254,7 +254,7 @@ void ReadFromS3Queue::createIterator(const ActionsDAG::Node * predicate) void ReadFromS3Queue::applyFilters() { - auto filter_actions_dag = ActionsDAG::buildFilterActionsDAG(filter_nodes.nodes, {}, context); + auto filter_actions_dag = ActionsDAG::buildFilterActionsDAG(filter_nodes.nodes); const ActionsDAG::Node * predicate = nullptr; if (filter_actions_dag) predicate = filter_actions_dag->getOutputs().at(0); @@ -600,7 +600,7 @@ void registerStorageS3QueueImpl(const String & name, StorageFactory & factory) if (user_format_settings.has(change.name)) user_format_settings.set(change.name, change.value); else - LOG_TRACE(&Poco::Logger::get("StorageS3"), "Remove: {}", change.name); + LOG_TRACE(getLogger("StorageS3"), "Remove: {}", change.name); args.storage_def->settings->changes.removeSetting(change.name); } diff --git a/src/Storages/S3Queue/StorageS3Queue.h b/src/Storages/S3Queue/StorageS3Queue.h index 3d3594dc2ab..5d2be610d58 100644 --- a/src/Storages/S3Queue/StorageS3Queue.h +++ b/src/Storages/S3Queue/StorageS3Queue.h @@ -79,7 +79,7 @@ private: std::atomic shutdown_called = false; std::atomic table_is_being_dropped = false; - Poco::Logger * log; + LoggerPtr log; void startup() override; void shutdown(bool is_drop) override; diff --git a/src/Storages/SelectQueryInfo.h b/src/Storages/SelectQueryInfo.h index b1a6f8f43c7..662a5c0ef5a 100644 --- a/src/Storages/SelectQueryInfo.h +++ b/src/Storages/SelectQueryInfo.h @@ -207,6 +207,12 @@ struct SelectQueryInfo /// If query has aggregate functions bool has_aggregates = false; + /// If query has any filter and no arrayJoin before filter. Used by skipping FINAL + /// Skipping FINAL algorithm will output the original chunk and a column indices of + /// selected rows. If query has filter and doesn't have array join before any filter, + /// we can merge the indices with the first filter in FilterTransform later. + bool has_filters_and_no_array_join_before_filter = false; + ClusterPtr getCluster() const { return !optimized_cluster ? 
cluster : optimized_cluster; } bool settings_limit_offset_done = false; diff --git a/src/Storages/StorageAzureBlob.cpp b/src/Storages/StorageAzureBlob.cpp index c7fbb633a82..01c31eab2b1 100644 --- a/src/Storages/StorageAzureBlob.cpp +++ b/src/Storages/StorageAzureBlob.cpp @@ -707,7 +707,7 @@ private: void ReadFromAzureBlob::applyFilters() { - auto filter_actions_dag = ActionsDAG::buildFilterActionsDAG(filter_nodes.nodes, {}, context); + auto filter_actions_dag = ActionsDAG::buildFilterActionsDAG(filter_nodes.nodes); const ActionsDAG::Node * predicate = nullptr; if (filter_actions_dag) predicate = filter_actions_dag->getOutputs().at(0); @@ -1121,7 +1121,7 @@ Chunk StorageAzureBlobSource::generate() return {}; } -void StorageAzureBlobSource::addNumRowsToCache(const DB::String & path, size_t num_rows) +void StorageAzureBlobSource::addNumRowsToCache(const String & path, size_t num_rows) { String source = fs::path(connection_url) / container / path; auto cache_key = getKeyForSchemaCache(source, format, format_settings, getContext()); diff --git a/src/Storages/StorageAzureBlob.h b/src/Storages/StorageAzureBlob.h index 16e5b9edfb6..6fc3c5ce592 100644 --- a/src/Storages/StorageAzureBlob.h +++ b/src/Storages/StorageAzureBlob.h @@ -319,7 +319,7 @@ private: ReaderHolder reader; - Poco::Logger * log = &Poco::Logger::get("StorageAzureBlobSource"); + LoggerPtr log = getLogger("StorageAzureBlobSource"); ThreadPool create_reader_pool; ThreadPoolCallbackRunner create_reader_scheduler; diff --git a/src/Storages/StorageAzureBlobCluster.cpp b/src/Storages/StorageAzureBlobCluster.cpp index a6372577fb0..1d587512f38 100644 --- a/src/Storages/StorageAzureBlobCluster.cpp +++ b/src/Storages/StorageAzureBlobCluster.cpp @@ -38,7 +38,7 @@ StorageAzureBlobCluster::StorageAzureBlobCluster( const ConstraintsDescription & constraints_, ContextPtr context_, bool structure_argument_was_provided_) - : IStorageCluster(cluster_name_, table_id_, &Poco::Logger::get("StorageAzureBlobCluster (" + table_id_.table_name + ")"), structure_argument_was_provided_) + : IStorageCluster(cluster_name_, table_id_, getLogger("StorageAzureBlobCluster (" + table_id_.table_name + ")"), structure_argument_was_provided_) , configuration{configuration_} , object_storage(std::move(object_storage_)) { diff --git a/src/Storages/StorageBuffer.cpp b/src/Storages/StorageBuffer.cpp index 6f4b1563a46..d5c135bb81d 100644 --- a/src/Storages/StorageBuffer.cpp +++ b/src/Storages/StorageBuffer.cpp @@ -137,7 +137,7 @@ StorageBuffer::StorageBuffer( , flush_thresholds(flush_thresholds_) , destination_id(destination_id_) , allow_materialized(allow_materialized_) - , log(&Poco::Logger::get("StorageBuffer (" + table_id_.getFullTableName() + ")")) + , log(getLogger("StorageBuffer (" + table_id_.getFullTableName() + ")")) , bg_pool(getContext()->getBufferFlushSchedulePool()) { StorageInMemoryMetadata storage_metadata; @@ -433,7 +433,7 @@ void StorageBuffer::read( } -static void appendBlock(Poco::Logger * log, const Block & from, Block & to) +static void appendBlock(LoggerPtr log, const Block & from, Block & to) { size_t rows = from.rows(); size_t old_rows = to.rows(); diff --git a/src/Storages/StorageBuffer.h b/src/Storages/StorageBuffer.h index ef646a12548..47f6239b173 100644 --- a/src/Storages/StorageBuffer.h +++ b/src/Storages/StorageBuffer.h @@ -166,7 +166,7 @@ private: Writes lifetime_writes; Writes total_writes; - Poco::Logger * log; + LoggerPtr log; void flushAllBuffers(bool check_thresholds = true); bool flushBuffer(Buffer & buffer, bool check_thresholds, bool 
locked = false); diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index a829002187b..afd9e4aad76 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -329,7 +329,7 @@ StorageDistributed::StorageDistributed( , remote_database(remote_database_) , remote_table(remote_table_) , remote_table_function_ptr(remote_table_function_ptr_) - , log(&Poco::Logger::get("StorageDistributed (" + id_.table_name + ")")) + , log(getLogger("StorageDistributed (" + id_.table_name + ")")) , owned_cluster(std::move(owned_cluster_)) , cluster_name(getContext()->getMacros()->expand(cluster_name_)) , has_sharding_key(sharding_key_) @@ -1102,7 +1102,7 @@ static ActionsDAGPtr getFilterFromQuery(const ASTPtr & ast, ContextPtr context) if (!source) return nullptr; - return ActionsDAG::buildFilterActionsDAG(source->getFilterNodes().nodes, {}, context); + return ActionsDAG::buildFilterActionsDAG(source->getFilterNodes().nodes); } @@ -1140,22 +1140,20 @@ std::optional StorageDistributed::distributedWriteFromClusterStor ContextMutablePtr query_context = Context::createCopy(local_context); query_context->increaseDistributedDepth(); - /// Here we take addresses from destination cluster and assume source table exists on these nodes - for (const auto & replicas : getCluster()->getShardsAddresses()) - { - /// There will be only one replica, because we consider each replica as a shard - for (const auto & node : replicas) - { - auto connection = std::make_shared( - node.host_name, node.port, query_context->getGlobalContext()->getCurrentDatabase(), - node.user, node.password, ssh::SSHKey(), node.quota_key, node.cluster, node.cluster_secret, - "ParallelInsertSelectInititiator", - node.compression, - node.secure - ); + const auto & current_settings = query_context->getSettingsRef(); + auto timeouts = ConnectionTimeouts::getTCPTimeoutsWithFailover(current_settings); + /// Here we take addresses from destination cluster and assume source table exists on these nodes + for (const auto & replicas : getCluster()->getShardsInfo()) + { + /// Skip unavailable hosts if necessary + auto try_results = replicas.pool->getMany(timeouts, current_settings, PoolMode::GET_MANY, /*async_callback*/ {}, /*skip_unavailable_endpoints*/ true); + + /// There will be only one replica, because we consider each replica as a shard + for (const auto & try_result : try_results) + { auto remote_query_executor = std::make_shared( - connection, + std::vector{try_result}, new_query_str, Block{}, query_context, @@ -1599,7 +1597,7 @@ ClusterPtr StorageDistributed::skipUnusedShardsWithAnalyzer( if (nodes.empty()) return nullptr; - auto filter_actions_dag = ActionsDAG::buildFilterActionsDAG(nodes, {}, local_context); + auto filter_actions_dag = ActionsDAG::buildFilterActionsDAG(nodes); size_t limit = local_context->getSettingsRef().optimize_skip_unused_shards_limit; if (!limit || limit > SSIZE_MAX) diff --git a/src/Storages/StorageDistributed.h b/src/Storages/StorageDistributed.h index b7ed85e87df..161a5983f94 100644 --- a/src/Storages/StorageDistributed.h +++ b/src/Storages/StorageDistributed.h @@ -238,7 +238,7 @@ private: String remote_table; ASTPtr remote_table_function_ptr; - Poco::Logger * log; + LoggerPtr log; /// Used to implement TableFunctionRemote. 
std::shared_ptr owned_cluster; diff --git a/src/Storages/StorageExecutable.cpp b/src/Storages/StorageExecutable.cpp index 2acbf3f4610..e475211deb3 100644 --- a/src/Storages/StorageExecutable.cpp +++ b/src/Storages/StorageExecutable.cpp @@ -80,7 +80,7 @@ StorageExecutable::StorageExecutable( : IStorage(table_id_) , settings(settings_) , input_queries(input_queries_) - , log(settings.is_executable_pool ? &Poco::Logger::get("StorageExecutablePool") : &Poco::Logger::get("StorageExecutable")) + , log(settings.is_executable_pool ? getLogger("StorageExecutablePool") : getLogger("StorageExecutable")) { StorageInMemoryMetadata storage_metadata; storage_metadata.setColumns(columns); diff --git a/src/Storages/StorageExecutable.h b/src/Storages/StorageExecutable.h index 37455385675..2be2a84ab49 100644 --- a/src/Storages/StorageExecutable.h +++ b/src/Storages/StorageExecutable.h @@ -45,7 +45,7 @@ public: private: ExecutableSettings settings; std::vector input_queries; - Poco::Logger * log; + LoggerPtr log; std::unique_ptr coordinator; }; diff --git a/src/Storages/StorageFactory.h b/src/Storages/StorageFactory.h index 239f1bb63ef..7b1d7235bac 100644 --- a/src/Storages/StorageFactory.h +++ b/src/Storages/StorageFactory.h @@ -31,6 +31,7 @@ public: struct Arguments { const String & engine_name; + /// Mutable to allow replacing constant expressions with literals, and other transformations. ASTs & engine_args; ASTStorage * storage_def; const ASTCreateQuery & query; diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 9f864813de9..8b8a151fb1d 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -1040,7 +1040,7 @@ void StorageFileSource::beforeDestroy() catch (const std::exception & e) { // Cannot throw exception from destructor, will write only error - LOG_ERROR(&Poco::Logger::get("~StorageFileSource"), "Failed to rename file {}: {}", file_path_ref, e.what()); + LOG_ERROR(getLogger("~StorageFileSource"), "Failed to rename file {}: {}", file_path_ref, e.what()); continue; } } @@ -1352,7 +1352,7 @@ private: void ReadFromFile::applyFilters() { - auto filter_actions_dag = ActionsDAG::buildFilterActionsDAG(filter_nodes.nodes, {}, context); + auto filter_actions_dag = ActionsDAG::buildFilterActionsDAG(filter_nodes.nodes); const ActionsDAG::Node * predicate = nullptr; if (filter_actions_dag) predicate = filter_actions_dag->getOutputs().at(0); diff --git a/src/Storages/StorageFile.h b/src/Storages/StorageFile.h index b74868597a6..2955eb0f1aa 100644 --- a/src/Storages/StorageFile.h +++ b/src/Storages/StorageFile.h @@ -163,7 +163,7 @@ private: mutable std::shared_timed_mutex rwlock; - Poco::Logger * log = &Poco::Logger::get("StorageFile"); + LoggerPtr log = getLogger("StorageFile"); /// Total number of bytes to read (sums for multiple files in case of globs). Needed for progress bar. 
size_t total_bytes_to_read = 0; diff --git a/src/Storages/StorageFileCluster.cpp b/src/Storages/StorageFileCluster.cpp index c12124f1e07..0cc961bb464 100644 --- a/src/Storages/StorageFileCluster.cpp +++ b/src/Storages/StorageFileCluster.cpp @@ -34,7 +34,7 @@ StorageFileCluster::StorageFileCluster( const ColumnsDescription & columns_, const ConstraintsDescription & constraints_, bool structure_argument_was_provided_) - : IStorageCluster(cluster_name_, table_id_, &Poco::Logger::get("StorageFileCluster (" + table_id_.table_name + ")"), structure_argument_was_provided_) + : IStorageCluster(cluster_name_, table_id_, getLogger("StorageFileCluster (" + table_id_.table_name + ")"), structure_argument_was_provided_) , filename(filename_) , format_name(format_name_) , compression_method(compression_method_) diff --git a/src/Storages/StorageJoin.cpp b/src/Storages/StorageJoin.cpp index efe446a8ccd..b9e082c0b22 100644 --- a/src/Storages/StorageJoin.cpp +++ b/src/Storages/StorageJoin.cpp @@ -104,7 +104,7 @@ void StorageJoin::truncate(const ASTPtr &, const StorageMetadataPtr &, ContextPt if (disk->exists(path)) disk->removeRecursive(path); else - LOG_INFO(&Poco::Logger::get("StorageJoin"), "Path {} is already removed from disk {}", path, disk->getName()); + LOG_INFO(getLogger("StorageJoin"), "Path {} is already removed from disk {}", path, disk->getName()); disk->createDirectories(path); disk->createDirectories(fs::path(path) / "tmp/"); diff --git a/src/Storages/StorageKeeperMap.cpp b/src/Storages/StorageKeeperMap.cpp index e3c960529de..80abaa3ea2d 100644 --- a/src/Storages/StorageKeeperMap.cpp +++ b/src/Storages/StorageKeeperMap.cpp @@ -321,7 +321,7 @@ StorageKeeperMap::StorageKeeperMap( , primary_key(primary_key_) , zookeeper_name(zkutil::extractZooKeeperName(zk_root_path_)) , keys_limit(keys_limit_) - , log(&Poco::Logger::get(fmt::format("StorageKeeperMap ({})", table_id.getNameForLogs()))) + , log(getLogger(fmt::format("StorageKeeperMap ({})", table_id.getNameForLogs()))) { std::string path_prefix = context_->getConfigRef().getString("keeper_map_path_prefix", ""); if (path_prefix.empty()) @@ -776,9 +776,10 @@ void StorageKeeperMap::backupData(BackupEntriesCollector & backup_entries_collec auto with_retries = std::make_shared ( - &Poco::Logger::get(fmt::format("StorageKeeperMapBackup ({})", getStorageID().getNameForLogs())), + getLogger(fmt::format("StorageKeeperMapBackup ({})", getStorageID().getNameForLogs())), [&] { return getClient(); }, WithRetries::KeeperSettings::fromContext(backup_entries_collector.getContext()), + backup_entries_collector.getContext()->getProcessListElement(), [](WithRetries::FaultyKeeper &) {} ); @@ -807,9 +808,10 @@ void StorageKeeperMap::restoreDataFromBackup(RestorerFromBackup & restorer, cons auto with_retries = std::make_shared ( - &Poco::Logger::get(fmt::format("StorageKeeperMapRestore ({})", getStorageID().getNameForLogs())), + getLogger(fmt::format("StorageKeeperMapRestore ({})", getStorageID().getNameForLogs())), [&] { return getClient(); }, WithRetries::KeeperSettings::fromContext(restorer.getContext()), + restorer.getContext()->getProcessListElement(), [](WithRetries::FaultyKeeper &) {} ); diff --git a/src/Storages/StorageKeeperMap.h b/src/Storages/StorageKeeperMap.h index aa9687243d8..9dca96a24a3 100644 --- a/src/Storages/StorageKeeperMap.h +++ b/src/Storages/StorageKeeperMap.h @@ -146,7 +146,7 @@ private: mutable std::mutex init_mutex; mutable std::optional table_is_valid; - Poco::Logger * log; + LoggerPtr log; }; } diff --git a/src/Storages/StorageLog.cpp 
b/src/Storages/StorageLog.cpp index fda628079c0..c7b0a9d0644 100644 --- a/src/Storages/StorageLog.cpp +++ b/src/Storages/StorageLog.cpp @@ -104,6 +104,8 @@ protected: Chunk generate() override; private: + NameAndTypePair getColumnOnDisk(const NameAndTypePair & column) const; + const size_t block_size; const NamesAndTypesList columns; const StorageLog & storage; @@ -149,6 +151,22 @@ private: bool isFinished(); }; +NameAndTypePair LogSource::getColumnOnDisk(const NameAndTypePair & column) const +{ + const auto & storage_columns = storage.columns_with_collected_nested; + + /// A special case when we read subcolumn of shared offsets of Nested. + /// E.g. instead of requested column "n.arr1.size0" we must read column "n.size0" from disk. + auto name_in_storage = column.getNameInStorage(); + if (column.getSubcolumnName() == "size0" && Nested::isSubcolumnOfNested(name_in_storage, storage_columns)) + { + auto nested_name_in_storage = Nested::splitName(name_in_storage).first; + auto new_name = Nested::concatenateName(nested_name_in_storage, column.getSubcolumnName()); + return storage_columns.getColumnOrSubcolumn(GetColumnsOptions::All, new_name); + } + + return column; +} Chunk LogSource::generate() { @@ -169,19 +187,21 @@ Chunk LogSource::generate() for (const auto & name_type : columns) { ColumnPtr column; + auto name_type_on_disk = getColumnOnDisk(name_type); + try { - column = name_type.type->createColumn(); - readData(name_type, column, max_rows_to_read, caches[name_type.getNameInStorage()]); + column = name_type_on_disk.type->createColumn(); + readData(name_type_on_disk, column, max_rows_to_read, caches[name_type_on_disk.getNameInStorage()]); } catch (Exception & e) { - e.addMessage("while reading column " + name_type.name + " at " + fullPath(storage.disk, storage.table_path)); + e.addMessage("while reading column " + name_type_on_disk.name + " at " + fullPath(storage.disk, storage.table_path)); throw; } if (!column->empty()) - res.insert(ColumnWithTypeAndName(column, name_type.type, name_type.name)); + res.insert(ColumnWithTypeAndName(column, name_type_on_disk.type, name_type_on_disk.name)); } if (res) @@ -600,6 +620,7 @@ StorageLog::StorageLog( } } + columns_with_collected_nested = ColumnsDescription{Nested::collect(columns_.getAll())}; total_bytes = file_checker.getTotalSize(); } @@ -820,10 +841,6 @@ Pipe StorageLog::read( if (num_streams > max_streams) num_streams = max_streams; - auto options = GetColumnsOptions(GetColumnsOptions::All).withSubcolumns(); - auto all_columns = storage_snapshot->getColumnsByNames(options, column_names); - all_columns = Nested::convertToSubcolumns(all_columns); - std::vector offsets; offsets.resize(num_data_files, 0); @@ -840,6 +857,12 @@ Pipe StorageLog::read( ReadSettings read_settings = local_context->getReadSettings(); Pipes pipes; + /// Converting to subcolumns of Nested is needed for + /// correct reading of parts of Nested with shared offsets. 
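/// Illustration of the shared-offsets case that LogSource::getColumnOnDisk() above handles:
/// for `n Nested(arr1 Array(UInt32), arr2 Array(String))`, both arrays share a single sizes
/// stream after Nested::collect(), so a request for the subcolumn "n.arr1.size0" has to be
/// redirected to the on-disk column "n.size0" before reading.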
+ auto options = GetColumnsOptions(GetColumnsOptions::All).withSubcolumns(); + auto all_columns = storage_snapshot->getColumnsByNames(options, column_names); + all_columns = Nested::convertToSubcolumns(all_columns); + for (size_t stream = 0; stream < num_streams; ++stream) { if (use_marks_file) diff --git a/src/Storages/StorageLog.h b/src/Storages/StorageLog.h index 5c699b0bb45..d5daed21b3c 100644 --- a/src/Storages/StorageLog.h +++ b/src/Storages/StorageLog.h @@ -133,6 +133,9 @@ private: size_t num_data_files = 0; std::map data_files_by_names; + /// The same as metadata->columns but after call of Nested::collect(). + ColumnsDescription columns_with_collected_nested; + /// The Log engine uses the marks file, and the TinyLog engine doesn't. const bool use_marks_file; diff --git a/src/Storages/StorageLogSettings.h b/src/Storages/StorageLogSettings.h index fa8bb282360..e89ed90fc04 100644 --- a/src/Storages/StorageLogSettings.h +++ b/src/Storages/StorageLogSettings.h @@ -1,5 +1,6 @@ #pragma once +#include #include namespace DB diff --git a/src/Storages/StorageMaterializedView.cpp b/src/Storages/StorageMaterializedView.cpp index ae616b1df04..bfe75e61bcd 100644 --- a/src/Storages/StorageMaterializedView.cpp +++ b/src/Storages/StorageMaterializedView.cpp @@ -21,6 +21,7 @@ #include #include +#include #include #include #include @@ -38,6 +39,7 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; extern const int NOT_IMPLEMENTED; extern const int INCORRECT_QUERY; + extern const int TOO_MANY_MATERIALIZED_VIEWS; } namespace ActionLocks @@ -87,6 +89,16 @@ StorageMaterializedView::StorageMaterializedView( "either ENGINE or an existing table in a TO clause"); auto select = SelectQueryDescription::getSelectQueryFromASTForMatView(query.select->clone(), query.refresh_strategy != nullptr, local_context); + if (select.select_table_id) + { + auto select_table_dependent_views = DatabaseCatalog::instance().getDependentViews(select.select_table_id); + + auto max_materialized_views_count_for_table = getContext()->getServerSettings().max_materialized_views_count_for_table; + if (max_materialized_views_count_for_table && select_table_dependent_views.size() >= max_materialized_views_count_for_table) + throw Exception(ErrorCodes::TOO_MANY_MATERIALIZED_VIEWS, + "Too many materialized views, maximum: {}", max_materialized_views_count_for_table); + } + storage_metadata.setSelectQuery(select); if (!comment.empty()) storage_metadata.setComment(comment); @@ -511,7 +523,7 @@ void StorageMaterializedView::backupData(BackupEntriesCollector & backup_entries if (auto table = tryGetTargetTable()) table->backupData(backup_entries_collector, data_path_in_backup, partitions); else - LOG_WARNING(&Poco::Logger::get("StorageMaterializedView"), + LOG_WARNING(getLogger("StorageMaterializedView"), "Inner table does not exist, will not backup any data"); } } diff --git a/src/Storages/StorageMemory.cpp b/src/Storages/StorageMemory.cpp index 0011e3c57a2..02d81eda59a 100644 --- a/src/Storages/StorageMemory.cpp +++ b/src/Storages/StorageMemory.cpp @@ -79,11 +79,11 @@ public: for (const auto & elem : block) compressed_block.insert({ elem.column->compress(), elem.type, elem.name }); - new_blocks.emplace_back(compressed_block); + new_blocks.push_back(std::move(compressed_block)); } else { - new_blocks.emplace_back(block); + new_blocks.push_back(std::move(block)); } } @@ -472,9 +472,21 @@ void StorageMemory::restoreDataImpl(const BackupPtr & backup, const String & dat while (auto block = block_in.read()) { - new_bytes += block.bytes(); - 
new_rows += block.rows(); - new_blocks.push_back(std::move(block)); + if (compress) + { + Block compressed_block; + for (const auto & elem : block) + compressed_block.insert({ elem.column->compress(), elem.type, elem.name }); + + new_blocks.push_back(std::move(compressed_block)); + } + else + { + new_blocks.push_back(std::move(block)); + } + + new_bytes += new_blocks.back().bytes(); + new_rows += new_blocks.back().rows(); } } diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index 0d67403fa2f..8d75382c91c 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -982,7 +982,7 @@ StorageMerge::StorageListWithLocks ReadFromMerge::getSelectedTables( if (!filter_by_database_virtual_column && !filter_by_table_virtual_column) return res; - auto filter_actions_dag = ActionsDAG::buildFilterActionsDAG(filter_nodes.nodes, {}, context); + auto filter_actions_dag = ActionsDAG::buildFilterActionsDAG(filter_nodes.nodes); if (!filter_actions_dag) return res; diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 4761ccd8b58..bf5aac6f80c 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -5,9 +5,9 @@ #include #include -#include #include #include +#include #include "Common/Exception.h" #include #include @@ -20,25 +20,30 @@ #include #include #include +#include #include -#include #include #include #include #include #include -#include #include +#include +#include +#include #include #include #include -#include -#include -#include -#include -#include #include +#include +#include +#include +#include +#include +#include #include +#include +#include #include #include #include @@ -63,6 +68,7 @@ namespace ErrorCodes extern const int NO_SUCH_DATA_PART; extern const int ABORTED; extern const int SUPPORT_IS_DISABLED; + extern const int TABLE_IS_READ_ONLY; } namespace ActionLocks @@ -72,7 +78,7 @@ namespace ActionLocks extern const StorageActionBlockType PartsMove; } -static MergeTreeTransactionPtr tryGetTransactionForMutation(const MergeTreeMutationEntry & mutation, Poco::Logger * log = nullptr) +static MergeTreeTransactionPtr tryGetTransactionForMutation(const MergeTreeMutationEntry & mutation, LoggerPtr log = nullptr) { assert(!mutation.tid.isEmpty()); if (mutation.tid.isPrehistoric()) @@ -213,16 +219,25 @@ void StorageMergeTree::read( { if (local_context->canUseParallelReplicasOnInitiator() && local_context->getSettingsRef().parallel_replicas_for_non_replicated_merge_tree) { - const auto table_id = getStorageID(); - const auto & modified_query_ast = ClusterProxy::rewriteSelectQuery( - local_context, query_info.query, - table_id.database_name, table_id.table_name, /*remote_table_function_ptr*/nullptr); - + ASTPtr modified_query_ast; Block header; if (local_context->getSettingsRef().allow_experimental_analyzer) - header = InterpreterSelectQueryAnalyzer::getSampleBlock(modified_query_ast, local_context, SelectQueryOptions(processed_stage).analyze()); + { + QueryTreeNodePtr modified_query_tree = query_info.query_tree->clone(); + rewriteJoinToGlobalJoin(modified_query_tree); + modified_query_tree = buildQueryTreeForShard(query_info, modified_query_tree); + header = InterpreterSelectQueryAnalyzer::getSampleBlock( + modified_query_tree, local_context, SelectQueryOptions(processed_stage).analyze()); + modified_query_ast = queryNodeToSelectQuery(modified_query_tree); + } else - header = InterpreterSelectQuery(modified_query_ast, local_context, 
SelectQueryOptions(processed_stage).analyze()).getSampleBlock(); + { + const auto table_id = getStorageID(); + modified_query_ast = ClusterProxy::rewriteSelectQuery(local_context, query_info.query, + table_id.database_name, table_id.table_name, /*remote_table_function_ptr*/nullptr); + header + = InterpreterSelectQuery(modified_query_ast, local_context, SelectQueryOptions(processed_stage).analyze()).getSampleBlock(); + } ClusterProxy::SelectStreamFactory select_stream_factory = ClusterProxy::SelectStreamFactory( @@ -233,7 +248,6 @@ void StorageMergeTree::read( ClusterProxy::executeQueryWithParallelReplicas( query_plan, - getStorageID(), select_stream_factory, modified_query_ast, local_context, @@ -284,6 +298,8 @@ std::optional StorageMergeTree::totalBytesUncompressed(const Settings &) SinkToStoragePtr StorageMergeTree::write(const ASTPtr & /*query*/, const StorageMetadataPtr & metadata_snapshot, ContextPtr local_context, bool /*async_insert*/) { + assertNotReadonly(); + const auto & settings = local_context->getSettingsRef(); return std::make_shared( *this, metadata_snapshot, settings.max_partitions_per_insert_block, local_context); @@ -309,9 +325,6 @@ void StorageMergeTree::checkTableCanBeDropped(ContextPtr query_context) const void StorageMergeTree::drop() { shutdown(true); - /// In case there is read-only disk we cannot allow to call dropAllData(), but dropping tables is allowed. - if (isStaticStorage()) - return; dropAllData(); } @@ -320,6 +333,8 @@ void StorageMergeTree::alter( ContextPtr local_context, AlterLockHolder & table_lock_holder) { + assertNotReadonly(); + if (local_context->getCurrentTransaction() && local_context->getSettingsRef().throw_on_unsupported_query_inside_transaction) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "ALTER METADATA is not supported inside transactions"); @@ -610,6 +625,8 @@ void StorageMergeTree::setMutationCSN(const String & mutation_id, CSN csn) void StorageMergeTree::mutate(const MutationCommands & commands, ContextPtr query_context) { + assertNotReadonly(); + delayMutationOrThrowIfNeeded(nullptr, query_context); /// Validate partition IDs (if any) before starting mutation @@ -673,7 +690,7 @@ std::optional StorageMergeTree::getIncompleteMutationsS const auto & mutation_entry = current_mutation_it->second; - auto txn = tryGetTransactionForMutation(mutation_entry, log); + auto txn = tryGetTransactionForMutation(mutation_entry, log.load()); /// There's no way a transaction may finish before a mutation that was started by the transaction. /// But sometimes we need to check status of an unrelated mutation, in this case we don't care about transactions. 
assert(txn || mutation_entry.tid.isPrehistoric() || from_another_mutation); @@ -800,6 +817,8 @@ std::vector StorageMergeTree::getMutationsStatus() cons CancellationCode StorageMergeTree::killMutation(const String & mutation_id) { + assertNotReadonly(); + LOG_TRACE(log, "Killing mutation {}", mutation_id); UInt64 mutation_version = MergeTreeMutationEntry::tryParseFileName(mutation_id); if (!mutation_version) @@ -819,7 +838,7 @@ CancellationCode StorageMergeTree::killMutation(const String & mutation_id) if (!to_kill) return CancellationCode::NotFound; - if (auto txn = tryGetTransactionForMutation(*to_kill, log)) + if (auto txn = tryGetTransactionForMutation(*to_kill, log.load())) { LOG_TRACE(log, "Cancelling transaction {} which had started mutation {}", to_kill->tid, mutation_id); TransactionLog::instance().rollbackTransaction(txn); @@ -1212,7 +1231,7 @@ MergeMutateSelectedEntryPtr StorageMergeTree::selectPartsToMutate( if (!part->version.isVisible(first_mutation_tid.start_csn, first_mutation_tid)) continue; - txn = tryGetTransactionForMutation(mutations_begin_it->second, log); + txn = tryGetTransactionForMutation(mutations_begin_it->second, log.load()); if (!txn) throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot find transaction {} that has started mutation {} " "that is going to be applied to part {}", @@ -1510,6 +1529,8 @@ bool StorageMergeTree::optimize( bool cleanup, ContextPtr local_context) { + assertNotReadonly(); + if (deduplicate) { if (deduplicate_by_columns.empty()) @@ -1755,6 +1776,8 @@ void StorageMergeTree::renameAndCommitEmptyParts(MutableDataPartsVector & new_pa void StorageMergeTree::truncate(const ASTPtr &, const StorageMetadataPtr &, ContextPtr query_context, TableExclusiveLockHolder &) { + assertNotReadonly(); + { /// Asks to complete merges and does not allow them to start. /// This protects against "revival" of data for a removed partition after completion of merge. 
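The assertNotReadonly() calls added to write(), alter(), mutate(), killMutation(), optimize(), truncate() and replacePartitionFrom() in this file all route through one small guard whose definition appears further down in the diff; condensed here for readability, with mutate() as the representative caller:

void StorageMergeTree::assertNotReadonly() const
{
    if (isStaticStorage())
        throw Exception(ErrorCodes::TABLE_IS_READ_ONLY, "Table is in readonly mode due to static storage");
}

void StorageMergeTree::mutate(const MutationCommands & commands, ContextPtr query_context)
{
    assertNotReadonly();                                   /// reject mutations on static (read-only) storage up front
    delayMutationOrThrowIfNeeded(nullptr, query_context);  /// existing throttling path continues unchanged
    /// ...
}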
@@ -2029,6 +2052,8 @@ PartitionCommandsResultInfo StorageMergeTree::attachPartition( void StorageMergeTree::replacePartitionFrom(const StoragePtr & source_table, const ASTPtr & partition, bool replace, ContextPtr local_context) { + assertNotReadonly(); + auto lock1 = lockForShare(local_context->getCurrentQueryId(), local_context->getSettingsRef().lock_acquire_timeout); auto lock2 = source_table->lockForShare(local_context->getCurrentQueryId(), local_context->getSettingsRef().lock_acquire_timeout); auto merges_blocker = stopMergesAndWait(); @@ -2039,41 +2064,73 @@ void StorageMergeTree::replacePartitionFrom(const StoragePtr & source_table, con ProfileEventsScope profile_events_scope; MergeTreeData & src_data = checkStructureAndGetMergeTreeData(source_table, source_metadata_snapshot, my_metadata_snapshot); - String partition_id = getPartitionIDFromQuery(partition, local_context); + String partition_id = src_data.getPartitionIDFromQuery(partition, local_context); DataPartsVector src_parts = src_data.getVisibleDataPartsVectorInPartition(local_context, partition_id); + + bool attach_empty_partition = !replace && src_parts.empty(); + if (attach_empty_partition) + return; + MutableDataPartsVector dst_parts; std::vector dst_parts_locks; static const String TMP_PREFIX = "tmp_replace_from_"; - for (const DataPartPtr & src_part : src_parts) + const auto my_partition_expression = my_metadata_snapshot->getPartitionKeyAST(); + const auto src_partition_expression = source_metadata_snapshot->getPartitionKeyAST(); + const auto is_partition_exp_different = queryToStringNullable(my_partition_expression) != queryToStringNullable(src_partition_expression); + + if (is_partition_exp_different && !src_parts.empty()) + MergeTreePartitionCompatibilityVerifier::verify(src_data, /* destination_storage */ *this, src_parts); + + for (DataPartPtr & src_part : src_parts) { if (!canReplacePartition(src_part)) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot replace partition '{}' because part '{}' has inconsistent granularity with table", partition_id, src_part->name); - /// This will generate unique name in scope of current server process. - Int64 temp_index = insert_increment.get(); - MergeTreePartInfo dst_part_info(partition_id, temp_index, temp_index, src_part->info.level); - IDataPartStorage::ClonePartParams clone_params{.txn = local_context->getCurrentTransaction()}; - auto [dst_part, part_lock] = cloneAndLoadDataPartOnSameDisk( - src_part, - TMP_PREFIX, - dst_part_info, - my_metadata_snapshot, - clone_params, - local_context->getReadSettings(), - local_context->getWriteSettings()); - dst_parts.emplace_back(std::move(dst_part)); - dst_parts_locks.emplace_back(std::move(part_lock)); - } + /// This will generate unique name in scope of current server process. 
+ auto index = insert_increment.get(); - /// ATTACH empty part set - if (!replace && dst_parts.empty()) - return; + if (is_partition_exp_different) + { + auto [new_partition, new_min_max_index] = createPartitionAndMinMaxIndexFromSourcePart( + src_part, my_metadata_snapshot, local_context); + + auto [dst_part, part_lock] = cloneAndLoadPartOnSameDiskWithDifferentPartitionKey( + src_part, + new_partition, + new_partition.getID(*this), + new_min_max_index, + TMP_PREFIX, + my_metadata_snapshot, + clone_params, + local_context, + index, + index); + + dst_parts.emplace_back(std::move(dst_part)); + dst_parts_locks.emplace_back(std::move(part_lock)); + } + else + { + MergeTreePartInfo dst_part_info(partition_id, index, index, src_part->info.level); + + auto [dst_part, part_lock] = cloneAndLoadDataPartOnSameDisk( + src_part, + TMP_PREFIX, + dst_part_info, + my_metadata_snapshot, + clone_params, + local_context->getReadSettings(), + local_context->getWriteSettings()); + dst_parts.emplace_back(std::move(dst_part)); + dst_parts_locks.emplace_back(std::move(part_lock)); + } + } MergeTreePartInfo drop_range; if (replace) @@ -2427,6 +2484,12 @@ PreparedSetsCachePtr StorageMergeTree::getPreparedSetsCache(Int64 mutation_id) return cache; } +void StorageMergeTree::assertNotReadonly() const +{ + if (isStaticStorage()) + throw Exception(ErrorCodes::TABLE_IS_READ_ONLY, "Table is in readonly mode due to static storage"); +} + void StorageMergeTree::fillNewPartName(MutableDataPartPtr & part, DataPartsLock &) { part->info.min_block = part->info.max_block = increment.get(); diff --git a/src/Storages/StorageMergeTree.h b/src/Storages/StorageMergeTree.h index 51bf6aa42e7..359fa1d262d 100644 --- a/src/Storages/StorageMergeTree.h +++ b/src/Storages/StorageMergeTree.h @@ -273,6 +273,8 @@ private: PreparedSetsCachePtr getPreparedSetsCache(Int64 mutation_id); + void assertNotReadonly() const; + friend class MergeTreeSink; friend class MergeTreeData; friend class MergePlainMergeTreeTask; diff --git a/src/Storages/StorageMySQL.cpp b/src/Storages/StorageMySQL.cpp index 76a439eabaf..da391909dff 100644 --- a/src/Storages/StorageMySQL.cpp +++ b/src/Storages/StorageMySQL.cpp @@ -55,7 +55,7 @@ StorageMySQL::StorageMySQL( , on_duplicate_clause{on_duplicate_clause_} , mysql_settings(mysql_settings_) , pool(std::make_shared(pool_)) - , log(&Poco::Logger::get("StorageMySQL (" + table_id_.table_name + ")")) + , log(getLogger("StorageMySQL (" + table_id_.table_name + ")")) { StorageInMemoryMetadata storage_metadata; diff --git a/src/Storages/StorageMySQL.h b/src/Storages/StorageMySQL.h index 5303117cf5c..daabd66a530 100644 --- a/src/Storages/StorageMySQL.h +++ b/src/Storages/StorageMySQL.h @@ -92,7 +92,7 @@ private: mysqlxx::PoolWithFailoverPtr pool; - Poco::Logger * log; + LoggerPtr log; }; } diff --git a/src/Storages/StoragePostgreSQL.cpp b/src/Storages/StoragePostgreSQL.cpp index 8fe2a161dba..9379cb5a1c6 100644 --- a/src/Storages/StoragePostgreSQL.cpp +++ b/src/Storages/StoragePostgreSQL.cpp @@ -72,7 +72,7 @@ StoragePostgreSQL::StoragePostgreSQL( , remote_table_schema(remote_table_schema_) , on_conflict(on_conflict_) , pool(std::move(pool_)) - , log(&Poco::Logger::get("StoragePostgreSQL (" + table_id_.table_name + ")")) + , log(getLogger("StoragePostgreSQL (" + table_id_.table_name + ")")) { StorageInMemoryMetadata storage_metadata; diff --git a/src/Storages/StoragePostgreSQL.h b/src/Storages/StoragePostgreSQL.h index 725a935aa46..1ed4f7a7611 100644 --- a/src/Storages/StoragePostgreSQL.h +++ b/src/Storages/StoragePostgreSQL.h @@ 
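The replacePartitionFrom hunk above branches on whether the destination and source partition key expressions serialize to the same string (queryToStringNullable of getPartitionKeyAST()), choosing between cloneAndLoadPartOnSameDiskWithDifferentPartitionKey and the existing cloneAndLoadDataPartOnSameDisk. A minimal sketch of that decision, with plain strings standing in for the serialized ASTs:

```cpp
// Sketch of the dispatch: if the partition key expressions differ, each part
// is re-partitioned while cloning; otherwise the original clone path is kept.
#include <iostream>
#include <optional>
#include <string>

enum class ClonePath { SameDisk, DifferentPartitionKey };

// std::optional<std::string> stands in for the serialized partition key AST;
// two unpartitioned tables (both keys empty) compare equal.
ClonePath chooseClonePath(
    const std::optional<std::string> & destination_key,
    const std::optional<std::string> & source_key)
{
    return destination_key == source_key ? ClonePath::SameDisk
                                         : ClonePath::DifferentPartitionKey;
}

int main()
{
    const auto path = chooseClonePath(std::string("toYYYYMM(date)"), std::string("toYYYYMMDD(date)"));
    std::cout << (path == ClonePath::DifferentPartitionKey
                      ? "re-partition each part while cloning\n"
                      : "clone parts as-is on the same disk\n");
}
```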
-79,7 +79,7 @@ private: String on_conflict; postgres::PoolWithFailoverPtr pool; - Poco::Logger * log; + LoggerPtr log; }; } diff --git a/src/Storages/StorageRedis.cpp b/src/Storages/StorageRedis.cpp index ddb1b62c7b0..83bb3c606c9 100644 --- a/src/Storages/StorageRedis.cpp +++ b/src/Storages/StorageRedis.cpp @@ -206,7 +206,7 @@ StorageRedis::StorageRedis( , WithContext(context_->getGlobalContext()) , table_id(table_id_) , configuration(configuration_) - , log(&Poco::Logger::get("StorageRedis")) + , log(getLogger("StorageRedis")) , primary_key(primary_key_) { pool = std::make_shared(configuration.pool_size); diff --git a/src/Storages/StorageRedis.h b/src/Storages/StorageRedis.h index a525a4ed7de..a0eb2bfa580 100644 --- a/src/Storages/StorageRedis.h +++ b/src/Storages/StorageRedis.h @@ -74,7 +74,7 @@ private: StorageID table_id; RedisConfiguration configuration; - Poco::Logger * log; + LoggerPtr log; RedisPoolPtr pool; const String primary_key; diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 1d9a50b18b7..4dd4d240e4e 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -26,22 +26,21 @@ #include -#include #include #include #include #include #include -#include #include #include -#include #include #include #include #include #include +#include #include +#include #include #include #include @@ -53,9 +52,11 @@ #include #include #include +#include #include #include #include +#include #include #include @@ -320,7 +321,7 @@ StorageReplicatedMergeTree::StorageReplicatedMergeTree( attach, [this] (const std::string & name) { enqueuePartForCheck(name); }) , zookeeper_name(zkutil::extractZooKeeperName(zookeeper_path_)) - , zookeeper_path(zkutil::extractZooKeeperPath(zookeeper_path_, /* check_starts_with_slash */ !attach, log)) + , zookeeper_path(zkutil::extractZooKeeperPath(zookeeper_path_, /* check_starts_with_slash */ !attach, log.load())) , replica_name(replica_name_) , replica_path(fs::path(zookeeper_path) / "replicas" / replica_name_) , reader(*this) @@ -704,13 +705,13 @@ void StorageReplicatedMergeTree::createNewZooKeeperNodes() if (settings->allow_remote_fs_zero_copy_replication) { auto disks = getStoragePolicy()->getDisks(); - std::set disk_types_for_zero_copy; + std::set disk_types_for_zero_copy; for (auto & disk : disks) { if (!disk->supportZeroCopyReplication()) continue; - disk_types_for_zero_copy.insert(disk->getDataSourceDescription().type); + disk_types_for_zero_copy.insert(disk->getDataSourceDescription().toString()); } const auto table_shared_id_ = getTableSharedID(); @@ -733,9 +734,9 @@ void StorageReplicatedMergeTree::createNewZooKeeperNodes() } } - for (const auto & disk_type: disk_types_for_zero_copy) + for (const auto & disk_type : disk_types_for_zero_copy) { - auto zero_copy = fmt::format("zero_copy_{}", toString(disk_type)); + auto zero_copy = fmt::format("zero_copy_{}", disk_type); auto zero_copy_path = fs::path(settings->remote_fs_zero_copy_zookeeper_path.toString()) / zero_copy; futures.push_back(zookeeper->asyncTryCreateNoThrow(zero_copy_path, String(), zkutil::CreateMode::Persistent)); futures.push_back(zookeeper->asyncTryCreateNoThrow(zero_copy_path / table_shared_id_, String(), zkutil::CreateMode::Persistent)); @@ -812,7 +813,7 @@ bool StorageReplicatedMergeTree::createTableIfNotExists(const StorageMetadataPtr else { auto metadata_drop_lock = zkutil::EphemeralNodeHolder::existing(drop_lock_path, *zookeeper); - if (!removeTableNodesFromZooKeeper(zookeeper, 
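Many hunks in this patch replace raw Poco::Logger * members with LoggerPtr obtained from getLogger(...). A self-contained toy analogue of the migration; Logger, LoggerPtr and getLogger below are stand-ins for the ClickHouse definitions, shown only to illustrate the shared-ownership shape:

```cpp
// Toy analogue of the Poco::Logger* -> LoggerPtr migration: storages keep a
// shared-ownership handle instead of a raw pointer, so the logger's lifetime
// is tied to its users. These definitions are illustrative only.
#include <iostream>
#include <memory>
#include <string>

struct Logger
{
    explicit Logger(std::string name_) : name(std::move(name_)) {}
    void trace(const std::string & msg) const { std::cout << name << ": " << msg << '\n'; }
    std::string name;
};

using LoggerPtr = std::shared_ptr<Logger>;

LoggerPtr getLogger(const std::string & name) { return std::make_shared<Logger>(name); }

struct StorageMySQLLikeSketch
{
    explicit StorageMySQLLikeSketch(const std::string & table)
        : log(getLogger("StorageMySQL (" + table + ")")) {}

    LoggerPtr log;  // previously: Logger * log;
};

int main()
{
    StorageMySQLLikeSketch storage("hits");
    storage.log->trace("constructed");
}
```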
zookeeper_path, metadata_drop_lock, log)) + if (!removeTableNodesFromZooKeeper(zookeeper, zookeeper_path, metadata_drop_lock, log.load())) { /// Someone is recursively removing table right now, we cannot create new table until old one is removed continue; @@ -1096,6 +1097,8 @@ void StorageReplicatedMergeTree::drop() /// Table can be shut down, restarting thread is not active /// and calling StorageReplicatedMergeTree::getZooKeeper()/getAuxiliaryZooKeeper() won't suffice. zookeeper = getZooKeeperIfTableShutDown(); + /// Update zookeeper client, since existing may be expired, while ZooKeeper is required inside dropAllData(). + current_zookeeper = zookeeper; /// If probably there is metadata in ZooKeeper, we don't allow to drop the table. if (!zookeeper) @@ -1128,12 +1131,12 @@ void StorageReplicatedMergeTree::drop() if (lost_part_count > 0) LOG_INFO(log, "Dropping table with non-zero lost_part_count equal to {}", lost_part_count); } - dropReplica(zookeeper, zookeeper_path, replica_name, log, getSettings(), &has_metadata_in_zookeeper); + dropReplica(zookeeper, zookeeper_path, replica_name, log.load(), getSettings(), &has_metadata_in_zookeeper); } } void StorageReplicatedMergeTree::dropReplica(zkutil::ZooKeeperPtr zookeeper, const String & zookeeper_path, const String & replica, - Poco::Logger * logger, MergeTreeSettingsPtr table_settings, std::optional * has_metadata_out) + LoggerPtr logger, MergeTreeSettingsPtr table_settings, std::optional * has_metadata_out) { if (zookeeper->expired()) throw Exception(ErrorCodes::TABLE_WAS_NOT_DROPPED, "Table was not dropped because ZooKeeper session has expired."); @@ -1251,7 +1254,7 @@ void StorageReplicatedMergeTree::dropReplica(zkutil::ZooKeeperPtr zookeeper, con } } -void StorageReplicatedMergeTree::dropReplica(const String & drop_zookeeper_path, const String & drop_replica, Poco::Logger * logger) +void StorageReplicatedMergeTree::dropReplica(const String & drop_zookeeper_path, const String & drop_replica, LoggerPtr logger) { zkutil::ZooKeeperPtr zookeeper = getZooKeeperIfTableShutDown(); @@ -1266,7 +1269,7 @@ void StorageReplicatedMergeTree::dropReplica(const String & drop_zookeeper_path, bool StorageReplicatedMergeTree::removeTableNodesFromZooKeeper(zkutil::ZooKeeperPtr zookeeper, - const String & zookeeper_path, const zkutil::EphemeralNodeHolder::Ptr & metadata_drop_lock, Poco::Logger * logger) + const String & zookeeper_path, const zkutil::EphemeralNodeHolder::Ptr & metadata_drop_lock, LoggerPtr logger) { bool completely_removed = false; @@ -2713,16 +2716,48 @@ bool StorageReplicatedMergeTree::executeReplaceRange(LogEntry & entry) .copy_instead_of_hardlink = storage_settings_ptr->always_use_copy_instead_of_hardlinks || ((our_zero_copy_enabled || source_zero_copy_enabled) && part_desc->src_table_part->isStoredOnRemoteDiskWithZeroCopySupport()), .metadata_version_to_write = metadata_snapshot->getMetadataVersion() }; - auto [res_part, temporary_part_lock] = cloneAndLoadDataPartOnSameDisk( - part_desc->src_table_part, - TMP_PREFIX + "clone_", - part_desc->new_part_info, - metadata_snapshot, - clone_params, - getContext()->getReadSettings(), - getContext()->getWriteSettings()); - part_desc->res_part = std::move(res_part); - part_desc->temporary_part_lock = std::move(temporary_part_lock); + + const auto my_partition_expression = metadata_snapshot->getPartitionKeyAST(); + const auto src_partition_expression = source_table->getInMemoryMetadataPtr()->getPartitionKeyAST(); + + const auto is_partition_exp_different = 
queryToStringNullable(my_partition_expression) != queryToStringNullable(src_partition_expression); + + if (is_partition_exp_different) + { + auto [new_partition, new_min_max_index] = createPartitionAndMinMaxIndexFromSourcePart( + part_desc->src_table_part, metadata_snapshot, getContext()); + + auto partition_id = new_partition.getID(*this); + + auto [res_part, temporary_part_lock] = cloneAndLoadPartOnSameDiskWithDifferentPartitionKey( + part_desc->src_table_part, + new_partition, + partition_id, + new_min_max_index, + TMP_PREFIX + "clone_", + metadata_snapshot, + clone_params, + getContext(), + part_desc->new_part_info.min_block, + part_desc->new_part_info.max_block); + + part_desc->res_part = std::move(res_part); + part_desc->temporary_part_lock = std::move(temporary_part_lock); + } + else + { + auto [res_part, temporary_part_lock] = cloneAndLoadDataPartOnSameDisk( + part_desc->src_table_part, + TMP_PREFIX + "clone_", + part_desc->new_part_info, + metadata_snapshot, + clone_params, + getContext()->getReadSettings(), + getContext()->getWriteSettings()); + + part_desc->res_part = std::move(res_part); + part_desc->temporary_part_lock = std::move(temporary_part_lock); + } } else if (!part_desc->replica.empty()) { @@ -4181,7 +4216,7 @@ void StorageReplicatedMergeTree::startBeingLeader() return; } - zkutil::checkNoOldLeaders(log, *zookeeper, fs::path(zookeeper_path) / "leader_election"); + zkutil::checkNoOldLeaders(log.load(), *zookeeper, fs::path(zookeeper_path) / "leader_election"); LOG_INFO(log, "Became leader"); is_leader = true; @@ -4275,7 +4310,7 @@ void StorageReplicatedMergeTree::waitForUniquePartsToBeFetchedByOtherReplicas(St auto zookeeper = getZooKeeperIfTableShutDown(); - auto unique_parts_set = findReplicaUniqueParts(replica_name, zookeeper_path, format_version, zookeeper, log); + auto unique_parts_set = findReplicaUniqueParts(replica_name, zookeeper_path, format_version, zookeeper, log.load()); if (unique_parts_set.empty()) { LOG_INFO(log, "Will not wait for unique parts to be fetched because we don't have any unique parts"); @@ -4316,7 +4351,7 @@ void StorageReplicatedMergeTree::waitForUniquePartsToBeFetchedByOtherReplicas(St LOG_INFO(log, "Successfully waited all the parts"); } -std::set StorageReplicatedMergeTree::findReplicaUniqueParts(const String & replica_name_, const String & zookeeper_path_, MergeTreeDataFormatVersion format_version_, zkutil::ZooKeeper::Ptr zookeeper_, Poco::Logger * log_) +std::set StorageReplicatedMergeTree::findReplicaUniqueParts(const String & replica_name_, const String & zookeeper_path_, MergeTreeDataFormatVersion format_version_, zkutil::ZooKeeper::Ptr zookeeper_, LoggerPtr log_) { if (!zookeeper_->exists(fs::path(zookeeper_path_) / "replicas" / replica_name_ / "is_active")) { @@ -4659,6 +4694,9 @@ bool StorageReplicatedMergeTree::fetchPart( zkutil::ZooKeeper::Ptr zookeeper_, bool try_fetch_shared) { + if (isStaticStorage()) + throw Exception(ErrorCodes::TABLE_IS_READ_ONLY, "Table is in readonly mode due to static storage"); + auto zookeeper = zookeeper_ ? 
zookeeper_ : getZooKeeper(); const auto part_info = MergeTreePartInfo::fromPartName(part_name, format_version); @@ -5385,7 +5423,9 @@ void StorageReplicatedMergeTree::readParallelReplicasImpl( if (local_context->getSettingsRef().allow_experimental_analyzer) { - auto modified_query_tree = buildQueryTreeForShard(query_info, query_info.query_tree); + QueryTreeNodePtr modified_query_tree = query_info.query_tree->clone(); + rewriteJoinToGlobalJoin(modified_query_tree); + modified_query_tree = buildQueryTreeForShard(query_info, modified_query_tree); header = InterpreterSelectQueryAnalyzer::getSampleBlock( modified_query_tree, local_context, SelectQueryOptions(processed_stage).analyze()); @@ -5408,7 +5448,6 @@ void StorageReplicatedMergeTree::readParallelReplicasImpl( ClusterProxy::executeQueryWithParallelReplicas( query_plan, - getStorageID(), select_stream_factory, modified_query_ast, local_context, @@ -5495,6 +5534,8 @@ void StorageReplicatedMergeTree::assertNotReadonly() const { if (is_readonly) throw Exception(ErrorCodes::TABLE_IS_READ_ONLY, "Table is in readonly mode (replica path: {})", replica_path); + if (isStaticStorage()) + throw Exception(ErrorCodes::TABLE_IS_READ_ONLY, "Table is in readonly mode due to static storage"); } @@ -5503,6 +5544,8 @@ SinkToStoragePtr StorageReplicatedMergeTree::write(const ASTPtr & /*query*/, con if (!initialization_done) throw Exception(ErrorCodes::NOT_INITIALIZED, "Table is not initialized yet"); + if (isStaticStorage()) + throw Exception(ErrorCodes::TABLE_IS_READ_ONLY, "Table is in readonly mode due to static storage"); /// If table is read-only because it doesn't have metadata in zk yet, then it's not possible to insert into it /// Without this check, we'll write data parts on disk, and afterwards will remove them since we'll fail to commit them into zk /// In case of remote storage like s3, it'll generate unnecessary PUT requests @@ -7852,11 +7895,22 @@ void StorageReplicatedMergeTree::replacePartitionFrom( ProfileEventsScope profile_events_scope; MergeTreeData & src_data = checkStructureAndGetMergeTreeData(source_table, source_metadata_snapshot, metadata_snapshot); - String partition_id = getPartitionIDFromQuery(partition, query_context); + String partition_id = src_data.getPartitionIDFromQuery(partition, query_context); /// NOTE: Some covered parts may be missing in src_all_parts if corresponding log entries are not executed yet. 
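The parallel-replicas hunk above stops building the shard query from query_info.query_tree directly: it clones the tree, applies rewriteJoinToGlobalJoin to the copy, and only then calls buildQueryTreeForShard. A generic clone-before-rewrite sketch under that reading; the Node type and the rewrite below are invented for illustration:

```cpp
// Generic clone-before-rewrite sketch: mutate a private copy of a shared tree
// so the cached original stays untouched.
#include <cassert>
#include <memory>
#include <string>
#include <vector>

struct Node
{
    std::string kind;
    std::vector<std::shared_ptr<Node>> children;

    std::shared_ptr<Node> clone() const
    {
        auto copy = std::make_shared<Node>();
        copy->kind = kind;
        for (const auto & child : children)
            copy->children.push_back(child->clone());
        return copy;
    }
};

void rewriteJoinToGlobalJoinLike(Node & node)
{
    if (node.kind == "JOIN")
        node.kind = "GLOBAL JOIN";
    for (auto & child : node.children)
        rewriteJoinToGlobalJoinLike(*child);
}

int main()
{
    auto original = std::make_shared<Node>();
    original->kind = "JOIN";

    auto modified = original->clone();       // never rewrite the shared tree in place
    rewriteJoinToGlobalJoinLike(*modified);

    assert(original->kind == "JOIN");
    assert(modified->kind == "GLOBAL JOIN");
}
```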
DataPartsVector src_all_parts = src_data.getVisibleDataPartsVectorInPartition(query_context, partition_id); + bool attach_empty_partition = !replace && src_all_parts.empty(); + if (attach_empty_partition) + return; + + const auto my_partition_expression = metadata_snapshot->getPartitionKeyAST(); + const auto src_partition_expression = source_metadata_snapshot->getPartitionKeyAST(); + const auto is_partition_exp_different = queryToStringNullable(my_partition_expression) != queryToStringNullable(src_partition_expression); + + if (is_partition_exp_different && !src_all_parts.empty()) + MergeTreePartitionCompatibilityVerifier::verify(src_data, /* destination_storage */ *this, src_all_parts); + LOG_DEBUG(log, "Cloning {} parts", src_all_parts.size()); static const String TMP_PREFIX = "tmp_replace_from_"; @@ -7911,6 +7965,18 @@ void StorageReplicatedMergeTree::replacePartitionFrom( "Cannot replace partition '{}' because part '{}" "' has inconsistent granularity with table", partition_id, src_part->name); + IMergeTreeDataPart::MinMaxIndex min_max_index = *src_part->minmax_idx; + MergeTreePartition merge_tree_partition = src_part->partition; + + if (is_partition_exp_different) + { + auto [new_partition, new_min_max_index] = createPartitionAndMinMaxIndexFromSourcePart(src_part, metadata_snapshot, query_context); + + merge_tree_partition = new_partition; + min_max_index = new_min_max_index; + partition_id = merge_tree_partition.getID(*this); + } + String hash_hex = src_part->checksums.getTotalChecksumHex(); const bool is_duplicated_part = replaced_parts.contains(hash_hex); replaced_parts.insert(hash_hex); @@ -7929,27 +7995,52 @@ void StorageReplicatedMergeTree::replacePartitionFrom( continue; } - UInt64 index = lock->getNumber(); - MergeTreePartInfo dst_part_info(partition_id, index, index, src_part->info.level); - bool zero_copy_enabled = storage_settings_ptr->allow_remote_fs_zero_copy_replication || dynamic_cast(source_table.get())->getSettings()->allow_remote_fs_zero_copy_replication; + + UInt64 index = lock->getNumber(); + IDataPartStorage::ClonePartParams clone_params { .copy_instead_of_hardlink = storage_settings_ptr->always_use_copy_instead_of_hardlinks || (zero_copy_enabled && src_part->isStoredOnRemoteDiskWithZeroCopySupport()), .metadata_version_to_write = metadata_snapshot->getMetadataVersion() }; - auto [dst_part, part_lock] = cloneAndLoadDataPartOnSameDisk( - src_part, - TMP_PREFIX, - dst_part_info, - metadata_snapshot, - clone_params, - query_context->getReadSettings(), - query_context->getWriteSettings()); + + if (is_partition_exp_different) + { + auto [dst_part, part_lock] = cloneAndLoadPartOnSameDiskWithDifferentPartitionKey( + src_part, + merge_tree_partition, + partition_id, + min_max_index, + TMP_PREFIX, + metadata_snapshot, + clone_params, + query_context, + index, + index); + + dst_parts.emplace_back(dst_part); + dst_parts_locks.emplace_back(std::move(part_lock)); + } + else + { + MergeTreePartInfo dst_part_info(partition_id, index, index, src_part->info.level); + + auto [dst_part, part_lock] = cloneAndLoadDataPartOnSameDisk( + src_part, + TMP_PREFIX, + dst_part_info, + metadata_snapshot, + clone_params, + query_context->getReadSettings(), + query_context->getWriteSettings()); + + dst_parts.emplace_back(dst_part); + dst_parts_locks.emplace_back(std::move(part_lock)); + } + src_parts.emplace_back(src_part); - dst_parts.emplace_back(dst_part); - dst_parts_locks.emplace_back(std::move(part_lock)); ephemeral_locks.emplace_back(std::move(*lock)); 
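When the partition expressions differ, the replicated replacePartitionFrom above recomputes a new partition value and min-max index per source part before cloning. A generic sketch of rebuilding a min-max index for one column; the flat vector stands in for a real part column and is assumed non-empty:

```cpp
// Generic sketch of rebuilding a min-max index for a single column of a part,
// as must happen when a part is re-partitioned under a different key.
#include <algorithm>
#include <cstdint>
#include <iostream>
#include <vector>

struct MinMaxIndexSketch
{
    std::int64_t min = 0;
    std::int64_t max = 0;
};

// Assumes a non-empty column; a real implementation would also handle empty parts.
MinMaxIndexSketch buildMinMaxIndex(const std::vector<std::int64_t> & column_values)
{
    const auto [min_it, max_it] = std::minmax_element(column_values.begin(), column_values.end());
    return {*min_it, *max_it};
}

int main()
{
    const auto index = buildMinMaxIndex({20240101, 20240131, 20240115});
    std::cout << "min=" << index.min << " max=" << index.max << '\n';
}
```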
block_id_paths.emplace_back(block_id_path); part_checksums.emplace_back(hash_hex); @@ -8551,7 +8642,7 @@ void StorageReplicatedMergeTree::onActionLockRemove(StorageActionBlockType actio cleanup_thread.wakeup(); } -bool StorageReplicatedMergeTree::waitForProcessingQueue(UInt64 max_wait_milliseconds, SyncReplicaMode sync_mode) +bool StorageReplicatedMergeTree::waitForProcessingQueue(UInt64 max_wait_milliseconds, SyncReplicaMode sync_mode, std::unordered_set source_replicas) { /// Let's fetch new log entries firstly queue.pullLogsToQueue(getZooKeeperAndAssertNotReadonly(), {}, ReplicatedMergeTreeQueue::SYNC); @@ -8591,7 +8682,8 @@ bool StorageReplicatedMergeTree::waitForProcessingQueue(UInt64 max_wait_millisec if (wait_for_ids.empty()) target_entry_event.set(); }; - const auto handler = queue.addSubscriber(std::move(callback), wait_for_ids, sync_mode); + + const auto handler = queue.addSubscriber(std::move(callback), wait_for_ids, sync_mode, source_replicas); if (!target_entry_event.tryWait(max_wait_milliseconds)) return false; @@ -9094,7 +9186,7 @@ zkutil::EphemeralNodeHolderPtr StorageReplicatedMergeTree::lockSharedDataTempora String id = part_id; boost::replace_all(id, "/", "_"); - String zc_zookeeper_path = getZeroCopyPartPath(*getSettings(), toString(disk->getDataSourceDescription().type), getTableSharedID(), + String zc_zookeeper_path = getZeroCopyPartPath(*getSettings(), disk->getDataSourceDescription().toString(), getTableSharedID(), part_name, zookeeper_path)[0]; String zookeeper_node = fs::path(zc_zookeeper_path) / id / replica_name; @@ -9346,7 +9438,7 @@ StorageReplicatedMergeTree::unlockSharedData(const IMergeTreeDataPart & part, co return unlockSharedDataByID( part.getUniqueId(), shared_id, part.info, replica_name, - part.getDataPartStorage().getDiskType(), zookeeper, *getSettings(), log, zookeeper_path, format_version); + part.getDataPartStorage().getDiskType(), zookeeper, *getSettings(), log.load(), zookeeper_path, format_version); } namespace @@ -9362,7 +9454,7 @@ namespace /// But sometimes we need an opposite. When we deleting all_0_0_0_1 it can be non replicated to other replicas, so we are the only owner of this part. /// In this case when we will drop all_0_0_0_1 we will drop blobs for all_0_0_0. But it will lead to dataloss. For such case we need to check that other replicas /// still need parent part. 
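The waitForProcessingQueue change above threads an extra source_replicas set into the queue subscriber, presumably so callers can wait only for entries that originated from specific replicas. A minimal sketch of a wait filtered by source replica; Entry and shouldWaitFor are illustrative, not the ReplicatedMergeTreeQueue API:

```cpp
// Sketch: a wait restricted to queue entries coming from a given set of source
// replicas; an empty set means "wait for everything".
#include <iostream>
#include <string>
#include <unordered_set>
#include <vector>

struct Entry
{
    std::string id;
    std::string source_replica;
};

bool shouldWaitFor(const Entry & entry, const std::unordered_set<std::string> & source_replicas)
{
    return source_replicas.empty() || source_replicas.contains(entry.source_replica);
}

int main()
{
    std::vector<Entry> queue{{"0000000042", "replica_1"}, {"0000000043", "replica_2"}};
    std::unordered_set<std::string> from{"replica_2"};

    for (const auto & entry : queue)
        if (shouldWaitFor(entry, from))
            std::cout << "waiting for " << entry.id << '\n';
}
```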
-std::pair getParentLockedBlobs(const ZooKeeperWithFaultInjectionPtr & zookeeper_ptr, const std::string & zero_copy_part_path_prefix, const MergeTreePartInfo & part_info, MergeTreeDataFormatVersion format_version, Poco::Logger * log) +std::pair getParentLockedBlobs(const ZooKeeperWithFaultInjectionPtr & zookeeper_ptr, const std::string & zero_copy_part_path_prefix, const MergeTreePartInfo & part_info, MergeTreeDataFormatVersion format_version, LoggerPtr log) { NameSet files_not_to_remove; @@ -9453,7 +9545,7 @@ std::pair getParentLockedBlobs(const ZooKeeperWithFaultInjectionP std::pair StorageReplicatedMergeTree::unlockSharedDataByID( String part_id, const String & table_uuid, const MergeTreePartInfo & part_info, const String & replica_name_, const std::string & disk_type, const ZooKeeperWithFaultInjectionPtr & zookeeper_ptr, const MergeTreeSettings & settings, - Poco::Logger * logger, const String & zookeeper_path_old, MergeTreeDataFormatVersion data_format_version) + LoggerPtr logger, const String & zookeeper_path_old, MergeTreeDataFormatVersion data_format_version) { boost::replace_all(part_id, "/", "_"); @@ -9606,7 +9698,7 @@ MergeTreeData::MutableDataPartPtr StorageReplicatedMergeTree::tryToFetchIfShared if (!(disk->supportZeroCopyReplication() && settings->allow_remote_fs_zero_copy_replication)) return nullptr; - String replica = getSharedDataReplica(part, data_source_description.type); + String replica = getSharedDataReplica(part, data_source_description); /// We can't fetch part when none replicas have this part on a same type remote disk if (replica.empty()) @@ -9616,7 +9708,7 @@ MergeTreeData::MutableDataPartPtr StorageReplicatedMergeTree::tryToFetchIfShared } String StorageReplicatedMergeTree::getSharedDataReplica( - const IMergeTreeDataPart & part, DataSourceType data_source_type) const + const IMergeTreeDataPart & part, const DataSourceDescription & data_source_description) const { String best_replica; @@ -9624,7 +9716,7 @@ String StorageReplicatedMergeTree::getSharedDataReplica( if (!zookeeper) return ""; - Strings zc_zookeeper_paths = getZeroCopyPartPath(*getSettings(), toString(data_source_type), getTableSharedID(), part.name, + Strings zc_zookeeper_paths = getZeroCopyPartPath(*getSettings(), data_source_description.toString(), getTableSharedID(), part.name, zookeeper_path); std::set replicas; @@ -9781,7 +9873,7 @@ std::optional StorageReplicatedMergeTree::getZeroCopyPartPath(const Stri if (!disk || !disk->supportZeroCopyReplication()) return std::nullopt; - return getZeroCopyPartPath(*getSettings(), toString(disk->getDataSourceDescription().type), getTableSharedID(), part_name, zookeeper_path)[0]; + return getZeroCopyPartPath(*getSettings(), disk->getDataSourceDescription().toString(), getTableSharedID(), part_name, zookeeper_path)[0]; } bool StorageReplicatedMergeTree::waitZeroCopyLockToDisappear(const ZeroCopyLock & lock, size_t milliseconds_to_wait) @@ -10141,7 +10233,7 @@ void StorageReplicatedMergeTree::createZeroCopyLockNode( size_t failed_op = zkutil::getFailedOpIndex(error, responses); if (ops[failed_op]->getPath() == zookeeper_node) { - LOG_WARNING(&Poco::Logger::get("ZeroCopyLocks"), "Replacing persistent lock with ephemeral for path {}. It can happen only in case of local part loss", zookeeper_node); + LOG_WARNING(getLogger("ZeroCopyLocks"), "Replacing persistent lock with ephemeral for path {}. 
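Several zero-copy hunks above switch from toString(getDataSourceDescription().type) to getDataSourceDescription().toString() when building the zero_copy_* ZooKeeper paths, keying them by the description's string form rather than the bare enum. A sketch of that path construction with placeholder names and example type strings:

```cpp
// Sketch of how the zero-copy ZooKeeper path is keyed by a disk-type string;
// buildZeroCopyPath and the sample type strings are illustrative only.
#include <iostream>
#include <set>
#include <string>

std::string buildZeroCopyPath(const std::string & zookeeper_prefix, const std::string & disk_type)
{
    return zookeeper_prefix + "/zero_copy_" + disk_type;
}

int main()
{
    // De-duplicate per disk type, as the createNewZooKeeperNodes() hunk does
    // with its set of type strings.
    std::set<std::string> disk_types_for_zero_copy{"s3", "s3", "web"};
    for (const auto & disk_type : disk_types_for_zero_copy)
        std::cout << buildZeroCopyPath("/clickhouse/zero_copy", disk_type) << '\n';
}
```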
It can happen only in case of local part loss", zookeeper_node); replace_existing_lock = true; continue; } @@ -10197,9 +10289,9 @@ bool StorageReplicatedMergeTree::removeSharedDetachedPart(DiskPtr disk, const St std::tie(can_remove, files_not_to_remove) = StorageReplicatedMergeTree::unlockSharedDataByID( id, table_uuid, part_info, detached_replica_name, - toString(disk->getDataSourceDescription().type), + disk->getDataSourceDescription().toString(), std::make_shared(zookeeper), local_context->getReplicatedMergeTreeSettings(), - &Poco::Logger::get("StorageReplicatedMergeTree"), + getLogger("StorageReplicatedMergeTree"), detached_zookeeper_path, MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING); @@ -10299,7 +10391,7 @@ void StorageReplicatedMergeTree::backupData( bool exists = false; Strings mutation_ids; { - ZooKeeperRetriesControl retries_ctl("getMutations", log, zookeeper_retries_info, nullptr); + ZooKeeperRetriesControl retries_ctl("getMutations", log.load(), zookeeper_retries_info, nullptr); retries_ctl.retryLoop([&]() { if (!zookeeper || zookeeper->expired()) @@ -10318,7 +10410,7 @@ void StorageReplicatedMergeTree::backupData( bool mutation_id_exists = false; String mutation; - ZooKeeperRetriesControl retries_ctl("getMutation", log, zookeeper_retries_info, nullptr); + ZooKeeperRetriesControl retries_ctl("getMutation", log.load(), zookeeper_retries_info, nullptr); retries_ctl.retryLoop([&]() { if (!zookeeper || zookeeper->expired()) diff --git a/src/Storages/StorageReplicatedMergeTree.h b/src/Storages/StorageReplicatedMergeTree.h index fb74097d768..c682b1ec88d 100644 --- a/src/Storages/StorageReplicatedMergeTree.h +++ b/src/Storages/StorageReplicatedMergeTree.h @@ -208,7 +208,7 @@ public: /// Wait till replication queue's current last entry is processed or till size becomes 0 /// If timeout is exceeded returns false - bool waitForProcessingQueue(UInt64 max_wait_milliseconds, SyncReplicaMode sync_mode); + bool waitForProcessingQueue(UInt64 max_wait_milliseconds, SyncReplicaMode sync_mode, std::unordered_set source_replicas); /// Get the status of the table. If with_zk_fields = false - do not fill in the fields that require queries to ZK. void getStatus(ReplicatedTableStatus & res, bool with_zk_fields = true); @@ -255,13 +255,13 @@ public: /** Remove a specific replica from zookeeper. 
*/ static void dropReplica(zkutil::ZooKeeperPtr zookeeper, const String & zookeeper_path, const String & replica, - Poco::Logger * logger, MergeTreeSettingsPtr table_settings = nullptr, std::optional * has_metadata_out = nullptr); + LoggerPtr logger, MergeTreeSettingsPtr table_settings = nullptr, std::optional * has_metadata_out = nullptr); - void dropReplica(const String & drop_zookeeper_path, const String & drop_replica, Poco::Logger * logger); + void dropReplica(const String & drop_zookeeper_path, const String & drop_replica, LoggerPtr logger); /// Removes table from ZooKeeper after the last replica was dropped static bool removeTableNodesFromZooKeeper(zkutil::ZooKeeperPtr zookeeper, const String & zookeeper_path, - const zkutil::EphemeralNodeHolder::Ptr & metadata_drop_lock, Poco::Logger * logger); + const zkutil::EphemeralNodeHolder::Ptr & metadata_drop_lock, LoggerPtr logger); /// Schedules job to execute in background pool (merge, mutate, drop range and so on) bool scheduleDataProcessingJob(BackgroundJobsAssignee & assignee) override; @@ -308,7 +308,7 @@ public: const std::string & disk_type, const ZooKeeperWithFaultInjectionPtr & zookeeper_, const MergeTreeSettings & settings, - Poco::Logger * logger, + LoggerPtr logger, const String & zookeeper_path_old, MergeTreeDataFormatVersion data_format_version); @@ -316,7 +316,7 @@ public: MutableDataPartPtr tryToFetchIfShared(const IMergeTreeDataPart & part, const DiskPtr & disk, const String & path) override; /// Get best replica having this partition on a same type remote disk - String getSharedDataReplica(const IMergeTreeDataPart & part, DataSourceType data_source_type) const; + String getSharedDataReplica(const IMergeTreeDataPart & part, const DataSourceDescription & data_source_description) const; inline const String & getReplicaName() const { return replica_name; } @@ -773,7 +773,7 @@ private: String findReplicaHavingCoveringPart(LogEntry & entry, bool active); bool findReplicaHavingCoveringPart(const String & part_name, bool active); String findReplicaHavingCoveringPartImplLowLevel(LogEntry * entry, const String & part_name, String & found_part_name, bool active); - static std::set findReplicaUniqueParts(const String & replica_name_, const String & zookeeper_path_, MergeTreeDataFormatVersion format_version_, zkutil::ZooKeeper::Ptr zookeeper_, Poco::Logger * log_); + static std::set findReplicaUniqueParts(const String & replica_name_, const String & zookeeper_path_, MergeTreeDataFormatVersion format_version_, zkutil::ZooKeeper::Ptr zookeeper_, LoggerPtr log_); /** Download the specified part from the specified replica. * If `to_detached`, the part is placed in the `detached` directory. diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index 3ddbfe8d894..aec967cc95c 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -1160,7 +1160,7 @@ void StorageS3::read( void ReadFromStorageS3Step::applyFilters() { - auto filter_actions_dag = ActionsDAG::buildFilterActionsDAG(filter_nodes.nodes, {}, local_context); + auto filter_actions_dag = ActionsDAG::buildFilterActionsDAG(filter_nodes.nodes); const ActionsDAG::Node * predicate = nullptr; if (filter_actions_dag) predicate = filter_actions_dag->getOutputs().at(0); @@ -1194,7 +1194,7 @@ void ReadFromStorageS3Step::initializePipeline(QueryPipelineBuilder & pipeline, const size_t max_threads = local_context->getSettingsRef().max_threads; const size_t max_parsing_threads = num_streams >= max_threads ? 
1 : (max_threads / std::max(num_streams, 1ul)); - LOG_DEBUG(&Poco::Logger::get("StorageS3"), "Reading in {} streams, {} threads per stream", num_streams, max_parsing_threads); + LOG_DEBUG(getLogger("StorageS3"), "Reading in {} streams, {} threads per stream", num_streams, max_parsing_threads); Pipes pipes; pipes.reserve(num_streams); @@ -1347,7 +1347,7 @@ void StorageS3::truncate(const ASTPtr & /* query */, const StorageMetadataPtr &, } for (const auto & error : response.GetResult().GetErrors()) - LOG_WARNING(&Poco::Logger::get("StorageS3"), "Failed to delete {}, error: {}", error.GetKey(), error.GetMessage()); + LOG_WARNING(getLogger("StorageS3"), "Failed to delete {}, error: {}", error.GetKey(), error.GetMessage()); } StorageS3::Configuration StorageS3::updateConfigurationAndGetCopy(ContextPtr local_context) @@ -1488,25 +1488,13 @@ StorageS3::Configuration StorageS3::getConfiguration(ASTs & engine_args, Context /// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'session_token', 'format', 'compression') /// with optional headers() function - if (engine_args.empty() || engine_args.size() > 6) + size_t count = StorageURL::evalArgsAndCollectHeaders(engine_args, configuration.headers_from_ast, local_context); + + if (count == 0 || count > 6) throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Storage S3 requires 1 to 5 arguments: " "url, [NOSIGN | access_key_id, secret_access_key], name of used format and [compression_method]"); - auto * header_it = StorageURL::collectHeaders(engine_args, configuration.headers_from_ast, local_context); - if (header_it != engine_args.end()) - engine_args.erase(header_it); - - for (auto & engine_arg : engine_args) - engine_arg = evaluateConstantExpressionOrIdentifierAsLiteral(engine_arg, local_context); - - /// Size -> argument indexes - static std::unordered_map> size_to_engine_args - { - {1, {{}}}, - {6, {{"access_key_id", 1}, {"secret_access_key", 2}, {"session_token", 3}, {"format", 4}, {"compression_method", 5}}} - }; - std::unordered_map engine_args_to_idx; bool no_sign_request = false; @@ -1514,7 +1502,7 @@ StorageS3::Configuration StorageS3::getConfiguration(ASTs & engine_args, Context /// - s3(source, format) /// - s3(source, NOSIGN) /// We can distinguish them by looking at the 2-nd argument: check if it's NOSIGN or not. - if (engine_args.size() == 2) + if (count == 2) { auto second_arg = checkAndGetLiteralArgument(engine_args[1], "format/NOSIGN"); if (boost::iequals(second_arg, "NOSIGN")) @@ -1524,10 +1512,10 @@ StorageS3::Configuration StorageS3::getConfiguration(ASTs & engine_args, Context } /// For 3 arguments we support 2 possible variants: /// - s3(source, format, compression_method) - /// - s3(source, access_key_id, access_key_id) + /// - s3(source, access_key_id, secret_access_key) /// - s3(source, NOSIGN, format) /// We can distinguish them by looking at the 2-nd argument: check if it's NOSIGN or format name. - else if (engine_args.size() == 3) + else if (count == 3) { auto second_arg = checkAndGetLiteralArgument(engine_args[1], "format/access_key_id/NOSIGN"); if (boost::iequals(second_arg, "NOSIGN")) @@ -1545,7 +1533,7 @@ StorageS3::Configuration StorageS3::getConfiguration(ASTs & engine_args, Context /// - s3(source, access_key_id, secret_access_key, format) /// - s3(source, NOSIGN, format, compression_method) /// We can distinguish them by looking at the 2-nd argument: check if it's a NOSIGN or not. 
- else if (engine_args.size() == 4) + else if (count == 4) { auto second_arg = checkAndGetLiteralArgument(engine_args[1], "access_key_id/NOSIGN"); if (boost::iequals(second_arg, "NOSIGN")) @@ -1569,7 +1557,7 @@ StorageS3::Configuration StorageS3::getConfiguration(ASTs & engine_args, Context /// For 5 arguments we support 2 possible variants: /// - s3(source, access_key_id, secret_access_key, session_token, format) /// - s3(source, access_key_id, secret_access_key, format, compression) - else if (engine_args.size() == 5) + else if (count == 5) { auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "session_token/format"); if (fourth_arg == "auto" || FormatFactory::instance().getAllFormats().contains(fourth_arg)) @@ -1581,9 +1569,9 @@ StorageS3::Configuration StorageS3::getConfiguration(ASTs & engine_args, Context engine_args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"session_token", 3}, {"format", 4}}; } } - else + else if (count == 6) { - engine_args_to_idx = size_to_engine_args[engine_args.size()]; + engine_args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"session_token", 3}, {"format", 4}, {"compression_method", 5}}; } /// This argument is always the first diff --git a/src/Storages/StorageS3.h b/src/Storages/StorageS3.h index b90a0d394cb..8d020c5e9a2 100644 --- a/src/Storages/StorageS3.h +++ b/src/Storages/StorageS3.h @@ -242,7 +242,7 @@ private: size_t max_parsing_threads = 1; bool need_only_count; - Poco::Logger * log = &Poco::Logger::get("StorageS3Source"); + LoggerPtr log = getLogger("StorageS3Source"); ThreadPool create_reader_pool; ThreadPoolCallbackRunner create_reader_scheduler; diff --git a/src/Storages/StorageS3Cluster.cpp b/src/Storages/StorageS3Cluster.cpp index e1738056e9d..25c2b42b766 100644 --- a/src/Storages/StorageS3Cluster.cpp +++ b/src/Storages/StorageS3Cluster.cpp @@ -40,7 +40,7 @@ StorageS3Cluster::StorageS3Cluster( const ConstraintsDescription & constraints_, ContextPtr context_, bool structure_argument_was_provided_) - : IStorageCluster(cluster_name_, table_id_, &Poco::Logger::get("StorageS3Cluster (" + table_id_.table_name + ")"), structure_argument_was_provided_) + : IStorageCluster(cluster_name_, table_id_, getLogger("StorageS3Cluster (" + table_id_.table_name + ")"), structure_argument_was_provided_) , s3_configuration{configuration_} { context_->getGlobalContext()->getRemoteHostFilter().checkURL(configuration_.url.uri); diff --git a/src/Storages/StorageSQLite.cpp b/src/Storages/StorageSQLite.cpp index d5db5763da9..85c5e16a1bf 100644 --- a/src/Storages/StorageSQLite.cpp +++ b/src/Storages/StorageSQLite.cpp @@ -42,7 +42,7 @@ StorageSQLite::StorageSQLite( , remote_table_name(remote_table_name_) , database_path(database_path_) , sqlite_db(sqlite_db_) - , log(&Poco::Logger::get("StorageSQLite (" + table_id_.table_name + ")")) + , log(getLogger("StorageSQLite (" + table_id_.table_name + ")")) { StorageInMemoryMetadata storage_metadata; diff --git a/src/Storages/StorageSQLite.h b/src/Storages/StorageSQLite.h index 9da040cbd5c..baacdfb4899 100644 --- a/src/Storages/StorageSQLite.h +++ b/src/Storages/StorageSQLite.h @@ -50,7 +50,7 @@ private: String remote_table_name; String database_path; SQLitePtr sqlite_db; - Poco::Logger * log; + LoggerPtr log; }; } diff --git a/src/Storages/StorageSet.cpp b/src/Storages/StorageSet.cpp index 1b0db1da800..7d7f3113cdb 100644 --- a/src/Storages/StorageSet.cpp +++ b/src/Storages/StorageSet.cpp @@ -218,7 +218,7 @@ void StorageSet::truncate(const ASTPtr &, const StorageMetadataPtr & metadata_sn if 
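The rewritten StorageS3::getConfiguration dispatches on the positional argument count left over after evalArgsAndCollectHeaders has separated out headers(...). A compressed sketch of that count-based dispatch; only the unambiguous case is spelled out, the rest are resolved by inspecting argument values as the hunks above do:

```cpp
// Sketch of the count-based dispatch over positional S3 engine arguments.
#include <cstddef>
#include <iostream>
#include <map>
#include <string>

std::map<std::string, std::size_t> mapS3Args(std::size_t count)
{
    if (count == 6)
        return {{"access_key_id", 1}, {"secret_access_key", 2},
                {"session_token", 3}, {"format", 4}, {"compression_method", 5}};

    // Counts 2 through 5 are ambiguous (NOSIGN vs. format name vs. credentials)
    // and are resolved by inspecting the second and fourth argument values.
    return {};
}

int main()
{
    for (const auto & [name, index] : mapS3Args(6))
        std::cout << name << " -> positional argument #" << index << '\n';
}
```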
(disk->exists(path)) disk->removeRecursive(path); else - LOG_INFO(&Poco::Logger::get("StorageSet"), "Path {} is already removed from disk {}", path, disk->getName()); + LOG_INFO(getLogger("StorageSet"), "Path {} is already removed from disk {}", path, disk->getName()); disk->createDirectories(path); disk->createDirectories(fs::path(path) / "tmp/"); @@ -284,7 +284,7 @@ void StorageSetOrJoinBase::restoreFromFile(const String & file_path) finishInsert(); /// TODO Add speed, compressed bytes, data volume in memory, compression ratio ... Generalize all statistics logging in project. - LOG_INFO(&Poco::Logger::get("StorageSetOrJoinBase"), "Loaded from backup file {}. {} rows, {}. State has {} unique rows.", + LOG_INFO(getLogger("StorageSetOrJoinBase"), "Loaded from backup file {}. {} rows, {}. State has {} unique rows.", file_path, info.rows, ReadableSize(info.bytes), getSize(ctx)); } diff --git a/src/Storages/StorageStripeLog.cpp b/src/Storages/StorageStripeLog.cpp index 91f6246d101..359f142949f 100644 --- a/src/Storages/StorageStripeLog.cpp +++ b/src/Storages/StorageStripeLog.cpp @@ -277,7 +277,7 @@ StorageStripeLog::StorageStripeLog( , index_file_path(table_path + "index.mrk") , file_checker(disk, table_path + "sizes.json") , max_compress_block_size(context_->getSettings().max_compress_block_size) - , log(&Poco::Logger::get("StorageStripeLog")) + , log(getLogger("StorageStripeLog")) { StorageInMemoryMetadata storage_metadata; storage_metadata.setColumns(columns_); diff --git a/src/Storages/StorageStripeLog.h b/src/Storages/StorageStripeLog.h index a05117a9ad5..c7f3e7e21e6 100644 --- a/src/Storages/StorageStripeLog.h +++ b/src/Storages/StorageStripeLog.h @@ -123,7 +123,7 @@ private: mutable std::shared_timed_mutex rwlock; - Poco::Logger * log; + LoggerPtr log; }; } diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index 5041b95ecc8..433f4ed7700 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -498,13 +498,13 @@ std::pair> StorageURLSource: throw Exception(ErrorCodes::NETWORK_ERROR, "All uri ({}) options are unreachable: {}", options, first_exception_message); } -void StorageURLSource::addNumRowsToCache(const DB::String & uri, size_t num_rows) +void StorageURLSource::addNumRowsToCache(const String & uri, size_t num_rows) { auto cache_key = getKeyForSchemaCache(uri, format, format_settings, getContext()); StorageURL::getSchemaCache(getContext()).addNumRows(cache_key, num_rows); } -std::optional StorageURLSource::tryGetNumRowsFromCache(const DB::String & uri, std::optional last_mod_time) +std::optional StorageURLSource::tryGetNumRowsFromCache(const String & uri, std::optional last_mod_time) { auto cache_key = getKeyForSchemaCache(uri, format, format_settings, getContext()); auto get_last_mod_time = [&]() -> std::optional @@ -930,7 +930,7 @@ private: void ReadFromURL::applyFilters() { - auto filter_actions_dag = ActionsDAG::buildFilterActionsDAG(filter_nodes.nodes, {}, context); + auto filter_actions_dag = ActionsDAG::buildFilterActionsDAG(filter_nodes.nodes); const ActionsDAG::Node * predicate = nullptr; if (filter_actions_dag) predicate = filter_actions_dag->getOutputs().at(0); @@ -1284,7 +1284,7 @@ StorageURLWithFailover::StorageURLWithFailover( { Poco::URI poco_uri(uri_option); context_->getRemoteHostFilter().checkURL(poco_uri); - LOG_DEBUG(&Poco::Logger::get("StorageURLDistributed"), "Adding URL option: {}", uri_option); + LOG_DEBUG(getLogger("StorageURLDistributed"), "Adding URL option: {}", uri_option); uri_options.emplace_back(uri_option); } 
} @@ -1324,7 +1324,7 @@ FormatSettings StorageURL::getFormatSettingsFromArgs(const StorageFactory::Argum return format_settings; } -ASTs::iterator StorageURL::collectHeaders( +size_t StorageURL::evalArgsAndCollectHeaders( ASTs & url_function_args, HTTPHeaderEntries & header_entries, ContextPtr context) { ASTs::iterator headers_it = url_function_args.end(); @@ -1382,7 +1382,11 @@ ASTs::iterator StorageURL::collectHeaders( (*arg_it) = evaluateConstantExpressionOrIdentifierAsLiteral((*arg_it), context); } - return headers_it; + if (headers_it == url_function_args.end()) + return url_function_args.size(); + + std::rotate(headers_it, std::next(headers_it), url_function_args.end()); + return url_function_args.size() - 1; } void StorageURL::processNamedCollectionResult(Configuration & configuration, const NamedCollection & collection) @@ -1412,21 +1416,19 @@ StorageURL::Configuration StorageURL::getConfiguration(ASTs & args, ContextPtr l if (auto named_collection = tryGetNamedCollectionWithOverrides(args, local_context)) { StorageURL::processNamedCollectionResult(configuration, *named_collection); - collectHeaders(args, configuration.headers, local_context); + evalArgsAndCollectHeaders(args, configuration.headers, local_context); } else { - if (args.empty() || args.size() > 3) + size_t count = evalArgsAndCollectHeaders(args, configuration.headers, local_context); + + if (count == 0 || count > 3) throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, bad_arguments_error_message); - auto * header_it = collectHeaders(args, configuration.headers, local_context); - if (header_it != args.end()) - args.erase(header_it); - configuration.url = checkAndGetLiteralArgument(args[0], "url"); - if (args.size() > 1) + if (count > 1) configuration.format = checkAndGetLiteralArgument(args[1], "format"); - if (args.size() == 3) + if (count == 3) configuration.compression_method = checkAndGetLiteralArgument(args[2], "compression_method"); } diff --git a/src/Storages/StorageURL.h b/src/Storages/StorageURL.h index 07d4d0cad38..c8b8d0942f4 100644 --- a/src/Storages/StorageURL.h +++ b/src/Storages/StorageURL.h @@ -294,7 +294,10 @@ public: static Configuration getConfiguration(ASTs & args, ContextPtr context); - static ASTs::iterator collectHeaders(ASTs & url_function_args, HTTPHeaderEntries & header_entries, ContextPtr context); + /// Does evaluateConstantExpressionOrIdentifierAsLiteral() on all arguments. + /// If `headers(...)` argument is present, parses it and moves it to the end of the array. + /// Returns number of arguments excluding `headers(...)`. 
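The new evalArgsAndCollectHeaders keeps the headers(...) argument in the array but rotates it to the end and returns the count of the remaining positional arguments. A standalone illustration of that rotate-and-count step on a plain vector of strings:

```cpp
// Standalone illustration of the rotate-and-count step: the headers(...)
// argument is moved to the end of the list and the returned count excludes it.
#include <algorithm>
#include <cassert>
#include <string>
#include <vector>

std::size_t moveHeadersToEndAndCount(std::vector<std::string> & args)
{
    auto headers_it = std::find(args.begin(), args.end(), "headers(...)");
    if (headers_it == args.end())
        return args.size();

    std::rotate(headers_it, std::next(headers_it), args.end());
    return args.size() - 1;
}

int main()
{
    std::vector<std::string> args{"url", "headers(...)", "format", "compression"};
    std::size_t count = moveHeadersToEndAndCount(args);

    assert(count == 3);
    assert(args.back() == "headers(...)");  // still present, just not positional
}
```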
+ static size_t evalArgsAndCollectHeaders(ASTs & url_function_args, HTTPHeaderEntries & header_entries, ContextPtr context); static void processNamedCollectionResult(Configuration & configuration, const NamedCollection & collection); }; diff --git a/src/Storages/StorageURLCluster.cpp b/src/Storages/StorageURLCluster.cpp index a0b5fcd6f28..2365887983d 100644 --- a/src/Storages/StorageURLCluster.cpp +++ b/src/Storages/StorageURLCluster.cpp @@ -45,7 +45,7 @@ StorageURLCluster::StorageURLCluster( const ConstraintsDescription & constraints_, const StorageURL::Configuration & configuration_, bool structure_argument_was_provided_) - : IStorageCluster(cluster_name_, table_id_, &Poco::Logger::get("StorageURLCluster (" + table_id_.table_name + ")"), structure_argument_was_provided_) + : IStorageCluster(cluster_name_, table_id_, getLogger("StorageURLCluster (" + table_id_.table_name + ")"), structure_argument_was_provided_) , uri(uri_) { context_->getRemoteHostFilter().checkURL(Poco::URI(uri)); diff --git a/src/Storages/StorageXDBC.cpp b/src/Storages/StorageXDBC.cpp index a274b1ba4db..259abefb00f 100644 --- a/src/Storages/StorageXDBC.cpp +++ b/src/Storages/StorageXDBC.cpp @@ -45,7 +45,7 @@ StorageXDBC::StorageXDBC( , bridge_helper(bridge_helper_) , remote_database_name(remote_database_name_) , remote_table_name(remote_table_name_) - , log(&Poco::Logger::get("Storage" + bridge_helper->getName())) + , log(getLogger("Storage" + bridge_helper->getName())) { uri = bridge_helper->getMainURI().toString(); } diff --git a/src/Storages/StorageXDBC.h b/src/Storages/StorageXDBC.h index fe678785dc2..cba15a83226 100644 --- a/src/Storages/StorageXDBC.h +++ b/src/Storages/StorageXDBC.h @@ -47,7 +47,7 @@ private: std::string remote_database_name; std::string remote_table_name; - Poco::Logger * log; + LoggerPtr log; std::string getReadMethod() const override; diff --git a/src/Storages/System/IStorageSystemOneBlock.h b/src/Storages/System/IStorageSystemOneBlock.h index e09b27adf32..3b2807965a4 100644 --- a/src/Storages/System/IStorageSystemOneBlock.h +++ b/src/Storages/System/IStorageSystemOneBlock.h @@ -38,7 +38,7 @@ public: explicit IStorageSystemOneBlock(const StorageID & table_id_) : IStorage(table_id_) { StorageInMemoryMetadata storage_metadata; - storage_metadata.setColumns(ColumnsDescription(Self::getNamesAndTypes(), Self::getNamesAndAliases())); + storage_metadata.setColumns(Self::getColumnsDescription()); setInMemoryMetadata(storage_metadata); } diff --git a/src/Storages/System/StorageSystemAggregateFunctionCombinators.cpp b/src/Storages/System/StorageSystemAggregateFunctionCombinators.cpp index 6a7d9a2a948..8e32a137fcb 100644 --- a/src/Storages/System/StorageSystemAggregateFunctionCombinators.cpp +++ b/src/Storages/System/StorageSystemAggregateFunctionCombinators.cpp @@ -4,11 +4,12 @@ namespace DB { -NamesAndTypesList StorageSystemAggregateFunctionCombinators::getNamesAndTypes() +ColumnsDescription StorageSystemAggregateFunctionCombinators::getColumnsDescription() { - return { - {"name", std::make_shared()}, - {"is_internal", std::make_shared()}, + return ColumnsDescription + { + {"name", std::make_shared(), "The name of the combinator."}, + {"is_internal", std::make_shared(), "Whether this combinator is for internal usage only."}, }; } diff --git a/src/Storages/System/StorageSystemAggregateFunctionCombinators.h b/src/Storages/System/StorageSystemAggregateFunctionCombinators.h index a2ef26109a3..6f3f90b58af 100644 --- a/src/Storages/System/StorageSystemAggregateFunctionCombinators.h +++ 
b/src/Storages/System/StorageSystemAggregateFunctionCombinators.h @@ -19,6 +19,6 @@ public: return "SystemAggregateFunctionCombinators"; } - static NamesAndTypesList getNamesAndTypes(); + static ColumnsDescription getColumnsDescription(); }; } diff --git a/src/Storages/System/StorageSystemAsyncLoader.cpp b/src/Storages/System/StorageSystemAsyncLoader.cpp index aa182e9a4f3..c56a3c3ce78 100644 --- a/src/Storages/System/StorageSystemAsyncLoader.cpp +++ b/src/Storages/System/StorageSystemAsyncLoader.cpp @@ -45,31 +45,32 @@ namespace } } -NamesAndTypesList StorageSystemAsyncLoader::getNamesAndTypes() +ColumnsDescription StorageSystemAsyncLoader::getColumnsDescription() { - return { - { "job", std::make_shared() }, - { "job_id", std::make_shared() }, - { "dependencies", std::make_shared(std::make_shared()) }, - { "dependencies_left", std::make_shared() }, - { "status", std::make_shared(getTypeEnumValues()) }, - { "is_executing", std::make_shared() }, - { "is_blocked", std::make_shared() }, - { "is_ready", std::make_shared() }, - { "elapsed", std::make_shared()}, - { "pool_id", std::make_shared() }, - { "pool", std::make_shared() }, - { "priority", std::make_shared() }, - { "execution_pool_id", std::make_shared() }, - { "execution_pool", std::make_shared() }, - { "execution_priority", std::make_shared() }, - { "ready_seqno", std::make_shared(std::make_shared()) }, - { "waiters", std::make_shared() }, - { "exception", std::make_shared(std::make_shared()) }, - { "schedule_time", std::make_shared(TIME_SCALE) }, - { "enqueue_time", std::make_shared(std::make_shared(TIME_SCALE)) }, - { "start_time", std::make_shared(std::make_shared(TIME_SCALE)) }, - { "finish_time", std::make_shared(std::make_shared(TIME_SCALE)) }, + return ColumnsDescription + { + {"job", std::make_shared(), "Job name (may be not unique)."}, + {"job_id", std::make_shared(), "Unique ID of the job."}, + {"dependencies", std::make_shared(std::make_shared()), "List of IDs of jobs that should be done before this job."}, + {"dependencies_left", std::make_shared(), "Current number of dependencies left to be done."}, + {"status", std::make_shared(getTypeEnumValues()), "Current load status of a job: PENDING: Load job is not started yet. OK: Load job executed and was successful. FAILED: Load job executed and failed. CANCELED: Load job is not going to be executed due to removal or dependency failure."}, + {"is_executing", std::make_shared(), "The job is currently being executed by a worker."}, + {"is_blocked", std::make_shared(), "The job waits for its dependencies to be done."}, + {"is_ready", std::make_shared(), "The job is ready to be executed and waits for a worker."}, + {"elapsed", std::make_shared(), "Seconds elapsed since start of execution. Zero if job is not started. Total execution time if job finished."}, + {"pool_id", std::make_shared(), "ID of a pool currently assigned to the job."}, + {"pool", std::make_shared(), "Name of `pool_id` pool."}, + {"priority", std::make_shared(), "Priority of `pool_id` pool."}, + {"execution_pool_id", std::make_shared(), "ID of a pool the job is executed in. Equals initially assigned pool before execution starts."}, + {"execution_pool", std::make_shared(), "Name of `execution_pool_id` pool."}, + {"execution_priority", std::make_shared(), "Priority of execution_pool_id pool."}, + {"ready_seqno", std::make_shared(std::make_shared()), "Not null for ready jobs. Worker pulls the next job to be executed from a ready queue of its pool. 
If there are multiple ready jobs, then job with the lowest value of `ready_seqno` is picked."}, + {"waiters", std::make_shared(), "The number of threads waiting on this job."}, + {"exception", std::make_shared(std::make_shared()), "Not null for failed and canceled jobs. Holds error message raised during query execution or error leading to cancelling of this job along with dependency failure chain of job names."}, + {"schedule_time", std::make_shared(TIME_SCALE), "Time when job was created and scheduled to be executed (usually with all its dependencies)."}, + {"enqueue_time", std::make_shared(std::make_shared(TIME_SCALE)), "Time when job became ready and was enqueued into a ready queue of it's pool. Null if the job is not ready yet."}, + {"start_time", std::make_shared(std::make_shared(TIME_SCALE)), "Time when worker dequeues the job from ready queue and start its execution. Null if the job is not started yet."}, + {"finish_time", std::make_shared(std::make_shared(TIME_SCALE)), "Time when job execution is finished. Null if the job is not finished yet."}, }; } diff --git a/src/Storages/System/StorageSystemAsyncLoader.h b/src/Storages/System/StorageSystemAsyncLoader.h index 28c17392d08..fa0ce11efe3 100644 --- a/src/Storages/System/StorageSystemAsyncLoader.h +++ b/src/Storages/System/StorageSystemAsyncLoader.h @@ -15,7 +15,7 @@ class StorageSystemAsyncLoader final : public IStorageSystemOneBlock()}, - {"database", std::make_shared()}, - {"table", std::make_shared()}, - {"format", std::make_shared()}, - {"first_update", std::make_shared(TIME_SCALE)}, - {"total_bytes", std::make_shared()}, - {"entries.query_id", std::make_shared(std::make_shared())}, - {"entries.bytes", std::make_shared(std::make_shared())}, + {"query", std::make_shared(), "Query text."}, + {"database", std::make_shared(), "Database name."}, + {"table", std::make_shared(), "Table name."}, + {"format", std::make_shared(), "Format name."}, + {"first_update", std::make_shared(TIME_SCALE), "First insert time with microseconds resolution."}, + {"total_bytes", std::make_shared(), "Total number of bytes waiting in the queue."}, + {"entries.query_id", std::make_shared(std::make_shared()), "Array of query ids of the inserts waiting in the queue."}, + {"entries.bytes", std::make_shared(std::make_shared()), "Array of bytes of each insert query waiting in the queue."}, }; } diff --git a/src/Storages/System/StorageSystemAsynchronousInserts.h b/src/Storages/System/StorageSystemAsynchronousInserts.h index 01e51dca6fa..891494ffbeb 100644 --- a/src/Storages/System/StorageSystemAsynchronousInserts.h +++ b/src/Storages/System/StorageSystemAsynchronousInserts.h @@ -12,7 +12,7 @@ class StorageSystemAsynchronousInserts final : public IStorageSystemOneBlock()}, - {"value", std::make_shared()}, - {"description", std::make_shared()}, + return ColumnsDescription + { + {"metric", std::make_shared(), "Metric name."}, + {"value", std::make_shared(), "Metric value."}, + {"description", std::make_shared(), "Metric description."}, }; } diff --git a/src/Storages/System/StorageSystemAsynchronousMetrics.h b/src/Storages/System/StorageSystemAsynchronousMetrics.h index e5b2070e96f..026377c77a0 100644 --- a/src/Storages/System/StorageSystemAsynchronousMetrics.h +++ b/src/Storages/System/StorageSystemAsynchronousMetrics.h @@ -18,7 +18,7 @@ public: std::string getName() const override { return "SystemAsynchronousMetrics"; } - static NamesAndTypesList getNamesAndTypes(); + static ColumnsDescription getColumnsDescription(); private: const AsynchronousMetrics & 
async_metrics; diff --git a/src/Storages/System/StorageSystemBackups.cpp b/src/Storages/System/StorageSystemBackups.cpp index a5dd7ea6e0b..17fb56e0a92 100644 --- a/src/Storages/System/StorageSystemBackups.cpp +++ b/src/Storages/System/StorageSystemBackups.cpp @@ -15,26 +15,26 @@ namespace DB { -NamesAndTypesList StorageSystemBackups::getNamesAndTypes() +ColumnsDescription StorageSystemBackups::getColumnsDescription() { - NamesAndTypesList names_and_types{ - {"id", std::make_shared()}, - {"name", std::make_shared()}, - {"base_backup_name", std::make_shared()}, - {"status", std::make_shared(getBackupStatusEnumValues())}, - {"error", std::make_shared()}, - {"start_time", std::make_shared()}, - {"end_time", std::make_shared()}, - {"num_files", std::make_shared()}, - {"total_size", std::make_shared()}, - {"num_entries", std::make_shared()}, - {"uncompressed_size", std::make_shared()}, - {"compressed_size", std::make_shared()}, - {"files_read", std::make_shared()}, - {"bytes_read", std::make_shared()}, - {"ProfileEvents", std::make_shared(std::make_shared(), std::make_shared())}, + return ColumnsDescription + { + {"id", std::make_shared(), "Operation ID, can be either passed via SETTINGS id=... or be randomly generated UUID."}, + {"name", std::make_shared(), "Operation name, a string like `Disk('backups', 'my_backup')`"}, + {"base_backup_name", std::make_shared(), "Base Backup Operation name, a string like `Disk('backups', 'my_base_backup')`"}, + {"status", std::make_shared(getBackupStatusEnumValues()), "Status of backup or restore operation."}, + {"error", std::make_shared(), "The error message if any."}, + {"start_time", std::make_shared(), "The time when operation started."}, + {"end_time", std::make_shared(), "The time when operation finished."}, + {"num_files", std::make_shared(), "The number of files stored in the backup."}, + {"total_size", std::make_shared(), "The total size of files stored in the backup."}, + {"num_entries", std::make_shared(), "The number of entries in the backup, i.e. 
the number of files inside the folder if the backup is stored as a folder."}, + {"uncompressed_size", std::make_shared(), "The uncompressed size of the backup."}, + {"compressed_size", std::make_shared(), "The compressed size of the backup."}, + {"files_read", std::make_shared(), "Returns the number of files read during RESTORE from this backup."}, + {"bytes_read", std::make_shared(), "Returns the total size of files read during RESTORE from this backup."}, + {"ProfileEvents", std::make_shared(std::make_shared(), std::make_shared()), "All the profile events captured during this operation."}, }; - return names_and_types; } diff --git a/src/Storages/System/StorageSystemBackups.h b/src/Storages/System/StorageSystemBackups.h index f23cd1a223a..a081bd52b4c 100644 --- a/src/Storages/System/StorageSystemBackups.h +++ b/src/Storages/System/StorageSystemBackups.h @@ -11,7 +11,7 @@ class StorageSystemBackups final : public IStorageSystemOneBlock()}, - {"value", std::make_shared()}, + return ColumnsDescription + { + {"name", std::make_shared(), "Name of the build option."}, + {"value", std::make_shared(), "Value of the build option."}, }; } diff --git a/src/Storages/System/StorageSystemBuildOptions.h b/src/Storages/System/StorageSystemBuildOptions.h index 0fe891c11e3..7c0bbf6b5fd 100644 --- a/src/Storages/System/StorageSystemBuildOptions.h +++ b/src/Storages/System/StorageSystemBuildOptions.h @@ -22,7 +22,7 @@ public: std::string getName() const override { return "SystemBuildOptions"; } - static NamesAndTypesList getNamesAndTypes(); + static ColumnsDescription getColumnsDescription(); }; } diff --git a/src/Storages/System/StorageSystemCertificates.cpp b/src/Storages/System/StorageSystemCertificates.cpp index 0077d184a8e..c372e4b32b0 100644 --- a/src/Storages/System/StorageSystemCertificates.cpp +++ b/src/Storages/System/StorageSystemCertificates.cpp @@ -17,9 +17,10 @@ namespace DB { -NamesAndTypesList StorageSystemCertificates::getNamesAndTypes() +ColumnsDescription StorageSystemCertificates::getColumnsDescription() { - return + /// TODO: Fill in all the comments. 
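/// Illustrative sketch only (assumed wording, not part of this patch): once the TODO above is
/// addressed, these entries would carry description strings in the same way as the other system
/// tables touched by this change, for example:
///     {"version", std::make_shared<DataTypeNumber<Int32>>(), "Version of the X.509 certificate."},
///     {"serial_number", std::make_shared<DataTypeNullable>(std::make_shared<DataTypeString>()), "Serial number of the certificate assigned by the issuer."},
/// The column types mirror the existing definitions below; only the description text is assumed.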
+ return ColumnsDescription { {"version", std::make_shared>()}, {"serial_number", std::make_shared(std::make_shared())}, diff --git a/src/Storages/System/StorageSystemCertificates.h b/src/Storages/System/StorageSystemCertificates.h index 4df0010d0b4..f8c8477c998 100644 --- a/src/Storages/System/StorageSystemCertificates.h +++ b/src/Storages/System/StorageSystemCertificates.h @@ -18,7 +18,7 @@ class StorageSystemCertificates final : public IStorageSystemOneBlock()}, - {"shard_num", std::make_shared()}, - {"shard_weight", std::make_shared()}, - {"internal_replication", std::make_shared()}, - {"replica_num", std::make_shared()}, - {"host_name", std::make_shared()}, - {"host_address", std::make_shared()}, - {"port", std::make_shared()}, - {"is_local", std::make_shared()}, - {"user", std::make_shared()}, - {"default_database", std::make_shared()}, - {"errors_count", std::make_shared()}, - {"slowdowns_count", std::make_shared()}, - {"estimated_recovery_time", std::make_shared()}, - {"database_shard_name", std::make_shared()}, - {"database_replica_name", std::make_shared()}, - {"is_active", std::make_shared(std::make_shared())}, + {"cluster", std::make_shared(), "The cluster name."}, + {"shard_num", std::make_shared(), "The shard number in the cluster, starting from 1."}, + {"shard_weight", std::make_shared(), "The relative weight of the shard when writing data."}, + {"internal_replication", std::make_shared(), "Flag that indicates whether this host is a part on ensemble which can replicate the data on its own."}, + {"replica_num", std::make_shared(), "The replica number in the shard, starting from 1."}, + {"host_name", std::make_shared(), "The host name, as specified in the config."}, + {"host_address", std::make_shared(), "The host IP address obtained from DNS."}, + {"port", std::make_shared(), "The port to use for connecting to the server."}, + {"is_local", std::make_shared(), "Flag that indicates whether the host is local."}, + {"user", std::make_shared(), "The name of the user for connecting to the server."}, + {"default_database", std::make_shared(), "The default database name."}, + {"errors_count", std::make_shared(), "The number of times this host failed to reach replica."}, + {"slowdowns_count", std::make_shared(), "The number of slowdowns that led to changing replica when establishing a connection with hedged requests."}, + {"estimated_recovery_time", std::make_shared(), "Seconds remaining until the replica error count is zeroed and it is considered to be back to normal."}, + {"database_shard_name", std::make_shared(), "The name of the `Replicated` database shard (for clusters that belong to a `Replicated` database)."}, + {"database_replica_name", std::make_shared(), "The name of the `Replicated` database replica (for clusters that belong to a `Replicated` database)."}, + {"is_active", std::make_shared(std::make_shared()), "The status of the Replicated database replica (for clusters that belong to a Replicated database): 1 means 'replica is online', 0 means 'replica is offline', NULL means 'unknown'."}, }; -} -NamesAndAliases StorageSystemClusters::getNamesAndAliases() -{ - return { + description.setAliases({ {"name", std::make_shared(), "cluster"}, - }; + }); + + return description; } void StorageSystemClusters::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const diff --git a/src/Storages/System/StorageSystemClusters.h b/src/Storages/System/StorageSystemClusters.h index 071ad423b89..7b568641cb2 100644 --- 
a/src/Storages/System/StorageSystemClusters.h +++ b/src/Storages/System/StorageSystemClusters.h @@ -20,9 +20,7 @@ class StorageSystemClusters final : public IStorageSystemOneBlock()}, - {"language", std::make_shared(std::make_shared())}, + return ColumnsDescription + { + {"name", std::make_shared(), "Name of the collation."}, + {"language", std::make_shared(std::make_shared()), "The language."}, }; } diff --git a/src/Storages/System/StorageSystemCollations.h b/src/Storages/System/StorageSystemCollations.h index 8440cfa3ebc..1fc0ff0e024 100644 --- a/src/Storages/System/StorageSystemCollations.h +++ b/src/Storages/System/StorageSystemCollations.h @@ -15,7 +15,7 @@ protected: public: std::string getName() const override { return "SystemTableCollations"; } - static NamesAndTypesList getNamesAndTypes(); + static ColumnsDescription getColumnsDescription(); }; } diff --git a/src/Storages/System/StorageSystemContributors.cpp b/src/Storages/System/StorageSystemContributors.cpp index ed28be2a4ab..860a96c4388 100644 --- a/src/Storages/System/StorageSystemContributors.cpp +++ b/src/Storages/System/StorageSystemContributors.cpp @@ -9,10 +9,11 @@ extern const char * auto_contributors[]; namespace DB { -NamesAndTypesList StorageSystemContributors::getNamesAndTypes() +ColumnsDescription StorageSystemContributors::getColumnsDescription() { - return { - {"name", std::make_shared()}, + return ColumnsDescription + { + {"name", std::make_shared(), "Contributor (author) name from git log."}, }; } diff --git a/src/Storages/System/StorageSystemContributors.h b/src/Storages/System/StorageSystemContributors.h index ba8c930118e..ed983c5e61f 100644 --- a/src/Storages/System/StorageSystemContributors.h +++ b/src/Storages/System/StorageSystemContributors.h @@ -22,6 +22,6 @@ public: return "SystemContributors"; } - static NamesAndTypesList getNamesAndTypes(); + static ColumnsDescription getColumnsDescription(); }; } diff --git a/src/Storages/System/StorageSystemCurrentRoles.cpp b/src/Storages/System/StorageSystemCurrentRoles.cpp index cf7df0b8b99..88bdf088175 100644 --- a/src/Storages/System/StorageSystemCurrentRoles.cpp +++ b/src/Storages/System/StorageSystemCurrentRoles.cpp @@ -11,14 +11,14 @@ namespace DB { -NamesAndTypesList StorageSystemCurrentRoles::getNamesAndTypes() +ColumnsDescription StorageSystemCurrentRoles::getColumnsDescription() { - NamesAndTypesList names_and_types{ - {"role_name", std::make_shared()}, - {"with_admin_option", std::make_shared()}, - {"is_default", std::make_shared()}, + return ColumnsDescription + { + {"role_name", std::make_shared(), "Role name."}, + {"with_admin_option", std::make_shared(), "1 if the role has ADMIN OPTION privilege."}, + {"is_default", std::make_shared(), "1 if role is set to be a default."}, }; - return names_and_types; } diff --git a/src/Storages/System/StorageSystemCurrentRoles.h b/src/Storages/System/StorageSystemCurrentRoles.h index bf62b81d422..4cc9b11d3f4 100644 --- a/src/Storages/System/StorageSystemCurrentRoles.h +++ b/src/Storages/System/StorageSystemCurrentRoles.h @@ -12,7 +12,7 @@ class StorageSystemCurrentRoles final : public IStorageSystemOneBlock> getStatusEnumsAndValues() }; } -NamesAndTypesList StorageSystemDDLWorkerQueue::getNamesAndTypes() +ColumnsDescription StorageSystemDDLWorkerQueue::getColumnsDescription() { - return { - {"entry", std::make_shared()}, - {"entry_version", std::make_shared(std::make_shared())}, - {"initiator_host", std::make_shared(std::make_shared())}, - {"initiator_port", std::make_shared(std::make_shared())}, - {"cluster", 
std::make_shared()}, - {"query", std::make_shared()}, - {"settings", std::make_shared(std::make_shared(), std::make_shared())}, - {"query_create_time", std::make_shared()}, + return ColumnsDescription + { + {"entry", std::make_shared(), "Query id."}, + {"entry_version", std::make_shared(std::make_shared()), "Version of the entry."}, + {"initiator_host", std::make_shared(std::make_shared()), "Host that initiated the DDL operation."}, + {"initiator_port", std::make_shared(std::make_shared()), "Port used by the initiator."}, + {"cluster", std::make_shared(), "Cluster name."}, + {"query", std::make_shared(), "Query executed."}, + {"settings", std::make_shared(std::make_shared(), std::make_shared()), "Settings used in the DDL operation."}, + {"query_create_time", std::make_shared(), "Query created time."}, - {"host", std::make_shared(std::make_shared())}, - {"port", std::make_shared(std::make_shared())}, - {"status", std::make_shared(std::make_shared(getStatusEnumsAndValues()))}, - {"exception_code", std::make_shared(std::make_shared())}, - {"exception_text", std::make_shared(std::make_shared())}, - {"query_finish_time", std::make_shared(std::make_shared())}, - {"query_duration_ms", std::make_shared(std::make_shared())}, + {"host", std::make_shared(std::make_shared()), "Hostname."}, + {"port", std::make_shared(std::make_shared()), "Host Port."}, + {"status", std::make_shared(std::make_shared(getStatusEnumsAndValues())), "Status of the query."}, + {"exception_code", std::make_shared(std::make_shared()), "Exception code."}, + {"exception_text", std::make_shared(std::make_shared()), "Exception message."}, + {"query_finish_time", std::make_shared(std::make_shared()), "Query finish time."}, + {"query_duration_ms", std::make_shared(std::make_shared()), "Duration of query execution (in milliseconds)."}, }; } diff --git a/src/Storages/System/StorageSystemDDLWorkerQueue.h b/src/Storages/System/StorageSystemDDLWorkerQueue.h index 797aff5cf98..871bb706f94 100644 --- a/src/Storages/System/StorageSystemDDLWorkerQueue.h +++ b/src/Storages/System/StorageSystemDDLWorkerQueue.h @@ -21,6 +21,6 @@ protected: public: std::string getName() const override { return "SystemDDLWorkerQueue"; } - static NamesAndTypesList getNamesAndTypes(); + static ColumnsDescription getColumnsDescription(); }; } diff --git a/src/Storages/System/StorageSystemDashboards.cpp b/src/Storages/System/StorageSystemDashboards.cpp index 3e22a6c0664..7e545757129 100644 --- a/src/Storages/System/StorageSystemDashboards.cpp +++ b/src/Storages/System/StorageSystemDashboards.cpp @@ -5,12 +5,13 @@ namespace DB { -NamesAndTypesList StorageSystemDashboards::getNamesAndTypes() +ColumnsDescription StorageSystemDashboards::getColumnsDescription() { - return { - {"dashboard", std::make_shared()}, - {"title", std::make_shared()}, - {"query", std::make_shared()}, + return ColumnsDescription + { + {"dashboard", std::make_shared(), "The dashboard name."}, + {"title", std::make_shared(), "The title of a chart."}, + {"query", std::make_shared(), "The query to obtain data to be displayed."}, }; } diff --git a/src/Storages/System/StorageSystemDashboards.h b/src/Storages/System/StorageSystemDashboards.h index cbd7c5bbf57..83a8664ad27 100644 --- a/src/Storages/System/StorageSystemDashboards.h +++ b/src/Storages/System/StorageSystemDashboards.h @@ -17,7 +17,7 @@ class StorageSystemDashboards final : public IStorageSystemOneBlock()}, - {"case_insensitive", std::make_shared()}, - {"alias_to", std::make_shared()}, + return ColumnsDescription + { + {"name", 
std::make_shared(), "Data type name."}, + {"case_insensitive", std::make_shared(), "Property that shows whether you can use a data type name in a query in case insensitive manner or not. For example, `Date` and `date` are both valid."}, + {"alias_to", std::make_shared(), "Data type name for which `name` is an alias."}, }; } diff --git a/src/Storages/System/StorageSystemDataTypeFamilies.h b/src/Storages/System/StorageSystemDataTypeFamilies.h index 25e20d2cd76..2cb834f6931 100644 --- a/src/Storages/System/StorageSystemDataTypeFamilies.h +++ b/src/Storages/System/StorageSystemDataTypeFamilies.h @@ -15,7 +15,7 @@ protected: public: std::string getName() const override { return "SystemTableDataTypeFamilies"; } - static NamesAndTypesList getNamesAndTypes(); + static ColumnsDescription getColumnsDescription(); }; } diff --git a/src/Storages/System/StorageSystemDatabaseEngines.cpp b/src/Storages/System/StorageSystemDatabaseEngines.cpp index fb878a0eda0..514ca6d0ab3 100644 --- a/src/Storages/System/StorageSystemDatabaseEngines.cpp +++ b/src/Storages/System/StorageSystemDatabaseEngines.cpp @@ -6,10 +6,11 @@ namespace DB { -NamesAndTypesList StorageSystemDatabaseEngines::getNamesAndTypes() +ColumnsDescription StorageSystemDatabaseEngines::getColumnsDescription() { - return { - {"name", std::make_shared()}, + return ColumnsDescription + { + {"name", std::make_shared(), "The name of database engine."}, }; } diff --git a/src/Storages/System/StorageSystemDatabaseEngines.h b/src/Storages/System/StorageSystemDatabaseEngines.h index cd0ee541633..16b517c91e6 100644 --- a/src/Storages/System/StorageSystemDatabaseEngines.h +++ b/src/Storages/System/StorageSystemDatabaseEngines.h @@ -16,7 +16,7 @@ protected: public: std::string getName() const override { return "SystemDatabaseEngines"; } - static NamesAndTypesList getNamesAndTypes(); + static ColumnsDescription getColumnsDescription(); }; } diff --git a/src/Storages/System/StorageSystemDatabases.cpp b/src/Storages/System/StorageSystemDatabases.cpp index 6dbe780193d..51ecb8f17ca 100644 --- a/src/Storages/System/StorageSystemDatabases.cpp +++ b/src/Storages/System/StorageSystemDatabases.cpp @@ -15,24 +15,24 @@ namespace DB { -NamesAndTypesList StorageSystemDatabases::getNamesAndTypes() +ColumnsDescription StorageSystemDatabases::getColumnsDescription() { - return { - {"name", std::make_shared()}, - {"engine", std::make_shared()}, - {"data_path", std::make_shared()}, - {"metadata_path", std::make_shared()}, - {"uuid", std::make_shared()}, - {"engine_full", std::make_shared()}, - {"comment", std::make_shared()} + auto description = ColumnsDescription + { + {"name", std::make_shared(), "Database name."}, + {"engine", std::make_shared(), "Database engine."}, + {"data_path", std::make_shared(), "Data path."}, + {"metadata_path", std::make_shared(), "Metadata path."}, + {"uuid", std::make_shared(), "Database UUID."}, + {"engine_full", std::make_shared(), "Parameters of the database engine."}, + {"comment", std::make_shared(), "Database comment."} }; -} -NamesAndAliases StorageSystemDatabases::getNamesAndAliases() -{ - return { + description.setAliases({ {"database", std::make_shared(), "name"} - }; + }); + + return description; } static String getEngineFull(const ContextPtr & ctx, const DatabasePtr & database) @@ -54,7 +54,7 @@ static String getEngineFull(const ContextPtr & ctx, const DatabasePtr & database return {}; guard.reset(); - LOG_TRACE(&Poco::Logger::get("StorageSystemDatabases"), "Failed to lock database {} ({}), will retry", name, database->getUUID()); + 
LOG_TRACE(getLogger("StorageSystemDatabases"), "Failed to lock database {} ({}), will retry", name, database->getUUID()); } ASTPtr ast = database->getCreateDatabaseQuery(); diff --git a/src/Storages/System/StorageSystemDatabases.h b/src/Storages/System/StorageSystemDatabases.h index 29dd786ca0a..2fd9ccdc970 100644 --- a/src/Storages/System/StorageSystemDatabases.h +++ b/src/Storages/System/StorageSystemDatabases.h @@ -19,9 +19,7 @@ public: return "SystemDatabases"; } - static NamesAndTypesList getNamesAndTypes(); - - static NamesAndAliases getNamesAndAliases(); + static ColumnsDescription getColumnsDescription(); protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; diff --git a/src/Storages/System/StorageSystemDictionaries.cpp b/src/Storages/System/StorageSystemDictionaries.cpp index 3299365b4ac..c2ed35c5510 100644 --- a/src/Storages/System/StorageSystemDictionaries.cpp +++ b/src/Storages/System/StorageSystemDictionaries.cpp @@ -52,34 +52,43 @@ catch (const DB::Exception &) } -NamesAndTypesList StorageSystemDictionaries::getNamesAndTypes() +ColumnsDescription StorageSystemDictionaries::getColumnsDescription() { - return { - {"database", std::make_shared()}, - {"name", std::make_shared()}, - {"uuid", std::make_shared()}, - {"status", std::make_shared(getStatusEnumAllPossibleValues())}, - {"origin", std::make_shared()}, - {"type", std::make_shared()}, - {"key.names", std::make_shared(std::make_shared())}, - {"key.types", std::make_shared(std::make_shared())}, - {"attribute.names", std::make_shared(std::make_shared())}, - {"attribute.types", std::make_shared(std::make_shared())}, - {"bytes_allocated", std::make_shared()}, - {"hierarchical_index_bytes_allocated", std::make_shared()}, - {"query_count", std::make_shared()}, - {"hit_rate", std::make_shared()}, - {"found_rate", std::make_shared()}, - {"element_count", std::make_shared()}, - {"load_factor", std::make_shared()}, - {"source", std::make_shared()}, - {"lifetime_min", std::make_shared()}, - {"lifetime_max", std::make_shared()}, - {"loading_start_time", std::make_shared()}, - {"last_successful_update_time", std::make_shared()}, - {"loading_duration", std::make_shared()}, - {"last_exception", std::make_shared()}, - {"comment", std::make_shared()} + return ColumnsDescription + { + {"database", std::make_shared(), "Name of the database containing the dictionary created by DDL query. Empty string for other dictionaries."}, + {"name", std::make_shared(), "Dictionary name."}, + {"uuid", std::make_shared(), "Dictionary UUID."}, + {"status", std::make_shared(getStatusEnumAllPossibleValues()), + "Dictionary status. Possible values: " + "NOT_LOADED — Dictionary was not loaded because it was not used, " + "LOADED — Dictionary loaded successfully, " + "FAILED — Unable to load the dictionary as a result of an error, " + "LOADING — Dictionary is loading now, " + "LOADED_AND_RELOADING — Dictionary is loaded successfully, and is being reloaded right now (frequent reasons: SYSTEM RELOAD DICTIONARY query, timeout, dictionary config has changed), " + "FAILED_AND_RELOADING — Could not load the dictionary as a result of an error and is loading now." + }, + {"origin", std::make_shared(), "Path to the configuration file that describes the dictionary."}, + {"type", std::make_shared(), "Type of a dictionary allocation. 
Storing Dictionaries in Memory."}, + {"key.names", std::make_shared(std::make_shared()), "Array of key names provided by the dictionary."}, + {"key.types", std::make_shared(std::make_shared()), "Corresponding array of key types provided by the dictionary."}, + {"attribute.names", std::make_shared(std::make_shared()), "Array of attribute names provided by the dictionary."}, + {"attribute.types", std::make_shared(std::make_shared()), "Corresponding array of attribute types provided by the dictionary."}, + {"bytes_allocated", std::make_shared(), "Amount of RAM allocated for the dictionary."}, + {"hierarchical_index_bytes_allocated", std::make_shared(), ""}, + {"query_count", std::make_shared(), "Number of queries since the dictionary was loaded or since the last successful reboot."}, + {"hit_rate", std::make_shared(), "For cache dictionaries, the percentage of uses for which the value was in the cache."}, + {"found_rate", std::make_shared(), "The percentage of uses for which the value was found."}, + {"element_count", std::make_shared(), "Number of items stored in the dictionary."}, + {"load_factor", std::make_shared(), "Percentage filled in the dictionary (for a hashed dictionary, the percentage filled in the hash table)."}, + {"source", std::make_shared(), "Text describing the data source for the dictionary."}, + {"lifetime_min", std::make_shared(), "Minimum lifetime of the dictionary in memory, after which ClickHouse tries to reload the dictionary (if invalidate_query is set, then only if it has changed). Set in seconds."}, + {"lifetime_max", std::make_shared(), "Maximum lifetime of the dictionary in memory, after which ClickHouse tries to reload the dictionary (if invalidate_query is set, then only if it has changed). Set in seconds."}, + {"loading_start_time", std::make_shared(), "Start time for loading the dictionary."}, + {"last_successful_update_time", std::make_shared(), "End time for loading or updating the dictionary. 
Helps to monitor some troubles with dictionary sources and investigate the causes."}, + {"loading_duration", std::make_shared(), "Duration of a dictionary loading."}, + {"last_exception", std::make_shared(), "Text of the error that occurs when creating or reloading the dictionary if the dictionary couldn’t be created."}, + {"comment", std::make_shared(), "Text of the comment to dictionary."} }; } diff --git a/src/Storages/System/StorageSystemDictionaries.h b/src/Storages/System/StorageSystemDictionaries.h index 6d4234362e5..792b3c0dd30 100644 --- a/src/Storages/System/StorageSystemDictionaries.h +++ b/src/Storages/System/StorageSystemDictionaries.h @@ -14,7 +14,7 @@ class StorageSystemDictionaries final : public IStorageSystemOneBlockinsert(disk_ptr->getUnreservedSpace().value_or(std::numeric_limits::max())); col_keep->insert(disk_ptr->getKeepingFreeSpace()); auto data_source_description = disk_ptr->getDataSourceDescription(); - col_type->insert(toString(data_source_description.type)); + col_type->insert(data_source_description.toString()); col_is_encrypted->insert(data_source_description.is_encrypted); col_is_read_only->insert(disk_ptr->isReadOnly()); col_is_write_once->insert(disk_ptr->isWriteOnce()); diff --git a/src/Storages/System/StorageSystemDistributionQueue.cpp b/src/Storages/System/StorageSystemDistributionQueue.cpp index 33de723b6d4..f0247275a8b 100644 --- a/src/Storages/System/StorageSystemDistributionQueue.cpp +++ b/src/Storages/System/StorageSystemDistributionQueue.cpp @@ -88,20 +88,21 @@ namespace DB { -NamesAndTypesList StorageSystemDistributionQueue::getNamesAndTypes() +ColumnsDescription StorageSystemDistributionQueue::getColumnsDescription() { - return { - { "database", std::make_shared() }, - { "table", std::make_shared() }, - { "data_path", std::make_shared() }, - { "is_blocked", std::make_shared() }, - { "error_count", std::make_shared() }, - { "data_files", std::make_shared() }, - { "data_compressed_bytes", std::make_shared() }, - { "broken_data_files", std::make_shared() }, - { "broken_data_compressed_bytes", std::make_shared() }, - { "last_exception", std::make_shared() }, - { "last_exception_time", std::make_shared() }, + return ColumnsDescription + { + { "database", std::make_shared(), "Name of the database."}, + { "table", std::make_shared(), "Name of the table."}, + { "data_path", std::make_shared(), "Path to the folder with local files."}, + { "is_blocked", std::make_shared(), "Flag indicates whether sending local files to the server is blocked."}, + { "error_count", std::make_shared(), "Number of errors."}, + { "data_files", std::make_shared(), "Number of local files in a folder."}, + { "data_compressed_bytes", std::make_shared(), "Size of compressed data in local files, in bytes."}, + { "broken_data_files", std::make_shared(), "Number of files that has been marked as broken (due to an error)."}, + { "broken_data_compressed_bytes", std::make_shared(), "Size of compressed data in broken files, in bytes."}, + { "last_exception", std::make_shared(), "Text message about the last error that occurred (if any)."}, + { "last_exception_time", std::make_shared(), "Time when last exception occurred."}, }; } diff --git a/src/Storages/System/StorageSystemDistributionQueue.h b/src/Storages/System/StorageSystemDistributionQueue.h index 63c5d40f07b..477a9d6e245 100644 --- a/src/Storages/System/StorageSystemDistributionQueue.h +++ b/src/Storages/System/StorageSystemDistributionQueue.h @@ -16,7 +16,7 @@ class StorageSystemDistributionQueue final : public 
IStorageSystemOneBlock()}, - {"database", std::make_shared()}, - {"table", std::make_shared()}, - {"uuid", std::make_shared()}, - {"engine", std::make_shared()}, - {"metadata_dropped_path", std::make_shared()}, - {"table_dropped_time", std::make_shared()}, + return ColumnsDescription + { + {"index", std::make_shared(), "Index in marked_dropped_tables queue."}, + {"database", std::make_shared(), "Database name."}, + {"table", std::make_shared(), "Table name."}, + {"uuid", std::make_shared(), "Table UUID."}, + {"engine", std::make_shared(), "Table engine name."}, + {"metadata_dropped_path", std::make_shared(), "Path of table's metadata file in metadata_dropped directory."}, + {"table_dropped_time", std::make_shared(), "The time when the next attempt to remove table's data is scheduled on. Usually it's the table when the table was dropped plus `database_atomic_delay_before_drop_table_sec`."}, }; - return names_and_types; } diff --git a/src/Storages/System/StorageSystemDroppedTables.h b/src/Storages/System/StorageSystemDroppedTables.h index 44cc8122603..d7c3569eb62 100644 --- a/src/Storages/System/StorageSystemDroppedTables.h +++ b/src/Storages/System/StorageSystemDroppedTables.h @@ -10,7 +10,7 @@ class StorageSystemDroppedTables final : public IStorageSystemOneBlock()}, - {"with_admin_option", std::make_shared()}, - {"is_current", std::make_shared()}, - {"is_default", std::make_shared()}, + return ColumnsDescription + { + {"role_name", std::make_shared(), "Role name."}, + {"with_admin_option", std::make_shared(), "1 if the role has ADMIN OPTION privilege."}, + {"is_current", std::make_shared(), "Flag that shows whether `enabled_role` is a current role of a current user."}, + {"is_default", std::make_shared(), "Flag that shows whether `enabled_role` is a default role."}, }; - return names_and_types; } diff --git a/src/Storages/System/StorageSystemEnabledRoles.h b/src/Storages/System/StorageSystemEnabledRoles.h index 93e8e0b5311..5367b2ccbea 100644 --- a/src/Storages/System/StorageSystemEnabledRoles.h +++ b/src/Storages/System/StorageSystemEnabledRoles.h @@ -12,7 +12,7 @@ class StorageSystemEnabledRoles final : public IStorageSystemOneBlock() }, - { "code", std::make_shared() }, - { "value", std::make_shared() }, - { "last_error_time", std::make_shared() }, - { "last_error_message", std::make_shared() }, - { "last_error_trace", std::make_shared(std::make_shared()) }, - { "remote", std::make_shared() }, + return ColumnsDescription + { + { "name", std::make_shared(), "Name of the error (errorCodeToName)."}, + { "code", std::make_shared(), "Code number of the error."}, + { "value", std::make_shared(), "The number of times this error happened."}, + { "last_error_time", std::make_shared(), "The time when the last error happened."}, + { "last_error_message", std::make_shared(), "Message for the last error."}, + { "last_error_trace", std::make_shared(std::make_shared()), "A stack trace that represents a list of physical addresses where the called methods are stored."}, + { "remote", std::make_shared(), "Remote exception (i.e. 
received during one of the distributed queries)."}, }; } diff --git a/src/Storages/System/StorageSystemErrors.h b/src/Storages/System/StorageSystemErrors.h index f44ae9c8025..9e8ec628bac 100644 --- a/src/Storages/System/StorageSystemErrors.h +++ b/src/Storages/System/StorageSystemErrors.h @@ -18,7 +18,7 @@ class StorageSystemErrors final : public IStorageSystemOneBlock()}, - {"value", std::make_shared()}, - {"description", std::make_shared()}, + auto description = ColumnsDescription + { + {"event", std::make_shared(), "Event name."}, + {"value", std::make_shared(), "Number of events occurred."}, + {"description", std::make_shared(), "Event description."}, }; -} -NamesAndAliases StorageSystemEvents::getNamesAndAliases() -{ - return { + description.setAliases({ {"name", std::make_shared(), "event"} - }; + }); + + return description; } void StorageSystemEvents::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const diff --git a/src/Storages/System/StorageSystemEvents.h b/src/Storages/System/StorageSystemEvents.h index b2e4bac072a..cbd92f90d7a 100644 --- a/src/Storages/System/StorageSystemEvents.h +++ b/src/Storages/System/StorageSystemEvents.h @@ -15,9 +15,7 @@ class StorageSystemEvents final : public IStorageSystemOneBlock()}, {"cache_base_path", std::make_shared()}, {"cache_path", std::make_shared()}, @@ -28,6 +30,7 @@ NamesAndTypesList StorageSystemFilesystemCache::getNamesAndTypes() {"downloaded_size", std::make_shared()}, {"kind", std::make_shared()}, {"unbound", std::make_shared>()}, + {"user_id", std::make_shared()}, {"file_size", std::make_shared(std::make_shared())}, }; } @@ -50,10 +53,12 @@ void StorageSystemFilesystemCache::fillData(MutableColumns & res_columns, Contex res_columns[i++]->insert(cache_name); res_columns[i++]->insert(cache->getBasePath()); - /// Do not use `file_segment->getPathInLocalCache` here because it will lead to nullptr dereference + /// Do not use `file_segment->getPath` here because it will lead to nullptr dereference /// (because file_segments in getSnapshot doesn't have `cache` field set) - const auto path = cache->getPathInLocalCache(file_segment.key, file_segment.offset, file_segment.kind); + const auto path = cache->getFileSegmentPath( + file_segment.key, file_segment.offset, file_segment.kind, + FileCache::UserInfo(file_segment.user_id, file_segment.user_weight)); res_columns[i++]->insert(path); res_columns[i++]->insert(file_segment.key.toString()); res_columns[i++]->insert(file_segment.range_left); @@ -65,6 +70,7 @@ void StorageSystemFilesystemCache::fillData(MutableColumns & res_columns, Contex res_columns[i++]->insert(file_segment.downloaded_size); res_columns[i++]->insert(toString(file_segment.kind)); res_columns[i++]->insert(file_segment.is_unbound); + res_columns[i++]->insert(file_segment.user_id); std::error_code ec; auto size = fs::file_size(path, ec); @@ -72,7 +78,7 @@ void StorageSystemFilesystemCache::fillData(MutableColumns & res_columns, Contex res_columns[i++]->insert(size); else res_columns[i++]->insertDefault(); - }); + }, FileCache::getCommonUser().user_id); } } diff --git a/src/Storages/System/StorageSystemFilesystemCache.h b/src/Storages/System/StorageSystemFilesystemCache.h index cc5c8d12f79..4b13b375f95 100644 --- a/src/Storages/System/StorageSystemFilesystemCache.h +++ b/src/Storages/System/StorageSystemFilesystemCache.h @@ -7,7 +7,7 @@ namespace DB { /** - * Usgae example. How to get mapping from local paths to remote paths: + * Usage example. 
How to get mapping from local paths to remote paths: * SELECT * cache_path, * cache_hits, @@ -36,7 +36,7 @@ public: std::string getName() const override { return "SystemFilesystemCache"; } - static NamesAndTypesList getNamesAndTypes(); + static ColumnsDescription getColumnsDescription(); protected: void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const override; diff --git a/src/Storages/System/StorageSystemFormats.cpp b/src/Storages/System/StorageSystemFormats.cpp index daac1e60057..a360971e1f7 100644 --- a/src/Storages/System/StorageSystemFormats.cpp +++ b/src/Storages/System/StorageSystemFormats.cpp @@ -6,14 +6,15 @@ namespace DB { -NamesAndTypesList StorageSystemFormats::getNamesAndTypes() +ColumnsDescription StorageSystemFormats::getColumnsDescription() { - return { - {"name", std::make_shared()}, - {"is_input", std::make_shared()}, - {"is_output", std::make_shared()}, - {"supports_parallel_parsing", std::make_shared()}, - {"supports_parallel_formatting", std::make_shared()}, + return ColumnsDescription + { + {"name", std::make_shared(), "Format name."}, + {"is_input", std::make_shared(), "Flag that indicates whether the format is suitable for data input."}, + {"is_output", std::make_shared(), "Flag that indicates whether the format is suitable for data output."}, + {"supports_parallel_parsing", std::make_shared(), "Flag that indicates whether the format supports parallel parsing."}, + {"supports_parallel_formatting", std::make_shared(), "Flag that indicates whether the format supports parallel formatting."}, }; } diff --git a/src/Storages/System/StorageSystemFormats.h b/src/Storages/System/StorageSystemFormats.h index d7631066020..9f9d1df1bde 100644 --- a/src/Storages/System/StorageSystemFormats.h +++ b/src/Storages/System/StorageSystemFormats.h @@ -17,6 +17,6 @@ public: return "SystemFormats"; } - static NamesAndTypesList getNamesAndTypes(); + static ColumnsDescription getColumnsDescription(); }; } diff --git a/src/Storages/System/StorageSystemFunctions.cpp b/src/Storages/System/StorageSystemFunctions.cpp index 9809b9435f2..45c00e6de27 100644 --- a/src/Storages/System/StorageSystemFunctions.cpp +++ b/src/Storages/System/StorageSystemFunctions.cpp @@ -113,22 +113,23 @@ std::vector> getOriginEnumsAndValues() }; } -NamesAndTypesList StorageSystemFunctions::getNamesAndTypes() +ColumnsDescription StorageSystemFunctions::getColumnsDescription() { - return { - {"name", std::make_shared()}, - {"is_aggregate", std::make_shared()}, - {"is_deterministic", std::make_shared(std::make_shared())}, - {"case_insensitive", std::make_shared()}, - {"alias_to", std::make_shared()}, - {"create_query", std::make_shared()}, - {"origin", std::make_shared(getOriginEnumsAndValues())}, - {"description", std::make_shared()}, - {"syntax", std::make_shared()}, - {"arguments", std::make_shared()}, - {"returned_value", std::make_shared()}, - {"examples", std::make_shared()}, - {"categories", std::make_shared()} + return ColumnsDescription + { + {"name", std::make_shared(), "The name of the function."}, + {"is_aggregate", std::make_shared(), "Whether the function is an aggregate function."}, + {"is_deterministic", std::make_shared(std::make_shared()), "Whether the function is deterministic."}, + {"case_insensitive", std::make_shared(), "Whether the function name can be used case-insensitively."}, + {"alias_to", std::make_shared(), "The original function name, if the function name is an alias."}, + {"create_query", std::make_shared(), "Obsolete."}, + {"origin", 
std::make_shared(getOriginEnumsAndValues()), "Obsolete."}, + {"description", std::make_shared(), "A high-level description what the function does."}, + {"syntax", std::make_shared(), "Signature of the function."}, + {"arguments", std::make_shared(), "What arguments does the function take."}, + {"returned_value", std::make_shared(), "What does the function return."}, + {"examples", std::make_shared(), "Usage example."}, + {"categories", std::make_shared(), "The category of the function."} }; } diff --git a/src/Storages/System/StorageSystemFunctions.h b/src/Storages/System/StorageSystemFunctions.h index 606694a4c0b..ac1129e8127 100644 --- a/src/Storages/System/StorageSystemFunctions.h +++ b/src/Storages/System/StorageSystemFunctions.h @@ -17,7 +17,7 @@ class StorageSystemFunctions final : public IStorageSystemOneBlock & partitions) override; void restoreDataFromBackup(RestorerFromBackup & restorer, const String & data_path_in_backup, const std::optional & partitions) override; diff --git a/src/Storages/System/StorageSystemGrants.cpp b/src/Storages/System/StorageSystemGrants.cpp index 461efd7f640..f5f3fa07e53 100644 --- a/src/Storages/System/StorageSystemGrants.cpp +++ b/src/Storages/System/StorageSystemGrants.cpp @@ -18,19 +18,23 @@ namespace DB { -NamesAndTypesList StorageSystemGrants::getNamesAndTypes() +ColumnsDescription StorageSystemGrants::getColumnsDescription() { - NamesAndTypesList names_and_types{ - {"user_name", std::make_shared(std::make_shared())}, - {"role_name", std::make_shared(std::make_shared())}, - {"access_type", std::make_shared(StorageSystemPrivileges::getAccessTypeEnumValues())}, - {"database", std::make_shared(std::make_shared())}, - {"table", std::make_shared(std::make_shared())}, - {"column", std::make_shared(std::make_shared())}, - {"is_partial_revoke", std::make_shared()}, - {"grant_option", std::make_shared()}, + return ColumnsDescription + { + {"user_name", std::make_shared(std::make_shared()), "User name."}, + {"role_name", std::make_shared(std::make_shared()), "Role assigned to user account."}, + {"access_type", std::make_shared(StorageSystemPrivileges::getAccessTypeEnumValues()), "Access parameters for ClickHouse user account."}, + {"database", std::make_shared(std::make_shared()), "Name of a database."}, + {"table", std::make_shared(std::make_shared()), "Name of a table."}, + {"column", std::make_shared(std::make_shared()), "Name of a column to which access is granted."}, + {"is_partial_revoke", std::make_shared(), + "Logical value. It shows whether some privileges have been revoked. Possible values: " + "0 — The row describes a partial revoke, " + "1 — The row describes a grant." 
+ }, + {"grant_option", std::make_shared(), "Permission is granted WITH GRANT OPTION."}, }; - return names_and_types; } diff --git a/src/Storages/System/StorageSystemGrants.h b/src/Storages/System/StorageSystemGrants.h index 48d95f487a1..2202b52ad5f 100644 --- a/src/Storages/System/StorageSystemGrants.h +++ b/src/Storages/System/StorageSystemGrants.h @@ -12,7 +12,7 @@ class StorageSystemGrants final : public IStorageSystemOneBlock()}, - {"rule_type", std::make_shared()}, - {"regexp", std::make_shared()}, - {"function", std::make_shared()}, - {"age", std::make_shared()}, - {"precision", std::make_shared()}, - {"priority", std::make_shared()}, - {"is_default", std::make_shared()}, - {"Tables.database", std::make_shared(std::make_shared())}, - {"Tables.table", std::make_shared(std::make_shared())}, + return ColumnsDescription + { + {"config_name", std::make_shared(), "graphite_rollup parameter name."}, + {"rule_type", std::make_shared(), ""}, + {"regexp", std::make_shared(), "A pattern for the metric name."}, + {"function", std::make_shared(), "The name of the aggregating function."}, + {"age", std::make_shared(), "The minimum age of the data in seconds."}, + {"precision", std::make_shared(), "How precisely to define the age of the data in seconds."}, + {"priority", std::make_shared(), "Pattern priority."}, + {"is_default", std::make_shared(), "Whether the pattern is the default."}, + {"Tables.database", std::make_shared(std::make_shared()), "Array of names of database tables that use the `config_name` parameter."}, + {"Tables.table", std::make_shared(std::make_shared()), "Array of table names that use the `config_name` parameter."}, }; } diff --git a/src/Storages/System/StorageSystemGraphite.h b/src/Storages/System/StorageSystemGraphite.h index 608dac79133..be101181cf7 100644 --- a/src/Storages/System/StorageSystemGraphite.h +++ b/src/Storages/System/StorageSystemGraphite.h @@ -15,7 +15,7 @@ class StorageSystemGraphite final : public IStorageSystemOneBlock() }, - { "large", std::make_shared() }, - { "size", std::make_shared() }, - { "allocations", std::make_shared() }, - { "deallocations", std::make_shared() }, + return ColumnsDescription + { + { "index", std::make_shared(), "Index of the bin ordered by size."}, + { "large", std::make_shared(), "True for large allocations and False for small."}, + { "size", std::make_shared(), "Size of allocations in this bin."}, + { "allocations", std::make_shared(), "Number of allocations."}, + { "deallocations", std::make_shared(), "Number of deallocations."}, }; } diff --git a/src/Storages/System/StorageSystemJemalloc.h b/src/Storages/System/StorageSystemJemalloc.h index a4ac2fbcdcb..0cd29d99131 100644 --- a/src/Storages/System/StorageSystemJemalloc.h +++ b/src/Storages/System/StorageSystemJemalloc.h @@ -15,7 +15,7 @@ public: std::string getName() const override { return "SystemJemallocBins"; } - static NamesAndTypesList getNamesAndTypes(); + static ColumnsDescription getColumnsDescription(); Pipe read( const Names & column_names, diff --git a/src/Storages/System/StorageSystemKafkaConsumers.cpp b/src/Storages/System/StorageSystemKafkaConsumers.cpp index e333f6e2c15..95962d8de8b 100644 --- a/src/Storages/System/StorageSystemKafkaConsumers.cpp +++ b/src/Storages/System/StorageSystemKafkaConsumers.cpp @@ -22,29 +22,29 @@ namespace DB { -NamesAndTypesList StorageSystemKafkaConsumers::getNamesAndTypes() +ColumnsDescription StorageSystemKafkaConsumers::getColumnsDescription() { - NamesAndTypesList names_and_types{ - {"database", std::make_shared()}, - 
{"table", std::make_shared()}, - {"consumer_id", std::make_shared()}, //(number? or string? - single clickhouse table can have many consumers) - {"assignments.topic", std::make_shared(std::make_shared())}, - {"assignments.partition_id", std::make_shared(std::make_shared())}, - {"assignments.current_offset", std::make_shared(std::make_shared())}, - {"exceptions.time", std::make_shared(std::make_shared())}, - {"exceptions.text", std::make_shared(std::make_shared())}, - {"last_poll_time", std::make_shared()}, - {"num_messages_read", std::make_shared()}, - {"last_commit_time", std::make_shared()}, - {"num_commits", std::make_shared()}, - {"last_rebalance_time", std::make_shared()}, - {"num_rebalance_revocations", std::make_shared()}, - {"num_rebalance_assignments", std::make_shared()}, - {"is_currently_used", std::make_shared()}, + return ColumnsDescription + { + {"database", std::make_shared(), "Database of the table with Kafka Engine."}, + {"table", std::make_shared(), "Name of the table with Kafka Engine."}, + {"consumer_id", std::make_shared(), "Kafka consumer identifier. Note, that a table can have many consumers. Specified by `kafka_num_consumers` parameter."}, + {"assignments.topic", std::make_shared(std::make_shared()), "Kafka topic."}, + {"assignments.partition_id", std::make_shared(std::make_shared()), "Kafka partition id. Note, that only one consumer can be assigned to a partition."}, + {"assignments.current_offset", std::make_shared(std::make_shared()), "Current offset."}, + {"exceptions.time", std::make_shared(std::make_shared()), "Timestamp when the 10 most recent exceptions were generated."}, + {"exceptions.text", std::make_shared(std::make_shared()), "Text of 10 most recent exceptions."}, + {"last_poll_time", std::make_shared(), "Timestamp of the most recent poll."}, + {"num_messages_read", std::make_shared(), "Number of messages read by the consumer."}, + {"last_commit_time", std::make_shared(), "Timestamp of the most recent poll."}, + {"num_commits", std::make_shared(), "Total number of commits for the consumer."}, + {"last_rebalance_time", std::make_shared(), "Timestamp of the most recent Kafka rebalance."}, + {"num_rebalance_revocations", std::make_shared(), "Number of times the consumer was revoked its partitions."}, + {"num_rebalance_assignments", std::make_shared(), "Number of times the consumer was assigned to Kafka cluster."}, + {"is_currently_used", std::make_shared(), "Consumer is in use."}, {"last_used", std::make_shared(6)}, - {"rdkafka_stat", std::make_shared()}, + {"rdkafka_stat", std::make_shared(), "Library internal statistic. Set statistics_interval_ms to 0 disable, default is 3000 (once in three seconds)."}, }; - return names_and_types; } void StorageSystemKafkaConsumers::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const diff --git a/src/Storages/System/StorageSystemKafkaConsumers.h b/src/Storages/System/StorageSystemKafkaConsumers.h index eda3a39bc7e..ae2c726849d 100644 --- a/src/Storages/System/StorageSystemKafkaConsumers.h +++ b/src/Storages/System/StorageSystemKafkaConsumers.h @@ -15,7 +15,7 @@ class StorageSystemKafkaConsumers final : public IStorageSystemOneBlock()}, - {"license_type", std::make_shared()}, - {"license_path", std::make_shared()}, - {"license_text", std::make_shared()}, + return ColumnsDescription + { + {"library_name", std::make_shared(), "Name of the library."}, + {"license_type", std::make_shared(), "License type — e.g. 
Apache, MIT."}, + {"license_path", std::make_shared(), "Path to the file with the license text."}, + {"license_text", std::make_shared(), "License text."}, }; } diff --git a/src/Storages/System/StorageSystemLicenses.h b/src/Storages/System/StorageSystemLicenses.h index 76320607805..57a3ff201a2 100644 --- a/src/Storages/System/StorageSystemLicenses.h +++ b/src/Storages/System/StorageSystemLicenses.h @@ -23,6 +23,6 @@ public: return "SystemLicenses"; } - static NamesAndTypesList getNamesAndTypes(); + static ColumnsDescription getColumnsDescription(); }; } diff --git a/src/Storages/System/StorageSystemMacros.cpp b/src/Storages/System/StorageSystemMacros.cpp index 576fbc69039..6c1a24d152a 100644 --- a/src/Storages/System/StorageSystemMacros.cpp +++ b/src/Storages/System/StorageSystemMacros.cpp @@ -6,11 +6,12 @@ namespace DB { -NamesAndTypesList StorageSystemMacros::getNamesAndTypes() +ColumnsDescription StorageSystemMacros::getColumnsDescription() { - return { - {"macro", std::make_shared()}, - {"substitution", std::make_shared()}, + return ColumnsDescription + { + {"macro", std::make_shared(), "The macro name."}, + {"substitution", std::make_shared(), "The substitution string."}, }; } diff --git a/src/Storages/System/StorageSystemMacros.h b/src/Storages/System/StorageSystemMacros.h index 58c99f9efb8..ffbeb70796e 100644 --- a/src/Storages/System/StorageSystemMacros.h +++ b/src/Storages/System/StorageSystemMacros.h @@ -17,7 +17,7 @@ class StorageSystemMacros final : public IStorageSystemOneBlock -NamesAndTypesList SystemMergeTreeSettings::getNamesAndTypes() +ColumnsDescription SystemMergeTreeSettings::getColumnsDescription() { - return { - {"name", std::make_shared()}, - {"value", std::make_shared()}, - {"changed", std::make_shared()}, - {"description", std::make_shared()}, - {"min", std::make_shared(std::make_shared())}, - {"max", std::make_shared(std::make_shared())}, - {"readonly", std::make_shared()}, - {"type", std::make_shared()}, - {"is_obsolete", std::make_shared()}, + return ColumnsDescription + { + {"name", std::make_shared(), "Setting name."}, + {"value", std::make_shared(), "Setting value."}, + {"changed", std::make_shared(), "1 if the setting was explicitly defined in the config or explicitly changed."}, + {"description", std::make_shared(), "Setting description."}, + {"min", std::make_shared(std::make_shared()), "Minimum value of the setting, if any is set via constraints. If the setting has no minimum value, contains NULL."}, + {"max", std::make_shared(std::make_shared()), "Maximum value of the setting, if any is set via constraints. If the setting has no maximum value, contains NULL."}, + {"readonly", std::make_shared(), + "Shows whether the current user can change the setting: " + "0 — Current user can change the setting, " + "1 — Current user can't change the setting." 
+ }, + {"type", std::make_shared(), "Setting type (implementation specific string value)."}, + {"is_obsolete", std::make_shared(), "Shows whether a setting is obsolete."}, }; } diff --git a/src/Storages/System/StorageSystemMergeTreeSettings.h b/src/Storages/System/StorageSystemMergeTreeSettings.h index f8a3c9ee422..48e83f0a880 100644 --- a/src/Storages/System/StorageSystemMergeTreeSettings.h +++ b/src/Storages/System/StorageSystemMergeTreeSettings.h @@ -19,7 +19,7 @@ class SystemMergeTreeSettings final : public IStorageSystemOneBlock>::IStorageSystemOneBlock; diff --git a/src/Storages/System/StorageSystemMerges.cpp b/src/Storages/System/StorageSystemMerges.cpp index 2dbe2964eb9..4129e4c235b 100644 --- a/src/Storages/System/StorageSystemMerges.cpp +++ b/src/Storages/System/StorageSystemMerges.cpp @@ -7,33 +7,34 @@ namespace DB { -NamesAndTypesList StorageSystemMerges::getNamesAndTypes() +ColumnsDescription StorageSystemMerges::getColumnsDescription() { - return { - {"database", std::make_shared()}, - {"table", std::make_shared()}, - {"elapsed", std::make_shared()}, - {"progress", std::make_shared()}, - {"num_parts", std::make_shared()}, - {"source_part_names", std::make_shared(std::make_shared())}, - {"result_part_name", std::make_shared()}, - {"source_part_paths", std::make_shared(std::make_shared())}, - {"result_part_path", std::make_shared()}, + return ColumnsDescription + { + {"database", std::make_shared(), "The name of the database the table is in."}, + {"table", std::make_shared(), "Table name."}, + {"elapsed", std::make_shared(), "The time elapsed (in seconds) since the merge started."}, + {"progress", std::make_shared(), "The percentage of completed work from 0 to 1."}, + {"num_parts", std::make_shared(), "The number of parts to be merged."}, + {"source_part_names", std::make_shared(std::make_shared()), ""}, + {"result_part_name", std::make_shared(), "The name of the part that will be formed as the result of merging."}, + {"source_part_paths", std::make_shared(std::make_shared()), ""}, + {"result_part_path", std::make_shared(), ""}, {"partition_id", std::make_shared()}, {"partition", std::make_shared()}, - {"is_mutation", std::make_shared()}, - {"total_size_bytes_compressed", std::make_shared()}, - {"total_size_bytes_uncompressed", std::make_shared()}, - {"total_size_marks", std::make_shared()}, - {"bytes_read_uncompressed", std::make_shared()}, - {"rows_read", std::make_shared()}, - {"bytes_written_uncompressed", std::make_shared()}, - {"rows_written", std::make_shared()}, - {"columns_written", std::make_shared()}, - {"memory_usage", std::make_shared()}, - {"thread_id", std::make_shared()}, - {"merge_type", std::make_shared()}, - {"merge_algorithm", std::make_shared()}, + {"is_mutation", std::make_shared(), "1 if this process is a part mutation."}, + {"total_size_bytes_compressed", std::make_shared(), "The total size of the compressed data in the merged chunks."}, + {"total_size_bytes_uncompressed", std::make_shared(), "The total size of compressed data in the merged chunks."}, + {"total_size_marks", std::make_shared(), "The total number of marks in the merged parts."}, + {"bytes_read_uncompressed", std::make_shared(), "Number of bytes read, uncompressed."}, + {"rows_read", std::make_shared(), "Number of rows read."}, + {"bytes_written_uncompressed", std::make_shared(), "Number of bytes written, uncompressed."}, + {"rows_written", std::make_shared(), "Number of rows written."}, + {"columns_written", std::make_shared(), "Number of columns written (for Vertical merge algorithm)."}, 
+ {"memory_usage", std::make_shared(), "Memory consumption of the merge process."}, + {"thread_id", std::make_shared(), "Thread ID of the merge process."}, + {"merge_type", std::make_shared(), "The type of current merge. Empty if it's an mutation."}, + {"merge_algorithm", std::make_shared(), "The algorithm used in current merge. Empty if it's an mutation."}, }; } diff --git a/src/Storages/System/StorageSystemMerges.h b/src/Storages/System/StorageSystemMerges.h index d4a2b98d9ab..961d28daf9a 100644 --- a/src/Storages/System/StorageSystemMerges.h +++ b/src/Storages/System/StorageSystemMerges.h @@ -17,7 +17,7 @@ class StorageSystemMerges final : public IStorageSystemOneBlock #include #include #include @@ -8,20 +9,20 @@ namespace DB { -NamesAndTypesList StorageSystemMetrics::getNamesAndTypes() +ColumnsDescription StorageSystemMetrics::getColumnsDescription() { - return { - {"metric", std::make_shared()}, - {"value", std::make_shared()}, - {"description", std::make_shared()}, + auto description = ColumnsDescription + { + {"metric", std::make_shared(), "Metric name."}, + {"value", std::make_shared(), "Metric value."}, + {"description", std::make_shared(), "Metric description."}, }; -} -NamesAndAliases StorageSystemMetrics::getNamesAndAliases() -{ - return { + description.setAliases({ {"name", std::make_shared(), "metric"} - }; + }); + + return description; } void StorageSystemMetrics::fillData(MutableColumns & res_columns, ContextPtr, const SelectQueryInfo &) const diff --git a/src/Storages/System/StorageSystemMetrics.h b/src/Storages/System/StorageSystemMetrics.h index e3e2c07014f..ec0c67cf6b7 100644 --- a/src/Storages/System/StorageSystemMetrics.h +++ b/src/Storages/System/StorageSystemMetrics.h @@ -16,9 +16,7 @@ class StorageSystemMetrics final : public IStorageSystemOneBlock() }, { "type", std::make_shared() }, { "loading_start_time", std::make_shared() }, diff --git a/src/Storages/System/StorageSystemModels.h b/src/Storages/System/StorageSystemModels.h index dfb6ad3de5a..91fa3761743 100644 --- a/src/Storages/System/StorageSystemModels.h +++ b/src/Storages/System/StorageSystemModels.h @@ -14,7 +14,7 @@ class StorageSystemModels final : public IStorageSystemOneBlock()}, - {"table", std::make_shared()}, - {"elapsed", std::make_shared()}, - {"target_disk_name", std::make_shared()}, - {"target_disk_path", std::make_shared()}, - {"part_name", std::make_shared()}, - {"part_size", std::make_shared()}, - {"thread_id", std::make_shared()}, + return ColumnsDescription + { + {"database", std::make_shared(), "Name of the database."}, + {"table", std::make_shared(), "Name of the table containing moving data part."}, + {"elapsed", std::make_shared(), "Time elapsed (in seconds) since data part movement started."}, + {"target_disk_name", std::make_shared(), "Name of disk to which the data part is moving."}, + {"target_disk_path", std::make_shared(), "Path to the mount point of the disk in the file system."}, + {"part_name", std::make_shared(), "Name of the data part being moved."}, + {"part_size", std::make_shared(), "Data part size."}, + {"thread_id", std::make_shared(), "Identifier of a thread performing the movement."}, }; } diff --git a/src/Storages/System/StorageSystemMoves.h b/src/Storages/System/StorageSystemMoves.h index 2e4ceec2abd..acdd9642f8f 100644 --- a/src/Storages/System/StorageSystemMoves.h +++ b/src/Storages/System/StorageSystemMoves.h @@ -17,7 +17,7 @@ class StorageSystemMoves final : public IStorageSystemOneBlock() }, - { "table", std::make_shared() }, - { "mutation_id", 
std::make_shared() }, - { "command", std::make_shared() }, - { "create_time", std::make_shared() }, - { "block_numbers.partition_id", std::make_shared(std::make_shared()) }, - { "block_numbers.number", std::make_shared(std::make_shared()) }, - { "parts_to_do_names", std::make_shared(std::make_shared()) }, - { "parts_to_do", std::make_shared() }, - { "is_done", std::make_shared() }, + return ColumnsDescription + { + { "database", std::make_shared(), "The name of the database to which the mutation was applied."}, + { "table", std::make_shared(), "The name of the table to which the mutation was applied."}, + { "mutation_id", std::make_shared(), "The ID of the mutation. For replicated tables these IDs correspond to znode names in the /mutations/ directory in ClickHouse Keeper. For non-replicated tables the IDs correspond to file names in the data directory of the table."}, + { "command", std::make_shared(), "The mutation command string (the part of the query after ALTER TABLE [db.]table)."}, + { "create_time", std::make_shared(), "Date and time when the mutation command was submitted for execution."}, + { "block_numbers.partition_id", std::make_shared(std::make_shared()), "For mutations of replicated tables, the array contains the partitions' IDs (one record for each partition). For mutations of non-replicated tables the array is empty."}, + { "block_numbers.number", std::make_shared(std::make_shared()), + "For mutations of replicated tables, the array contains one record for each partition, with the block number that was acquired by the mutation. " + "Only parts that contain blocks with numbers less than this number will be mutated in the partition." + "In non-replicated tables, block numbers in all partitions form a single sequence. " + "This means that for mutations of non-replicated tables, the column will contain one record with a single block number acquired by the mutation." + }, + { "parts_to_do_names", std::make_shared(std::make_shared()), "An array of names of data parts that need to be mutated for the mutation to complete."}, + { "parts_to_do", std::make_shared(), "The number of data parts that need to be mutated for the mutation to complete."}, + { "is_done", std::make_shared(), + "The flag whether the mutation is done or not. Possible values: " + "1 if the mutation is completed, " + "0 if the mutation is still in process. 
" + }, { "is_killed", std::make_shared() }, - { "latest_failed_part", std::make_shared() }, - { "latest_fail_time", std::make_shared() }, - { "latest_fail_reason", std::make_shared() }, + { "latest_failed_part", std::make_shared(), "The name of the most recent part that could not be mutated."}, + { "latest_fail_time", std::make_shared(), "The date and time of the most recent part mutation failure."}, + { "latest_fail_reason", std::make_shared(), "The exception message that caused the most recent part mutation failure."}, }; } diff --git a/src/Storages/System/StorageSystemMutations.h b/src/Storages/System/StorageSystemMutations.h index 0dac17eeb10..2db6e0c17f1 100644 --- a/src/Storages/System/StorageSystemMutations.h +++ b/src/Storages/System/StorageSystemMutations.h @@ -16,7 +16,7 @@ class StorageSystemMutations final : public IStorageSystemOneBlock()}, - {"collection", std::make_shared(std::make_shared(), std::make_shared())}, + return ColumnsDescription + { + {"name", std::make_shared(), "Name of the collection."}, + {"collection", std::make_shared(std::make_shared(), std::make_shared()), "Collection internals."}, }; } diff --git a/src/Storages/System/StorageSystemNamedCollections.h b/src/Storages/System/StorageSystemNamedCollections.h index d20fa62d30b..596df99be83 100644 --- a/src/Storages/System/StorageSystemNamedCollections.h +++ b/src/Storages/System/StorageSystemNamedCollections.h @@ -12,7 +12,7 @@ public: std::string getName() const override { return "SystemNamedCollections"; } - static NamesAndTypesList getNamesAndTypes(); + static ColumnsDescription getColumnsDescription(); protected: void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const override; diff --git a/src/Storages/System/StorageSystemPartMovesBetweenShards.cpp b/src/Storages/System/StorageSystemPartMovesBetweenShards.cpp index c397392e9fb..1a2646d3295 100644 --- a/src/Storages/System/StorageSystemPartMovesBetweenShards.cpp +++ b/src/Storages/System/StorageSystemPartMovesBetweenShards.cpp @@ -16,9 +16,11 @@ namespace DB { -NamesAndTypesList StorageSystemPartMovesBetweenShards::getNamesAndTypes() +ColumnsDescription StorageSystemPartMovesBetweenShards::getColumnsDescription() { - return { + /// TODO: Fill in all the comments + return ColumnsDescription + { /// Table properties. 
{ "database", std::make_shared() }, { "table", std::make_shared() }, diff --git a/src/Storages/System/StorageSystemPartMovesBetweenShards.h b/src/Storages/System/StorageSystemPartMovesBetweenShards.h index 9fbc8e532a4..93a26bcd1b7 100644 --- a/src/Storages/System/StorageSystemPartMovesBetweenShards.h +++ b/src/Storages/System/StorageSystemPartMovesBetweenShards.h @@ -14,7 +14,7 @@ class StorageSystemPartMovesBetweenShards final : public IStorageSystemOneBlock< public: std::string getName() const override { return "SystemShardMoves"; } - static NamesAndTypesList getNamesAndTypes(); + static ColumnsDescription getColumnsDescription(); protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; diff --git a/src/Storages/System/StorageSystemParts.cpp b/src/Storages/System/StorageSystemParts.cpp index 57c455fcdc7..b6e4ee4161e 100644 --- a/src/Storages/System/StorageSystemParts.cpp +++ b/src/Storages/System/StorageSystemParts.cpp @@ -45,54 +45,54 @@ namespace DB StorageSystemParts::StorageSystemParts(const StorageID & table_id_) : StorageSystemPartsBase(table_id_, - { - {"partition", std::make_shared()}, - {"name", std::make_shared()}, - {"uuid", std::make_shared()}, - {"part_type", std::make_shared()}, - {"active", std::make_shared()}, - {"marks", std::make_shared()}, - {"rows", std::make_shared()}, - {"bytes_on_disk", std::make_shared()}, - {"data_compressed_bytes", std::make_shared()}, - {"data_uncompressed_bytes", std::make_shared()}, - {"primary_key_size", std::make_shared()}, - {"marks_bytes", std::make_shared()}, - {"secondary_indices_compressed_bytes", std::make_shared()}, - {"secondary_indices_uncompressed_bytes", std::make_shared()}, - {"secondary_indices_marks_bytes", std::make_shared()}, - {"modification_time", std::make_shared()}, - {"remove_time", std::make_shared()}, - {"refcount", std::make_shared()}, - {"min_date", std::make_shared()}, - {"max_date", std::make_shared()}, - {"min_time", std::make_shared()}, - {"max_time", std::make_shared()}, - {"partition_id", std::make_shared()}, - {"min_block_number", std::make_shared()}, - {"max_block_number", std::make_shared()}, - {"level", std::make_shared()}, - {"data_version", std::make_shared()}, - {"primary_key_bytes_in_memory", std::make_shared()}, - {"primary_key_bytes_in_memory_allocated", std::make_shared()}, - {"is_frozen", std::make_shared()}, + ColumnsDescription{ + {"partition", std::make_shared(), "The partition name."}, + {"name", std::make_shared(), "Name of the data part."}, + {"uuid", std::make_shared(), "The UUID of data part."}, + {"part_type", std::make_shared(), "The data part storing format. Possible Values: Wide (a file per column) and Compact (a single file for all columns)."}, + {"active", std::make_shared(), "Flag that indicates whether the data part is active. If a data part is active, it's used in a table. Otherwise, it's about to be deleted. Inactive data parts appear after merging and mutating operations."}, + {"marks", std::make_shared(), "The number of marks. To get the approximate number of rows in a data part, multiply marks by the index granularity (usually 8192) (this hint does not work for adaptive granularity)."}, + {"rows", std::make_shared(), "The number of rows."}, + {"bytes_on_disk", std::make_shared(), "Total size of all the data part files in bytes."}, + {"data_compressed_bytes", std::make_shared(), "Total size of compressed data in the data part. 
All the auxiliary files (for example, files with marks) are not included."}, + {"data_uncompressed_bytes", std::make_shared(), "Total size of uncompressed data in the data part. All the auxiliary files (for example, files with marks) are not included."}, + {"primary_key_size", std::make_shared(), "The amount of memory (in bytes) used by primary key values in the primary.idx/cidx file on disk."}, + {"marks_bytes", std::make_shared(), "The size of the file with marks."}, + {"secondary_indices_compressed_bytes", std::make_shared(), "Total size of compressed data for secondary indices in the data part. All the auxiliary files (for example, files with marks) are not included."}, + {"secondary_indices_uncompressed_bytes", std::make_shared(), "Total size of uncompressed data for secondary indices in the data part. All the auxiliary files (for example, files with marks) are not included."}, + {"secondary_indices_marks_bytes", std::make_shared(), "The size of the file with marks for secondary indices."}, + {"modification_time", std::make_shared(), "The time the directory with the data part was modified. This usually corresponds to the time of data part creation."}, + {"remove_time", std::make_shared(), "The time when the data part became inactive."}, + {"refcount", std::make_shared(), "The number of places where the data part is used. A value greater than 2 indicates that the data part is used in queries or merges."}, + {"min_date", std::make_shared(), "The minimum value of the date key in the data part."}, + {"max_date", std::make_shared(), "The maximum value of the date key in the data part."}, + {"min_time", std::make_shared(), "The minimum value of the date and time key in the data part."}, + {"max_time", std::make_shared(), "The maximum value of the date and time key in the data part."}, + {"partition_id", std::make_shared(), "ID of the partition."}, + {"min_block_number", std::make_shared(), "The minimum number of data parts that make up the current part after merging."}, + {"max_block_number", std::make_shared(), "The maximum number of data parts that make up the current part after merging."}, + {"level", std::make_shared(), "Depth of the merge tree. Zero means that the current part was created by insert rather than by merging other parts."}, + {"data_version", std::make_shared(), "Number that is used to determine which mutations should be applied to the data part (mutations with a version higher than data_version)."}, + {"primary_key_bytes_in_memory", std::make_shared(), "The amount of memory (in bytes) used by primary key values."}, + {"primary_key_bytes_in_memory_allocated", std::make_shared(), "The amount of memory (in bytes) reserved for primary key values."}, + {"is_frozen", std::make_shared(), "Flag that shows that a partition data backup exists. 1, the backup exists. 0, the backup does not exist. 
"}, - {"database", std::make_shared()}, - {"table", std::make_shared()}, - {"engine", std::make_shared()}, - {"disk_name", std::make_shared()}, - {"path", std::make_shared()}, + {"database", std::make_shared(), "Name of the database."}, + {"table", std::make_shared(), "Name of the table."}, + {"engine", std::make_shared(), "Name of the table engine without parameters."}, + {"disk_name", std::make_shared(), "Name of a disk that stores the data part."}, + {"path", std::make_shared(), "Absolute path to the folder with data part files."}, - {"hash_of_all_files", std::make_shared()}, - {"hash_of_uncompressed_files", std::make_shared()}, - {"uncompressed_hash_of_compressed_files", std::make_shared()}, + {"hash_of_all_files", std::make_shared(), "sipHash128 of compressed files."}, + {"hash_of_uncompressed_files", std::make_shared(), "sipHash128 of uncompressed files (files with marks, index file etc.)."}, + {"uncompressed_hash_of_compressed_files", std::make_shared(), "sipHash128 of data in the compressed files as if they were uncompressed."}, - {"delete_ttl_info_min", std::make_shared()}, - {"delete_ttl_info_max", std::make_shared()}, + {"delete_ttl_info_min", std::make_shared(), "The minimum value of the date and time key for TTL DELETE rule."}, + {"delete_ttl_info_max", std::make_shared(), "The maximum value of the date and time key for TTL DELETE rule."}, - {"move_ttl_info.expression", std::make_shared(std::make_shared())}, - {"move_ttl_info.min", std::make_shared(std::make_shared())}, - {"move_ttl_info.max", std::make_shared(std::make_shared())}, + {"move_ttl_info.expression", std::make_shared(std::make_shared()), "Array of expressions. Each expression defines a TTL MOVE rule."}, + {"move_ttl_info.min", std::make_shared(std::make_shared()), "Array of date and time values. Each element describes the minimum key value for a TTL MOVE rule."}, + {"move_ttl_info.max", std::make_shared(std::make_shared()), "Array of date and time values. Each element describes the maximum key value for a TTL MOVE rule."}, {"default_compression_codec", std::make_shared()}, @@ -119,7 +119,7 @@ StorageSystemParts::StorageSystemParts(const StorageID & table_id_) {"has_lightweight_delete", std::make_shared()}, - {"last_removal_attempt_time", std::make_shared()}, + {"last_removal_attempt_time", std::make_shared()}, {"removal_state", std::make_shared()}, } ) diff --git a/src/Storages/System/StorageSystemPartsBase.cpp b/src/Storages/System/StorageSystemPartsBase.cpp index a5eb475ef74..48dab8c4777 100644 --- a/src/Storages/System/StorageSystemPartsBase.cpp +++ b/src/Storages/System/StorageSystemPartsBase.cpp @@ -238,19 +238,17 @@ Pipe StorageSystemPartsBase::read( } -StorageSystemPartsBase::StorageSystemPartsBase(const StorageID & table_id_, NamesAndTypesList && columns_) +StorageSystemPartsBase::StorageSystemPartsBase(const StorageID & table_id_, ColumnsDescription && columns) : IStorage(table_id_) { - ColumnsDescription tmp_columns(std::move(columns_)); - auto add_alias = [&](const String & alias_name, const String & column_name) { - if (!tmp_columns.has(column_name)) + if (!columns.has(column_name)) return; - ColumnDescription column(alias_name, tmp_columns.get(column_name).type); + ColumnDescription column(alias_name, columns.get(column_name).type); column.default_desc.kind = ColumnDefaultKind::Alias; column.default_desc.expression = std::make_shared(column_name); - tmp_columns.add(column); + columns.add(column); }; /// Add aliases for old column names for backwards compatibility. 
@@ -259,7 +257,7 @@ StorageSystemPartsBase::StorageSystemPartsBase(const StorageID & table_id_, Name add_alias("part_name", "name"); StorageInMemoryMetadata storage_metadata; - storage_metadata.setColumns(tmp_columns); + storage_metadata.setColumns(columns); setInMemoryMetadata(storage_metadata); } diff --git a/src/Storages/System/StorageSystemPartsBase.h b/src/Storages/System/StorageSystemPartsBase.h index ce7994e7a9d..0a45d0f9dfe 100644 --- a/src/Storages/System/StorageSystemPartsBase.h +++ b/src/Storages/System/StorageSystemPartsBase.h @@ -141,7 +141,7 @@ private: protected: const FormatSettings format_settings = {}; - StorageSystemPartsBase(const StorageID & table_id_, NamesAndTypesList && columns_); + StorageSystemPartsBase(const StorageID & table_id_, ColumnsDescription && columns); virtual std::unique_ptr getStoragesInfoStream(const SelectQueryInfo & query_info, ContextPtr context) { diff --git a/src/Storages/System/StorageSystemPartsColumns.cpp b/src/Storages/System/StorageSystemPartsColumns.cpp index a41ab24a340..833a5e1ec16 100644 --- a/src/Storages/System/StorageSystemPartsColumns.cpp +++ b/src/Storages/System/StorageSystemPartsColumns.cpp @@ -21,7 +21,7 @@ namespace DB StorageSystemPartsColumns::StorageSystemPartsColumns(const StorageID & table_id_) : StorageSystemPartsBase(table_id_, - { + ColumnsDescription{ {"partition", std::make_shared()}, {"name", std::make_shared()}, {"uuid", std::make_shared()}, diff --git a/src/Storages/System/StorageSystemPrivileges.cpp b/src/Storages/System/StorageSystemPrivileges.cpp index ee412d0e648..f45f3c6ed01 100644 --- a/src/Storages/System/StorageSystemPrivileges.cpp +++ b/src/Storages/System/StorageSystemPrivileges.cpp @@ -64,15 +64,16 @@ const std::vector> & StorageSystemPrivileges::getAccess } -NamesAndTypesList StorageSystemPrivileges::getNamesAndTypes() +ColumnsDescription StorageSystemPrivileges::getColumnsDescription() { - NamesAndTypesList names_and_types{ + /// TODO: Fill in all the comments. + return ColumnsDescription + { {"privilege", std::make_shared(getAccessTypeEnumValues())}, {"aliases", std::make_shared(std::make_shared())}, {"level", std::make_shared(std::make_shared(getLevelEnumValues()))}, {"parent_group", std::make_shared(std::make_shared(getAccessTypeEnumValues()))}, }; - return names_and_types; } diff --git a/src/Storages/System/StorageSystemPrivileges.h b/src/Storages/System/StorageSystemPrivileges.h index a30fefad137..4441cf78d5c 100644 --- a/src/Storages/System/StorageSystemPrivileges.h +++ b/src/Storages/System/StorageSystemPrivileges.h @@ -12,7 +12,7 @@ class StorageSystemPrivileges final : public IStorageSystemOneBlock> & getAccessTypeEnumValues(); protected: diff --git a/src/Storages/System/StorageSystemProcesses.cpp b/src/Storages/System/StorageSystemProcesses.cpp index e053f2e63ff..6702e68b81e 100644 --- a/src/Storages/System/StorageSystemProcesses.cpp +++ b/src/Storages/System/StorageSystemProcesses.cpp @@ -17,14 +17,15 @@ namespace DB { -NamesAndTypesList StorageSystemProcesses::getNamesAndTypes() +ColumnsDescription StorageSystemProcesses::getColumnsDescription() { - return { + auto description = ColumnsDescription + { {"is_initial_query", std::make_shared()}, - {"user", std::make_shared()}, - {"query_id", std::make_shared()}, - {"address", DataTypeFactory::instance().get("IPv6")}, + {"user", std::make_shared(), "The user who made the query. Keep in mind that for distributed processing, queries are sent to remote servers under the default user. 
The field contains the username for a specific query, not for a query that this query initiated."}, + {"query_id", std::make_shared(), "Query ID, if defined."}, + {"address", DataTypeFactory::instance().get("IPv6"), "The IP address the request was made from. The same for distributed processing. To track where a distributed query was originally made from, look at system.processes on the query requestor server."}, {"port", std::make_shared()}, {"initial_user", std::make_shared()}, @@ -50,17 +51,17 @@ NamesAndTypesList StorageSystemProcesses::getNamesAndTypes() {"quota_key", std::make_shared()}, {"distributed_depth", std::make_shared()}, - {"elapsed", std::make_shared()}, - {"is_cancelled", std::make_shared()}, - {"is_all_data_sent", std::make_shared()}, - {"read_rows", std::make_shared()}, - {"read_bytes", std::make_shared()}, - {"total_rows_approx", std::make_shared()}, + {"elapsed", std::make_shared(), "The time in seconds since request execution started."}, + {"is_cancelled", std::make_shared(), "Was query cancelled."}, + {"is_all_data_sent", std::make_shared(), "Was all data sent to the client (in other words query had been finished on the server)."}, + {"read_rows", std::make_shared(), "The number of rows read from the table. For distributed processing, on the requestor server, this is the total for all remote servers."}, + {"read_bytes", std::make_shared(), "The number of uncompressed bytes read from the table. For distributed processing, on the requestor server, this is the total for all remote servers."}, + {"total_rows_approx", std::make_shared(), "The approximation of the total number of rows that should be read. For distributed processing, on the requestor server, this is the total for all remote servers. It can be updated during request processing, when new sources to process become known."}, {"written_rows", std::make_shared()}, {"written_bytes", std::make_shared()}, - {"memory_usage", std::make_shared()}, + {"memory_usage", std::make_shared(), "Amount of RAM the query uses. It might not include some types of dedicated memory"}, {"peak_memory_usage", std::make_shared()}, - {"query", std::make_shared()}, + {"query", std::make_shared(), "The query text. 
For INSERT, it does not include the data to insert."}, {"query_kind", std::make_shared()}, {"thread_ids", std::make_shared(std::make_shared())}, @@ -69,17 +70,15 @@ NamesAndTypesList StorageSystemProcesses::getNamesAndTypes() {"current_database", std::make_shared()}, }; -} -NamesAndAliases StorageSystemProcesses::getNamesAndAliases() -{ - return - { + description.setAliases({ {"ProfileEvents.Names", {std::make_shared(std::make_shared())}, "mapKeys(ProfileEvents)"}, {"ProfileEvents.Values", {std::make_shared(std::make_shared())}, "mapValues(ProfileEvents)"}, {"Settings.Names", {std::make_shared(std::make_shared())}, "mapKeys(Settings)" }, {"Settings.Values", {std::make_shared(std::make_shared())}, "mapValues(Settings)"} - }; + }); + + return description; } void StorageSystemProcesses::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const diff --git a/src/Storages/System/StorageSystemProcesses.h b/src/Storages/System/StorageSystemProcesses.h index 8e61a3a402c..3017f9fd367 100644 --- a/src/Storages/System/StorageSystemProcesses.h +++ b/src/Storages/System/StorageSystemProcesses.h @@ -16,9 +16,7 @@ class StorageSystemProcesses final : public IStorageSystemOneBlock()}, {"name", std::make_shared()}, {"part_type", std::make_shared()}, diff --git a/src/Storages/System/StorageSystemProjectionPartsColumns.cpp b/src/Storages/System/StorageSystemProjectionPartsColumns.cpp index 06becc6d91c..2ff25f86366 100644 --- a/src/Storages/System/StorageSystemProjectionPartsColumns.cpp +++ b/src/Storages/System/StorageSystemProjectionPartsColumns.cpp @@ -18,7 +18,7 @@ namespace DB StorageSystemProjectionPartsColumns::StorageSystemProjectionPartsColumns(const StorageID & table_id_) : StorageSystemPartsBase(table_id_, - { + ColumnsDescription{ {"partition", std::make_shared()}, {"name", std::make_shared()}, {"part_type", std::make_shared()}, diff --git a/src/Storages/System/StorageSystemQueryCache.cpp b/src/Storages/System/StorageSystemQueryCache.cpp index ec4f4eeec23..03111755904 100644 --- a/src/Storages/System/StorageSystemQueryCache.cpp +++ b/src/Storages/System/StorageSystemQueryCache.cpp @@ -9,16 +9,17 @@ namespace DB { -NamesAndTypesList StorageSystemQueryCache::getNamesAndTypes() +ColumnsDescription StorageSystemQueryCache::getColumnsDescription() { - return { - {"query", std::make_shared()}, - {"result_size", std::make_shared()}, - {"stale", std::make_shared()}, - {"shared", std::make_shared()}, - {"compressed", std::make_shared()}, - {"expires_at", std::make_shared()}, - {"key_hash", std::make_shared()} + return ColumnsDescription + { + {"query", std::make_shared(), "Query string."}, + {"result_size", std::make_shared(), "Size of the query cache entry."}, + {"stale", std::make_shared(), "If the query cache entry is stale."}, + {"shared", std::make_shared(), "If the query cache entry is shared between multiple users."}, + {"compressed", std::make_shared(), "If the query cache entry is compressed."}, + {"expires_at", std::make_shared(), "When the query cache entry becomes stale."}, + {"key_hash", std::make_shared(), "A hash of the query string, used as a key to find query cache entries."} }; } diff --git a/src/Storages/System/StorageSystemQueryCache.h b/src/Storages/System/StorageSystemQueryCache.h index 5ff5f0a0454..08ad30afb81 100644 --- a/src/Storages/System/StorageSystemQueryCache.h +++ b/src/Storages/System/StorageSystemQueryCache.h @@ -12,7 +12,7 @@ public: std::string getName() const override { return "SystemQueryCache"; } - static NamesAndTypesList 
getNamesAndTypes(); + static ColumnsDescription getColumnsDescription(); protected: void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const override; diff --git a/src/Storages/System/StorageSystemQuotaLimits.cpp b/src/Storages/System/StorageSystemQuotaLimits.cpp index 6cc269130a0..f125a990a88 100644 --- a/src/Storages/System/StorageSystemQuotaLimits.cpp +++ b/src/Storages/System/StorageSystemQuotaLimits.cpp @@ -40,12 +40,23 @@ namespace } -NamesAndTypesList StorageSystemQuotaLimits::getNamesAndTypes() +ColumnsDescription StorageSystemQuotaLimits::getColumnsDescription() { - NamesAndTypesList names_and_types{ - {"quota_name", std::make_shared()}, - {"duration", std::make_shared()}, - {"is_randomized_interval", std::make_shared()}, + ColumnsDescription result + { + {"quota_name", std::make_shared(), "Quota name."}, + {"duration", std::make_shared(), "Length of the time interval for calculating resource consumption, in seconds."}, + {"is_randomized_interval", std::make_shared(), + "Boolean value. It shows whether the interval is randomized. " + "Interval always starts at the same time if it is not randomized. " + "For example, an interval of 1 minute always starts at an integer number of minutes " + "(i.e. it can start at 11:20:00, but it never starts at 11:20:01), " + "an interval of one day always starts at midnight UTC. " + "If interval is randomized, the very first interval starts at random time, " + "and subsequent intervals starts one by one. Values: " + "0 — Interval is not randomized, " + "1 — Interval is randomized." + }, }; for (auto quota_type : collections::range(QuotaType::MAX)) @@ -57,10 +68,10 @@ NamesAndTypesList StorageSystemQuotaLimits::getNamesAndTypes() data_type = std::make_shared(); else data_type = std::make_shared(); - names_and_types.push_back({column_name, std::make_shared(data_type)}); + result.add({column_name, std::make_shared(data_type)}); } - return names_and_types; + return result; } diff --git a/src/Storages/System/StorageSystemQuotaLimits.h b/src/Storages/System/StorageSystemQuotaLimits.h index 927e45492e1..acc977d0df7 100644 --- a/src/Storages/System/StorageSystemQuotaLimits.h +++ b/src/Storages/System/StorageSystemQuotaLimits.h @@ -12,7 +12,7 @@ class StorageSystemQuotaLimits final : public IStorageSystemOneBlock()}, - {"quota_key", std::make_shared()} + ColumnsDescription description + { + {"quota_name", std::make_shared(), "Quota name."}, + {"quota_key", std::make_shared(), "Key value."} }; if (add_column_is_current) - names_and_types.push_back({"is_current", std::make_shared()}); + description.add({"is_current", std::make_shared(), "Quota usage for current user."}); - names_and_types.push_back({"start_time", std::make_shared(std::make_shared())}); - names_and_types.push_back({"end_time", std::make_shared(std::make_shared())}); - names_and_types.push_back({"duration", std::make_shared(std::make_shared())}); + description.add({ + "start_time", + std::make_shared(std::make_shared()), + "Start time for calculating resource consumption." + }); + description.add({ + "end_time", + std::make_shared(std::make_shared()), + "End time for calculating resource consumption." + }); + description.add({ + "duration", + std::make_shared(std::make_shared()), + "Length of the time interval for calculating resource consumption, in seconds." 
+ }); for (auto quota_type : collections::range(QuotaType::MAX)) { @@ -68,11 +81,11 @@ NamesAndTypesList StorageSystemQuotaUsage::getNamesAndTypesImpl(bool add_column_ data_type = std::make_shared(); else data_type = std::make_shared(); - names_and_types.push_back({column_name, std::make_shared(data_type)}); - names_and_types.push_back({String("max_") + column_name, std::make_shared(data_type)}); + description.add({column_name, std::make_shared(data_type)}); + description.add({String("max_") + column_name, std::make_shared(data_type)}); } - return names_and_types; + return description; } diff --git a/src/Storages/System/StorageSystemQuotaUsage.h b/src/Storages/System/StorageSystemQuotaUsage.h index 47cf8f5dfc0..a3109e9ca31 100644 --- a/src/Storages/System/StorageSystemQuotaUsage.h +++ b/src/Storages/System/StorageSystemQuotaUsage.h @@ -16,9 +16,9 @@ class StorageSystemQuotaUsage final : public IStorageSystemOneBlock & quotas_usage); protected: diff --git a/src/Storages/System/StorageSystemQuotas.cpp b/src/Storages/System/StorageSystemQuotas.cpp index 439883e038a..ee302f2f163 100644 --- a/src/Storages/System/StorageSystemQuotas.cpp +++ b/src/Storages/System/StorageSystemQuotas.cpp @@ -35,19 +35,34 @@ namespace } -NamesAndTypesList StorageSystemQuotas::getNamesAndTypes() +ColumnsDescription StorageSystemQuotas::getColumnsDescription() { - NamesAndTypesList names_and_types{ - {"name", std::make_shared()}, - {"id", std::make_shared()}, - {"storage", std::make_shared()}, - {"keys", std::make_shared(std::make_shared(getKeyTypeEnumValues()))}, - {"durations", std::make_shared(std::make_shared())}, - {"apply_to_all", std::make_shared()}, - {"apply_to_list", std::make_shared(std::make_shared())}, - {"apply_to_except", std::make_shared(std::make_shared())} + return ColumnsDescription + { + {"name", std::make_shared(), "Quota name."}, + {"id", std::make_shared(), "Quota ID."}, + {"storage", std::make_shared(), "Storage of quotas. Possible value: “users.xml” if a quota configured in the users.xml file, “disk” if a quota configured by an SQL-query."}, + {"keys", std::make_shared(std::make_shared(getKeyTypeEnumValues())), + "Key specifies how the quota should be shared. If two connections use the same quota and key, they share the same amounts of resources. Values: " + "[] — All users share the same quota, " + "['user_name'] — Connections with the same user name share the same quota, " + "['ip_address'] — Connections from the same IP share the same quota. " + "['client_key'] — Connections with the same key share the same quota. A key must be explicitly provided by a client. " + "When using clickhouse-client, pass a key value in the --quota_key parameter, " + "or use the quota_key parameter in the client configuration file. " + "When using HTTP interface, use the X-ClickHouse-Quota header, " + "['user_name', 'client_key'] — Connections with the same client_key share the same quota. If a key isn't provided by a client, the quota is tracked for `user_name`, " + "['client_key', 'ip_address'] — Connections with the same client_key share the same quota. If a key isn’t provided by a client, the quota is tracked for ip_address." + }, + {"durations", std::make_shared(std::make_shared()), "Time interval lengths in seconds."}, + {"apply_to_all", std::make_shared(), + "Logical value. It shows which users the quota is applied to. Values: " + "0 — The quota applies to users specify in the apply_to_list. " + "1 — The quota applies to all users except those listed in apply_to_except." 
+ }, + {"apply_to_list", std::make_shared(std::make_shared()), "List of user names/roles that the quota should be applied to."}, + {"apply_to_except", std::make_shared(std::make_shared()), "List of user names/roles that the quota should not apply to."} }; - return names_and_types; } diff --git a/src/Storages/System/StorageSystemQuotas.h b/src/Storages/System/StorageSystemQuotas.h index 28c873aa734..cafd8b921fa 100644 --- a/src/Storages/System/StorageSystemQuotas.h +++ b/src/Storages/System/StorageSystemQuotas.h @@ -13,7 +13,7 @@ class StorageSystemQuotas final : public IStorageSystemOneBlock & partitions) override; void restoreDataFromBackup(RestorerFromBackup & restorer, const String & data_path_in_backup, const std::optional & partitions) override; diff --git a/src/Storages/System/StorageSystemQuotasUsage.cpp b/src/Storages/System/StorageSystemQuotasUsage.cpp index a3c97247111..ed22f73dd50 100644 --- a/src/Storages/System/StorageSystemQuotasUsage.cpp +++ b/src/Storages/System/StorageSystemQuotasUsage.cpp @@ -8,9 +8,9 @@ namespace DB { -NamesAndTypesList StorageSystemQuotasUsage::getNamesAndTypes() +ColumnsDescription StorageSystemQuotasUsage::getColumnsDescription() { - return StorageSystemQuotaUsage::getNamesAndTypesImpl(/* add_column_is_current = */ true); + return StorageSystemQuotaUsage::getColumnsDescriptionImpl(/* add_column_is_current = */ true); } void StorageSystemQuotasUsage::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const diff --git a/src/Storages/System/StorageSystemQuotasUsage.h b/src/Storages/System/StorageSystemQuotasUsage.h index d60258bbc3f..ecdc62865d1 100644 --- a/src/Storages/System/StorageSystemQuotasUsage.h +++ b/src/Storages/System/StorageSystemQuotasUsage.h @@ -14,7 +14,7 @@ class StorageSystemQuotasUsage final : public IStorageSystemOneBlock()}, - {"table", std::make_shared()}, - {"elapsed", std::make_shared()}, - {"progress", std::make_shared()}, - {"result_part_name", std::make_shared()}, - {"result_part_path", std::make_shared()}, - {"partition_id", std::make_shared()}, - {"total_size_bytes_compressed", std::make_shared()}, - {"bytes_read_compressed", std::make_shared()}, - {"source_replica_path", std::make_shared()}, - {"source_replica_hostname", std::make_shared()}, - {"source_replica_port", std::make_shared()}, - {"interserver_scheme", std::make_shared()}, - {"URI", std::make_shared()}, - {"to_detached", std::make_shared()}, - {"thread_id", std::make_shared()}, + return ColumnsDescription + { + {"database", std::make_shared(), "Name of the database."}, + {"table", std::make_shared(), "Name of the table."}, + {"elapsed", std::make_shared(), "The time elapsed (in seconds) since showing currently running background fetches started."}, + {"progress", std::make_shared(), "The percentage of completed work from 0 to 1."}, + {"result_part_name", std::make_shared(), + "The name of the part that will be formed as the result of showing currently running background fetches."}, + {"result_part_path", std::make_shared(), + "Absolute path to the part that will be formed as the result of showing currently running background fetches."}, + {"partition_id", std::make_shared(), "ID of the partition."}, + {"total_size_bytes_compressed", std::make_shared(), "The total size (in bytes) of the compressed data in the result part."}, + {"bytes_read_compressed", std::make_shared(), "The number of compressed bytes read from the result part."}, + {"source_replica_path", std::make_shared(), "Absolute path to the source replica."}, + 
{"source_replica_hostname", std::make_shared(), "Hostname of the source replica."}, + {"source_replica_port", std::make_shared(), "Port number of the source replica."}, + {"interserver_scheme", std::make_shared(), "Name of the interserver scheme."}, + {"URI", std::make_shared(), "Uniform resource identifier."}, + {"to_detached", std::make_shared(), + "The flag indicates whether the currently running background fetch is being performed using the TO DETACHED expression."}, + {"thread_id", std::make_shared(), "Thread identifier."}, }; } diff --git a/src/Storages/System/StorageSystemReplicatedFetches.h b/src/Storages/System/StorageSystemReplicatedFetches.h index d496741b42a..a176912cac0 100644 --- a/src/Storages/System/StorageSystemReplicatedFetches.h +++ b/src/Storages/System/StorageSystemReplicatedFetches.h @@ -15,7 +15,7 @@ class StorageSystemReplicatedFetches final : public IStorageSystemOneBlock() }, - { "table", std::make_shared() }, - { "replica_name", std::make_shared() }, + { "database", std::make_shared(), "Name of the database."}, + { "table", std::make_shared(), "Name of the table."}, + { "replica_name", std::make_shared(), + "Replica name in ClickHouse Keeper. Different replicas of the same table have different names."}, /// Constant element properties. - { "position", std::make_shared() }, - { "node_name", std::make_shared() }, - { "type", std::make_shared() }, - { "create_time", std::make_shared() }, - { "required_quorum", std::make_shared() }, - { "source_replica", std::make_shared() }, - { "new_part_name", std::make_shared() }, - { "parts_to_merge", std::make_shared(std::make_shared()) }, - { "is_detach", std::make_shared() }, + { "position", std::make_shared(), "Position of the task in the queue."}, + { "node_name", std::make_shared(), "Node name in ClickHouse Keeper."}, + { "type", std::make_shared(), + "Type of the task in the queue, one of: " + "• GET_PART — Get the part from another replica, " + "• ATTACH_PART — Attach the part, possibly from our own replica (if found in the detached folder). " + "You may think of it as a GET_PART with some optimizations as they're nearly identical, " + "• MERGE_PARTS — Merge the parts, " + "• DROP_RANGE — Delete the parts in the specified partition in the specified number range. " + "• CLEAR_COLUMN — NOTE: Deprecated. Drop specific column from specified partition. " + "• CLEAR_INDEX — NOTE: Deprecated. Drop specific index from specified partition. " + "• REPLACE_RANGE — Drop a certain range of parts and replace them with new ones. " + "• MUTATE_PART — Apply one or several mutations to the part. " + "• ALTER_METADATA — Apply alter modification according to global /metadata and /columns paths." + }, + { "create_time", std::make_shared(), "Date and time when the task was submitted for execution."}, + { "required_quorum", std::make_shared(), "The number of replicas waiting for the task to complete with confirmation of completion. This column is only relevant for the GET_PARTS task."}, + { "source_replica", std::make_shared(), "Name of the source replica."}, + { "new_part_name", std::make_shared(), "Name of the new part."}, + { "parts_to_merge", std::make_shared(std::make_shared()), "Names of parts to merge or update."}, + { "is_detach", std::make_shared(), "The flag indicates whether the DETACH_PARTS task is in the queue."}, /// Processing status of item. 
- { "is_currently_executing", std::make_shared() }, - { "num_tries", std::make_shared() }, - { "last_exception", std::make_shared() }, - { "last_exception_time", std::make_shared() }, - { "last_attempt_time", std::make_shared() }, - { "num_postponed", std::make_shared() }, - { "postpone_reason", std::make_shared() }, - { "last_postpone_time", std::make_shared() }, - { "merge_type", std::make_shared() }, + { "is_currently_executing", std::make_shared(), "The flag indicates whether a specific task is being performed right now."}, + { "num_tries", std::make_shared(), "The number of failed attempts to complete the task."}, + { "last_exception", std::make_shared(), "Text message about the last error that occurred (if any)."}, + { "last_exception_time", std::make_shared(), "Date and time when the last error occurred."}, + { "last_attempt_time", std::make_shared(), "Date and time when the task was last attempted."}, + { "num_postponed", std::make_shared(), "The number of postponed tasks."}, + { "postpone_reason", std::make_shared(), "The reason why the task was postponed."}, + { "last_postpone_time", std::make_shared(), "Date and time when the task was last postponed."}, + { "merge_type", std::make_shared(), "Type of the current merge. Empty if it's a mutation."}, }; } diff --git a/src/Storages/System/StorageSystemReplicationQueue.h b/src/Storages/System/StorageSystemReplicationQueue.h index 23376074d13..003e4eeb927 100644 --- a/src/Storages/System/StorageSystemReplicationQueue.h +++ b/src/Storages/System/StorageSystemReplicationQueue.h @@ -16,7 +16,7 @@ class StorageSystemReplicationQueue final : public IStorageSystemOneBlock(std::make_shared())}, - {"role_name", std::make_shared(std::make_shared())}, - {"granted_role_name", std::make_shared()}, - {"granted_role_id", std::make_shared()}, - {"granted_role_is_default", std::make_shared()}, - {"with_admin_option", std::make_shared()}, + return ColumnsDescription + { + {"user_name", std::make_shared(std::make_shared()), "User name."}, + {"role_name", std::make_shared(std::make_shared()), "Role name."}, + {"granted_role_name", std::make_shared(), + "Name of role granted to the `role_name` role. To grant one role to another one use `GRANT role1 TO role2`."}, + {"granted_role_id", std::make_shared(), "The ID of the role."}, + {"granted_role_is_default", std::make_shared(), + "Flag that shows whether `granted_role` is a default role. Possible values: " + "• 1 — `granted_role` is a default role, " + "• 0 — `granted_role` is not a default role." + }, + {"with_admin_option", std::make_shared(), + "Flag that shows whether `granted_role` is a role with `ADMIN OPTION` privilege. Possible values: " + "• 1 — The role has `ADMIN OPTION` privilege." + "• 0 — The role without `ADMIN OPTION` privilege." + }, }; - return names_and_types; } diff --git a/src/Storages/System/StorageSystemRoleGrants.h b/src/Storages/System/StorageSystemRoleGrants.h index 16b27f7a608..969f82f85d5 100644 --- a/src/Storages/System/StorageSystemRoleGrants.h +++ b/src/Storages/System/StorageSystemRoleGrants.h @@ -12,7 +12,7 @@ class StorageSystemRoleGrants final : public IStorageSystemOneBlock()}, - {"id", std::make_shared()}, - {"storage", std::make_shared()}, + return ColumnsDescription + { + {"name", std::make_shared(), "Role name."}, + {"id", std::make_shared(), "Role ID."}, + {"storage", std::make_shared(), "Path to the storage of roles. 
Configured in the `access_control_path` parameter."}, }; - return names_and_types; } diff --git a/src/Storages/System/StorageSystemRoles.h b/src/Storages/System/StorageSystemRoles.h index d9de9db5c65..2b4ae93a932 100644 --- a/src/Storages/System/StorageSystemRoles.h +++ b/src/Storages/System/StorageSystemRoles.h @@ -12,7 +12,7 @@ class StorageSystemRoles final : public IStorageSystemOneBlock & partitions) override; void restoreDataFromBackup(RestorerFromBackup & restorer, const String & data_path_in_backup, const std::optional & partitions) override; diff --git a/src/Storages/System/StorageSystemRowPolicies.cpp b/src/Storages/System/StorageSystemRowPolicies.cpp index c0bc38edc21..5a959cdf9af 100644 --- a/src/Storages/System/StorageSystemRowPolicies.cpp +++ b/src/Storages/System/StorageSystemRowPolicies.cpp @@ -21,33 +21,43 @@ namespace DB { -NamesAndTypesList StorageSystemRowPolicies::getNamesAndTypes() +ColumnsDescription StorageSystemRowPolicies::getColumnsDescription() { - NamesAndTypesList names_and_types{ - {"name", std::make_shared()}, - {"short_name", std::make_shared()}, - {"database", std::make_shared()}, - {"table", std::make_shared()}, - {"id", std::make_shared()}, - {"storage", std::make_shared()}, + ColumnsDescription description + { + {"name", std::make_shared(), "Name of a row policy."}, + {"short_name", std::make_shared(), + "Short name of a row policy. Names of row policies are compound, for example: myfilter ON mydb.mytable. " + "Here 'myfilter ON mydb.mytable' is the name of the row policy, 'myfilter' is it's short name." + }, + {"database", std::make_shared(), "Database name."}, + {"table", std::make_shared(), "Table name. Empty if policy for database."}, + {"id", std::make_shared(), "Row policy ID."}, + {"storage", std::make_shared(), "Name of the directory where the row policy is stored."}, }; for (auto filter_type : collections::range(RowPolicyFilterType::MAX)) { const String & column_name = RowPolicyFilterTypeInfo::get(filter_type).name; - names_and_types.push_back({column_name, std::make_shared(std::make_shared())}); + description.add({column_name, std::make_shared(std::make_shared())}); } - NamesAndTypesList extra_names_and_types{ - {"is_restrictive", std::make_shared()}, - {"apply_to_all", std::make_shared()}, - {"apply_to_list", std::make_shared(std::make_shared())}, - {"apply_to_except", std::make_shared(std::make_shared())} - }; + description.add({"is_restrictive", std::make_shared(), + "Shows whether the row policy restricts access to rows. Value: " + "• 0 — The row policy is defined with `AS PERMISSIVE` clause, " + "• 1 — The row policy is defined with AS RESTRICTIVE clause." + }); + description.add({"apply_to_all", std::make_shared(), + "Shows that the row policies set for all roles and/or users." + }); + description.add({"apply_to_list", std::make_shared(std::make_shared()), + "List of the roles and/or users to which the row policies is applied." + }); + description.add({"apply_to_except", std::make_shared(std::make_shared()), + "The row policies is applied to all roles and/or users excepting of the listed ones." 
+ }); - insertAtEnd(names_and_types, extra_names_and_types); - - return names_and_types; + return description; } diff --git a/src/Storages/System/StorageSystemRowPolicies.h b/src/Storages/System/StorageSystemRowPolicies.h index 9f94f7df65b..f8aa5618126 100644 --- a/src/Storages/System/StorageSystemRowPolicies.h +++ b/src/Storages/System/StorageSystemRowPolicies.h @@ -14,7 +14,7 @@ class StorageSystemRowPolicies final : public IStorageSystemOneBlock & partitions) override; void restoreDataFromBackup(RestorerFromBackup & restorer, const String & data_path_in_backup, const std::optional & partitions) override; diff --git a/src/Storages/System/StorageSystemS3Queue.cpp b/src/Storages/System/StorageSystemS3Queue.cpp index 33b3dce4a83..3637734b225 100644 --- a/src/Storages/System/StorageSystemS3Queue.cpp +++ b/src/Storages/System/StorageSystemS3Queue.cpp @@ -20,9 +20,11 @@ namespace DB { -NamesAndTypesList StorageSystemS3Queue::getNamesAndTypes() +ColumnsDescription StorageSystemS3Queue::getColumnsDescription() { - return { + /// TODO: Fill in all the comments + return ColumnsDescription + { {"zookeeper_path", std::make_shared()}, {"file_name", std::make_shared()}, {"rows_processed", std::make_shared()}, diff --git a/src/Storages/System/StorageSystemS3Queue.h b/src/Storages/System/StorageSystemS3Queue.h index 1bb4e3694d2..1dc5c521941 100644 --- a/src/Storages/System/StorageSystemS3Queue.h +++ b/src/Storages/System/StorageSystemS3Queue.h @@ -14,7 +14,7 @@ public: std::string getName() const override { return "SystemS3Queue"; } - static NamesAndTypesList getNamesAndTypes(); + static ColumnsDescription getColumnsDescription(); protected: void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const override; diff --git a/src/Storages/System/StorageSystemScheduler.cpp b/src/Storages/System/StorageSystemScheduler.cpp index 0a6d8f91678..ba07d44dbf9 100644 --- a/src/Storages/System/StorageSystemScheduler.cpp +++ b/src/Storages/System/StorageSystemScheduler.cpp @@ -4,49 +4,79 @@ #include #include #include -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include +#include #include -#include "IO/ResourceRequest.h" +#include "Common/Scheduler/ResourceRequest.h" namespace DB { -NamesAndTypesList StorageSystemScheduler::getNamesAndTypes() +ColumnsDescription StorageSystemScheduler::getColumnsDescription() { - NamesAndTypesList names_and_types{ - {"resource", std::make_shared()}, - {"path", std::make_shared()}, - {"type", std::make_shared()}, - {"weight", std::make_shared()}, - {"priority", std::make_shared()}, - {"is_active", std::make_shared()}, - {"active_children", std::make_shared()}, - {"dequeued_requests", std::make_shared()}, - {"dequeued_cost", std::make_shared()}, - {"busy_periods", std::make_shared()}, - {"vruntime", std::make_shared(std::make_shared())}, - {"system_vruntime", std::make_shared(std::make_shared())}, - {"queue_length", std::make_shared(std::make_shared())}, - {"queue_cost", std::make_shared(std::make_shared())}, - {"budget", std::make_shared(std::make_shared())}, - {"is_satisfied", std::make_shared(std::make_shared())}, - {"inflight_requests", std::make_shared(std::make_shared())}, - {"inflight_cost", std::make_shared(std::make_shared())}, - {"max_requests", std::make_shared(std::make_shared())}, - {"max_cost", std::make_shared(std::make_shared())}, - {"max_speed", std::make_shared(std::make_shared())}, - {"max_burst", std::make_shared(std::make_shared())}, - 
{"throttling_us", std::make_shared(std::make_shared())}, - {"tokens", std::make_shared(std::make_shared())}, + return ColumnsDescription + { + {"resource", std::make_shared(), "Resource name"}, + {"path", std::make_shared(), "Path to a scheduling node within this resource scheduling hierarchy"}, + {"type", std::make_shared(), "Type of a scheduling node."}, + {"weight", std::make_shared(), "Weight of a node, used by a parent node of `fair` type."}, + {"priority", std::make_shared(), "Priority of a node, used by a parent node of 'priority' type (Lower value means higher priority)."}, + {"is_active", std::make_shared(), "Whether this node is currently active - has resource requests to be dequeued and constraints satisfied."}, + {"active_children", std::make_shared(), "The number of children in active state."}, + {"dequeued_requests", std::make_shared(), "The total number of resource requests dequeued from this node."}, + {"dequeued_cost", std::make_shared(), "The sum of costs (e.g. size in bytes) of all requests dequeued from this node."}, + {"busy_periods", std::make_shared(), "The total number of deactivations of this node."}, + {"vruntime", std::make_shared(std::make_shared()), + "For children of `fair` nodes only. Virtual runtime of a node used by SFQ algorithm to select the next child to process in a max-min fair manner."}, + {"system_vruntime", std::make_shared(std::make_shared()), + "For `fair` nodes only. Virtual runtime showing `vruntime` of the last processed resource request. " + "Used during child activation as the new value of `vruntime`." + }, + {"queue_length", std::make_shared(std::make_shared()), + "For `fifo` nodes only. Current number of resource requests residing in the queue." + }, + {"queue_cost", std::make_shared(std::make_shared()), + "For fifo nodes only. Sum of costs (e.g. size in bytes) of all requests residing in the queue." + }, + {"budget", std::make_shared(std::make_shared()), + "For fifo nodes only. The number of available 'cost units' for new resource requests. " + "Can appear in case of discrepancy of estimated and real costs of resource requests (e.g. after read/write failure)" + }, + {"is_satisfied", std::make_shared(std::make_shared()), + "For constraint nodes only (e.g. `inflight_limit`). Equals to `1` if all the constraint of this node are satisfied." + }, + {"inflight_requests", std::make_shared(std::make_shared()), + "For `inflight_limit` nodes only. The number of resource requests dequeued from this node, that are currently in consumption state." + }, + {"inflight_cost", std::make_shared(std::make_shared()), + "For `inflight_limit` nodes only. " + "The sum of costs (e.g. bytes) of all resource requests dequeued from this node, that are currently in consumption state." + }, + {"max_requests", std::make_shared(std::make_shared()), + "For `inflight_limit` nodes only. Upper limit for inflight_requests leading to constraint violation." + }, + {"max_cost", std::make_shared(std::make_shared()), + "For `inflight_limit` nodes only. Upper limit for inflight_cost leading to constraint violation." + }, + {"max_speed", std::make_shared(std::make_shared()), + "For `bandwidth_limit` nodes only. Upper limit for bandwidth in tokens per second." + }, + {"max_burst", std::make_shared(std::make_shared()), + "For `bandwidth_limit` nodes only. Upper limit for tokens available in token-bucket throttler." + }, + {"throttling_us", std::make_shared(std::make_shared()), + "For `bandwidth_limit` nodes only. Total number of microseconds this node was in throttling state." 
+ }, + {"tokens", std::make_shared(std::make_shared()), + "For `bandwidth_limit` nodes only. Number of tokens currently available in token-bucket throttler." + }, }; - return names_and_types; } diff --git a/src/Storages/System/StorageSystemScheduler.h b/src/Storages/System/StorageSystemScheduler.h index 31d14862209..1de72a85e9b 100644 --- a/src/Storages/System/StorageSystemScheduler.h +++ b/src/Storages/System/StorageSystemScheduler.h @@ -12,7 +12,7 @@ class StorageSystemScheduler final : public IStorageSystemOneBlock()}, - {"source", std::make_shared()}, - {"format", std::make_shared()}, - {"additional_format_info", std::make_shared()}, - {"registration_time", std::make_shared()}, - {"schema", std::make_shared(std::make_shared())}, - {"number_of_rows", std::make_shared(std::make_shared())}, - {"schema_inference_mode", std::make_shared(std::make_shared())}, + return ColumnsDescription + { + {"storage", std::make_shared(), "Storage name: File, URL, S3 or HDFS."}, + {"source", std::make_shared(), "File source."}, + {"format", std::make_shared(), "Format name."}, + {"additional_format_info", std::make_shared(), + "Additional information required to identify the schema. For example, format specific settings." + }, + {"registration_time", std::make_shared(), "Timestamp when schema was added in cache."}, + {"schema", std::make_shared(std::make_shared()), "Cached schema."}, + {"number_of_rows", std::make_shared(std::make_shared()), "Number of rows in the file in given format. It's used for caching trivial count() from data files and for caching number of rows from the metadata during schema inference."}, + {"schema_inference_mode", std::make_shared(std::make_shared()), "Scheme inference mode."}, }; } diff --git a/src/Storages/System/StorageSystemSchemaInferenceCache.h b/src/Storages/System/StorageSystemSchemaInferenceCache.h index e3afc6e1e38..e6d306f8252 100644 --- a/src/Storages/System/StorageSystemSchemaInferenceCache.h +++ b/src/Storages/System/StorageSystemSchemaInferenceCache.h @@ -11,7 +11,7 @@ class StorageSystemSchemaInferenceCache final : public IStorageSystemOneBlock( DataTypeEnum8::Values @@ -42,15 +42,16 @@ NamesAndTypesList StorageSystemServerSettings::getNamesAndTypes() {"Yes", static_cast(ChangeableWithoutRestart::Yes)}, }); - return { - {"name", std::make_shared()}, - {"value", std::make_shared()}, - {"default", std::make_shared()}, - {"changed", std::make_shared()}, - {"description", std::make_shared()}, - {"type", std::make_shared()}, - {"changeable_without_restart", std::move(changeable_without_restart_type)}, - {"is_obsolete", std::make_shared()} + return ColumnsDescription + { + {"name", std::make_shared(), "Server setting name."}, + {"value", std::make_shared(), "Server setting value."}, + {"default", std::make_shared(), "Server setting default value."}, + {"changed", std::make_shared(), "Shows whether a setting was specified in config.xml"}, + {"description", std::make_shared(), "Short server setting description."}, + {"type", std::make_shared(), "Server setting value type."}, + {"changeable_without_restart", std::move(changeable_without_restart_type), "Shows whether a setting can be changed at runtime."}, + {"is_obsolete", std::make_shared(), "Shows whether a setting is obsolete."} }; } diff --git a/src/Storages/System/StorageSystemServerSettings.h b/src/Storages/System/StorageSystemServerSettings.h index b3aa8055853..276f21d674b 100644 --- a/src/Storages/System/StorageSystemServerSettings.h +++ b/src/Storages/System/StorageSystemServerSettings.h @@ -16,7 +16,7 @@ class 
StorageSystemServerSettings final : public IStorageSystemOneBlock()}, - {"value", std::make_shared()}, - {"changed", std::make_shared()}, - {"description", std::make_shared()}, - {"min", std::make_shared(std::make_shared())}, - {"max", std::make_shared(std::make_shared())}, - {"readonly", std::make_shared()}, - {"type", std::make_shared()}, - {"default", std::make_shared()}, - {"alias_for", std::make_shared()}, - {"is_obsolete", std::make_shared()}, + return ColumnsDescription + { + {"name", std::make_shared(), "Setting name."}, + {"value", std::make_shared(), "Setting value."}, + {"changed", std::make_shared(), "Shows whether a setting is changed from its default value."}, + {"description", std::make_shared(), "Short setting description."}, + {"min", std::make_shared(std::make_shared()), + "Minimum value of the setting, if any is set via constraints. If the setting has no minimum value, contains NULL." + }, + {"max", std::make_shared(std::make_shared()), + "Maximum value of the setting, if any is set via constraints. If the setting has no maximum value, contains NULL." + }, + {"readonly", std::make_shared(), + "Shows whether the current user can change the setting: " + "0 — Current user can change the setting, " + "1 — Current user can't change the setting." + }, + {"type", std::make_shared(), "The type of the value that can be assigned to this setting."}, + {"default", std::make_shared(), "Setting default value."}, + {"alias_for", std::make_shared(), "Flag that shows whether this name is an alias to another setting."}, + {"is_obsolete", std::make_shared(), "Shows whether a setting is obsolete."}, }; } diff --git a/src/Storages/System/StorageSystemSettings.h b/src/Storages/System/StorageSystemSettings.h index 201b79ac309..6749f9b20a4 100644 --- a/src/Storages/System/StorageSystemSettings.h +++ b/src/Storages/System/StorageSystemSettings.h @@ -16,7 +16,7 @@ class StorageSystemSettings final : public IStorageSystemOneBlock()}, {"changes", std::make_shared(std::make_shared( diff --git a/src/Storages/System/StorageSystemSettingsChanges.h b/src/Storages/System/StorageSystemSettingsChanges.h index 283487df51b..3a1a8ce23d1 100644 --- a/src/Storages/System/StorageSystemSettingsChanges.h +++ b/src/Storages/System/StorageSystemSettingsChanges.h @@ -17,7 +17,7 @@ class StorageSystemSettingsChanges final : public IStorageSystemOneBlock> & getSettingConstraintWritabilityEnum return values; } -NamesAndTypesList StorageSystemSettingsProfileElements::getNamesAndTypes() +ColumnsDescription StorageSystemSettingsProfileElements::getColumnsDescription() { - NamesAndTypesList names_and_types{ - {"profile_name", std::make_shared(std::make_shared())}, - {"user_name", std::make_shared(std::make_shared())}, - {"role_name", std::make_shared(std::make_shared())}, - {"index", std::make_shared()}, - {"setting_name", std::make_shared(std::make_shared())}, - {"value", std::make_shared(std::make_shared())}, - {"min", std::make_shared(std::make_shared())}, - {"max", std::make_shared(std::make_shared())}, + return ColumnsDescription + { + {"profile_name", std::make_shared(std::make_shared()), "Setting profile name."}, + {"user_name", std::make_shared(std::make_shared()), "User name."}, + {"role_name", std::make_shared(std::make_shared()), "Role name."}, + {"index", std::make_shared(), "Sequential number of the settings profile element."}, + {"setting_name", std::make_shared(std::make_shared()), "Setting name."}, + {"value", std::make_shared(std::make_shared()), "Setting value."}, + {"min", 
std::make_shared(std::make_shared()), "The minimum value of the setting. NULL if not set."}, + {"max", std::make_shared(std::make_shared()), "The maximum value of the setting. NULL if not set."}, {"writability", std::make_shared(std::make_shared(getSettingConstraintWritabilityEnumValues()))}, - {"inherit_profile", std::make_shared(std::make_shared())}, + {"inherit_profile", std::make_shared(std::make_shared()), + "A parent profile for this setting profile. NULL if not set. " + "Setting profile will inherit all the settings' values and constraints (min, max, readonly) from its parent profiles." + }, }; - return names_and_types; } diff --git a/src/Storages/System/StorageSystemSettingsProfileElements.h b/src/Storages/System/StorageSystemSettingsProfileElements.h index 58c792c591c..1dedd616c82 100644 --- a/src/Storages/System/StorageSystemSettingsProfileElements.h +++ b/src/Storages/System/StorageSystemSettingsProfileElements.h @@ -12,7 +12,7 @@ class StorageSystemSettingsProfileElements final : public IStorageSystemOneBlock { public: std::string getName() const override { return "SystemSettingsProfileElements"; } - static NamesAndTypesList getNamesAndTypes(); + static ColumnsDescription getColumnsDescription(); protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; diff --git a/src/Storages/System/StorageSystemSettingsProfiles.cpp b/src/Storages/System/StorageSystemSettingsProfiles.cpp index 069c8762154..01041bee445 100644 --- a/src/Storages/System/StorageSystemSettingsProfiles.cpp +++ b/src/Storages/System/StorageSystemSettingsProfiles.cpp @@ -17,18 +17,22 @@ namespace DB { -NamesAndTypesList StorageSystemSettingsProfiles::getNamesAndTypes() +ColumnsDescription StorageSystemSettingsProfiles::getColumnsDescription() { - NamesAndTypesList names_and_types{ - {"name", std::make_shared()}, - {"id", std::make_shared()}, - {"storage", std::make_shared()}, - {"num_elements", std::make_shared()}, - {"apply_to_all", std::make_shared()}, - {"apply_to_list", std::make_shared(std::make_shared())}, - {"apply_to_except", std::make_shared(std::make_shared())}, + return ColumnsDescription + { + {"name", std::make_shared(), "Setting profile name."}, + {"id", std::make_shared(), "Setting profile ID."}, + {"storage", std::make_shared(), "Path to the storage of setting profiles. Configured in the `access_control_path` parameter."}, + {"num_elements", std::make_shared(), "Number of elements for this profile in the `system.settings_profile_elements` table."}, + {"apply_to_all", std::make_shared(), "Shows that the settings profile set for all roles and/or users."}, + {"apply_to_list", std::make_shared(std::make_shared()), + "List of the roles and/or users to which the setting profile is applied." + }, + {"apply_to_except", std::make_shared(std::make_shared()), + "The setting profile is applied to all roles and/or users excepting of the listed ones." 
+ }, }; - return names_and_types; } diff --git a/src/Storages/System/StorageSystemSettingsProfiles.h b/src/Storages/System/StorageSystemSettingsProfiles.h index 6edaa02a4c3..b0c8fc8658c 100644 --- a/src/Storages/System/StorageSystemSettingsProfiles.h +++ b/src/Storages/System/StorageSystemSettingsProfiles.h @@ -12,7 +12,7 @@ class StorageSystemSettingsProfiles final : public IStorageSystemOneBlock & partitions) override; void restoreDataFromBackup(RestorerFromBackup & restorer, const String & data_path_in_backup, const std::optional & partitions) override; diff --git a/src/Storages/System/StorageSystemStackTrace.cpp b/src/Storages/System/StorageSystemStackTrace.cpp index e02d4bf1733..82a5fd4e33f 100644 --- a/src/Storages/System/StorageSystemStackTrace.cpp +++ b/src/Storages/System/StorageSystemStackTrace.cpp @@ -173,7 +173,7 @@ bool wait(int timeout_ms) } using ThreadIdToName = std::unordered_map>; -ThreadIdToName getFilteredThreadNames(const ActionsDAG::Node * predicate, ContextPtr context, const PaddedPODArray & thread_ids, Poco::Logger * log) +ThreadIdToName getFilteredThreadNames(const ActionsDAG::Node * predicate, ContextPtr context, const PaddedPODArray & thread_ids, LoggerPtr log) { ThreadIdToName tid_to_name; MutableColumnPtr all_thread_names = ColumnString::create(); @@ -274,7 +274,7 @@ bool isSignalBlocked(UInt64 tid, int signal) class StackTraceSource : public ISource { public: - StackTraceSource(const Names & column_names, Block header_, ASTPtr && query_, ActionsDAGPtr && filter_dag_, ContextPtr context_, UInt64 max_block_size_, Poco::Logger * log_) + StackTraceSource(const Names & column_names, Block header_, ASTPtr && query_, ActionsDAGPtr && filter_dag_, ContextPtr context_, UInt64 max_block_size_, LoggerPtr log_) : ISource(header_) , context(context_) , header(std::move(header_)) @@ -426,7 +426,7 @@ private: bool send_signal = false; bool read_thread_names = false; - Poco::Logger * log; + LoggerPtr log; std::filesystem::directory_iterator proc_it; std::filesystem::directory_iterator end; @@ -463,7 +463,7 @@ public: void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override { - auto filter_actions_dag = ActionsDAG::buildFilterActionsDAG(filter_nodes.nodes, {}, context); + auto filter_actions_dag = ActionsDAG::buildFilterActionsDAG(filter_nodes.nodes); Pipe pipe(std::make_shared( column_names, getOutputStream().header, @@ -481,7 +481,7 @@ public: ASTPtr && query_, ContextPtr context_, size_t max_block_size_, - Poco::Logger * log_) + LoggerPtr log_) : SourceStepWithFilter(DataStream{.header = std::move(sample_block)}) , column_names(column_names_) , query(query_) @@ -496,7 +496,7 @@ private: ASTPtr query; ContextPtr context; size_t max_block_size; - Poco::Logger * log; + LoggerPtr log; }; } @@ -504,7 +504,7 @@ private: StorageSystemStackTrace::StorageSystemStackTrace(const StorageID & table_id_) : IStorage(table_id_) - , log(&Poco::Logger::get("StorageSystemStackTrace")) + , log(getLogger("StorageSystemStackTrace")) { StorageInMemoryMetadata storage_metadata; storage_metadata.setColumns(ColumnsDescription({ diff --git a/src/Storages/System/StorageSystemStackTrace.h b/src/Storages/System/StorageSystemStackTrace.h index 18216cea1bd..ce1b7f8ccd2 100644 --- a/src/Storages/System/StorageSystemStackTrace.h +++ b/src/Storages/System/StorageSystemStackTrace.h @@ -38,7 +38,7 @@ public: bool isSystemStorage() const override { return true; } protected: - Poco::Logger * log; + LoggerPtr log; }; } diff --git 
a/src/Storages/System/StorageSystemTableEngines.cpp b/src/Storages/System/StorageSystemTableEngines.cpp index e74b2670426..c0cf95423d9 100644 --- a/src/Storages/System/StorageSystemTableEngines.cpp +++ b/src/Storages/System/StorageSystemTableEngines.cpp @@ -6,18 +6,23 @@ namespace DB { -NamesAndTypesList StorageSystemTableEngines::getNamesAndTypes() +ColumnsDescription StorageSystemTableEngines::getColumnsDescription() { - return { - {"name", std::make_shared()}, - {"supports_settings", std::make_shared()}, - {"supports_skipping_indices", std::make_shared()}, - {"supports_projections", std::make_shared()}, - {"supports_sort_order", std::make_shared()}, - {"supports_ttl", std::make_shared()}, - {"supports_replication", std::make_shared()}, - {"supports_deduplication", std::make_shared()}, - {"supports_parallel_insert", std::make_shared()}, + return ColumnsDescription + { + {"name", std::make_shared(), "The name of the table engine."}, + {"supports_settings", std::make_shared(), "Flag that indicates if table engine supports SETTINGS clause."}, + {"supports_skipping_indices", std::make_shared(), "Flag that indicates if table engine supports skipping indices."}, + {"supports_projections", std::make_shared(), "Flag that indicates if table engine supports projections."}, + {"supports_sort_order", std::make_shared(), + "Flag that indicates if table engine supports clauses PARTITION_BY, PRIMARY_KEY, ORDER_BY and SAMPLE_BY." + }, + {"supports_ttl", std::make_shared(), "Flag that indicates if table engine supports TTL."}, + {"supports_replication", std::make_shared(), "Flag that indicates if table engine supports data replication."}, + {"supports_deduplication", std::make_shared(), "Flag that indicates if table engine supports data deduplication."}, + {"supports_parallel_insert", std::make_shared(), + "Flag that indicates if table engine supports parallel insert (see max_insert_threads setting)." 
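(Editorial aside.) Every hunk in this series makes the same move: a flat list of (name, type) column pairs becomes a ColumnsDescription whose entries also carry a human-readable comment describing the column. Below is a minimal, self-contained C++ sketch of that shape change; IDataType, DataTypeString, DataTypeUInt8 and ColumnWithComment are simplified stand-ins invented for illustration, not the real ClickHouse classes.

#include <iostream>
#include <memory>
#include <string>
#include <vector>

// Simplified stand-ins for the real data type classes.
struct IDataType
{
    virtual ~IDataType() = default;
    virtual std::string getName() const = 0;
};
struct DataTypeString : IDataType { std::string getName() const override { return "String"; } };
struct DataTypeUInt8 : IDataType { std::string getName() const override { return "UInt8"; } };
using DataTypePtr = std::shared_ptr<const IDataType>;

// Old shape: column name + type only.
struct NameAndType { std::string name; DataTypePtr type; };

// New shape: column name + type + comment describing the column.
struct ColumnWithComment { std::string name; DataTypePtr type; std::string comment; };

int main()
{
    std::vector<ColumnWithComment> columns
    {
        {"name", std::make_shared<DataTypeString>(), "The name of the table engine."},
        {"supports_ttl", std::make_shared<DataTypeUInt8>(), "Flag that indicates if table engine supports TTL."},
    };
    for (const auto & column : columns)
        std::cout << column.name << '\t' << column.type->getName() << '\t' << column.comment << '\n';
    return 0;
}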
+ }, }; } diff --git a/src/Storages/System/StorageSystemTableEngines.h b/src/Storages/System/StorageSystemTableEngines.h index f2849848ea7..258b9d210b1 100644 --- a/src/Storages/System/StorageSystemTableEngines.h +++ b/src/Storages/System/StorageSystemTableEngines.h @@ -19,7 +19,7 @@ public: return "SystemTableEngines"; } - static NamesAndTypesList getNamesAndTypes(); + static ColumnsDescription getColumnsDescription(); }; } diff --git a/src/Storages/System/StorageSystemTableFunctions.cpp b/src/Storages/System/StorageSystemTableFunctions.cpp index 07a504edc5e..9fb8e11e4d1 100644 --- a/src/Storages/System/StorageSystemTableFunctions.cpp +++ b/src/Storages/System/StorageSystemTableFunctions.cpp @@ -10,14 +10,14 @@ namespace ErrorCodes extern const int UNKNOWN_FUNCTION; } -NamesAndTypesList StorageSystemTableFunctions::getNamesAndTypes() +ColumnsDescription StorageSystemTableFunctions::getColumnsDescription() { - return - { - {"name", std::make_shared()}, - {"description", std::make_shared()}, - {"allow_readonly", std::make_shared()} - }; + return ColumnsDescription + { + {"name", std::make_shared(), "Name of a table function."}, + {"description", std::make_shared(), "Brief description of a table function."}, + {"allow_readonly", std::make_shared(), "Flag that indicated whether a readonly user may use this function."} + }; } void StorageSystemTableFunctions::fillData(MutableColumns & res_columns, ContextPtr, const SelectQueryInfo &) const diff --git a/src/Storages/System/StorageSystemTableFunctions.h b/src/Storages/System/StorageSystemTableFunctions.h index a74e2968731..804c3b51940 100644 --- a/src/Storages/System/StorageSystemTableFunctions.h +++ b/src/Storages/System/StorageSystemTableFunctions.h @@ -19,7 +19,7 @@ public: return "SystemTableFunctions"; } - static NamesAndTypesList getNamesAndTypes(); + static ColumnsDescription getColumnsDescription(); }; } diff --git a/src/Storages/System/StorageSystemTables.cpp b/src/Storages/System/StorageSystemTables.cpp index d2c01ec3dea..e0d2dd03e78 100644 --- a/src/Storages/System/StorageSystemTables.cpp +++ b/src/Storages/System/StorageSystemTables.cpp @@ -33,42 +33,71 @@ StorageSystemTables::StorageSystemTables(const StorageID & table_id_) : IStorage(table_id_) { StorageInMemoryMetadata storage_metadata; - storage_metadata.setColumns(ColumnsDescription({ - {"database", std::make_shared()}, - {"name", std::make_shared()}, - {"uuid", std::make_shared()}, - {"engine", std::make_shared()}, - {"is_temporary", std::make_shared()}, - {"data_paths", std::make_shared(std::make_shared())}, - {"metadata_path", std::make_shared()}, - {"metadata_modification_time", std::make_shared()}, - {"dependencies_database", std::make_shared(std::make_shared())}, - {"dependencies_table", std::make_shared(std::make_shared())}, - {"create_table_query", std::make_shared()}, - {"engine_full", std::make_shared()}, - {"as_select", std::make_shared()}, - {"partition_key", std::make_shared()}, - {"sorting_key", std::make_shared()}, - {"primary_key", std::make_shared()}, - {"sampling_key", std::make_shared()}, - {"storage_policy", std::make_shared()}, - {"total_rows", std::make_shared(std::make_shared())}, - {"total_bytes", std::make_shared(std::make_shared())}, - {"total_bytes_uncompressed", std::make_shared(std::make_shared())}, + + auto description = ColumnsDescription{ + {"database", std::make_shared(), "The name of the database the table is in."}, + {"name", std::make_shared(), "Table name."}, + {"uuid", std::make_shared(), "Table uuid (Atomic database)."}, + {"engine", 
std::make_shared(), "Table engine name (without parameters)."}, + {"is_temporary", std::make_shared(), "Flag that indicates whether the table is temporary."}, + {"data_paths", std::make_shared(std::make_shared()), "Paths to the table data in the file systems."}, + {"metadata_path", std::make_shared(), "Path to the table metadata in the file system."}, + {"metadata_modification_time", std::make_shared(), "Time of latest modification of the table metadata."}, + {"dependencies_database", std::make_shared(std::make_shared()), "Database dependencies."}, + {"dependencies_table", std::make_shared(std::make_shared()), "Table dependencies (materialized views of the current table)."}, + {"create_table_query", std::make_shared(), "The query that was used to create the table."}, + {"engine_full", std::make_shared(), "Parameters of the table engine."}, + {"as_select", std::make_shared(), "SELECT query for view."}, + {"partition_key", std::make_shared(), "The partition key expression specified in the table."}, + {"sorting_key", std::make_shared(), "The sorting key expression specified in the table."}, + {"primary_key", std::make_shared(), "The primary key expression specified in the table."}, + {"sampling_key", std::make_shared(), "The sampling key expression specified in the table."}, + {"storage_policy", std::make_shared(), "The storage policy."}, + {"total_rows", std::make_shared(std::make_shared()), + "Total number of rows, if it is possible to quickly determine exact number of rows in the table, otherwise NULL (including underlying Buffer table)." + }, + {"total_bytes", std::make_shared(std::make_shared()), + "Total number of bytes, if it is possible to quickly determine exact number " + "of bytes for the table on storage, otherwise NULL (does not include any underlying storage). " + "If the table stores data on disk, returns used space on disk (i.e. compressed). " + "If the table stores data in memory, returns approximated number of used bytes in memory." + }, + {"total_bytes_uncompressed", std::make_shared(std::make_shared()), + "Total number of uncompressed bytes, if it's possible to quickly determine the exact number " + "of bytes from the part checksums for the table on storage, otherwise NULL (does not take underlying storage (if any) into account)." + }, {"parts", std::make_shared(std::make_shared())}, {"active_parts", std::make_shared(std::make_shared())}, {"total_marks", std::make_shared(std::make_shared())}, - {"lifetime_rows", std::make_shared(std::make_shared())}, - {"lifetime_bytes", std::make_shared(std::make_shared())}, - {"comment", std::make_shared()}, - {"has_own_data", std::make_shared()}, - {"loading_dependencies_database", std::make_shared(std::make_shared())}, - {"loading_dependencies_table", std::make_shared(std::make_shared())}, - {"loading_dependent_database", std::make_shared(std::make_shared())}, - {"loading_dependent_table", std::make_shared(std::make_shared())}, - }, { + {"lifetime_rows", std::make_shared(std::make_shared()), + "Total number of rows INSERTed since server start (only for Buffer tables)." + }, + {"lifetime_bytes", std::make_shared(std::make_shared()), + "Total number of bytes INSERTed since server start (only for Buffer tables)." + }, + {"comment", std::make_shared(), "The comment for the table."}, + {"has_own_data", std::make_shared(), + "Flag that indicates whether the table itself stores some data on disk or only accesses some other source." 
+ }, + {"loading_dependencies_database", std::make_shared(std::make_shared()), + "Database loading dependencies (list of objects which should be loaded before the current object)." + }, + {"loading_dependencies_table", std::make_shared(std::make_shared()), + "Table loading dependencies (list of objects which should be loaded before the current object)." + }, + {"loading_dependent_database", std::make_shared(std::make_shared()), + "Dependent loading database." + }, + {"loading_dependent_table", std::make_shared(std::make_shared()), + "Dependent loading table." + }, + }; + + description.setAliases({ {"table", std::make_shared(), "name"} - })); + }); + + storage_metadata.setColumns(std::move(description)); setInMemoryMetadata(storage_metadata); } @@ -664,10 +693,15 @@ public: { } + void applyFilters() override; + private: ContextPtr context; std::vector columns_mask; size_t max_block_size; + + ColumnPtr filtered_databases_column; + ColumnPtr filtered_tables_column; }; void StorageSystemTables::read( @@ -694,16 +728,19 @@ void StorageSystemTables::read( query_plan.addStep(std::move(reading)); } -void ReadFromSystemTables::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) +void ReadFromSystemTables::applyFilters() { - auto filter_actions_dag = ActionsDAG::buildFilterActionsDAG(filter_nodes.nodes, {}, context); + auto filter_actions_dag = ActionsDAG::buildFilterActionsDAG(filter_nodes.nodes); const ActionsDAG::Node * predicate = nullptr; if (filter_actions_dag) predicate = filter_actions_dag->getOutputs().at(0); - ColumnPtr filtered_databases_column = getFilteredDatabases(predicate, context); - ColumnPtr filtered_tables_column = getFilteredTables(predicate, filtered_databases_column, context); + filtered_databases_column = getFilteredDatabases(predicate, context); + filtered_tables_column = getFilteredTables(predicate, filtered_databases_column, context); +} +void ReadFromSystemTables::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) +{ Pipe pipe(std::make_shared( std::move(columns_mask), getOutputStream().header, max_block_size, std::move(filtered_databases_column), std::move(filtered_tables_column), context)); pipeline.init(std::move(pipe)); diff --git a/src/Storages/System/StorageSystemTimeZones.cpp b/src/Storages/System/StorageSystemTimeZones.cpp index e0d7d2a5c42..14f4ce0f5de 100644 --- a/src/Storages/System/StorageSystemTimeZones.cpp +++ b/src/Storages/System/StorageSystemTimeZones.cpp @@ -8,11 +8,11 @@ extern const char * auto_time_zones[]; namespace DB { -NamesAndTypesList StorageSystemTimeZones::getNamesAndTypes() +ColumnsDescription StorageSystemTimeZones::getColumnsDescription() { - return + return ColumnsDescription { - {"time_zone", std::make_shared()}, + {"time_zone", std::make_shared(), "List of supported time zones."}, }; } diff --git a/src/Storages/System/StorageSystemTimeZones.h b/src/Storages/System/StorageSystemTimeZones.h index cd54470b07c..f3743a1ef09 100644 --- a/src/Storages/System/StorageSystemTimeZones.h +++ b/src/Storages/System/StorageSystemTimeZones.h @@ -20,6 +20,6 @@ public: public: std::string getName() const override { return "SystemTimeZones"; } - static NamesAndTypesList getNamesAndTypes(); + static ColumnsDescription getColumnsDescription(); }; } diff --git a/src/Storages/System/StorageSystemTransactions.cpp b/src/Storages/System/StorageSystemTransactions.cpp index 21fa72ea12a..47e44688c14 100644 --- a/src/Storages/System/StorageSystemTransactions.cpp +++ 
b/src/Storages/System/StorageSystemTransactions.cpp @@ -21,9 +21,11 @@ static DataTypePtr getStateEnumType() }); } -NamesAndTypesList StorageSystemTransactions::getNamesAndTypes() +ColumnsDescription StorageSystemTransactions::getColumnsDescription() { - return { + /// TODO: Fill in all the comments. + return ColumnsDescription + { {"tid", getTransactionIDDataType()}, {"tid_hash", std::make_shared()}, {"elapsed", std::make_shared()}, diff --git a/src/Storages/System/StorageSystemTransactions.h b/src/Storages/System/StorageSystemTransactions.h index 128acd04367..b5a538b7b55 100644 --- a/src/Storages/System/StorageSystemTransactions.h +++ b/src/Storages/System/StorageSystemTransactions.h @@ -13,9 +13,7 @@ class StorageSystemTransactions final : public IStorageSystemOneBlock()}, {"type", std::make_shared()}, {"params", std::make_shared()}, {"precedence", std::make_shared()}, }; - return names_and_types; } diff --git a/src/Storages/System/StorageSystemUserDirectories.h b/src/Storages/System/StorageSystemUserDirectories.h index 0c5f4f14c7e..bca6a9b5aa6 100644 --- a/src/Storages/System/StorageSystemUserDirectories.h +++ b/src/Storages/System/StorageSystemUserDirectories.h @@ -12,7 +12,7 @@ class StorageSystemUserDirectories final : public IStorageSystemOneBlock()}, {"memory_usage", std::make_shared()}, {"peak_memory_usage", std::make_shared()}, {"ProfileEvents", std::make_shared(std::make_shared(), std::make_shared())}, }; -} -NamesAndAliases StorageSystemUserProcesses::getNamesAndAliases() -{ - return { + description.setAliases({ {"ProfileEvents.Names", {std::make_shared(std::make_shared())}, "mapKeys(ProfileEvents)"}, - {"ProfileEvents.Values", {std::make_shared(std::make_shared())}, "mapValues(ProfileEvents)"}}; + {"ProfileEvents.Values", {std::make_shared(std::make_shared())}, "mapValues(ProfileEvents)"} + }); + + return description; } void StorageSystemUserProcesses::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const diff --git a/src/Storages/System/StorageSystemUserProcesses.h b/src/Storages/System/StorageSystemUserProcesses.h index 9bdc009d849..6eb12e30559 100644 --- a/src/Storages/System/StorageSystemUserProcesses.h +++ b/src/Storages/System/StorageSystemUserProcesses.h @@ -16,9 +16,7 @@ class StorageSystemUserProcesses final : public IStorageSystemOneBlock()}, - {"id", std::make_shared()}, - {"storage", std::make_shared()}, - {"auth_type", std::make_shared(getAuthenticationTypeEnumValues())}, - {"auth_params", std::make_shared()}, - {"host_ip", std::make_shared(std::make_shared())}, - {"host_names", std::make_shared(std::make_shared())}, - {"host_names_regexp", std::make_shared(std::make_shared())}, - {"host_names_like", std::make_shared(std::make_shared())}, - {"default_roles_all", std::make_shared()}, - {"default_roles_list", std::make_shared(std::make_shared())}, - {"default_roles_except", std::make_shared(std::make_shared())}, + return ColumnsDescription + { + {"name", std::make_shared(), "User name."}, + {"id", std::make_shared(), "User ID."}, + {"storage", std::make_shared(), "Path to the storage of users. Configured in the access_control_path parameter."}, + {"auth_type", std::make_shared(getAuthenticationTypeEnumValues()), + "Shows the authentication type. " + "There are multiple ways of user identification: " + "with no password, with plain text password, with SHA256-encoded password, " + "with double SHA-1-encoded password or with bcrypt-encoded password." 
+ }, + {"auth_params", std::make_shared(), "Authentication parameters in the JSON format depending on the auth_type."}, + {"host_ip", std::make_shared(std::make_shared()), + "IP addresses of hosts that are allowed to connect to the ClickHouse server." + }, + {"host_names", std::make_shared(std::make_shared()), + "Names of hosts that are allowed to connect to the ClickHouse server." + }, + {"host_names_regexp", std::make_shared(std::make_shared()), + "Regular expression for host names that are allowed to connect to the ClickHouse server." + }, + {"host_names_like", std::make_shared(std::make_shared()), + "Names of hosts that are allowed to connect to the ClickHouse server, set using the LIKE predicate." + }, + {"default_roles_all", std::make_shared(), + "Shows that all granted roles set for user by default." + }, + {"default_roles_list", std::make_shared(std::make_shared()), + "List of granted roles provided by default." + }, + {"default_roles_except", std::make_shared(std::make_shared()), + "All the granted roles set as default excepting of the listed ones." + }, {"grantees_any", std::make_shared()}, {"grantees_list", std::make_shared(std::make_shared())}, {"grantees_except", std::make_shared(std::make_shared())}, {"default_database", std::make_shared()}, }; - return names_and_types; } diff --git a/src/Storages/System/StorageSystemUsers.h b/src/Storages/System/StorageSystemUsers.h index 536f0482480..cfa5947d370 100644 --- a/src/Storages/System/StorageSystemUsers.h +++ b/src/Storages/System/StorageSystemUsers.h @@ -12,7 +12,7 @@ class StorageSystemUsers final : public IStorageSystemOneBlock & partitions) override; void restoreDataFromBackup(RestorerFromBackup & restorer, const String & data_path_in_backup, const std::optional & partitions) override; diff --git a/src/Storages/System/StorageSystemViewRefreshes.cpp b/src/Storages/System/StorageSystemViewRefreshes.cpp index f27cb3147c8..d2b933e65a8 100644 --- a/src/Storages/System/StorageSystemViewRefreshes.cpp +++ b/src/Storages/System/StorageSystemViewRefreshes.cpp @@ -13,25 +13,33 @@ namespace DB { -NamesAndTypesList StorageSystemViewRefreshes::getNamesAndTypes() +ColumnsDescription StorageSystemViewRefreshes::getColumnsDescription() { - return { - {"database", std::make_shared()}, - {"view", std::make_shared()}, - {"status", std::make_shared()}, - {"last_refresh_result", std::make_shared()}, - {"last_refresh_time", std::make_shared(std::make_shared())}, - {"last_success_time", std::make_shared(std::make_shared())}, - {"duration_ms", std::make_shared()}, - {"next_refresh_time", std::make_shared()}, - {"remaining_dependencies", std::make_shared(std::make_shared())}, - {"exception", std::make_shared()}, - {"refresh_count", std::make_shared()}, - {"progress", std::make_shared()}, + return ColumnsDescription + { + {"database", std::make_shared(), "The name of the database the table is in."}, + {"view", std::make_shared(), "Table name."}, + {"status", std::make_shared(), "Current state of the refresh."}, + {"last_refresh_result", std::make_shared(), "Outcome of the latest refresh attempt."}, + {"last_refresh_time", std::make_shared(std::make_shared()), + "Time of the last refresh attempt. NULL if no refresh attempts happened since server startup or table creation."}, + {"last_success_time", std::make_shared(std::make_shared()), + "Time of the last successful refresh. 
NULL if no successful refreshes happened since server startup or table creation."}, + {"duration_ms", std::make_shared(), "How long the last refresh attempt took."}, + {"next_refresh_time", std::make_shared(), "Time at which the next refresh is scheduled to start."}, + {"remaining_dependencies", std::make_shared(std::make_shared()), + "If the view has refresh dependencies, this array contains the subset of those dependencies that are not satisfied for the current refresh yet. " + "If status = 'WaitingForDependencies', a refresh is ready to start as soon as these dependencies are fulfilled." + }, + {"exception", std::make_shared(), + "if last_refresh_result = 'Exception', i.e. the last refresh attempt failed, this column contains the corresponding error message and stack trace." + }, + {"refresh_count", std::make_shared(), "Number of successful refreshes since last server restart or table creation."}, + {"progress", std::make_shared(), "Progress of the current refresh, between 0 and 1."}, {"elapsed", std::make_shared()}, - {"read_rows", std::make_shared()}, + {"read_rows", std::make_shared(), "Number of rows read by the current refresh so far."}, {"read_bytes", std::make_shared()}, - {"total_rows", std::make_shared()}, + {"total_rows", std::make_shared(), "Estimated total number of rows that need to be read by the current refresh."}, {"total_bytes", std::make_shared()}, {"written_rows", std::make_shared()}, {"written_bytes", std::make_shared()}, diff --git a/src/Storages/System/StorageSystemViewRefreshes.h b/src/Storages/System/StorageSystemViewRefreshes.h index 475ad45e68f..02d3a39dfff 100644 --- a/src/Storages/System/StorageSystemViewRefreshes.h +++ b/src/Storages/System/StorageSystemViewRefreshes.h @@ -15,7 +15,7 @@ class StorageSystemViewRefreshes final : public IStorageSystemOneBlock()}, + return ColumnsDescription + { + {"message", std::make_shared(), "A warning message issued by ClickHouse server."}, }; } diff --git a/src/Storages/System/StorageSystemWarnings.h b/src/Storages/System/StorageSystemWarnings.h index 3f403f5f1f9..42948a765ea 100644 --- a/src/Storages/System/StorageSystemWarnings.h +++ b/src/Storages/System/StorageSystemWarnings.h @@ -16,7 +16,7 @@ class StorageSystemWarnings final : public IStorageSystemOneBlock()}})); + storage_metadata.setColumns(ColumnsDescription({{"zero", std::make_shared(), "dummy"}})); setInMemoryMetadata(storage_metadata); } diff --git a/src/Storages/System/StorageSystemZooKeeper.cpp b/src/Storages/System/StorageSystemZooKeeper.cpp index ba069380855..37fe9074950 100644 --- a/src/Storages/System/StorageSystemZooKeeper.cpp +++ b/src/Storages/System/StorageSystemZooKeeper.cpp @@ -198,17 +198,7 @@ StorageSystemZooKeeper::StorageSystemZooKeeper(const StorageID & table_id_) : IStorage(table_id_) { StorageInMemoryMetadata storage_metadata; - ColumnsDescription desc; - auto columns = getNamesAndTypes(); - for (const auto & col : columns) - { - ColumnDescription col_desc(col.name, col.type); - /// We only allow column `name`, `path`, `value` to insert. 
- if (col.name != "name" && col.name != "path" && col.name != "value") - col_desc.default_desc.kind = ColumnDefaultKind::Materialized; - desc.add(col_desc); - } - storage_metadata.setColumns(desc); + storage_metadata.setColumns(getColumnsDescription()); setInMemoryMetadata(storage_metadata); } @@ -238,24 +228,37 @@ SinkToStoragePtr StorageSystemZooKeeper::write(const ASTPtr &, const StorageMeta return std::make_shared(write_header, context); } -NamesAndTypesList StorageSystemZooKeeper::getNamesAndTypes() +ColumnsDescription StorageSystemZooKeeper::getColumnsDescription() { - return { - { "name", std::make_shared() }, - { "value", std::make_shared() }, - { "czxid", std::make_shared() }, - { "mzxid", std::make_shared() }, - { "ctime", std::make_shared() }, - { "mtime", std::make_shared() }, - { "version", std::make_shared() }, - { "cversion", std::make_shared() }, - { "aversion", std::make_shared() }, - { "ephemeralOwner", std::make_shared() }, - { "dataLength", std::make_shared() }, - { "numChildren", std::make_shared() }, - { "pzxid", std::make_shared() }, - { "path", std::make_shared() }, + auto description = ColumnsDescription + { + {"name", std::make_shared(), "The name of the node."}, + {"value", std::make_shared(), "Node value."}, + {"czxid", std::make_shared(), "ID of the transaction that created the node."}, + {"mzxid", std::make_shared(), "ID of the transaction that last changed the node."}, + {"ctime", std::make_shared(), "Time of node creation."}, + {"mtime", std::make_shared(), "Time of the last modification of the node."}, + {"version", std::make_shared(), "Node version: the number of times the node was changed."}, + {"cversion", std::make_shared(), "Number of added or removed descendants."}, + {"aversion", std::make_shared(), "Number of changes to the ACL."}, + {"ephemeralOwner", std::make_shared(), "For ephemeral nodes, the ID of the session that owns this node."}, + {"dataLength", std::make_shared(), "Size of the value."}, + {"numChildren", std::make_shared(), "Number of descendants."}, + {"pzxid", std::make_shared(), "ID of the transaction that last deleted or added descendants."}, + {"path", std::make_shared(), "The path to the node."}, }; + + for (auto & name : description.getAllRegisteredNames()) + { + description.modify(name, [&](ColumnDescription & column) + { + /// We only allow column `name`, `path`, `value` to insert. 
+ if (column.name != "name" && column.name != "path" && column.name != "value") + column.default_desc.kind = ColumnDefaultKind::Materialized; + }); + } + + return description; } static String pathCorrected(const String & path) diff --git a/src/Storages/System/StorageSystemZooKeeper.h b/src/Storages/System/StorageSystemZooKeeper.h index 7f7aba862a2..3c893a2fddc 100644 --- a/src/Storages/System/StorageSystemZooKeeper.h +++ b/src/Storages/System/StorageSystemZooKeeper.h @@ -18,7 +18,7 @@ public: std::string getName() const override { return "SystemZooKeeper"; } - static NamesAndTypesList getNamesAndTypes(); + static ColumnsDescription getColumnsDescription(); SinkToStoragePtr write(const ASTPtr & /*query*/, const StorageMetadataPtr & /*metadata_snapshot*/, ContextPtr /*context*/, bool /*async_insert*/) override; diff --git a/src/Storages/System/StorageSystemZooKeeperConnection.cpp b/src/Storages/System/StorageSystemZooKeeperConnection.cpp index 9d9a8763db2..c165bfa217d 100644 --- a/src/Storages/System/StorageSystemZooKeeperConnection.cpp +++ b/src/Storages/System/StorageSystemZooKeeperConnection.cpp @@ -11,7 +11,7 @@ namespace DB { -NamesAndTypesList StorageSystemZooKeeperConnection::getNamesAndTypes() +ColumnsDescription StorageSystemZooKeeperConnection::getColumnsDescription() { DataTypeEnum16::Values feature_flags_enum_values; feature_flags_enum_values.reserve(magic_enum::enum_count()); @@ -20,18 +20,21 @@ NamesAndTypesList StorageSystemZooKeeperConnection::getNamesAndTypes() auto feature_flags_enum = std::make_shared(std::move(feature_flags_enum_values)); - return { - /* 0 */ {"name", std::make_shared()}, - /* 1 */ {"host", std::make_shared()}, - /* 2 */ {"port", std::make_shared()}, - /* 3 */ {"index", std::make_shared()}, - /* 4 */ {"connected_time", std::make_shared()}, - /* 5 */ {"session_uptime_elapsed_seconds", std::make_shared()}, - /* 6 */ {"is_expired", std::make_shared()}, - /* 7 */ {"keeper_api_version", std::make_shared()}, - /* 8 */ {"client_id", std::make_shared()}, - /* 9 */ {"xid", std::make_shared()}, - /* 10*/ {"enabled_feature_flags", std::make_shared(std::move(feature_flags_enum))} + return ColumnsDescription + { + /* 0 */ {"name", std::make_shared(), "ZooKeeper cluster's name."}, + /* 1 */ {"host", std::make_shared(), "The hostname/IP of the ZooKeeper node that ClickHouse connected to."}, + /* 2 */ {"port", std::make_shared(), "The port of the ZooKeeper node that ClickHouse connected to."}, + /* 3 */ {"index", std::make_shared(), "The index of the ZooKeeper node that ClickHouse connected to. The index is from ZooKeeper config."}, + /* 4 */ {"connected_time", std::make_shared(), "When the connection was established."}, + /* 5 */ {"session_uptime_elapsed_seconds", std::make_shared(), "Seconds elapsed since the connection was established."}, + /* 6 */ {"is_expired", std::make_shared(), "Is the current connection expired."}, + /* 7 */ {"keeper_api_version", std::make_shared(), "Keeper API version."}, + /* 8 */ {"client_id", std::make_shared(), "Session id of the connection."}, + /* 9 */ {"xid", std::make_shared(), "XID of the current session."}, + /* 10*/ {"enabled_feature_flags", std::make_shared(std::move(feature_flags_enum)), + "Feature flags which are enabled. Only applicable to ClickHouse Keeper." 
+ } }; } diff --git a/src/Storages/System/StorageSystemZooKeeperConnection.h b/src/Storages/System/StorageSystemZooKeeperConnection.h index dd4c293c112..2b6d3b2e516 100644 --- a/src/Storages/System/StorageSystemZooKeeperConnection.h +++ b/src/Storages/System/StorageSystemZooKeeperConnection.h @@ -16,7 +16,7 @@ class StorageSystemZooKeeperConnection final : public IStorageSystemOneBlock loop_ptr; - Poco::Logger * log = &Poco::Logger::get("UVLoop"); + LoggerPtr log = getLogger("UVLoop"); static void onUVWalkClosingCallback(uv_handle_t * handle, void *) { diff --git a/src/Storages/VirtualColumnUtils.cpp b/src/Storages/VirtualColumnUtils.cpp index e845e03d122..430ed012fa8 100644 --- a/src/Storages/VirtualColumnUtils.cpp +++ b/src/Storages/VirtualColumnUtils.cpp @@ -211,7 +211,7 @@ bool prepareFilterBlockWithQuery(const ASTPtr & query, ContextPtr context, Block const auto * expr_const_node = actions->tryFindInOutputs(expr_column_name); if (!expr_const_node) return false; - auto filter_actions = ActionsDAG::buildFilterActionsDAG({expr_const_node}, {}, context); + auto filter_actions = ActionsDAG::buildFilterActionsDAG({expr_const_node}); const auto & nodes = filter_actions->getNodes(); bool has_dependent_columns = std::any_of(nodes.begin(), nodes.end(), [&](const auto & node) { diff --git a/src/Storages/WindowView/StorageWindowView.cpp b/src/Storages/WindowView/StorageWindowView.cpp index 12fd7580639..0764685cb07 100644 --- a/src/Storages/WindowView/StorageWindowView.cpp +++ b/src/Storages/WindowView/StorageWindowView.cpp @@ -418,8 +418,7 @@ ASTPtr StorageWindowView::getCleanupQuery() auto alter_command = std::make_shared(); alter_command->type = ASTAlterCommand::DELETE; - alter_command->predicate = function_less; - alter_command->children.push_back(alter_command->predicate); + alter_command->predicate = alter_command->children.emplace_back(function_less).get(); alter_query->command_list->children.push_back(alter_command); return alter_query; } @@ -1161,7 +1160,7 @@ StorageWindowView::StorageWindowView( bool attach_) : IStorage(table_id_) , WithContext(context_->getGlobalContext()) - , log(&Poco::Logger::get(fmt::format("StorageWindowView({}.{})", table_id_.database_name, table_id_.table_name))) + , log(getLogger(fmt::format("StorageWindowView({}.{})", table_id_.database_name, table_id_.table_name))) , fire_signal_timeout_s(context_->getSettingsRef().wait_for_window_view_fire_signal_timeout.totalSeconds()) , clean_interval_usec(context_->getSettingsRef().window_view_clean_interval.totalMicroseconds()) { diff --git a/src/Storages/WindowView/StorageWindowView.h b/src/Storages/WindowView/StorageWindowView.h index de8f880c602..969fda8f78e 100644 --- a/src/Storages/WindowView/StorageWindowView.h +++ b/src/Storages/WindowView/StorageWindowView.h @@ -177,7 +177,7 @@ public: const Block & getOutputHeader() const; private: - Poco::Logger * log; + LoggerPtr log; /// Stored query, e.g. 
SELECT * FROM * GROUP BY tumble(now(), *) ASTPtr select_query; diff --git a/src/Storages/buildQueryTreeForShard.cpp b/src/Storages/buildQueryTreeForShard.cpp index 00cc5e3ee58..5ea28d9e09c 100644 --- a/src/Storages/buildQueryTreeForShard.cpp +++ b/src/Storages/buildQueryTreeForShard.cpp @@ -373,11 +373,37 @@ QueryTreeNodePtr buildQueryTreeForShard(SelectQueryInfo & query_info, QueryTreeN removeGroupingFunctionSpecializations(query_tree_to_modify); - // std::cerr << "====================== build 1 \n" << query_tree_to_modify->dumpTree() << std::endl; createUniqueTableAliases(query_tree_to_modify, nullptr, planner_context->getQueryContext()); - // std::cerr << "====================== build 2 \n" << query_tree_to_modify->dumpTree() << std::endl; return query_tree_to_modify; } +class RewriteJoinToGlobalJoinVisitor : public InDepthQueryTreeVisitor +{ +public: + using Base = InDepthQueryTreeVisitor; + using Base::Base; + + void visitImpl(QueryTreeNodePtr & node) + { + if (auto * join_node = node->as()) + join_node->setLocality(JoinLocality::Global); + } + + static bool needChildVisit(QueryTreeNodePtr & parent, QueryTreeNodePtr & child) + { + auto * join_node = parent->as(); + if (join_node && join_node->getRightTableExpression() == child) + return false; + + return true; + } +}; + +void rewriteJoinToGlobalJoin(QueryTreeNodePtr query_tree_to_modify) +{ + RewriteJoinToGlobalJoinVisitor visitor; + visitor.visit(query_tree_to_modify); +} + } diff --git a/src/Storages/buildQueryTreeForShard.h b/src/Storages/buildQueryTreeForShard.h index 05d63faeb9f..eec5a0dc38a 100644 --- a/src/Storages/buildQueryTreeForShard.h +++ b/src/Storages/buildQueryTreeForShard.h @@ -12,4 +12,6 @@ using QueryTreeNodePtr = std::shared_ptr; QueryTreeNodePtr buildQueryTreeForShard(SelectQueryInfo & query_info, QueryTreeNodePtr query_tree_to_modify); +void rewriteJoinToGlobalJoin(QueryTreeNodePtr query_tree_to_modify); + } diff --git a/src/TableFunctions/Hive/TableFunctionHive.cpp b/src/TableFunctions/Hive/TableFunctionHive.cpp index d8885087532..e840d5fc8be 100644 --- a/src/TableFunctions/Hive/TableFunctionHive.cpp +++ b/src/TableFunctions/Hive/TableFunctionHive.cpp @@ -46,7 +46,7 @@ public: void parseArguments(const ASTPtr & ast_function_, ContextPtr context_) override; private: - Poco::Logger * logger = &Poco::Logger::get("TableFunctionHive"); + LoggerPtr logger = getLogger("TableFunctionHive"); String cluster_name; String hive_metastore_url; diff --git a/src/TableFunctions/ITableFunctionDataLake.h b/src/TableFunctions/ITableFunctionDataLake.h index d8524963776..961e5683fe2 100644 --- a/src/TableFunctions/ITableFunctionDataLake.h +++ b/src/TableFunctions/ITableFunctionDataLake.h @@ -34,7 +34,7 @@ protected: columns = parseColumnsListFromString(TableFunction::configuration.structure, context); StoragePtr storage = Storage::create( - TableFunction::configuration, context, StorageID(TableFunction::getDatabaseName(), table_name), + TableFunction::configuration, context, false, StorageID(TableFunction::getDatabaseName(), table_name), columns, ConstraintsDescription{}, String{}, std::nullopt); storage->startup(); diff --git a/src/TableFunctions/TableFunctionS3.cpp b/src/TableFunctions/TableFunctionS3.cpp index c52256fb984..a9c5a5c99f0 100644 --- a/src/TableFunctions/TableFunctionS3.cpp +++ b/src/TableFunctions/TableFunctionS3.cpp @@ -67,23 +67,11 @@ void TableFunctionS3::parseArgumentsImpl(ASTs & args, const ContextPtr & context else { - auto * header_it = StorageURL::collectHeaders(args, configuration.headers_from_ast, context); - 
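(Editorial aside on the RewriteJoinToGlobalJoinVisitor added in buildQueryTreeForShard.cpp above.) The visitor marks every JOIN it reaches as GLOBAL, while needChildVisit stops it from descending into a join's right table expression, so joins nested inside the right-hand side are left untouched. A self-contained toy version of that traversal rule; the Node type and helper names are invented for illustration and are not the real query-tree classes:

#include <iostream>
#include <memory>
#include <string>
#include <vector>

enum class Locality { Local, Global };

struct Node
{
    std::string name;
    bool is_join = false;
    Locality locality = Locality::Local;
    std::shared_ptr<Node> left;                    // join: left table expression
    std::shared_ptr<Node> right;                   // join: right table expression (not visited)
    std::vector<std::shared_ptr<Node>> children;   // any other child nodes
};

// Joins become GLOBAL; recursion skips a join's right table expression,
// mirroring needChildVisit in the diff above.
void rewriteJoinsToGlobal(const std::shared_ptr<Node> & node)
{
    if (!node)
        return;
    if (node->is_join)
    {
        node->locality = Locality::Global;
        rewriteJoinsToGlobal(node->left);   // descend into the left side only
        return;                             // node->right is deliberately skipped
    }
    for (const auto & child : node->children)
        rewriteJoinsToGlobal(child);
}

int main()
{
    auto inner_join = std::make_shared<Node>();
    inner_join->name = "join inside right-hand subquery";
    inner_join->is_join = true;

    auto top_join = std::make_shared<Node>();
    top_join->name = "top-level join";
    top_join->is_join = true;
    top_join->left = std::make_shared<Node>();
    top_join->left->name = "t1";
    top_join->right = inner_join;   // sits on the right side, so it stays Local

    rewriteJoinsToGlobal(top_join);
    std::cout << top_join->name << ": " << (top_join->locality == Locality::Global ? "GLOBAL" : "LOCAL") << '\n';
    std::cout << inner_join->name << ": " << (inner_join->locality == Locality::Global ? "GLOBAL" : "LOCAL") << '\n';
    return 0;
}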
if (header_it != args.end()) - args.erase(header_it); + size_t count = StorageURL::evalArgsAndCollectHeaders(args, configuration.headers_from_ast, context); - if (args.empty() || args.size() > 7) + if (count == 0 || count > 7) throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "The signature of table function {} shall be the following:\n{}", getName(), getSignature()); - for (auto & arg : args) - arg = evaluateConstantExpressionOrIdentifierAsLiteral(arg, context); - - /// Size -> argument indexes - static std::unordered_map> size_to_args - { - {1, {{}}}, - {7, {{"access_key_id", 1}, {"secret_access_key", 2}, {"session_token", 3}, {"format", 4}, {"structure", 5}, {"compression_method", 6}}} - }; - std::unordered_map args_to_idx; bool no_sign_request = false; @@ -92,7 +80,7 @@ void TableFunctionS3::parseArgumentsImpl(ASTs & args, const ContextPtr & context /// - s3(source, format) /// - s3(source, NOSIGN) /// We can distinguish them by looking at the 2-nd argument: check if it's NOSIGN or not. - if (args.size() == 2) + if (count == 2) { auto second_arg = checkAndGetLiteralArgument(args[1], "format/NOSIGN"); if (boost::iequals(second_arg, "NOSIGN")) @@ -102,10 +90,10 @@ void TableFunctionS3::parseArgumentsImpl(ASTs & args, const ContextPtr & context } /// For 3 arguments we support 3 possible variants: /// - s3(source, format, structure) - /// - s3(source, access_key_id, access_key_id) + /// - s3(source, access_key_id, secret_access_key) /// - s3(source, NOSIGN, format) /// We can distinguish them by looking at the 2-nd argument: check if it's a format name or not. - else if (args.size() == 3) + else if (count == 3) { auto second_arg = checkAndGetLiteralArgument(args[1], "format/access_key_id/NOSIGN"); if (boost::iequals(second_arg, "NOSIGN")) @@ -120,11 +108,11 @@ void TableFunctionS3::parseArgumentsImpl(ASTs & args, const ContextPtr & context } /// For 4 arguments we support 4 possible variants: /// - s3(source, format, structure, compression_method), - /// - s3(source, access_key_id, access_key_id, format), - /// - s3(source, access_key_id, access_key_id, session_token) + /// - s3(source, access_key_id, secret_access_key, format), + /// - s3(source, access_key_id, secret_access_key, session_token) /// - s3(source, NOSIGN, format, structure) /// We can distinguish them by looking at the 2-nd and 4-th argument: check if it's a format name or not. 
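(Editorial aside on the s3() argument parsing above.) The rewrite keys every branch off the count of non-header arguments returned by evalArgsAndCollectHeaders and then disambiguates overloaded positions by probing whether an argument is NOSIGN or looks like a format name. Below is a condensed standalone restatement of the 2-4 argument cases only; isNoSign, looksLikeFormat and the format list are placeholders, and the real code additionally handles 5-7 arguments, headers and error reporting:

#include <iostream>
#include <map>
#include <set>
#include <string>
#include <vector>

using ArgsToIdx = std::map<std::string, size_t>;

// Placeholder checks; the real code uses checkAndGetLiteralArgument and FormatFactory.
bool isNoSign(const std::string & arg) { return arg == "NOSIGN"; }
bool looksLikeFormat(const std::string & arg)
{
    static const std::set<std::string> known = {"auto", "CSV", "TSV", "JSONEachRow", "Parquet"};
    return known.count(arg) > 0;
}

// Map positional arguments to their meaning, depending on the count and on
// whether the 2nd (and sometimes 4th) argument is NOSIGN or a format name.
ArgsToIdx resolveS3Args(const std::vector<std::string> & args, bool & no_sign_request)
{
    no_sign_request = false;
    const size_t count = args.size();
    if (count == 2)
    {
        if (isNoSign(args[1])) { no_sign_request = true; return {}; }
        return {{"format", 1}};
    }
    if (count == 3)
    {
        if (isNoSign(args[1])) { no_sign_request = true; return {{"format", 2}}; }
        if (looksLikeFormat(args[1])) return {{"format", 1}, {"structure", 2}};
        return {{"access_key_id", 1}, {"secret_access_key", 2}};
    }
    if (count == 4)
    {
        if (isNoSign(args[1])) { no_sign_request = true; return {{"format", 2}, {"structure", 3}}; }
        if (looksLikeFormat(args[1])) return {{"format", 1}, {"structure", 2}, {"compression_method", 3}};
        // access_key_id/secret_access_key followed by either a format or a session token:
        // the 4th argument decides.
        if (looksLikeFormat(args[3])) return {{"access_key_id", 1}, {"secret_access_key", 2}, {"format", 3}};
        return {{"access_key_id", 1}, {"secret_access_key", 2}, {"session_token", 3}};
    }
    return {};  // 5-7 argument variants omitted in this sketch
}

int main()
{
    bool no_sign = false;
    const auto mapping = resolveS3Args({"https://bucket/data.csv", "CSV", "a String"}, no_sign);
    for (const auto & [name, idx] : mapping)
        std::cout << name << " -> argument " << idx << '\n';
    return 0;
}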
- else if (args.size() == 4) + else if (count == 4) { auto second_arg = checkAndGetLiteralArgument(args[1], "format/access_key_id/NOSIGN"); if (boost::iequals(second_arg, "NOSIGN")) @@ -150,12 +138,12 @@ void TableFunctionS3::parseArgumentsImpl(ASTs & args, const ContextPtr & context } } /// For 5 arguments we support 3 possible variants: - /// - s3(source, access_key_id, access_key_id, format, structure) - /// - s3(source, access_key_id, access_key_id, session_token, format) + /// - s3(source, access_key_id, secret_access_key, format, structure) + /// - s3(source, access_key_id, secret_access_key, session_token, format) /// - s3(source, NOSIGN, format, structure, compression_method) /// We can distinguish them by looking at the 2-nd argument: check if it's a NOSIGN keyword name or no, /// and by the 4-th argument, check if it's a format name or not - else if (args.size() == 5) + else if (count == 5) { auto second_arg = checkAndGetLiteralArgument(args[1], "NOSIGN/access_key_id"); if (boost::iequals(second_arg, "NOSIGN")) @@ -177,10 +165,10 @@ void TableFunctionS3::parseArgumentsImpl(ASTs & args, const ContextPtr & context } } // For 6 arguments we support 2 possible variants: - /// - s3(source, access_key_id, access_key_id, format, structure, compression_method) - /// - s3(source, access_key_id, access_key_id, session_token, format, structure) + /// - s3(source, access_key_id, secret_access_key, format, structure, compression_method) + /// - s3(source, access_key_id, secret_access_key, session_token, format, structure) /// We can distinguish them by looking at the 4-th argument: check if it's a format name or not - else if (args.size() == 6) + else if (count == 6) { auto fourth_arg = checkAndGetLiteralArgument(args[3], "format/session_token"); if (fourth_arg == "auto" || FormatFactory::instance().getAllFormats().contains(fourth_arg)) @@ -192,9 +180,9 @@ void TableFunctionS3::parseArgumentsImpl(ASTs & args, const ContextPtr & context args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"session_token", 3}, {"format", 4}, {"structure", 5}}; } } - else + else if (count == 7) { - args_to_idx = size_to_args[args.size()]; + args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"session_token", 3}, {"format", 4}, {"structure", 5}, {"compression_method", 6}}; } /// This argument is always the first @@ -262,24 +250,16 @@ void TableFunctionS3::addColumnsStructureToArguments(ASTs & args, const String & } else { - /// If arguments contain headers, just remove it and add to the end of arguments later - /// (header argument can be at any position). HTTPHeaderEntries tmp_headers; - auto * headers_it = StorageURL::collectHeaders(args, tmp_headers, context); - ASTPtr headers_ast; - if (headers_it != args.end()) - { - headers_ast = *headers_it; - args.erase(headers_it); - } + size_t count = StorageURL::evalArgsAndCollectHeaders(args, tmp_headers, context); - if (args.empty() || args.size() > getMaxNumberOfArguments()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected 1 to {} arguments in table function, got {}", getMaxNumberOfArguments(), args.size()); + if (count == 0 || count > getMaxNumberOfArguments()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected 1 to {} arguments in table function, got {}", getMaxNumberOfArguments(), count); auto structure_literal = std::make_shared(structure); /// s3(s3_url) - if (args.size() == 1) + if (count == 1) { /// Add format=auto before structure argument. 
args.push_back(std::make_shared("auto")); @@ -287,7 +267,7 @@ void TableFunctionS3::addColumnsStructureToArguments(ASTs & args, const String & } /// s3(s3_url, format) or s3(s3_url, NOSIGN) /// We can distinguish them by looking at the 2-nd argument: check if it's NOSIGN or not. - else if (args.size() == 2) + else if (count == 2) { auto second_arg = checkAndGetLiteralArgument(args[1], "format/NOSIGN"); /// If there is NOSIGN, add format=auto before structure. @@ -296,10 +276,10 @@ void TableFunctionS3::addColumnsStructureToArguments(ASTs & args, const String & args.push_back(structure_literal); } /// s3(source, format, structure) or - /// s3(source, access_key_id, access_key_id) or + /// s3(source, access_key_id, secret_access_key) or /// s3(source, NOSIGN, format) /// We can distinguish them by looking at the 2-nd argument: check if it's NOSIGN, format name or neither. - else if (args.size() == 3) + else if (count == 3) { auto second_arg = checkAndGetLiteralArgument(args[1], "format/NOSIGN"); if (boost::iequals(second_arg, "NOSIGN")) @@ -308,7 +288,7 @@ void TableFunctionS3::addColumnsStructureToArguments(ASTs & args, const String & } else if (second_arg == "auto" || FormatFactory::instance().getAllFormats().contains(second_arg)) { - args.back() = structure_literal; + args[count - 1] = structure_literal; } else { @@ -318,48 +298,45 @@ void TableFunctionS3::addColumnsStructureToArguments(ASTs & args, const String & } } /// s3(source, format, structure, compression_method) or - /// s3(source, access_key_id, access_key_id, format) or + /// s3(source, access_key_id, secret_access_key, format) or /// s3(source, NOSIGN, format, structure) /// We can distinguish them by looking at the 2-nd argument: check if it's NOSIGN, format name or neither. - else if (args.size() == 4) + else if (count == 4) { auto second_arg = checkAndGetLiteralArgument(args[1], "format/NOSIGN"); if (boost::iequals(second_arg, "NOSIGN")) { - args.back() = structure_literal; + args[count - 1] = structure_literal; } else if (second_arg == "auto" || FormatFactory::instance().getAllFormats().contains(second_arg)) { - args[args.size() - 2] = structure_literal; + args[count - 2] = structure_literal; } else { args.push_back(structure_literal); } } - /// s3(source, access_key_id, access_key_id, format, structure) or + /// s3(source, access_key_id, secret_access_key, format, structure) or /// s3(source, NOSIGN, format, structure, compression_method) /// We can distinguish them by looking at the 2-nd argument: check if it's a NOSIGN keyword name or not. 
- else if (args.size() == 5) + else if (count == 5) { auto sedond_arg = checkAndGetLiteralArgument(args[1], "format/NOSIGN"); if (boost::iequals(sedond_arg, "NOSIGN")) { - args[args.size() - 2] = structure_literal; + args[count - 2] = structure_literal; } else { - args.back() = structure_literal; + args[count - 1] = structure_literal; } } - /// s3(source, access_key_id, access_key_id, format, structure, compression) - else if (args.size() == 6) + /// s3(source, access_key_id, secret_access_key, format, structure, compression) + else if (count == 6) { - args[args.size() - 2] = structure_literal; + args[count - 2] = structure_literal; } - - if (headers_ast) - args.push_back(headers_ast); } } diff --git a/src/TableFunctions/TableFunctionURL.cpp b/src/TableFunctions/TableFunctionURL.cpp index 254cdba64d5..aa535991d65 100644 --- a/src/TableFunctions/TableFunctionURL.cpp +++ b/src/TableFunctions/TableFunctionURL.cpp @@ -57,16 +57,24 @@ void TableFunctionURL::parseArgumentsImpl(ASTs & args, const ContextPtr & contex if (format == "auto") format = FormatFactory::instance().getFormatFromFileName(Poco::URI(filename).getPath(), true); - StorageURL::collectHeaders(args, configuration.headers, context); + StorageURL::evalArgsAndCollectHeaders(args, configuration.headers, context); } else { - auto * headers_it = StorageURL::collectHeaders(args, configuration.headers, context); + size_t count = StorageURL::evalArgsAndCollectHeaders(args, configuration.headers, context); /// ITableFunctionFileLike cannot parse headers argument, so remove it. - if (headers_it != args.end()) - args.erase(headers_it); + ASTPtr headers_ast; + if (count != args.size()) + { + chassert(count + 1 == args.size()); + headers_ast = args.back(); + args.pop_back(); + } ITableFunctionFileLike::parseArgumentsImpl(args, context); + + if (headers_ast) + args.push_back(headers_ast); } } @@ -82,15 +90,15 @@ void TableFunctionURL::addColumnsStructureToArguments(ASTs & args, const String } else { - /// If arguments contain headers, just remove it and add to the end of arguments later - /// (header argument can be at any position). + /// If arguments contain headers, just remove it and add to the end of arguments later. 
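(Editorial aside on the headers handling in the TableFunctionS3/TableFunctionURL hunks around here.) Because evalArgsAndCollectHeaders reports how many leading arguments are positional, and a headers entry, when present, is the single trailing argument, callers either index positions by the reported count (as in TableFunctionS3 above) or, as in TableFunctionURL below, detach the trailing headers entry, run the positional logic, and re-append it. A small standalone sketch of that detach/re-append flow with plain strings instead of ASTs; countPositionalArgs and addStructureArgument are hypothetical stand-ins:

#include <cassert>
#include <iostream>
#include <string>
#include <vector>

// Placeholder for evalArgsAndCollectHeaders: returns the number of leading
// positional arguments; a trailing "headers(...)" entry, if any, is not counted.
size_t countPositionalArgs(const std::vector<std::string> & args)
{
    if (!args.empty() && args.back().rfind("headers(", 0) == 0)
        return args.size() - 1;
    return args.size();
}

// Append a structure argument while keeping headers (if any) as the last argument.
void addStructureArgument(std::vector<std::string> & args, const std::string & structure)
{
    const size_t count = countPositionalArgs(args);

    // Detach the trailing headers argument, if present (mirrors the chassert in the diff).
    std::string headers;
    const bool has_headers = count != args.size();
    if (has_headers)
    {
        assert(count + 1 == args.size());
        headers = args.back();
        args.pop_back();
    }

    // Positional rewriting now works on the remaining arguments only.
    args.push_back(structure);

    // Re-append headers so they stay the trailing argument.
    if (has_headers)
        args.push_back(headers);
}

int main()
{
    std::vector<std::string> args = {"https://example.com/data.csv", "CSV", "headers('X-Key'='1')"};
    addStructureArgument(args, "a String, b UInt64");
    for (const auto & arg : args)
        std::cout << arg << '\n';
    return 0;
}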
HTTPHeaderEntries tmp_headers; - auto * headers_it = StorageURL::collectHeaders(args, tmp_headers, context); + size_t count = StorageURL::evalArgsAndCollectHeaders(args, tmp_headers, context); ASTPtr headers_ast; - if (headers_it != args.end()) + if (count != args.size()) { - headers_ast = *headers_it; - args.erase(headers_it); + chassert(count + 1 == args.size()); + headers_ast = args.back(); + args.pop_back(); } ITableFunctionFileLike::addColumnsStructureToArguments(args, desired_structure, context); diff --git a/src/configure_config.cmake b/src/configure_config.cmake index 7de2d5a9fdd..141e51badbb 100644 --- a/src/configure_config.cmake +++ b/src/configure_config.cmake @@ -164,6 +164,9 @@ endif () if (ENABLE_OPENSSL) set(USE_OPENSSL_INTREE 1) endif () +if (TARGET ch_contrib::ssh) + set(USE_SSH 1) +endif() if (TARGET ch_contrib::fiu) set(FIU_ENABLE 1) endif() diff --git a/tests/analyzer_integration_broken_tests.txt b/tests/analyzer_integration_broken_tests.txt index 23f22209451..e1d4de59a23 100644 --- a/tests/analyzer_integration_broken_tests.txt +++ b/tests/analyzer_integration_broken_tests.txt @@ -1,7 +1,6 @@ test_access_for_functions/test.py::test_access_rights_for_function test_build_sets_from_multiple_threads/test.py::test_set test_concurrent_backups_s3/test.py::test_concurrent_backups -test_dictionaries_update_and_reload/test.py::test_reload_after_fail_in_cache_dictionary test_distributed_backward_compatability/test.py::test_distributed_in_tuple test_distributed_type_object/test.py::test_distributed_type_object test_executable_table_function/test.py::test_executable_function_input_python diff --git a/tests/analyzer_tech_debt.txt b/tests/analyzer_tech_debt.txt index 4643d109c3d..d944dd96e25 100644 --- a/tests/analyzer_tech_debt.txt +++ b/tests/analyzer_tech_debt.txt @@ -7,7 +7,6 @@ 01214_test_storage_merge_aliases_with_where 01244_optimize_distributed_group_by_sharding_key 01268_shard_avgweighted -01495_subqueries_in_with_statement 01560_merge_distributed_join 01584_distributed_buffer_cannot_find_column 01624_soft_constraints @@ -27,8 +26,9 @@ 00917_multiple_joins_denny_crane 02725_agg_projection_resprect_PK 02763_row_policy_storage_merge_alias -02784_parallel_replicas_automatic_decision_join 02818_parameterized_view_with_cte_multiple_usage +# Check after constants refactoring +02901_parallel_replicas_rollup # Flaky. 
Please don't delete them without fixing them: 01287_max_execution_speed 02003_WithMergeableStateAfterAggregationAndLimit_LIMIT_BY_LIMIT_OFFSET diff --git a/tests/ci/ast_fuzzer_check.py b/tests/ci/ast_fuzzer_check.py index 68004eec2bb..41e4ef19361 100644 --- a/tests/ci/ast_fuzzer_check.py +++ b/tests/ci/ast_fuzzer_check.py @@ -6,29 +6,16 @@ import subprocess import sys from pathlib import Path -from github import Github - from build_download_helper import get_build_name_for_check, read_build_urls from clickhouse_helper import ( CiLogsCredentials, - ClickHouseHelper, - prepare_tests_results_for_clickhouse, -) -from commit_status_helper import ( - RerunHelper, - format_description, - get_commit, - post_commit_status, ) from docker_images_helper import DockerImage, get_docker_image, pull_image from env_helper import REPORT_PATH, TEMP_PATH -from get_robot_token import get_best_robot_token from pr_info import PRInfo -from report import TestResult -from s3_helper import S3Helper +from report import JobReport from stopwatch import Stopwatch from tee_popen import TeePopen -from upload_result_helper import upload_results IMAGE_NAME = "clickhouse/fuzzer" @@ -77,14 +64,6 @@ def main(): pr_info = PRInfo() - gh = Github(get_best_robot_token(), per_page=100) - commit = get_commit(gh, pr_info.sha) - - rerun_helper = RerunHelper(commit, check_name) - if rerun_helper.is_already_finished_by_status(): - logging.info("Check is already finished according to github status, exiting") - sys.exit(0) - docker_image = pull_image(get_docker_image(IMAGE_NAME)) build_name = get_build_name_for_check(check_name) @@ -131,10 +110,6 @@ def main(): subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {temp_path}", shell=True) ci_logs_credentials.clean_ci_logs_from_credentials(run_log_path) - check_name_lower = ( - check_name.lower().replace("(", "").replace(")", "").replace(" ", "") - ) - s3_prefix = f"{pr_info.number}/{pr_info.sha}/fuzzer_{check_name_lower}/" paths = { "run.log": run_log_path, "main.log": main_log_path, @@ -154,17 +129,6 @@ def main(): if not_compressed_server_log_path.exists(): paths["server.log"] = not_compressed_server_log_path - s3_helper = S3Helper() - urls = [] - report_url = "" - for file, path in paths.items(): - try: - url = s3_helper.upload_test_report_to_s3(path, s3_prefix + file) - report_url = url if file == "report.html" else report_url - urls.append(url) - except Exception as ex: - logging.info("Exception uploading file %s text %s", file, ex) - # Try to get status message saved by the fuzzer try: with open(workspace_path / "status.txt", "r", encoding="utf-8") as status_f: @@ -176,42 +140,19 @@ def main(): status = "failure" description = "Task failed: $?=" + str(retcode) - description = format_description(description) + JobReport( + description=description, + test_results=[], + status=status, + start_time=stopwatch.start_time_str, + duration=stopwatch.duration_seconds, + # test generates its own report.html + additional_files=[v for _, v in paths.items()], + ).dump() - test_result = TestResult(description, "OK") - if "fail" in status: - test_result.status = "FAIL" - - if not report_url: - report_url = upload_results( - s3_helper, - pr_info.number, - pr_info.sha, - [test_result], - [], - check_name, - urls, - ) - - ch_helper = ClickHouseHelper() - - prepared_events = prepare_tests_results_for_clickhouse( - pr_info, - [test_result], - status, - stopwatch.duration_seconds, - stopwatch.start_time_str, - report_url, - check_name, - ) - - ch_helper.insert_events_into(db="default", table="checks", 
events=prepared_events) - - logging.info("Result: '%s', '%s', '%s'", status, description, report_url) - print(f"::notice ::Report url: {report_url}") - post_commit_status( - commit, status, report_url, description, check_name, pr_info, dump_to_file=True - ) + logging.info("Result: '%s', '%s'", status, description) + if status == "failure": + sys.exit(1) if __name__ == "__main__": diff --git a/tests/ci/bugfix_validate_check.py b/tests/ci/bugfix_validate_check.py index adb798bd392..107c02a0f56 100644 --- a/tests/ci/bugfix_validate_check.py +++ b/tests/ci/bugfix_validate_check.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 from pathlib import Path -from typing import List, Tuple +from typing import List, Tuple, Optional import argparse import csv import logging @@ -32,7 +32,8 @@ def post_commit_status_from_file(file_path: Path) -> List[str]: return res[0] -def process_result(file_path: Path) -> Tuple[bool, TestResults]: +# Returns (is_ok, test_results, error_message) +def process_result(file_path: Path) -> Tuple[bool, TestResults, Optional[str]]: test_results = [] # type: TestResults state, report_url, description = post_commit_status_from_file(file_path) prefix = file_path.parent.name @@ -46,11 +47,11 @@ def process_result(file_path: Path) -> Tuple[bool, TestResults]: if report_url != "null" else "Check failed" ) - return False, [TestResult(f"{prefix}: {description}", status)] + return False, [TestResult(f"{prefix}: {description}", status)], "Check failed" is_ok = state == "success" if is_ok and report_url == "null": - return is_ok, test_results + return is_ok, test_results, None status = ( f'OK: Bug reproduced (Report)' @@ -58,19 +59,22 @@ def process_result(file_path: Path) -> Tuple[bool, TestResults]: else f'Bug is not reproduced (Report)' ) test_results.append(TestResult(f"{prefix}: {description}", status)) - return is_ok, test_results + return is_ok, test_results, None -def process_all_results(file_paths: List[Path]) -> Tuple[bool, TestResults]: +def process_all_results( + file_paths: List[Path], +) -> Tuple[bool, TestResults, Optional[str]]: any_ok = False all_results = [] + error = None for status_path in file_paths: - is_ok, test_results = process_result(status_path) + is_ok, test_results, error = process_result(status_path) any_ok = any_ok or is_ok if test_results is not None: all_results.extend(test_results) - return any_ok, all_results + return any_ok and error is None, all_results, error def main(): @@ -80,7 +84,13 @@ def main(): check_name_with_group = "Bugfix validate check" - is_ok, test_results = process_all_results(status_files) + is_ok, test_results, error = process_all_results(status_files) + + description = "" + if error: + description = error + elif not is_ok: + description = "Changed tests don't reproduce the bug" pr_info = PRInfo() if not test_results: @@ -88,7 +98,6 @@ def main(): report_url = "" logging.info("No results to upload") else: - description = "" if is_ok else "Changed tests don't reproduce the bug" report_url = upload_results( S3Helper(), pr_info.number, diff --git a/tests/ci/build_check.py b/tests/ci/build_check.py index 27243aac4f1..cec8c4c7b65 100644 --- a/tests/ci/build_check.py +++ b/tests/ci/build_check.py @@ -12,15 +12,13 @@ from ci_config import CI_CONFIG, BuildConfig from cache_utils import CargoCache from env_helper import ( - GITHUB_JOB_API_URL, REPO_COPY, S3_BUILDS_BUCKET, - S3_DOWNLOAD, TEMP_PATH, ) -from git_helper import Git, git_runner +from git_helper import Git from pr_info import PRInfo -from report import BuildResult, FAILURE, StatusType, 
SUCCESS +from report import FAILURE, JobReport, StatusType, SUCCESS from s3_helper import S3Helper from tee_popen import TeePopen import docker_images_helper @@ -29,13 +27,6 @@ from version_helper import ( get_version_from_repo, update_version_local, ) -from clickhouse_helper import ( - ClickHouseHelper, - CiLogsCredentials, - prepare_tests_results_for_clickhouse, - get_instance_type, - get_instance_id, -) from stopwatch import Stopwatch IMAGE_NAME = "clickhouse/binary-builder" @@ -122,61 +113,6 @@ def build_clickhouse( return build_log_path, SUCCESS if success else FAILURE -def check_for_success_run( - s3_helper: S3Helper, - s3_prefix: str, - build_name: str, - version: ClickHouseVersion, -) -> None: - # TODO: Remove after S3 artifacts - logging.info("Checking for artifacts %s in bucket %s", s3_prefix, S3_BUILDS_BUCKET) - try: - # Performance artifacts are now part of regular build, so we're safe - build_results = s3_helper.list_prefix(s3_prefix) - except Exception as ex: - logging.info("Got exception while listing %s: %s\nRerun", s3_prefix, ex) - return - - if build_results is None or len(build_results) == 0: - logging.info("Nothing found in %s, rerun", s3_prefix) - return - - logging.info("Some build results found:\n%s", build_results) - build_urls = [] - log_url = "" - for url in build_results: - url_escaped = url.replace("+", "%2B").replace(" ", "%20") - if BUILD_LOG_NAME in url: - log_url = f"{S3_DOWNLOAD}/{S3_BUILDS_BUCKET}/{url_escaped}" - else: - build_urls.append(f"{S3_DOWNLOAD}/{S3_BUILDS_BUCKET}/{url_escaped}") - if not log_url: - # log is uploaded the last, so if there's no log we need to rerun the build - return - - success = len(build_urls) > 0 - build_result = BuildResult( - build_name, - log_url, - build_urls, - version.describe, - SUCCESS if success else FAILURE, - 0, - GITHUB_JOB_API_URL(), - ) - result_json_path = build_result.write_json(Path(TEMP_PATH)) - logging.info( - "Build result file %s is written, content:\n %s", - result_json_path, - result_json_path.read_text(encoding="utf-8"), - ) - # Fail build job if not successeded - if not success: - sys.exit(1) - else: - sys.exit(0) - - def get_release_or_pr(pr_info: PRInfo, version: ClickHouseVersion) -> Tuple[str, str]: "Return prefixes for S3 artifacts paths" # FIXME performance @@ -196,34 +132,6 @@ def get_release_or_pr(pr_info: PRInfo, version: ClickHouseVersion) -> Tuple[str, return pr_number, pr_number -def upload_master_static_binaries( - pr_info: PRInfo, - build_config: BuildConfig, - s3_helper: S3Helper, - build_output_path: Path, -) -> None: - """Upload binary artifacts to a static S3 links""" - static_binary_name = build_config.static_binary_name - if pr_info.number != 0: - return - elif not static_binary_name: - return - elif pr_info.base_ref != "master": - return - - # Full binary with debug info: - s3_path_full = "/".join((pr_info.base_ref, static_binary_name, "clickhouse-full")) - binary_full = build_output_path / "clickhouse" - url_full = s3_helper.upload_build_file_to_s3(binary_full, s3_path_full) - print(f"::notice ::Binary static URL (with debug info): {url_full}") - - # Stripped binary without debug info: - s3_path_compact = "/".join((pr_info.base_ref, static_binary_name, "clickhouse")) - binary_compact = build_output_path / "clickhouse-stripped" - url_compact = s3_helper.upload_build_file_to_s3(binary_compact, s3_path_compact) - print(f"::notice ::Binary static URL (compact): {url_compact}") - - def parse_args() -> argparse.Namespace: parser = argparse.ArgumentParser("Clickhouse builder script") 
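The rerun detection and S3 upload helpers removed from build_check.py above are centralized in ci.py later in this patch (_upload_build_artifacts and the --post action); the build script itself now only dumps a JobReport, as shown further down in this hunk. report.py is not part of this diff, so the following is only a minimal sketch of the assumed JobReport round-trip, with field names taken from how the class is constructed and read elsewhere in this patch:

# Illustrative sketch only -- report.JobReport itself is not shown in this diff.
import json
from dataclasses import asdict, dataclass, field
from pathlib import Path
from typing import List, Optional

JOB_REPORT_FILE = Path("job_report.json")  # assumed dump location, defined in report.py

@dataclass
class JobReport:
    status: str
    description: str
    test_results: list
    start_time: str
    duration: float
    additional_files: List[Path] = field(default_factory=list)
    build_dir_for_upload: Optional[Path] = None  # set by build jobs only
    version: str = ""
    check_name: str = ""

    def dump(self) -> None:
        # the job script writes the report ...
        JOB_REPORT_FILE.write_text(
            json.dumps(asdict(self), default=str), encoding="utf-8"
        )

    @classmethod
    def exist(cls) -> bool:
        return JOB_REPORT_FILE.is_file()

    @classmethod
    def load(cls) -> "JobReport":
        # ... and `ci.py --post` reads it back to upload artifacts and set statuses
        return cls(**json.loads(JOB_REPORT_FILE.read_text(encoding="utf-8")))

    @classmethod
    def cleanup(cls) -> None:
        JOB_REPORT_FILE.unlink(missing_ok=True)
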
parser.add_argument( @@ -254,21 +162,6 @@ def main(): s3_helper = S3Helper() version = get_version_from_repo(git=Git(True)) - release_or_pr, performance_pr = get_release_or_pr(pr_info, version) - - s3_path_prefix = "/".join((release_or_pr, pr_info.sha, build_name)) - # FIXME performance - s3_performance_path = "/".join( - (performance_pr, pr_info.sha, build_name, "performance.tar.zst") - ) - - # FIXME: to be removed in favor of "skip by job digest" - # If this is rerun, then we try to find already created artifacts and just - # put them as github actions artifact (result) - # The s3_path_prefix has additional "/" in the end to prevent finding - # e.g. `binary_darwin_aarch64/clickhouse` for `binary_darwin` - check_for_success_run(s3_helper, f"{s3_path_prefix}/", build_name, version) - logging.info("Got version from repo %s", version.string) official_flag = pr_info.number == 0 @@ -331,174 +224,16 @@ def main(): ) sys.exit(1) - # FIXME performance - performance_urls = [] - performance_path = build_output_path / "performance.tar.zst" - if performance_path.exists(): - performance_urls.append( - s3_helper.upload_build_file_to_s3(performance_path, s3_performance_path) - ) - logging.info( - "Uploaded performance.tar.zst to %s, now delete to avoid duplication", - performance_urls[0], - ) - performance_path.unlink() - - build_urls = ( - s3_helper.upload_build_directory_to_s3( - build_output_path, - s3_path_prefix, - keep_dirs_in_s3_path=False, - upload_symlinks=False, - ) - + performance_urls - ) - logging.info("Got build URLs %s", build_urls) - - print("::notice ::Build URLs: {}".format("\n".join(build_urls))) - - if log_path.exists(): - log_url = s3_helper.upload_build_file_to_s3( - log_path, s3_path_prefix + "/" + log_path.name - ) - logging.info("Log url %s", log_url) - else: - logging.info("Build log doesn't exist") - - print(f"::notice ::Log URL: {log_url}") - - build_result = BuildResult( - build_name, - log_url, - build_urls, - version.describe, - build_status, - elapsed, - GITHUB_JOB_API_URL(), - ) - result_json_path = build_result.write_json(temp_path) - logging.info( - "Build result file %s is written, content:\n %s", - result_json_path, - result_json_path.read_text(encoding="utf-8"), - ) - - upload_master_static_binaries(pr_info, build_config, s3_helper, build_output_path) - - # Upload profile data - ch_helper = ClickHouseHelper() - - ci_logs_credentials = CiLogsCredentials(Path("/dev/null")) - if ci_logs_credentials.host: - instance_type = get_instance_type() - instance_id = get_instance_id() - query = f"""INSERT INTO build_time_trace -( - pull_request_number, - commit_sha, - check_start_time, - check_name, - instance_type, - instance_id, - file, - library, - time, - pid, - tid, - ph, - ts, - dur, - cat, - name, - detail, - count, - avgMs, - args_name -) -SELECT {pr_info.number}, '{pr_info.sha}', '{stopwatch.start_time_str}', '{build_name}', '{instance_type}', '{instance_id}', * -FROM input(' - file String, - library String, - time DateTime64(6), - pid UInt32, - tid UInt32, - ph String, - ts UInt64, - dur UInt64, - cat String, - name String, - detail String, - count UInt64, - avgMs UInt64, - args_name String') -FORMAT JSONCompactEachRow""" - - auth = { - "X-ClickHouse-User": "ci", - "X-ClickHouse-Key": ci_logs_credentials.password, - } - url = f"https://{ci_logs_credentials.host}/" - profiles_dir = temp_path / "profiles_source" - profiles_dir.mkdir(parents=True, exist_ok=True) - logging.info( - "Processing profile JSON files from %s", repo_path / "build_docker" - ) - git_runner( - 
"./utils/prepare-time-trace/prepare-time-trace.sh " - f"build_docker {profiles_dir.absolute()}" - ) - profile_data_file = temp_path / "profile.json" - with open(profile_data_file, "wb") as profile_fd: - for profile_source in profiles_dir.iterdir(): - if profile_source.name != "binary_sizes.txt": - with open(profiles_dir / profile_source, "rb") as ps_fd: - profile_fd.write(ps_fd.read()) - - logging.info( - "::notice ::Log Uploading profile data, path: %s, size: %s, query: %s", - profile_data_file, - profile_data_file.stat().st_size, - query, - ) - ch_helper.insert_file(url, auth, query, profile_data_file) - - query = f"""INSERT INTO binary_sizes -( - pull_request_number, - commit_sha, - check_start_time, - check_name, - instance_type, - instance_id, - file, - size -) -SELECT {pr_info.number}, '{pr_info.sha}', '{stopwatch.start_time_str}', '{build_name}', '{instance_type}', '{instance_id}', file, size -FROM input('size UInt64, file String') -SETTINGS format_regexp = '^\\s*(\\d+) (.+)$' -FORMAT Regexp""" - - binary_sizes_file = profiles_dir / "binary_sizes.txt" - - logging.info( - "::notice ::Log Uploading binary sizes data, path: %s, size: %s, query: %s", - binary_sizes_file, - binary_sizes_file.stat().st_size, - query, - ) - ch_helper.insert_file(url, auth, query, binary_sizes_file) - - # Upload statistics to CI database - prepared_events = prepare_tests_results_for_clickhouse( - pr_info, - [], - build_status, - stopwatch.duration_seconds, - stopwatch.start_time_str, - log_url, - f"Build ({build_name})", - ) - ch_helper.insert_events_into(db="default", table="checks", events=prepared_events) + JobReport( + description=version.describe, + test_results=[], + status=build_status, + start_time=stopwatch.start_time_str, + duration=elapsed, + additional_files=[log_path], + build_dir_for_upload=build_output_path, + version=version.describe, + ).dump() # Fail the build job if it didn't succeed if build_status != SUCCESS: diff --git a/tests/ci/build_report_check.py b/tests/ci/build_report_check.py index d9925725eec..8f8f2b28935 100644 --- a/tests/ci/build_report_check.py +++ b/tests/ci/build_report_check.py @@ -4,12 +4,9 @@ import json import logging import os import sys -import atexit from pathlib import Path from typing import List -from github import Github - from env_helper import ( GITHUB_JOB_URL, GITHUB_REPOSITORY, @@ -22,20 +19,14 @@ from report import ( ERROR, PENDING, SUCCESS, + JobReport, create_build_html_report, get_worst_status, ) -from s3_helper import S3Helper -from get_robot_token import get_best_robot_token + from pr_info import PRInfo -from commit_status_helper import ( - RerunHelper, - format_description, - get_commit, - post_commit_status, - update_mergeable_check, -) from ci_config import CI_CONFIG +from stopwatch import Stopwatch # Old way to read the neads_data @@ -46,6 +37,7 @@ NEEDS_DATA = os.getenv("NEEDS_DATA", "") def main(): logging.basicConfig(level=logging.INFO) + stopwatch = Stopwatch() temp_path = Path(TEMP_PATH) reports_path = Path(REPORT_PATH) temp_path.mkdir(parents=True, exist_ok=True) @@ -74,16 +66,7 @@ def main(): if needs_data: logging.info("The next builds are required: %s", ", ".join(needs_data)) - gh = Github(get_best_robot_token(), per_page=100) pr_info = PRInfo() - commit = get_commit(gh, pr_info.sha) - - atexit.register(update_mergeable_check, commit, pr_info, build_check_name) - - rerun_helper = RerunHelper(commit, build_check_name) - if rerun_helper.is_already_finished_by_status(): - logging.info("Check is already finished according to github status, 
exiting") - sys.exit(0) builds_for_check = CI_CONFIG.get_builds_for_report(build_check_name) required_builds = required_builds or len(builds_for_check) @@ -91,10 +74,15 @@ def main(): # Collect reports from json artifacts build_results = [] for build_name in builds_for_check: - build_result = BuildResult.read_json(reports_path, build_name) - if build_result.is_missing: + build_result = BuildResult.load_any( + build_name, pr_info.number, pr_info.head_ref + ) + if not build_result: logging.warning("Build results for %s are missing", build_name) continue + assert ( + pr_info.head_ref == build_result.head_ref or pr_info.number > 0 + ), "BUG. if not a PR, report must be created on the same branch" build_results.append(build_result) # The code to collect missing reports for failed jobs @@ -125,8 +113,6 @@ def main(): logging.error("No success builds, failing check without creating a status") sys.exit(1) - s3_helper = S3Helper() - branch_url = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/commits/master" branch_name = "master" if pr_info.number != 0: @@ -146,18 +132,6 @@ def main(): report_path = temp_path / "report.html" report_path.write_text(report, encoding="utf-8") - logging.info("Going to upload prepared report") - context_name_for_path = build_check_name.lower().replace(" ", "_") - s3_path_prefix = ( - str(pr_info.number) + "/" + pr_info.sha + "/" + context_name_for_path - ) - - url = s3_helper.upload_test_report_to_s3( - report_path, s3_path_prefix + "/report.html" - ) - logging.info("Report url %s", url) - print(f"::notice ::Report url: {url}") - # Prepare a commit status summary_status = get_worst_status(br.status for br in build_results) @@ -174,19 +148,16 @@ def main(): f" ({required_builds - missing_builds} of {required_builds} builds are OK)" ) - description = format_description( - f"{ok_groups}/{total_groups} artifact groups are OK{addition}" - ) + description = f"{ok_groups}/{total_groups} artifact groups are OK{addition}" - post_commit_status( - commit, - summary_status, - url, - description, - build_check_name, - pr_info, - dump_to_file=True, - ) + JobReport( + description=description, + test_results=[], + status=summary_status, + start_time=stopwatch.start_time_str, + duration=stopwatch.duration_seconds, + additional_files=[report_path], + ).dump() if summary_status == ERROR: sys.exit(1) diff --git a/tests/ci/cache_utils.py b/tests/ci/cache_utils.py index 0906b1d14e5..062207fadd1 100644 --- a/tests/ci/cache_utils.py +++ b/tests/ci/cache_utils.py @@ -116,9 +116,15 @@ class Cache: self.s3_helper = s3_helper def _download(self, url: str, ignore_error: bool = False) -> None: + self.temp_path.mkdir(parents=True, exist_ok=True) compressed_cache = self.temp_path / self.archive_name try: - download_build_with_progress(url, compressed_cache) + if url.startswith("file://"): + local_s3_cache = Path(url[7:]) + if local_s3_cache.is_file(): + shutil.copy2(local_s3_cache, compressed_cache) + else: + download_build_with_progress(url, compressed_cache) except DownloadException as e: if not ignore_error: raise CacheError(f"Failed to download {url}") from e diff --git a/tests/ci/cancel_and_rerun_workflow_lambda/app.py b/tests/ci/cancel_and_rerun_workflow_lambda/app.py index 8282fb7768a..656198c6985 100644 --- a/tests/ci/cancel_and_rerun_workflow_lambda/app.py +++ b/tests/ci/cancel_and_rerun_workflow_lambda/app.py @@ -1,28 +1,24 @@ #!/usr/bin/env python3 -from base64 import b64decode -from collections import namedtuple -from typing import Any, Dict, List, Optional, Tuple -from threading import Thread 
-from queue import Queue import json import re import time +from base64 import b64decode +from collections import namedtuple +from queue import Queue +from threading import Thread +from typing import Any, Dict, List, Optional, Tuple import requests # type: ignore - from lambda_shared.pr import CATEGORY_TO_LABEL, check_pr_description from lambda_shared.token import get_cached_access_token - -NEED_RERUN_ON_EDITED = { - "PullRequestCI", - "DocsCheck", -} - NEED_RERUN_OR_CANCELL_WORKFLOWS = { "BackportPR", -}.union(NEED_RERUN_ON_EDITED) + "DocsCheck", + "MasterCI", + "PullRequestCI", +} MAX_RETRY = 5 diff --git a/tests/ci/cherry_pick.py b/tests/ci/cherry_pick.py index 93677c2c07b..2c40b2a4099 100644 --- a/tests/ci/cherry_pick.py +++ b/tests/ci/cherry_pick.py @@ -32,6 +32,7 @@ from pathlib import Path from subprocess import CalledProcessError from typing import List, Optional +import __main__ from env_helper import TEMP_PATH from get_robot_token import get_best_robot_token from git_helper import git_runner, is_shallow @@ -606,16 +607,18 @@ def parse_args(): @contextmanager def clear_repo(): - orig_ref = git_runner("git branch --show-current") or git_runner( - "git rev-parse HEAD" - ) + def ref(): + return git_runner("git branch --show-current") or git_runner( + "git rev-parse HEAD" + ) + + orig_ref = ref() try: yield - except (Exception, KeyboardInterrupt): - git_runner(f"git checkout -f {orig_ref}") - raise - else: - git_runner(f"git checkout -f {orig_ref}") + finally: + current_ref = ref() + if orig_ref != current_ref: + git_runner(f"git checkout -f {orig_ref}") @contextmanager @@ -623,15 +626,14 @@ def stash(): # diff.ignoreSubmodules=all don't show changed submodules need_stash = bool(git_runner("git -c diff.ignoreSubmodules=all diff HEAD")) if need_stash: - git_runner("git stash push --no-keep-index -m 'running cherry_pick.py'") + script = ( + __main__.__file__ if hasattr(__main__, "__file__") else "unknown script" + ) + git_runner(f"git stash push --no-keep-index -m 'running {script}'") try: with clear_repo(): yield - except (Exception, KeyboardInterrupt): - if need_stash: - git_runner("git stash pop") - raise - else: + finally: if need_stash: git_runner("git stash pop") diff --git a/tests/ci/ci.py b/tests/ci/ci.py index 701f66b9a6a..f52f28c3a16 100644 --- a/tests/ci/ci.py +++ b/tests/ci/ci.py @@ -1,6 +1,7 @@ import argparse import concurrent.futures import json +import logging import os import re import subprocess @@ -9,22 +10,41 @@ from pathlib import Path from typing import Any, Dict, Iterable, List, Optional import docker_images_helper -from ci_config import CI_CONFIG +from ci_config import CI_CONFIG, Labels from commit_status_helper import ( CommitStatusData, + RerunHelper, format_description, get_commit, + post_commit_status, set_status_comment, + update_mergeable_check, ) from digest_helper import DockerDigester, JobDigester -from env_helper import CI, REPORT_PATH, ROOT_DIR, S3_BUILDS_BUCKET, TEMP_PATH +from env_helper import ( + CI, + GITHUB_JOB_API_URL, + REPO_COPY, + REPORT_PATH, + S3_BUILDS_BUCKET, + TEMP_PATH, +) from get_robot_token import get_best_robot_token from git_helper import GIT_PREFIX, Git from git_helper import Runner as GitRunner from github import Github from pr_info import PRInfo -from report import BuildResult +from report import SUCCESS, BuildResult, JobReport from s3_helper import S3Helper +from clickhouse_helper import ( + CiLogsCredentials, + ClickHouseHelper, + get_instance_id, + get_instance_type, + prepare_tests_results_for_clickhouse, +) +from 
build_check import get_release_or_pr +import upload_result_helper from version_helper import get_version_from_repo @@ -42,20 +62,6 @@ def normalize_check_name(check_name: str) -> str: return res -def is_build_job(job: str) -> bool: - if "package_" in job or "binary_" in job or job == "fuzzers": - return True - return False - - -def is_test_job(job: str) -> bool: - return not is_build_job(job) and not "Style" in job and not "Docs check" in job - - -def is_docs_job(job: str) -> bool: - return "Docs check" in job - - def parse_args(parser: argparse.ArgumentParser) -> argparse.Namespace: # FIXME: consider switching to sub_parser for configure, pre, run, post actions parser.add_argument( @@ -94,6 +100,12 @@ def parse_args(parser: argparse.ArgumentParser) -> argparse.Namespace: type=str, help="Job name as in config", ) + parser.add_argument( + "--run-command", + default="", + type=str, + help="A run command to run in --run action. Will override run_command from a job config if any", + ) parser.add_argument( "--batch", default=-1, @@ -149,6 +161,11 @@ def parse_args(parser: argparse.ArgumentParser) -> argparse.Namespace: default=False, help="will create run config without skipping build jobs in any case, used in --configure action (for release branches)", ) + parser.add_argument( + "--commit-message", + default="", + help="debug option to test commit message processing", + ) return parser.parse_args() @@ -271,6 +288,7 @@ def _update_config_for_docs_only(run_config: dict) -> None: def _configure_docker_jobs( rebuild_all_dockers: bool, docker_digest_or_latest: bool = False ) -> Dict: + print("::group::Docker images check") # generate docker jobs data docker_digester = DockerDigester() imagename_digest_dict = ( @@ -283,7 +301,6 @@ def _configure_docker_jobs( # FIXME: we need login as docker manifest inspect goes directly to one of the *.docker.com hosts instead of "registry-mirrors" : ["http://dockerhub-proxy.dockerhub-proxy-zone:5000"] # find if it's possible to use the setting of /etc/docker/daemon.json docker_images_helper.docker_login() - print("Start checking missing images in dockerhub") missing_multi_dict = check_missing_images_on_dockerhub(imagename_digest_dict) missing_multi = list(missing_multi_dict) missing_amd64 = [] @@ -313,7 +330,6 @@ def _configure_docker_jobs( ) for image in missing_multi: imagename_digest_dict[image] = "latest" - print("...checking missing images in dockerhub - done") else: # add all images to missing missing_multi = list(imagename_digest_dict) @@ -324,6 +340,7 @@ def _configure_docker_jobs( for name in imagename_digest_dict if not images_info[name]["only_amd64"] ] + print("::endgroup::") return { "images": imagename_digest_dict, @@ -341,30 +358,36 @@ def _configure_jobs( rebuild_all_binaries: bool, pr_labels: Iterable[str], commit_tokens: List[str], + ci_cache_enabled: bool, ) -> Dict: - # a. digest each item from the config + ## a. digest each item from the config job_digester = JobDigester() jobs_params: Dict[str, Dict] = {} jobs_to_do: List[str] = [] jobs_to_skip: List[str] = [] digests: Dict[str, str] = {} - print("Calculating job digests - start") + print("::group::Job Digests") + for job in CI_CONFIG.job_generator(): digest = job_digester.get_job_digest(CI_CONFIG.get_digest_config(job)) digests[job] = digest print(f" job [{job.rjust(50)}] has digest [{digest}]") - print("Calculating job digests - done") + print("::endgroup::") + + ## b. 
check if we have something done + if ci_cache_enabled: + done_files = [] + else: + path = get_s3_path(build_digest) + done_files = s3.list_prefix(path) + done_files = [file.split("/")[-1] for file in done_files] + # print(f"S3 CI files for the build [{build_digest}]: {done_files}") + docs_path = get_s3_path_docs(docs_digest) + done_files_docs = s3.list_prefix(docs_path) + done_files_docs = [file.split("/")[-1] for file in done_files_docs] + # print(f"S3 CI files for the docs [{docs_digest}]: {done_files_docs}") + done_files += done_files_docs - # b. check if we have something done - path = get_s3_path(build_digest) - done_files = s3.list_prefix(path) - done_files = [file.split("/")[-1] for file in done_files] - print(f"S3 CI files for the build [{build_digest}]: {done_files}") - docs_path = get_s3_path_docs(docs_digest) - done_files_docs = s3.list_prefix(docs_path) - done_files_docs = [file.split("/")[-1] for file in done_files_docs] - print(f"S3 CI files for the docs [{docs_digest}]: {done_files_docs}") - done_files += done_files_docs for job in digests: digest = digests[job] job_config = CI_CONFIG.get_job_config(job) @@ -384,7 +407,7 @@ def _configure_jobs( for batch in range(num_batches): # type: ignore success_flag_name = get_file_flag_name(job, digest, batch, num_batches) if success_flag_name not in done_files or ( - rebuild_all_binaries and is_build_job(job) + rebuild_all_binaries and CI_CONFIG.is_build_job(job) ): batches_to_do.append(batch) @@ -395,8 +418,9 @@ def _configure_jobs( "num_batches": num_batches, } else: - jobs_to_skip += (job,) + jobs_to_skip.append(job) + ## c. check CI controlling labels commit messages if pr_labels: jobs_requested_by_label = [] # type: List[str] ci_controlling_labels = [] # type: List[str] @@ -410,41 +434,65 @@ def _configure_jobs( print( f" : following jobs will be executed: [{jobs_requested_by_label}]" ) - jobs_to_do = jobs_requested_by_label + jobs_to_do = [job for job in jobs_requested_by_label if job in jobs_to_do] if commit_tokens: + jobs_to_do_requested = [] # type: List[str] + + # handle ci set tokens + ci_controlling_tokens = [ + token for token in commit_tokens if token in CI_CONFIG.label_configs + ] + for token_ in ci_controlling_tokens: + label_config = CI_CONFIG.get_label_config(token_) + assert label_config, f"Unknonwn token [{token_}]" + print( + f"NOTE: CI controlling token: [{ci_controlling_tokens}], add jobs: [{label_config.run_jobs}]" + ) + jobs_to_do_requested += label_config.run_jobs + + # handle specific job requests requested_jobs = [ - token[len("#job_") :] - for token in commit_tokens - if token.startswith("#job_") + token[len("job_") :] for token in commit_tokens if token.startswith("job_") ] if requested_jobs: assert any( len(x) > 1 for x in requested_jobs ), f"Invalid job names requested [{requested_jobs}]" - jobs_to_do_requested = [] for job in requested_jobs: job_with_parents = CI_CONFIG.get_job_with_parents(job) + print( + f"NOTE: CI controlling token: [#job_{job}], add jobs: [{job_with_parents}]" + ) # always add requested job itself, even if it could be skipped jobs_to_do_requested.append(job_with_parents[0]) for parent in job_with_parents[1:]: if parent in jobs_to_do and parent not in jobs_to_do_requested: jobs_to_do_requested.append(parent) + + if jobs_to_do_requested: print( f"NOTE: Only specific job(s) were requested by commit message tokens: [{jobs_to_do_requested}]" ) - jobs_to_do = jobs_to_do_requested + jobs_to_do = list( + set(job for job in jobs_to_do_requested if job in jobs_to_do) + ) return { "digests": 
digests, "jobs_to_do": jobs_to_do, "jobs_to_skip": jobs_to_skip, - "jobs_params": jobs_params, + "jobs_params": { + job: params for job, params in jobs_params.items() if job in jobs_to_do + }, } def _update_gh_statuses(indata: Dict, s3: S3Helper) -> None: - # This action is required to re-create all GH statuses for skipped jobs, so that ci report can be generated afterwards + if indata["ci_flags"][Labels.NO_CI_CACHE]: + print("CI cache is disabled - skip restoring commit statuses from CI cache") + return + temp_path = Path(TEMP_PATH) if not temp_path.exists(): temp_path.mkdir(parents=True, exist_ok=True) @@ -485,7 +533,7 @@ def _update_gh_statuses(indata: Dict, s3: S3Helper) -> None: job_status = CommitStatusData.load_from_file( f"{TEMP_PATH}/{success_flag_name}" ) # type: CommitStatusData - assert job_status.status == "success", "BUG!" + assert job_status.status == SUCCESS, "BUG!" commit.create_status( state=job_status.status, target_url=job_status.report_url, @@ -500,7 +548,7 @@ def _update_gh_statuses(indata: Dict, s3: S3Helper) -> None: with concurrent.futures.ThreadPoolExecutor() as executor: futures = [] for job in job_digests: - if is_build_job(job): + if CI_CONFIG.is_build_job(job): # no GH status for build jobs continue digest = job_digests[job] @@ -528,25 +576,263 @@ def _update_gh_statuses(indata: Dict, s3: S3Helper) -> None: def _fetch_commit_tokens(message: str) -> List[str]: pattern = r"#[\w-]+" - matches = re.findall(pattern, message) - res = [ - match - for match in matches - if match == "#no-merge-commit" - or match.startswith("#job_") - or match.startswith("#job-") - ] + matches = [match[1:] for match in re.findall(pattern, message)] + res = [match for match in matches if match in Labels or match.startswith("job_")] return res +def _upload_build_artifacts( + pr_info: PRInfo, + build_name: str, + build_digest: str, + job_report: JobReport, + s3: S3Helper, + s3_destination: str, +) -> str: + # There are ugly artifacts for the performance test. 
FIXME: + s3_performance_path = "/".join( + ( + get_release_or_pr(pr_info, get_version_from_repo())[1], + pr_info.sha, + CI_CONFIG.normalize_string(build_name), + "performance.tar.zst", + ) + ) + performance_urls = [] + assert job_report.build_dir_for_upload, "Must be set for build job" + performance_path = Path(job_report.build_dir_for_upload) / "performance.tar.zst" + if performance_path.exists(): + performance_urls.append( + s3.upload_build_file_to_s3(performance_path, s3_performance_path) + ) + print( + "Uploaded performance.tar.zst to %s, now delete to avoid duplication", + performance_urls[0], + ) + performance_path.unlink() + build_urls = ( + s3.upload_build_directory_to_s3( + Path(job_report.build_dir_for_upload), + s3_destination, + keep_dirs_in_s3_path=False, + upload_symlinks=False, + ) + + performance_urls + ) + print("::notice ::Build URLs: {}".format("\n".join(build_urls))) + log_path = Path(job_report.additional_files[0]) + log_url = "" + if log_path.exists(): + log_url = s3.upload_build_file_to_s3( + log_path, s3_destination + "/" + log_path.name + ) + print(f"::notice ::Log URL: {log_url}") + + # generate and upload build report + build_result = BuildResult( + build_name, + log_url, + build_urls, + job_report.version, + job_report.status, + int(job_report.duration), + GITHUB_JOB_API_URL(), + head_ref=pr_info.head_ref, + pr_number=pr_info.number, + ) + result_json_path = build_result.write_json() + s3_path = get_s3_path(build_digest) + result_json_path.name + build_report_url = s3.upload_file( + bucket=S3_BUILDS_BUCKET, file_path=result_json_path, s3_path=s3_path + ) + print(f"Report file [{result_json_path}] has been uploaded to [{build_report_url}]") + + # Upload head master binaries + static_bin_name = CI_CONFIG.build_config[build_name].static_binary_name + if pr_info.is_master() and static_bin_name: + # Full binary with debug info: + s3_path_full = "/".join((pr_info.base_ref, static_bin_name, "clickhouse-full")) + binary_full = Path(job_report.build_dir_for_upload) / "clickhouse" + url_full = s3.upload_build_file_to_s3(binary_full, s3_path_full) + print(f"::notice ::Binary static URL (with debug info): {url_full}") + + # Stripped binary without debug info: + s3_path_compact = "/".join((pr_info.base_ref, static_bin_name, "clickhouse")) + binary_compact = Path(job_report.build_dir_for_upload) / "clickhouse-stripped" + url_compact = s3.upload_build_file_to_s3(binary_compact, s3_path_compact) + print(f"::notice ::Binary static URL (compact): {url_compact}") + + return log_url + + +def _upload_build_profile_data( + pr_info: PRInfo, + build_name: str, + job_report: JobReport, + git_runner: GitRunner, + ch_helper: ClickHouseHelper, +) -> None: + ci_logs_credentials = CiLogsCredentials(Path("/dev/null")) + if ci_logs_credentials.host: + instance_type = get_instance_type() + instance_id = get_instance_id() + query = f"""INSERT INTO build_time_trace + ( + pull_request_number, + commit_sha, + check_start_time, + check_name, + instance_type, + instance_id, + file, + library, + time, + pid, + tid, + ph, + ts, + dur, + cat, + name, + detail, + count, + avgMs, + args_name + ) + SELECT {pr_info.number}, '{pr_info.sha}', '{job_report.start_time}', '{build_name}', '{instance_type}', '{instance_id}', * + FROM input(' + file String, + library String, + time DateTime64(6), + pid UInt32, + tid UInt32, + ph String, + ts UInt64, + dur UInt64, + cat String, + name String, + detail String, + count UInt64, + avgMs UInt64, + args_name String') + FORMAT JSONCompactEachRow""" + + auth = { + 
"X-ClickHouse-User": "ci", + "X-ClickHouse-Key": ci_logs_credentials.password, + } + url = f"https://{ci_logs_credentials.host}/" + profiles_dir = Path(TEMP_PATH) / "profiles_source" + profiles_dir.mkdir(parents=True, exist_ok=True) + print( + "Processing profile JSON files from %s", + Path(REPO_COPY) / "build_docker", + ) + git_runner( + "./utils/prepare-time-trace/prepare-time-trace.sh " + f"build_docker {profiles_dir.absolute()}" + ) + profile_data_file = Path(TEMP_PATH) / "profile.json" + with open(profile_data_file, "wb") as profile_fd: + for profile_source in profiles_dir.iterdir(): + if profile_source.name != "binary_sizes.txt": + with open(profiles_dir / profile_source, "rb") as ps_fd: + profile_fd.write(ps_fd.read()) + + print( + "::notice ::Log Uploading profile data, path: %s, size: %s, query: %s", + profile_data_file, + profile_data_file.stat().st_size, + query, + ) + ch_helper.insert_file(url, auth, query, profile_data_file) + + query = f"""INSERT INTO binary_sizes + ( + pull_request_number, + commit_sha, + check_start_time, + check_name, + instance_type, + instance_id, + file, + size + ) + SELECT {pr_info.number}, '{pr_info.sha}', '{job_report.start_time}', '{build_name}', '{instance_type}', '{instance_id}', file, size + FROM input('size UInt64, file String') + SETTINGS format_regexp = '^\\s*(\\d+) (.+)$' + FORMAT Regexp""" + + binary_sizes_file = profiles_dir / "binary_sizes.txt" + + print( + "::notice ::Log Uploading binary sizes data, path: %s, size: %s, query: %s", + binary_sizes_file, + binary_sizes_file.stat().st_size, + query, + ) + ch_helper.insert_file(url, auth, query, binary_sizes_file) + + +def _run_test(job_name: str, run_command: str) -> int: + assert ( + run_command or CI_CONFIG.get_job_config(job_name).run_command + ), "Run command must be provided as input argument or be configured in job config" + + if not run_command: + if CI_CONFIG.get_job_config(job_name).timeout: + os.environ["KILL_TIMEOUT"] = str(CI_CONFIG.get_job_config(job_name).timeout) + run_command = "/".join( + (os.path.dirname(__file__), CI_CONFIG.get_job_config(job_name).run_command) + ) + if ".py" in run_command and not run_command.startswith("python"): + run_command = "python3 " + run_command + print("Use run command from a job config") + else: + print("Use run command from the workflow") + os.environ["CHECK_NAME"] = job_name + print(f"Going to start run command [{run_command}]") + process = subprocess.run( + run_command, + stdout=sys.stdout, + stderr=sys.stderr, + text=True, + check=False, + shell=True, + ) + + if process.returncode == 0: + print(f"Run action done for: [{job_name}]") + exit_code = 0 + else: + print( + f"Run action failed for: [{job_name}] with exit code [{process.returncode}]" + ) + exit_code = process.returncode + return exit_code + + +def _get_ext_check_name(check_name: str) -> str: + run_by_hash_num = int(os.getenv("RUN_BY_HASH_NUM", "0")) + run_by_hash_total = int(os.getenv("RUN_BY_HASH_TOTAL", "0")) + if run_by_hash_total > 1: + check_name_with_group = ( + check_name + f" [{run_by_hash_num + 1}/{run_by_hash_total}]" + ) + else: + check_name_with_group = check_name + return check_name_with_group + + def main() -> int: + logging.basicConfig(level=logging.INFO) exit_code = 0 parser = argparse.ArgumentParser( formatter_class=argparse.ArgumentDefaultsHelpFormatter, ) args = parse_args(parser) - if args.mark_success or args.pre or args.post or args.run: + if args.mark_success or args.pre or args.run: assert args.infile, "Run config must be provided via --infile" assert 
args.job_name, "Job name must be provided via --job-name" @@ -561,26 +847,34 @@ def main() -> int: result: Dict[str, Any] = {} s3 = S3Helper() + pr_info = PRInfo() + git_runner = GitRunner(set_cwd_to_git_root=True) + ### CONFIGURE action: start if args.configure: - GR = GitRunner() - pr_info = PRInfo() - docker_data = {} - git_ref = GR.run(f"{GIT_PREFIX} rev-parse HEAD") + git_ref = git_runner.run(f"{GIT_PREFIX} rev-parse HEAD") - # if '#no-merge-commit' is set in commit message - set git ref to PR branch head to avoid merge-commit + # if '#no_merge_commit' is set in commit message - set git ref to PR branch head to avoid merge-commit tokens = [] - if pr_info.number != 0 and not args.skip_jobs: - message = GR.run(f"{GIT_PREFIX} log {pr_info.sha} --format=%B -n 1") + ci_flags = { + Labels.NO_MERGE_COMMIT: False, + Labels.NO_CI_CACHE: False, + } + if (pr_info.number != 0 and not args.skip_jobs) or args.commit_message: + message = args.commit_message or git_runner.run( + f"{GIT_PREFIX} log {pr_info.sha} --format=%B -n 1" + ) tokens = _fetch_commit_tokens(message) - print(f"Found commit message tokens: [{tokens}]") - if "#no-merge-commit" in tokens and CI: - GR.run(f"{GIT_PREFIX} checkout {pr_info.sha}") - git_ref = GR.run(f"{GIT_PREFIX} rev-parse HEAD") - print( - "#no-merge-commit is set in commit message - Setting git ref to PR branch HEAD to not use merge commit" - ) + print(f"Commit message tokens: [{tokens}]") + if Labels.NO_MERGE_COMMIT in tokens and CI: + git_runner.run(f"{GIT_PREFIX} checkout {pr_info.sha}") + git_ref = git_runner.run(f"{GIT_PREFIX} rev-parse HEAD") + ci_flags[Labels.NO_MERGE_COMMIT] = True + print("NOTE: Disable Merge Commit") + if Labels.NO_CI_CACHE in tokens: + ci_flags[Labels.NO_CI_CACHE] = True + print("NOTE: Disable CI Cache") # let's get CH version version = get_version_from_repo(git=Git(True)).string @@ -607,9 +901,11 @@ def main() -> int: docs_digest, job_digester, s3, - args.rebuild_all_binaries, + # FIXME: add suport for master wf w/o rebuilds + args.rebuild_all_binaries or pr_info.is_master(), pr_info.labels, tokens, + ci_flags[Labels.NO_CI_CACHE], ) if not args.skip_jobs else {} @@ -620,90 +916,204 @@ def main() -> int: result["version"] = version result["build"] = build_digest result["docs"] = docs_digest + result["ci_flags"] = ci_flags result["jobs_data"] = jobs_data result["docker_data"] = docker_data - if pr_info.number != 0 and not args.docker_digest_or_latest: + if ( + not args.skip_jobs + and pr_info.number != 0 + and not args.docker_digest_or_latest + ): # FIXME: it runs style check before docker build if possible (style-check images is not changed) # find a way to do style check always before docker build and others _check_and_update_for_early_style_check(result) - if pr_info.has_changes_in_documentation_only(): + if not args.skip_jobs and pr_info.has_changes_in_documentation_only(): _update_config_for_docs_only(result) + ### CONFIGURE action: end - elif args.update_gh_statuses: - assert indata, "Run config must be provided via --infile" - _update_gh_statuses(indata=indata, s3=s3) - + ### PRE action: start elif args.pre: - # remove job status file if any CommitStatusData.cleanup() + JobReport.cleanup() + BuildResult.cleanup() - if is_test_job(args.job_name): - assert indata, "Run config must be provided via --infile" - report_path = Path(REPORT_PATH) - report_path.mkdir(exist_ok=True, parents=True) - path = get_s3_path(indata["build"]) - files = s3.download_files( # type: ignore - bucket=S3_BUILDS_BUCKET, - s3_path=path, - file_suffix=".json", - 
local_directory=report_path, - ) - print( - f"Pre action done. Report files [{files}] have been downloaded from [{path}] to [{report_path}]" - ) - else: - print(f"Pre action done. Nothing to do for [{args.job_name}]") + assert indata, "Run config must be provided via --infile" + report_path = Path(REPORT_PATH) + report_path.mkdir(exist_ok=True, parents=True) + path = get_s3_path(indata["build"]) + reports_files = s3.download_files( # type: ignore + bucket=S3_BUILDS_BUCKET, + s3_path=path, + file_suffix=".json", + local_directory=report_path, + ) + # for release/master branches reports must be created on the same branches + files = [] + if pr_info.number == 0: + for file in reports_files: + if pr_info.head_ref not in file: + # keep reports from the same branch only, if not in a PR + (report_path / file).unlink() + print(f"drop report: [{report_path / file}]") + else: + files.append(file) + reports_files = files + print( + f"Pre action done. Report files [{reports_files}] have been downloaded from [{path}] to [{report_path}]" + ) + ### PRE action: end + ### RUN action: start elif args.run: - assert CI_CONFIG.get_job_config( - args.job_name - ).run_command, f"Run command must be configured in CI_CONFIG for [{args.job_name}] or in GH workflow" - if CI_CONFIG.get_job_config(args.job_name).timeout: - os.environ["KILL_TIMEOUT"] = str( - CI_CONFIG.get_job_config(args.job_name).timeout + assert indata + check_name = args.job_name + check_name_with_group = _get_ext_check_name(check_name) + print( + f"Check if rerun for name: [{check_name}], extended name [{check_name_with_group}]" + ) + previous_status = None + if CI_CONFIG.is_build_job(check_name): + # this is a build job - check if build report is present + build_result = ( + BuildResult.load_any(check_name, pr_info.number, pr_info.head_ref) + if not indata["ci_flags"][Labels.NO_CI_CACHE] + else None ) - os.environ["CHECK_NAME"] = args.job_name - run_command = ( - "./tests/ci/" + CI_CONFIG.get_job_config(args.job_name).run_command - ) - if ".py" in run_command: - run_command = "python3 " + run_command - print(f"Going to start run command [{run_command}]") - process = subprocess.run( - run_command, - stdout=sys.stdout, - stderr=sys.stderr, - text=True, - check=False, - shell=True, - ) - if process.returncode == 0: - print(f"Run action done for: [{args.job_name}]") + if build_result: + if build_result.status == SUCCESS: + previous_status = build_result.status + else: + # FIXME: Consider reusing failures for build jobs. 
+ # Just remove this if/else - that makes build job starting and failing immediately + print( + "Build report found but status is unsuccessful - will try to rerun" + ) + print("::group::Build Report") + print(build_result.as_json()) + print("::endgroup::") else: - print( - f"Run action failed for: [{args.job_name}] with exit code [{process.returncode}]" + # this is a test job - check if GH commit status is present + commit = get_commit( + Github(get_best_robot_token(), per_page=100), pr_info.sha ) - exit_code = process.returncode + rerun_helper = RerunHelper(commit, check_name_with_group) + if rerun_helper.is_already_finished_by_status(): + status = rerun_helper.get_finished_status() + assert status + previous_status = status.state + print("::group::Commit Status") + print(status) + print("::endgroup::") + if previous_status: + print( + f"Commit status or Build Report is already present - job will be skipped with status: [{previous_status}]" + ) + if previous_status == SUCCESS: + exit_code = 0 + else: + exit_code = 1 + else: + exit_code = _run_test(check_name, args.run_command) + ### RUN action: end + + ### POST action: start elif args.post: - if is_build_job(args.job_name): - report_path = Path(TEMP_PATH) # build-check.py stores report in TEMP_PATH - assert report_path.is_dir(), f"File [{report_path}] is not a dir" - files = list(report_path.glob(f"*{args.job_name}.json")) # type: ignore[arg-type] - assert len(files) == 1, f"Which is the report file: {files}?" - local_report = f"{files[0]}" - report_name = BuildResult.get_report_name(args.job_name) - assert indata - s3_path = Path(get_s3_path(indata["build"])) / report_name - report_url = s3.upload_file( - bucket=S3_BUILDS_BUCKET, file_path=local_report, s3_path=s3_path + assert ( + not CI_CONFIG.is_build_job(args.job_name) or indata + ), "--infile with config must be provided for POST action of a build type job [{args.job_name}]" + job_report = JobReport.load() if JobReport.exist() else None + if job_report: + ch_helper = ClickHouseHelper() + check_url = "" + + if CI_CONFIG.is_build_job(args.job_name): + build_name = args.job_name + s3_path_prefix = "/".join( + ( + get_release_or_pr(pr_info, get_version_from_repo())[0], + pr_info.sha, + build_name, + ) + ) + log_url = _upload_build_artifacts( + pr_info, + build_name, + build_digest=indata["build"], # type: ignore + job_report=job_report, + s3=s3, + s3_destination=s3_path_prefix, + ) + _upload_build_profile_data( + pr_info, build_name, job_report, git_runner, ch_helper + ) + check_url = log_url + else: + # test job + additional_urls = [] + s3_path_prefix = "/".join( + ( + get_release_or_pr(pr_info, get_version_from_repo())[0], + pr_info.sha, + CI_CONFIG.normalize_string( + job_report.check_name or _get_ext_check_name(args.job_name) + ), + ) + ) + if job_report.build_dir_for_upload: + additional_urls = s3.upload_build_directory_to_s3( + Path(job_report.build_dir_for_upload), + s3_path_prefix, + keep_dirs_in_s3_path=False, + upload_symlinks=False, + ) + if job_report.test_results or job_report.additional_files: + check_url = upload_result_helper.upload_results( + s3, + pr_info.number, + pr_info.sha, + job_report.test_results, + job_report.additional_files, + job_report.check_name or args.job_name, + additional_urls=additional_urls or None, + ) + commit = get_commit( + Github(get_best_robot_token(), per_page=100), pr_info.sha + ) + post_commit_status( + commit, + job_report.status, + check_url, + format_description(job_report.description), + job_report.check_name or args.job_name, + pr_info, 
+ dump_to_file=True, + ) + update_mergeable_check( + commit, + pr_info, + job_report.check_name or _get_ext_check_name(args.job_name), + ) + + print(f"Job report url: [{check_url}]") + prepared_events = prepare_tests_results_for_clickhouse( + pr_info, + job_report.test_results, + job_report.status, + job_report.duration, + job_report.start_time, + check_url or "", + job_report.check_name or args.job_name, ) - print( - f"Post action done. Report file [{local_report}] has been uploaded to [{report_url}]" + ch_helper.insert_events_into( + db="default", table="checks", events=prepared_events ) else: - print(f"Post action done. Nothing to do for [{args.job_name}]") + # no job report + print(f"No job report for {[args.job_name]} - do nothing") + ### POST action: end + ### MARK SUCCESS action: start elif args.mark_success: assert indata, "Run config must be provided via --infile" job = args.job_name @@ -715,7 +1125,7 @@ def main() -> int: # FIXME: find generic design for propagating and handling job status (e.g. stop using statuses in GH api) # now job ca be build job w/o status data, any other job that exit with 0 with or w/o status data - if is_build_job(job): + if CI_CONFIG.is_build_job(job): # there is no status for build jobs # create dummy success to mark it as done job_status = CommitStatusData( @@ -742,7 +1152,7 @@ def main() -> int: success_flag_name = get_file_flag_name( job, indata["jobs_data"]["digests"][job], args.batch, num_batches ) - if not is_docs_job(job): + if not CI_CONFIG.is_docs_job(job): path = get_s3_path(indata["build"]) + success_flag_name else: path = get_s3_path_docs(indata["docs"]) + success_flag_name @@ -756,8 +1166,15 @@ def main() -> int: ) else: print(f"Job [{job}] is not ok, status [{job_status.status}]") + ### MARK SUCCESS action: end - # print results + ### UPDATE GH STATUSES action: start + elif args.update_gh_statuses: + assert indata, "Run config must be provided via --infile" + _update_gh_statuses(indata=indata, s3=s3) + ### UPDATE GH STATUSES action: end + + ### print results if args.outfile: with open(args.outfile, "w") as f: if isinstance(result, str): @@ -773,10 +1190,8 @@ def main() -> int: print(json.dumps(result, indent=2 if args.pretty else None)) else: raise AssertionError(f"Unexpected type for 'res': {type(result)}") - return exit_code if __name__ == "__main__": - os.chdir(ROOT_DIR) sys.exit(main()) diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index e3319fe4a72..e011b8ad3c9 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -3,15 +3,139 @@ import logging from argparse import ArgumentDefaultsHelpFormatter, ArgumentParser from dataclasses import dataclass, field -from enum import Enum from pathlib import Path from typing import Callable, Dict, Iterable, List, Literal, Optional, Union from integration_test_images import IMAGES +from ci_utils import WithIter -class Labels(Enum): - DO_NOT_TEST_LABEL = "do not test" +class Labels(metaclass=WithIter): + DO_NOT_TEST_LABEL = "do_not_test" + NO_MERGE_COMMIT = "no_merge_commit" + NO_CI_CACHE = "no_ci_cache" + CI_SET_REDUCED = "ci_set_reduced" + + +class Build(metaclass=WithIter): + PACKAGE_RELEASE = "package_release" + PACKAGE_AARCH64 = "package_aarch64" + PACKAGE_ASAN = "package_asan" + PACKAGE_UBSAN = "package_ubsan" + PACKAGE_TSAN = "package_tsan" + PACKAGE_MSAN = "package_msan" + PACKAGE_DEBUG = "package_debug" + BINARY_RELEASE = "binary_release" + BINARY_TIDY = "binary_tidy" + BINARY_DARWIN = "binary_darwin" + BINARY_AARCH64 = "binary_aarch64" + BINARY_AARCH64_V80COMPAT = 
"binary_aarch64_v80compat" + BINARY_FREEBSD = "binary_freebsd" + BINARY_DARWIN_AARCH64 = "binary_darwin_aarch64" + BINARY_PPC64LE = "binary_ppc64le" + BINARY_AMD64_COMPAT = "binary_amd64_compat" + BINARY_AMD64_MUSL = "binary_amd64_musl" + BINARY_RISCV64 = "binary_riscv64" + BINARY_S390X = "binary_s390x" + FUZZERS = "fuzzers" + + +class JobNames(metaclass=WithIter): + STYLE_CHECK = "Style check" + FAST_TEST = "Fast tests" + DOCKER_SERVER = "Docker server and keeper images" + INSTALL_TEST_AMD = "Install packages (amd64)" + INSTALL_TEST_ARM = "Install packages (arm64)" + + STATELESS_TEST_DEBUG = "Stateless tests (debug)" + STATELESS_TEST_RELEASE = "Stateless tests (release)" + STATELESS_TEST_AARCH64 = "Stateless tests (aarch64)" + STATELESS_TEST_ASAN = "Stateless tests (asan)" + STATELESS_TEST_TSAN = "Stateless tests (tsan)" + STATELESS_TEST_MSAN = "Stateless tests (msan)" + STATELESS_TEST_UBSAN = "Stateless tests (ubsan)" + STATELESS_TEST_ANALYZER_RELEASE = "Stateless tests (release, analyzer)" + STATELESS_TEST_DB_REPL_RELEASE = "Stateless tests (release, DatabaseReplicated)" + STATELESS_TEST_S3_RELEASE = "Stateless tests (release, s3 storage)" + STATELESS_TEST_S3_DEBUG = "Stateless tests (debug, s3 storage)" + STATELESS_TEST_S3_TSAN = "Stateless tests (tsan, s3 storage)" + STATELESS_TEST_FLAKY_ASAN = "Stateless tests flaky check (asan)" + + STATEFUL_TEST_DEBUG = "Stateful tests (debug)" + STATEFUL_TEST_RELEASE = "Stateful tests (release)" + STATEFUL_TEST_AARCH64 = "Stateful tests (aarch64)" + STATEFUL_TEST_ASAN = "Stateful tests (asan)" + STATEFUL_TEST_TSAN = "Stateful tests (tsan)" + STATEFUL_TEST_MSAN = "Stateful tests (msan)" + STATEFUL_TEST_UBSAN = "Stateful tests (ubsan)" + STATEFUL_TEST_PARALLEL_REPL_RELEASE = "Stateful tests (release, ParallelReplicas)" + STATEFUL_TEST_PARALLEL_REPL_DEBUG = "Stateful tests (debug, ParallelReplicas)" + STATEFUL_TEST_PARALLEL_REPL_ASAN = "Stateful tests (asan, ParallelReplicas)" + STATEFUL_TEST_PARALLEL_REPL_MSAN = "Stateful tests (msan, ParallelReplicas)" + STATEFUL_TEST_PARALLEL_REPL_UBSAN = "Stateful tests (ubsan, ParallelReplicas)" + STATEFUL_TEST_PARALLEL_REPL_TSAN = "Stateful tests (tsan, ParallelReplicas)" + + STRESS_TEST_ASAN = "Stress test (asan)" + STRESS_TEST_TSAN = "Stress test (tsan)" + STRESS_TEST_UBSAN = "Stress test (ubsan)" + STRESS_TEST_MSAN = "Stress test (msan)" + STRESS_TEST_DEBUG = "Stress test (debug)" + + INTEGRATION_TEST = "Integration tests (release)" + INTEGRATION_TEST_ASAN = "Integration tests (asan)" + INTEGRATION_TEST_ASAN_ANALYZER = "Integration tests (asan, analyzer)" + INTEGRATION_TEST_TSAN = "Integration tests (tsan)" + INTEGRATION_TEST_FLAKY = "Integration tests flaky check (asan)" + + UPGRADE_TEST_DEBUG = "Upgrade check (debug)" + UPGRADE_TEST_ASAN = "Upgrade check (asan)" + UPGRADE_TEST_TSAN = "Upgrade check (tsan)" + UPGRADE_TEST_MSAN = "Upgrade check (msan)" + + UNIT_TEST = "Unit tests (release)" + UNIT_TEST_ASAN = "Unit tests (asan)" + UNIT_TEST_MSAN = "Unit tests (msan)" + UNIT_TEST_TSAN = "Unit tests (tsan)" + UNIT_TEST_UBSAN = "Unit tests (ubsan)" + + AST_FUZZER_TEST_DEBUG = "AST fuzzer (debug)" + AST_FUZZER_TEST_ASAN = "AST fuzzer (asan)" + AST_FUZZER_TEST_MSAN = "AST fuzzer (msan)" + AST_FUZZER_TEST_TSAN = "AST fuzzer (tsan)" + AST_FUZZER_TEST_UBSAN = "AST fuzzer (ubsan)" + + JEPSEN_KEEPER = "ClickHouse Keeper Jepsen" + JEPSEN_SERVER = "ClickHouse Server Jepsen" + + PERFORMANCE_TEST_AMD64 = "Performance Comparison" + PERFORMANCE_TEST_ARM64 = "Performance Comparison Aarch64" + + SQL_LANCER_TEST = "SQLancer 
(release)" + SQL_LOGIC_TEST = "Sqllogic test (release)" + + SQLANCER = "SQLancer (release)" + SQLANCER_DEBUG = "SQLancer (debug)" + SQLTEST = "SQLTest" + + COMPATIBILITY_TEST = "Compatibility check (amd64)" + COMPATIBILITY_TEST_ARM = "Compatibility check (aarch64)" + + CLCIKBENCH_TEST = "ClickBench (amd64)" + CLCIKBENCH_TEST_ARM = "ClickBench (aarch64)" + + LIBFUZZER_TEST = "libFuzzer tests" + + BUILD_CHECK = "ClickHouse build check" + BUILD_CHECK_SPECIAL = "ClickHouse special build check" + + DOCS_CHECK = "Docs check" + BUGFIX_VALIDATE = "tests bugfix validate check" + + +# dynamically update JobName with Build jobs +for attr_name in dir(Build): + if not attr_name.startswith("__") and not callable(getattr(Build, attr_name)): + setattr(JobNames, attr_name, getattr(Build, attr_name)) @dataclass @@ -31,7 +155,7 @@ class DigestConfig: @dataclass class LabelConfig: """ - class to configure different CI scenarious per GH label + class to configure different CI scenarious per GH label or commit message token """ run_jobs: Iterable[str] = frozenset() @@ -82,16 +206,25 @@ class BuildConfig: "./contrib/libmetrohash", "./contrib/update-submodules.sh", "./contrib/CMakeLists.txt", + "./CMakeLists.txt", + "./PreLoad.cmake", "./cmake", "./base", "./programs", "./packages", "./docker/packager/packager", + "./rust", + # FIXME: This is a WA to rebuild the CH and recreate the Performance.tar.zst artifact + # when there are changes in performance test scripts. + # Due to the current design of the perf test we need to rebuild CH when the performance test changes, + # otherwise the changes will not be visible in the PerformanceTest job in CI + "./tests/performance", ], exclude_files=[".md"], docker=["clickhouse/binary-builder"], git_submodules=True, ), + run_command="build_check.py $BUILD_NAME", ) ) @@ -109,7 +242,16 @@ class BuildConfig: @dataclass class BuildReportConfig: builds: List[str] - job_config: JobConfig = field(default_factory=JobConfig) + job_config: JobConfig = field( + default_factory=lambda: JobConfig( + digest=DigestConfig( + include_paths=[ + "./tests/ci/build_report_check.py", + "./tests/ci/upload_result_helper.py", + ], + ), + ) + ) @dataclass @@ -133,19 +275,32 @@ install_check_digest = DigestConfig( include_paths=["./tests/ci/install_check.py"], docker=["clickhouse/install-deb-test", "clickhouse/install-rpm-test"], ) -statless_check_digest = DigestConfig( - include_paths=["./tests/queries/0_stateless/"], +stateless_check_digest = DigestConfig( + include_paths=[ + "./tests/queries/0_stateless/", + "./tests/clickhouse-test", + "./tests/*.txt", + ], exclude_files=[".md"], docker=["clickhouse/stateless-test"], ) stateful_check_digest = DigestConfig( - include_paths=["./tests/queries/1_stateful/"], + include_paths=[ + "./tests/queries/1_stateful/", + "./tests/clickhouse-test", + "./tests/*.txt", + ], exclude_files=[".md"], docker=["clickhouse/stateful-test"], ) -# FIXME: which tests are stresstest? stateless? 
+ stress_check_digest = DigestConfig( - include_paths=["./tests/queries/0_stateless/"], + include_paths=[ + "./tests/queries/0_stateless/", + "./tests/queries/1_stateful/", + "./tests/clickhouse-test", + "./tests/*.txt", + ], exclude_files=[".md"], docker=["clickhouse/stress-test"], ) @@ -209,7 +364,7 @@ bugfix_validate_check = DigestConfig( ) # common test params statless_test_common_params = { - "digest": statless_check_digest, + "digest": stateless_check_digest, "run_command": 'functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT', "timeout": 10800, } @@ -275,7 +430,7 @@ class CiConfig: def get_label_config(self, label_name: str) -> Optional[LabelConfig]: for label, config in self.label_configs.items(): - if label_name == label: + if self.normalize_string(label_name) == self.normalize_string(label): return config return None @@ -295,20 +450,21 @@ class CiConfig: ), f"Invalid check_name or CI_CONFIG outdated, config not found for [{check_name}]" return res # type: ignore - def get_job_with_parents(self, check_name: str) -> List[str]: - def _normalize_string(input_string: str) -> str: - lowercase_string = input_string.lower() - normalized_string = ( - lowercase_string.replace(" ", "_") - .replace("-", "_") - .replace("(", "") - .replace(")", "") - .replace(",", "") - ) - return normalized_string + @staticmethod + def normalize_string(input_string: str) -> str: + lowercase_string = input_string.lower() + normalized_string = ( + lowercase_string.replace(" ", "_") + .replace("-", "_") + .replace("(", "") + .replace(")", "") + .replace(",", "") + ) + return normalized_string + def get_job_with_parents(self, check_name: str) -> List[str]: res = [] - check_name = _normalize_string(check_name) + check_name = self.normalize_string(check_name) for config in ( self.build_config, @@ -317,18 +473,18 @@ class CiConfig: self.other_jobs_configs, ): for job_name in config: # type: ignore - if check_name == _normalize_string(job_name): + if check_name == self.normalize_string(job_name): res.append(job_name) if isinstance(config[job_name], TestConfig): # type: ignore - assert config[ - job_name - ].required_build, f"Error: Experimantal feature... 
Not supported job [{job_name}]" # type: ignore - res.append(config[job_name].required_build) # type: ignore - res.append("Fast tests") - res.append("Style check") + if config[job_name].required_build: # type: ignore + res.append(config[job_name].required_build) # type: ignore elif isinstance(config[job_name], BuildConfig): # type: ignore - res.append("Fast tests") - res.append("Style check") + pass + elif isinstance(config[job_name], BuildReportConfig): # type: ignore + # add all build jobs as parents for build report check + res.extend( + [job for job in JobNames if job in self.build_config] + ) else: assert ( False @@ -370,6 +526,22 @@ class CiConfig: def get_builds_for_report(self, report_name: str) -> List[str]: return self.builds_report_config[report_name].builds + @classmethod + def is_build_job(cls, job: str) -> bool: + return job in Build + + @classmethod + def is_test_job(cls, job: str) -> bool: + return ( + not cls.is_build_job(job) + and not cls.is_build_job(job) + and job != JobNames.STYLE_CHECK + ) + + @classmethod + def is_docs_job(cls, job: str) -> bool: + return job == JobNames.DOCS_CHECK + def validate(self) -> None: errors = [] for name, build_config in self.build_config.items(): @@ -428,61 +600,78 @@ class CiConfig: CI_CONFIG = CiConfig( label_configs={ - Labels.DO_NOT_TEST_LABEL.value: LabelConfig(run_jobs=["Style check"]), + Labels.DO_NOT_TEST_LABEL: LabelConfig(run_jobs=[JobNames.STYLE_CHECK]), + Labels.CI_SET_REDUCED: LabelConfig( + run_jobs=[ + job + for job in JobNames + if not any( + [ + nogo in job + for nogo in ( + "asan", + "tsan", + "msan", + "ubsan", + ) + ] + ) + ] + ), }, build_config={ - "package_release": BuildConfig( - name="package_release", + Build.PACKAGE_RELEASE: BuildConfig( + name=Build.PACKAGE_RELEASE, compiler="clang-17", package_type="deb", static_binary_name="amd64", additional_pkgs=True, ), - "package_aarch64": BuildConfig( - name="package_aarch64", + Build.PACKAGE_AARCH64: BuildConfig( + name=Build.PACKAGE_AARCH64, compiler="clang-17-aarch64", package_type="deb", static_binary_name="aarch64", additional_pkgs=True, ), - "package_asan": BuildConfig( - name="package_asan", + Build.PACKAGE_ASAN: BuildConfig( + name=Build.PACKAGE_ASAN, compiler="clang-17", sanitizer="address", package_type="deb", ), - "package_ubsan": BuildConfig( - name="package_ubsan", + Build.PACKAGE_UBSAN: BuildConfig( + name=Build.PACKAGE_UBSAN, compiler="clang-17", sanitizer="undefined", package_type="deb", ), - "package_tsan": BuildConfig( - name="package_tsan", + Build.PACKAGE_TSAN: BuildConfig( + name=Build.PACKAGE_TSAN, compiler="clang-17", sanitizer="thread", package_type="deb", ), - "package_msan": BuildConfig( - name="package_msan", + Build.PACKAGE_MSAN: BuildConfig( + name=Build.PACKAGE_MSAN, compiler="clang-17", sanitizer="memory", package_type="deb", ), - "package_debug": BuildConfig( - name="package_debug", + Build.PACKAGE_DEBUG: BuildConfig( + name=Build.PACKAGE_DEBUG, compiler="clang-17", debug_build=True, package_type="deb", - sparse_checkout=True, + sparse_checkout=True, # Check that it works with at least one build, see also update-submodules.sh ), - "binary_release": BuildConfig( - name="binary_release", + Build.BINARY_RELEASE: BuildConfig( + name=Build.BINARY_RELEASE, compiler="clang-17", package_type="binary", ), - "binary_tidy": BuildConfig( - name="binary_tidy", + Build.BINARY_TIDY: BuildConfig( + name=Build.BINARY_TIDY, compiler="clang-17", debug_build=True, package_type="binary", @@ -490,107 +679,107 @@ CI_CONFIG = CiConfig( tidy=True, 
comment="clang-tidy is used for static analysis", ), - "binary_darwin": BuildConfig( - name="binary_darwin", + Build.BINARY_DARWIN: BuildConfig( + name=Build.BINARY_DARWIN, compiler="clang-17-darwin", package_type="binary", static_binary_name="macos", - sparse_checkout=True, + sparse_checkout=True, # Check that it works with at least one build, see also update-submodules.sh ), - "binary_aarch64": BuildConfig( - name="binary_aarch64", + Build.BINARY_AARCH64: BuildConfig( + name=Build.BINARY_AARCH64, compiler="clang-17-aarch64", package_type="binary", ), - "binary_aarch64_v80compat": BuildConfig( - name="binary_aarch64_v80compat", + Build.BINARY_AARCH64_V80COMPAT: BuildConfig( + name=Build.BINARY_AARCH64_V80COMPAT, compiler="clang-17-aarch64-v80compat", package_type="binary", static_binary_name="aarch64v80compat", comment="For ARMv8.1 and older", ), - "binary_freebsd": BuildConfig( - name="binary_freebsd", + Build.BINARY_FREEBSD: BuildConfig( + name=Build.BINARY_FREEBSD, compiler="clang-17-freebsd", package_type="binary", static_binary_name="freebsd", ), - "binary_darwin_aarch64": BuildConfig( - name="binary_darwin_aarch64", + Build.BINARY_DARWIN_AARCH64: BuildConfig( + name=Build.BINARY_DARWIN_AARCH64, compiler="clang-17-darwin-aarch64", package_type="binary", static_binary_name="macos-aarch64", ), - "binary_ppc64le": BuildConfig( - name="binary_ppc64le", + Build.BINARY_PPC64LE: BuildConfig( + name=Build.BINARY_PPC64LE, compiler="clang-17-ppc64le", package_type="binary", static_binary_name="powerpc64le", ), - "binary_amd64_compat": BuildConfig( - name="binary_amd64_compat", + Build.BINARY_AMD64_COMPAT: BuildConfig( + name=Build.BINARY_AMD64_COMPAT, compiler="clang-17-amd64-compat", package_type="binary", static_binary_name="amd64compat", comment="SSE2-only build", ), - "binary_amd64_musl": BuildConfig( - name="binary_amd64_musl", + Build.BINARY_AMD64_MUSL: BuildConfig( + name=Build.BINARY_AMD64_MUSL, compiler="clang-17-amd64-musl", package_type="binary", static_binary_name="amd64musl", comment="Build with Musl", ), - "binary_riscv64": BuildConfig( - name="binary_riscv64", + Build.BINARY_RISCV64: BuildConfig( + name=Build.BINARY_RISCV64, compiler="clang-17-riscv64", package_type="binary", static_binary_name="riscv64", ), - "binary_s390x": BuildConfig( - name="binary_s390x", + Build.BINARY_S390X: BuildConfig( + name=Build.BINARY_S390X, compiler="clang-17-s390x", package_type="binary", static_binary_name="s390x", ), - "fuzzers": BuildConfig( - name="fuzzers", + Build.FUZZERS: BuildConfig( + name=Build.FUZZERS, compiler="clang-17", package_type="fuzzers", ), }, builds_report_config={ - "ClickHouse build check": BuildReportConfig( + JobNames.BUILD_CHECK: BuildReportConfig( builds=[ - "package_release", - "package_aarch64", - "package_asan", - "package_ubsan", - "package_tsan", - "package_msan", - "package_debug", - "binary_release", - "fuzzers", + Build.PACKAGE_RELEASE, + Build.PACKAGE_AARCH64, + Build.PACKAGE_ASAN, + Build.PACKAGE_UBSAN, + Build.PACKAGE_TSAN, + Build.PACKAGE_MSAN, + Build.PACKAGE_DEBUG, + Build.BINARY_RELEASE, + Build.FUZZERS, ] ), - "ClickHouse special build check": BuildReportConfig( + JobNames.BUILD_CHECK_SPECIAL: BuildReportConfig( builds=[ - "binary_tidy", - "binary_darwin", - "binary_aarch64", - "binary_aarch64_v80compat", - "binary_freebsd", - "binary_darwin_aarch64", - "binary_ppc64le", - "binary_riscv64", - "binary_s390x", - "binary_amd64_compat", - "binary_amd64_musl", + Build.BINARY_TIDY, + Build.BINARY_DARWIN, + Build.BINARY_AARCH64, + 
Build.BINARY_AARCH64_V80COMPAT, + Build.BINARY_FREEBSD, + Build.BINARY_DARWIN_AARCH64, + Build.BINARY_PPC64LE, + Build.BINARY_RISCV64, + Build.BINARY_S390X, + Build.BINARY_AMD64_COMPAT, + Build.BINARY_AMD64_MUSL, ] ), }, other_jobs_configs={ - "Docker server and keeper images": TestConfig( + JobNames.DOCKER_SERVER: TestConfig( "", job_config=JobConfig( digest=DigestConfig( @@ -602,7 +791,7 @@ CI_CONFIG = CiConfig( ) ), ), - "Docs check": TestConfig( + JobNames.DOCS_CHECK: TestConfig( "", job_config=JobConfig( digest=DigestConfig( @@ -611,7 +800,7 @@ CI_CONFIG = CiConfig( ), ), ), - "Fast tests": TestConfig( + JobNames.FAST_TEST: TestConfig( "", job_config=JobConfig( digest=DigestConfig( @@ -621,238 +810,249 @@ CI_CONFIG = CiConfig( ) ), ), - "Style check": TestConfig( + JobNames.STYLE_CHECK: TestConfig( "", job_config=JobConfig( run_always=True, ), ), - "tests bugfix validate check": TestConfig( + JobNames.BUGFIX_VALIDATE: TestConfig( "", # we run this check by label - no digest required job_config=JobConfig(run_by_label="pr-bugfix"), ), }, test_configs={ - "Install packages (amd64)": TestConfig( - "package_release", job_config=JobConfig(digest=install_check_digest) + JobNames.INSTALL_TEST_AMD: TestConfig( + Build.PACKAGE_RELEASE, job_config=JobConfig(digest=install_check_digest) ), - "Install packages (arm64)": TestConfig( - "package_aarch64", job_config=JobConfig(digest=install_check_digest) + JobNames.INSTALL_TEST_ARM: TestConfig( + Build.PACKAGE_AARCH64, job_config=JobConfig(digest=install_check_digest) ), - "Stateful tests (asan)": TestConfig( - "package_asan", job_config=JobConfig(**stateful_test_common_params) # type: ignore + JobNames.STATEFUL_TEST_ASAN: TestConfig( + Build.PACKAGE_ASAN, job_config=JobConfig(**stateful_test_common_params) # type: ignore ), - "Stateful tests (tsan)": TestConfig( - "package_tsan", job_config=JobConfig(**stateful_test_common_params) # type: ignore + JobNames.STATEFUL_TEST_TSAN: TestConfig( + Build.PACKAGE_TSAN, job_config=JobConfig(**stateful_test_common_params) # type: ignore ), - "Stateful tests (msan)": TestConfig( - "package_msan", job_config=JobConfig(**stateful_test_common_params) # type: ignore + JobNames.STATEFUL_TEST_MSAN: TestConfig( + Build.PACKAGE_MSAN, job_config=JobConfig(**stateful_test_common_params) # type: ignore ), - "Stateful tests (ubsan)": TestConfig( - "package_ubsan", job_config=JobConfig(**stateful_test_common_params) # type: ignore + JobNames.STATEFUL_TEST_UBSAN: TestConfig( + Build.PACKAGE_UBSAN, job_config=JobConfig(**stateful_test_common_params) # type: ignore ), - "Stateful tests (debug)": TestConfig( - "package_debug", job_config=JobConfig(**stateful_test_common_params) # type: ignore + JobNames.STATEFUL_TEST_DEBUG: TestConfig( + Build.PACKAGE_DEBUG, job_config=JobConfig(**stateful_test_common_params) # type: ignore ), - "Stateful tests (release)": TestConfig( - "package_release", job_config=JobConfig(**stateful_test_common_params) # type: ignore + JobNames.STATEFUL_TEST_RELEASE: TestConfig( + Build.PACKAGE_RELEASE, job_config=JobConfig(**stateful_test_common_params) # type: ignore ), - "Stateful tests (aarch64)": TestConfig( - "package_aarch64", job_config=JobConfig(**stateful_test_common_params) # type: ignore - ), - "Stateful tests (release, DatabaseOrdinary)": TestConfig( - "package_release", job_config=JobConfig(**stateful_test_common_params) # type: ignore + JobNames.STATEFUL_TEST_AARCH64: TestConfig( + Build.PACKAGE_AARCH64, job_config=JobConfig(**stateful_test_common_params) # type: ignore ), + # FIXME: delete? 
+ # "Stateful tests (release, DatabaseOrdinary)": TestConfig( + # Build.PACKAGE_RELEASE, job_config=JobConfig(**stateful_test_common_params) # type: ignore + # ), # "Stateful tests (release, DatabaseReplicated)": TestConfig( - # "package_release", job_config=JobConfig(**stateful_test_common_params) # type: ignore + # Build.PACKAGE_RELEASE, job_config=JobConfig(**stateful_test_common_params) # type: ignore # ), # Stateful tests for parallel replicas - "Stateful tests (release, ParallelReplicas)": TestConfig( - "package_release", job_config=JobConfig(**stateful_test_common_params) # type: ignore + JobNames.STATEFUL_TEST_PARALLEL_REPL_RELEASE: TestConfig( + Build.PACKAGE_RELEASE, job_config=JobConfig(**stateful_test_common_params) # type: ignore ), - "Stateful tests (debug, ParallelReplicas)": TestConfig( - "package_debug", job_config=JobConfig(**stateful_test_common_params) # type: ignore + JobNames.STATEFUL_TEST_PARALLEL_REPL_DEBUG: TestConfig( + Build.PACKAGE_DEBUG, job_config=JobConfig(**stateful_test_common_params) # type: ignore ), - "Stateful tests (asan, ParallelReplicas)": TestConfig( - "package_asan", job_config=JobConfig(**stateful_test_common_params) # type: ignore + JobNames.STATEFUL_TEST_PARALLEL_REPL_ASAN: TestConfig( + Build.PACKAGE_ASAN, job_config=JobConfig(**stateful_test_common_params) # type: ignore ), - "Stateful tests (msan, ParallelReplicas)": TestConfig( - "package_msan", job_config=JobConfig(**stateful_test_common_params) # type: ignore + JobNames.STATEFUL_TEST_PARALLEL_REPL_MSAN: TestConfig( + Build.PACKAGE_MSAN, job_config=JobConfig(**stateful_test_common_params) # type: ignore ), - "Stateful tests (ubsan, ParallelReplicas)": TestConfig( - "package_ubsan", job_config=JobConfig(**stateful_test_common_params) # type: ignore + JobNames.STATEFUL_TEST_PARALLEL_REPL_UBSAN: TestConfig( + Build.PACKAGE_UBSAN, job_config=JobConfig(**stateful_test_common_params) # type: ignore ), - "Stateful tests (tsan, ParallelReplicas)": TestConfig( - "package_tsan", job_config=JobConfig(**stateful_test_common_params) # type: ignore + JobNames.STATEFUL_TEST_PARALLEL_REPL_TSAN: TestConfig( + Build.PACKAGE_TSAN, job_config=JobConfig(**stateful_test_common_params) # type: ignore ), # End stateful tests for parallel replicas - "Stateless tests (asan)": TestConfig( - "package_asan", + JobNames.STATELESS_TEST_ASAN: TestConfig( + Build.PACKAGE_ASAN, job_config=JobConfig(num_batches=4, **statless_test_common_params), # type: ignore ), - "Stateless tests (tsan)": TestConfig( - "package_tsan", + JobNames.STATELESS_TEST_TSAN: TestConfig( + Build.PACKAGE_TSAN, job_config=JobConfig(num_batches=5, **statless_test_common_params), # type: ignore ), - "Stateless tests (msan)": TestConfig( - "package_msan", + JobNames.STATELESS_TEST_MSAN: TestConfig( + Build.PACKAGE_MSAN, job_config=JobConfig(num_batches=6, **statless_test_common_params), # type: ignore ), - "Stateless tests (ubsan)": TestConfig( - "package_ubsan", + JobNames.STATELESS_TEST_UBSAN: TestConfig( + Build.PACKAGE_UBSAN, job_config=JobConfig(num_batches=2, **statless_test_common_params), # type: ignore ), - "Stateless tests (debug)": TestConfig( - "package_debug", + JobNames.STATELESS_TEST_DEBUG: TestConfig( + Build.PACKAGE_DEBUG, job_config=JobConfig(num_batches=5, **statless_test_common_params), # type: ignore ), - "Stateless tests (release)": TestConfig( - "package_release", job_config=JobConfig(**statless_test_common_params) # type: ignore + JobNames.STATELESS_TEST_RELEASE: TestConfig( + Build.PACKAGE_RELEASE, 
job_config=JobConfig(**statless_test_common_params) # type: ignore ), - "Stateless tests (aarch64)": TestConfig( - "package_aarch64", job_config=JobConfig(**statless_test_common_params) # type: ignore + JobNames.STATELESS_TEST_AARCH64: TestConfig( + Build.PACKAGE_AARCH64, job_config=JobConfig(**statless_test_common_params) # type: ignore ), - "Stateless tests (release, analyzer)": TestConfig( - "package_release", job_config=JobConfig(**statless_test_common_params) # type: ignore + JobNames.STATELESS_TEST_ANALYZER_RELEASE: TestConfig( + Build.PACKAGE_RELEASE, job_config=JobConfig(**statless_test_common_params) # type: ignore ), - "Stateless tests (release, DatabaseOrdinary)": TestConfig( - "package_release", job_config=JobConfig(**statless_test_common_params) # type: ignore - ), - "Stateless tests (release, DatabaseReplicated)": TestConfig( - "package_release", + # delete? + # "Stateless tests (release, DatabaseOrdinary)": TestConfig( + # Build.PACKAGE_RELEASE, job_config=JobConfig(**statless_test_common_params) # type: ignore + # ), + JobNames.STATELESS_TEST_DB_REPL_RELEASE: TestConfig( + Build.PACKAGE_RELEASE, job_config=JobConfig(num_batches=4, **statless_test_common_params), # type: ignore ), - "Stateless tests (release, s3 storage)": TestConfig( - "package_release", + JobNames.STATELESS_TEST_S3_RELEASE: TestConfig( + Build.PACKAGE_RELEASE, job_config=JobConfig(num_batches=2, **statless_test_common_params), # type: ignore ), - "Stateless tests (debug, s3 storage)": TestConfig( - "package_debug", + JobNames.STATELESS_TEST_S3_DEBUG: TestConfig( + Build.PACKAGE_DEBUG, job_config=JobConfig(num_batches=6, **statless_test_common_params), # type: ignore ), - "Stateless tests (tsan, s3 storage)": TestConfig( - "package_tsan", + JobNames.STATELESS_TEST_S3_DEBUG: TestConfig( + Build.PACKAGE_TSAN, job_config=JobConfig(num_batches=5, **statless_test_common_params), # type: ignore ), - "Stress test (asan)": TestConfig( - "package_asan", job_config=JobConfig(**stress_test_common_params) # type: ignore + JobNames.STRESS_TEST_ASAN: TestConfig( + Build.PACKAGE_ASAN, job_config=JobConfig(**stress_test_common_params) # type: ignore ), - "Stress test (tsan)": TestConfig( - "package_tsan", job_config=JobConfig(**stress_test_common_params) # type: ignore + JobNames.STRESS_TEST_TSAN: TestConfig( + Build.PACKAGE_TSAN, job_config=JobConfig(**stress_test_common_params) # type: ignore ), - "Stress test (ubsan)": TestConfig( - "package_ubsan", job_config=JobConfig(**stress_test_common_params) # type: ignore + JobNames.STRESS_TEST_UBSAN: TestConfig( + Build.PACKAGE_UBSAN, job_config=JobConfig(**stress_test_common_params) # type: ignore ), - "Stress test (msan)": TestConfig( - "package_msan", job_config=JobConfig(**stress_test_common_params) # type: ignore + JobNames.STRESS_TEST_MSAN: TestConfig( + Build.PACKAGE_MSAN, job_config=JobConfig(**stress_test_common_params) # type: ignore ), - "Stress test (debug)": TestConfig( - "package_debug", job_config=JobConfig(**stress_test_common_params) # type: ignore + JobNames.STRESS_TEST_DEBUG: TestConfig( + Build.PACKAGE_DEBUG, job_config=JobConfig(**stress_test_common_params) # type: ignore ), - "Upgrade check (asan)": TestConfig( - "package_asan", job_config=JobConfig(**upgrade_test_common_params) # type: ignore + JobNames.UPGRADE_TEST_ASAN: TestConfig( + Build.PACKAGE_ASAN, job_config=JobConfig(**upgrade_test_common_params) # type: ignore ), - "Upgrade check (tsan)": TestConfig( - "package_tsan", job_config=JobConfig(**upgrade_test_common_params) # type: ignore + 
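num_batches only declares how many parallel runs a check gets; each run later learns its index through RUN_BY_HASH_NUM / RUN_BY_HASH_TOTAL (see the functional and libFuzzer checks further down). One common way such a split can work, hashing test names into buckets, shown purely as an illustration (my_share and the test names are invented, not the project's actual splitting code):

import hashlib

def my_share(tests, run_by_hash_num, run_by_hash_total):
    def bucket(name: str) -> int:
        return int(hashlib.sha1(name.encode()).hexdigest(), 16) % run_by_hash_total
    return [t for t in tests if bucket(t) == run_by_hash_num]

tests = [f"{i:05d}_some_test" for i in range(20)]
shares = [my_share(tests, i, 4) for i in range(4)]
assert sorted(sum(shares, [])) == sorted(tests)  # each test lands in exactly one batch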
JobNames.UPGRADE_TEST_TSAN: TestConfig( + Build.PACKAGE_TSAN, job_config=JobConfig(**upgrade_test_common_params) # type: ignore ), - "Upgrade check (msan)": TestConfig( - "package_msan", job_config=JobConfig(**upgrade_test_common_params) # type: ignore + JobNames.UPGRADE_TEST_MSAN: TestConfig( + Build.PACKAGE_MSAN, job_config=JobConfig(**upgrade_test_common_params) # type: ignore ), - "Upgrade check (debug)": TestConfig( - "package_debug", job_config=JobConfig(**upgrade_test_common_params) # type: ignore + JobNames.UPGRADE_TEST_DEBUG: TestConfig( + Build.PACKAGE_DEBUG, job_config=JobConfig(**upgrade_test_common_params) # type: ignore ), - "Integration tests (asan)": TestConfig( - "package_asan", + JobNames.INTEGRATION_TEST_ASAN: TestConfig( + Build.PACKAGE_ASAN, job_config=JobConfig(num_batches=4, **integration_test_common_params), # type: ignore ), - "Integration tests (asan, analyzer)": TestConfig( - "package_asan", + JobNames.INTEGRATION_TEST_ASAN_ANALYZER: TestConfig( + Build.PACKAGE_ASAN, job_config=JobConfig(num_batches=6, **integration_test_common_params), # type: ignore ), - "Integration tests (tsan)": TestConfig( - "package_tsan", + JobNames.INTEGRATION_TEST_TSAN: TestConfig( + Build.PACKAGE_TSAN, job_config=JobConfig(num_batches=6, **integration_test_common_params), # type: ignore ), # FIXME: currently no wf has this job. Try to enable - # "Integration tests (msan)": TestConfig("package_msan", job_config=JobConfig(num_batches=6, **integration_test_common_params) # type: ignore + # "Integration tests (msan)": TestConfig(Build.PACKAGE_MSAN, job_config=JobConfig(num_batches=6, **integration_test_common_params) # type: ignore # ), - "Integration tests (release)": TestConfig( - "package_release", + JobNames.INTEGRATION_TEST: TestConfig( + Build.PACKAGE_RELEASE, job_config=JobConfig(num_batches=4, **integration_test_common_params), # type: ignore ), - "Integration tests flaky check (asan)": TestConfig( - "package_asan", job_config=JobConfig(**integration_test_common_params) # type: ignore + JobNames.INTEGRATION_TEST_FLAKY: TestConfig( + Build.PACKAGE_ASAN, job_config=JobConfig(**integration_test_common_params) # type: ignore ), - "Compatibility check (amd64)": TestConfig( - "package_release", job_config=JobConfig(digest=compatibility_check_digest) + JobNames.COMPATIBILITY_TEST: TestConfig( + Build.PACKAGE_RELEASE, + job_config=JobConfig(digest=compatibility_check_digest), ), - "Compatibility check (aarch64)": TestConfig( - "package_aarch64", job_config=JobConfig(digest=compatibility_check_digest) + JobNames.COMPATIBILITY_TEST_ARM: TestConfig( + Build.PACKAGE_AARCH64, + job_config=JobConfig(digest=compatibility_check_digest), ), - "Unit tests (release)": TestConfig( - "binary_release", job_config=JobConfig(**unit_test_common_params) # type: ignore + JobNames.UNIT_TEST: TestConfig( + Build.BINARY_RELEASE, job_config=JobConfig(**unit_test_common_params) # type: ignore ), - "Unit tests (asan)": TestConfig( - "package_asan", job_config=JobConfig(**unit_test_common_params) # type: ignore + JobNames.UNIT_TEST_ASAN: TestConfig( + Build.PACKAGE_ASAN, job_config=JobConfig(**unit_test_common_params) # type: ignore ), - "Unit tests (msan)": TestConfig( - "package_msan", job_config=JobConfig(**unit_test_common_params) # type: ignore + JobNames.UNIT_TEST_MSAN: TestConfig( + Build.PACKAGE_MSAN, job_config=JobConfig(**unit_test_common_params) # type: ignore ), - "Unit tests (tsan)": TestConfig( - "package_tsan", job_config=JobConfig(**unit_test_common_params) # type: ignore + JobNames.UNIT_TEST_TSAN: 
TestConfig( + Build.PACKAGE_TSAN, job_config=JobConfig(**unit_test_common_params) # type: ignore ), - "Unit tests (ubsan)": TestConfig( - "package_ubsan", job_config=JobConfig(**unit_test_common_params) # type: ignore + JobNames.UNIT_TEST_UBSAN: TestConfig( + Build.PACKAGE_UBSAN, job_config=JobConfig(**unit_test_common_params) # type: ignore ), - "AST fuzzer (debug)": TestConfig( - "package_debug", job_config=JobConfig(**astfuzzer_test_common_params) # type: ignore + JobNames.AST_FUZZER_TEST_DEBUG: TestConfig( + Build.PACKAGE_DEBUG, job_config=JobConfig(**astfuzzer_test_common_params) # type: ignore ), - "AST fuzzer (asan)": TestConfig( - "package_asan", job_config=JobConfig(**astfuzzer_test_common_params) # type: ignore + JobNames.AST_FUZZER_TEST_ASAN: TestConfig( + Build.PACKAGE_ASAN, job_config=JobConfig(**astfuzzer_test_common_params) # type: ignore ), - "AST fuzzer (msan)": TestConfig( - "package_msan", job_config=JobConfig(**astfuzzer_test_common_params) # type: ignore + JobNames.AST_FUZZER_TEST_MSAN: TestConfig( + Build.PACKAGE_MSAN, job_config=JobConfig(**astfuzzer_test_common_params) # type: ignore ), - "AST fuzzer (tsan)": TestConfig( - "package_tsan", job_config=JobConfig(**astfuzzer_test_common_params) # type: ignore + JobNames.AST_FUZZER_TEST_TSAN: TestConfig( + Build.PACKAGE_TSAN, job_config=JobConfig(**astfuzzer_test_common_params) # type: ignore ), - "AST fuzzer (ubsan)": TestConfig( - "package_ubsan", job_config=JobConfig(**astfuzzer_test_common_params) # type: ignore + JobNames.AST_FUZZER_TEST_UBSAN: TestConfig( + Build.PACKAGE_UBSAN, job_config=JobConfig(**astfuzzer_test_common_params) # type: ignore ), - "Stateless tests flaky check (asan)": TestConfig( + JobNames.STATELESS_TEST_FLAKY_ASAN: TestConfig( # replace to non-default - "package_asan", + Build.PACKAGE_ASAN, job_config=JobConfig(**{**statless_test_common_params, "timeout": 3600}), # type: ignore ), - # FIXME: add digest and params - "ClickHouse Keeper Jepsen": TestConfig("binary_release"), - # FIXME: add digest and params - "ClickHouse Server Jepsen": TestConfig("binary_release"), - "Performance Comparison": TestConfig( - "package_release", + JobNames.JEPSEN_KEEPER: TestConfig( + Build.BINARY_RELEASE, + job_config=JobConfig( + run_by_label="jepsen-test", run_command="jepsen_check.py keeper" + ), + ), + JobNames.JEPSEN_SERVER: TestConfig( + Build.BINARY_RELEASE, + job_config=JobConfig( + run_by_label="jepsen-test", run_command="jepsen_check.py server" + ), + ), + JobNames.PERFORMANCE_TEST_AMD64: TestConfig( + Build.PACKAGE_RELEASE, job_config=JobConfig(num_batches=4, **perf_test_common_params), # type: ignore ), - "Performance Comparison Aarch64": TestConfig( - "package_aarch64", + JobNames.PERFORMANCE_TEST_ARM64: TestConfig( + Build.PACKAGE_AARCH64, job_config=JobConfig(num_batches=4, run_by_label="pr-performance", **perf_test_common_params), # type: ignore ), - "SQLancer (release)": TestConfig( - "package_release", job_config=JobConfig(**sqllancer_test_common_params) # type: ignore + JobNames.SQLANCER: TestConfig( + Build.PACKAGE_RELEASE, job_config=JobConfig(**sqllancer_test_common_params) # type: ignore ), - "SQLancer (debug)": TestConfig( - "package_debug", job_config=JobConfig(**sqllancer_test_common_params) # type: ignore + JobNames.SQLANCER_DEBUG: TestConfig( + Build.PACKAGE_DEBUG, job_config=JobConfig(**sqllancer_test_common_params) # type: ignore ), - "Sqllogic test (release)": TestConfig( - "package_release", job_config=JobConfig(**sqllogic_test_params) # type: ignore + JobNames.SQL_LOGIC_TEST: 
TestConfig( + Build.PACKAGE_RELEASE, job_config=JobConfig(**sqllogic_test_params) # type: ignore ), - "SQLTest": TestConfig( - "package_release", job_config=JobConfig(**sql_test_params) # type: ignore + JobNames.SQL_LOGIC_TEST: TestConfig( + Build.PACKAGE_RELEASE, job_config=JobConfig(**sql_test_params) # type: ignore ), - "ClickBench (amd64)": TestConfig("package_release"), - "ClickBench (aarch64)": TestConfig("package_aarch64"), - # FIXME: add digest and params - "libFuzzer tests": TestConfig("fuzzers"), # type: ignore + JobNames.CLCIKBENCH_TEST: TestConfig(Build.PACKAGE_RELEASE), + JobNames.CLCIKBENCH_TEST_ARM: TestConfig(Build.PACKAGE_AARCH64), + JobNames.LIBFUZZER_TEST: TestConfig(Build.FUZZERS), # type: ignore }, ) CI_CONFIG.validate() @@ -861,18 +1061,18 @@ CI_CONFIG.validate() # checks required by Mergeable Check REQUIRED_CHECKS = [ "PR Check", - "ClickHouse build check", - "ClickHouse special build check", - "Docs Check", - "Fast test", - "Stateful tests (release)", - "Stateless tests (release)", - "Style Check", - "Unit tests (asan)", - "Unit tests (msan)", - "Unit tests (release)", - "Unit tests (tsan)", - "Unit tests (ubsan)", + JobNames.BUILD_CHECK, + JobNames.BUILD_CHECK_SPECIAL, + JobNames.DOCS_CHECK, + JobNames.FAST_TEST, + JobNames.STATEFUL_TEST_RELEASE, + JobNames.STATELESS_TEST_RELEASE, + JobNames.STYLE_CHECK, + JobNames.UNIT_TEST_ASAN, + JobNames.UNIT_TEST_MSAN, + JobNames.UNIT_TEST, + JobNames.UNIT_TEST_TSAN, + JobNames.UNIT_TEST_UBSAN, ] diff --git a/tests/ci/ci_utils.py b/tests/ci/ci_utils.py new file mode 100644 index 00000000000..3c267cff79d --- /dev/null +++ b/tests/ci/ci_utils.py @@ -0,0 +1,19 @@ +from contextlib import contextmanager +import os +from typing import Union, Iterator +from pathlib import Path + + +class WithIter(type): + def __iter__(cls): + return (v for k, v in cls.__dict__.items() if not k.startswith("_")) + + +@contextmanager +def cd(path: Union[Path, str]) -> Iterator[None]: + oldpwd = os.getcwd() + os.chdir(path) + try: + yield + finally: + os.chdir(oldpwd) diff --git a/tests/ci/clickbench.py b/tests/ci/clickbench.py index f9fadae4e03..72827929ff9 100644 --- a/tests/ci/clickbench.py +++ b/tests/ci/clickbench.py @@ -6,34 +6,22 @@ import logging import os import subprocess import sys -import atexit from pathlib import Path from typing import List, Tuple -from github import Github - from build_download_helper import download_all_deb_packages from clickhouse_helper import ( CiLogsCredentials, - ClickHouseHelper, - prepare_tests_results_for_clickhouse, ) from commit_status_helper import ( - RerunHelper, - get_commit, override_status, - post_commit_status, - update_mergeable_check, ) from docker_images_helper import get_docker_image, pull_image, DockerImage from env_helper import TEMP_PATH, REPORT_PATH -from get_robot_token import get_best_robot_token from pr_info import FORCE_TESTS_LABEL, PRInfo -from s3_helper import S3Helper from stopwatch import Stopwatch from tee_popen import TeePopen -from upload_result_helper import upload_results -from report import TestResults +from report import JobReport, TestResults def get_image_name() -> str: @@ -128,18 +116,8 @@ def main(): args = parse_args() check_name = args.check_name - gh = Github(get_best_robot_token(), per_page=100) - pr_info = PRInfo() - commit = get_commit(gh, pr_info.sha) - atexit.register(update_mergeable_check, commit, pr_info, check_name) - - rerun_helper = RerunHelper(commit, check_name) - if rerun_helper.is_already_finished_by_status(): - logging.info("Check is already finished 
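The new ci_utils.WithIter metaclass is what lets the config iterate over plain constant-holder classes such as JobNames and Build. A self-contained sketch with a toy Colors class (Colors is not part of the repository):

class WithIter(type):
    def __iter__(cls):
        # yield the values of all public class attributes, in definition order
        return (v for k, v in cls.__dict__.items() if not k.startswith("_"))

class Colors(metaclass=WithIter):
    RED = "red"
    GREEN = "green"

assert list(Colors) == ["red", "green"]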
according to github status, exiting") - sys.exit(0) - image_name = get_image_name() docker_image = pull_image(get_docker_image(image_name)) @@ -186,39 +164,20 @@ def main(): logging.warning("Failed to change files owner in %s, ignoring it", temp_path) ci_logs_credentials.clean_ci_logs_from_credentials(run_log_path) - s3_helper = S3Helper() state, description, test_results, additional_logs = process_results( result_path, server_log_path ) state = override_status(state, check_name) - ch_helper = ClickHouseHelper() - - report_url = upload_results( - s3_helper, - pr_info.number, - pr_info.sha, - test_results, - [run_log_path] + additional_logs, - check_name, - ) - - print(f"::notice:: {check_name} Report url: {report_url}") - post_commit_status( - commit, state, report_url, description, check_name, pr_info, dump_to_file=True - ) - - prepared_events = prepare_tests_results_for_clickhouse( - pr_info, - test_results, - state, - stopwatch.duration_seconds, - stopwatch.start_time_str, - report_url, - check_name, - ) - ch_helper.insert_events_into(db="default", table="checks", events=prepared_events) + JobReport( + description=description, + test_results=test_results, + status=state, + start_time=stopwatch.start_time_str, + duration=stopwatch.duration_seconds, + additional_files=[run_log_path] + additional_logs, + ).dump() if state != "success": if FORCE_TESTS_LABEL in pr_info.labels: diff --git a/tests/ci/compatibility_check.py b/tests/ci/compatibility_check.py index e3da81a54ad..a0c6294d8fd 100644 --- a/tests/ci/compatibility_check.py +++ b/tests/ci/compatibility_check.py @@ -8,22 +8,11 @@ import logging import subprocess import sys -from github import Github - from build_download_helper import download_builds_filter -from clickhouse_helper import ( - ClickHouseHelper, - prepare_tests_results_for_clickhouse, -) -from commit_status_helper import RerunHelper, get_commit, post_commit_status from docker_images_helper import DockerImage, get_docker_image, pull_image from env_helper import TEMP_PATH, REPORT_PATH -from get_robot_token import get_best_robot_token -from pr_info import PRInfo -from report import TestResults, TestResult -from s3_helper import S3Helper +from report import JobReport, TestResults, TestResult from stopwatch import Stopwatch -from upload_result_helper import upload_results IMAGE_UBUNTU = "clickhouse/test-old-ubuntu" IMAGE_CENTOS = "clickhouse/test-old-centos" @@ -149,16 +138,6 @@ def main(): temp_path.mkdir(parents=True, exist_ok=True) reports_path.mkdir(parents=True, exist_ok=True) - pr_info = PRInfo() - - gh = Github(get_best_robot_token(), per_page=100) - commit = get_commit(gh, pr_info.sha) - - rerun_helper = RerunHelper(commit, args.check_name) - if rerun_helper.is_already_finished_by_status(): - logging.info("Check is already finished according to github status, exiting") - sys.exit(0) - packages_path = temp_path / "packages" packages_path.mkdir(parents=True, exist_ok=True) @@ -219,7 +198,6 @@ def main(): else: raise Exception("Can't determine max glibc version") - s3_helper = S3Helper() state, description, test_results, additional_logs = process_result( result_path, server_log_path, @@ -228,38 +206,14 @@ def main(): max_glibc_version, ) - ch_helper = ClickHouseHelper() - - report_url = upload_results( - s3_helper, - pr_info.number, - pr_info.sha, - test_results, - additional_logs, - args.check_name, - ) - print(f"::notice ::Report url: {report_url}") - post_commit_status( - commit, - state, - report_url, - description, - args.check_name, - pr_info, - dump_to_file=True, - ) 
- - prepared_events = prepare_tests_results_for_clickhouse( - pr_info, - test_results, - state, - stopwatch.duration_seconds, - stopwatch.start_time_str, - report_url, - args.check_name, - ) - - ch_helper.insert_events_into(db="default", table="checks", events=prepared_events) + JobReport( + description=description, + test_results=test_results, + status=state, + start_time=stopwatch.start_time_str, + duration=stopwatch.duration_seconds, + additional_files=additional_logs, + ).dump() if state == "failure": sys.exit(1) diff --git a/tests/ci/digest_helper.py b/tests/ci/digest_helper.py index c0cbae13a45..8d6ec127f6e 100644 --- a/tests/ci/digest_helper.py +++ b/tests/ci/digest_helper.py @@ -11,6 +11,8 @@ from sys import modules from docker_images_helper import get_images_info from ci_config import DigestConfig from git_helper import Runner +from env_helper import ROOT_DIR +from ci_utils import cd DOCKER_DIGEST_LEN = 12 JOB_DIGEST_LEN = 10 @@ -67,17 +69,18 @@ def digest_paths( The order is processed as given""" hash_object = hash_object or md5() paths_all: List[Path] = [] - for p in paths: - if isinstance(p, str) and "*" in p: - for path in Path(".").glob(p): - bisect.insort(paths_all, path.absolute()) # type: ignore[misc] - else: - bisect.insort(paths_all, Path(p).absolute()) # type: ignore[misc] - for path in paths_all: # type: ignore - if path.exists(): - digest_path(path, hash_object, exclude_files, exclude_dirs) - else: - raise AssertionError(f"Invalid path: {path}") + with cd(ROOT_DIR): + for p in paths: + if isinstance(p, str) and "*" in p: + for path in Path(".").glob(p): + bisect.insort(paths_all, path.absolute()) # type: ignore[misc] + else: + bisect.insort(paths_all, Path(p).absolute()) # type: ignore[misc] + for path in paths_all: # type: ignore + if path.exists(): + digest_path(path, hash_object, exclude_files, exclude_dirs) + else: + raise AssertionError(f"Invalid path: {path}") return hash_object @@ -86,15 +89,16 @@ def digest_script(path_str: str) -> HASH: path = Path(path_str) parent = path.parent md5_hash = md5() - try: - for script in modules.values(): - script_path = getattr(script, "__file__", "") - if parent.absolute().as_posix() in script_path: - logger.debug("Updating the hash with %s", script_path) - _digest_file(Path(script_path), md5_hash) - except RuntimeError: - logger.warning("The modules size has changed, retry calculating digest") - return digest_script(path_str) + with cd(ROOT_DIR): + try: + for script in modules.values(): + script_path = getattr(script, "__file__", "") + if parent.absolute().as_posix() in script_path: + logger.debug("Updating the hash with %s", script_path) + _digest_file(Path(script_path), md5_hash) + except RuntimeError: + logger.warning("The modules size has changed, retry calculating digest") + return digest_script(path_str) return md5_hash @@ -113,17 +117,18 @@ class DockerDigester: def get_image_digest(self, name: str) -> str: assert isinstance(name, str) - deps = [name] - digest = None - while deps: - dep_name = deps.pop(0) - digest = digest_path( - self.images_info[dep_name]["path"], - digest, - exclude_files=self.EXCLUDE_FILES, - ) - deps += self.images_info[dep_name]["deps"] - assert digest + with cd(ROOT_DIR): + deps = [name] + digest = None + while deps: + dep_name = deps.pop(0) + digest = digest_path( + self.images_info[dep_name]["path"], + digest, + exclude_files=self.EXCLUDE_FILES, + ) + deps += self.images_info[dep_name]["deps"] + assert digest return digest.hexdigest()[0:DOCKER_DIGEST_LEN] def get_all_digests(self) -> Dict: 
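digest_paths and DockerDigester now resolve paths and glob patterns while chdir'ed to the repository root via the new cd() helper, so digests no longer depend on the caller's working directory. A simplified sketch of that idea (digest_paths here is a stripped-down stand-in, not the real implementation):

import os
from contextlib import contextmanager
from hashlib import md5
from pathlib import Path

@contextmanager
def cd(path):
    old = os.getcwd()
    os.chdir(path)
    try:
        yield
    finally:
        os.chdir(old)

def digest_paths(root, patterns):
    hasher = md5()
    with cd(root):
        for pattern in patterns:
            for path in sorted(Path(".").glob(pattern)):
                if path.is_file():
                    hasher.update(path.read_bytes())
    return hasher.hexdigest()

# e.g. digest_paths("/path/to/repo", ["tests/ci/*.py"])  # paths are illustrative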
diff --git a/tests/ci/docker_images_check.py b/tests/ci/docker_images_check.py index ae6e81c4c9f..a25669d85d0 100644 --- a/tests/ci/docker_images_check.py +++ b/tests/ci/docker_images_check.py @@ -12,7 +12,7 @@ from github import Github from clickhouse_helper import ClickHouseHelper, prepare_tests_results_for_clickhouse from commit_status_helper import format_description, get_commit, post_commit_status -from env_helper import ROOT_DIR, RUNNER_TEMP, GITHUB_RUN_URL +from env_helper import RUNNER_TEMP, GITHUB_RUN_URL from get_robot_token import get_best_robot_token from pr_info import PRInfo from report import TestResults, TestResult @@ -170,8 +170,6 @@ def parse_args() -> argparse.Namespace: def main(): - # to be always aligned with docker paths from image.json - os.chdir(ROOT_DIR) logging.basicConfig(level=logging.INFO) stopwatch = Stopwatch() diff --git a/tests/ci/docker_manifests_merge.py b/tests/ci/docker_manifests_merge.py index 97e7ca3b5c0..f87246be24b 100644 --- a/tests/ci/docker_manifests_merge.py +++ b/tests/ci/docker_manifests_merge.py @@ -21,7 +21,6 @@ from pr_info import PRInfo from report import TestResult from s3_helper import S3Helper from stopwatch import Stopwatch -from env_helper import ROOT_DIR from upload_result_helper import upload_results from docker_images_helper import docker_login, get_images_oredered_list @@ -126,8 +125,6 @@ def create_manifest( def main(): - # to be aligned with docker paths from image.json - os.chdir(ROOT_DIR) logging.basicConfig(level=logging.INFO) stopwatch = Stopwatch() diff --git a/tests/ci/docker_server.py b/tests/ci/docker_server.py index b75808890bd..b9e5c13ec42 100644 --- a/tests/ci/docker_server.py +++ b/tests/ci/docker_server.py @@ -10,11 +10,7 @@ from pathlib import Path from os import path as p, makedirs from typing import Dict, List -from github import Github - from build_check import get_release_or_pr -from clickhouse_helper import ClickHouseHelper, prepare_tests_results_for_clickhouse -from commit_status_helper import format_description, get_commit, post_commit_status from docker_images_helper import DockerImageData, docker_login from env_helper import ( GITHUB_RUN_URL, @@ -23,15 +19,12 @@ from env_helper import ( S3_BUILDS_BUCKET, S3_DOWNLOAD, ) -from get_robot_token import get_best_robot_token from git_helper import Git from pr_info import PRInfo -from report import TestResults, TestResult -from s3_helper import S3Helper +from report import JobReport, TestResults, TestResult from stopwatch import Stopwatch from tee_popen import TeePopen from build_download_helper import read_build_urls -from upload_result_helper import upload_results from version_helper import ( ClickHouseVersion, get_tagged_versions, @@ -346,7 +339,6 @@ def main(): image = DockerImageData(args.image_path, args.image_repo, False) args.release_type = auto_release_type(args.version, args.release_type) tags = gen_tags(args.version, args.release_type) - NAME = f"Docker image {image.repo} building check" pr_info = None repo_urls = dict() direct_urls: Dict[str, List[str]] = dict() @@ -384,7 +376,6 @@ def main(): if args.push: docker_login() - NAME = f"Docker image {image.repo} build and push" logging.info("Following tags will be created: %s", ", ".join(tags)) status = "success" @@ -398,38 +389,18 @@ def main(): ) if test_results[-1].status != "OK": status = "failure" - pr_info = pr_info or PRInfo() - s3_helper = S3Helper() - - url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results, [], NAME) - - print(f"::notice ::Report url: {url}") - - if not 
args.reports: - return description = f"Processed tags: {', '.join(tags)}" + JobReport( + description=description, + test_results=test_results, + status=status, + start_time=stopwatch.start_time_str, + duration=stopwatch.duration_seconds, + additional_files=[], + ).dump() - description = format_description(description) - - gh = Github(get_best_robot_token(), per_page=100) - commit = get_commit(gh, pr_info.sha) - post_commit_status( - commit, status, url, description, NAME, pr_info, dump_to_file=True - ) - - prepared_events = prepare_tests_results_for_clickhouse( - pr_info, - test_results, - status, - stopwatch.duration_seconds, - stopwatch.start_time_str, - url, - NAME, - ) - ch_helper = ClickHouseHelper() - ch_helper.insert_events_into(db="default", table="checks", events=prepared_events) if status != "success": sys.exit(1) diff --git a/tests/ci/docs_check.py b/tests/ci/docs_check.py index 1424ab8895d..a982cbc2a32 100644 --- a/tests/ci/docs_check.py +++ b/tests/ci/docs_check.py @@ -1,29 +1,16 @@ #!/usr/bin/env python3 import argparse -import atexit import logging import subprocess import sys from pathlib import Path -from github import Github - -from clickhouse_helper import ClickHouseHelper, prepare_tests_results_for_clickhouse -from commit_status_helper import ( - RerunHelper, - get_commit, - post_commit_status, - update_mergeable_check, -) from docker_images_helper import get_docker_image, pull_image from env_helper import TEMP_PATH, REPO_COPY -from get_robot_token import get_best_robot_token from pr_info import PRInfo -from report import TestResults, TestResult -from s3_helper import S3Helper +from report import JobReport, TestResults, TestResult from stopwatch import Stopwatch from tee_popen import TeePopen -from upload_result_helper import upload_results NAME = "Docs Check" @@ -60,26 +47,16 @@ def main(): pr_info = PRInfo(need_changed_files=True) - gh = Github(get_best_robot_token(), per_page=100) - commit = get_commit(gh, pr_info.sha) - - rerun_helper = RerunHelper(commit, NAME) - if rerun_helper.is_already_finished_by_status(): - logging.info("Check is already finished according to github status, exiting") - sys.exit(0) - atexit.register(update_mergeable_check, commit, pr_info, NAME) - if not pr_info.has_changes_in_documentation() and not args.force: logging.info("No changes in documentation") - post_commit_status( - commit, - "success", - "", - "No changes in docs", - NAME, - pr_info, - dump_to_file=True, - ) + JobReport( + description="No changes in docs", + test_results=[], + status="success", + start_time=stopwatch.start_time_str, + duration=stopwatch.duration_seconds, + additional_files=[], + ).dump() sys.exit(0) if pr_info.has_changes_in_documentation(): @@ -134,28 +111,15 @@ def main(): else: test_results.append(TestResult("Non zero exit code", "FAIL")) - s3_helper = S3Helper() - ch_helper = ClickHouseHelper() + JobReport( + description=description, + test_results=test_results, + status=status, + start_time=stopwatch.start_time_str, + duration=stopwatch.duration_seconds, + additional_files=additional_files, + ).dump() - report_url = upload_results( - s3_helper, pr_info.number, pr_info.sha, test_results, additional_files, NAME - ) - print("::notice ::Report url: {report_url}") - post_commit_status( - commit, status, report_url, description, NAME, pr_info, dump_to_file=True - ) - - prepared_events = prepare_tests_results_for_clickhouse( - pr_info, - test_results, - status, - stopwatch.duration_seconds, - stopwatch.start_time_str, - report_url, - NAME, - ) - - 
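The pattern repeated across clickbench.py, compatibility_check.py, docker_server.py and docs_check.py is the same: instead of uploading results to S3 and posting a commit status themselves, the checks now serialize a JobReport and let the CI driver handle reporting. A minimal sketch of that serialization step (MiniJobReport is a stand-in that mirrors some JobReport fields added in report.py, not the real class):

import json
from dataclasses import asdict, dataclass, field
from pathlib import Path
from typing import List

@dataclass
class MiniJobReport:
    status: str
    description: str
    start_time: str
    duration: float
    additional_files: List[str] = field(default_factory=list)

    def dump(self, path: Path) -> None:
        # the real JobReport writes its JSON next to the job's temp files
        path.write_text(json.dumps(asdict(self), indent=2))

MiniJobReport(
    status="success",
    description="Processed tags: head, latest",  # illustrative description
    start_time="2024-01-01 00:00:00",
    duration=12.3,
).dump(Path("job_report.json"))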
ch_helper.insert_events_into(db="default", table="checks", events=prepared_events) if status == "failure": sys.exit(1) diff --git a/tests/ci/fast_test_check.py b/tests/ci/fast_test_check.py index 093537fdeb0..c8ddcf25057 100644 --- a/tests/ci/fast_test_check.py +++ b/tests/ci/fast_test_check.py @@ -5,34 +5,15 @@ import subprocess import os import csv import sys -import atexit from pathlib import Path from typing import Tuple -from github import Github - -from build_check import get_release_or_pr -from clickhouse_helper import ( - ClickHouseHelper, - prepare_tests_results_for_clickhouse, -) -from commit_status_helper import ( - RerunHelper, - get_commit, - post_commit_status, - update_mergeable_check, - format_description, -) from docker_images_helper import DockerImage, get_docker_image, pull_image from env_helper import S3_BUILDS_BUCKET, TEMP_PATH, REPO_COPY -from get_robot_token import get_best_robot_token from pr_info import FORCE_TESTS_LABEL, PRInfo -from report import TestResult, TestResults, read_test_results -from s3_helper import S3Helper +from report import JobReport, TestResult, TestResults, read_test_results from stopwatch import Stopwatch from tee_popen import TeePopen -from upload_result_helper import upload_results -from version_helper import get_version_from_repo NAME = "Fast test" @@ -121,23 +102,8 @@ def main(): pr_info = PRInfo() - gh = Github(get_best_robot_token(), per_page=100) - commit = get_commit(gh, pr_info.sha) - - atexit.register(update_mergeable_check, commit, pr_info, NAME) - - rerun_helper = RerunHelper(commit, NAME) - if rerun_helper.is_already_finished_by_status(): - logging.info("Check is already finished according to github status, exiting") - status = rerun_helper.get_finished_status() - if status is not None and status.state != "success": - sys.exit(1) - sys.exit(0) - docker_image = pull_image(get_docker_image("clickhouse/fasttest")) - s3_helper = S3Helper() - workspace = temp_path / "fasttest-workspace" workspace.mkdir(parents=True, exist_ok=True) @@ -204,47 +170,17 @@ def main(): if timeout_expired: test_results.append(TestResult.create_check_timeout_expired(args.timeout)) state = "failure" - description = format_description(test_results[-1].name) + description = test_results[-1].name - ch_helper = ClickHouseHelper() - s3_path_prefix = "/".join( - ( - get_release_or_pr(pr_info, get_version_from_repo())[0], - pr_info.sha, - "fast_tests", - ) - ) - build_urls = s3_helper.upload_build_directory_to_s3( - output_path / "binaries", - s3_path_prefix, - keep_dirs_in_s3_path=False, - upload_symlinks=False, - ) - - report_url = upload_results( - s3_helper, - pr_info.number, - pr_info.sha, - test_results, - additional_logs, - NAME, - build_urls, - ) - print(f"::notice ::Report url: {report_url}") - post_commit_status( - commit, state, report_url, description, NAME, pr_info, dump_to_file=True - ) - - prepared_events = prepare_tests_results_for_clickhouse( - pr_info, - test_results, - state, - stopwatch.duration_seconds, - stopwatch.start_time_str, - report_url, - NAME, - ) - ch_helper.insert_events_into(db="default", table="checks", events=prepared_events) + JobReport( + description=description, + test_results=test_results, + status=state, + start_time=stopwatch.start_time_str, + duration=stopwatch.duration_seconds, + additional_files=additional_logs, + build_dir_for_upload=str(output_path / "binaries"), + ).dump() # Refuse other checks to run if fast test failed if state != "success": diff --git a/tests/ci/functional_test_check.py 
b/tests/ci/functional_test_check.py index 89fcb9ce350..b7e6c656b1f 100644 --- a/tests/ci/functional_test_check.py +++ b/tests/ci/functional_test_check.py @@ -20,7 +20,6 @@ from clickhouse_helper import ( prepare_tests_results_for_clickhouse, ) from commit_status_helper import ( - RerunHelper, get_commit, override_status, post_commit_status, @@ -247,13 +246,14 @@ def main(): flaky_check = "flaky" in check_name.lower() run_changed_tests = flaky_check or validate_bugfix_check - gh = Github(get_best_robot_token(), per_page=100) # For validate_bugfix_check we need up to date information about labels, so pr_event_from_api is used pr_info = PRInfo( need_changed_files=run_changed_tests, pr_event_from_api=validate_bugfix_check ) + # FIXME: move to job report and remove + gh = Github(get_best_robot_token(), per_page=100) commit = get_commit(gh, pr_info.sha) atexit.register(update_mergeable_check, commit, pr_info, check_name) @@ -279,11 +279,6 @@ def main(): run_by_hash_total = 0 check_name_with_group = check_name - rerun_helper = RerunHelper(commit, check_name_with_group) - if rerun_helper.is_already_finished_by_status(): - logging.info("Check is already finished according to github status, exiting") - sys.exit(0) - tests_to_run = [] if run_changed_tests: tests_to_run = get_tests_to_run(pr_info) diff --git a/tests/ci/git_helper.py b/tests/ci/git_helper.py index 598ffbafb51..f15f1273bb9 100644 --- a/tests/ci/git_helper.py +++ b/tests/ci/git_helper.py @@ -92,7 +92,7 @@ class Runner: return self._cwd = value - def __call__(self, *args, **kwargs): + def __call__(self, *args: Any, **kwargs: Any) -> str: return self.run(*args, **kwargs) diff --git a/tests/ci/install_check.py b/tests/ci/install_check.py index 5ef65f3f38b..4fc112c6d9f 100644 --- a/tests/ci/install_check.py +++ b/tests/ci/install_check.py @@ -2,7 +2,6 @@ import argparse -import atexit import logging import sys import subprocess @@ -10,30 +9,15 @@ from pathlib import Path from shutil import copy2 from typing import Dict -from github import Github from build_download_helper import download_builds_filter -from clickhouse_helper import ( - ClickHouseHelper, - prepare_tests_results_for_clickhouse, -) -from commit_status_helper import ( - RerunHelper, - format_description, - get_commit, - post_commit_status, - update_mergeable_check, -) + from compress_files import compress_fast from docker_images_helper import DockerImage, pull_image, get_docker_image -from env_helper import CI, REPORT_PATH, TEMP_PATH as TEMP -from get_robot_token import get_best_robot_token -from pr_info import PRInfo -from report import TestResults, TestResult, FAILURE, FAIL, OK, SUCCESS -from s3_helper import S3Helper +from env_helper import REPORT_PATH, TEMP_PATH as TEMP +from report import JobReport, TestResults, TestResult, FAILURE, FAIL, OK, SUCCESS from stopwatch import Stopwatch from tee_popen import TeePopen -from upload_result_helper import upload_results RPM_IMAGE = "clickhouse/install-rpm-test" @@ -274,20 +258,6 @@ def main(): TEMP_PATH.mkdir(parents=True, exist_ok=True) LOGS_PATH.mkdir(parents=True, exist_ok=True) - pr_info = PRInfo() - - if CI: - gh = Github(get_best_robot_token(), per_page=100) - commit = get_commit(gh, pr_info.sha) - atexit.register(update_mergeable_check, commit, pr_info, args.check_name) - - rerun_helper = RerunHelper(commit, args.check_name) - if rerun_helper.is_already_finished_by_status(): - logging.info( - "Check is already finished according to github status, exiting" - ) - sys.exit(0) - deb_image = pull_image(get_docker_image(DEB_IMAGE)) 
rpm_image = pull_image(get_docker_image(RPM_IMAGE)) @@ -331,54 +301,21 @@ def main(): test_results.extend(test_install_tgz(rpm_image)) state = SUCCESS - test_status = OK description = "Packages installed successfully" if FAIL in (result.status for result in test_results): state = FAILURE - test_status = FAIL description = "Failed to install packages: " + ", ".join( result.name for result in test_results ) - s3_helper = S3Helper() - - report_url = upload_results( - s3_helper, - pr_info.number, - pr_info.sha, - test_results, - [], - args.check_name, - ) - print(f"::notice ::Report url: {report_url}") - if not CI: - return - - ch_helper = ClickHouseHelper() - - description = format_description(description) - - post_commit_status( - commit, - state, - report_url, - description, - args.check_name, - pr_info, - dump_to_file=True, - ) - - prepared_events = prepare_tests_results_for_clickhouse( - pr_info, - test_results, - test_status, - stopwatch.duration_seconds, - stopwatch.start_time_str, - report_url, - args.check_name, - ) - - ch_helper.insert_events_into(db="default", table="checks", events=prepared_events) + JobReport( + description=description, + test_results=test_results, + status=state, + start_time=stopwatch.start_time_str, + duration=stopwatch.duration_seconds, + additional_files=[], + ).dump() if state == FAILURE: sys.exit(1) diff --git a/tests/ci/integration_test_check.py b/tests/ci/integration_test_check.py index c65f162f770..18b3d2c2898 100644 --- a/tests/ci/integration_test_check.py +++ b/tests/ci/integration_test_check.py @@ -13,7 +13,6 @@ from typing import Dict, List, Tuple from build_download_helper import download_all_deb_packages from clickhouse_helper import ClickHouseHelper, prepare_tests_results_for_clickhouse from commit_status_helper import ( - RerunHelper, get_commit, override_status, post_commit_status, @@ -189,14 +188,10 @@ def main(): logging.info("Skipping '%s' (no pr-bugfix in '%s')", check_name, pr_info.labels) sys.exit(0) + # FIXME: switch to JobReport and remove: gh = GitHub(get_best_robot_token()) commit = get_commit(gh, pr_info.sha) - rerun_helper = RerunHelper(commit, check_name_with_group) - if rerun_helper.is_already_finished_by_status(): - logging.info("Check is already finished according to github status, exiting") - sys.exit(0) - images = [pull_image(get_docker_image(i)) for i in IMAGES] result_path = temp_path / "output_dir" result_path.mkdir(parents=True, exist_ok=True) diff --git a/tests/ci/jepsen_check.py b/tests/ci/jepsen_check.py index 73ae231e7b7..93e33d62293 100644 --- a/tests/ci/jepsen_check.py +++ b/tests/ci/jepsen_check.py @@ -11,23 +11,22 @@ from typing import Any, List import boto3 # type: ignore import requests # type: ignore -from github import Github -from build_download_helper import get_build_name_for_check -from clickhouse_helper import ClickHouseHelper, prepare_tests_results_for_clickhouse -from commit_status_helper import RerunHelper, get_commit, post_commit_status +from build_download_helper import ( + download_build_with_progress, + get_build_name_for_check, + read_build_urls, +) from compress_files import compress_fast -from env_helper import REPO_COPY, TEMP_PATH, S3_BUILDS_BUCKET, S3_DOWNLOAD -from get_robot_token import get_best_robot_token, get_parameter_from_ssm +from env_helper import REPO_COPY, REPORT_PATH, S3_URL, TEMP_PATH, S3_BUILDS_BUCKET +from get_robot_token import get_parameter_from_ssm +from git_helper import git_runner from pr_info import PRInfo -from report import TestResults, TestResult -from s3_helper import 
S3Helper +from report import JobReport, TestResults, TestResult from ssh import SSHKey from stopwatch import Stopwatch from tee_popen import TeePopen -from upload_result_helper import upload_results -from version_helper import get_version_from_repo -from build_check import get_release_or_pr + JEPSEN_GROUP_NAME = "jepsen_group" @@ -184,16 +183,8 @@ def main(): logging.info("Not jepsen test label in labels list, skipping") sys.exit(0) - gh = Github(get_best_robot_token(), per_page=100) - commit = get_commit(gh, pr_info.sha) - check_name = KEEPER_CHECK_NAME if args.program == "keeper" else SERVER_CHECK_NAME - rerun_helper = RerunHelper(commit, check_name) - if rerun_helper.is_already_finished_by_status(): - logging.info("Check is already finished according to github status, exiting") - sys.exit(0) - if not os.path.exists(TEMP_PATH): os.makedirs(TEMP_PATH) @@ -212,27 +203,36 @@ def main(): # always use latest docker_image = KEEPER_IMAGE_NAME if args.program == "keeper" else SERVER_IMAGE_NAME - build_name = get_build_name_for_check(check_name) - - release_or_pr, _ = get_release_or_pr(pr_info, get_version_from_repo()) - - # This check run separately from other checks because it requires exclusive - # run (see .github/workflows/jepsen.yml) So we cannot add explicit - # dependency on a build job and using busy loop on it's results. For the - # same reason we are using latest docker image. - build_url = ( - f"{S3_DOWNLOAD}/{S3_BUILDS_BUCKET}/{release_or_pr}/{pr_info.sha}/" - f"{build_name}/clickhouse" - ) - head = requests.head(build_url) - counter = 0 - while head.status_code != 200: - time.sleep(10) + if pr_info.is_scheduled() or pr_info.is_dispatched(): + # get latest clcikhouse by the static link for latest master buit - get its version and provide permanent url for this version to the jepsen + build_url = f"{S3_URL}/{S3_BUILDS_BUCKET}/master/amd64/clickhouse" + download_build_with_progress(build_url, Path(TEMP_PATH) / "clickhouse") + git_runner.run(f"chmod +x {TEMP_PATH}/clickhouse") + sha = git_runner.run( + f"{TEMP_PATH}/clickhouse local -q \"select value from system.build_options where name='GIT_HASH'\"" + ) + version_full = git_runner.run( + f'{TEMP_PATH}/clickhouse local -q "select version()"' + ) + version = ".".join(version_full.split(".")[0:2]) + assert len(sha) == 40, f"failed to fetch sha from the binary. result: {sha}" + assert ( + version + ), f"failed to fetch version from the binary. 
result: {version_full}" + build_url = ( + f"{S3_URL}/{S3_BUILDS_BUCKET}/{version}/{sha}/binary_release/clickhouse" + ) + print(f"Clickhouse version: [{version_full}], sha: [{sha}], url: [{build_url}]") head = requests.head(build_url) - counter += 1 - if counter >= 180: - logging.warning("Cannot fetch build in 30 minutes, exiting") - sys.exit(0) + assert head.status_code == 200, f"Clickhouse binary not found: {build_url}" + else: + build_name = get_build_name_for_check(check_name) + urls = read_build_urls(build_name, REPORT_PATH) + build_url = None + for url in urls: + if url.endswith("clickhouse"): + build_url = url + assert build_url, "No build url found in the report" extra_args = "" if args.program == "server": @@ -281,32 +281,16 @@ def main(): description = "No Jepsen output log" test_result = [TestResult("No Jepsen output log", "FAIL")] - s3_helper = S3Helper() - report_url = upload_results( - s3_helper, - pr_info.number, - pr_info.sha, - test_result, - [run_log_path] + additional_data, - check_name, - ) + JobReport( + description=description, + test_results=test_result, + status=status, + start_time=stopwatch.start_time_str, + duration=stopwatch.duration_seconds, + additional_files=[run_log_path] + additional_data, + check_name=check_name, + ).dump() - print(f"::notice ::Report url: {report_url}") - post_commit_status( - commit, status, report_url, description, check_name, pr_info, dump_to_file=True - ) - - ch_helper = ClickHouseHelper() - prepared_events = prepare_tests_results_for_clickhouse( - pr_info, - test_result, - status, - stopwatch.duration_seconds, - stopwatch.start_time_str, - report_url, - check_name, - ) - ch_helper.insert_events_into(db="default", table="checks", events=prepared_events) clear_autoscaling_group() diff --git a/tests/ci/libfuzzer_test_check.py b/tests/ci/libfuzzer_test_check.py index 49699b7d2fd..5f41afe9fb6 100644 --- a/tests/ci/libfuzzer_test_check.py +++ b/tests/ci/libfuzzer_test_check.py @@ -4,28 +4,18 @@ import argparse import logging import os import sys -import atexit import zipfile from pathlib import Path from typing import List -from github import Github - from build_download_helper import download_fuzzers from clickhouse_helper import ( CiLogsCredentials, ) -from commit_status_helper import ( - RerunHelper, - get_commit, - update_mergeable_check, -) from docker_images_helper import DockerImage, pull_image, get_docker_image from env_helper import REPORT_PATH, TEMP_PATH, REPO_COPY -from get_robot_token import get_best_robot_token from pr_info import PRInfo -from report import TestResults from stopwatch import Stopwatch @@ -116,28 +106,16 @@ def main(): check_name = args.check_name kill_timeout = args.kill_timeout - gh = Github(get_best_robot_token(), per_page=100) pr_info = PRInfo() - commit = get_commit(gh, pr_info.sha) - atexit.register(update_mergeable_check, commit, pr_info, check_name) temp_path.mkdir(parents=True, exist_ok=True) if "RUN_BY_HASH_NUM" in os.environ: run_by_hash_num = int(os.getenv("RUN_BY_HASH_NUM", "0")) run_by_hash_total = int(os.getenv("RUN_BY_HASH_TOTAL", "0")) - check_name_with_group = ( - check_name + f" [{run_by_hash_num + 1}/{run_by_hash_total}]" - ) else: run_by_hash_num = 0 run_by_hash_total = 0 - check_name_with_group = check_name - - rerun_helper = RerunHelper(commit, check_name_with_group) - if rerun_helper.is_already_finished_by_status(): - logging.info("Check is already finished according to github status, exiting") - sys.exit(0) docker_image = pull_image(get_docker_image("clickhouse/libfuzzer")) diff --git 
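For scheduled and dispatched runs the Jepsen check now derives a permanent binary URL from whatever the freshly downloaded master binary reports about itself. A sketch of just the URL reconstruction, with placeholder values instead of the real `clickhouse local` output and S3 settings:

version_full = "24.1.1.123"            # placeholder for `select version()`
sha = "a" * 40                         # placeholder for the GIT_HASH build option
S3_URL = "https://s3.example.com"      # placeholder
S3_BUILDS_BUCKET = "example-builds"    # placeholder

version = ".".join(version_full.split(".")[0:2])
assert len(sha) == 40, f"failed to fetch sha from the binary. result: {sha}"
build_url = f"{S3_URL}/{S3_BUILDS_BUCKET}/{version}/{sha}/binary_release/clickhouse"
assert build_url.endswith(f"/24.1/{sha}/binary_release/clickhouse")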
a/tests/ci/performance_comparison_check.py b/tests/ci/performance_comparison_check.py index 75f40ae7feb..524da916a5e 100644 --- a/tests/ci/performance_comparison_check.py +++ b/tests/ci/performance_comparison_check.py @@ -8,11 +8,10 @@ import subprocess import traceback import re from pathlib import Path -from typing import Dict from github import Github -from commit_status_helper import RerunHelper, get_commit, post_commit_status +from commit_status_helper import get_commit from ci_config import CI_CONFIG from docker_images_helper import pull_image, get_docker_image from env_helper import ( @@ -26,11 +25,11 @@ from env_helper import ( ) from get_robot_token import get_best_robot_token, get_parameter_from_ssm from pr_info import PRInfo -from s3_helper import S3Helper from tee_popen import TeePopen from clickhouse_helper import get_instance_type, get_instance_id from stopwatch import Stopwatch from build_download_helper import download_builds_filter +from report import JobReport IMAGE_NAME = "clickhouse/performance-comparison" @@ -123,23 +122,7 @@ def main(): is_aarch64 = "aarch64" in os.getenv("CHECK_NAME", "Performance Comparison").lower() if pr_info.number != 0 and is_aarch64 and "pr-performance" not in pr_info.labels: - status = "success" - message = "Skipped, not labeled with 'pr-performance'" - report_url = GITHUB_RUN_URL - post_commit_status( - commit, - status, - report_url, - message, - check_name_with_group, - pr_info, - dump_to_file=True, - ) - sys.exit(0) - - rerun_helper = RerunHelper(commit, check_name_with_group) - if rerun_helper.is_already_finished_by_status(): - logging.info("Check is already finished according to github status, exiting") + print("Skipped, not labeled with 'pr-performance'") sys.exit(0) check_name_prefix = ( @@ -202,6 +185,13 @@ def main(): subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {temp_path}", shell=True) + def too_many_slow(msg): + match = re.search(r"(|.* )(\d+) slower.*", msg) + # This threshold should be synchronized with the value in + # https://github.com/ClickHouse/ClickHouse/blob/master/docker/test/performance-comparison/report.py#L629 + threshold = 5 + return int(match.group(2).strip()) > threshold if match else False + paths = { "compare.log": compare_log_path, "output.7z": result_path / "output.7z", @@ -212,32 +202,12 @@ def main(): "run.log": run_log_path, } - s3_prefix = f"{pr_info.number}/{pr_info.sha}/{check_name_prefix}/" - s3_helper = S3Helper() - uploaded = {} # type: Dict[str, str] - for name, path in paths.items(): - try: - uploaded[name] = s3_helper.upload_test_report_to_s3( - Path(path), s3_prefix + name - ) - except Exception: - uploaded[name] = "" - traceback.print_exc() - - # Upload all images and flamegraphs to S3 - try: - s3_helper.upload_test_directory_to_s3( - Path(result_path) / "images", s3_prefix + "images" - ) - except Exception: - traceback.print_exc() - - def too_many_slow(msg): - match = re.search(r"(|.* )(\d+) slower.*", msg) - # This threshold should be synchronized with the value in - # https://github.com/ClickHouse/ClickHouse/blob/master/docker/test/performance-comparison/report.py#L629 - threshold = 5 - return int(match.group(2).strip()) > threshold if match else False + # FIXME: where images come from? dir does not exist atm. + image_files = ( + list((Path(result_path) / "images").iterdir()) + if (Path(result_path) / "images").exists() + else [] + ) # Try to fetch status from the report. status = "" @@ -269,24 +239,15 @@ def main(): status = "failure" message = "No message in report." 
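Moving too_many_slow() ahead of the upload code does not change its behaviour; it still extracts the "<N> slower" count from the status message and compares it with the hard-coded threshold of 5. A quick self-check of that regex with invented messages:

import re

def too_many_slow(msg):
    match = re.search(r"(|.* )(\d+) slower.*", msg)
    threshold = 5
    return int(match.group(2).strip()) > threshold if match else False

assert too_many_slow("7 slower")
assert not too_many_slow("3 slower")
assert not too_many_slow("no slowdowns detected")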
- report_url = GITHUB_RUN_URL - - report_url = ( - uploaded["report.html"] - or uploaded["output.7z"] - or uploaded["compare.log"] - or uploaded["run.log"] - ) - - post_commit_status( - commit, - status, - report_url, - message, - check_name_with_group, - pr_info, - dump_to_file=True, - ) + JobReport( + description=message, + test_results=[], + status=status, + start_time=stopwatch.start_time_str, + duration=stopwatch.duration_seconds, + additional_files=[v for _, v in paths.items()] + image_files, + check_name=check_name_with_group, + ).dump() if status == "error": sys.exit(1) diff --git a/tests/ci/pr_info.py b/tests/ci/pr_info.py index 15558c81c7e..d1be459666f 100644 --- a/tests/ci/pr_info.py +++ b/tests/ci/pr_info.py @@ -3,15 +3,20 @@ import json import logging import os from typing import Dict, List, Set, Union +from urllib.parse import quote +# isort: off +# for some reason this line moves to the end from unidiff import PatchSet # type: ignore +# isort: on + from build_download_helper import get_gh_api from env_helper import ( - GITHUB_REPOSITORY, - GITHUB_SERVER_URL, - GITHUB_RUN_URL, GITHUB_EVENT_PATH, + GITHUB_REPOSITORY, + GITHUB_RUN_URL, + GITHUB_SERVER_URL, ) FORCE_TESTS_LABEL = "force tests" @@ -38,6 +43,14 @@ DIFF_IN_DOCUMENTATION_EXT = [ RETRY_SLEEP = 0 +class EventType: + UNKNOWN = 0 + PUSH = 1 + PULL_REQUEST = 2 + SCHEDULE = 3 + DISPATCH = 4 + + def get_pr_for_commit(sha, ref): if not ref: return None @@ -61,11 +74,13 @@ def get_pr_for_commit(sha, ref): if pr["head"]["ref"] in ref: return pr our_prs.append(pr) - print("Cannot find PR with required ref", ref, "returning first one") + print( + f"Cannot find PR with required ref {ref}, sha {sha} - returning first one" + ) first_pr = our_prs[0] return first_pr except Exception as ex: - print("Cannot fetch PR info from commit", ex) + print(f"Cannot fetch PR info from commit {ref}, {sha}", ex) return None @@ -99,6 +114,7 @@ class PRInfo: # release_pr and merged_pr are used for docker images additional cache self.release_pr = 0 self.merged_pr = 0 + self.event_type = EventType.UNKNOWN ref = github_event.get("ref", "refs/heads/master") if ref and ref.startswith("refs/heads/"): ref = ref[11:] @@ -115,6 +131,7 @@ class PRInfo: github_event["pull_request"] = prs_for_sha[0] if "pull_request" in github_event: # pull request and other similar events + self.event_type = EventType.PULL_REQUEST self.number = github_event["pull_request"]["number"] # type: int if pr_event_from_api: try: @@ -175,6 +192,7 @@ class PRInfo: self.diff_urls.append(self.compare_pr_url(github_event["pull_request"])) elif "commits" in github_event: + self.event_type = EventType.PUSH # `head_commit` always comes with `commits` commit_message = github_event["head_commit"]["message"] # type: str if commit_message.startswith("Merge pull request #"): @@ -243,6 +261,11 @@ class PRInfo: ) ) else: + if "schedule" in github_event: + self.event_type = EventType.SCHEDULE + else: + # assume this is a dispatch + self.event_type = EventType.DISPATCH print("event.json does not match pull_request or push:") print(json.dumps(github_event, sort_keys=True, indent=4)) self.sha = os.getenv( @@ -263,14 +286,24 @@ class PRInfo: if need_changed_files: self.fetch_changed_files() + def is_master(self) -> bool: + return self.number == 0 and self.base_ref == "master" + + def is_scheduled(self): + return self.event_type == EventType.SCHEDULE + + def is_dispatched(self): + return self.event_type == EventType.DISPATCH + def compare_pr_url(self, pr_object: dict) -> str: return 
self.compare_url(pr_object["base"]["label"], pr_object["head"]["label"]) @staticmethod def compare_url(first: str, second: str) -> str: + """the first and second are URL encoded to not fail on '#' and other symbols""" return ( "https://api.github.com/repos/" - f"{GITHUB_REPOSITORY}/compare/{first}...{second}" + f"{GITHUB_REPOSITORY}/compare/{quote(first)}...{quote(second)}" ) def fetch_changed_files(self): diff --git a/tests/ci/release.py b/tests/ci/release.py index 0076c6ab985..f96845dad95 100755 --- a/tests/ci/release.py +++ b/tests/ci/release.py @@ -122,7 +122,7 @@ class Release: self.version = get_version_from_repo(git=self._git) def get_stable_release_type(self) -> str: - if self.version.minor % 5 == 3: # our 3 and 8 are LTS + if self.version.is_lts: return VersionType.LTS return VersionType.STABLE diff --git a/tests/ci/report.py b/tests/ci/report.py index bf400aebd4b..b478f737963 100644 --- a/tests/ci/report.py +++ b/tests/ci/report.py @@ -1,8 +1,18 @@ # -*- coding: utf-8 -*- from ast import literal_eval -from dataclasses import dataclass +from dataclasses import asdict, dataclass from pathlib import Path -from typing import Dict, Final, Iterable, List, Literal, Optional, Tuple +from typing import ( + Dict, + Final, + Iterable, + List, + Literal, + Optional, + Sequence, + Tuple, + Union, +) from html import escape import csv import datetime @@ -12,6 +22,7 @@ import os from build_download_helper import get_gh_api from ci_config import BuildConfig, CI_CONFIG +from env_helper import REPORT_PATH, TEMP_PATH logger = logging.getLogger(__name__) @@ -221,6 +232,7 @@ HTML_TEST_PART = """ """ BASE_HEADERS = ["Test name", "Test status"] +JOB_REPORT_FILE = Path(TEMP_PATH) / "job_report.json" @dataclass @@ -229,10 +241,10 @@ class TestResult: status: str # the following fields are optional time: Optional[float] = None - log_files: Optional[List[Path]] = None + log_files: Optional[Union[Sequence[str], Sequence[Path]]] = None raw_logs: Optional[str] = None # the field for uploaded logs URLs - log_urls: Optional[List[str]] = None + log_urls: Optional[Sequence[str]] = None def set_raw_logs(self, raw_logs: str) -> None: self.raw_logs = raw_logs @@ -245,9 +257,8 @@ class TestResult: f"Malformed input: must be a list literal: {log_files_literal}" ) for log_path in log_paths: - file = Path(log_path) - assert file.exists(), file - self.log_files.append(file) + assert Path(log_path).exists(), log_path + self.log_files.append(log_path) @staticmethod def create_check_timeout_expired(timeout: float) -> "TestResult": @@ -257,6 +268,53 @@ class TestResult: TestResults = List[TestResult] +@dataclass +class JobReport: + status: str + description: str + test_results: TestResults + start_time: str + duration: float + additional_files: Union[Sequence[str], Sequence[Path]] + # clickhouse version, build job only + version: str = "" + # check name to set in commit status, set if it differs from the job name + check_name: str = "" + # directory with artifacts to upload to S3 + build_dir_for_upload: Union[Path, str] = "" + # if False, no GH commit status will be created by CI + need_commit_status: bool = True + + @classmethod + def exist(cls) -> bool: + return JOB_REPORT_FILE.is_file() + + @classmethod + def load(cls): # type: ignore + res = {} + with open(JOB_REPORT_FILE, "r") as json_file: + res = json.load(json_file) + # Deserialize the nested lists of TestResult + test_results_data = res.get("test_results", []) + test_results = [TestResult(**result) for result in test_results_data] + del res["test_results"] + return
JobReport(test_results=test_results, **res) + + @classmethod + def cleanup(cls): + if JOB_REPORT_FILE.exists(): + JOB_REPORT_FILE.unlink() + + def dump(self): + def path_converter(obj): + if isinstance(obj, Path): + return str(obj) + raise TypeError("Type not serializable") + + with open(JOB_REPORT_FILE, "w") as json_file: + json.dump(asdict(self), json_file, default=path_converter, indent=2) + + def read_test_results(results_path: Path, with_raw_logs: bool = True) -> TestResults: results = [] # type: TestResults with open(results_path, "r", encoding="utf-8") as descriptor: @@ -296,14 +354,72 @@ class BuildResult: log_url: str build_urls: List[str] version: str - status: StatusType + status: str elapsed_seconds: int job_api_url: str + pr_number: int = 0 + head_ref: str = "dummy_branch_name" _job_name: Optional[str] = None _job_html_url: Optional[str] = None _job_html_link: Optional[str] = None _grouped_urls: Optional[List[List[str]]] = None + @classmethod + def cleanup(cls): + if Path(REPORT_PATH).exists(): + for file in Path(REPORT_PATH).iterdir(): + if "build_report" in file.name and file.name.endswith(".json"): + file.unlink() + + @classmethod + def load(cls, build_name: str, pr_number: int, head_ref: str): # type: ignore + """ + loads report from a report file matched with given @pr_number and/or a @head_ref + """ + report_path = Path(REPORT_PATH) / BuildResult.get_report_name( + build_name, pr_number or head_ref + ) + return cls.load_from_file(report_path) + + @classmethod + def load_any(cls, build_name: str, pr_number: int, head_ref: str): # type: ignore + """ + loads report from suitable report file with the following priority: + 1. report from PR with the same @pr_number + 2. report from branch with the same @head_ref + 3. report from the master + 4. 
any other report + """ + reports = [] + for file in Path(REPORT_PATH).iterdir(): + if f"{build_name}.json" in file.name: + reports.append(file) + if not reports: + return None + file_path = None + for file in reports: + if pr_number and f"_{pr_number}_" in file.name: + file_path = file + break + if f"_{head_ref}_" in file.name: + file_path = file + break + if "_master_" in file.name: + file_path = file + break + return cls.load_from_file(file_path or reports[-1]) + + @classmethod + def load_from_file(cls, file: Union[Path, str]): # type: ignore + if not Path(file).exists(): + return None + with open(file, "r") as json_file: + res = json.load(json_file) + return BuildResult(**res) + + def as_json(self) -> str: + return json.dumps(asdict(self), indent=2) + @property def build_config(self) -> Optional[BuildConfig]: return CI_CONFIG.build_config.get(self.build_name, None) @@ -373,10 +489,6 @@ class BuildResult: def _wrong_config_message(self) -> str: return "missing" - @property - def file_name(self) -> Path: - return self.get_report_name(self.build_name) - @property def is_missing(self) -> bool: "The report is created for missing json file" @@ -427,37 +539,18 @@ class BuildResult: self._job_html_url = job_data.get("html_url", "") @staticmethod - def get_report_name(name: str) -> Path: - return Path(f"build_report_{name}.json") - - @staticmethod - def read_json(directory: Path, build_name: str) -> "BuildResult": - path = directory / BuildResult.get_report_name(build_name) - try: - with open(path, "r", encoding="utf-8") as pf: - data = json.load(pf) # type: dict - except FileNotFoundError: - logger.warning( - "File %s for build named '%s' is not found", path, build_name - ) - return BuildResult.missing_result(build_name) - - return BuildResult( - data.get("build_name", build_name), - data.get("log_url", ""), - data.get("build_urls", []), - data.get("version", ""), - data.get("status", ERROR), - data.get("elapsed_seconds", 0), - data.get("job_api_url", ""), - ) + def get_report_name(name: str, suffix: Union[str, int]) -> Path: + assert "/" not in str(suffix) + return Path(f"build_report_{suffix}_{name}.json") @staticmethod def missing_result(build_name: str) -> "BuildResult": return BuildResult(build_name, "", [], "missing", ERROR, 0, "missing") - def write_json(self, directory: Path) -> Path: - path = directory / self.file_name + def write_json(self, directory: Union[Path, str] = REPORT_PATH) -> Path: + path = Path(directory) / self.get_report_name( + self.build_name, self.pr_number or self.head_ref + ) path.write_text( json.dumps( { @@ -468,6 +561,8 @@ class BuildResult: "status": self.status, "elapsed_seconds": self.elapsed_seconds, "job_api_url": self.job_api_url, + "pr_number": self.pr_number, + "head_ref": self.head_ref, } ), encoding="utf-8", @@ -532,10 +627,17 @@ def _get_status_style(status: str, colortheme: Optional[ColorTheme] = None) -> s def _get_html_url_name(url): + base_name = "" if isinstance(url, str): - return os.path.basename(url).replace("%2B", "+").replace("%20", " ") + base_name = os.path.basename(url) if isinstance(url, tuple): - return url[1].replace("%2B", "+").replace("%20", " ") + base_name = url[1] + + if "?" 
in base_name: + base_name = base_name.split("?")[0] + + if base_name is not None: + return base_name.replace("%2B", "+").replace("%20", " ") return None @@ -744,7 +846,7 @@ def create_build_html_report( build_result.build_config is not None and build_result.build_config.sparse_checkout ): - comment += " (note: sparse checkout is used)" + comment += " (note: sparse checkout is used, see update-submodules.sh)" row.append(f"{comment}") row.append("") diff --git a/tests/ci/sqlancer_check.py b/tests/ci/sqlancer_check.py index 35641ba0455..f85ab2be9a3 100644 --- a/tests/ci/sqlancer_check.py +++ b/tests/ci/sqlancer_check.py @@ -6,29 +6,15 @@ import subprocess import sys from pathlib import Path -from github import Github - from build_download_helper import get_build_name_for_check, read_build_urls -from clickhouse_helper import ClickHouseHelper, prepare_tests_results_for_clickhouse -from commit_status_helper import ( - RerunHelper, - format_description, - get_commit, - post_commit_status, -) from docker_images_helper import DockerImage, pull_image, get_docker_image from env_helper import ( - GITHUB_RUN_URL, REPORT_PATH, TEMP_PATH, ) -from get_robot_token import get_best_robot_token -from pr_info import PRInfo -from report import TestResults, TestResult -from s3_helper import S3Helper +from report import JobReport, TestResults, TestResult from stopwatch import Stopwatch from tee_popen import TeePopen -from upload_result_helper import upload_results IMAGE_NAME = "clickhouse/sqlancer-test" @@ -58,16 +44,6 @@ def main(): check_name ), "Check name must be provided as an input arg or in CHECK_NAME env" - pr_info = PRInfo() - - gh = Github(get_best_robot_token(), per_page=100) - commit = get_commit(gh, pr_info.sha) - - rerun_helper = RerunHelper(commit, check_name) - if rerun_helper.is_already_finished_by_status(): - logging.info("Check is already finished according to github status, exiting") - sys.exit(0) - docker_image = pull_image(get_docker_image(IMAGE_NAME)) build_name = get_build_name_for_check(check_name) @@ -118,9 +94,6 @@ def main(): paths += [workspace_path / f"{t}.err" for t in tests] paths += [workspace_path / f"{t}.out" for t in tests] - s3_helper = S3Helper() - report_url = GITHUB_RUN_URL - status = "success" test_results = [] # type: TestResults # Try to get status message saved by the SQLancer @@ -139,33 +112,17 @@ def main(): status = "failure" description = "Task failed: $?=" + str(retcode) - description = format_description(description) + if not test_results: + test_results = [TestResult(name=__file__, status=status)] - report_url = upload_results( - s3_helper, - pr_info.number, - pr_info.sha, - test_results, - paths, - check_name, - ) - - post_commit_status( - commit, status, report_url, description, check_name, pr_info, dump_to_file=True - ) - print(f"::notice:: {check_name} Report url: {report_url}") - - ch_helper = ClickHouseHelper() - prepared_events = prepare_tests_results_for_clickhouse( - pr_info, - test_results, - status, - stopwatch.duration_seconds, - stopwatch.start_time_str, - report_url, - check_name, - ) - ch_helper.insert_events_into(db="default", table="checks", events=prepared_events) + JobReport( + description=description, + test_results=test_results, + status=status, + start_time=stopwatch.start_time_str, + duration=stopwatch.duration_seconds, + additional_files=paths, + ).dump() if __name__ == "__main__": diff --git a/tests/ci/sqllogic_test.py b/tests/ci/sqllogic_test.py index 96b4de4517f..a7b3e3cf69e 100755 --- a/tests/ci/sqllogic_test.py +++ 
b/tests/ci/sqllogic_test.py @@ -5,28 +5,25 @@ import csv import logging import os import subprocess -import sys from pathlib import Path -from typing import List, Tuple - -from github import Github +from typing import Tuple from build_download_helper import download_all_deb_packages -from commit_status_helper import ( - RerunHelper, - get_commit, - override_status, - post_commit_status, -) +from commit_status_helper import override_status from docker_images_helper import DockerImage, pull_image, get_docker_image from env_helper import REPORT_PATH, TEMP_PATH, REPO_COPY -from get_robot_token import get_best_robot_token -from pr_info import PRInfo -from report import OK, FAIL, ERROR, SUCCESS, TestResults, TestResult, read_test_results -from s3_helper import S3Helper +from report import ( + OK, + FAIL, + ERROR, + SUCCESS, + JobReport, + TestResults, + TestResult, + read_test_results, +) from stopwatch import Stopwatch from tee_popen import TeePopen -from upload_result_helper import upload_results NO_CHANGES_MSG = "Nothing to run" @@ -104,15 +101,6 @@ def main(): kill_timeout > 0 ), "kill timeout must be provided as an input arg or in KILL_TIMEOUT env" - pr_info = PRInfo() - gh = Github(get_best_robot_token(), per_page=100) - commit = get_commit(gh, pr_info.sha) - - rerun_helper = RerunHelper(commit, check_name) - if rerun_helper.is_already_finished_by_status(): - logging.info("Check is already finished according to github status, exiting") - sys.exit(0) - docker_image = pull_image(get_docker_image(IMAGE_NAME)) repo_tests_path = repo_path / "tests" @@ -150,8 +138,6 @@ def main(): logging.info("Files in result folder %s", os.listdir(result_path)) - s3_helper = S3Helper() - status = None description = None @@ -186,29 +172,19 @@ def main(): ) ) - report_url = upload_results( - s3_helper, - pr_info.number, - pr_info.sha, - test_results, - additional_logs, - check_name, - ) - - print( - f"::notice:: {check_name}" - f", Result: '{status}'" - f", Description: '{description}'" - f", Report url: '{report_url}'" - ) - # Until it pass all tests, do not block CI, report "success" assert description is not None # FIXME: force SUCCESS until all cases are fixed status = SUCCESS - post_commit_status( - commit, status, report_url, description, check_name, pr_info, dump_to_file=True - ) + + JobReport( + description=description, + test_results=test_results, + status=status, + start_time=stopwatch.start_time_str, + duration=stopwatch.duration_seconds, + additional_files=additional_logs, + ).dump() if __name__ == "__main__": diff --git a/tests/ci/sqltest.py b/tests/ci/sqltest.py index edb64d9f106..b2105d4f5c0 100644 --- a/tests/ci/sqltest.py +++ b/tests/ci/sqltest.py @@ -7,25 +7,15 @@ import sys from pathlib import Path from typing import Dict -from github import Github from build_download_helper import get_build_name_for_check, read_build_urls -from clickhouse_helper import ClickHouseHelper, prepare_tests_results_for_clickhouse -from commit_status_helper import ( - RerunHelper, - get_commit, - post_commit_status, -) from docker_images_helper import pull_image, get_docker_image from env_helper import ( - GITHUB_RUN_URL, REPORT_PATH, TEMP_PATH, ) -from get_robot_token import get_best_robot_token from pr_info import PRInfo -from report import TestResult -from s3_helper import S3Helper +from report import JobReport, TestResult from stopwatch import Stopwatch IMAGE_NAME = "clickhouse/sqltest" @@ -62,14 +52,6 @@ def main(): pr_info = PRInfo() - gh = Github(get_best_robot_token(), per_page=100) - commit = get_commit(gh, 
pr_info.sha) - - rerun_helper = RerunHelper(commit, check_name) - if rerun_helper.is_already_finished_by_status(): - logging.info("Check is already finished according to github status, exiting") - sys.exit(0) - docker_image = pull_image(get_docker_image(IMAGE_NAME)) build_name = get_build_name_for_check(check_name) @@ -109,10 +91,6 @@ def main(): subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {temp_path}", shell=True) - check_name_lower = ( - check_name.lower().replace("(", "").replace(")", "").replace(" ", "") - ) - s3_prefix = f"{pr_info.number}/{pr_info.sha}/sqltest_{check_name_lower}/" paths = { "run.log": run_log_path, "server.log.zst": workspace_path / "server.log.zst", @@ -120,43 +98,18 @@ def main(): "report.html": workspace_path / "report.html", "test.log": workspace_path / "test.log", } - path_urls = {} # type: Dict[str, str] - - s3_helper = S3Helper() - for f in paths: - try: - path_urls[f] = s3_helper.upload_test_report_to_s3(paths[f], s3_prefix + f) - except Exception as ex: - logging.info("Exception uploading file %s text %s", f, ex) - path_urls[f] = "" - - report_url = GITHUB_RUN_URL - if path_urls["report.html"]: - report_url = path_urls["report.html"] - status = "success" description = "See the report" - test_result = TestResult(description, "OK") + test_results = [TestResult(description, "OK")] - ch_helper = ClickHouseHelper() - - prepared_events = prepare_tests_results_for_clickhouse( - pr_info, - [test_result], - status, - stopwatch.duration_seconds, - stopwatch.start_time_str, - report_url, - check_name, - ) - - ch_helper.insert_events_into(db="default", table="checks", events=prepared_events) - - logging.info("Result: '%s', '%s', '%s'", status, description, report_url) - print(f"::notice ::Report url: {report_url}") - post_commit_status( - commit, status, report_url, description, check_name, pr_info, dump_to_file=True - ) + JobReport( + description=description, + test_results=test_results, + status=status, + start_time=stopwatch.start_time_str, + duration=stopwatch.duration_seconds, + additional_files=[v for _, v in paths.items()], + ).dump() if __name__ == "__main__": diff --git a/tests/ci/stress_check.py b/tests/ci/stress_check.py index 05250c14fd1..46bb2261aba 100644 --- a/tests/ci/stress_check.py +++ b/tests/ci/stress_check.py @@ -8,29 +8,15 @@ import sys from pathlib import Path from typing import List, Tuple -from github import Github - from build_download_helper import download_all_deb_packages -from clickhouse_helper import ( - CiLogsCredentials, - ClickHouseHelper, - prepare_tests_results_for_clickhouse, -) -from commit_status_helper import ( - RerunHelper, - get_commit, - post_commit_status, - format_description, -) +from clickhouse_helper import CiLogsCredentials + from docker_images_helper import DockerImage, pull_image, get_docker_image from env_helper import REPORT_PATH, TEMP_PATH, REPO_COPY -from get_robot_token import get_best_robot_token from pr_info import PRInfo -from report import TestResult, TestResults, read_test_results -from s3_helper import S3Helper +from report import JobReport, TestResult, TestResults, read_test_results from stopwatch import Stopwatch from tee_popen import TeePopen -from upload_result_helper import upload_results def get_additional_envs() -> List[str]: @@ -139,14 +125,6 @@ def run_stress_test(docker_image_name: str) -> None: pr_info = PRInfo() - gh = Github(get_best_robot_token(), per_page=100) - commit = get_commit(gh, pr_info.sha) - - rerun_helper = RerunHelper(commit, check_name) - if 
rerun_helper.is_already_finished_by_status(): - logging.info("Check is already finished according to github status, exiting") - sys.exit(0) - docker_image = pull_image(get_docker_image(docker_image_name)) packages_path = temp_path / "packages" @@ -194,7 +172,6 @@ def run_stress_test(docker_image_name: str) -> None: subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {temp_path}", shell=True) ci_logs_credentials.clean_ci_logs_from_credentials(run_log_path) - s3_helper = S3Helper() state, description, test_results, additional_logs = process_results( result_path, server_log_path, run_log_path ) @@ -202,34 +179,16 @@ def run_stress_test(docker_image_name: str) -> None: if timeout_expired: test_results.append(TestResult.create_check_timeout_expired(timeout)) state = "failure" - description = format_description(test_results[-1].name) + description = test_results[-1].name - ch_helper = ClickHouseHelper() - - report_url = upload_results( - s3_helper, - pr_info.number, - pr_info.sha, - test_results, - additional_logs, - check_name, - ) - print(f"::notice ::Report url: {report_url}") - - post_commit_status( - commit, state, report_url, description, check_name, pr_info, dump_to_file=True - ) - - prepared_events = prepare_tests_results_for_clickhouse( - pr_info, - test_results, - state, - stopwatch.duration_seconds, - stopwatch.start_time_str, - report_url, - check_name, - ) - ch_helper.insert_events_into(db="default", table="checks", events=prepared_events) + JobReport( + description=description, + test_results=test_results, + status=state, + start_time=stopwatch.start_time_str, + duration=stopwatch.duration_seconds, + additional_files=additional_logs, + ).dump() if state == "failure": sys.exit(1) diff --git a/tests/ci/style_check.py b/tests/ci/style_check.py index b37dcb59237..4f791a5ee01 100644 --- a/tests/ci/style_check.py +++ b/tests/ci/style_check.py @@ -1,6 +1,5 @@ #!/usr/bin/env python3 import argparse -import atexit import csv import logging import os @@ -9,24 +8,14 @@ import sys from pathlib import Path from typing import List, Tuple -from clickhouse_helper import ClickHouseHelper, prepare_tests_results_for_clickhouse -from commit_status_helper import ( - RerunHelper, - get_commit, - post_commit_status, - update_mergeable_check, -) + from docker_images_helper import get_docker_image, pull_image from env_helper import REPO_COPY, TEMP_PATH -from get_robot_token import get_best_robot_token from git_helper import GIT_PREFIX, git_runner -from github_helper import GitHub from pr_info import PRInfo -from report import TestResults, read_test_results -from s3_helper import S3Helper +from report import JobReport, TestResults, read_test_results from ssh import SSHKey from stopwatch import Stopwatch -from upload_result_helper import upload_results NAME = "Style Check" @@ -142,21 +131,6 @@ def main(): temp_path.mkdir(parents=True, exist_ok=True) pr_info = PRInfo() - gh = GitHub(get_best_robot_token(), create_cache_dir=False) - commit = get_commit(gh, pr_info.sha) - - atexit.register(update_mergeable_check, commit, pr_info, NAME) - - rerun_helper = RerunHelper(commit, NAME) - if rerun_helper.is_already_finished_by_status(): - logging.info("Check is already finished according to github status, exiting") - # Finish with the same code as previous - state = rerun_helper.get_finished_status().state # type: ignore - # state == "success" -> code = 0 - code = int(state != "success") - sys.exit(code) - - s3_helper = S3Helper() IMAGE_NAME = "clickhouse/style-test" image = pull_image(get_docker_image(IMAGE_NAME)) @@ 
-180,28 +154,18 @@ def main(): checkout_last_ref(pr_info) state, description, test_results, additional_files = process_result(temp_path) - ch_helper = ClickHouseHelper() - report_url = upload_results( - s3_helper, pr_info.number, pr_info.sha, test_results, additional_files, NAME - ) - print(f"::notice ::Report url: {report_url}") - post_commit_status( - commit, state, report_url, description, NAME, pr_info, dump_to_file=True - ) - - prepared_events = prepare_tests_results_for_clickhouse( - pr_info, - test_results, - state, - stopwatch.duration_seconds, - stopwatch.start_time_str, - report_url, - NAME, - ) - ch_helper.insert_events_into(db="default", table="checks", events=prepared_events) + JobReport( + description=description, + test_results=test_results, + status=state, + start_time=stopwatch.start_time_str, + duration=stopwatch.duration_seconds, + additional_files=additional_files, + ).dump() if state in ["error", "failure"]: + print(f"Style check failed: [{description}]") sys.exit(1) diff --git a/tests/ci/test_digest.py b/tests/ci/test_digest.py index 86ba16ec525..44117bb604b 100644 --- a/tests/ci/test_digest.py +++ b/tests/ci/test_digest.py @@ -5,6 +5,7 @@ from hashlib import md5 from pathlib import Path import digest_helper as dh +from env_helper import ROOT_DIR _12 = b"12\n" _13 = b"13\n" @@ -13,7 +14,7 @@ _14 = b"14\n" # pylint:disable=protected-access class TestDigests(unittest.TestCase): - tests_dir = Path("tests/digests") + tests_dir = Path(ROOT_DIR) / "tests/ci/tests/digests" broken_link = tests_dir / "broken-symlink" empty_digest = "d41d8cd98f00b204e9800998ecf8427e" diff --git a/tests/ci/unit_tests_check.py b/tests/ci/unit_tests_check.py index f1238a00bd4..495547e1dfc 100644 --- a/tests/ci/unit_tests_check.py +++ b/tests/ci/unit_tests_check.py @@ -5,33 +5,15 @@ import logging import os import sys import subprocess -import atexit from pathlib import Path -from typing import List, Tuple - -from github import Github +from typing import Tuple from build_download_helper import download_unit_tests -from clickhouse_helper import ( - ClickHouseHelper, - prepare_tests_results_for_clickhouse, -) -from commit_status_helper import ( - RerunHelper, - get_commit, - post_commit_status, - update_mergeable_check, -) from docker_images_helper import pull_image, get_docker_image from env_helper import REPORT_PATH, TEMP_PATH -from get_robot_token import get_best_robot_token -from pr_info import PRInfo -from report import ERROR, FAILURE, FAIL, OK, SUCCESS, TestResults, TestResult -from s3_helper import S3Helper +from report import ERROR, FAILURE, FAIL, OK, SUCCESS, JobReport, TestResults, TestResult from stopwatch import Stopwatch from tee_popen import TeePopen -from upload_result_helper import upload_results - IMAGE_NAME = "clickhouse/unit-test" @@ -182,18 +164,6 @@ def main(): temp_path = Path(TEMP_PATH) temp_path.mkdir(parents=True, exist_ok=True) - pr_info = PRInfo() - - gh = Github(get_best_robot_token(), per_page=100) - commit = get_commit(gh, pr_info.sha) - - atexit.register(update_mergeable_check, commit, pr_info, check_name) - - rerun_helper = RerunHelper(commit, check_name) - if rerun_helper.is_already_finished_by_status(): - logging.info("Check is already finished according to github status, exiting") - sys.exit(0) - docker_image = pull_image(get_docker_image(IMAGE_NAME)) download_unit_tests(check_name, REPORT_PATH, TEMP_PATH) @@ -222,35 +192,18 @@ def main(): subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {TEMP_PATH}", shell=True) - s3_helper = S3Helper() state, description, 
test_results = process_results(test_output) - - ch_helper = ClickHouseHelper() - - report_url = upload_results( - s3_helper, - pr_info.number, - pr_info.sha, - test_results, - [run_log_path] + [p for p in test_output.iterdir() if not p.is_dir()], - check_name, - ) - print(f"::notice ::Report url: {report_url}") - post_commit_status( - commit, state, report_url, description, check_name, pr_info, dump_to_file=True - ) - - prepared_events = prepare_tests_results_for_clickhouse( - pr_info, - test_results, - state, - stopwatch.duration_seconds, - stopwatch.start_time_str, - report_url, - check_name, - ) - - ch_helper.insert_events_into(db="default", table="checks", events=prepared_events) + additional_files = [run_log_path] + [ + p for p in test_output.iterdir() if not p.is_dir() + ] + JobReport( + description=description, + test_results=test_results, + status=state, + start_time=stopwatch.start_time_str, + duration=stopwatch.duration_seconds, + additional_files=additional_files, + ).dump() if state == "failure": sys.exit(1) diff --git a/tests/ci/upload_result_helper.py b/tests/ci/upload_result_helper.py index bf52e6069dd..6fa9c1dd873 100644 --- a/tests/ci/upload_result_helper.py +++ b/tests/ci/upload_result_helper.py @@ -1,5 +1,5 @@ from pathlib import Path -from typing import Dict, List, Optional +from typing import Dict, List, Optional, Sequence, Union import os import logging @@ -15,13 +15,13 @@ from s3_helper import S3Helper def process_logs( s3_client: S3Helper, - additional_logs: List[Path], + additional_logs: Union[Sequence[str], Sequence[Path]], s3_path_prefix: str, test_results: TestResults, ) -> List[str]: logging.info("Upload files to s3 %s", additional_logs) - processed_logs = {} # type: Dict[Path, str] + processed_logs = {} # type: Dict[str, str] # Firstly convert paths of logs from test_results to urls to s3. 
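The checks converted above no longer upload results or post commit statuses themselves; they finish by dumping a JobReport that a later CI step consumes. A minimal, self-contained stand-in (deliberately not the repo's class) showing the same dump/load round trip that report.py implements, with a temp-dir file standing in for TEMP_PATH/job_report.json:

import json
from dataclasses import asdict, dataclass, field
from pathlib import Path
from tempfile import gettempdir
from typing import List

# Stand-in for Path(TEMP_PATH) / "job_report.json"
REPORT_FILE = Path(gettempdir()) / "job_report.json"

@dataclass
class MiniJobReport:
    status: str
    description: str
    additional_files: List[str] = field(default_factory=list)

    def dump(self) -> None:
        # Serialize the dataclass so a later CI step can pick it up.
        REPORT_FILE.write_text(json.dumps(asdict(self), indent=2))

    @classmethod
    def exist(cls) -> bool:
        return REPORT_FILE.is_file()

    @classmethod
    def load(cls) -> "MiniJobReport":
        return cls(**json.loads(REPORT_FILE.read_text()))

# A check script dumps its result at the end of the run ...
MiniJobReport(status="success", description="demo", additional_files=["run.log"]).dump()
# ... and the CI runner loads it afterwards to upload logs and set the commit status.
assert MiniJobReport.exist()
print(MiniJobReport.load())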
for test_result in test_results: if test_result.log_files is None: @@ -31,22 +31,24 @@ def process_logs( test_result.log_urls = [] for path in test_result.log_files: if path in processed_logs: - test_result.log_urls.append(processed_logs[path]) + test_result.log_urls.append(processed_logs[str(path)]) elif path: url = s3_client.upload_test_report_to_s3( - path, s3_path_prefix + "/" + path.name + Path(path), s3_path_prefix + "/" + str(path) ) test_result.log_urls.append(url) - processed_logs[path] = url + processed_logs[str(path)] = url additional_urls = [] for log_path in additional_logs: - if log_path.is_file(): + if Path(log_path).is_file(): additional_urls.append( s3_client.upload_test_report_to_s3( - log_path, s3_path_prefix + "/" + os.path.basename(log_path) + Path(log_path), s3_path_prefix + "/" + os.path.basename(log_path) ) ) + else: + logging.error("File %s is missing - skip", log_path) return additional_urls @@ -56,7 +58,7 @@ def upload_results( pr_number: int, commit_sha: str, test_results: TestResults, - additional_files: List[Path], + additional_files: Union[Sequence[Path], Sequence[str]], check_name: str, additional_urls: Optional[List[str]] = None, ) -> str: @@ -65,12 +67,11 @@ def upload_results( normalized_check_name = normalized_check_name.replace(*r) # Preserve additional_urls to not modify the original one - original_additional_urls = additional_urls or [] + additional_urls = additional_urls or [] s3_path_prefix = f"{pr_number}/{commit_sha}/{normalized_check_name}" - additional_urls = process_logs( - s3_client, additional_files, s3_path_prefix, test_results + additional_urls.extend( + process_logs(s3_client, additional_files, s3_path_prefix, test_results) ) - additional_urls.extend(original_additional_urls) branch_url = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/commits/master" branch_name = "master" @@ -79,6 +80,13 @@ def upload_results( branch_url = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/pull/{pr_number}" commit_url = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/commit/{commit_sha}" + ready_report_url = None + for url in additional_urls: + if "report.html" in url: + ready_report_url = url + additional_urls.remove(ready_report_url) + break + if additional_urls: raw_log_url = additional_urls.pop(0) else: @@ -88,21 +96,25 @@ def upload_results( ReportColorTheme.bugfixcheck if "bugfix validate check" in check_name else None ) - html_report = create_test_html_report( - check_name, - test_results, - raw_log_url, - GITHUB_RUN_URL, - GITHUB_JOB_URL(), - branch_url, - branch_name, - commit_url, - additional_urls, - statuscolors=statuscolors, - ) - report_path = Path("report.html") - report_path.write_text(html_report, encoding="utf-8") + if test_results or not ready_report_url: + html_report = create_test_html_report( + check_name, + test_results, + raw_log_url, + GITHUB_RUN_URL, + GITHUB_JOB_URL(), + branch_url, + branch_name, + commit_url, + additional_urls, + statuscolors=statuscolors, + ) + report_path = Path("report.html") + report_path.write_text(html_report, encoding="utf-8") + url = s3_client.upload_test_report_to_s3(report_path, s3_path_prefix + ".html") + else: + logging.info("report.html was prepared by test job itself") + url = ready_report_url - url = s3_client.upload_test_report_to_s3(report_path, s3_path_prefix + ".html") logging.info("Search result in url %s", url) return url diff --git a/tests/ci/version_helper.py b/tests/ci/version_helper.py index 21d2e3940ce..30b0c2d96be 100755 --- a/tests/ci/version_helper.py +++ b/tests/ci/version_helper.py @@ -2,7 +2,7 @@ 
import logging import os.path as p from argparse import ArgumentDefaultsHelpFormatter, ArgumentParser, ArgumentTypeError -from typing import Any, Dict, List, Literal, Optional, Tuple, Union +from typing import Any, Dict, Iterable, List, Literal, Optional, Set, Tuple, Union from git_helper import TWEAK, Git, get_tags, git_runner, removeprefix @@ -120,6 +120,7 @@ class ClickHouseVersion: @property def githash(self) -> str: + "returns the CURRENT git SHA1" if self._git is not None: return self._git.sha return "0000000000000000000000000000000000000000" @@ -138,6 +139,11 @@ class ClickHouseVersion: (str(self.major), str(self.minor), str(self.patch), str(self.tweak)) ) + @property + def is_lts(self) -> bool: + """our X.3 and X.8 are LTS""" + return self.minor % 5 == 3 + def as_dict(self) -> VERSIONS: return { "revision": self.revision, @@ -181,6 +187,21 @@ class ClickHouseVersion: def __le__(self, other: "ClickHouseVersion") -> bool: return self == other or self < other + def __hash__(self): + return hash(self.__repr__) + + def __str__(self): + return f"{self.string}" + + def __repr__(self): + return ( + f"" + ) + + +ClickHouseVersions = List[ClickHouseVersion] + class VersionType: LTS = "lts" @@ -267,7 +288,7 @@ def version_arg(version: str) -> ClickHouseVersion: raise ArgumentTypeError(f"version {version} does not match tag of plain version") -def get_tagged_versions() -> List[ClickHouseVersion]: +def get_tagged_versions() -> ClickHouseVersions: versions = [] for tag in get_tags(): try: @@ -278,6 +299,40 @@ def get_tagged_versions() -> List[ClickHouseVersion]: return sorted(versions) +def get_supported_versions( + versions: Optional[Iterable[ClickHouseVersion]] = None, +) -> Set[ClickHouseVersion]: + supported_stable = set() # type: Set[ClickHouseVersion] + supported_lts = set() # type: Set[ClickHouseVersion] + if versions: + versions = list(versions) + else: + # checks that repo is not shallow in background + versions = get_tagged_versions() + versions.sort() + versions.reverse() + for version in versions: + if len(supported_stable) < 3: + if not { + sv + for sv in supported_stable + if version.major == sv.major and version.minor == sv.minor + }: + supported_stable.add(version) + if (version.description == VersionType.LTS or version.is_lts) and len( + supported_lts + ) < 2: + if not { + sv + for sv in supported_lts + if version.major == sv.major and version.minor == sv.minor + }: + supported_lts.add(version) + if len(supported_stable) == 3 and len(supported_lts) == 2: + break + return supported_lts.union(supported_stable) + + def update_cmake_version( version: ClickHouseVersion, versions_path: str = FILE_WITH_VERSION_PATH, diff --git a/tests/ci/workflow_approve_rerun_lambda/app.py b/tests/ci/workflow_approve_rerun_lambda/app.py index e511d773577..5e68f2d4b53 100644 --- a/tests/ci/workflow_approve_rerun_lambda/app.py +++ b/tests/ci/workflow_approve_rerun_lambda/app.py @@ -1,12 +1,12 @@ #!/usr/bin/env python3 -from collections import namedtuple import fnmatch import json import time +from collections import namedtuple +from urllib.parse import quote import requests # type: ignore - from lambda_shared.pr import TRUSTED_CONTRIBUTORS from lambda_shared.token import get_cached_access_token @@ -129,7 +129,7 @@ def _exec_post_with_retry(url, token, data=None): def _get_pull_requests_from(repo_url, owner, branch, token): - url = f"{repo_url}/pulls?head={owner}:{branch}" + url = f"{repo_url}/pulls?head={quote(owner)}:{quote(branch)}" return _exec_get_with_retry(url, token) diff --git 
a/tests/clickhouse-test b/tests/clickhouse-test index c7049b0e0c8..6ead5bd2873 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -948,6 +948,8 @@ class TestCase: else "" ) + self.show_whitespaces_in_diff = args.show_whitespaces_in_diff + # should skip test, should increment skipped_total, skip reason def should_skip_test(self, suite) -> Optional[FailureReason]: tags = self.tags @@ -1171,7 +1173,7 @@ class TestCase: ) if result_is_different: - diff = Popen( + diff_proc = Popen( [ "diff", "-U", @@ -1182,7 +1184,18 @@ class TestCase: encoding="latin-1", stdout=PIPE, universal_newlines=True, - ).communicate()[0] + ) + if self.show_whitespaces_in_diff: + sed_proc = Popen( + ["sed", "-e", "s/[ \t]\\+$/&$/g"], + stdin=diff_proc.stdout, + stdout=PIPE, + ) + diff_proc.stdout.close() # Allow diff to receive a SIGPIPE if cat exits. + diff = sed_proc.communicate()[0].decode("utf-8", errors="ignore") + else: + diff = diff_proc.communicate()[0] + if diff.startswith("Binary files "): diff += "Content of stdout:\n===================\n" file = open(self.stdout_file, "rb") @@ -2780,6 +2793,12 @@ def parse_args(): help="Total test groups for crc32(test_name) % run_by_hash_total == run_by_hash_num", ) + parser.add_argument( + "--show-whitespaces-in-diff", + action="store_true", + help="Display $ characters after line with trailing whitespaces in diff output", + ) + group = parser.add_mutually_exclusive_group(required=False) group.add_argument( "--zookeeper", diff --git a/tests/config/config.d/block_number.xml b/tests/config/config.d/block_number.xml new file mode 100644 index 00000000000..b56f1f1afc2 --- /dev/null +++ b/tests/config/config.d/block_number.xml @@ -0,0 +1,6 @@ + + + + 0 + + diff --git a/tests/config/config.d/filesystem_caches_path.xml b/tests/config/config.d/filesystem_caches_path.xml index ca946db2e0a..dd6933e9524 100644 --- a/tests/config/config.d/filesystem_caches_path.xml +++ b/tests/config/config.d/filesystem_caches_path.xml @@ -1,3 +1,4 @@ /var/lib/clickhouse/filesystem_caches/ + /var/lib/clickhouse/filesystem_caches/ diff --git a/tests/config/config.d/storage_conf.xml b/tests/config/config.d/storage_conf.xml index 18652826d83..1429dfff724 100644 --- a/tests/config/config.d/storage_conf.xml +++ b/tests/config/config.d/storage_conf.xml @@ -4,11 +4,17 @@ s3 s3_disk/ - http://localhost:11111/test/common/ + http://localhost:11111/test/s3/ clickhouse clickhouse 20000 + + s3_plain + http://localhost:11111/test/s3_plain/ + clickhouse + clickhouse + cache s3_disk diff --git a/tests/config/config.d/storage_conf_02963.xml b/tests/config/config.d/storage_conf_02963.xml new file mode 100644 index 00000000000..0672965e99d --- /dev/null +++ b/tests/config/config.d/storage_conf_02963.xml @@ -0,0 +1,15 @@ + + + + + object_storage + s3 + s3_disk/ + http://localhost:11111/test/common/ + clickhouse + clickhouse + 20000 + + + + diff --git a/tests/config/install.sh b/tests/config/install.sh index 016671304d8..cfe810cda84 100755 --- a/tests/config/install.sh +++ b/tests/config/install.sh @@ -64,6 +64,7 @@ ln -sf $SRC_PATH/config.d/backups.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/filesystem_caches_path.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/validate_tcp_client_information.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/zero_copy_destructive_operations.xml $DEST_SERVER_PATH/config.d/ +ln -sf $SRC_PATH/config.d/block_number.xml $DEST_SERVER_PATH/config.d/ # Not supported with fasttest. 
if [ "${DEST_SERVER_PATH}" = "/etc/clickhouse-server" ] @@ -155,7 +156,7 @@ if [[ -n "$USE_S3_STORAGE_FOR_MERGE_TREE" ]] && [[ "$USE_S3_STORAGE_FOR_MERGE_TR ;; "generate-template-key") ln -sf $SRC_PATH/config.d/storage_metadata_with_full_object_key.xml $DEST_SERVER_PATH/config.d/ - ln -sf $SRC_PATH/config.d/s3_storage_policy_with_template_object_key.xml $DEST_SERVER_PATH/config.d/ + ln -sf $SRC_PATH/config.d/s3_storage_policy_with_template_object_key.xml $DEST_SERVER_PATH/config.d/s3_storage_policy_by_default.xml ;; "generate-suffix"|*) ln -sf $SRC_PATH/config.d/s3_storage_policy_by_default.xml $DEST_SERVER_PATH/config.d/ @@ -176,6 +177,7 @@ if [[ -n "$EXPORT_S3_STORAGE_POLICIES" ]]; then ln -sf $SRC_PATH/config.d/storage_conf.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/storage_conf_02944.xml $DEST_SERVER_PATH/config.d/ + ln -sf $SRC_PATH/config.d/storage_conf_02963.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/users.d/s3_cache.xml $DEST_SERVER_PATH/users.d/ ln -sf $SRC_PATH/users.d/s3_cache_new.xml $DEST_SERVER_PATH/users.d/ fi diff --git a/tests/config/users.d/readonly.xml b/tests/config/users.d/readonly.xml index 0fe1e3fe6d9..799de11decf 100644 --- a/tests/config/users.d/readonly.xml +++ b/tests/config/users.d/readonly.xml @@ -9,7 +9,8 @@ - + + ::1 127.0.0.1 diff --git a/tests/config/users.d/session_log_test.xml b/tests/config/users.d/session_log_test.xml index cc2c2c5fcde..f93b0efd828 100644 --- a/tests/config/users.d/session_log_test.xml +++ b/tests/config/users.d/session_log_test.xml @@ -18,7 +18,8 @@ - + + ::1 127.0.0.1 diff --git a/tests/integration/helpers/0_common_instance_config.xml b/tests/integration/helpers/0_common_instance_config.xml index 535cf252274..73792affee6 100644 --- a/tests/integration/helpers/0_common_instance_config.xml +++ b/tests/integration/helpers/0_common_instance_config.xml @@ -27,4 +27,6 @@ true 2 + + / diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index c0b145b047a..1d96563251b 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -3776,7 +3776,9 @@ class ClickHouseInstance: except Exception as e: logging.warning(f"Stop ClickHouse raised an error {e}") - def start_clickhouse(self, start_wait_sec=60, retry_start=True): + def start_clickhouse( + self, start_wait_sec=60, retry_start=True, expected_to_fail=False + ): if not self.stay_alive: raise Exception( "ClickHouse can be started again only with stay_alive=True instance" @@ -3794,10 +3796,15 @@ class ClickHouseInstance: ["bash", "-c", "{} --daemon".format(self.clickhouse_start_command)], user=str(os.getuid()), ) + if expected_to_fail: + self.wait_start_failed(start_wait_sec + start_time - time.time()) + return time.sleep(1) continue else: logging.debug("Clickhouse process running.") + if expected_to_fail: + raise Exception("ClickHouse was expected not to be running.") try: self.wait_start(start_wait_sec + start_time - time.time()) return @@ -3849,6 +3856,30 @@ class ClickHouseInstance: if last_err is not None: raise last_err + def wait_start_failed(self, start_wait_sec): + start_time = time.time() + while time.time() <= start_time + start_wait_sec: + pid = self.get_process_pid("clickhouse") + if pid is None: + return + time.sleep(1) + logging.error( + f"No time left to shutdown. Process is still running. Will dump threads." 
+ ) + ps_clickhouse = self.exec_in_container( + ["bash", "-c", "ps -C clickhouse"], nothrow=True, user="root" + ) + logging.info(f"PS RESULT:\n{ps_clickhouse}") + pid = self.get_process_pid("clickhouse") + if pid is not None: + self.exec_in_container( + ["bash", "-c", f"gdb -batch -ex 'thread apply all bt full' -p {pid}"], + user="root", + ) + raise Exception( + "ClickHouse server is still running, but was expected to shutdown. Check logs." + ) + def restart_clickhouse(self, stop_start_wait_sec=60, kill=False): self.stop_clickhouse(stop_start_wait_sec, kill) self.start_clickhouse(stop_start_wait_sec) diff --git a/tests/integration/helpers/s3_tools.py b/tests/integration/helpers/s3_tools.py index 777b3394dc1..0c3538c3c39 100644 --- a/tests/integration/helpers/s3_tools.py +++ b/tests/integration/helpers/s3_tools.py @@ -36,6 +36,14 @@ def get_file_contents(minio_client, bucket, s3_path): return data_str.decode() +def list_s3_objects(minio_client, bucket, prefix=""): + prefix_len = len(prefix) + return [ + obj.object_name[prefix_len:] + for obj in minio_client.list_objects(bucket, prefix=prefix, recursive=True) + ] + + # Creates S3 bucket for tests and allows anonymous read-write access to it. def prepare_s3_bucket(started_cluster): # Allows read-write access for bucket without authorization. diff --git a/tests/queries/0_stateless/02696_inverted_idx_checksums.reference b/tests/integration/test_attach_partition_distinct_expression_replicated/__init__.py similarity index 100% rename from tests/queries/0_stateless/02696_inverted_idx_checksums.reference rename to tests/integration/test_attach_partition_distinct_expression_replicated/__init__.py diff --git a/tests/integration/test_attach_partition_distinct_expression_replicated/configs/remote_servers.xml b/tests/integration/test_attach_partition_distinct_expression_replicated/configs/remote_servers.xml new file mode 100644 index 00000000000..b40730e9f7d --- /dev/null +++ b/tests/integration/test_attach_partition_distinct_expression_replicated/configs/remote_servers.xml @@ -0,0 +1,17 @@ + + + + + true + + replica1 + 9000 + + + replica2 + 9000 + + + + + diff --git a/tests/integration/test_attach_partition_distinct_expression_replicated/test.py b/tests/integration/test_attach_partition_distinct_expression_replicated/test.py new file mode 100644 index 00000000000..1d8ac4e9e37 --- /dev/null +++ b/tests/integration/test_attach_partition_distinct_expression_replicated/test.py @@ -0,0 +1,214 @@ +import pytest +from helpers.cluster import ClickHouseCluster +from helpers.test_tools import assert_eq_with_retry + +cluster = ClickHouseCluster(__file__) + +replica1 = cluster.add_instance( + "replica1", with_zookeeper=True, main_configs=["configs/remote_servers.xml"] +) +replica2 = cluster.add_instance( + "replica2", with_zookeeper=True, main_configs=["configs/remote_servers.xml"] +) + + +@pytest.fixture(scope="module") +def start_cluster(): + try: + cluster.start() + yield cluster + except Exception as ex: + print(ex) + finally: + cluster.shutdown() + + +def cleanup(nodes): + for node in nodes: + node.query("DROP TABLE IF EXISTS source SYNC") + node.query("DROP TABLE IF EXISTS destination SYNC") + + +def create_table(node, table_name, replicated): + replica = node.name + engine = ( + f"ReplicatedMergeTree('/clickhouse/tables/1/{table_name}', '{replica}')" + if replicated + else "MergeTree()" + ) + partition_expression = ( + "toYYYYMMDD(timestamp)" if table_name == "source" else "toYYYYMM(timestamp)" + ) + node.query_with_retry( + """ + CREATE TABLE 
{table_name}(timestamp DateTime) + ENGINE = {engine} + ORDER BY tuple() PARTITION BY {partition_expression} + SETTINGS cleanup_delay_period=1, cleanup_delay_period_random_add=1, max_cleanup_delay_period=1; + """.format( + table_name=table_name, + engine=engine, + partition_expression=partition_expression, + ) + ) + + +def test_both_replicated(start_cluster): + for node in [replica1, replica2]: + create_table(node, "source", True) + create_table(node, "destination", True) + + replica1.query("INSERT INTO source VALUES ('2010-03-02 02:01:01')") + replica1.query("SYSTEM SYNC REPLICA source") + replica1.query("SYSTEM SYNC REPLICA destination") + replica1.query( + f"ALTER TABLE destination ATTACH PARTITION ID '20100302' FROM source" + ) + + assert_eq_with_retry( + replica1, f"SELECT * FROM destination", "2010-03-02 02:01:01\n" + ) + assert_eq_with_retry( + replica1, + f"SELECT * FROM destination", + replica2.query(f"SELECT * FROM destination"), + ) + + cleanup([replica1, replica2]) + + +def test_only_destination_replicated(start_cluster): + create_table(replica1, "source", False) + create_table(replica1, "destination", True) + create_table(replica2, "destination", True) + + replica1.query("INSERT INTO source VALUES ('2010-03-02 02:01:01')") + replica1.query("SYSTEM SYNC REPLICA destination") + replica1.query( + f"ALTER TABLE destination ATTACH PARTITION ID '20100302' FROM source" + ) + + assert_eq_with_retry( + replica1, f"SELECT * FROM destination", "2010-03-02 02:01:01\n" + ) + assert_eq_with_retry( + replica1, + f"SELECT * FROM destination", + replica2.query(f"SELECT * FROM destination"), + ) + + cleanup([replica1, replica2]) + + +def test_both_replicated_partitioned_to_unpartitioned(start_cluster): + def create_tables(nodes): + for node in nodes: + source_engine = ( + f"ReplicatedMergeTree('/clickhouse/tables/1/source', '{node.name}')" + ) + node.query( + """ + CREATE TABLE source(timestamp DateTime) + ENGINE = {engine} + ORDER BY tuple() PARTITION BY toYYYYMMDD(timestamp) + SETTINGS cleanup_delay_period=1, cleanup_delay_period_random_add=1, max_cleanup_delay_period=1; + """.format( + engine=source_engine, + ) + ) + + destination_engine = f"ReplicatedMergeTree('/clickhouse/tables/1/destination', '{node.name}')" + node.query( + """ + CREATE TABLE destination(timestamp DateTime) + ENGINE = {engine} + ORDER BY tuple() PARTITION BY tuple() + SETTINGS cleanup_delay_period=1, cleanup_delay_period_random_add=1, max_cleanup_delay_period=1; + """.format( + engine=destination_engine, + ) + ) + + create_tables([replica1, replica2]) + + replica1.query("INSERT INTO source VALUES ('2010-03-02 02:01:01')") + replica1.query("INSERT INTO source VALUES ('2010-03-03 02:01:01')") + replica1.query("SYSTEM SYNC REPLICA source") + replica1.query("SYSTEM SYNC REPLICA destination") + + replica1.query( + f"ALTER TABLE destination ATTACH PARTITION ID '20100302' FROM source" + ) + replica1.query( + f"ALTER TABLE destination ATTACH PARTITION ID '20100303' FROM source" + ) + + assert_eq_with_retry( + replica1, + f"SELECT * FROM destination ORDER BY timestamp", + "2010-03-02 02:01:01\n2010-03-03 02:01:01\n", + ) + assert_eq_with_retry( + replica1, + f"SELECT * FROM destination ORDER BY timestamp", + replica2.query(f"SELECT * FROM destination ORDER BY timestamp"), + ) + + cleanup([replica1, replica2]) + + +def test_both_replicated_different_exp_same_id(start_cluster): + def create_tables(nodes): + for node in nodes: + source_engine = ( + f"ReplicatedMergeTree('/clickhouse/tables/1/source', '{node.name}')" + ) + node.query( 
+ """ + CREATE TABLE source(a UInt16,b UInt16,c UInt16,extra UInt64,Path String,Time DateTime,Value Float64,Timestamp Int64,sign Int8) + ENGINE = {engine} + ORDER BY tuple() PARTITION BY a % 3 + SETTINGS cleanup_delay_period=1, cleanup_delay_period_random_add=1, max_cleanup_delay_period=1; + """.format( + engine=source_engine, + ) + ) + + destination_engine = f"ReplicatedMergeTree('/clickhouse/tables/1/destination', '{node.name}')" + node.query( + """ + CREATE TABLE destination(a UInt16,b UInt16,c UInt16,extra UInt64,Path String,Time DateTime,Value Float64,Timestamp Int64,sign Int8) + ENGINE = {engine} + ORDER BY tuple() PARTITION BY a + SETTINGS cleanup_delay_period=1, cleanup_delay_period_random_add=1, max_cleanup_delay_period=1; + """.format( + engine=destination_engine, + ) + ) + + create_tables([replica1, replica2]) + + replica1.query( + "INSERT INTO source (a, b, c, extra, sign) VALUES (1, 5, 9, 1000, 1)" + ) + replica1.query( + "INSERT INTO source (a, b, c, extra, sign) VALUES (2, 6, 10, 1000, 1)" + ) + replica1.query("SYSTEM SYNC REPLICA source") + replica1.query("SYSTEM SYNC REPLICA destination") + + replica1.query(f"ALTER TABLE destination ATTACH PARTITION 1 FROM source") + replica1.query(f"ALTER TABLE destination ATTACH PARTITION 2 FROM source") + + assert_eq_with_retry( + replica1, + f"SELECT * FROM destination ORDER BY a", + "1\t5\t9\t1000\t\t1970-01-01 00:00:00\t0\t0\t1\n2\t6\t10\t1000\t\t1970-01-01 00:00:00\t0\t0\t1\n", + ) + assert_eq_with_retry( + replica1, + f"SELECT * FROM destination ORDER BY a", + replica2.query(f"SELECT * FROM destination ORDER BY a"), + ) + + cleanup([replica1, replica2]) diff --git a/tests/integration/test_backup_restore_new/configs/shutdown_cancel_backups.xml b/tests/integration/test_backup_restore_new/configs/shutdown_cancel_backups.xml new file mode 100644 index 00000000000..e0c0e0b32cd --- /dev/null +++ b/tests/integration/test_backup_restore_new/configs/shutdown_cancel_backups.xml @@ -0,0 +1,3 @@ + + false + diff --git a/tests/integration/test_backup_restore_new/configs/slow_backups.xml b/tests/integration/test_backup_restore_new/configs/slow_backups.xml new file mode 100644 index 00000000000..96860e65fdc --- /dev/null +++ b/tests/integration/test_backup_restore_new/configs/slow_backups.xml @@ -0,0 +1,7 @@ + + + true + + 2 + 2 + diff --git a/tests/integration/test_backup_restore_new/test_cancel_backup.py b/tests/integration/test_backup_restore_new/test_cancel_backup.py new file mode 100644 index 00000000000..06bcb5eadfc --- /dev/null +++ b/tests/integration/test_backup_restore_new/test_cancel_backup.py @@ -0,0 +1,232 @@ +import pytest +from helpers.cluster import ClickHouseCluster +from helpers.test_tools import TSV, assert_eq_with_retry +import uuid +import re + + +cluster = ClickHouseCluster(__file__) + +main_configs = [ + "configs/backups_disk.xml", + "configs/slow_backups.xml", + "configs/shutdown_cancel_backups.xml", +] + +node = cluster.add_instance( + "node", + main_configs=main_configs, + external_dirs=["/backups/"], + stay_alive=True, +) + + +@pytest.fixture(scope="module", autouse=True) +def start_cluster(): + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + + +@pytest.fixture(autouse=True) +def drop_after_test(): + try: + yield + finally: + node.query("DROP TABLE IF EXISTS tbl SYNC") + + +# Generate the backup name. +def get_backup_name(backup_id): + return f"Disk('backups', '{backup_id}')" + + +# Start making a backup asynchronously. 
+def start_backup(backup_id): + node.query( + f"BACKUP TABLE tbl TO {get_backup_name(backup_id)} SETTINGS id='{backup_id}' ASYNC" + ) + + assert ( + node.query(f"SELECT status FROM system.backups WHERE id='{backup_id}'") + == "CREATING_BACKUP\n" + ) + assert ( + node.query( + f"SELECT count() FROM system.processes WHERE query_kind='Backup' AND query LIKE '%{backup_id}%'" + ) + == "1\n" + ) + + +# Wait for the backup to be completed. +def wait_backup(backup_id): + assert_eq_with_retry( + node, + f"SELECT status FROM system.backups WHERE id='{backup_id}'", + "BACKUP_CREATED", + retry_count=60, + sleep_time=5, + ) + + backup_duration = int( + node.query( + f"SELECT end_time - start_time FROM system.backups WHERE id='{backup_id}'" + ) + ) + assert backup_duration >= 3 # Backup is not expected to be too quick in this test. + + +# Cancel the specified backup. +def cancel_backup(backup_id): + node.query( + f"KILL QUERY WHERE query_kind='Backup' AND query LIKE '%{backup_id}%' SYNC" + ) + assert ( + node.query(f"SELECT status FROM system.backups WHERE id='{backup_id}'") + == "BACKUP_CANCELLED\n" + ) + expected_error = "QUERY_WAS_CANCELLED" + assert expected_error in node.query( + f"SELECT error FROM system.backups WHERE id='{backup_id}'" + ) + assert ( + node.query( + f"SELECT count() FROM system.processes WHERE query_kind='Backup' AND query LIKE '%{backup_id}%'" + ) + == "0\n" + ) + node.query("SYSTEM FLUSH LOGS") + kill_duration_ms = int( + node.query( + f"SELECT query_duration_ms FROM system.query_log WHERE query_kind='KillQuery' AND query LIKE '%{backup_id}%' AND type='QueryFinish'" + ) + ) + assert kill_duration_ms < 2000 # Query must be cancelled quickly + + +# Start restoring from a backup. +def start_restore(restore_id, backup_id): + node.query( + f"RESTORE TABLE tbl FROM {get_backup_name(backup_id)} SETTINGS id='{restore_id}' ASYNC" + ) + + assert ( + node.query(f"SELECT status FROM system.backups WHERE id='{restore_id}'") + == "RESTORING\n" + ) + assert ( + node.query( + f"SELECT count() FROM system.processes WHERE query_kind='Restore' AND query LIKE '%{restore_id}%'" + ) + == "1\n" + ) + + +# Wait for the restore operation to be completed. +def wait_restore(restore_id): + assert_eq_with_retry( + node, + f"SELECT status FROM system.backups WHERE id='{restore_id}'", + "RESTORED", + retry_count=60, + sleep_time=5, + ) + + restore_duration = int( + node.query( + f"SELECT end_time - start_time FROM system.backups WHERE id='{restore_id}'" + ) + ) + assert ( + restore_duration >= 3 + ) # Restore is not expected to be too quick in this test. + + +# Cancel the specified restore operation. +def cancel_restore(restore_id): + node.query( + f"KILL QUERY WHERE query_kind='Restore' AND query LIKE '%{restore_id}%' SYNC" + ) + assert ( + node.query(f"SELECT status FROM system.backups WHERE id='{restore_id}'") + == "RESTORE_CANCELLED\n" + ) + expected_error = "QUERY_WAS_CANCELLED" + assert expected_error in node.query( + f"SELECT error FROM system.backups WHERE id='{restore_id}'" + ) + assert ( + node.query( + f"SELECT count() FROM system.processes WHERE query_kind='Restore' AND query LIKE '%{restore_id}%'" + ) + == "0\n" + ) + node.query("SYSTEM FLUSH LOGS") + kill_duration_ms = int( + node.query( + f"SELECT query_duration_ms FROM system.query_log WHERE query_kind='KillQuery' AND query LIKE '%{restore_id}%' AND type='QueryFinish'" + ) + ) + assert kill_duration_ms < 2000 # Query must be cancelled quickly + + +# Test that BACKUP and RESTORE operations can be cancelled with KILL QUERY. 
+def test_cancel_backup(): + # We use partitioning so backups would contain more files. + node.query( + "CREATE TABLE tbl (x UInt64) ENGINE=MergeTree() ORDER BY tuple() PARTITION BY x%5" + ) + + node.query(f"INSERT INTO tbl SELECT number FROM numbers(500)") + + try_backup_id_1 = uuid.uuid4().hex + start_backup(try_backup_id_1) + cancel_backup(try_backup_id_1) + + backup_id = uuid.uuid4().hex + start_backup(backup_id) + wait_backup(backup_id) + + node.query(f"DROP TABLE tbl SYNC") + + try_restore_id_1 = uuid.uuid4().hex + start_restore(try_restore_id_1, backup_id) + cancel_restore(try_restore_id_1) + + node.query(f"DROP TABLE tbl SYNC") + + restore_id = uuid.uuid4().hex + start_restore(restore_id, backup_id) + wait_restore(restore_id) + + +# Test that shutdown cancels a running backup and doesn't wait until it finishes. +def test_shutdown_cancel_backup(): + node.query( + "CREATE TABLE tbl (x UInt64) ENGINE=MergeTree() ORDER BY tuple() PARTITION BY x%5" + ) + + node.query(f"INSERT INTO tbl SELECT number FROM numbers(500)") + + backup_id = uuid.uuid4().hex + start_backup(backup_id) + + node.restart_clickhouse() # Must cancel the backup. + + # The information about this cancelled backup must be stored in system.backup_log + assert node.query( + f"SELECT status FROM system.backup_log WHERE id='{backup_id}' ORDER BY status" + ) == TSV(["CREATING_BACKUP", "BACKUP_CANCELLED"]) + + # The table can't be restored from this backup. + expected_error = "Backup .* not found" + node.query("DROP TABLE tbl SYNC") + assert re.search( + expected_error, + node.query_and_get_error( + f"RESTORE TABLE tbl FROM {get_backup_name(backup_id)}" + ), + ) diff --git a/tests/integration/test_backup_restore_new/test_shutdown_wait_backup.py b/tests/integration/test_backup_restore_new/test_shutdown_wait_backup.py new file mode 100644 index 00000000000..326e0c5da6c --- /dev/null +++ b/tests/integration/test_backup_restore_new/test_shutdown_wait_backup.py @@ -0,0 +1,82 @@ +import pytest +from helpers.cluster import ClickHouseCluster +from helpers.test_tools import TSV, assert_eq_with_retry +import uuid + + +cluster = ClickHouseCluster(__file__) + +main_configs = [ + "configs/backups_disk.xml", + "configs/slow_backups.xml", +] + +node = cluster.add_instance( + "node", + main_configs=main_configs, + external_dirs=["/backups/"], + stay_alive=True, +) + + +@pytest.fixture(scope="module", autouse=True) +def start_cluster(): + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + + +@pytest.fixture(autouse=True) +def drop_after_test(): + try: + yield + finally: + node.query("DROP TABLE IF EXISTS tbl SYNC") + + +# Generate the backup name. +def get_backup_name(backup_id): + return f"Disk('backups', '{backup_id}')" + + +# Start making a backup asynchronously. +def start_backup(backup_id): + node.query( + f"BACKUP TABLE tbl TO {get_backup_name(backup_id)} SETTINGS id='{backup_id}' ASYNC" + ) + + assert ( + node.query(f"SELECT status FROM system.backups WHERE id='{backup_id}'") + == "CREATING_BACKUP\n" + ) + assert ( + node.query( + f"SELECT count() FROM system.processes WHERE query_kind='Backup' AND query LIKE '%{backup_id}%'" + ) + == "1\n" + ) + + +# Test that shutdown doesn't cancel a running backup and waits until it finishes. 
+def test_shutdown_wait_backup(): + node.query( + "CREATE TABLE tbl (x UInt64) ENGINE=MergeTree() ORDER BY tuple() PARTITION BY x%5" + ) + + node.query(f"INSERT INTO tbl SELECT number FROM numbers(500)") + + backup_id = uuid.uuid4().hex + start_backup(backup_id) + + node.restart_clickhouse() # Must wait for the backup. + + # The information about this backup must be stored in system.backup_log + assert node.query( + f"SELECT status FROM system.backup_log WHERE id='{backup_id}' ORDER BY status" + ) == TSV(["CREATING_BACKUP", "BACKUP_CREATED"]) + + # The table can be restored from this backup. + node.query("DROP TABLE tbl SYNC") + node.query(f"RESTORE TABLE tbl FROM {get_backup_name(backup_id)}") diff --git a/tests/integration/test_cluster_discovery/common.py b/tests/integration/test_cluster_discovery/common.py new file mode 100644 index 00000000000..c66e9361973 --- /dev/null +++ b/tests/integration/test_cluster_discovery/common.py @@ -0,0 +1,41 @@ +import time + + +def check_on_cluster( + nodes, + expected, + *, + what, + cluster_name="test_auto_cluster", + msg=None, + retries=5, + query_params={}, +): + """ + Select data from `system.clusters` on specified nodes and check the result + """ + assert 1 <= retries <= 6 + + node_results = {} + for retry in range(1, retries + 1): + for node in nodes: + if node_results.get(node.name) == expected: + # do not retry node after success + continue + query_text = ( + f"SELECT {what} FROM system.clusters WHERE cluster = '{cluster_name}'" + ) + node_results[node.name] = int(node.query(query_text, **query_params)) + + if all(actual == expected for actual in node_results.values()): + break + + print(f"Retry {retry}/{retries} unsuccessful, result: {node_results}") + + if retry != retries: + time.sleep(2**retry) + else: + msg = msg or f"Wrong '{what}' result" + raise Exception( + f"{msg}: {node_results}, expected: {expected} (after {retries} retries)" + ) diff --git a/tests/integration/test_cluster_discovery/config/config_with_pwd.xml b/tests/integration/test_cluster_discovery/config/config_with_pwd.xml new file mode 100644 index 00000000000..8a2f138bccb --- /dev/null +++ b/tests/integration/test_cluster_discovery/config/config_with_pwd.xml @@ -0,0 +1,21 @@ + + 1 + + + + + /clickhouse/discovery/test_auto_cluster_with_pwd + user1 + password123 + + + + + /clickhouse/discovery/test_auto_cluster_with_wrong_pwd + user1 + wrongpass1234 + + + + + diff --git a/tests/integration/test_cluster_discovery/config/config_with_secret1.xml b/tests/integration/test_cluster_discovery/config/config_with_secret1.xml new file mode 100644 index 00000000000..d91c7d448eb --- /dev/null +++ b/tests/integration/test_cluster_discovery/config/config_with_secret1.xml @@ -0,0 +1,22 @@ + + 1 + + + + + + /clickhouse/discovery/test_auto_cluster_with_secret + secret123 + + + + + + /clickhouse/discovery/test_auto_cluster_with_wrong_secret + correctsecret321 + + + + + + diff --git a/tests/integration/test_cluster_discovery/config/config_with_secret2.xml b/tests/integration/test_cluster_discovery/config/config_with_secret2.xml new file mode 100644 index 00000000000..6dfca01c940 --- /dev/null +++ b/tests/integration/test_cluster_discovery/config/config_with_secret2.xml @@ -0,0 +1,22 @@ + + 1 + + + + + + /clickhouse/discovery/test_auto_cluster_with_secret + secret123 + + + + + + /clickhouse/discovery/test_auto_cluster_with_wrong_secret + wrongsecret333 + + + + + + diff --git a/tests/integration/test_cluster_discovery/config/users.d/users_with_pwd.xml 
b/tests/integration/test_cluster_discovery/config/users.d/users_with_pwd.xml new file mode 100644 index 00000000000..eaf33c8f70a --- /dev/null +++ b/tests/integration/test_cluster_discovery/config/users.d/users_with_pwd.xml @@ -0,0 +1,17 @@ + + + + + + + + + passwordAbc + default + + + password123 + default + + + diff --git a/tests/integration/test_cluster_discovery/test.py b/tests/integration/test_cluster_discovery/test.py index ad3deb5b142..ab21c72cec4 100644 --- a/tests/integration/test_cluster_discovery/test.py +++ b/tests/integration/test_cluster_discovery/test.py @@ -1,7 +1,8 @@ import pytest import functools -import time +from .common import check_on_cluster + from helpers.cluster import ClickHouseCluster @@ -36,39 +37,6 @@ def start_cluster(): cluster.shutdown() -def check_on_cluster( - nodes, expected, *, what, cluster_name="test_auto_cluster", msg=None, retries=5 -): - """ - Select data from `system.clusters` on specified nodes and check the result - """ - assert 1 <= retries <= 6 - - node_results = {} - for retry in range(1, retries + 1): - for node in nodes: - if node_results.get(node.name) == expected: - # do not retry node after success - continue - query_text = ( - f"SELECT {what} FROM system.clusters WHERE cluster = '{cluster_name}'" - ) - node_results[node.name] = int(node.query(query_text)) - - if all(actual == expected for actual in node_results.values()): - break - - print(f"Retry {retry}/{retries} unsuccessful, result: {node_results}") - - if retry != retries: - time.sleep(2**retry) - else: - msg = msg or f"Wrong '{what}' result" - raise Exception( - f"{msg}: {node_results}, expected: {expected} (after {retries} retries)" - ) - - def test_cluster_discovery_startup_and_stop(start_cluster): """ Start cluster, check nodes count in system.clusters, diff --git a/tests/integration/test_cluster_discovery/test_password.py b/tests/integration/test_cluster_discovery/test_password.py new file mode 100644 index 00000000000..bf593260d6f --- /dev/null +++ b/tests/integration/test_cluster_discovery/test_password.py @@ -0,0 +1,72 @@ +import pytest + +from .common import check_on_cluster + +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) + +nodes = { + "node0": cluster.add_instance( + "node0", + main_configs=["config/config_with_pwd.xml", "config/config_with_secret1.xml"], + user_configs=["config/users.d/users_with_pwd.xml"], + stay_alive=True, + with_zookeeper=True, + ), + "node1": cluster.add_instance( + "node1", + main_configs=["config/config_with_pwd.xml", "config/config_with_secret2.xml"], + user_configs=["config/users.d/users_with_pwd.xml"], + stay_alive=True, + with_zookeeper=True, + ), +} + + +@pytest.fixture(scope="module") +def start_cluster(): + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + + +def test_connect_with_password(start_cluster): + check_on_cluster( + [nodes["node0"], nodes["node1"]], + len(nodes), + cluster_name="test_auto_cluster_with_pwd", + what="count()", + msg="Wrong nodes count in cluster", + query_params={"password": "passwordAbc"}, + ) + + result = nodes["node0"].query( + "SELECT sum(number) FROM clusterAllReplicas('test_auto_cluster_with_pwd', numbers(3)) GROUP BY hostname()", + password="passwordAbc", + ) + assert result == "3\n3\n", result + + result = nodes["node0"].query_and_get_error( + "SELECT sum(number) FROM clusterAllReplicas('test_auto_cluster_with_wrong_pwd', numbers(3)) GROUP BY hostname()", + password="passwordAbc", + ) + assert "Authentication failed" in result, result + + 
result = nodes["node0"].query( + "SELECT sum(number) FROM clusterAllReplicas('test_auto_cluster_with_secret', numbers(3)) GROUP BY hostname()", + password="passwordAbc", + ) + assert result == "3\n3\n", result + + result = nodes["node0"].query_and_get_error( + "SELECT sum(number) FROM clusterAllReplicas('test_auto_cluster_with_wrong_secret', numbers(3)) GROUP BY hostname()", + password="passwordAbc", + ) + + # With an incorrect secret, we don't get "Authentication failed", but the connection is simply dropped. + # So, we get messages like "Connection reset by peer" or "Attempt to read after eof". + # We only check that an error occurred and the message is not empty. + assert result diff --git a/tests/integration/test_dictionaries_update_and_reload/test.py b/tests/integration/test_dictionaries_update_and_reload/test.py index 3d96d0b8dd4..648ea847afb 100644 --- a/tests/integration/test_dictionaries_update_and_reload/test.py +++ b/tests/integration/test_dictionaries_update_and_reload/test.py @@ -281,7 +281,7 @@ def test_reload_after_fail_in_cache_dictionary(started_cluster): query_and_get_error = instance.query_and_get_error # Can't get a value from the cache dictionary because the source (table `test.xypairs`) doesn't respond. - expected_error = "Table test.xypairs does not exist" + expected_error = "UNKNOWN_TABLE" update_error = "Could not update cache dictionary cache_xypairs now" assert expected_error in query_and_get_error( "SELECT dictGetUInt64('cache_xypairs', 'y', toUInt64(1))" diff --git a/tests/integration/test_disk_over_web_server/test.py b/tests/integration/test_disk_over_web_server/test.py index a71fdeff302..4b175d188ef 100644 --- a/tests/integration/test_disk_over_web_server/test.py +++ b/tests/integration/test_disk_over_web_server/test.py @@ -172,7 +172,7 @@ def test_incorrect_usage(cluster): assert "Table is read-only" in result result = node2.query_and_get_error("OPTIMIZE TABLE test0 FINAL") - assert "Only read-only operations are supported" in result + assert "Table is in readonly mode due to static storage" in result node2.query("DROP TABLE test0 SYNC") diff --git a/tests/integration/test_filesystem_cache/config.d/remove_filesystem_caches_path.xml b/tests/integration/test_filesystem_cache/config.d/remove_filesystem_caches_path.xml new file mode 100644 index 00000000000..57417af1a39 --- /dev/null +++ b/tests/integration/test_filesystem_cache/config.d/remove_filesystem_caches_path.xml @@ -0,0 +1,3 @@ + + + diff --git a/tests/integration/test_filesystem_cache/test.py b/tests/integration/test_filesystem_cache/test.py index ab1bc4e4344..eb5f896f7a9 100644 --- a/tests/integration/test_filesystem_cache/test.py +++ b/tests/integration/test_filesystem_cache/test.py @@ -27,6 +27,14 @@ def cluster(): "config.d/storage_conf_2.xml", ], ) + cluster.add_instance( + "node_no_filesystem_caches_path", + main_configs=[ + "config.d/storage_conf.xml", + "config.d/remove_filesystem_caches_path.xml", + ], + stay_alive=True, + ) logging.info("Starting cluster...") cluster.start() @@ -194,3 +202,124 @@ def test_caches_with_the_same_configuration_2(cluster, node_name): ).strip() == f"cache1\t{size}\ncache2\t{size}" ) + + +def test_custom_cached_disk(cluster): + node = cluster.instances["node_no_filesystem_caches_path"] + + assert "Cannot create cached custom disk without" in node.query_and_get_error( + f""" + DROP TABLE IF EXISTS test SYNC; + CREATE TABLE test (a Int32) + ENGINE = MergeTree() ORDER BY tuple() + SETTINGS disk = disk(type = cache, path = 'kek', max_size = 1, disk = 'hdd_blob'); + """ 
+ ) + + node.exec_in_container( + [ + "bash", + "-c", + f"""echo " + + /var/lib/clickhouse/filesystem_caches/ + + " > /etc/clickhouse-server/config.d/filesystem_caches_path.xml + """, + ] + ) + node.restart_clickhouse() + + node.query( + f""" + CREATE TABLE test (a Int32) + ENGINE = MergeTree() ORDER BY tuple() + SETTINGS disk = disk(type = cache, name = 'custom_cached', path = 'kek', max_size = 1, disk = 'hdd_blob'); + """ + ) + + assert ( + "/var/lib/clickhouse/filesystem_caches/kek" + == node.query( + "SELECT cache_path FROM system.disks WHERE name = 'custom_cached'" + ).strip() + ) + + node.exec_in_container( + [ + "bash", + "-c", + f"""echo " + + /var/lib/clickhouse/custom_caches/ + + " > /etc/clickhouse-server/config.d/custom_filesystem_caches_path.xml + """, + ] + ) + node.exec_in_container( + [ + "bash", + "-c", + "rm /etc/clickhouse-server/config.d/remove_filesystem_caches_path.xml", + ] + ) + node.restart_clickhouse() + + node.query( + f""" + CREATE TABLE test2 (a Int32) + ENGINE = MergeTree() ORDER BY tuple() + SETTINGS disk = disk(type = cache, name = 'custom_cached2', path = 'kek2', max_size = 1, disk = 'hdd_blob'); + """ + ) + + assert ( + "/var/lib/clickhouse/custom_caches/kek2" + == node.query( + "SELECT cache_path FROM system.disks WHERE name = 'custom_cached2'" + ).strip() + ) + + node.exec_in_container( + ["bash", "-c", "rm /etc/clickhouse-server/config.d/filesystem_caches_path.xml"] + ) + node.restart_clickhouse() + + node.query( + f""" + CREATE TABLE test3 (a Int32) + ENGINE = MergeTree() ORDER BY tuple() + SETTINGS disk = disk(type = cache, name = 'custom_cached3', path = 'kek3', max_size = 1, disk = 'hdd_blob'); + """ + ) + + assert ( + "/var/lib/clickhouse/custom_caches/kek3" + == node.query( + "SELECT cache_path FROM system.disks WHERE name = 'custom_cached3'" + ).strip() + ) + + assert "Filesystem cache path must lie inside" in node.query_and_get_error( + f""" + CREATE TABLE test4 (a Int32) + ENGINE = MergeTree() ORDER BY tuple() + SETTINGS disk = disk(type = cache, name = 'custom_cached4', path = '/kek4', max_size = 1, disk = 'hdd_blob'); + """ + ) + + node.query( + f""" + CREATE TABLE test4 (a Int32) + ENGINE = MergeTree() ORDER BY tuple() + SETTINGS disk = disk(type = cache, name = 'custom_cached4', path = '/var/lib/clickhouse/custom_caches/kek4', max_size = 1, disk = 'hdd_blob'); + """ + ) + + assert ( + "/var/lib/clickhouse/custom_caches/kek4" + == node.query( + "SELECT cache_path FROM system.disks WHERE name = 'custom_cached4'" + ).strip() + ) diff --git a/tests/queries/0_stateless/02862_index_inverted_incorrect_args.reference b/tests/integration/test_limit_materialized_view_count/__init__.py similarity index 100% rename from tests/queries/0_stateless/02862_index_inverted_incorrect_args.reference rename to tests/integration/test_limit_materialized_view_count/__init__.py diff --git a/tests/integration/test_limit_materialized_view_count/configs/max_num_limit.xml b/tests/integration/test_limit_materialized_view_count/configs/max_num_limit.xml new file mode 100644 index 00000000000..7fcf9005a08 --- /dev/null +++ b/tests/integration/test_limit_materialized_view_count/configs/max_num_limit.xml @@ -0,0 +1,3 @@ + + 1 + diff --git a/tests/integration/test_limit_materialized_view_count/test.py b/tests/integration/test_limit_materialized_view_count/test.py new file mode 100644 index 00000000000..c14c5b2055e --- /dev/null +++ b/tests/integration/test_limit_materialized_view_count/test.py @@ -0,0 +1,49 @@ +import pytest +from helpers.cluster import ClickHouseCluster + 
+cluster = ClickHouseCluster(__file__) + +node = cluster.add_instance( + "node", + main_configs=["configs/max_num_limit.xml"], + stay_alive=True, +) + +config = """ + 2 + +""" + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + for _, node in cluster.instances.items(): + node.query( + f""" + CREATE TABLE test_tb (a String) ENGINE = MergeTree ORDER BY a; + """ + ) + yield cluster + finally: + cluster.shutdown() + + +def test_limit_materialized_view_count(started_cluster): + node.query( + "CREATE MATERIALIZED VIEW test_view1 ENGINE = MergeTree ORDER BY a AS SELECT * FROM test_tb;" + ) + assert "Too many materialized views" in node.query_and_get_error( + "CREATE MATERIALIZED VIEW test_view2 ENGINE = MergeTree ORDER BY a AS SELECT * FROM test_tb;" + ) + + node.replace_config("/etc/clickhouse-server/config.d/max_num_limit.xml", config) + node.restart_clickhouse() + + node.query( + "CREATE MATERIALIZED VIEW test_view2 ENGINE = MergeTree ORDER BY a AS SELECT * FROM test_tb;" + ) + assert "Too many materialized views" in node.query_and_get_error( + "CREATE MATERIALIZED VIEW test_view3 ENGINE = MergeTree ORDER BY a AS SELECT * FROM test_tb;" + ) diff --git a/tests/integration/test_merge_tree_azure_blob_storage/test.py b/tests/integration/test_merge_tree_azure_blob_storage/test.py index 1ae3face5b1..f3e113c95d3 100644 --- a/tests/integration/test_merge_tree_azure_blob_storage/test.py +++ b/tests/integration/test_merge_tree_azure_blob_storage/test.py @@ -601,3 +601,34 @@ def test_big_insert(cluster): assert max_single_part_upload_size == block.size id += 1 assert checked + + +def test_endpoint(cluster): + node = cluster.instances[NODE_NAME] + account_name = "devstoreaccount1" + container_name = "cont2" + data_prefix = "data_prefix" + port = cluster.azurite_port + + container_client = cluster.blob_service_client.get_container_client(container_name) + container_client.create_container() + + node.query( + f""" + DROP TABLE IF EXISTS test SYNC; + + CREATE TABLE test (a Int32) + ENGINE = MergeTree() ORDER BY tuple() + SETTINGS disk = disk( + type = azure_blob_storage, + endpoint = 'http://azurite1:{port}/{account_name}/{container_name}/{data_prefix}', + account_name = 'devstoreaccount1', + account_key = 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', + container_already_exists = 1, + skip_access_check = 0); + + INSERT INTO test SELECT number FROM numbers(10); + """ + ) + + assert 10 == int(node.query("SELECT count() FROM test")) diff --git a/tests/integration/test_parallel_replicas_custom_key_failover/test.py b/tests/integration/test_parallel_replicas_custom_key_failover/test.py index 2b5aa2682d5..d7e73208798 100644 --- a/tests/integration/test_parallel_replicas_custom_key_failover/test.py +++ b/tests/integration/test_parallel_replicas_custom_key_failover/test.py @@ -34,20 +34,6 @@ def create_tables(cluster, table_name): f"CREATE TABLE IF NOT EXISTS {table_name} (key Int64, value String) Engine=ReplicatedMergeTree('/test_parallel_replicas/shard1/{table_name}', 'r3') ORDER BY (key)" ) - # create distributed table - node1.query(f"DROP TABLE IF EXISTS {table_name}_d SYNC") - node1.query( - f""" - CREATE TABLE {table_name}_d AS {table_name} - Engine=Distributed( - {cluster}, - currentDatabase(), - {table_name}, - key - ) - """ - ) - # populate data node1.query( f"INSERT INTO {table_name} SELECT number % 4, number FROM numbers(1000)" @@ -75,10 +61,10 @@ def test_parallel_replicas_custom_key_failover( filter_type, 
prefer_localhost_replica, ): - cluster = "test_single_shard_multiple_replicas" + cluster_name = "test_single_shard_multiple_replicas" table = "test_table" - create_tables(cluster, table) + create_tables(cluster_name, table) expected_result = "" for i in range(4): @@ -87,7 +73,7 @@ def test_parallel_replicas_custom_key_failover( log_comment = uuid.uuid4() assert ( node1.query( - f"SELECT key, count() FROM {table}_d GROUP BY key ORDER BY key", + f"SELECT key, count() FROM cluster('{cluster_name}', currentDatabase(), test_table) GROUP BY key ORDER BY key", settings={ "log_comment": log_comment, "prefer_localhost_replica": prefer_localhost_replica, @@ -95,8 +81,10 @@ def test_parallel_replicas_custom_key_failover( "parallel_replicas_custom_key": custom_key, "parallel_replicas_custom_key_filter_type": filter_type, "use_hedged_requests": use_hedged_requests, - # "async_socket_for_remote": 0, - # "async_query_sending_for_remote": 0, + # avoid considering replica delay on connection choice + # otherwise connection can be not distributed evenly among available nodes + # and so custom key secondary queries (we check it bellow) + "max_replica_delay_for_distributed_queries": 0, }, ) == expected_result @@ -115,14 +103,20 @@ def test_parallel_replicas_custom_key_failover( if prefer_localhost_replica == 0: assert ( node1.query( - f"SELECT 'subqueries', count() FROM clusterAllReplicas({cluster}, system.query_log) WHERE initial_query_id = '{query_id}' AND type ='QueryFinish' AND query_id != initial_query_id SETTINGS skip_unavailable_shards=1" + f"SELECT 'subqueries', count() FROM clusterAllReplicas({cluster_name}, system.query_log) WHERE initial_query_id = '{query_id}' AND type ='QueryFinish' AND query_id != initial_query_id SETTINGS skip_unavailable_shards=1" ) == "subqueries\t4\n" ) - assert ( - node1.query( - f"SELECT h, count() FROM clusterAllReplicas({cluster}, system.query_log) WHERE initial_query_id = '{query_id}' AND type ='QueryFinish' GROUP BY hostname() as h SETTINGS skip_unavailable_shards=1" + # currently this assert is flaky with asan and tsan builds, disable the assert in such cases for now + # will be investigated separately + if ( + not node1.is_built_with_thread_sanitizer() + and not node1.is_built_with_address_sanitizer() + ): + assert ( + node1.query( + f"SELECT h, count() FROM clusterAllReplicas({cluster_name}, system.query_log) WHERE initial_query_id = '{query_id}' AND type ='QueryFinish' GROUP BY hostname() as h ORDER BY h SETTINGS skip_unavailable_shards=1" + ) + == "n1\t3\nn3\t2\n" ) - == "n1\t3\nn3\t2\n" - ) diff --git a/tests/queries/0_stateless/02895_forbid_create_inverted_index.reference b/tests/integration/test_parallel_replicas_custom_key_load_balancing/__init__.py similarity index 100% rename from tests/queries/0_stateless/02895_forbid_create_inverted_index.reference rename to tests/integration/test_parallel_replicas_custom_key_load_balancing/__init__.py diff --git a/tests/integration/test_parallel_replicas_custom_key_load_balancing/configs/remote_servers.xml b/tests/integration/test_parallel_replicas_custom_key_load_balancing/configs/remote_servers.xml new file mode 100644 index 00000000000..8b050571c3f --- /dev/null +++ b/tests/integration/test_parallel_replicas_custom_key_load_balancing/configs/remote_servers.xml @@ -0,0 +1,26 @@ + + + + + false + + n1 + 9000 + + + n2 + 9000 + + + n3 + 9000 + + + n4 + 9000 + + + + + + diff --git a/tests/integration/test_parallel_replicas_custom_key_load_balancing/test.py 
b/tests/integration/test_parallel_replicas_custom_key_load_balancing/test.py new file mode 100644 index 00000000000..b9d4d029703 --- /dev/null +++ b/tests/integration/test_parallel_replicas_custom_key_load_balancing/test.py @@ -0,0 +1,118 @@ +import pytest +import uuid +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) + +node1 = cluster.add_instance( + "n1", main_configs=["configs/remote_servers.xml"], with_zookeeper=True +) +node2 = cluster.add_instance( + "n2", main_configs=["configs/remote_servers.xml"], with_zookeeper=True +) +node3 = cluster.add_instance( + "n3", main_configs=["configs/remote_servers.xml"], with_zookeeper=True +) +node4 = cluster.add_instance( + "n4", main_configs=["configs/remote_servers.xml"], with_zookeeper=True +) + +nodes = [node1, node2, node3, node4] + + +@pytest.fixture(scope="module", autouse=True) +def start_cluster(): + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + + +def create_tables(table_name): + for i in range(0, 4): + nodes[i].query(f"DROP TABLE IF EXISTS {table_name} SYNC") + nodes[i].query( + f"CREATE TABLE IF NOT EXISTS {table_name} (key Int64, value String) Engine=ReplicatedMergeTree('/test_parallel_replicas/shard1/{table_name}', 'r{i+1}') ORDER BY (key)" + ) + + # populate data + node1.query( + f"INSERT INTO {table_name} SELECT number % 4, number FROM numbers(1000)" + ) + node1.query( + f"INSERT INTO {table_name} SELECT number % 4, number FROM numbers(1000, 1000)" + ) + node1.query( + f"INSERT INTO {table_name} SELECT number % 4, number FROM numbers(2000, 1000)" + ) + node1.query( + f"INSERT INTO {table_name} SELECT number % 4, number FROM numbers(3000, 1000)" + ) + node2.query(f"SYSTEM SYNC REPLICA {table_name}") + node3.query(f"SYSTEM SYNC REPLICA {table_name}") + node4.query(f"SYSTEM SYNC REPLICA {table_name}") + + +@pytest.mark.parametrize("use_hedged_requests", [1, 0]) +@pytest.mark.parametrize("custom_key", ["sipHash64(key)", "key"]) +@pytest.mark.parametrize("filter_type", ["default", "range"]) +def test_parallel_replicas_custom_key_load_balancing( + start_cluster, + use_hedged_requests, + custom_key, + filter_type, +): + cluster_name = "test_single_shard_multiple_replicas" + table = "test_table" + + create_tables(table) + + expected_result = "" + for i in range(4): + expected_result += f"{i}\t1000\n" + + log_comment = uuid.uuid4() + assert ( + node1.query( + f"SELECT key, count() FROM cluster('{cluster_name}', currentDatabase(), test_table) GROUP BY key ORDER BY key", + settings={ + "log_comment": log_comment, + "prefer_localhost_replica": 0, + "max_parallel_replicas": 4, + "parallel_replicas_custom_key": custom_key, + "parallel_replicas_custom_key_filter_type": filter_type, + "use_hedged_requests": use_hedged_requests, + # avoid considering replica delay on connection choice + # otherwise connection can be not distributed evenly among available nodes + # and so custom key secondary queries (we check it bellow) + "max_replica_delay_for_distributed_queries": 0, + }, + ) + == expected_result + ) + + for node in nodes: + node.query("system flush logs") + + # the subqueries should be spread over available nodes + query_id = node1.query( + f"SELECT query_id FROM system.query_log WHERE current_database = currentDatabase() AND log_comment = '{log_comment}' AND type = 'QueryFinish' AND initial_query_id = query_id" + ) + assert query_id != "" + query_id = query_id[:-1] + + assert ( + node1.query( + f"SELECT 'subqueries', count() FROM clusterAllReplicas({cluster_name}, 
system.query_log) WHERE initial_query_id = '{query_id}' AND type ='QueryFinish' AND query_id != initial_query_id SETTINGS skip_unavailable_shards=1" + ) + == "subqueries\t4\n" + ) + + # check queries per node + assert ( + node1.query( + f"SELECT h, count() FROM clusterAllReplicas({cluster_name}, system.query_log) WHERE initial_query_id = '{query_id}' AND type ='QueryFinish' GROUP BY hostname() as h ORDER BY h SETTINGS skip_unavailable_shards=1" + ) + == "n1\t2\nn2\t1\nn3\t1\nn4\t1\n" + ) diff --git a/tests/integration/test_parallel_replicas_invisible_parts/__init__.py b/tests/integration/test_parallel_replicas_invisible_parts/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_parallel_replicas_invisible_parts/configs/remote_servers.xml b/tests/integration/test_parallel_replicas_invisible_parts/configs/remote_servers.xml new file mode 100644 index 00000000000..4f1217df38b --- /dev/null +++ b/tests/integration/test_parallel_replicas_invisible_parts/configs/remote_servers.xml @@ -0,0 +1,28 @@ + + + + + + node0 + 9000 + + + node1 + 9000 + + + node2 + 9000 + + + node3 + 9000 + + + node4 + 9000 + + + + + diff --git a/tests/integration/test_parallel_replicas_invisible_parts/test.py b/tests/integration/test_parallel_replicas_invisible_parts/test.py new file mode 100644 index 00000000000..cab3fb46fe9 --- /dev/null +++ b/tests/integration/test_parallel_replicas_invisible_parts/test.py @@ -0,0 +1,130 @@ +# We had a couple of bugs where difference in snapshots on replicas (totally normal situation) lead to wrong query result: +# #58722 - some marks were assigned more than once to different replicas because mark segments were not properly cleaned up after moving between queues +# #58844 - replica was assigned reading from a part it didn't see +# In this test we emulate a situation when each replica has unique snapshot of data parts. +# Specifically each of 5 nodes sees 10 parts, half of these parts is common for all nodes, the other half is unique for the specific node. +# This way we will trigger the logic that caused problems in the first issue, because none of the nodes will have all the parts from main node's snapshot in its own snapshot, +# also there is a good chance to schedule reading from one of unique parts to a replica that doesn't see them if something is off with visibility checks. 
+ +import pytest + +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) +cluster_name = "parallel_replicas" +total_parts = 10 + +nodes = [ + cluster.add_instance( + f"node{num}", + main_configs=["configs/remote_servers.xml"], + with_zookeeper=True, + macros={"replica": f"node{num}", "shard": "shard"}, + ) + for num in range(5) +] + + +@pytest.fixture(scope="module", autouse=True) +def start_cluster(): + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + + +def _create_tables(table_name, table_size, index_granularity): + nodes[0].query(f"DROP TABLE IF EXISTS {table_name} ON CLUSTER {cluster_name}") + + nodes[0].query( + f""" + CREATE TABLE IF NOT EXISTS {table_name} ON CLUSTER '{cluster_name}' (key Int64, value String) + Engine=ReplicatedMergeTree('/test_parallel_replicas/shard/{table_name}/', '{{replica}}') + ORDER BY (key) + SETTINGS index_granularity = {index_granularity}, max_bytes_to_merge_at_max_space_in_pool = 0, max_bytes_to_merge_at_max_space_in_pool = 1 + """ + ) + + assert table_size % total_parts == 0 and total_parts % 2 == 0 + for i in range(total_parts // 2): + nodes[0].query( + f""" + INSERT INTO {table_name} + SELECT number, toString(number) FROM numbers_mt({table_size / total_parts}) + SETTINGS insert_deduplicate = 0 + """ + ) + + nodes[0].query(f"SYSTEM SYNC REPLICA ON CLUSTER {cluster_name} {table_name}") + + nodes[0].query(f"SYSTEM STOP FETCHES ON CLUSTER {cluster_name} {table_name}") + + for node in nodes: + for _ in range(total_parts // 2): + node.query( + f""" + INSERT INTO {table_name} + SELECT number, toString(number) FROM numbers_mt({table_size / total_parts}) + SETTINGS insert_deduplicate = 0 + """ + ) + + +def _create_query(query_tmpl, table_name): + return query_tmpl.format(table_name=table_name) + + +def _get_result_with_parallel_replicas( + query, cluster_name, parallel_replicas_mark_segment_size +): + return nodes[0].query( + query, + settings={ + "allow_experimental_parallel_reading_from_replicas": 2, + "max_parallel_replicas": len(nodes), + "cluster_for_parallel_replicas": f"{cluster_name}", + "parallel_replicas_mark_segment_size": parallel_replicas_mark_segment_size, + }, + ) + + +@pytest.mark.parametrize( + "query_tmpl", + [ + "SELECT sum(key) FROM {table_name}", + ], +) +@pytest.mark.parametrize( + "table_size", + [1000, 10000, 100000], +) +@pytest.mark.parametrize( + "index_granularity", + [11, 101], +) +@pytest.mark.parametrize( + "parallel_replicas_mark_segment_size", + [1, 11], +) +def test_reading_with_invisible_parts( + start_cluster, + query_tmpl, + table_size, + index_granularity, + parallel_replicas_mark_segment_size, +): + table_name = f"tbl_{len(query_tmpl)}_{cluster_name}_{table_size}_{index_granularity}_{parallel_replicas_mark_segment_size}" + _create_tables(table_name, table_size, index_granularity) + + query = _create_query(query_tmpl, table_name) + + assert table_size % total_parts == 0 + rows_per_part = table_size // total_parts + expected = total_parts * ((rows_per_part * (rows_per_part - 1)) // 2) + assert ( + _get_result_with_parallel_replicas( + query, cluster_name, parallel_replicas_mark_segment_size + ) + == f"{expected}\n" + ) diff --git a/tests/integration/test_postgresql_replica_database_engine_2/test.py b/tests/integration/test_postgresql_replica_database_engine_2/test.py index 5553f400c0d..c7dae2359c4 100644 --- a/tests/integration/test_postgresql_replica_database_engine_2/test.py +++ b/tests/integration/test_postgresql_replica_database_engine_2/test.py @@ -1093,6 
+1093,8 @@ def test_dependent_loading(started_cluster): f"SELECT toDateTime64('{nested_time}', 6) < toDateTime64('{time}', 6)" ) + instance.query(f"DROP TABLE {table} SYNC") + if __name__ == "__main__": cluster.start() diff --git a/tests/integration/test_prometheus_endpoint/test.py b/tests/integration/test_prometheus_endpoint/test.py index 4eedc84b6c4..f140ebdfbe7 100644 --- a/tests/integration/test_prometheus_endpoint/test.py +++ b/tests/integration/test_prometheus_endpoint/test.py @@ -40,6 +40,8 @@ def get_and_check_metrics(retries): response = requests.get( "http://{host}:{port}/metrics".format(host=node.ip_address, port=8001), allow_redirects=False, + # less than the default keep-alive timeout (10 seconds) + timeout=5, ) if response.status_code != 200: diff --git a/tests/integration/test_reload_query_masking_rules/__init__.py b/tests/integration/test_reload_query_masking_rules/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_reload_query_masking_rules/configs/changed_settings.xml b/tests/integration/test_reload_query_masking_rules/configs/changed_settings.xml new file mode 100644 index 00000000000..d681496d843 --- /dev/null +++ b/tests/integration/test_reload_query_masking_rules/configs/changed_settings.xml @@ -0,0 +1,19 @@ + <clickhouse> + <query_log> + <database>system</database> + <table>query_log</table>
+ <partition_by>toYYYYMM(event_date)</partition_by> + <flush_interval_milliseconds>7500</flush_interval_milliseconds> + <max_size_rows>1048576</max_size_rows> + <reserved_size_rows>8192</reserved_size_rows> + <buffer_size_rows_flush_threshold>524288</buffer_size_rows_flush_threshold> + <flush_on_crash>false</flush_on_crash> + </query_log>
+ + <query_masking_rules> + <rule> + <regexp>TOPSECRET.TOPSECRET</regexp> + <replace>[hidden]</replace> + </rule> + </query_masking_rules> + </clickhouse>
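As an aside, the masking rule added in changed_settings.xml above is easiest to read as a regex substitution applied to query text before it reaches the logs. The standalone Python sketch below only illustrates that substitution with the rule's regexp and replacement values; it is not part of the test suite and does not claim to reproduce the server's internal masking implementation.

import re

# Rule values taken from changed_settings.xml above.
rule_regexp = "TOPSECRET.TOPSECRET"  # '.' matches any character, as in the config
rule_replace = "[hidden]"

# A query similar to the one issued by test_reload_query_masking_rules().
query = "SELECT 'TOPSECRET.TOPSECRET'"

# Illustration only: the server performs this masking internally before logging.
masked = re.sub(rule_regexp, rule_replace, query)
print(masked)  # -> SELECT '[hidden]'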
diff --git a/tests/integration/test_reload_query_masking_rules/configs/empty_settings.xml b/tests/integration/test_reload_query_masking_rules/configs/empty_settings.xml new file mode 100644 index 00000000000..82647ff82b5 --- /dev/null +++ b/tests/integration/test_reload_query_masking_rules/configs/empty_settings.xml @@ -0,0 +1,12 @@ + <clickhouse> + <query_log> + <database>system</database> + <table>query_log</table>
+ <partition_by>toYYYYMM(event_date)</partition_by> + <flush_interval_milliseconds>7500</flush_interval_milliseconds> + <max_size_rows>1048576</max_size_rows> + <reserved_size_rows>8192</reserved_size_rows> + <buffer_size_rows_flush_threshold>524288</buffer_size_rows_flush_threshold> + <flush_on_crash>false</flush_on_crash> + </query_log>
+
diff --git a/tests/integration/test_reload_query_masking_rules/test.py b/tests/integration/test_reload_query_masking_rules/test.py new file mode 100644 index 00000000000..f269aefbacb --- /dev/null +++ b/tests/integration/test_reload_query_masking_rules/test.py @@ -0,0 +1,57 @@ +import pytest +import os +from helpers.cluster import ClickHouseCluster +from helpers.test_tools import assert_eq_with_retry, assert_logs_contain_with_retry + +SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) +cluster = ClickHouseCluster(__file__) +node = cluster.add_instance("node", user_configs=["configs/empty_settings.xml"]) + + +@pytest.fixture(scope="module", autouse=True) +def started_cluster(): + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + + +@pytest.fixture(autouse=True) +def reset_to_normal_settings_after_test(): + try: + node.copy_file_to_container( + os.path.join(SCRIPT_DIR, "configs/empty_settings.xml"), + "/etc/clickhouse-server/config.d/z.xml", + ) + node.query("SYSTEM RELOAD CONFIG") + yield + finally: + pass + + +# @pytest.mark.parametrize("reload_strategy", ["force", "timeout"]) +def test_reload_query_masking_rules(): + # At first, empty configuration is fed to ClickHouse. The query + # "SELECT 'TOPSECRET.TOPSECRET'" will not be redacted, and the new masking + # event will not be registered + node.query("SELECT 'TOPSECRET.TOPSECRET'") + assert_logs_contain_with_retry(node, "SELECT 'TOPSECRET.TOPSECRET'") + assert not node.contains_in_log(r"SELECT '\[hidden\]'") + node.rotate_logs() + + node.copy_file_to_container( + os.path.join(SCRIPT_DIR, "configs/changed_settings.xml"), + "/etc/clickhouse-server/config.d/z.xml", + ) + + node.query("SYSTEM RELOAD CONFIG") + + # Now the same query will be redacted in the logs and the counter of events + # will be incremented + node.query("SELECT 'TOPSECRET.TOPSECRET'") + + assert_logs_contain_with_retry(node, r"SELECT '\[hidden\]'") + assert not node.contains_in_log("SELECT 'TOPSECRET.TOPSECRET'") + + node.rotate_logs() diff --git a/tests/integration/test_replicated_database/test.py b/tests/integration/test_replicated_database/test.py index 1fc3fe37044..b47f86a843d 100644 --- a/tests/integration/test_replicated_database/test.py +++ b/tests/integration/test_replicated_database/test.py @@ -338,6 +338,8 @@ def test_alter_drop_part(started_cluster, engine): main_node.query(f"INSERT INTO {database}.alter_drop_part VALUES (123)") if engine == "MergeTree": dummy_node.query(f"INSERT INTO {database}.alter_drop_part VALUES (456)") + else: + main_node.query(f"SYSTEM SYNC REPLICA {database}.alter_drop_part PULL") main_node.query(f"ALTER TABLE {database}.alter_drop_part DROP PART '{part_name}'") assert main_node.query(f"SELECT CounterID FROM {database}.alter_drop_part") == "" if engine == "ReplicatedMergeTree": @@ -1077,7 +1079,7 @@ def test_startup_without_zk(started_cluster): err = main_node.query_and_get_error( "CREATE DATABASE startup ENGINE = Replicated('/clickhouse/databases/startup', 'shard1', 'replica1');" ) - assert "ZooKeeper" in err + assert "ZooKeeper" in err or "Coordination::Exception" in err main_node.query( "CREATE DATABASE startup ENGINE = Replicated('/clickhouse/databases/startup', 'shard1', 'replica1');" ) @@ -1396,3 +1398,47 @@ def test_modify_comment(started_cluster): main_node.query("DROP DATABASE modify_comment_db SYNC") dummy_node.query("DROP DATABASE modify_comment_db SYNC") + + +def test_table_metadata_corruption(started_cluster): + main_node.query("DROP DATABASE IF EXISTS table_metadata_corruption") + 
dummy_node.query("DROP DATABASE IF EXISTS table_metadata_corruption") + + main_node.query( + "CREATE DATABASE table_metadata_corruption ENGINE = Replicated('/clickhouse/databases/table_metadata_corruption', 'shard1', 'replica1');" + ) + dummy_node.query( + "CREATE DATABASE table_metadata_corruption ENGINE = Replicated('/clickhouse/databases/table_metadata_corruption', 'shard1', 'replica2');" + ) + + create_some_tables("table_metadata_corruption") + + main_node.query("SYSTEM SYNC DATABASE REPLICA table_metadata_corruption") + dummy_node.query("SYSTEM SYNC DATABASE REPLICA table_metadata_corruption") + + # Server should handle this by throwing an exception during table loading, which should lead to server shutdown + corrupt = "sed --follow-symlinks -i 's/ReplicatedMergeTree/CorruptedMergeTree/' /var/lib/clickhouse/metadata/table_metadata_corruption/rmt1.sql" + + print(f"Corrupting metadata using `{corrupt}`") + dummy_node.stop_clickhouse(kill=True) + dummy_node.exec_in_container(["bash", "-c", corrupt]) + + query = ( + "SELECT name, uuid, create_table_query FROM system.tables WHERE database='table_metadata_corruption' AND name NOT LIKE '.inner_id.%' " + "ORDER BY name SETTINGS show_table_uuid_in_table_create_query_if_not_nil=1" + ) + expected = main_node.query(query) + + # We expect clickhouse server to shutdown without LOGICAL_ERRORs or deadlocks + dummy_node.start_clickhouse(expected_to_fail=True) + assert not dummy_node.contains_in_log("LOGICAL_ERROR") + + fix_corrupt = "sed --follow-symlinks -i 's/CorruptedMergeTree/ReplicatedMergeTree/' /var/lib/clickhouse/metadata/table_metadata_corruption/rmt1.sql" + print(f"Fix corrupted metadata using `{fix_corrupt}`") + dummy_node.exec_in_container(["bash", "-c", fix_corrupt]) + + dummy_node.start_clickhouse() + assert_eq_with_retry(dummy_node, query, expected) + + main_node.query("DROP DATABASE IF EXISTS table_metadata_corruption") + dummy_node.query("DROP DATABASE IF EXISTS table_metadata_corruption") diff --git a/tests/integration/test_storage_delta/test.py b/tests/integration/test_storage_delta/test.py index 621d2b89fc5..25f0b58e0f5 100644 --- a/tests/integration/test_storage_delta/test.py +++ b/tests/integration/test_storage_delta/test.py @@ -26,8 +26,14 @@ from pyspark.sql.functions import current_timestamp from datetime import datetime from pyspark.sql.functions import monotonically_increasing_id, row_number from pyspark.sql.window import Window +from minio.deleteobjects import DeleteObject -from helpers.s3_tools import prepare_s3_bucket, upload_directory, get_file_contents +from helpers.s3_tools import ( + prepare_s3_bucket, + upload_directory, + get_file_contents, + list_s3_objects, +) SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) @@ -55,6 +61,7 @@ def started_cluster(): main_configs=["configs/config.d/named_collections.xml"], user_configs=["configs/users.d/users.xml"], with_minio=True, + stay_alive=True, ) logging.info("Starting cluster...") @@ -111,12 +118,12 @@ def get_delta_metadata(delta_metadata_file): return combined_json -def create_delta_table(node, table_name): +def create_delta_table(node, table_name, bucket="root"): node.query( f""" DROP TABLE IF EXISTS {table_name}; CREATE TABLE {table_name} - ENGINE=DeltaLake(s3, filename = '{table_name}/')""" + ENGINE=DeltaLake(s3, filename = '{table_name}/', url = 'http://minio1:9001/{bucket}/')""" ) @@ -401,3 +408,106 @@ def test_types(started_cluster): ["e", "Nullable(Bool)"], ] ) + + +def test_restart_broken(started_cluster): + instance = started_cluster.instances["node1"] + 
spark = started_cluster.spark_session + minio_client = started_cluster.minio_client + bucket = "broken" + TABLE_NAME = "test_restart_broken" + + if not minio_client.bucket_exists(bucket): + minio_client.make_bucket(bucket) + + parquet_data_path = create_initial_data_file( + started_cluster, + instance, + "SELECT number, toString(number) FROM numbers(100)", + TABLE_NAME, + ) + + write_delta_from_file(spark, parquet_data_path, f"/{TABLE_NAME}") + upload_directory(minio_client, bucket, f"/{TABLE_NAME}", "") + create_delta_table(instance, TABLE_NAME, bucket=bucket) + assert int(instance.query(f"SELECT count() FROM {TABLE_NAME}")) == 100 + + s3_objects = list_s3_objects(minio_client, bucket, prefix="") + assert ( + len( + list( + minio_client.remove_objects( + bucket, + [DeleteObject(obj) for obj in s3_objects], + ) + ) + ) + == 0 + ) + minio_client.remove_bucket(bucket) + + instance.restart_clickhouse() + + assert "NoSuchBucket" in instance.query_and_get_error( + f"SELECT count() FROM {TABLE_NAME}" + ) + + minio_client.make_bucket(bucket) + + upload_directory(minio_client, bucket, f"/{TABLE_NAME}", "") + + assert int(instance.query(f"SELECT count() FROM {TABLE_NAME}")) == 100 + + +def test_restart_broken_table_function(started_cluster): + instance = started_cluster.instances["node1"] + spark = started_cluster.spark_session + minio_client = started_cluster.minio_client + bucket = "broken2" + TABLE_NAME = "test_restart_broken_table_function" + + if not minio_client.bucket_exists(bucket): + minio_client.make_bucket(bucket) + + parquet_data_path = create_initial_data_file( + started_cluster, + instance, + "SELECT number, toString(number) FROM numbers(100)", + TABLE_NAME, + ) + + write_delta_from_file(spark, parquet_data_path, f"/{TABLE_NAME}") + upload_directory(minio_client, bucket, f"/{TABLE_NAME}", "") + instance.query( + f""" + DROP TABLE IF EXISTS {TABLE_NAME}; + CREATE TABLE {TABLE_NAME} + AS deltaLake(s3, filename = '{TABLE_NAME}/', url = 'http://minio1:9001/{bucket}/')""" + ) + assert int(instance.query(f"SELECT count() FROM {TABLE_NAME}")) == 100 + + s3_objects = list_s3_objects(minio_client, bucket, prefix="") + assert ( + len( + list( + minio_client.remove_objects( + bucket, + [DeleteObject(obj) for obj in s3_objects], + ) + ) + ) + == 0 + ) + minio_client.remove_bucket(bucket) + + instance.restart_clickhouse() + + assert "NoSuchBucket" in instance.query_and_get_error( + f"SELECT count() FROM {TABLE_NAME}" + ) + + minio_client.make_bucket(bucket) + + upload_directory(minio_client, bucket, f"/{TABLE_NAME}", "") + + assert int(instance.query(f"SELECT count() FROM {TABLE_NAME}")) == 100 diff --git a/tests/integration/test_storage_iceberg/test.py b/tests/integration/test_storage_iceberg/test.py index 9a75dc50d61..d9dee0541b0 100644 --- a/tests/integration/test_storage_iceberg/test.py +++ b/tests/integration/test_storage_iceberg/test.py @@ -27,8 +27,14 @@ from datetime import datetime from pyspark.sql.functions import monotonically_increasing_id, row_number from pyspark.sql.window import Window from pyspark.sql.readwriter import DataFrameWriter, DataFrameWriterV2 +from minio.deleteobjects import DeleteObject -from helpers.s3_tools import prepare_s3_bucket, upload_directory, get_file_contents +from helpers.s3_tools import ( + prepare_s3_bucket, + upload_directory, + get_file_contents, + list_s3_objects, +) SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) @@ -61,6 +67,7 @@ def started_cluster(): main_configs=["configs/config.d/named_collections.xml"], 
user_configs=["configs/users.d/users.xml"], with_minio=True, + stay_alive=True, ) logging.info("Starting cluster...") @@ -135,12 +142,12 @@ def generate_data(spark, start, end): return df -def create_iceberg_table(node, table_name, format="Parquet"): +def create_iceberg_table(node, table_name, format="Parquet", bucket="root"): node.query( f""" DROP TABLE IF EXISTS {table_name}; CREATE TABLE {table_name} - ENGINE=Iceberg(s3, filename = 'iceberg_data/default/{table_name}/', format={format})""" + ENGINE=Iceberg(s3, filename = 'iceberg_data/default/{table_name}/', format={format}, url = 'http://minio1:9001/{bucket}/')""" ) @@ -399,6 +406,8 @@ def test_evolved_schema(started_cluster, format_version): assert int(instance.query(f"SELECT count() FROM {TABLE_NAME}")) == 100 + expected_data = instance.query(f"SELECT * FROM {TABLE_NAME} order by a, b") + spark.sql(f"ALTER TABLE {TABLE_NAME} ADD COLUMNS (x bigint)") files = upload_directory( minio_client, bucket, f"/iceberg_data/default/{TABLE_NAME}/", "" @@ -407,6 +416,11 @@ def test_evolved_schema(started_cluster, format_version): error = instance.query_and_get_error(f"SELECT * FROM {TABLE_NAME}") assert "UNSUPPORTED_METHOD" in error + data = instance.query( + f"SELECT * FROM {TABLE_NAME} SETTINGS iceberg_engine_ignore_schema_evolution=1" + ) + assert data == expected_data + def test_row_based_deletes(started_cluster): instance = started_cluster.instances["node1"] @@ -551,3 +565,56 @@ def test_metadata_file_format_with_uuid(started_cluster, format_version): create_iceberg_table(instance, TABLE_NAME) assert int(instance.query(f"SELECT count() FROM {TABLE_NAME}")) == 500 + + +def test_restart_broken(started_cluster): + instance = started_cluster.instances["node1"] + spark = started_cluster.spark_session + minio_client = started_cluster.minio_client + bucket = "broken2" + TABLE_NAME = "test_restart_broken_table_function" + + if not minio_client.bucket_exists(bucket): + minio_client.make_bucket(bucket) + + parquet_data_path = create_initial_data_file( + started_cluster, + instance, + "SELECT number, toString(number) FROM numbers(100)", + TABLE_NAME, + ) + + write_iceberg_from_file(spark, parquet_data_path, TABLE_NAME, format_version="1") + files = upload_directory( + minio_client, bucket, f"/iceberg_data/default/{TABLE_NAME}/", "" + ) + create_iceberg_table(instance, TABLE_NAME, bucket=bucket) + assert int(instance.query(f"SELECT count() FROM {TABLE_NAME}")) == 100 + + s3_objects = list_s3_objects(minio_client, bucket, prefix="") + assert ( + len( + list( + minio_client.remove_objects( + bucket, + [DeleteObject(obj) for obj in s3_objects], + ) + ) + ) + == 0 + ) + minio_client.remove_bucket(bucket) + + instance.restart_clickhouse() + + assert "NoSuchBucket" in instance.query_and_get_error( + f"SELECT count() FROM {TABLE_NAME}" + ) + + minio_client.make_bucket(bucket) + + files = upload_directory( + minio_client, bucket, f"/iceberg_data/default/{TABLE_NAME}/", "" + ) + + assert int(instance.query(f"SELECT count() FROM {TABLE_NAME}")) == 100 diff --git a/tests/integration/test_storage_kafka/test.py b/tests/integration/test_storage_kafka/test.py index 2176b0151ff..dea1ea49851 100644 --- a/tests/integration/test_storage_kafka/test.py +++ b/tests/integration/test_storage_kafka/test.py @@ -892,12 +892,14 @@ def test_kafka_formats(kafka_cluster): """ expected_rows_count = raw_expected.count("\n") - instance.query_with_retry( + result_checker = lambda res: res.count("\n") == expected_rows_count + res = instance.query_with_retry( f"SELECT * FROM 
test.kafka_{list(all_formats.keys())[-1]}_mv;", retry_count=30, sleep_time=1, - check_callback=lambda res: res.count("\n") == expected_rows_count, + check_callback=result_checker, ) + assert result_checker(res) for format_name, format_opts in list(all_formats.items()): logging.debug(("Checking {}".format(format_name))) @@ -3808,12 +3810,14 @@ def test_kafka_formats_with_broken_message(kafka_cluster): """ expected_rows_count = raw_expected.count("\n") - instance.query_with_retry( + result_checker = lambda res: res.count("\n") == expected_rows_count + res = instance.query_with_retry( f"SELECT * FROM test.kafka_data_{list(all_formats.keys())[-1]}_mv;", retry_count=30, sleep_time=1, - check_callback=lambda res: res.count("\n") == expected_rows_count, + check_callback=result_checker, ) + assert result_checker(res) for format_name, format_opts in list(all_formats.items()): logging.debug(f"Checking {format_name}") @@ -4446,7 +4450,7 @@ def test_block_based_formats_1(kafka_cluster): kafka_group_name = '{topic}', kafka_format = 'PrettySpace'; - INSERT INTO test.kafka SELECT number * 10 as key, number * 100 as value FROM numbers(5) settings max_block_size=2, optimize_trivial_insert_select=0; + INSERT INTO test.kafka SELECT number * 10 as key, number * 100 as value FROM numbers(5) settings max_block_size=2, optimize_trivial_insert_select=0, output_format_pretty_color=1; """ ) @@ -4931,6 +4935,89 @@ def test_formats_errors(kafka_cluster): instance.query("DROP TABLE test.view") +def test_multiple_read_in_materialized_views(kafka_cluster, max_retries=15): + admin_client = KafkaAdminClient( + bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port) + ) + + topic = "multiple_read_from_mv" + kafka_create_topic(admin_client, topic) + + instance.query( + f""" + DROP TABLE IF EXISTS test.kafka_multiple_read_input; + DROP TABLE IF EXISTS test.kafka_multiple_read_table; + DROP TABLE IF EXISTS test.kafka_multiple_read_mv; + + CREATE TABLE test.kafka_multiple_read_input (id Int64) + ENGINE = Kafka + SETTINGS + kafka_broker_list = 'kafka1:19092', + kafka_topic_list = '{topic}', + kafka_group_name = '{topic}', + kafka_format = 'JSONEachRow'; + + CREATE TABLE test.kafka_multiple_read_table (id Int64) + ENGINE = MergeTree + ORDER BY id; + + + CREATE MATERIALIZED VIEW IF NOT EXISTS test.kafka_multiple_read_mv TO test.kafka_multiple_read_table AS + SELECT id + FROM test.kafka_multiple_read_input + WHERE id NOT IN ( + SELECT id + FROM test.kafka_multiple_read_table + WHERE id IN ( + SELECT id + FROM test.kafka_multiple_read_input + ) + ); + """ + ) + + kafka_produce( + kafka_cluster, topic, [json.dumps({"id": 42}), json.dumps({"id": 43})] + ) + + expected_result = "42\n43\n" + res = instance.query_with_retry( + f"SELECT id FROM test.kafka_multiple_read_table ORDER BY id", + retry_count=30, + sleep_time=0.5, + check_callback=lambda res: res == expected_result, + ) + assert res == expected_result + + # Verify that the query deduplicates the records as it meant to be + messages = [] + for i in range(0, 10): + messages.append(json.dumps({"id": 42})) + messages.append(json.dumps({"id": 43})) + + messages.append(json.dumps({"id": 44})) + + kafka_produce(kafka_cluster, topic, messages) + + expected_result = "42\n43\n44\n" + res = instance.query_with_retry( + f"SELECT id FROM test.kafka_multiple_read_table ORDER BY id", + retry_count=30, + sleep_time=0.5, + check_callback=lambda res: res == expected_result, + ) + assert res == expected_result + + kafka_delete_topic(admin_client, topic) + instance.query( + f""" + DROP 
TABLE test.kafka_multiple_read_input; + DROP TABLE test.kafka_multiple_read_table; + DROP TABLE test.kafka_multiple_read_mv; + """ + ) + + if __name__ == "__main__": cluster.start() input("Cluster created, press any key to destroy...") diff --git a/tests/integration/test_storage_mysql/test.py b/tests/integration/test_storage_mysql/test.py index 3e3132949e7..e2257026dc7 100644 --- a/tests/integration/test_storage_mysql/test.py +++ b/tests/integration/test_storage_mysql/test.py @@ -859,6 +859,55 @@ def test_settings(started_cluster): conn.close() +def test_mysql_point(started_cluster): + table_name = "test_mysql_point" + node1.query(f"DROP TABLE IF EXISTS {table_name}") + + conn = get_mysql_conn(started_cluster, cluster.mysql_ip) + drop_mysql_table(conn, table_name) + with conn.cursor() as cursor: + cursor.execute( + f""" + CREATE TABLE `clickhouse`.`{table_name}` ( + `id` int NOT NULL, + `point` Point NOT NULL, + PRIMARY KEY (`id`)) ENGINE=InnoDB; + """ + ) + cursor.execute( + f"INSERT INTO `clickhouse`.`{table_name}` SELECT 1, Point(15, 20)" + ) + assert 1 == cursor.execute(f"SELECT count(*) FROM `clickhouse`.`{table_name}`") + + conn.commit() + + result = node1.query( + f"DESCRIBE mysql('mysql57:3306', 'clickhouse', '{table_name}', 'root', 'clickhouse')" + ) + assert result.strip() == "id\tInt32\t\t\t\t\t\npoint\tPoint" + + assert 1 == int( + node1.query( + f"SELECT count() FROM mysql('mysql57:3306', 'clickhouse', '{table_name}', 'root', 'clickhouse')" + ) + ) + assert ( + "(15,20)" + == node1.query( + f"SELECT point FROM mysql('mysql57:3306', 'clickhouse', '{table_name}', 'root', 'clickhouse')" + ).strip() + ) + + node1.query("DROP TABLE IF EXISTS test") + node1.query( + f"CREATE TABLE test (id Int32, point Point) Engine=MySQL('mysql57:3306', 'clickhouse', '{table_name}', 'root', 'clickhouse')" + ) + assert "(15,20)" == node1.query(f"SELECT point FROM test").strip() + + drop_mysql_table(conn, table_name) + conn.close() + + if __name__ == "__main__": with contextmanager(started_cluster)() as cluster: for name, instance in list(cluster.instances.items()): diff --git a/tests/integration/test_storage_rabbitmq/test.py b/tests/integration/test_storage_rabbitmq/test.py index f26a273fe5e..b778e9fb556 100644 --- a/tests/integration/test_storage_rabbitmq/test.py +++ b/tests/integration/test_storage_rabbitmq/test.py @@ -3156,7 +3156,7 @@ def test_block_based_formats_1(rabbitmq_cluster): ) instance.query( - "INSERT INTO test.rabbitmq SELECT number * 10 as key, number * 100 as value FROM numbers(5) settings max_block_size=2, optimize_trivial_insert_select=0;" + "INSERT INTO test.rabbitmq SELECT number * 10 as key, number * 100 as value FROM numbers(5) settings max_block_size=2, optimize_trivial_insert_select=0, output_format_pretty_color=1;" ) insert_messages = [] @@ -3538,3 +3538,14 @@ def test_rabbitmq_handle_error_mode_stream(rabbitmq_cluster): expected = "".join(sorted(expected)) assert broken_messages == expected + + +def test_attach_broken_table(rabbitmq_cluster): + instance.query( + "ATTACH TABLE rabbit_queue UUID '2d1cdf1a-f060-4a61-a7c9-5b59e59992c6' (`payload` String) ENGINE = RabbitMQ SETTINGS rabbitmq_host_port = 'nonexisting:5671', rabbitmq_format = 'JSONEachRow', rabbitmq_username = 'test', rabbitmq_password = 'test'" + ) + + error = instance.query_and_get_error("SELECT * FROM rabbit_queue") + assert "CANNOT_CONNECT_RABBITMQ" in error + error = instance.query_and_get_error("INSERT INTO rabbit_queue VALUES ('test')") + assert "CANNOT_CONNECT_RABBITMQ" in error diff --git 
a/tests/integration/test_structured_logging_json/test.py b/tests/integration/test_structured_logging_json/test.py index cbd066abc91..6d1455f6e0e 100644 --- a/tests/integration/test_structured_logging_json/test.py +++ b/tests/integration/test_structured_logging_json/test.py @@ -32,6 +32,30 @@ def is_json(log_json): return True +def validate_log_level(config, logs): + root = ET.fromstring(config) + key = root.findtext(".//names/level") or "level" + + valid_level_values = { + "Fatal", + "Critical", + "Error", + "Warning", + "Notice", + "Information", + "Debug", + "Trace", + "Test", + } + + length = min(10, len(logs)) + for i in range(0, length): + json_log = json.loads(logs[i]) + if json_log[key] not in valid_level_values: + return False + return True + + def validate_log_config_relation(config, logs, config_type): root = ET.fromstring(config) keys_in_config = set() @@ -78,8 +102,10 @@ def validate_logs(logs): def valiade_everything(config, node, config_type): node.query("SELECT 1") logs = node.grep_in_log("").split("\n") - return validate_logs(logs) and validate_log_config_relation( - config, logs, config_type + return ( + validate_logs(logs) + and validate_log_config_relation(config, logs, config_type) + and validate_log_level(config, logs) ) diff --git a/tests/integration/test_system_reload_async_metrics/__init__.py b/tests/integration/test_system_reload_async_metrics/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_system_reload_async_metrics/configs/default.xml b/tests/integration/test_system_reload_async_metrics/configs/default.xml new file mode 100644 index 00000000000..eb168c1967d --- /dev/null +++ b/tests/integration/test_system_reload_async_metrics/configs/default.xml @@ -0,0 +1,6 @@ + + + 60000 + 60000 + + diff --git a/tests/integration/test_system_reload_async_metrics/test.py b/tests/integration/test_system_reload_async_metrics/test.py new file mode 100644 index 00000000000..f0572cd2db6 --- /dev/null +++ b/tests/integration/test_system_reload_async_metrics/test.py @@ -0,0 +1,47 @@ +import os +import pytest +import shutil +import time +from helpers.cluster import ClickHouseCluster + +# Tests that SYSTEM RELOAD ASYNCHRONOUS METRICS works. + +# Config default.xml sets a large refresh interval of asynchronous metrics, so that the periodic updates don't interfere with the manual +# update below. 
+cluster = ClickHouseCluster(__file__) +node = cluster.add_instance( + "node", + main_configs=["configs/default.xml"], + stay_alive=True, +) + + +@pytest.fixture(scope="module") +def start_cluster(): + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + + +SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) +CONFIG_DIR = os.path.join(SCRIPT_DIR, "configs") + + +def test_system_reload_async_metrics(start_cluster): + node.query("SYSTEM DROP QUERY CACHE") + + res1 = node.query( + "SELECT value FROM system.asynchronous_metrics WHERE metric = 'NumberOfTables'" + ) + + # create table and test that the table creation is reflected in the asynchronous metrics + node.query("CREATE TABLE tab (col UInt64) ENGINE MergeTree ORDER BY tuple()") + + node.query("SYSTEM RELOAD ASYNCHRONOUS METRICS") + + res2 = node.query( + "SELECT value FROM system.asynchronous_metrics WHERE metric = 'NumberOfTables'" + ) + assert int(res1.rstrip()) + 1 == int(res2.rstrip()) diff --git a/tests/jepsen.clickhouse/src/jepsen/clickhouse/utils.clj b/tests/jepsen.clickhouse/src/jepsen/clickhouse/utils.clj index 9a77f070ca3..42b6b457b34 100644 --- a/tests/jepsen.clickhouse/src/jepsen/clickhouse/utils.clj +++ b/tests/jepsen.clickhouse/src/jepsen/clickhouse/utils.clj @@ -45,11 +45,14 @@ dest-file (str dest-folder "/clickhouse") dest-symlink (str root-folder "/" expected-file-name) wget-opts (concat cu/std-wget-opts [:-O dest-file])] - (when-not (cu/exists? dest-file) - (info "Downloading" url) - (do (c/exec :mkdir :-p dest-folder) - (c/cd dest-folder + (if-not (cu/exists? dest-file) + (do + (info "Downloading" url) + (do (c/exec :mkdir :-p dest-folder) + (c/cd dest-folder (cu/wget-helper! wget-opts url)))) + (info "Binary is already downloaded")) + (c/exec :rm :-rf dest-symlink) (c/exec :ln :-s dest-file dest-symlink) dest-symlink)) diff --git a/tests/performance/final_big_column.xml b/tests/performance/final_big_column.xml new file mode 100644 index 00000000000..1fd586d2d90 --- /dev/null +++ b/tests/performance/final_big_column.xml @@ -0,0 +1,21 @@ + + + 1 + 20G + + + + CREATE TABLE optimized_select_final (d Date, key UInt64, value String) + ENGINE = ReplacingMergeTree() + PARTITION BY toYYYYMM(d) ORDER BY key + + + INSERT INTO optimized_select_final SELECT toDate('2000-01-01'), 2*number, randomPrintableASCII(1000) FROM numbers(5000000) + INSERT INTO optimized_select_final SELECT toDate('2020-01-01'), 2*number+1, randomPrintableASCII(1000) FROM numbers(5000000) + + SELECT * FROM optimized_select_final FINAL FORMAT Null SETTINGS max_threads = 8 + SELECT * FROM optimized_select_final FINAL WHERE key % 10 = 0 FORMAT Null + + DROP TABLE IF EXISTS optimized_select_final + + diff --git a/tests/performance/norm_distance.xml b/tests/performance/norm_distance.xml index 3dc8cbd70e6..1e879607dac 100644 --- a/tests/performance/norm_distance.xml +++ b/tests/performance/norm_distance.xml @@ -4,8 +4,9 @@ element_type - UInt8 - Int16 + + + Int32 Int64 Float32 @@ -21,6 +22,7 @@ + INSERT INTO vecs_{element_type} @@ -28,46 +30,27 @@ SELECT number AS n, [ - rand(n*10), - rand(n*10+1), - rand(n*10+2), - rand(n*10+3), - rand(n*10+4), - rand(n*10+5), - rand(n*10+6), - rand(n*10+7), - rand(n*10+8), - rand(n*10+9) + rand(n*10), rand(n*10+1), rand(n*10+2), rand(n*10+3), rand(n*10+4), rand(n*10+5), rand(n*10+6), rand(n*10+7), rand(n*10+8), rand(n*10+9), + rand(n*10+10), rand(n*10+11), rand(n*10+12), rand(n*10+13), rand(n*10+14), rand(n*10+15), rand(n*10+16), rand(n*10+17), rand(n*10+18), rand(n*10+19), + rand(n*10+20), 
rand(n*10+21), rand(n*10+22), rand(n*10+23), rand(n*10+24), rand(n*10+25), rand(n*10+26), rand(n*10+27), rand(n*10+28), rand(n*10+29), + rand(n*10+30), rand(n*10+31), rand(n*10+32), rand(n*10+33), rand(n*10+34), rand(n*10+35), rand(n*10+36), rand(n*10+37), rand(n*10+38), rand(n*10+39), + rand(n*10+40), rand(n*10+41), rand(n*10+42), rand(n*10+43), rand(n*10+44), rand(n*10+45), rand(n*10+46), rand(n*10+47), rand(n*10+48), rand(n*10+49), + rand(n*10+50), rand(n*10+51), rand(n*10+52), rand(n*10+53), rand(n*10+54), rand(n*10+55), rand(n*10+56), rand(n*10+57), rand(n*10+58), rand(n*10+59), + rand(n*10+60), rand(n*10+61), rand(n*10+62), rand(n*10+63), rand(n*10+64), rand(n*10+65), rand(n*10+66), rand(n*10+67), rand(n*10+68), rand(n*10+69), + rand(n*10+70), rand(n*10+71), rand(n*10+72), rand(n*10+73), rand(n*10+74), rand(n*10+75), rand(n*10+76), rand(n*10+77), rand(n*10+78), rand(n*10+79), + rand(n*10+80), rand(n*10+81), rand(n*10+82), rand(n*10+83), rand(n*10+84), rand(n*10+85), rand(n*10+86), rand(n*10+87), rand(n*10+88), rand(n*10+89), + rand(n*10+90), rand(n*10+91), rand(n*10+92), rand(n*10+93), rand(n*10+94), rand(n*10+95), rand(n*10+96), rand(n*10+97), rand(n*10+98), rand(n*10+99), + rand(n*10+100), rand(n*10+101), rand(n*10+102), rand(n*10+103), rand(n*10+104), rand(n*10+105), rand(n*10+106), rand(n*10+107), rand(n*10+108), rand(n*10+109), + rand(n*10+110), rand(n*10+111), rand(n*10+112), rand(n*10+113), rand(n*10+114), rand(n*10+115), rand(n*10+116), rand(n*10+117), rand(n*10+118), rand(n*10+119), + rand(n*10+120), rand(n*10+121), rand(n*10+122), rand(n*10+123), rand(n*10+124), rand(n*10+125), rand(n*10+126), rand(n*10+127), rand(n*10+128), rand(n*10+129), + rand(n*10+130), rand(n*10+131), rand(n*10+132), rand(n*10+133), rand(n*10+134), rand(n*10+135), rand(n*10+136), rand(n*10+137), rand(n*10+138), rand(n*10+139), + rand(n*10+140), rand(n*10+141), rand(n*10+142), rand(n*10+143), rand(n*10+144), rand(n*10+145), rand(n*10+146), rand(n*10+147), rand(n*10+148), rand(n*10+149) ] AS v FROM system.numbers - LIMIT 10000000 + LIMIT 5000000 ); - - - - CREATE TABLE tuples_{element_type} ( - t Tuple( - {element_type}, - {element_type}, - {element_type}, - {element_type}, - {element_type}, - {element_type}, - {element_type}, - {element_type}, - {element_type}, - {element_type} - ) - ) ENGINE=Memory; - - - - INSERT INTO tuples_{element_type} - SELECT (v[1], v[2], v[3], v[4], v[5], v[6], v[7], v[8], v[9], v[10]) FROM vecs_{element_type}; - - 1 @@ -84,17 +67,11 @@ - - SELECT sum(dist) FROM (SELECT {norm}Norm(t) AS dist FROM tuples_{element_type}) - WITH (SELECT t FROM tuples_{element_type} limit 1) AS a SELECT sum(dist) FROM (SELECT {norm}Distance(a, t) AS dist FROM tuples_{element_type}) - WITH (SELECT t FROM tuples_{element_type} limit 1) AS a SELECT sum(dist) FROM (SELECT cosineDistance(a, t) AS dist FROM tuples_{element_type}) - SELECT sum(dist) FROM (SELECT {norm}Norm(v) AS dist FROM vecs_{element_type}) WITH (SELECT v FROM vecs_{element_type} limit 1) AS a SELECT sum(dist) FROM (SELECT {norm}Distance(a, v) AS dist FROM vecs_{element_type}) WITH (SELECT v FROM vecs_{element_type} limit 1) AS a SELECT sum(dist) FROM (SELECT cosineDistance(a, v) AS dist FROM vecs_{element_type}) DROP TABLE vecs_{element_type} - DROP TABLE tuples_{element_type} diff --git a/tests/performance/norm_distance_float.xml b/tests/performance/norm_distance_float.xml deleted file mode 100644 index e71d8eb6281..00000000000 --- a/tests/performance/norm_distance_float.xml +++ /dev/null @@ -1,95 +0,0 @@ - - - - - element_type - - 
Float32 - Float64 - - - - - - CREATE TABLE vecs_{element_type} ( - v Array({element_type}) - ) ENGINE=Memory; - - - - - - INSERT INTO vecs_{element_type} - SELECT v FROM ( - SELECT - number AS n, - [ - rand(n*10), - rand(n*10+1), - rand(n*10+2), - rand(n*10+3), - rand(n*10+4), - rand(n*10+5), - rand(n*10+6), - rand(n*10+7), - rand(n*10+8), - rand(n*10+9), - rand(n*10), - rand(n*10+1), - rand(n*10+2), - rand(n*10+3), - rand(n*10+4), - rand(n*10+5), - rand(n*10+6), - rand(n*10+7), - rand(n*10+8), - rand(n*10+9), - rand(n*10), - rand(n*10+1), - rand(n*10+2), - rand(n*10+3), - rand(n*10+4), - rand(n*10+5), - rand(n*10+6), - rand(n*10+7), - rand(n*10+8), - rand(n*10+9), - rand(n*10), - rand(n*10+1), - rand(n*10+2), - rand(n*10+3), - rand(n*10+4), - rand(n*10+5), - rand(n*10+6), - rand(n*10+7) - ] AS v - FROM system.numbers - LIMIT 10000000 - ); - - - - 1 - - - - - - norm - - L1 - L2 - L2Squared - Linf - - - - - - SELECT sum(dist) FROM (SELECT {norm}Norm(v) AS dist FROM vecs_{element_type}) - WITH (SELECT v FROM vecs_{element_type} limit 1) AS a SELECT sum(dist) FROM (SELECT {norm}Distance(a, v) AS dist FROM vecs_{element_type}) - WITH (SELECT v FROM vecs_{element_type} limit 1) AS a SELECT sum(dist) FROM (SELECT cosineDistance(a, v) AS dist FROM vecs_{element_type}) - - DROP TABLE vecs_{element_type} - - diff --git a/tests/performance/parallel_final.xml b/tests/performance/parallel_final.xml index d7ea0240105..97261f93983 100644 --- a/tests/performance/parallel_final.xml +++ b/tests/performance/parallel_final.xml @@ -9,51 +9,53 @@ - collapsing + replacing - collapsing_final_16p_ord - collapsing_final_16p_rnd - collapsing_final_16p_int_keys_ord - collapsing_final_16p_int_keys_rnd - collapsing_final_16p_str_keys_ord - collapsing_final_16p_str_keys_rnd - collapsing_final_1024p_ord - collapsing_final_1024p_rnd - collapsing_final_1p_ord + replacing_final_16p_ord + replacing_final_16p_rnd + replacing_final_16p_int_keys_ord + replacing_final_16p_int_keys_rnd + replacing_final_16p_str_keys_ord + replacing_final_16p_str_keys_rnd + replacing_final_1024p_ord + replacing_final_1024p_rnd + replacing_final_1p_ord - create table collapsing_final_16p_ord (key1 UInt32, key2 String, sign Int8, s UInt64) engine = CollapsingMergeTree(sign) order by (key1, key2) partition by intDiv(key1, 8192 * 64) - create table collapsing_final_16p_rnd (key1 UInt32, key2 String, sign Int8, s UInt64) engine = CollapsingMergeTree(sign) order by (key1, key2) partition by key1 % 16 - create table collapsing_final_16p_int_keys_ord (key1 UInt32, key2 UInt32, key3 UInt32, key4 UInt32, key5 UInt32, key6 UInt32, key7 UInt32, key8 UInt32, sign Int8, s UInt64) engine = CollapsingMergeTree(sign) order by (key1, key2, key3, key4, key5, key6, key7, key8) partition by intDiv(key1, 8192 * 64) - create table collapsing_final_16p_int_keys_rnd (key1 UInt32, key2 UInt32, key3 UInt32, key4 UInt32, key5 UInt32, key6 UInt32, key7 UInt32, key8 UInt32, sign Int8, s UInt64) engine = CollapsingMergeTree(sign) order by (key1, key2, key3, key4, key5, key6, key7, key8) partition by key1 % 16 - create table collapsing_final_16p_str_keys_ord (key1 UInt32, key2 String, key3 String, key4 String, key5 String, key6 String, key7 String, key8 String, sign Int8, s UInt64) engine = CollapsingMergeTree(sign) order by (key1, key2, key3, key4, key5, key6, key7, key8) partition by intDiv(key1, 8192 * 64) - create table collapsing_final_16p_str_keys_rnd (key1 UInt32, key2 String, key3 String, key4 String, key5 String, key6 String, key7 String, key8 String, sign Int8, s 
UInt64) engine = CollapsingMergeTree(sign) order by (key1, key2, key3, key4, key5, key6, key7, key8) partition by key1 % 16 - create table collapsing_final_1024p_ord (key1 UInt32, sign Int8, s UInt64) engine = CollapsingMergeTree(sign) order by (key1) partition by intDiv(key1, 8192 * 2) - create table collapsing_final_1024p_rnd (key1 UInt32, sign Int8, s UInt64) engine = CollapsingMergeTree(sign) order by (key1) partition by key1 % 1024 - create table collapsing_final_1p_ord (key1 UInt64, key2 UInt64, sign Int8, s UInt64) engine = CollapsingMergeTree(sign) order by (key1, key2) + create table replacing_final_16p_ord (key1 UInt32, key2 String, ver Int8, s UInt64) engine = ReplacingMergeTree(ver) order by (key1, key2) partition by intDiv(key1, 8192 * 64) + create table replacing_final_16p_rnd (key1 UInt32, key2 String, ver Int8, s UInt64) engine = ReplacingMergeTree(ver) order by (key1, key2) partition by key1 % 16 + create table replacing_final_16p_int_keys_ord (key1 UInt32, key2 UInt32, key3 UInt32, key4 UInt32, key5 UInt32, key6 UInt32, key7 UInt32, key8 UInt32, ver Int8, s UInt64) engine = ReplacingMergeTree(ver) order by (key1, key2, key3, key4, key5, key6, key7, key8) partition by intDiv(key1, 8192 * 64) + create table replacing_final_16p_int_keys_rnd (key1 UInt32, key2 UInt32, key3 UInt32, key4 UInt32, key5 UInt32, key6 UInt32, key7 UInt32, key8 UInt32, ver Int8, s UInt64) engine = ReplacingMergeTree(ver) order by (key1, key2, key3, key4, key5, key6, key7, key8) partition by key1 % 16 + create table replacing_final_16p_str_keys_ord (key1 UInt32, key2 String, key3 String, key4 String, key5 String, key6 String, key7 String, key8 String, ver Int8, s UInt64) engine = ReplacingMergeTree(ver) order by (key1, key2, key3, key4, key5, key6, key7, key8) partition by intDiv(key1, 8192 * 64) + create table replacing_final_16p_str_keys_rnd (key1 UInt32, key2 String, key3 String, key4 String, key5 String, key6 String, key7 String, key8 String, ver Int8, s UInt64) engine = ReplacingMergeTree(ver) order by (key1, key2, key3, key4, key5, key6, key7, key8) partition by key1 % 16 + create table replacing_final_1024p_ord (key1 UInt32, ver Int8, s UInt64) engine = ReplacingMergeTree(ver) order by (key1) partition by intDiv(key1, 8192 * 2) + create table replacing_final_1024p_rnd (key1 UInt32, ver Int8, s UInt64) engine = ReplacingMergeTree(ver) order by (key1) partition by key1 % 1024 + create table replacing_final_1p_ord (key1 UInt64, key2 UInt64, ver Int8, s UInt64) engine = ReplacingMergeTree(ver) order by (key1, key2) - insert into collapsing_final_16p_ord select number, number, 1, number from numbers_mt(8388608) - insert into collapsing_final_16p_rnd select sipHash64(number), number, 1, number from numbers_mt(8388608) - insert into collapsing_final_16p_int_keys_ord select number, number, number, number, number, number, number, number, 1, number from numbers_mt(8388608) - insert into collapsing_final_16p_int_keys_rnd select sipHash64(number), number, number, number, number, number, number, number, 1, number from numbers_mt(8388608) - insert into collapsing_final_16p_str_keys_ord select number, number, number, number, number, number, number, number, 1, number from numbers_mt(8388608) - insert into collapsing_final_16p_str_keys_rnd select sipHash64(number), number, number, number, number, number, number, number, 1, number from numbers_mt(8388608) + insert into replacing_final_16p_ord select number, number, 1, number from numbers_mt(8388608) + insert into replacing_final_16p_rnd select sipHash64(number), 
number, 1, number from numbers_mt(8388608) + insert into replacing_final_16p_int_keys_ord select number, number, number, number, number, number, number, number, 1, number from numbers_mt(8388608) + insert into replacing_final_16p_int_keys_rnd select sipHash64(number), number, number, number, number, number, number, number, 1, number from numbers_mt(8388608) + insert into replacing_final_16p_str_keys_ord select number, number, number, number, number, number, number, number, 1, number from numbers_mt(8388608) + insert into replacing_final_16p_str_keys_rnd select sipHash64(number), number, number, number, number, number, number, number, 1, number from numbers_mt(8388608) - insert into collapsing_final_1024p_ord select number, 1, number from numbers_mt(16777216) - insert into collapsing_final_1024p_rnd select number, 1, number from numbers_mt(16777216) + insert into replacing_final_1024p_ord select number, 1, number from numbers_mt(16777216) + insert into replacing_final_1024p_rnd select number, 1, number from numbers_mt(16777216) - insert into collapsing_final_1p_ord select number, number + 1, 1, number from numbers_mt(5e7) + insert into replacing_final_1p_ord select number, number + 1, 1, number from numbers_mt(5e7) - optimize table {collapsing} final + optimize table {replacing} final - SELECT count() FROM {collapsing} final - SELECT sum(s) FROM {collapsing} final group by key1 limit 10 - SELECT sum(s) FROM {collapsing} final group by key1 % 8192 limit 10 + SELECT count() FROM {replacing} final + SELECT sum(s) FROM {replacing} final group by key1 limit 10 + SELECT sum(s) FROM {replacing} final group by key1 % 8192 limit 10 - DROP TABLE IF EXISTS {collapsing} + DROP TABLE IF EXISTS {replacing} + + DROP TABLE IF EXISTS {replacing} diff --git a/tests/performance/scripts/compare.sh b/tests/performance/scripts/compare.sh index 7dc522dca7a..39c6854fbf9 100755 --- a/tests/performance/scripts/compare.sh +++ b/tests/performance/scripts/compare.sh @@ -444,10 +444,10 @@ create view query_logs as create table query_run_metric_arrays engine File(TSV, 'analyze/query-run-metric-arrays.tsv') as with ( - -- sumMapState with the list of all keys with '-0.' values. Negative zero is because - -- sumMap removes keys with positive zeros. 
+ -- sumMapState with the list of all keys with nullable '0' values because sumMap removes keys with default values + -- and 0::Nullable != NULL with (select groupUniqArrayArray(mapKeys(ProfileEvents)) from query_logs) as all_names - select arrayReduce('sumMapState', [(all_names, arrayMap(x->-0., all_names))]) + select arrayReduce('sumMapState', [(all_names, arrayMap(x->0::Nullable(Float64), all_names))]) ) as all_metrics select test, query_index, version, query_id, (finalizeAggregation( @@ -456,17 +456,15 @@ create table query_run_metric_arrays engine File(TSV, 'analyze/query-run-metric- all_metrics, arrayReduce('sumMapState', [(mapKeys(ProfileEvents), - arrayMap(x->toFloat64(x), mapValues(ProfileEvents)))] + arrayMap(x->toNullable(toFloat64(x)), mapValues(ProfileEvents)))] ), arrayReduce('sumMapState', [( ['client_time', 'server_time', 'memory_usage'], - arrayMap(x->if(x != 0., x, -0.), [ - toFloat64(query_runs.time), - toFloat64(query_duration_ms / 1000.), - toFloat64(memory_usage)]))]) + [toNullable(toFloat64(query_runs.time)), toNullable(toFloat64(query_duration_ms / 1000.)), toNullable(toFloat64(memory_usage))] + )]) ] )) as metrics_tuple).1 metric_names, - metrics_tuple.2 metric_values + arrayMap(x->if(isNaN(x),0,x), metrics_tuple.2) metric_values from query_logs right join query_runs on query_logs.query_id = query_runs.query_id diff --git a/tests/performance/scripts/download.sh b/tests/performance/scripts/download.sh index cb243b655c6..7a740a38fd2 100755 --- a/tests/performance/scripts/download.sh +++ b/tests/performance/scripts/download.sh @@ -67,8 +67,8 @@ function download mkdir ~/fg ||: ( cd ~/fg - wget -nv -nd -c "https://raw.githubusercontent.com/brendangregg/FlameGraph/master/flamegraph.pl" - wget -nv -nd -c "https://raw.githubusercontent.com/brendangregg/FlameGraph/master/difffolded.pl" + wget -nv -nd -c "https://raw.githubusercontent.com/brendangregg/FlameGraph/cd9ee4c4449775a2f867acf31c84b7fe4b132ad5/flamegraph.pl" + wget -nv -nd -c "https://raw.githubusercontent.com/brendangregg/FlameGraph/cd9ee4c4449775a2f867acf31c84b7fe4b132ad5/difffolded.pl" chmod +x ~/fg/difffolded.pl chmod +x ~/fg/flamegraph.pl ) & diff --git a/tests/performance/scripts/entrypoint.sh b/tests/performance/scripts/entrypoint.sh index 95ffe44b654..0c3bfa550f4 100755 --- a/tests/performance/scripts/entrypoint.sh +++ b/tests/performance/scripts/entrypoint.sh @@ -14,7 +14,8 @@ SCRIPT_DIR="$(dirname "$(readlink -f "$0")")" function curl_with_retry { for _ in 1 2 3 4 5 6 7 8 9 10; do - if curl --fail --head "$1";then + if curl --fail --head "$1" + then return 0 else sleep 1 @@ -117,8 +118,8 @@ then # far in the future and have unrelated test changes. base=$(git -C right/ch merge-base pr origin/master) git -C right/ch diff --name-only "$base" pr -- . | tee all-changed-files.txt - git -C right/ch diff --name-only "$base" pr -- tests/performance | tee changed-test-definitions.txt - git -C right/ch diff --name-only "$base" pr -- :!tests/performance :!docker/test/performance-comparison | tee other-changed-files.txt + git -C right/ch diff --name-only "$base" pr -- tests/performance/*.xml | tee changed-test-definitions.txt + git -C right/ch diff --name-only "$base" pr -- :!tests/performance/*.xml :!docker/test/performance-comparison | tee other-changed-files.txt fi # Set python output encoding so that we can print queries with non-ASCII letters. 
diff --git a/tests/performance/uniq_without_key_dist.xml b/tests/performance/uniq_without_key_dist.xml new file mode 100644 index 00000000000..600b378a7f7 --- /dev/null +++ b/tests/performance/uniq_without_key_dist.xml @@ -0,0 +1,22 @@ + + + + uniq_keys + + 100000 + 250000 + 500000 + 1000000 + 5000000 + + + + + create table t_{uniq_keys}(a UInt64) engine=MergeTree order by tuple() + + insert into t_{uniq_keys} select number % {uniq_keys} from numbers_mt(5e7) + + SELECT uniqExact(a) FROM remote('127.0.0.{{1,2}}', default, t_{uniq_keys}) SETTINGS max_threads=5 + + drop table t_{uniq_keys} + diff --git a/tests/queries/0_stateless/00085_visible_width_of_tuple_of_dates.sql b/tests/queries/0_stateless/00085_visible_width_of_tuple_of_dates.sql index 56e93d24087..09208b9151b 100644 --- a/tests/queries/0_stateless/00085_visible_width_of_tuple_of_dates.sql +++ b/tests/queries/0_stateless/00085_visible_width_of_tuple_of_dates.sql @@ -1 +1,2 @@ +SET output_format_pretty_color=1; SELECT (toDate('2000-01-01'), toDate('2000-01-01')) AS x FORMAT PrettyCompact; diff --git a/tests/queries/0_stateless/00089_group_by_arrays_of_fixed.sql b/tests/queries/0_stateless/00089_group_by_arrays_of_fixed.sql index 60ec1cb3396..a068671b999 100644 --- a/tests/queries/0_stateless/00089_group_by_arrays_of_fixed.sql +++ b/tests/queries/0_stateless/00089_group_by_arrays_of_fixed.sql @@ -1 +1 @@ -SELECT arr, count() AS c FROM (SELECT arrayMap(x -> x % 2, groupArray(number)) AS arr FROM (SELECT number FROM system.numbers LIMIT 10000) GROUP BY number % ((number * 0xABCDEF0123456789 % 1234) + 1)) GROUP BY arr ORDER BY c DESC, arr ASC; +SELECT arr, count() AS c FROM (SELECT arrayMap(x -> x % 2, arraySort(groupArray(number))) AS arr FROM (SELECT number FROM system.numbers LIMIT 10000) GROUP BY number % ((number * 0xABCDEF0123456789 % 1234) + 1)) GROUP BY arr ORDER BY c DESC, arr ASC; \ No newline at end of file diff --git a/tests/queries/0_stateless/00098_k_union_all.sql b/tests/queries/0_stateless/00098_k_union_all.sql index 311e5bb19c4..059d27075d7 100644 --- a/tests/queries/0_stateless/00098_k_union_all.sql +++ b/tests/queries/0_stateless/00098_k_union_all.sql @@ -1,3 +1,4 @@ +SET output_format_pretty_color=1; SELECT 1 FORMAT PrettySpace; SELECT 1 UNION ALL SELECT 1 FORMAT PrettySpace; SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 FORMAT PrettySpace; diff --git a/tests/queries/0_stateless/00273_quantiles.reference b/tests/queries/0_stateless/00273_quantiles.reference index 55f7d871fe6..2abeaa39dc5 100644 --- a/tests/queries/0_stateless/00273_quantiles.reference +++ b/tests/queries/0_stateless/00273_quantiles.reference @@ -2,10 +2,12 @@ [500] [500] [500] +[497.78] [0,1,10,50,100,200,300,400,500,600,700,800,900,950,990,999,1000] [0,1,10,50,100,200,300,400,500,600,700,800,900,950,990,999,1000] [0,1,10,50,99.6,199.7,299.8,399.9,500,600.1,700.2,800.3,900.4,950,990,999,1000] [0,1,10,50,100,200,300,400,500,600,700,800,900,950,990,999,1000] +[0,1.01,10.07,49.9,100.49,198.37,301.91,399.47,497.78,595.95,699.36,804.46,907.03,944.05,982.58,1002.43,1002.43] 1 333334 [699140.3,835642,967430.8] [699999,833333,966666] 2 266667 [426546,536239,638933.4] [426665,533332,639999] 3 114285 [296938,342324,388778] [297142,342856,388570] diff --git a/tests/queries/0_stateless/00273_quantiles.sql b/tests/queries/0_stateless/00273_quantiles.sql index f5b739b8be1..791ced6bc5d 100644 --- a/tests/queries/0_stateless/00273_quantiles.sql +++ b/tests/queries/0_stateless/00273_quantiles.sql @@ -2,11 +2,13 @@ SELECT quantiles(0.5)(x) FROM (SELECT number AS x 
FROM system.numbers LIMIT 1001 SELECT quantilesExact(0.5)(x) FROM (SELECT number AS x FROM system.numbers LIMIT 1001); SELECT quantilesTDigest(0.5)(x) FROM (SELECT number AS x FROM system.numbers LIMIT 1001); SELECT quantilesDeterministic(0.5)(x, x) FROM (SELECT number AS x FROM system.numbers LIMIT 1001); +SELECT arrayMap(a -> round(a, 2), quantilesDD(0.01, 0.5)(x)) FROM (SELECT number AS x FROM system.numbers LIMIT 1001); SELECT quantiles(0, 0.001, 0.01, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.95, 0.99, 0.999, 1)(x) FROM (SELECT number AS x FROM system.numbers LIMIT 1001); SELECT quantilesExact(0, 0.001, 0.01, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.95, 0.99, 0.999, 1)(x) FROM (SELECT number AS x FROM system.numbers LIMIT 1001); SELECT quantilesTDigest(0, 0.001, 0.01, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.95, 0.99, 0.999, 1)(x) FROM (SELECT number AS x FROM system.numbers LIMIT 1001); SELECT quantilesDeterministic(0, 0.001, 0.01, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.95, 0.99, 0.999, 1)(x, x) FROM (SELECT number AS x FROM system.numbers LIMIT 1001); +SELECT arrayMap(a -> round(a, 2), quantilesDD(0.01, 0, 0.001, 0.01, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.95, 0.99, 0.999, 1)(x)) FROM (SELECT number AS x FROM system.numbers LIMIT 1001); -- The result slightly differs but it's ok since `quantilesDeterministic` is an approximate function. SET max_bytes_before_external_group_by = 0; diff --git a/tests/queries/0_stateless/00298_enum_width_and_cast.sql b/tests/queries/0_stateless/00298_enum_width_and_cast.sql index 3dda3e41270..35241f5124d 100644 --- a/tests/queries/0_stateless/00298_enum_width_and_cast.sql +++ b/tests/queries/0_stateless/00298_enum_width_and_cast.sql @@ -1,5 +1,6 @@ DROP TABLE IF EXISTS enum; +SET output_format_pretty_color=1; CREATE TABLE enum (x Enum8('Hello' = -100, '\\' = 0, '\t\\t' = 111), y UInt8) ENGINE = TinyLog; INSERT INTO enum (y) VALUES (0); SELECT * FROM enum ORDER BY x, y FORMAT PrettyCompact; diff --git a/tests/queries/0_stateless/00405_PrettyCompactMonoBlock.sh b/tests/queries/0_stateless/00405_PrettyCompactMonoBlock.sh index a5eca3d987e..710774700e9 100755 --- a/tests/queries/0_stateless/00405_PrettyCompactMonoBlock.sh +++ b/tests/queries/0_stateless/00405_PrettyCompactMonoBlock.sh @@ -5,10 +5,10 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . 
"$CURDIR"/../shell_config.sh echo 'one block' -${CLICKHOUSE_LOCAL} --query="SELECT * FROM numbers(2)" --format PrettyCompactMonoBlock +${CLICKHOUSE_LOCAL} --query="SELECT * FROM numbers(2) SETTINGS output_format_pretty_color=1" --format PrettyCompactMonoBlock echo 'two blocks' -${CLICKHOUSE_LOCAL} --query="SELECT * FROM numbers(1) UNION ALL SELECT * FROM numbers(1)" --format PrettyCompactMonoBlock +${CLICKHOUSE_LOCAL} --query="SELECT * FROM numbers(1) UNION ALL SELECT * FROM numbers(1) SETTINGS output_format_pretty_color=1" --format PrettyCompactMonoBlock echo 'extremes' -${CLICKHOUSE_LOCAL} --query="SELECT * FROM numbers(3)" --format PrettyCompactMonoBlock --extremes=1 +${CLICKHOUSE_LOCAL} --query="SELECT * FROM numbers(3) SETTINGS output_format_pretty_color=1" --format PrettyCompactMonoBlock --extremes=1 echo 'totals' -${CLICKHOUSE_LOCAL} --query="SELECT sum(number) FROM numbers(3) GROUP BY number%2 WITH TOTALS ORDER BY number%2" --format PrettyCompactMonoBlock +${CLICKHOUSE_LOCAL} --query="SELECT sum(number) FROM numbers(3) GROUP BY number%2 WITH TOTALS ORDER BY number%2 SETTINGS output_format_pretty_color=1" --format PrettyCompactMonoBlock diff --git a/tests/queries/0_stateless/00405_output_format_pretty_color.reference b/tests/queries/0_stateless/00405_output_format_pretty_color.reference new file mode 100644 index 00000000000..aebdb5f1343 --- /dev/null +++ b/tests/queries/0_stateless/00405_output_format_pretty_color.reference @@ -0,0 +1,363 @@ +0 +┏━━━━━━━┳━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━━━━━━┓ +┃ hello ┃ world ┃ tuple ┃ sometimes_nulls ┃ +┡━━━━━━━╇━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━━━━━━┩ +│ 0 │ 0 │ (0,'0') │ ᴺᵁᴸᴸ │ +├───────┼───────┼─────────┼─────────────────┤ +│ 1 │ 1 │ (1,'1') │ 1 │ +├───────┼───────┼─────────┼─────────────────┤ +│ 2 │ 2 │ (2,'2') │ 2 │ +├───────┼───────┼─────────┼─────────────────┤ +│ 3 │ 3 │ (3,'3') │ ᴺᵁᴸᴸ │ +├───────┼───────┼─────────┼─────────────────┤ +│ 4 │ 4 │ (4,'4') │ 1 │ +└───────┴───────┴─────────┴─────────────────┘ +┏━━━━━━━┳━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━━━━━━┓ +┃ hello ┃ world ┃ tuple ┃ sometimes_nulls ┃ +┡━━━━━━━╇━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━━━━━━┩ +│ 5 │ 5 │ (5,'5') │ 2 │ +├───────┼───────┼─────────┼─────────────────┤ +│ 6 │ 6 │ (6,'6') │ ᴺᵁᴸᴸ │ +├───────┼───────┼─────────┼─────────────────┤ +│ 7 │ 7 │ (7,'7') │ 1 │ +├───────┼───────┼─────────┼─────────────────┤ +│ 8 │ 8 │ (8,'8') │ 2 │ +├───────┼───────┼─────────┼─────────────────┤ +│ 9 │ 9 │ (9,'9') │ ᴺᵁᴸᴸ │ +└───────┴───────┴─────────┴─────────────────┘ +┌─hello─┬─world─┬─tuple───┬─sometimes_nulls─┐ +│ 0 │ 0 │ (0,'0') │ ᴺᵁᴸᴸ │ +│ 1 │ 1 │ (1,'1') │ 1 │ +│ 2 │ 2 │ (2,'2') │ 2 │ +│ 3 │ 3 │ (3,'3') │ ᴺᵁᴸᴸ │ +│ 4 │ 4 │ (4,'4') │ 1 │ +└───────┴───────┴─────────┴─────────────────┘ +┌─hello─┬─world─┬─tuple───┬─sometimes_nulls─┐ +│ 5 │ 5 │ (5,'5') │ 2 │ +│ 6 │ 6 │ (6,'6') │ ᴺᵁᴸᴸ │ +│ 7 │ 7 │ (7,'7') │ 1 │ +│ 8 │ 8 │ (8,'8') │ 2 │ +│ 9 │ 9 │ (9,'9') │ ᴺᵁᴸᴸ │ +└───────┴───────┴─────────┴─────────────────┘ + hello world tuple sometimes_nulls + + 0 0 (0,'0') ᴺᵁᴸᴸ + 1 1 (1,'1') 1 + 2 2 (2,'2') 2 + 3 3 (3,'3') ᴺᵁᴸᴸ + 4 4 (4,'4') 1 + hello world tuple sometimes_nulls + + 5 5 (5,'5') 2 + 6 6 (6,'6') ᴺᵁᴸᴸ + 7 7 (7,'7') 1 + 8 8 (8,'8') 2 + 9 9 (9,'9') ᴺᵁᴸᴸ +┌─hello─┬─world─┬─tuple───┬─sometimes_nulls─┐ +│ 0 │ 0 │ (0,'0') │ ᴺᵁᴸᴸ │ +│ 1 │ 1 │ (1,'1') │ 1 │ +│ 2 │ 2 │ (2,'2') │ 2 │ +│ 3 │ 3 │ (3,'3') │ ᴺᵁᴸᴸ │ +│ 4 │ 4 │ (4,'4') │ 1 │ +│ 5 │ 5 │ (5,'5') │ 2 │ +│ 6 │ 6 │ (6,'6') │ ᴺᵁᴸᴸ │ +│ 7 │ 7 │ (7,'7') │ 1 │ +│ 8 │ 8 │ (8,'8') │ 2 │ +│ 9 │ 9 │ (9,'9') │ ᴺᵁᴸᴸ │ +└───────┴───────┴─────────┴─────────────────┘ 
+┏━━━━━━━┳━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━━━━━━┓ +┃ hello ┃ world ┃ tuple ┃ sometimes_nulls ┃ +┡━━━━━━━╇━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━━━━━━┩ +│ 0 │ 0 │ (0,'0') │ ᴺᵁᴸᴸ │ +├───────┼───────┼─────────┼─────────────────┤ +│ 1 │ 1 │ (1,'1') │ 1 │ +├───────┼───────┼─────────┼─────────────────┤ +│ 2 │ 2 │ (2,'2') │ 2 │ +├───────┼───────┼─────────┼─────────────────┤ +│ 3 │ 3 │ (3,'3') │ ᴺᵁᴸᴸ │ +├───────┼───────┼─────────┼─────────────────┤ +│ 4 │ 4 │ (4,'4') │ 1 │ +└───────┴───────┴─────────┴─────────────────┘ +┏━━━━━━━┳━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━━━━━━┓ +┃ hello ┃ world ┃ tuple ┃ sometimes_nulls ┃ +┡━━━━━━━╇━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━━━━━━┩ +│ 5 │ 5 │ (5,'5') │ 2 │ +├───────┼───────┼─────────┼─────────────────┤ +│ 6 │ 6 │ (6,'6') │ ᴺᵁᴸᴸ │ +├───────┼───────┼─────────┼─────────────────┤ +│ 7 │ 7 │ (7,'7') │ 1 │ +├───────┼───────┼─────────┼─────────────────┤ +│ 8 │ 8 │ (8,'8') │ 2 │ +├───────┼───────┼─────────┼─────────────────┤ +│ 9 │ 9 │ (9,'9') │ ᴺᵁᴸᴸ │ +└───────┴───────┴─────────┴─────────────────┘ +┌─hello─┬─world─┬─tuple───┬─sometimes_nulls─┐ +│ 0 │ 0 │ (0,'0') │ ᴺᵁᴸᴸ │ +│ 1 │ 1 │ (1,'1') │ 1 │ +│ 2 │ 2 │ (2,'2') │ 2 │ +│ 3 │ 3 │ (3,'3') │ ᴺᵁᴸᴸ │ +│ 4 │ 4 │ (4,'4') │ 1 │ +└───────┴───────┴─────────┴─────────────────┘ +┌─hello─┬─world─┬─tuple───┬─sometimes_nulls─┐ +│ 5 │ 5 │ (5,'5') │ 2 │ +│ 6 │ 6 │ (6,'6') │ ᴺᵁᴸᴸ │ +│ 7 │ 7 │ (7,'7') │ 1 │ +│ 8 │ 8 │ (8,'8') │ 2 │ +│ 9 │ 9 │ (9,'9') │ ᴺᵁᴸᴸ │ +└───────┴───────┴─────────┴─────────────────┘ + hello world tuple sometimes_nulls + + 0 0 (0,'0') ᴺᵁᴸᴸ + 1 1 (1,'1') 1 + 2 2 (2,'2') 2 + 3 3 (3,'3') ᴺᵁᴸᴸ + 4 4 (4,'4') 1 + hello world tuple sometimes_nulls + + 5 5 (5,'5') 2 + 6 6 (6,'6') ᴺᵁᴸᴸ + 7 7 (7,'7') 1 + 8 8 (8,'8') 2 + 9 9 (9,'9') ᴺᵁᴸᴸ +1 +┏━━━━━━━┳━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━━━━━━┓ +┃ hello ┃ world ┃ tuple  ┃ sometimes_nulls ┃ +┡━━━━━━━╇━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━━━━━━┩ +│ 0 │ 0 │ (0,'0') │ ᴺᵁᴸᴸ │ +├───────┼───────┼─────────┼─────────────────┤ +│ 1 │ 1 │ (1,'1') │ 1 │ +├───────┼───────┼─────────┼─────────────────┤ +│ 2 │ 2 │ (2,'2') │ 2 │ +├───────┼───────┼─────────┼─────────────────┤ +│ 3 │ 3 │ (3,'3') │ ᴺᵁᴸᴸ │ +├───────┼───────┼─────────┼─────────────────┤ +│ 4 │ 4 │ (4,'4') │ 1 │ +└───────┴───────┴─────────┴─────────────────┘ +┏━━━━━━━┳━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━━━━━━┓ +┃ hello ┃ world ┃ tuple  ┃ sometimes_nulls ┃ +┡━━━━━━━╇━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━━━━━━┩ +│ 5 │ 5 │ (5,'5') │ 2 │ +├───────┼───────┼─────────┼─────────────────┤ +│ 6 │ 6 │ (6,'6') │ ᴺᵁᴸᴸ │ +├───────┼───────┼─────────┼─────────────────┤ +│ 7 │ 7 │ (7,'7') │ 1 │ +├───────┼───────┼─────────┼─────────────────┤ +│ 8 │ 8 │ (8,'8') │ 2 │ +├───────┼───────┼─────────┼─────────────────┤ +│ 9 │ 9 │ (9,'9') │ ᴺᵁᴸᴸ │ +└───────┴───────┴─────────┴─────────────────┘ +┌─hello─┬─world─┬─tuple───┬─sometimes_nulls─┐ +│ 0 │ 0 │ (0,'0') │ ᴺᵁᴸᴸ │ +│ 1 │ 1 │ (1,'1') │ 1 │ +│ 2 │ 2 │ (2,'2') │ 2 │ +│ 3 │ 3 │ (3,'3') │ ᴺᵁᴸᴸ │ +│ 4 │ 4 │ (4,'4') │ 1 │ +└───────┴───────┴─────────┴─────────────────┘ +┌─hello─┬─world─┬─tuple───┬─sometimes_nulls─┐ +│ 5 │ 5 │ (5,'5') │ 2 │ +│ 6 │ 6 │ (6,'6') │ ᴺᵁᴸᴸ │ +│ 7 │ 7 │ (7,'7') │ 1 │ +│ 8 │ 8 │ (8,'8') │ 2 │ +│ 9 │ 9 │ (9,'9') │ ᴺᵁᴸᴸ │ +└───────┴───────┴─────────┴─────────────────┘ + hello world tuple sometimes_nulls + + 0 0 (0,'0') ᴺᵁᴸᴸ + 1 1 (1,'1') 1 + 2 2 (2,'2') 2 + 3 3 (3,'3') ᴺᵁᴸᴸ + 4 4 (4,'4') 1 + hello world tuple sometimes_nulls + + 5 5 (5,'5') 2 + 6 6 (6,'6') ᴺᵁᴸᴸ + 7 7 (7,'7') 1 + 8 8 (8,'8') 2 + 9 9 (9,'9') ᴺᵁᴸᴸ +┌─hello─┬─world─┬─tuple───┬─sometimes_nulls─┐ +│ 0 │ 0 │ (0,'0') │ ᴺᵁᴸᴸ │ +│ 1 │ 1 │ (1,'1') │ 1 │ +│ 2 │ 2 │ (2,'2') │ 2 │ +│ 3 │ 3 
│ (3,'3') │ ᴺᵁᴸᴸ │ +│ 4 │ 4 │ (4,'4') │ 1 │ +│ 5 │ 5 │ (5,'5') │ 2 │ +│ 6 │ 6 │ (6,'6') │ ᴺᵁᴸᴸ │ +│ 7 │ 7 │ (7,'7') │ 1 │ +│ 8 │ 8 │ (8,'8') │ 2 │ +│ 9 │ 9 │ (9,'9') │ ᴺᵁᴸᴸ │ +└───────┴───────┴─────────┴─────────────────┘ +┏━━━━━━━┳━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━━━━━━┓ +┃ hello ┃ world ┃ tuple ┃ sometimes_nulls ┃ +┡━━━━━━━╇━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━━━━━━┩ +│ 0 │ 0 │ (0,'0') │ ᴺᵁᴸᴸ │ +├───────┼───────┼─────────┼─────────────────┤ +│ 1 │ 1 │ (1,'1') │ 1 │ +├───────┼───────┼─────────┼─────────────────┤ +│ 2 │ 2 │ (2,'2') │ 2 │ +├───────┼───────┼─────────┼─────────────────┤ +│ 3 │ 3 │ (3,'3') │ ᴺᵁᴸᴸ │ +├───────┼───────┼─────────┼─────────────────┤ +│ 4 │ 4 │ (4,'4') │ 1 │ +└───────┴───────┴─────────┴─────────────────┘ +┏━━━━━━━┳━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━━━━━━┓ +┃ hello ┃ world ┃ tuple ┃ sometimes_nulls ┃ +┡━━━━━━━╇━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━━━━━━┩ +│ 5 │ 5 │ (5,'5') │ 2 │ +├───────┼───────┼─────────┼─────────────────┤ +│ 6 │ 6 │ (6,'6') │ ᴺᵁᴸᴸ │ +├───────┼───────┼─────────┼─────────────────┤ +│ 7 │ 7 │ (7,'7') │ 1 │ +├───────┼───────┼─────────┼─────────────────┤ +│ 8 │ 8 │ (8,'8') │ 2 │ +├───────┼───────┼─────────┼─────────────────┤ +│ 9 │ 9 │ (9,'9') │ ᴺᵁᴸᴸ │ +└───────┴───────┴─────────┴─────────────────┘ +┌─hello─┬─world─┬─tuple───┬─sometimes_nulls─┐ +│ 0 │ 0 │ (0,'0') │ ᴺᵁᴸᴸ │ +│ 1 │ 1 │ (1,'1') │ 1 │ +│ 2 │ 2 │ (2,'2') │ 2 │ +│ 3 │ 3 │ (3,'3') │ ᴺᵁᴸᴸ │ +│ 4 │ 4 │ (4,'4') │ 1 │ +└───────┴───────┴─────────┴─────────────────┘ +┌─hello─┬─world─┬─tuple───┬─sometimes_nulls─┐ +│ 5 │ 5 │ (5,'5') │ 2 │ +│ 6 │ 6 │ (6,'6') │ ᴺᵁᴸᴸ │ +│ 7 │ 7 │ (7,'7') │ 1 │ +│ 8 │ 8 │ (8,'8') │ 2 │ +│ 9 │ 9 │ (9,'9') │ ᴺᵁᴸᴸ │ +└───────┴───────┴─────────┴─────────────────┘ + hello world tuple sometimes_nulls + + 0 0 (0,'0') ᴺᵁᴸᴸ + 1 1 (1,'1') 1 + 2 2 (2,'2') 2 + 3 3 (3,'3') ᴺᵁᴸᴸ + 4 4 (4,'4') 1 + hello world tuple sometimes_nulls + + 5 5 (5,'5') 2 + 6 6 (6,'6') ᴺᵁᴸᴸ + 7 7 (7,'7') 1 + 8 8 (8,'8') 2 + 9 9 (9,'9') ᴺᵁᴸᴸ +auto +┏━━━━━━━┳━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━━━━━━┓ +┃ hello ┃ world ┃ tuple ┃ sometimes_nulls ┃ +┡━━━━━━━╇━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━━━━━━┩ +│ 0 │ 0 │ (0,'0') │ ᴺᵁᴸᴸ │ +├───────┼───────┼─────────┼─────────────────┤ +│ 1 │ 1 │ (1,'1') │ 1 │ +├───────┼───────┼─────────┼─────────────────┤ +│ 2 │ 2 │ (2,'2') │ 2 │ +├───────┼───────┼─────────┼─────────────────┤ +│ 3 │ 3 │ (3,'3') │ ᴺᵁᴸᴸ │ +├───────┼───────┼─────────┼─────────────────┤ +│ 4 │ 4 │ (4,'4') │ 1 │ +└───────┴───────┴─────────┴─────────────────┘ +┏━━━━━━━┳━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━━━━━━┓ +┃ hello ┃ world ┃ tuple ┃ sometimes_nulls ┃ +┡━━━━━━━╇━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━━━━━━┩ +│ 5 │ 5 │ (5,'5') │ 2 │ +├───────┼───────┼─────────┼─────────────────┤ +│ 6 │ 6 │ (6,'6') │ ᴺᵁᴸᴸ │ +├───────┼───────┼─────────┼─────────────────┤ +│ 7 │ 7 │ (7,'7') │ 1 │ +├───────┼───────┼─────────┼─────────────────┤ +│ 8 │ 8 │ (8,'8') │ 2 │ +├───────┼───────┼─────────┼─────────────────┤ +│ 9 │ 9 │ (9,'9') │ ᴺᵁᴸᴸ │ +└───────┴───────┴─────────┴─────────────────┘ +┌─hello─┬─world─┬─tuple───┬─sometimes_nulls─┐ +│ 0 │ 0 │ (0,'0') │ ᴺᵁᴸᴸ │ +│ 1 │ 1 │ (1,'1') │ 1 │ +│ 2 │ 2 │ (2,'2') │ 2 │ +│ 3 │ 3 │ (3,'3') │ ᴺᵁᴸᴸ │ +│ 4 │ 4 │ (4,'4') │ 1 │ +└───────┴───────┴─────────┴─────────────────┘ +┌─hello─┬─world─┬─tuple───┬─sometimes_nulls─┐ +│ 5 │ 5 │ (5,'5') │ 2 │ +│ 6 │ 6 │ (6,'6') │ ᴺᵁᴸᴸ │ +│ 7 │ 7 │ (7,'7') │ 1 │ +│ 8 │ 8 │ (8,'8') │ 2 │ +│ 9 │ 9 │ (9,'9') │ ᴺᵁᴸᴸ │ +└───────┴───────┴─────────┴─────────────────┘ + hello world tuple sometimes_nulls + + 0 0 (0,'0') ᴺᵁᴸᴸ + 1 1 (1,'1') 1 + 2 2 (2,'2') 2 + 3 3 (3,'3') ᴺᵁᴸᴸ + 4 4 (4,'4') 1 + hello world tuple 
sometimes_nulls + + 5 5 (5,'5') 2 + 6 6 (6,'6') ᴺᵁᴸᴸ + 7 7 (7,'7') 1 + 8 8 (8,'8') 2 + 9 9 (9,'9') ᴺᵁᴸᴸ +┌─hello─┬─world─┬─tuple───┬─sometimes_nulls─┐ +│ 0 │ 0 │ (0,'0') │ ᴺᵁᴸᴸ │ +│ 1 │ 1 │ (1,'1') │ 1 │ +│ 2 │ 2 │ (2,'2') │ 2 │ +│ 3 │ 3 │ (3,'3') │ ᴺᵁᴸᴸ │ +│ 4 │ 4 │ (4,'4') │ 1 │ +│ 5 │ 5 │ (5,'5') │ 2 │ +│ 6 │ 6 │ (6,'6') │ ᴺᵁᴸᴸ │ +│ 7 │ 7 │ (7,'7') │ 1 │ +│ 8 │ 8 │ (8,'8') │ 2 │ +│ 9 │ 9 │ (9,'9') │ ᴺᵁᴸᴸ │ +└───────┴───────┴─────────┴─────────────────┘ +┏━━━━━━━┳━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━━━━━━┓ +┃ hello ┃ world ┃ tuple ┃ sometimes_nulls ┃ +┡━━━━━━━╇━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━━━━━━┩ +│ 0 │ 0 │ (0,'0') │ ᴺᵁᴸᴸ │ +├───────┼───────┼─────────┼─────────────────┤ +│ 1 │ 1 │ (1,'1') │ 1 │ +├───────┼───────┼─────────┼─────────────────┤ +│ 2 │ 2 │ (2,'2') │ 2 │ +├───────┼───────┼─────────┼─────────────────┤ +│ 3 │ 3 │ (3,'3') │ ᴺᵁᴸᴸ │ +├───────┼───────┼─────────┼─────────────────┤ +│ 4 │ 4 │ (4,'4') │ 1 │ +└───────┴───────┴─────────┴─────────────────┘ +┏━━━━━━━┳━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━━━━━━┓ +┃ hello ┃ world ┃ tuple ┃ sometimes_nulls ┃ +┡━━━━━━━╇━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━━━━━━┩ +│ 5 │ 5 │ (5,'5') │ 2 │ +├───────┼───────┼─────────┼─────────────────┤ +│ 6 │ 6 │ (6,'6') │ ᴺᵁᴸᴸ │ +├───────┼───────┼─────────┼─────────────────┤ +│ 7 │ 7 │ (7,'7') │ 1 │ +├───────┼───────┼─────────┼─────────────────┤ +│ 8 │ 8 │ (8,'8') │ 2 │ +├───────┼───────┼─────────┼─────────────────┤ +│ 9 │ 9 │ (9,'9') │ ᴺᵁᴸᴸ │ +└───────┴───────┴─────────┴─────────────────┘ +┌─hello─┬─world─┬─tuple───┬─sometimes_nulls─┐ +│ 0 │ 0 │ (0,'0') │ ᴺᵁᴸᴸ │ +│ 1 │ 1 │ (1,'1') │ 1 │ +│ 2 │ 2 │ (2,'2') │ 2 │ +│ 3 │ 3 │ (3,'3') │ ᴺᵁᴸᴸ │ +│ 4 │ 4 │ (4,'4') │ 1 │ +└───────┴───────┴─────────┴─────────────────┘ +┌─hello─┬─world─┬─tuple───┬─sometimes_nulls─┐ +│ 5 │ 5 │ (5,'5') │ 2 │ +│ 6 │ 6 │ (6,'6') │ ᴺᵁᴸᴸ │ +│ 7 │ 7 │ (7,'7') │ 1 │ +│ 8 │ 8 │ (8,'8') │ 2 │ +│ 9 │ 9 │ (9,'9') │ ᴺᵁᴸᴸ │ +└───────┴───────┴─────────┴─────────────────┘ + hello world tuple sometimes_nulls + + 0 0 (0,'0') ᴺᵁᴸᴸ + 1 1 (1,'1') 1 + 2 2 (2,'2') 2 + 3 3 (3,'3') ᴺᵁᴸᴸ + 4 4 (4,'4') 1 + hello world tuple sometimes_nulls + + 5 5 (5,'5') 2 + 6 6 (6,'6') ᴺᵁᴸᴸ + 7 7 (7,'7') 1 + 8 8 (8,'8') 2 + 9 9 (9,'9') ᴺᵁᴸᴸ diff --git a/tests/queries/0_stateless/00405_output_format_pretty_color.sql b/tests/queries/0_stateless/00405_output_format_pretty_color.sql new file mode 100644 index 00000000000..bc2d0c3adbf --- /dev/null +++ b/tests/queries/0_stateless/00405_output_format_pretty_color.sql @@ -0,0 +1,32 @@ +SET output_format_pretty_color = 0; +SHOW SETTING output_format_pretty_color; + +SELECT number AS hello, toString(number) AS world, (hello, world) AS tuple, nullIf(hello % 3, 0) AS sometimes_nulls FROM system.numbers LIMIT 10 SETTINGS max_block_size = 5 FORMAT Pretty; +SELECT number AS hello, toString(number) AS world, (hello, world) AS tuple, nullIf(hello % 3, 0) AS sometimes_nulls FROM system.numbers LIMIT 10 SETTINGS max_block_size = 5 FORMAT PrettyCompact; +SELECT number AS hello, toString(number) AS world, (hello, world) AS tuple, nullIf(hello % 3, 0) AS sometimes_nulls FROM system.numbers LIMIT 10 SETTINGS max_block_size = 5 FORMAT PrettySpace; +SELECT number AS hello, toString(number) AS world, (hello, world) AS tuple, nullIf(hello % 3, 0) AS sometimes_nulls FROM system.numbers LIMIT 10 SETTINGS max_block_size = 5 FORMAT PrettyCompactMonoBlock; +SELECT number AS hello, toString(number) AS world, (hello, world) AS tuple, nullIf(hello % 3, 0) AS sometimes_nulls FROM system.numbers LIMIT 10 SETTINGS max_block_size = 5 FORMAT PrettyNoEscapes; +SELECT number AS hello, 
toString(number) AS world, (hello, world) AS tuple, nullIf(hello % 3, 0) AS sometimes_nulls FROM system.numbers LIMIT 10 SETTINGS max_block_size = 5 FORMAT PrettyCompactNoEscapes; +SELECT number AS hello, toString(number) AS world, (hello, world) AS tuple, nullIf(hello % 3, 0) AS sometimes_nulls FROM system.numbers LIMIT 10 SETTINGS max_block_size = 5 FORMAT PrettySpaceNoEscapes; + +SET output_format_pretty_color = 1; +SHOW SETTING output_format_pretty_color; + +SELECT number AS hello, toString(number) AS world, (hello, world) AS tuple, nullIf(hello % 3, 0) AS sometimes_nulls FROM system.numbers LIMIT 10 SETTINGS max_block_size = 5 FORMAT Pretty; +SELECT number AS hello, toString(number) AS world, (hello, world) AS tuple, nullIf(hello % 3, 0) AS sometimes_nulls FROM system.numbers LIMIT 10 SETTINGS max_block_size = 5 FORMAT PrettyCompact; +SELECT number AS hello, toString(number) AS world, (hello, world) AS tuple, nullIf(hello % 3, 0) AS sometimes_nulls FROM system.numbers LIMIT 10 SETTINGS max_block_size = 5 FORMAT PrettySpace; +SELECT number AS hello, toString(number) AS world, (hello, world) AS tuple, nullIf(hello % 3, 0) AS sometimes_nulls FROM system.numbers LIMIT 10 SETTINGS max_block_size = 5 FORMAT PrettyCompactMonoBlock; +SELECT number AS hello, toString(number) AS world, (hello, world) AS tuple, nullIf(hello % 3, 0) AS sometimes_nulls FROM system.numbers LIMIT 10 SETTINGS max_block_size = 5 FORMAT PrettyNoEscapes; +SELECT number AS hello, toString(number) AS world, (hello, world) AS tuple, nullIf(hello % 3, 0) AS sometimes_nulls FROM system.numbers LIMIT 10 SETTINGS max_block_size = 5 FORMAT PrettyCompactNoEscapes; +SELECT number AS hello, toString(number) AS world, (hello, world) AS tuple, nullIf(hello % 3, 0) AS sometimes_nulls FROM system.numbers LIMIT 10 SETTINGS max_block_size = 5 FORMAT PrettySpaceNoEscapes; + +SET output_format_pretty_color = 'auto'; +SHOW SETTING output_format_pretty_color; + +SELECT number AS hello, toString(number) AS world, (hello, world) AS tuple, nullIf(hello % 3, 0) AS sometimes_nulls FROM system.numbers LIMIT 10 SETTINGS max_block_size = 5 FORMAT Pretty; +SELECT number AS hello, toString(number) AS world, (hello, world) AS tuple, nullIf(hello % 3, 0) AS sometimes_nulls FROM system.numbers LIMIT 10 SETTINGS max_block_size = 5 FORMAT PrettyCompact; +SELECT number AS hello, toString(number) AS world, (hello, world) AS tuple, nullIf(hello % 3, 0) AS sometimes_nulls FROM system.numbers LIMIT 10 SETTINGS max_block_size = 5 FORMAT PrettySpace; +SELECT number AS hello, toString(number) AS world, (hello, world) AS tuple, nullIf(hello % 3, 0) AS sometimes_nulls FROM system.numbers LIMIT 10 SETTINGS max_block_size = 5 FORMAT PrettyCompactMonoBlock; +SELECT number AS hello, toString(number) AS world, (hello, world) AS tuple, nullIf(hello % 3, 0) AS sometimes_nulls FROM system.numbers LIMIT 10 SETTINGS max_block_size = 5 FORMAT PrettyNoEscapes; +SELECT number AS hello, toString(number) AS world, (hello, world) AS tuple, nullIf(hello % 3, 0) AS sometimes_nulls FROM system.numbers LIMIT 10 SETTINGS max_block_size = 5 FORMAT PrettyCompactNoEscapes; +SELECT number AS hello, toString(number) AS world, (hello, world) AS tuple, nullIf(hello % 3, 0) AS sometimes_nulls FROM system.numbers LIMIT 10 SETTINGS max_block_size = 5 FORMAT PrettySpaceNoEscapes; diff --git a/tests/queries/0_stateless/00405_pretty_formats.sql b/tests/queries/0_stateless/00405_pretty_formats.sql index 3c8af776278..00bb09a1c30 100644 --- a/tests/queries/0_stateless/00405_pretty_formats.sql +++ 
b/tests/queries/0_stateless/00405_pretty_formats.sql @@ -1,3 +1,5 @@ +SET output_format_pretty_color = 1; + SELECT number AS hello, toString(number) AS world, (hello, world) AS tuple, nullIf(hello % 3, 0) AS sometimes_nulls FROM system.numbers LIMIT 10 SETTINGS max_block_size = 5 FORMAT Pretty; SELECT number AS hello, toString(number) AS world, (hello, world) AS tuple, nullIf(hello % 3, 0) AS sometimes_nulls FROM system.numbers LIMIT 10 SETTINGS max_block_size = 5 FORMAT PrettyCompact; SELECT number AS hello, toString(number) AS world, (hello, world) AS tuple, nullIf(hello % 3, 0) AS sometimes_nulls FROM system.numbers LIMIT 10 SETTINGS max_block_size = 5 FORMAT PrettySpace; diff --git a/tests/queries/0_stateless/00476_pretty_formats_and_widths.sql b/tests/queries/0_stateless/00476_pretty_formats_and_widths.sql index be98d9ab5cc..ece046b738e 100644 --- a/tests/queries/0_stateless/00476_pretty_formats_and_widths.sql +++ b/tests/queries/0_stateless/00476_pretty_formats_and_widths.sql @@ -1,3 +1,4 @@ +SET output_format_pretty_color=1; SELECT toUInt64(round(exp10(number))) AS x, toString(x) AS s FROM system.numbers LIMIT 10 FORMAT Pretty; SELECT toUInt64(round(exp10(number))) AS x, toString(x) AS s FROM system.numbers LIMIT 10 FORMAT PrettyCompact; SELECT toUInt64(round(exp10(number))) AS x, toString(x) AS s FROM system.numbers LIMIT 10 FORMAT PrettySpace; diff --git a/tests/queries/0_stateless/00818_inner_join_bug_3567.sql b/tests/queries/0_stateless/00818_inner_join_bug_3567.sql index cc0b63f9def..2dec5ce3221 100644 --- a/tests/queries/0_stateless/00818_inner_join_bug_3567.sql +++ b/tests/queries/0_stateless/00818_inner_join_bug_3567.sql @@ -1,3 +1,4 @@ +SET output_format_pretty_color = 1; SET allow_experimental_analyzer = 1; DROP TABLE IF EXISTS table1; diff --git a/tests/queries/0_stateless/00918_json_functions.reference b/tests/queries/0_stateless/00918_json_functions.reference index 5264d51fa73..43b15ded93d 100644 --- a/tests/queries/0_stateless/00918_json_functions.reference +++ b/tests/queries/0_stateless/00918_json_functions.reference @@ -44,11 +44,14 @@ hello (-100,200,300) [-100,0,0] [-100,NULL,NULL] +[-100,NULL,NULL] [0,200,0] [NULL,200,NULL] +[NULL,200,NULL] -100 200 \N +\N 1 Thursday Friday @@ -209,11 +212,14 @@ hello (-100,200,300) [-100,0,0] [-100,NULL,NULL] +[-100,NULL,NULL] [0,200,0] [NULL,200,NULL] +[NULL,200,NULL] -100 200 \N +\N 1 Thursday Friday diff --git a/tests/queries/0_stateless/00918_json_functions.sql b/tests/queries/0_stateless/00918_json_functions.sql index 16cc72f7fdc..e19dd17670e 100644 --- a/tests/queries/0_stateless/00918_json_functions.sql +++ b/tests/queries/0_stateless/00918_json_functions.sql @@ -56,11 +56,14 @@ SELECT JSONExtract('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 'Array(Float3 SELECT JSONExtract('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 'Tuple(Int8, Float32, UInt16)'); SELECT JSONExtract('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 'Array(Int8)'); SELECT JSONExtract('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 'Array(Nullable(Int8))'); +SELECT JSONExtract('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 'Array(LowCardinality(Nullable(Int8)))'); SELECT JSONExtract('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 'Array(UInt8)'); SELECT JSONExtract('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 'Array(Nullable(UInt8))'); +SELECT JSONExtract('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 'Array(LowCardinality(Nullable(UInt8)))'); SELECT JSONExtract('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 1, 'Int8'); SELECT 
JSONExtract('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 2, 'Int32'); SELECT JSONExtract('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 4, 'Nullable(Int64)'); +SELECT JSONExtract('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 4, 'LowCardinality(Nullable(Int64))'); SELECT JSONExtract('{"passed": true}', 'passed', 'UInt8'); SELECT JSONExtract('{"day": "Thursday"}', 'day', 'Enum8(\'Sunday\' = 0, \'Monday\' = 1, \'Tuesday\' = 2, \'Wednesday\' = 3, \'Thursday\' = 4, \'Friday\' = 5, \'Saturday\' = 6)'); SELECT JSONExtract('{"day": 5}', 'day', 'Enum8(\'Sunday\' = 0, \'Monday\' = 1, \'Tuesday\' = 2, \'Wednesday\' = 3, \'Thursday\' = 4, \'Friday\' = 5, \'Saturday\' = 6)'); @@ -241,11 +244,14 @@ SELECT JSONExtract('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 'Array(Float3 SELECT JSONExtract('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 'Tuple(Int8, Float32, UInt16)'); SELECT JSONExtract('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 'Array(Int8)'); SELECT JSONExtract('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 'Array(Nullable(Int8))'); +SELECT JSONExtract('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 'Array(LowCardinality(Nullable(Int8)))'); SELECT JSONExtract('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 'Array(UInt8)'); SELECT JSONExtract('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 'Array(Nullable(UInt8))'); +SELECT JSONExtract('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 'Array(LowCardinality(Nullable(UInt8)))'); SELECT JSONExtract('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 1, 'Int8'); SELECT JSONExtract('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 2, 'Int32'); SELECT JSONExtract('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 4, 'Nullable(Int64)'); +SELECT JSONExtract('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 4, 'LowCardinality(Nullable(Int64))'); SELECT JSONExtract('{"passed": true}', 'passed', 'UInt8'); SELECT JSONExtract('{"day": "Thursday"}', 'day', 'Enum8(\'Sunday\' = 0, \'Monday\' = 1, \'Tuesday\' = 2, \'Wednesday\' = 3, \'Thursday\' = 4, \'Friday\' = 5, \'Saturday\' = 6)'); SELECT JSONExtract('{"day": 5}', 'day', 'Enum8(\'Sunday\' = 0, \'Monday\' = 1, \'Tuesday\' = 2, \'Wednesday\' = 3, \'Thursday\' = 4, \'Friday\' = 5, \'Saturday\' = 6)'); diff --git a/tests/queries/0_stateless/01018_ambiguous_column.sql b/tests/queries/0_stateless/01018_ambiguous_column.sql index 620bdb6ba3f..a94e1cd4601 100644 --- a/tests/queries/0_stateless/01018_ambiguous_column.sql +++ b/tests/queries/0_stateless/01018_ambiguous_column.sql @@ -1,3 +1,4 @@ +SET output_format_pretty_color=1; SET allow_experimental_analyzer = 1; select * from system.one cross join system.one; diff --git a/tests/queries/0_stateless/01074_partial_revokes.sql b/tests/queries/0_stateless/01074_partial_revokes.sql index 8c92b9511c7..973d77b3f63 100644 --- a/tests/queries/0_stateless/01074_partial_revokes.sql +++ b/tests/queries/0_stateless/01074_partial_revokes.sql @@ -43,7 +43,7 @@ REVOKE SELECT ON db.* FROM test_user_01074; GRANT SELECT ON db.table TO test_user_01074; REVOKE SELECT(col1) ON db.table FROM test_user_01074; SHOW GRANTS FOR test_user_01074; -SELECT * FROM system.grants WHERE user_name = 'test_user_01074' format Pretty; +SELECT * FROM system.grants WHERE user_name = 'test_user_01074' SETTINGS output_format_pretty_color=1 FORMAT Pretty; SELECT '--cleanup'; REVOKE SELECT ON *.* FROM test_user_01074; @@ -73,7 +73,7 @@ SELECT '--grant option 1'; GRANT SELECT ON *.* TO test_user_01074 WITH GRANT OPTION; REVOKE GRANT OPTION FOR SELECT(col1) ON db.table FROM test_user_01074; SHOW GRANTS FOR 
test_user_01074; -SELECT * FROM system.grants WHERE user_name = 'test_user_01074' format Pretty; +SELECT * FROM system.grants WHERE user_name = 'test_user_01074' SETTINGS output_format_pretty_color=1 FORMAT Pretty; SELECT '--cleanup'; REVOKE SELECT ON *.* FROM test_user_01074; diff --git a/tests/queries/0_stateless/01099_parallel_distributed_insert_select.reference b/tests/queries/0_stateless/01099_parallel_distributed_insert_select.reference index ba26f12fddf..c175bb6b6f8 100644 --- a/tests/queries/0_stateless/01099_parallel_distributed_insert_select.reference +++ b/tests/queries/0_stateless/01099_parallel_distributed_insert_select.reference @@ -17,6 +17,12 @@ distributed 0 4 1 4 2 4 +test_cluster_1_shard_3_replicas_1_unavailable +distributed +local +test_cluster_1_shard_3_replicas_1_unavailable with storageCluster +distributed +local parallel_distributed_insert_select=2 test_shard_localhost 0 @@ -35,3 +41,21 @@ local 0 2 1 2 2 2 +test_cluster_1_shard_3_replicas_1_unavailable +distributed +0 1 +1 1 +2 1 +local +0 1 +1 1 +2 1 +test_cluster_1_shard_3_replicas_1_unavailable with storageCluster +distributed +1 1 +2 1 +3 1 +local +1 1 +2 1 +3 1 diff --git a/tests/queries/0_stateless/01099_parallel_distributed_insert_select.sql b/tests/queries/0_stateless/01099_parallel_distributed_insert_select.sql index 8118d067325..ec650551baf 100644 --- a/tests/queries/0_stateless/01099_parallel_distributed_insert_select.sql +++ b/tests/queries/0_stateless/01099_parallel_distributed_insert_select.sql @@ -87,6 +87,60 @@ DROP TABLE local_01099_b; DROP TABLE distributed_01099_a; DROP TABLE distributed_01099_b; +--- test_cluster_1_shard_3_replicas_1_unavailable + +SELECT 'test_cluster_1_shard_3_replicas_1_unavailable'; + +CREATE TABLE local_01099_a (number UInt64) ENGINE = MergeTree() ORDER BY number; +CREATE TABLE local_01099_b (number UInt64) ENGINE = MergeTree() ORDER BY number; +CREATE TABLE distributed_01099_a AS local_01099_a ENGINE = Distributed('test_cluster_1_shard_3_replicas_1_unavailable', currentDatabase(), local_01099_a, rand()); +CREATE TABLE distributed_01099_b AS local_01099_b ENGINE = Distributed('test_cluster_1_shard_3_replicas_1_unavailable', currentDatabase(), local_01099_b, rand()); + +SYSTEM STOP DISTRIBUTED SENDS distributed_01099_b; +SET prefer_localhost_replica=0; -- to require distributed send for local replica too +INSERT INTO local_01099_a SELECT number from system.numbers limit 3; +INSERT INTO distributed_01099_b SELECT * from distributed_01099_a; +SET prefer_localhost_replica=1; + +-- distributed sends disabled, but they are not required, since insert is done into local table. 
+-- (since parallel_distributed_insert_select=2) +SELECT 'distributed'; +SELECT number, count(number) FROM distributed_01099_b group by number order by number; +SELECT 'local'; +SELECT number, count(number) FROM local_01099_b group by number order by number; + +DROP TABLE local_01099_a; +DROP TABLE local_01099_b; +SET send_logs_level='fatal'; +DROP TABLE distributed_01099_a; +DROP TABLE distributed_01099_b; +SET send_logs_level='warning'; + +--- test_cluster_1_shard_3_replicas_1_unavailable with storageCluster + +SELECT 'test_cluster_1_shard_3_replicas_1_unavailable with storageCluster'; + +CREATE TABLE local_01099_b (number UInt64) ENGINE = MergeTree() ORDER BY number; +CREATE TABLE distributed_01099_b AS local_01099_b ENGINE = Distributed('test_cluster_1_shard_3_replicas_1_unavailable', currentDatabase(), local_01099_b, rand()); + +SYSTEM STOP DISTRIBUTED SENDS distributed_01099_b; +SET prefer_localhost_replica=0; -- to require distributed send for local replica too +SET send_logs_level='error'; +INSERT INTO distributed_01099_b SELECT * FROM urlCluster('test_cluster_two_shards', 'http://localhost:8123/?query=select+{1,2,3}+format+TSV', 'TSV', 's String'); +SET send_logs_level='warning'; +SET prefer_localhost_replica=1; + +-- distributed sends disabled, but they are not required, since insert is done into local table. +-- (since parallel_distributed_insert_select=2) +SELECT 'distributed'; +SELECT number, count(number) FROM distributed_01099_b group by number order by number; +SELECT 'local'; +SELECT number, count(number) FROM local_01099_b group by number order by number; + +DROP TABLE local_01099_b; +SET send_logs_level='fatal'; +DROP TABLE distributed_01099_b; +SET send_logs_level='warning'; SET parallel_distributed_insert_select=2; SELECT 'parallel_distributed_insert_select=2'; @@ -164,3 +218,56 @@ DROP TABLE local_01099_a; DROP TABLE local_01099_b; DROP TABLE distributed_01099_a; DROP TABLE distributed_01099_b; + +--- test_cluster_1_shard_3_replicas_1_unavailable + +SELECT 'test_cluster_1_shard_3_replicas_1_unavailable'; + +CREATE TABLE local_01099_a (number UInt64) ENGINE = MergeTree() ORDER BY number; +CREATE TABLE local_01099_b (number UInt64) ENGINE = MergeTree() ORDER BY number; +CREATE TABLE distributed_01099_a AS local_01099_a ENGINE = Distributed('test_cluster_1_shard_3_replicas_1_unavailable', currentDatabase(), local_01099_a, rand()); +CREATE TABLE distributed_01099_b AS local_01099_b ENGINE = Distributed('test_cluster_1_shard_3_replicas_1_unavailable', currentDatabase(), local_01099_b, rand()); + +SYSTEM STOP DISTRIBUTED SENDS distributed_01099_b; +SET prefer_localhost_replica=0; -- to require distributed send for local replica too +INSERT INTO local_01099_a SELECT number from system.numbers limit 3; +INSERT INTO distributed_01099_b SELECT * from distributed_01099_a; +SET prefer_localhost_replica=1; + +-- distributed sends disabled, but they are not required, since insert is done into local table. 
+-- (since parallel_distributed_insert_select=2) +SELECT 'distributed'; +SELECT number, count(number) FROM distributed_01099_b group by number order by number; +SELECT 'local'; +SELECT number, count(number) FROM local_01099_b group by number order by number; + +DROP TABLE local_01099_a; +DROP TABLE local_01099_b; +DROP TABLE distributed_01099_a; +DROP TABLE distributed_01099_b; + +--- test_cluster_1_shard_3_replicas_1_unavailable with storageCluster + +SELECT 'test_cluster_1_shard_3_replicas_1_unavailable with storageCluster'; + +CREATE TABLE local_01099_b (number UInt64) ENGINE = MergeTree() ORDER BY number; +CREATE TABLE distributed_01099_b AS local_01099_b ENGINE = Distributed('test_cluster_1_shard_3_replicas_1_unavailable', currentDatabase(), local_01099_b, rand()); + +SYSTEM STOP DISTRIBUTED SENDS distributed_01099_b; +SET prefer_localhost_replica=0; -- to require distributed send for local replica too +SET send_logs_level='error'; +INSERT INTO distributed_01099_b SELECT * FROM urlCluster('test_cluster_two_shards', 'http://localhost:8123/?query=select+{1,2,3}+format+TSV', 'TSV', 's String'); +SET send_logs_level='warning'; +SET prefer_localhost_replica=1; + +-- distributed sends disabled, but they are not required, since insert is done into local table. +-- (since parallel_distributed_insert_select=2) +SELECT 'distributed'; +SELECT number, count(number) FROM distributed_01099_b group by number order by number; +SELECT 'local'; +SELECT number, count(number) FROM local_01099_b group by number order by number; + +DROP TABLE local_01099_b; +SET send_logs_level='fatal'; +DROP TABLE distributed_01099_b; +SET send_logs_level='warning'; diff --git a/tests/queries/0_stateless/01271_optimize_arithmetic_operations_in_aggr_func_long.reference b/tests/queries/0_stateless/01271_optimize_arithmetic_operations_in_aggr_func_long.reference index ea04f155f24..1f7d5d44df4 100644 --- a/tests/queries/0_stateless/01271_optimize_arithmetic_operations_in_aggr_func_long.reference +++ b/tests/queries/0_stateless/01271_optimize_arithmetic_operations_in_aggr_func_long.reference @@ -1,8 +1,8 @@ SELECT - sum(n + 1), - sum(1 + n), - sum(n - 1), - sum(1 - n) + sum(n) + (1 * count(n)), + (1 * count(n)) + sum(n), + sum(n) - (1 * count(n)), + (1 * count(n)) - sum(n) FROM ( SELECT number AS n @@ -59,10 +59,10 @@ FROM FROM numbers(10) ) SELECT - sum(n + -1), - sum(-1 + n), - sum(n - -1), - sum(-1 - n) + sum(n) + (-1 * count(n)), + (-1 * count(n)) + sum(n), + sum(n) - (-1 * count(n)), + (-1 * count(n)) - sum(n) FROM ( SELECT number AS n @@ -418,7 +418,7 @@ FROM SELECT number AS n FROM numbers(10) ) -SELECT ((sum(n + 1) + sum(1 + n)) + sum(n - 1)) + sum(1 - n) +SELECT (((sum(n) + (1 * count(n))) + ((1 * count(n)) + sum(n))) + (sum(n) - (1 * count(n)))) + ((1 * count(n)) - sum(n)) FROM ( SELECT number AS n diff --git a/tests/queries/0_stateless/01271_optimize_arithmetic_operations_in_aggr_func_with_alias.reference b/tests/queries/0_stateless/01271_optimize_arithmetic_operations_in_aggr_func_with_alias.reference index 9e0d871041b..54448ba3b68 100644 --- a/tests/queries/0_stateless/01271_optimize_arithmetic_operations_in_aggr_func_with_alias.reference +++ b/tests/queries/0_stateless/01271_optimize_arithmetic_operations_in_aggr_func_with_alias.reference @@ -8,3 +8,21 @@ FROM WHERE (a > 0) AND (b > 0) HAVING c > 0 2 +SELECT min(n) + 1 AS c +FROM +( + SELECT number AS n + FROM numbers(10) + WHERE (n + 1) > 0 +) +WHERE ((n + 1) AS a) > 0 +HAVING c > 0 +1 +SELECT min(n) + 1 AS c +FROM +( + SELECT number AS n + FROM numbers(10) +) 
+HAVING c > 0 +1 diff --git a/tests/queries/0_stateless/01271_optimize_arithmetic_operations_in_aggr_func_with_alias.sql b/tests/queries/0_stateless/01271_optimize_arithmetic_operations_in_aggr_func_with_alias.sql index 242a253e67c..7c27994aca8 100644 --- a/tests/queries/0_stateless/01271_optimize_arithmetic_operations_in_aggr_func_with_alias.sql +++ b/tests/queries/0_stateless/01271_optimize_arithmetic_operations_in_aggr_func_with_alias.sql @@ -3,3 +3,9 @@ SET convert_query_to_cnf = 0; explain syntax select min((n as a) + (1 as b)) c from (select number n from numbers(10)) where a > 0 and b > 0 having c > 0; select min((n as a) + (1 as b)) c from (select number n from numbers(10)) where a > 0 and b > 0 having c > 0; + +explain syntax select min((n + 1) as a) c from (select number n from numbers(10)) where a > 0 having c > 0; +select min((n + 1) as a) c from (select number n from numbers(10)) where a > 0 having c > 0; + +explain syntax select min(n + 1) as c from (select number n from numbers(10)) having c > 0; +select min(n + 1) c from (select number n from numbers(10)) having c > 0; diff --git a/tests/queries/0_stateless/01271_show_privileges.reference b/tests/queries/0_stateless/01271_show_privileges.reference index 1a3a271528c..6a7e4748130 100644 --- a/tests/queries/0_stateless/01271_show_privileges.reference +++ b/tests/queries/0_stateless/01271_show_privileges.reference @@ -120,6 +120,7 @@ SYSTEM RELOAD DICTIONARY ['SYSTEM RELOAD DICTIONARIES','RELOAD DICTIONARY','RELO SYSTEM RELOAD MODEL ['SYSTEM RELOAD MODELS','RELOAD MODEL','RELOAD MODELS'] GLOBAL SYSTEM RELOAD SYSTEM RELOAD FUNCTION ['SYSTEM RELOAD FUNCTIONS','RELOAD FUNCTION','RELOAD FUNCTIONS'] GLOBAL SYSTEM RELOAD SYSTEM RELOAD EMBEDDED DICTIONARIES ['RELOAD EMBEDDED DICTIONARIES'] GLOBAL SYSTEM RELOAD +SYSTEM RELOAD ASYNCHRONOUS METRICS ['RELOAD ASYNCHRONOUS METRICS'] GLOBAL SYSTEM RELOAD SYSTEM RELOAD [] \N SYSTEM SYSTEM RESTART DISK ['SYSTEM RESTART DISK'] GLOBAL SYSTEM SYSTEM MERGES ['SYSTEM STOP MERGES','SYSTEM START MERGES','STOP MERGES','START MERGES'] TABLE SYSTEM @@ -150,6 +151,7 @@ SYSTEM THREAD FUZZER ['SYSTEM START THREAD FUZZER','SYSTEM STOP THREAD FUZZER',' SYSTEM UNFREEZE ['SYSTEM UNFREEZE'] GLOBAL SYSTEM SYSTEM FAILPOINT ['SYSTEM ENABLE FAILPOINT','SYSTEM DISABLE FAILPOINT'] GLOBAL SYSTEM SYSTEM LISTEN ['SYSTEM START LISTEN','SYSTEM STOP LISTEN'] GLOBAL SYSTEM +SYSTEM JEMALLOC ['SYSTEM JEMALLOC PURGE','SYSTEM JEMALLOC ENABLE PROFILE','SYSTEM JEMALLOC DISABLE PROFILE','SYSTEM JEMALLOC FLUSH PROFILE'] GLOBAL SYSTEM SYSTEM [] \N ALL dictGet ['dictHas','dictGetHierarchy','dictIsIn'] DICTIONARY ALL displaySecretsInShowAndSelect [] GLOBAL ALL diff --git a/tests/queries/0_stateless/01293_pretty_max_value_width.sql b/tests/queries/0_stateless/01293_pretty_max_value_width.sql index 992aec06f0a..2c9c56d0076 100644 --- a/tests/queries/0_stateless/01293_pretty_max_value_width.sql +++ b/tests/queries/0_stateless/01293_pretty_max_value_width.sql @@ -1,3 +1,4 @@ +SET output_format_pretty_color=1; SELECT 'привет' AS x, 'мир' AS y FORMAT Pretty; SET output_format_pretty_max_value_width = 5; diff --git a/tests/queries/0_stateless/01376_GROUP_BY_injective_elimination_dictGet.reference b/tests/queries/0_stateless/01376_GROUP_BY_injective_elimination_dictGet.reference index 9459d4ba2a0..6de0a5be0a5 100644 --- a/tests/queries/0_stateless/01376_GROUP_BY_injective_elimination_dictGet.reference +++ b/tests/queries/0_stateless/01376_GROUP_BY_injective_elimination_dictGet.reference @@ -1 +1,24 @@ 1.1 +SELECT 
dictGet(\'dictdb_01376.dict_exists\', \'value\', toUInt64(1)) AS val +FROM numbers(2) +GROUP BY toUInt64(1) +QUERY id: 0 + PROJECTION COLUMNS + val Float64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: dictGet, function_type: ordinary, result_type: Float64 + ARGUMENTS + LIST id: 3, nodes: 3 + CONSTANT id: 4, constant_value: \'dictdb_01376.dict_exists\', constant_value_type: String + CONSTANT id: 5, constant_value: \'value\', constant_value_type: String + COLUMN id: 6, column_name: number, result_type: UInt64, source_id: 7 + JOIN TREE + TABLE_FUNCTION id: 7, alias: __table1, table_function_name: numbers + ARGUMENTS + LIST id: 8, nodes: 1 + CONSTANT id: 9, constant_value: UInt64_2, constant_value_type: UInt8 + GROUP BY + LIST id: 10, nodes: 1 + COLUMN id: 6, column_name: number, result_type: UInt64, source_id: 7 + SETTINGS allow_experimental_analyzer=1 diff --git a/tests/queries/0_stateless/01376_GROUP_BY_injective_elimination_dictGet.sql b/tests/queries/0_stateless/01376_GROUP_BY_injective_elimination_dictGet.sql index 29ffcb46fbf..5a070b443aa 100644 --- a/tests/queries/0_stateless/01376_GROUP_BY_injective_elimination_dictGet.sql +++ b/tests/queries/0_stateless/01376_GROUP_BY_injective_elimination_dictGet.sql @@ -23,7 +23,7 @@ INSERT INTO dictdb_01376.table_for_dict VALUES (1, 1.1); CREATE DICTIONARY IF NOT EXISTS dictdb_01376.dict_exists ( key_column UInt64, - value Float64 DEFAULT 77.77 + value Float64 DEFAULT 77.77 INJECTIVE ) PRIMARY KEY key_column SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'table_for_dict' DB 'dictdb_01376')) @@ -32,6 +32,14 @@ LAYOUT(FLAT()); SELECT dictGet('dictdb_01376.dict_exists', 'value', toUInt64(1)) as val FROM numbers(2) GROUP BY val; +EXPLAIN SYNTAX SELECT dictGet('dictdb_01376.dict_exists', 'value', toUInt64(1)) as val FROM numbers(2) GROUP BY val; + +EXPLAIN QUERY TREE +SELECT dictGet('dictdb_01376.dict_exists', 'value', number) as val +FROM numbers(2) +GROUP BY val +SETTINGS allow_experimental_analyzer = 1; + DROP DICTIONARY dictdb_01376.dict_exists; DROP TABLE dictdb_01376.table_for_dict; DROP DATABASE dictdb_01376; diff --git a/tests/queries/0_stateless/01451_normalize_query.reference b/tests/queries/0_stateless/01451_normalize_query.reference index 339ad34ea77..b331e139dc4 100644 --- a/tests/queries/0_stateless/01451_normalize_query.reference +++ b/tests/queries/0_stateless/01451_normalize_query.reference @@ -23,3 +23,23 @@ SELECT ?.. SELECT ? xyz11 SELECT ?, xyz11 SELECT ?.. +? - ? +?.. +-?.. +? - ?.. +f(-?..) +[-?..] +? + ? +?.. +-?.. +? + ?.. +f(+?..) +[+?..] +?.. + ? +?.. - ?.. +f(+?..), ? +[+?..] - ? +-?.. - [+?..] - ? +(+?..) - ? +-?.. - (+?..) - ? +(+?..) 
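For context: the reference lines added above are the expected outputs of the normalizeQuery() calls introduced in 01451_normalize_query.sql, the next file in this diff. normalizeQuery() replaces literals with '?' and collapses runs of literals into '?..'; the new cases check that a leading '+' or '-' in front of a literal or a literal list is preserved in the normalized form. A minimal sketch of the mapping, with the expected results (taken from the reference file above) shown as comments:

SELECT normalizeQuery('1 - 2');                   -- ? - ?              binary minus between two literals
SELECT normalizeQuery('-1, -2, 3');               -- -?..               list of literals collapsed, leading sign kept
SELECT normalizeQuery('f(+2, 3)');                -- f(+?..)            collapsing also applies inside function arguments
SELECT normalizeQuery('-1, 1 - [+1, 2, 3] - 1');  -- -?.. - [+?..] - ?  mixed case: signed list inside brackets plus surrounding literals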
diff --git a/tests/queries/0_stateless/01451_normalize_query.sql b/tests/queries/0_stateless/01451_normalize_query.sql index 3c01a975712..14c74d95019 100644 --- a/tests/queries/0_stateless/01451_normalize_query.sql +++ b/tests/queries/0_stateless/01451_normalize_query.sql @@ -23,4 +23,23 @@ SELECT normalizeQuery('SELECT 1, ''xyz11'''); SELECT normalizeQuery('SELECT $doc$VALUE$doc$ xyz11'); SELECT normalizeQuery('SELECT $doc$VALUE$doc$, xyz11'); SELECT normalizeQuery('SELECT $doc$VALUE$doc$, ''xyz11'''); - +SELECT normalizeQuery('1 - 2'); +SELECT normalizeQuery('1, -2, 3'); +SELECT normalizeQuery('-1, -2, 3'); +SELECT normalizeQuery('1 - 2, 3, 4'); +SELECT normalizeQuery('f(-2, 3)'); +SELECT normalizeQuery('[-1, 2, 3]'); +SELECT normalizeQuery('1 + 2'); +SELECT normalizeQuery('1, +2, 3'); +SELECT normalizeQuery('-1, +2, 3'); +SELECT normalizeQuery('1 + 2, 3, 4'); +SELECT normalizeQuery('f(+2, 3)'); +SELECT normalizeQuery('[+1, 2, 3]'); +SELECT normalizeQuery('1, 2, 3 + 4'); +SELECT normalizeQuery('1, 2 - 3, 4'); +SELECT normalizeQuery('f(+2, 3), 1'); +SELECT normalizeQuery('[+1, 2, 3] - 1'); +SELECT normalizeQuery('-1, 1 - [+1, 2, 3] - 1'); +SELECT normalizeQuery('(+1, 2, 3) - 1'); +SELECT normalizeQuery('-1, 1 - (+1, 2, 3) - 1'); +SELECT normalizeQuery('(+1, 2, -3)'); diff --git a/tests/queries/0_stateless/01470_columns_transformers.reference b/tests/queries/0_stateless/01470_columns_transformers.reference index 8fa86582018..8eab5a16b8b 100644 --- a/tests/queries/0_stateless/01470_columns_transformers.reference +++ b/tests/queries/0_stateless/01470_columns_transformers.reference @@ -49,10 +49,43 @@ SELECT any(toDate(k)) FROM columns_transformers AS a SELECT - sum(i + 1 AS i), + sum(i) + (1 * count(i)), sum(j), sum(k) FROM columns_transformers +SelectWithUnionQuery (children 1) + ExpressionList (children 1) + SelectQuery (children 2) + ExpressionList (children 1) + Asterisk (children 1) + ColumnsTransformerList (children 2) + ColumnsReplaceTransformer (children 1) + ColumnsReplaceTransformer::Replacement (children 1) + Function plus (children 1) + ExpressionList (children 2) + Identifier i + Literal UInt64_1 + ColumnsApplyTransformer + TablesInSelectQuery (children 1) + TablesInSelectQueryElement (children 1) + TableExpression (children 1) + TableIdentifier columns_transformers +SELECT sum(i) + (1 * count(i)) +FROM columns_transformers +SelectWithUnionQuery (children 1) + ExpressionList (children 1) + SelectQuery (children 2) + ExpressionList (children 1) + Function sum (children 1) + ExpressionList (children 1) + Function plus (alias m) (children 1) + ExpressionList (children 2) + Identifier i + Literal UInt64_1 + TablesInSelectQuery (children 1) + TablesInSelectQueryElement (children 1) + TableExpression (children 1) + TableIdentifier columns_transformers SELECT avg(i) + 1, avg(j) + 2, diff --git a/tests/queries/0_stateless/01470_columns_transformers.sql b/tests/queries/0_stateless/01470_columns_transformers.sql index 8840ce3f3b5..1490dabdcec 100644 --- a/tests/queries/0_stateless/01470_columns_transformers.sql +++ b/tests/queries/0_stateless/01470_columns_transformers.sql @@ -35,6 +35,9 @@ EXPLAIN SYNTAX SELECT * EXCEPT(i) APPLY(sum) from columns_transformers; EXPLAIN SYNTAX SELECT columns_transformers.* EXCEPT(j) APPLY(avg) from columns_transformers; EXPLAIN SYNTAX SELECT a.* APPLY(toDate) EXCEPT(i, j) APPLY(any) from columns_transformers a; EXPLAIN SYNTAX SELECT * REPLACE(i + 1 AS i) APPLY(sum) from columns_transformers; +EXPLAIN AST SELECT * REPLACE(i + 1 AS i) APPLY(sum) from 
columns_transformers; +EXPLAIN SYNTAX SELECT sum(i + 1 AS m) from columns_transformers; +EXPLAIN AST SELECT sum(i + 1 AS m) from columns_transformers; EXPLAIN SYNTAX SELECT columns_transformers.* REPLACE(j + 2 AS j, i + 1 AS i) APPLY(avg) from columns_transformers; EXPLAIN SYNTAX SELECT a.* APPLY(toDate) REPLACE(i + 1 AS i) APPLY(any) from columns_transformers a; diff --git a/tests/queries/0_stateless/01472_many_rows_in_totals.sql b/tests/queries/0_stateless/01472_many_rows_in_totals.sql index bea8c255f21..f3d3a1fcca2 100644 --- a/tests/queries/0_stateless/01472_many_rows_in_totals.sql +++ b/tests/queries/0_stateless/01472_many_rows_in_totals.sql @@ -1,3 +1,5 @@ +set output_format_pretty_color=1; + -- Disable external aggregation because it may produce several blocks instead of one. set max_bytes_before_external_group_by = 0; set output_format_write_statistics = 0; diff --git a/tests/queries/0_stateless/01509_output_format_pretty_row_numbers.sql b/tests/queries/0_stateless/01509_output_format_pretty_row_numbers.sql index f8ec0be74d7..3536b628ef2 100644 --- a/tests/queries/0_stateless/01509_output_format_pretty_row_numbers.sql +++ b/tests/queries/0_stateless/01509_output_format_pretty_row_numbers.sql @@ -1,3 +1,4 @@ +SET output_format_pretty_color=1; SELECT * FROM numbers(10) FORMAT Pretty; SELECT * FROM numbers(10) FORMAT PrettyCompact; SELECT * FROM numbers(10) FORMAT PrettyCompactMonoBlock; diff --git a/tests/queries/0_stateless/01670_neighbor_lc_bug.sql b/tests/queries/0_stateless/01670_neighbor_lc_bug.sql index f216befbb06..3cb194ccc64 100644 --- a/tests/queries/0_stateless/01670_neighbor_lc_bug.sql +++ b/tests/queries/0_stateless/01670_neighbor_lc_bug.sql @@ -40,6 +40,7 @@ FROM ORDER BY val_string, rowNr ) ORDER BY rowNr, val_string, str_m1, str_p1, val_low, low_m1, low_p1 +SETTINGS output_format_pretty_color=1 format PrettyCompact; drop table if exists neighbor_test; diff --git a/tests/queries/0_stateless/01671_merge_join_and_constants.sql b/tests/queries/0_stateless/01671_merge_join_and_constants.sql index 5cabd6f7f06..7a84bd4e97a 100644 --- a/tests/queries/0_stateless/01671_merge_join_and_constants.sql +++ b/tests/queries/0_stateless/01671_merge_join_and_constants.sql @@ -1,3 +1,4 @@ +SET output_format_pretty_color=1; SET allow_experimental_analyzer = 1; DROP TABLE IF EXISTS table1; diff --git a/tests/queries/0_stateless/01861_explain_pipeline.sql b/tests/queries/0_stateless/01861_explain_pipeline.sql index 93c82b6e265..99ea52ebfa4 100644 --- a/tests/queries/0_stateless/01861_explain_pipeline.sql +++ b/tests/queries/0_stateless/01861_explain_pipeline.sql @@ -3,8 +3,8 @@ CREATE TABLE test(a Int, b Int) Engine=ReplacingMergeTree order by a SETTINGS in INSERT INTO test select number, number from numbers(5); INSERT INTO test select number, number from numbers(5,2); set max_threads =1; -explain pipeline select * from test final; +explain pipeline select * from test final SETTINGS enable_vertical_final = 0; select * from test final; set max_threads =2; -explain pipeline select * from test final; +explain pipeline select * from test final SETTINGS enable_vertical_final = 0; DROP TABLE test; diff --git a/tests/queries/0_stateless/01915_json_extract_raw_string.reference b/tests/queries/0_stateless/01915_json_extract_raw_string.reference index e88c7e018d2..99ebfdcf5bd 100644 --- a/tests/queries/0_stateless/01915_json_extract_raw_string.reference +++ b/tests/queries/0_stateless/01915_json_extract_raw_string.reference @@ -1,5 +1,6 @@ ('123','456','[7,8,9]') \N +\N 123 123 diff --git 
a/tests/queries/0_stateless/01915_json_extract_raw_string.sql b/tests/queries/0_stateless/01915_json_extract_raw_string.sql index e81d527a3da..4b46db31559 100644 --- a/tests/queries/0_stateless/01915_json_extract_raw_string.sql +++ b/tests/queries/0_stateless/01915_json_extract_raw_string.sql @@ -2,6 +2,7 @@ select JSONExtract('{"a": "123", "b": 456, "c": [7, 8, 9]}', 'Tuple(a String, b String, c String)'); with '{"string_value":null}' as json select JSONExtract(json, 'string_value', 'Nullable(String)'); +with '{"string_value":null}' as json select JSONExtract(json, 'string_value', 'LowCardinality(Nullable(String))'); select JSONExtractString('{"a": 123}', 'a'); select JSONExtractString('{"a": "123"}', 'a'); diff --git a/tests/queries/0_stateless/01936_three_parts_identifiers_in_wrong_places.sql b/tests/queries/0_stateless/01936_three_parts_identifiers_in_wrong_places.sql index 83dd708c575..f344b7007d0 100644 --- a/tests/queries/0_stateless/01936_three_parts_identifiers_in_wrong_places.sql +++ b/tests/queries/0_stateless/01936_three_parts_identifiers_in_wrong_places.sql @@ -1,9 +1,9 @@ SET allow_experimental_analyzer = 1; -SELECT dictGet(t.nest.a, concat(currentDatabase(), '.dict.dict'), 's', number) FROM numbers(5); -- { serverError 36 } +SELECT dictGet(t.nest.a, concat(currentDatabase(), '.dict.dict'), 's', number) FROM numbers(5); -- { serverError INVALID_IDENTIFIER } -SELECT dictGetFloat64(t.b.s, 'database_for_dict.dict1', dictGetFloat64('Ta\0', toUInt64('databas\0_for_dict.dict1databas\0_for_dict.dict1', dictGetFloat64('', '', toUInt64(1048577), toDate(NULL)), NULL), toDate(dictGetFloat64(257, 'database_for_dict.dict1database_for_dict.dict1', '', toUInt64(NULL), 2, toDate(NULL)), '2019-05-2\0')), NULL, toUInt64(dictGetFloat64('', '', toUInt64(-9223372036854775808), toDate(NULL)), NULL)); -- { serverError 36 } +SELECT dictGetFloat64(t.b.s, 'database_for_dict.dict1', dictGetFloat64('Ta\0', toUInt64('databas\0_for_dict.dict1databas\0_for_dict.dict1', dictGetFloat64('', '', toUInt64(1048577), toDate(NULL)), NULL), toDate(dictGetFloat64(257, 'database_for_dict.dict1database_for_dict.dict1', '', toUInt64(NULL), 2, toDate(NULL)), '2019-05-2\0')), NULL, toUInt64(dictGetFloat64('', '', toUInt64(-9223372036854775808), toDate(NULL)), NULL)); -- { serverError INVALID_IDENTIFIER } -SELECT NULL AND (2147483648 AND NULL) AND -2147483647, toUUID(((1048576 AND NULL) AND (2147483647 AND 257 AND NULL AND -2147483649) AND NULL) IN (test_01103.t1_distr.id), '00000000-e1fe-11e\0-bb8f\0853d60c00749'), stringToH3('89184926cc3ffff89184926cc3ffff89184926cc3ffff89184926cc3ffff89184926cc3ffff89184926cc3ffff89184926cc3ffff89184926cc3ffff'); -- { serverError 36 } +SELECT NULL AND (2147483648 AND NULL) AND -2147483647, toUUID(((1048576 AND NULL) AND (2147483647 AND 257 AND NULL AND -2147483649) AND NULL) IN (test_01103.t1_distr.id), '00000000-e1fe-11e\0-bb8f\0853d60c00749'), stringToH3('89184926cc3ffff89184926cc3ffff89184926cc3ffff89184926cc3ffff89184926cc3ffff89184926cc3ffff89184926cc3ffff89184926cc3ffff'); -- { serverError INVALID_IDENTIFIER } SELECT 'still alive'; diff --git a/tests/queries/0_stateless/01961_roaring_memory_tracking.sql b/tests/queries/0_stateless/01961_roaring_memory_tracking.sql index 695f233ed13..043febbcf55 100644 --- a/tests/queries/0_stateless/01961_roaring_memory_tracking.sql +++ b/tests/queries/0_stateless/01961_roaring_memory_tracking.sql @@ -1,4 +1,6 @@ -- Tags: no-replicated-database, no-asan, no-tsan, no-msan, no-ubsan +SET max_bytes_before_external_group_by = 0; + SET 
max_memory_usage = '100M'; SELECT cityHash64(rand() % 1000) as n, groupBitmapState(number) FROM numbers_mt(200000000) GROUP BY n FORMAT Null; -- { serverError 241 } diff --git a/tests/queries/0_stateless/02013_json_function_null_column.reference b/tests/queries/0_stateless/02013_json_function_null_column.reference index ab702ab52cb..a8ffccc46bf 100644 --- a/tests/queries/0_stateless/02013_json_function_null_column.reference +++ b/tests/queries/0_stateless/02013_json_function_null_column.reference @@ -1,9 +1,11 @@ \N Nullable(String) +\N LowCardinality(Nullable(String)) String \N Nullable(String) Nullable(String) \N Nullable(Nothing) \N Nullable(Nothing) +\N Nullable(Nothing) b \N @@ -21,3 +23,6 @@ true a \N \N +('value') +(NULL) +(NULL) diff --git a/tests/queries/0_stateless/02013_json_function_null_column.sql b/tests/queries/0_stateless/02013_json_function_null_column.sql index 94a2320cefb..963d0ee55cc 100644 --- a/tests/queries/0_stateless/02013_json_function_null_column.sql +++ b/tests/queries/0_stateless/02013_json_function_null_column.sql @@ -1,9 +1,12 @@ SELECT JSONExtract('{"string_value":null}', 'string_value', 'Nullable(String)') as x, toTypeName(x); +SELECT JSONExtract('{"string_value":null}', 'string_value', 'LowCardinality(Nullable(String))') as x, toTypeName(x); SELECT JSONExtract('{"string_value":null}', 'string_value', 'String') as x, toTypeName(x); SELECT JSONExtract(toNullable('{"string_value":null}'), 'string_value', 'Nullable(String)') as x, toTypeName(x); +SELECT JSONExtract(toNullable('{"string_value":null}'), 'string_value', 'LowCardinality(Nullable(String))') as x, toTypeName(x); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } SELECT JSONExtract(toNullable('{"string_value":null}'), 'string_value', 'String') as x, toTypeName(x); SELECT JSONExtract(NULL, 'string_value', 'Nullable(String)') as x, toTypeName(x); +SELECT JSONExtract(NULL, 'string_value', 'LowCardinality(Nullable(String))') as x, toTypeName(x); SELECT JSONExtract(NULL, 'string_value', 'String') as x, toTypeName(x); SELECT JSONExtractString('["a", "b", "c", "d", "e"]', idx) FROM (SELECT arrayJoin([2, NULL, 2147483646, 65535, 65535, 3]) AS idx); @@ -11,6 +14,7 @@ SELECT JSONExtractInt('[1]', toNullable(1)); SELECT JSONExtractBool('[1]', toNullable(1)); SELECT JSONExtractFloat('[1]', toNullable(1)); SELECT JSONExtractString('["a"]', toNullable(1)); +SELECT JSONExtractInt('[1]', toLowCardinality(toNullable(1))); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } SELECT JSONExtractArrayRaw('["1"]', toNullable(1)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } SELECT JSONExtractKeysAndValuesRaw('["1"]', toNullable(1)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } @@ -23,3 +27,7 @@ SELECT JSONExtract('[1]', toNullable(1), 'Nullable(Float)'); SELECT JSONExtract('["a"]', toNullable(1), 'Nullable(String)'); SELECT JSONExtract('["a"]', toNullable(1), 'Nullable(Int)'); SELECT JSONExtract('["-a"]', toNullable(1), 'Nullable(Int)'); + +SELECT JSONExtract(materialize('{"key":"value"}'), 'Tuple(key LowCardinality(Nullable(String)))'); +SELECT JSONExtract(materialize('{"key":null}'), 'Tuple(key LowCardinality(Nullable(String)))'); +SELECT JSONExtract(materialize('{"not_a_key":"value"}'), 'Tuple(key LowCardinality(Nullable(String)))'); diff --git a/tests/queries/0_stateless/02015_async_inserts_stress_long.sh b/tests/queries/0_stateless/02015_async_inserts_stress_long.sh index 437df01d445..2f7e15f201a 100755 --- a/tests/queries/0_stateless/02015_async_inserts_stress_long.sh +++ b/tests/queries/0_stateless/02015_async_inserts_stress_long.sh 
@@ -11,7 +11,8 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) function insert1() { url="${CLICKHOUSE_URL}&async_insert=1&wait_for_async_insert=0" - while true; do + local TIMELIMIT=$((SECONDS+$1)) + while [ $SECONDS -lt "$TIMELIMIT" ]; do ${CLICKHOUSE_CURL} -sS "$url" -d 'INSERT INTO async_inserts FORMAT CSV 1,"a" 2,"b" @@ -22,7 +23,8 @@ function insert1() function insert2() { url="${CLICKHOUSE_URL}&async_insert=1&wait_for_async_insert=0" - while true; do + local TIMELIMIT=$((SECONDS+$1)) + while [ $SECONDS -lt "$TIMELIMIT" ]; do ${CLICKHOUSE_CURL} -sS "$url" -d 'INSERT INTO async_inserts FORMAT JSONEachRow {"id": 5, "s": "e"} {"id": 6, "s": "f"}' done } @@ -30,28 +32,32 @@ function insert2() function insert3() { url="${CLICKHOUSE_URL}&async_insert=1&wait_for_async_insert=0" - while true; do + local TIMELIMIT=$((SECONDS+$1)) + while [ $SECONDS -lt "$TIMELIMIT" ]; do ${CLICKHOUSE_CURL} -sS "$url" -d "INSERT INTO FUNCTION remote('127.0.0.1', $CLICKHOUSE_DATABASE, async_inserts) VALUES (7, 'g') (8, 'h')" done } function select1() { - while true; do + local TIMELIMIT=$((SECONDS+$1)) + while [ $SECONDS -lt "$TIMELIMIT" ]; do ${CLICKHOUSE_CLIENT} -q "SELECT * FROM async_inserts FORMAT Null" done } function select2() { - while true; do + local TIMELIMIT=$((SECONDS+$1)) + while [ $SECONDS -lt "$TIMELIMIT" ]; do ${CLICKHOUSE_CLIENT} -q "SELECT * FROM system.asynchronous_inserts FORMAT Null" done } function truncate1() { - while true; do + local TIMELIMIT=$((SECONDS+$1)) + while [ $SECONDS -lt "$TIMELIMIT" ]; do sleep 0.1 ${CLICKHOUSE_CLIENT} -q "TRUNCATE TABLE async_inserts" done @@ -70,14 +76,14 @@ export -f select2 export -f truncate1 for _ in {1..5}; do - timeout $TIMEOUT bash -c insert1 & - timeout $TIMEOUT bash -c insert2 & - timeout $TIMEOUT bash -c insert3 & + insert1 $TIMEOUT & + insert2 $TIMEOUT & + insert3 $TIMEOUT & done -timeout $TIMEOUT bash -c select1 & -timeout $TIMEOUT bash -c select2 & -timeout $TIMEOUT bash -c truncate1 & +select1 $TIMEOUT & +select2 $TIMEOUT & +truncate1 $TIMEOUT & wait echo "OK" diff --git a/tests/queries/0_stateless/02152_bool_type.sql b/tests/queries/0_stateless/02152_bool_type.sql index e9efde0795f..1ed3620c149 100644 --- a/tests/queries/0_stateless/02152_bool_type.sql +++ b/tests/queries/0_stateless/02152_bool_type.sql @@ -1,3 +1,5 @@ +SET output_format_pretty_color=1; + SELECT CAST('True', 'Bool'); SELECT CAST('TrUe', 'Bool'); SELECT CAST('true', 'Bool'); diff --git a/tests/queries/0_stateless/02158_explain_ast_alter_commands.reference b/tests/queries/0_stateless/02158_explain_ast_alter_commands.reference index 030d5a8f5af..518ecfdb141 100644 --- a/tests/queries/0_stateless/02158_explain_ast_alter_commands.reference +++ b/tests/queries/0_stateless/02158_explain_ast_alter_commands.reference @@ -9,7 +9,7 @@ AlterCommand MODIFY_TTL (children 1) AlterCommand MATERIALIZE_TTL (children 1) AlterCommand MODIFY_SETTING (children 1) - AlterCommand RESET_SETTING + AlterCommand RESET_SETTING (children 1) AlterCommand MODIFY_QUERY (children 1) AlterCommand REMOVE_TTL AlterCommand REMOVE_SAMPLE_BY diff --git a/tests/queries/0_stateless/02207_subseconds_intervals.reference b/tests/queries/0_stateless/02207_subseconds_intervals.reference index 91f0ecb8606..7e6d64b6b9f 100644 --- a/tests/queries/0_stateless/02207_subseconds_intervals.reference +++ b/tests/queries/0_stateless/02207_subseconds_intervals.reference @@ -10,14 +10,14 @@ test intervals - test microseconds 1980-12-12 12:12:12.123456 1980-12-12 12:12:12.123400 -1980-12-12 12:12:12.123456 -1980-12-12 
12:12:12.123456 +1980-12-12 12:12:12.123457 +1980-12-12 12:12:12.123457 1930-12-12 12:12:12.123456 1930-12-12 12:12:12.123400 1930-12-12 12:12:12.123456 2220-12-12 12:12:12.123456 2220-12-12 12:12:12.123400 -2220-12-12 12:12:12.123456 +2220-12-12 12:12:12.123457 - test milliseconds 1980-12-12 12:12:12.123 1980-12-12 12:12:12.120 diff --git a/tests/queries/0_stateless/02240_get_type_serialization_streams.reference b/tests/queries/0_stateless/02240_get_type_serialization_streams.reference index 3537720214f..15e9bf87562 100644 --- a/tests/queries/0_stateless/02240_get_type_serialization_streams.reference +++ b/tests/queries/0_stateless/02240_get_type_serialization_streams.reference @@ -1,8 +1,8 @@ ['{ArraySizes}','{ArrayElements, Regular}'] -['{ArraySizes}','{ArrayElements, TupleElement(keys, escape_tuple_delimiter = true), Regular}','{ArrayElements, TupleElement(values, escape_tuple_delimiter = true), Regular}'] -['{TupleElement(1, escape_tuple_delimiter = true), Regular}','{TupleElement(2, escape_tuple_delimiter = true), Regular}','{TupleElement(3, escape_tuple_delimiter = true), Regular}'] +['{ArraySizes}','{ArrayElements, TupleElement(keys), Regular}','{ArrayElements, TupleElement(values), Regular}'] +['{TupleElement(1), Regular}','{TupleElement(2), Regular}','{TupleElement(3), Regular}'] ['{DictionaryKeys, Regular}','{DictionaryIndexes}'] ['{NullMap}','{NullableElements, Regular}'] ['{ArraySizes}','{ArrayElements, Regular}'] -['{ArraySizes}','{ArrayElements, TupleElement(keys, escape_tuple_delimiter = true), Regular}','{ArrayElements, TupleElement(values, escape_tuple_delimiter = true), Regular}'] -['{TupleElement(1, escape_tuple_delimiter = true), Regular}','{TupleElement(2, escape_tuple_delimiter = true), Regular}','{TupleElement(3, escape_tuple_delimiter = true), Regular}','{TupleElement(4, escape_tuple_delimiter = true), Regular}'] +['{ArraySizes}','{ArrayElements, TupleElement(keys), Regular}','{ArrayElements, TupleElement(values), Regular}'] +['{TupleElement(1), Regular}','{TupleElement(2), Regular}','{TupleElement(3), Regular}','{TupleElement(4), Regular}'] diff --git a/tests/queries/0_stateless/02252_jit_profile_events.sql b/tests/queries/0_stateless/02252_jit_profile_events.sql index eca3c06f9f1..fbd6040c21c 100644 --- a/tests/queries/0_stateless/02252_jit_profile_events.sql +++ b/tests/queries/0_stateless/02252_jit_profile_events.sql @@ -19,13 +19,13 @@ SELECT ProfileEvents['CompileFunction'] FROM system.query_log WHERE SET compile_aggregate_expressions = 1; SET min_count_to_compile_aggregate_expression = 0; -SELECT sum(number), sum(number + 1), sum(number + 2) FROM numbers(1) GROUP BY number; +SELECT avg(number), avg(number + 1), avg(number + 2) FROM numbers(1) GROUP BY number; SYSTEM FLUSH LOGS; SELECT ProfileEvents['CompileFunction'] FROM system.query_log WHERE current_database = currentDatabase() AND type = 'QueryFinish' - AND query == 'SELECT sum(number), sum(number + 1), sum(number + 2) FROM numbers(1) GROUP BY number;' + AND query == 'SELECT avg(number), avg(number + 1), avg(number + 2) FROM numbers(1) GROUP BY number;' AND event_date >= yesterday() AND event_time > now() - interval 10 minute LIMIT 1; diff --git a/tests/queries/0_stateless/02294_decimal_second_errors.sql b/tests/queries/0_stateless/02294_decimal_second_errors.sql index b6059dc3d48..52d2279be41 100644 --- a/tests/queries/0_stateless/02294_decimal_second_errors.sql +++ b/tests/queries/0_stateless/02294_decimal_second_errors.sql @@ -4,7 +4,7 @@ SELECT 1 SETTINGS max_execution_time=-Infinity; -- { 
clientError 72 }; -- Ok values SELECT 1 SETTINGS max_execution_time=-0.5; -SELECT 1 SETTINGS max_execution_time=0.5; +SELECT 1 SETTINGS max_execution_time=5.5; SELECT 1 SETTINGS max_execution_time=-1; SELECT 1 SETTINGS max_execution_time=0.0; SELECT 1 SETTINGS max_execution_time=-0.0; diff --git a/tests/queries/0_stateless/02303_query_kind.reference b/tests/queries/0_stateless/02303_query_kind.reference index 53a0df682b2..9f1c026f889 100644 --- a/tests/queries/0_stateless/02303_query_kind.reference +++ b/tests/queries/0_stateless/02303_query_kind.reference @@ -20,17 +20,17 @@ clickhouse-client --allow_experimental_analyzer=1 --query_kind initial_query -q Expression ((Project names + Projection)) Header: dummy String Aggregating - Header: toString(__table1.dummy) String + Header: __table1.dummy UInt8 Expression ((Before GROUP BY + Change column names to column identifiers)) - Header: toString(__table1.dummy) String + Header: __table1.dummy UInt8 ReadFromStorage (SystemOne) Header: dummy UInt8 clickhouse-local --allow_experimental_analyzer=1 --query_kind initial_query -q explain plan header=1 select toString(dummy) as dummy from system.one group by dummy Expression ((Project names + Projection)) Header: dummy String Aggregating - Header: toString(__table1.dummy) String + Header: __table1.dummy UInt8 Expression ((Before GROUP BY + Change column names to column identifiers)) - Header: toString(__table1.dummy) String + Header: __table1.dummy UInt8 ReadFromStorage (SystemOne) Header: dummy UInt8 diff --git a/tests/queries/0_stateless/02346_inverted_index_mutation.reference b/tests/queries/0_stateless/02346_inverted_index_bug47393.reference similarity index 100% rename from tests/queries/0_stateless/02346_inverted_index_mutation.reference rename to tests/queries/0_stateless/02346_inverted_index_bug47393.reference diff --git a/tests/queries/0_stateless/02346_inverted_index_bug47393.sql b/tests/queries/0_stateless/02346_inverted_index_bug47393.sql new file mode 100644 index 00000000000..166e051b120 --- /dev/null +++ b/tests/queries/0_stateless/02346_inverted_index_bug47393.sql @@ -0,0 +1,25 @@ +SET allow_experimental_inverted_index = 1; + +DROP TABLE IF EXISTS tab; +CREATE TABLE tab +( + id UInt64, + str String, + INDEX idx str TYPE inverted(3) GRANULARITY 1 +) +ENGINE = MergeTree +ORDER BY tuple() +SETTINGS min_rows_for_wide_part = 1, min_bytes_for_wide_part = 1; + +INSERT INTO tab (str) VALUES ('I am inverted'); + +SELECT data_version FROM system.parts WHERE database = currentDatabase() AND table = 'tab' AND active = 1; + +-- update column synchronously +ALTER TABLE tab UPDATE str = 'I am not inverted' WHERE 1 SETTINGS mutations_sync=1; + +SELECT data_version FROM system.parts WHERE database = currentDatabase() AND table = 'tab' AND active = 1; + +SELECT str FROM tab WHERE str LIKE '%inverted%' SETTINGS force_data_skipping_indices = 'idx'; + +DROP TABLE tab; diff --git a/tests/queries/0_stateless/02346_inverted_index_bug52019.reference b/tests/queries/0_stateless/02346_inverted_index_bug52019.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02862_index_inverted_incorrect_args.sql b/tests/queries/0_stateless/02346_inverted_index_bug52019.sql similarity index 62% rename from tests/queries/0_stateless/02862_index_inverted_incorrect_args.sql rename to tests/queries/0_stateless/02346_inverted_index_bug52019.sql index 7ba122a7155..c61e17d9cea 100644 --- a/tests/queries/0_stateless/02862_index_inverted_incorrect_args.sql +++ 
b/tests/queries/0_stateless/02346_inverted_index_bug52019.sql @@ -1,9 +1,20 @@ --- https://github.com/ClickHouse/ClickHouse/issues/52019 -DROP TABLE IF EXISTS tab; +-- Test for Bug 52019: Undefined behavior + SET allow_experimental_inverted_index=1; -CREATE TABLE tab (`k` UInt64, `s` Map(String, String), INDEX af mapKeys(s) TYPE inverted(2) GRANULARITY 1) ENGINE = MergeTree ORDER BY k SETTINGS index_granularity = 2, index_granularity_bytes = '10Mi'; + +DROP TABLE IF EXISTS tab; + +CREATE TABLE tab ( + k UInt64, + s Map(String, String), + INDEX idx mapKeys(s) TYPE inverted(2) GRANULARITY 1) +ENGINE = MergeTree +ORDER BY k +SETTINGS index_granularity = 2, index_granularity_bytes = '10Mi'; + INSERT INTO tab (k) VALUES (0); SELECT * FROM tab PREWHERE (s[NULL]) = 'Click a03' SETTINGS allow_experimental_analyzer=1; SELECT * FROM tab PREWHERE (s[1]) = 'Click a03' SETTINGS allow_experimental_analyzer=1; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } SELECT * FROM tab PREWHERE (s['foo']) = 'Click a03' SETTINGS allow_experimental_analyzer=1; + DROP TABLE tab; diff --git a/tests/queries/0_stateless/02346_inverted_index_bug59039.reference b/tests/queries/0_stateless/02346_inverted_index_bug59039.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02346_inverted_index_bug59039.sql b/tests/queries/0_stateless/02346_inverted_index_bug59039.sql new file mode 100644 index 00000000000..0ef0cb0c733 --- /dev/null +++ b/tests/queries/0_stateless/02346_inverted_index_bug59039.sql @@ -0,0 +1,20 @@ +-- This is supposed to test that DROP INDEX removes all index related files. Can't test this directly but at least run the statement and +-- check that no bad things happen. + +SET allow_experimental_inverted_index = 1; + +DROP TABLE IF EXISTS tab; + +CREATE TABLE tab +( + id UInt64, + doc String, + INDEX text_idx doc TYPE inverted +) +ENGINE = MergeTree +ORDER BY id +SETTINGS index_granularity = 2, index_granularity_bytes = '10Mi', min_bytes_for_wide_part = 0, min_rows_for_wide_part = 0; + +ALTER TABLE tab DROP INDEX text_idx; + +DROP TABLE tab; diff --git a/tests/queries/0_stateless/02346_inverted_index_detach_attach.reference b/tests/queries/0_stateless/02346_inverted_index_detach_attach.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02696_inverted_idx_checksums.sql b/tests/queries/0_stateless/02346_inverted_index_detach_attach.sql similarity index 75% rename from tests/queries/0_stateless/02696_inverted_idx_checksums.sql rename to tests/queries/0_stateless/02346_inverted_index_detach_attach.sql index 92ffa7a6196..762d78922fe 100644 --- a/tests/queries/0_stateless/02696_inverted_idx_checksums.sql +++ b/tests/queries/0_stateless/02346_inverted_index_detach_attach.sql @@ -2,8 +2,8 @@ SET allow_experimental_inverted_index = 1; CREATE TABLE t ( - `key` UInt64, - `str` String, + key UInt64, + str String, INDEX inv_idx str TYPE inverted(0) GRANULARITY 1 ) ENGINE = MergeTree @@ -13,4 +13,4 @@ INSERT INTO t VALUES (1, 'Hello World'); ALTER TABLE t DETACH PART 'all_1_1_0'; -ALTER TABLE t ATTACH PART 'all_1_1_0'; \ No newline at end of file +ALTER TABLE t ATTACH PART 'all_1_1_0'; diff --git a/tests/queries/0_stateless/02346_inverted_index_experimental_flag.reference b/tests/queries/0_stateless/02346_inverted_index_experimental_flag.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02895_forbid_create_inverted_index.sql 
b/tests/queries/0_stateless/02346_inverted_index_experimental_flag.sql similarity index 72% rename from tests/queries/0_stateless/02895_forbid_create_inverted_index.sql rename to tests/queries/0_stateless/02346_inverted_index_experimental_flag.sql index dc92d9198fb..bf89265372e 100644 --- a/tests/queries/0_stateless/02895_forbid_create_inverted_index.sql +++ b/tests/queries/0_stateless/02346_inverted_index_experimental_flag.sql @@ -1,4 +1,7 @@ +-- Tests that the inverted index can only be supported when allow_experimental_inverted_index = 1. + SET allow_experimental_inverted_index = 0; + DROP TABLE IF EXISTS tab; CREATE TABLE tab ( diff --git a/tests/queries/0_stateless/02346_inverted_index_match_predicate.reference b/tests/queries/0_stateless/02346_inverted_index_match_predicate.reference new file mode 100644 index 00000000000..9dc8d5b76d9 --- /dev/null +++ b/tests/queries/0_stateless/02346_inverted_index_match_predicate.reference @@ -0,0 +1,20 @@ +1 Hello ClickHouse +2 Hello World + Granules: 6/6 + Granules: 2/6 + Granules: 6/6 + Granules: 2/6 +--- +1 Hello ClickHouse +2 Hello World +6 World Champion + Granules: 6/6 + Granules: 3/6 + Granules: 6/6 + Granules: 3/6 +--- +5 OLAP Database + Granules: 6/6 + Granules: 1/6 + Granules: 6/6 + Granules: 1/6 diff --git a/tests/queries/0_stateless/02346_inverted_index_match_predicate.sql b/tests/queries/0_stateless/02346_inverted_index_match_predicate.sql new file mode 100644 index 00000000000..99405c0acf2 --- /dev/null +++ b/tests/queries/0_stateless/02346_inverted_index_match_predicate.sql @@ -0,0 +1,107 @@ +-- Tests that match() utilizes the inverted index + +SET allow_experimental_inverted_index = true; + +DROP TABLE IF EXISTS tab; + +CREATE TABLE tab +( + id UInt32, + str String, + INDEX inv_idx(str) TYPE inverted(0) GRANULARITY 1 +) +ENGINE = MergeTree +ORDER BY id +SETTINGS index_granularity = 1; + +INSERT INTO tab VALUES (1, 'Hello ClickHouse'), (2, 'Hello World'), (3, 'Good Weather'), (4, 'Say Hello'), (5, 'OLAP Database'), (6, 'World Champion'); + +SELECT * FROM tab WHERE match(str, 'Hello (ClickHouse|World)') ORDER BY id; + +-- Read 2/6 granules +-- Required string: 'Hello ' +-- Alternatives: 'Hello ClickHouse', 'Hello World' + +SELECT * +FROM +( + EXPLAIN PLAN indexes=1 + SELECT * FROM tab WHERE match(str, 'Hello (ClickHouse|World)') ORDER BY id +) +WHERE + explain LIKE '%Granules: %' +SETTINGS + allow_experimental_analyzer = 0; + +SELECT * +FROM +( + EXPLAIN PLAN indexes=1 + SELECT * FROM tab WHERE match(str, 'Hello (ClickHouse|World)') ORDER BY id +) +WHERE + explain LIKE '%Granules: %' +SETTINGS + allow_experimental_analyzer = 1; + +SELECT '---'; + +SELECT * FROM tab WHERE match(str, '.*(ClickHouse|World)') ORDER BY id; + +-- Read 3/6 granules +-- Required string: - +-- Alternatives: 'ClickHouse', 'World' + +SELECT * +FROM +( + EXPLAIN PLAN indexes = 1 + SELECT * FROM tab WHERE match(str, '.*(ClickHouse|World)') ORDER BY id +) +WHERE + explain LIKE '%Granules: %' +SETTINGS + allow_experimental_analyzer = 0; + +SELECT * +FROM +( + EXPLAIN PLAN indexes = 1 + SELECT * FROM tab WHERE match(str, '.*(ClickHouse|World)') ORDER BY id +) +WHERE + explain LIKE '%Granules: %' +SETTINGS + allow_experimental_analyzer = 1; + +SELECT '---'; + +SELECT * FROM tab WHERE match(str, 'OLAP.*') ORDER BY id; + +-- Read 1/6 granules +-- Required string: 'OLAP' +-- Alternatives: - + +SELECT * +FROM +( + EXPLAIN PLAN indexes = 1 + SELECT * FROM tab WHERE match(str, 'OLAP (.*?)*') ORDER BY id +) +WHERE + explain LIKE '%Granules: %' +SETTINGS + 
allow_experimental_analyzer = 0; + +SELECT * +FROM +( + EXPLAIN PLAN indexes = 1 + SELECT * FROM tab WHERE match(str, 'OLAP (.*?)*') ORDER BY id +) +WHERE + explain LIKE '%Granules: %' +SETTINGS + allow_experimental_analyzer = 1; + +DROP TABLE tab; diff --git a/tests/queries/0_stateless/02346_inverted_index_mutation.sql b/tests/queries/0_stateless/02346_inverted_index_mutation.sql deleted file mode 100644 index 83b73807cd7..00000000000 --- a/tests/queries/0_stateless/02346_inverted_index_mutation.sql +++ /dev/null @@ -1,25 +0,0 @@ -SET allow_experimental_inverted_index=1; - -DROP TABLE IF EXISTS t; -CREATE TABLE t -( - `timestamp` UInt64, - `s` String, - INDEX idx s TYPE inverted(3) GRANULARITY 1 -) -ENGINE = MergeTree -ORDER BY tuple() -SETTINGS min_rows_for_wide_part = 1, min_bytes_for_wide_part = 1; - -INSERT INTO t (s) VALUES ('I am inverted'); - -SELECT data_version FROM system.parts WHERE database=currentDatabase() AND table='t' AND active=1; - --- do update column synchronously -ALTER TABLE t UPDATE s='I am not inverted' WHERE 1 SETTINGS mutations_sync=1; - -SELECT data_version FROM system.parts WHERE database=currentDatabase() AND table='t' AND active=1; - -SELECT s FROM t WHERE s LIKE '%inverted%' SETTINGS force_data_skipping_indices='idx'; - -DROP TABLE t; diff --git a/tests/queries/0_stateless/02346_full_text_search.reference b/tests/queries/0_stateless/02346_inverted_index_search.reference similarity index 100% rename from tests/queries/0_stateless/02346_full_text_search.reference rename to tests/queries/0_stateless/02346_inverted_index_search.reference diff --git a/tests/queries/0_stateless/02346_full_text_search.sql b/tests/queries/0_stateless/02346_inverted_index_search.sql similarity index 100% rename from tests/queries/0_stateless/02346_full_text_search.sql rename to tests/queries/0_stateless/02346_inverted_index_search.sql diff --git a/tests/queries/0_stateless/02366_kql_create_table.reference b/tests/queries/0_stateless/02366_kql_create_table.reference deleted file mode 100644 index 35136b5ff42..00000000000 --- a/tests/queries/0_stateless/02366_kql_create_table.reference +++ /dev/null @@ -1,4 +0,0 @@ --- test create table -- -Theodore -Diaz -Theodore Diaz 28 diff --git a/tests/queries/0_stateless/02366_kql_create_table.sql b/tests/queries/0_stateless/02366_kql_create_table.sql deleted file mode 100644 index b266679b06a..00000000000 --- a/tests/queries/0_stateless/02366_kql_create_table.sql +++ /dev/null @@ -1,29 +0,0 @@ -DROP TABLE IF EXISTS Customers; -CREATE TABLE Customers -( - FirstName Nullable(String), - LastName String, - Occupation String, - Education String, - Age Nullable(UInt8) -) ENGINE = Memory; - -INSERT INTO Customers VALUES ('Theodore','Diaz','Skilled Manual','Bachelors',28),('Stephanie','Cox','Management abcd defg','Bachelors',33),('Peter','Nara','Skilled Manual','Graduate Degree',26),('Latoya','Shen','Professional','Graduate Degree',25),('Apple','','Skilled Manual','Bachelors',28),(NULL,'why','Professional','Partial College',38); -Select '-- test create table --' ; -Select * from kql(Customers|project FirstName) limit 1;; -DROP TABLE IF EXISTS kql_table1; -CREATE TABLE kql_table1 ENGINE = Memory AS select *, now() as new_column From kql(Customers | project LastName | filter LastName=='Diaz'); -select LastName from kql_table1 limit 1; -DROP TABLE IF EXISTS kql_table2; -CREATE TABLE kql_table2 -( - FirstName Nullable(String), - LastName String, - Age Nullable(UInt8) -) ENGINE = Memory; -INSERT INTO kql_table2 select * from kql(Customers|project 
FirstName,LastName,Age | filter FirstName=='Theodore'); -select * from kql_table2 limit 1; --- select * from kql(Customers | where FirstName !in ("test", "test2")); -DROP TABLE IF EXISTS Customers; -DROP TABLE IF EXISTS kql_table1; -DROP TABLE IF EXISTS kql_table2; \ No newline at end of file diff --git a/tests/queries/0_stateless/02366_kql_datatype.reference b/tests/queries/0_stateless/02366_kql_datatype.reference deleted file mode 100644 index fe666f3734c..00000000000 --- a/tests/queries/0_stateless/02366_kql_datatype.reference +++ /dev/null @@ -1,105 +0,0 @@ --- bool -true -\N --- int -123 -\N --- long -123 -255 --1 -\N -456 --- real -0.01 -\N -nan -inf --inf --- datetime -2015-12-31 23:59:59.900000000 -2015-12-31 00:00:00.000000000 -2014-05-25 08:20:03.123456000 -2014-11-08 15:55:55.000000000 -2014-11-08 15:55:00.000000000 -2014-11-08 00:00:00.000000000 -\N -2014-05-25 08:20:03.123456000 -2014-11-08 15:55:55.123456000 --- time -1216984.12345 -45055.123 -86400 --86400 -6.000000000000001e-9 -6e-7 -172800 -259200 --- guid -\N --- timespan (time) -172800 -1800 -10 -0.1 -0.00001 -1e-7 -1120343 --- null -1 -\N \N \N \N \N --- decimal -\N -123.345 -100000 --- dynamic -\N -1 -86400 -[1,2,3] -[[1],[2],[3]] -['a','b','c'] --- cast functions -true -1 --- tobool("false") -false -1 --- tobool(1) -true -1 --- tobool(123) -true -1 --- tobool("abc") -\N -\N --- todouble() -123.4 -\N --- toreal() -123.4 -\N --- toint() -1 -\N --- tostring() -123 -1 --- todatetime() -1 -\N --- make_timespan() -01:12:00 01:12:30 1.12:30:55 --- totimespan() -1e-7 -60 -\N -1120343 --- tolong() -123 -\N --- todecimal() -123.345 -\N -\N diff --git a/tests/queries/0_stateless/02366_kql_datatype.sql b/tests/queries/0_stateless/02366_kql_datatype.sql deleted file mode 100644 index ecd29504298..00000000000 --- a/tests/queries/0_stateless/02366_kql_datatype.sql +++ /dev/null @@ -1,117 +0,0 @@ -set dialect = 'kusto'; - -print '-- bool' -print bool(true); -print bool(true); -print bool(null); -print '-- int'; -print int(123); -print int(null); -print int('4'); -- { clientError BAD_ARGUMENTS } -print '-- long'; -print long(123); -print long(0xff); -print long(-1); -print long(null); -print 456; -print '-- real'; -print real(0.01); -print real(null); -print real(nan); -print real(+inf); -print real(-inf); -print double('4.2'); -- { clientError BAD_ARGUMENTS } -print '-- datetime'; -print datetime(2015-12-31 23:59:59.9); -print datetime(2015-12-31); -print datetime('2014-05-25T08:20:03.123456'); -print datetime('2014-11-08 15:55:55'); -print datetime('2014-11-08 15:55'); -print datetime('2014-11-08'); -print datetime(null); -print datetime('2014-05-25T08:20:03.123456Z'); -print datetime('2014-11-08 15:55:55.123456Z'); -print '-- time'; -print time('14.02:03:04.12345'); -print time('12:30:55.123'); -print time(1d); -print time(-1d); -print time(6nanoseconds); -print time(6tick); -print time(2); -print time(2) + 1d; -print '-- guid' -print guid(74be27de-1e4e-49d9-b579-fe0b331d3642); -print guid(null); -print '-- timespan (time)'; -print timespan(2d); -- 2 days ---print timespan(1.5h); -- 1.5 hour -print timespan(30m); -- 30 minutes -print timespan(10s); -- 10 seconds ---print timespan(0.1s); -- 0.1 second -print timespan(100ms); -- 100 millisecond -print timespan(10microsecond); -- 10 microseconds -print timespan(1tick); -- 100 nanoseconds ---print timespan(1.5h) / timespan(30m); -print timespan('12.23:12:23') / timespan(1s); -print '-- null'; -print isnull(null); -print bool(null), int(null), long(null), real(null), double(null); 
-print '-- decimal'; -print decimal(null); -print decimal(123.345); -print decimal(1e5); -print '-- dynamic'; -- no support for mixed types and bags for now -print dynamic(null); -print dynamic(1); -print dynamic(timespan(1d)); -print dynamic([1,2,3]); -print dynamic([[1], [2], [3]]); -print dynamic(['a', "b", 'c']); -print '-- cast functions' -print '--tobool("true")'; -- == true -print tobool('true'); -- == true -print tobool('true') == toboolean('true'); -- == true -print '-- tobool("false")'; -- == false -print tobool('false'); -- == false -print tobool('false') == toboolean('false'); -- == false -print '-- tobool(1)'; -- == true -print tobool(1); -- == true -print tobool(1) == toboolean(1); -- == true -print '-- tobool(123)'; -- == true -print tobool(123); -- == true -print tobool(123) == toboolean(123); -- == true -print '-- tobool("abc")'; -- == null -print tobool('abc'); -- == null -print tobool('abc') == toboolean('abc'); -- == null -print '-- todouble()'; -print todouble('123.4'); -print todouble('abc') == null; -print '-- toreal()'; -print toreal("123.4"); -print toreal('abc') == null; -print '-- toint()'; -print toint("123") == int(123); -print toint('abc'); -print '-- tostring()'; -print tostring(123); -print tostring(null) == ''; -print '-- todatetime()'; -print todatetime("2015-12-24") == datetime(2015-12-24); -print todatetime('abc') == null; -print '-- make_timespan()'; -print v1=make_timespan(1,12), v2=make_timespan(1,12,30), v3=make_timespan(1,12,30,55.123); -print '-- totimespan()'; -print totimespan(1tick); -print totimespan('0.00:01:00'); -print totimespan('abc'); -print totimespan('12.23:12:23') / totimespan(1s); --- print totimespan(strcat('12.', '23', ':12:', '23')) / timespan(1s); -> 1120343 -print '-- tolong()'; -print tolong('123'); -print tolong('abc'); -print '-- todecimal()'; -print todecimal(123.345); -print todecimal(null); -print todecimal('abc'); --- print todecimal(4 * 2 + 3); -> 11 diff --git a/tests/queries/0_stateless/02366_kql_distinct.reference b/tests/queries/0_stateless/02366_kql_distinct.reference deleted file mode 100644 index 2100f44f18c..00000000000 --- a/tests/queries/0_stateless/02366_kql_distinct.reference +++ /dev/null @@ -1,27 +0,0 @@ --- distinct * -- -Theodore Diaz Skilled Manual Bachelors 28 -Stephanie Cox Management abcd defg Bachelors 33 -Peter Nara Skilled Manual Graduate Degree 26 -Latoya Shen Professional Graduate Degree 25 -Apple Skilled Manual Bachelors 28 -\N why Professional Partial College 38 --- distinct one column -- -Skilled Manual -Management abcd defg -Professional --- distinct two column -- -Skilled Manual Bachelors -Management abcd defg Bachelors -Skilled Manual Graduate Degree -Professional Graduate Degree -Professional Partial College --- distinct with where -- -Skilled Manual Bachelors -Management abcd defg Bachelors -Skilled Manual Graduate Degree -Professional Graduate Degree -Professional Partial College --- distinct with where, order -- -Skilled Manual Bachelors -Skilled Manual Graduate Degree -Professional Graduate Degree diff --git a/tests/queries/0_stateless/02366_kql_distinct.sql b/tests/queries/0_stateless/02366_kql_distinct.sql deleted file mode 100644 index 3c997eb4865..00000000000 --- a/tests/queries/0_stateless/02366_kql_distinct.sql +++ /dev/null @@ -1,28 +0,0 @@ -DROP TABLE IF EXISTS Customers; -CREATE TABLE Customers -( - FirstName Nullable(String), - LastName String, - Occupation String, - Education String, - Age Nullable(UInt8) -) ENGINE = Memory; - -INSERT INTO Customers VALUES 
('Theodore','Diaz','Skilled Manual','Bachelors',28), ('Stephanie','Cox','Management abcd defg','Bachelors',33),('Peter','Nara','Skilled Manual','Graduate Degree',26),('Latoya','Shen','Professional','Graduate Degree',25),('Apple','','Skilled Manual','Bachelors',28),(NULL,'why','Professional','Partial College',38); - -set dialect = 'kusto'; - -print '-- distinct * --'; -Customers | distinct *; - -print '-- distinct one column --'; -Customers | distinct Occupation; - -print '-- distinct two column --'; -Customers | distinct Occupation, Education; - -print '-- distinct with where --'; -Customers where Age <30 | distinct Occupation, Education; - -print '-- distinct with where, order --'; -Customers |where Age <30 | order by Age| distinct Occupation, Education; diff --git a/tests/queries/0_stateless/02366_kql_extend.reference b/tests/queries/0_stateless/02366_kql_extend.reference deleted file mode 100644 index 2936c9ea19c..00000000000 --- a/tests/queries/0_stateless/02366_kql_extend.reference +++ /dev/null @@ -1,32 +0,0 @@ --- extend #1 -- -Aldi Apple 4 2016-09-10 400 -Costco Apple 2 2016-09-11 200 --- extend #2 -- -Apple 200 -Apple 400 --- extend #3 -- -Apple cost 480 on average based on 5 samples. -Snargaluff cost 28080 on average based on 5 samples. --- extend #4 -- -1 --- extend #5 -- -Aldi Apple 4 2016-09-10 Apple was purchased from Aldi for $4 on 2016-09-10 400 -Costco Apple 2 2016-09-11 Apple was purchased from Costco for $2 on 2016-09-11 200 --- extend #6 -- -Aldi Apple 2016-09-10 400 -Costco Apple 2016-09-11 200 -Aldi Apple 2016-09-10 600 -Costco Snargaluff 2016-09-12 10000 -Aldi Apple 2016-09-12 700 -Aldi Snargaluff 2016-09-11 40000 -Costco Snargaluff 2016-09-12 10400 -Aldi Apple 2016-09-12 500 -Aldi Snargaluff 2016-09-11 60000 -Costco Snargaluff 2016-09-10 20000 --- extend #7 -- -5 --- extend #8 -- --- extend #9 -- --- extend #10 -- --- extend #11 -- -5 [2,1] diff --git a/tests/queries/0_stateless/02366_kql_extend.sql b/tests/queries/0_stateless/02366_kql_extend.sql deleted file mode 100644 index 0a3c1f3dcd4..00000000000 --- a/tests/queries/0_stateless/02366_kql_extend.sql +++ /dev/null @@ -1,61 +0,0 @@ --- datatable(Supplier:string, Fruit:string, Price: real, Purchase:datetime) --- [ --- 'Aldi','Apple',4,'2016-09-10', --- 'Costco','Apple',2,'2016-09-11', --- 'Aldi','Apple',6,'2016-09-10', --- 'Costco','Snargaluff',100,'2016-09-12', --- 'Aldi','Apple',7,'2016-09-12', --- 'Aldi','Snargaluff',400,'2016-09-11', --- 'Costco','Snargaluff',104,'2016-09-12', --- 'Aldi','Apple',5,'2016-09-12', --- 'Aldi','Snargaluff',600,'2016-09-11', --- 'Costco','Snargaluff',200,'2016-09-10', --- ] - - -DROP TABLE IF EXISTS Ledger; -CREATE TABLE Ledger -( - Supplier Nullable(String), - Fruit String , - Price Float64, - Purchase Date -) ENGINE = Memory; -INSERT INTO Ledger VALUES ('Aldi','Apple',4,'2016-09-10'), ('Costco','Apple',2,'2016-09-11'), ('Aldi','Apple',6,'2016-09-10'), ('Costco','Snargaluff',100,'2016-09-12'), ('Aldi','Apple',7,'2016-09-12'), ('Aldi','Snargaluff',400,'2016-09-11'),('Costco','Snargaluff',104,'2016-09-12'),('Aldi','Apple',5,'2016-09-12'),('Aldi','Snargaluff',600,'2016-09-11'),('Costco','Snargaluff',200,'2016-09-10'); - --- This test requies sorting after some of aggregations but I don't know KQL, sorry -set max_bytes_before_external_group_by = 0; -set dialect = 'kusto'; - -print '-- extend #1 --'; -Ledger | extend PriceInCents = 100 * Price | take 2; - -print '-- extend #2 --'; -Ledger | extend PriceInCents = 100 * Price | sort by PriceInCents asc | project Fruit, PriceInCents | take 
2; - -print '-- extend #3 --'; -Ledger | extend PriceInCents = 100 * Price | sort by PriceInCents asc | project Fruit, PriceInCents | summarize AveragePrice = avg(PriceInCents), Purchases = count() by Fruit | extend Sentence = strcat(Fruit, ' cost ', tostring(AveragePrice), ' on average based on ', tostring(Purchases), ' samples.') | project Sentence; - -print '-- extend #4 --'; -Ledger | extend a = Price | extend b = a | extend c = a, d = b + 500 | extend Pass = bool(b == a and c == a and d == b + 500) | summarize binary_all_and(Pass); - -print '-- extend #5 --'; -Ledger | take 2 | extend strcat(Fruit, ' was purchased from ', Supplier, ' for $', tostring(Price), ' on ', tostring(Purchase)) | extend PriceInCents = 100 * Price; - -print '-- extend #6 --'; -Ledger | extend Price = 100 * Price; - -print '-- extend #7 --'; -print a = 4 | extend a = 5; - -print '-- extend #8 --'; --- print x = 5 | extend array_sort_desc(range(0, x), range(1, x + 1)) - -print '-- extend #9 --'; -print x = 19 | extend = 4 + ; -- { clientError SYNTAX_ERROR } - -print '-- extend #10 --'; -Ledger | extend PriceInCents = * Price | sort by PriceInCents asc | project Fruit, PriceInCents | summarize AveragePrice = avg(PriceInCents), Purchases = count() by Fruit | extend Sentence = strcat(Fruit, ' cost ', tostring(AveragePrice), ' on average based on ', tostring(Purchases), ' samples.') | project Sentence; -- { clientError SYNTAX_ERROR } - -print '-- extend #11 --'; -- should ideally return this in the future: 5 [2,1] because of the alias ex -print x = 5 | extend ex = array_sort_desc(dynamic([1, 2]), dynamic([3, 4])); diff --git a/tests/queries/0_stateless/02366_kql_func_binary.reference b/tests/queries/0_stateless/02366_kql_func_binary.reference deleted file mode 100644 index 6276cd6d867..00000000000 --- a/tests/queries/0_stateless/02366_kql_func_binary.reference +++ /dev/null @@ -1,7 +0,0 @@ - -- binary functions -4 7 -1 -1 -1 -7 3 -1 diff --git a/tests/queries/0_stateless/02366_kql_func_binary.sql b/tests/queries/0_stateless/02366_kql_func_binary.sql deleted file mode 100644 index 824022b564c..00000000000 --- a/tests/queries/0_stateless/02366_kql_func_binary.sql +++ /dev/null @@ -1,8 +0,0 @@ -set dialect='kusto'; -print ' -- binary functions'; -print binary_and(4,7), binary_or(4,7); -print binary_shift_left(1, 1) == binary_shift_left(1, 65); -print binary_shift_right(2, 1) == binary_shift_right(2, 65); -print binary_shift_right(binary_shift_left(1, 65), 65) == 1; -print binary_xor(2, 5), bitset_count_ones(42); -print bitset_count_ones(binary_shift_left(binary_and(4,7), 1)); diff --git a/tests/queries/0_stateless/02366_kql_func_datetime.reference b/tests/queries/0_stateless/02366_kql_func_datetime.reference deleted file mode 100644 index 40d8d7e19ac..00000000000 --- a/tests/queries/0_stateless/02366_kql_func_datetime.reference +++ /dev/null @@ -1,76 +0,0 @@ --- dayofmonth() -31 --- dayofweek() -4.00:00:00 --- dayofyear() -365 --- getmonth() -10 --- getyear() -2015 --- hoursofday() -23 --- startofday() -2017-01-01 00:00:00.000000000 -2016-12-31 00:00:00.000000000 -2017-01-02 00:00:00.000000000 --- endofday() -2017-01-01 23:59:59.999999000 -2016-12-31 23:59:59.999999000 -2017-01-02 23:59:59.999999000 --- endofmonth() -2017-01-31 23:59:59.999999000 -2016-12-31 23:59:59.999999000 -2017-02-28 23:59:59.999999000 -2022-09-30 23:59:59.999999000 --- startofweek() -2017-01-01 00:00:00.000000000 -2016-12-25 00:00:00.000000000 -2017-01-08 00:00:00.000000000 --- endofweek() -2017-01-07 23:59:59.999999000 -2016-12-31 
23:59:59.999999000 -2017-01-14 23:59:59.999999000 --- startofyear() -2017-01-01 00:00:00.000000000 -2016-01-01 00:00:00.000000000 -2018-01-01 00:00:00.000000000 --- endofyear() -2017-12-31 23:59:59.999999000 -2016-12-31 23:59:59.999999000 -2018-12-31 23:59:59.999999000 --- unixtime_seconds_todatetime() -2019-01-01 00:00:00.000000000 -1970-01-02 00:00:00.000000000 -1969-12-31 00:00:00.000000000 --- unixtime_microseconds_todatetime -2019-01-01 00:00:00.000000 --- unixtime_milliseconds_todatetime() -2019-01-01 00:00:00.000 --- unixtime_nanoseconds_todatetime() -2019-01-01 00:00:00.000000000 --- weekofyear() -52 --- monthofyear() -12 --- weekofyear() -52 --- now() -1 --- make_datetime() -1 -2017-10-01 12:10:00.0000000 -2017-10-01 12:11:00.0000000 --- format_datetime -15-12-14 02:03:04.1234500 -17-01-29 [09:00:05] 2017-01-29 [09:00:05] 17-01-29 [09:00:05 AM] --- format_timespan() -02:03:04.1234500 -29.09:00:05:12 --- ago() --- datetime_diff() -17 2 13 4 29 2 5 10 --- datetime_part() -2017 4 10 44 30 303 01 02 03 --- datetime_add() -2018-01-01 00:00:00.0000000 2017-04-01 00:00:00.0000000 2017-02-01 00:00:00.0000000 2017-01-08 00:00:00.0000000 2017-01-02 00:00:00.0000000 2017-01-01 01:00:00.0000000 2017-01-01 00:01:00.0000000 2017-01-01 00:00:01.0000000 diff --git a/tests/queries/0_stateless/02366_kql_func_datetime.sql b/tests/queries/0_stateless/02366_kql_func_datetime.sql deleted file mode 100644 index b1fba4166a9..00000000000 --- a/tests/queries/0_stateless/02366_kql_func_datetime.sql +++ /dev/null @@ -1,86 +0,0 @@ -set dialect = 'kusto'; - -print '-- dayofmonth()'; -print dayofmonth(datetime(2015-12-31)); -print '-- dayofweek()'; -print dayofweek(datetime(2015-12-31)); -print '-- dayofyear()'; -print dayofyear(datetime(2015-12-31)); -print '-- getmonth()'; -print getmonth(datetime(2015-10-12)); -print '-- getyear()'; -print getyear(datetime(2015-10-12)); -print '-- hoursofday()'; -print hourofday(datetime(2015-12-31 23:59:59.9)); -print '-- startofday()'; -print startofday(datetime(2017-01-01 10:10:17)); -print startofday(datetime(2017-01-01 10:10:17), -1); -print startofday(datetime(2017-01-01 10:10:17), 1); -print '-- endofday()'; -print endofday(datetime(2017-01-01 10:10:17)); -print endofday(datetime(2017-01-01 10:10:17), -1); -print endofday(datetime(2017-01-01 10:10:17), 1); -print '-- endofmonth()'; -print endofmonth(datetime(2017-01-01 10:10:17)); -print endofmonth(datetime(2017-01-01 10:10:17), -1); -print endofmonth(datetime(2017-01-01 10:10:17), 1); -print endofmonth(datetime(2022-09-23)); -print '-- startofweek()'; -print startofweek(datetime(2017-01-01 10:10:17)); -print startofweek(datetime(2017-01-01 10:10:17), -1); -print startofweek(datetime(2017-01-01 10:10:17), 1); -print '-- endofweek()'; -print endofweek(datetime(2017-01-01 10:10:17)); -print endofweek(datetime(2017-01-01 10:10:17), -1); -print endofweek(datetime(2017-01-01 10:10:17), 1); -print '-- startofyear()'; -print startofyear(datetime(2017-01-01 10:10:17)); -print startofyear(datetime(2017-01-01 10:10:17), -1); -print startofyear(datetime(2017-01-01 10:10:17), 1); -print '-- endofyear()'; -print endofyear(datetime(2017-01-01 10:10:17)); -print endofyear(datetime(2017-01-01 10:10:17), -1); -print endofyear(datetime(2017-01-01 10:10:17), 1); -print '-- unixtime_seconds_todatetime()'; -print unixtime_seconds_todatetime(1546300800); -print unixtime_seconds_todatetime(1d); -print unixtime_seconds_todatetime(-1d); -print '-- unixtime_microseconds_todatetime'; -print unixtime_microseconds_todatetime(1546300800000000); 
-print '-- unixtime_milliseconds_todatetime()'; -print unixtime_milliseconds_todatetime(1546300800000); -print '-- unixtime_nanoseconds_todatetime()'; -print unixtime_nanoseconds_todatetime(1546300800000000000); -print '-- weekofyear()'; -print week_of_year(datetime(2000-01-01)); -print '-- monthofyear()'; -print monthofyear(datetime(2015-12-31)); -print '-- weekofyear()'; -print week_of_year(datetime(2000-01-01)); -print '-- now()'; -print getyear(now(-2d))>1900; -print '-- make_datetime()'; -print make_datetime(2017,10,01,12,10) == datetime(2017-10-01 12:10:00); -print year_month_day_hour_minute = make_datetime(2017,10,01,12,10); -print year_month_day_hour_minute_second = make_datetime(2017,10,01,12,11,0.1234567); -print '-- format_datetime'; -print format_datetime(datetime(2015-12-14 02:03:04.12345), 'y-M-d h:m:s.fffffff'); -print v1=format_datetime(datetime(2017-01-29 09:00:05),'yy-MM-dd [HH:mm:ss]'), v2=format_datetime(datetime(2017-01-29 09:00:05), 'yyyy-M-dd [H:mm:ss]'), v3=format_datetime(datetime(2017-01-29 09:00:05), 'yy-MM-dd [hh:mm:ss tt]'); -print '-- format_timespan()'; -print format_timespan(time('14.02:03:04.12345'), 'h:m:s.fffffff'); -print v1=format_timespan(time('29.09:00:05.12345'), 'dd.hh:mm:ss:FF'); --- print v2=format_timespan(time('29.09:00:05.12345'), 'ddd.h:mm:ss [fffffff]'); == '029.9:00:05 [1234500]' -print '-- ago()'; --- print ago(1d) - now(); -print '-- datetime_diff()'; -print year = datetime_diff('year',datetime(2017-01-01),datetime(2000-12-31)), quarter = datetime_diff('quarter',datetime(2017-07-01),datetime(2017-03-30)), month = datetime_diff('month',datetime(2017-01-01),datetime(2015-12-30)), week = datetime_diff('week',datetime(2017-10-29 00:00),datetime(2017-09-30 23:59)), day = datetime_diff('day',datetime(2017-10-29 00:00),datetime(2017-09-30 23:59)), hour = datetime_diff('hour',datetime(2017-10-31 01:00),datetime(2017-10-30 23:59)), minute = datetime_diff('minute',datetime(2017-10-30 23:05:01),datetime(2017-10-30 23:00:59)), second = datetime_diff('second',datetime(2017-10-30 23:00:10.100),datetime(2017-10-30 23:00:00.900)); --- millisecond = datetime_diff('millisecond',datetime(2017-10-30 23:00:00.200100),datetime(2017-10-30 23:00:00.100900)), --- microsecond = datetime_diff('microsecond',datetime(2017-10-30 23:00:00.1009001),datetime(2017-10-30 23:00:00.1008009)), --- nanosecond = datetime_diff('nanosecond',datetime(2017-10-30 23:00:00.0000000),datetime(2017-10-30 23:00:00.0000007)) -print '-- datetime_part()'; -print year = datetime_part("year", datetime(2017-10-30 01:02:03.7654321)),quarter = datetime_part("quarter", datetime(2017-10-30 01:02:03.7654321)),month = datetime_part("month", datetime(2017-10-30 01:02:03.7654321)),weekOfYear = datetime_part("week_of_year", datetime(2017-10-30 01:02:03.7654321)),day = datetime_part("day", datetime(2017-10-30 01:02:03.7654321)),dayOfYear = datetime_part("dayOfYear", datetime(2017-10-30 01:02:03.7654321)),hour = datetime_part("hour", datetime(2017-10-30 01:02:03.7654321)),minute = datetime_part("minute", datetime(2017-10-30 01:02:03.7654321)),second = datetime_part("second", datetime(2017-10-30 01:02:03.7654321)); --- millisecond = datetime_part("millisecond", dt), --- microsecond = datetime_part("microsecond", dt), --- nanosecond = datetime_part("nanosecond", dt) -print '-- datetime_add()'; -print year = datetime_add('year',1,make_datetime(2017,1,1)),quarter = datetime_add('quarter',1,make_datetime(2017,1,1)),month = datetime_add('month',1,make_datetime(2017,1,1)),week = 
datetime_add('week',1,make_datetime(2017,1,1)),day = datetime_add('day',1,make_datetime(2017,1,1)),hour = datetime_add('hour',1,make_datetime(2017,1,1)),minute = datetime_add('minute',1,make_datetime(2017,1,1)),second = datetime_add('second',1,make_datetime(2017,1,1)); \ No newline at end of file diff --git a/tests/queries/0_stateless/02366_kql_func_dynamic.reference b/tests/queries/0_stateless/02366_kql_func_dynamic.reference deleted file mode 100644 index 564f1eebc4b..00000000000 --- a/tests/queries/0_stateless/02366_kql_func_dynamic.reference +++ /dev/null @@ -1,152 +0,0 @@ --- constant index value -1 c ['A',NULL,'C'] --- array_length() -1 -1 --- array_sum() -1 -1 --- array_index_of() -3 -1 --- array_iif() -[1,5,3] -[1,5,3] -[1,5,NULL] -[NULL,NULL,NULL] --- array_concat() -[1,2,3,4,5,6] --- array_reverse() -[] -[1] -[4,3,2,1] -['example','an','is','this'] --- array_rotate_left() -[] -[] -[] -[3,4,5,1,2] -[1,2,3,4,5] -[3,4,5,1,2] -[4,5,1,2,3] -[1,2,3,4,5] -[4,5,1,2,3] --- array_rotate_right() -[] -[] -[] -[4,5,1,2,3] -[1,2,3,4,5] -[4,5,1,2,3] -[3,4,5,1,2] -[1,2,3,4,5] -[3,4,5,1,2] --- array_shift_left() -[] -[] -[] -[3,4,5,NULL,NULL] -[NULL,NULL,1,2,3] -[3,4,5,-1,-1] -['c','',''] --- array_shift_right() -[] -[] -[] -[3,4,5,NULL,NULL] -[NULL,NULL,1,2,3] -[3,4,5,-1,-1] -['c','',''] --- array_slice() -[3,4] --- array_split() -[[1],[2,3],[4,5]] -[[1,2],[3,4,5]] -[[1],[2,3],[4,5]] -[[1,2,3,4],[],[4,5]] --- array_sort_asc() -(['a','c','c','d',NULL]) -([1,2,3,4]) -['a','b','c'] -(['p','q','r'],['hello','clickhouse','world']) -([NULL,'a','c','c','d']) -([NULL,'a','c','c','d']) -([NULL,NULL,NULL]) -[1,2,3,NULL,NULL] -['a','e','b','c','d'] -(['George','John','Paul','Ringo']) -(['blue','green','yellow',NULL,NULL]) -([NULL,NULL,'blue','green','yellow']) --- array_sort_desc() -(['d','c','c','a',NULL]) -([4,3,2,1]) -['c','b','a'] -(['r','q','p'],['world','clickhouse','hello']) -([NULL,'d','c','c','a']) -([NULL,'d','c','c','a']) -([NULL,NULL,NULL]) -[3,2,1,NULL,NULL] -['d','c','b','e','a'] -(['Ringo','Paul','John','George']) -(['yellow','green','blue',NULL,NULL]) -([NULL,NULL,'yellow','green','blue']) --- jaccard_index() -0.75 -0 -0 -nan -0 -0.75 -0.25 --- pack_array() -1 2 4 [1,2,4] -['ab','0.0.0.42','4.2'] --- repeat() -[] -[1,1,1] -['asd','asd','asd'] -[86400,86400,86400] -[true,true,true] -[NULL] -[NULL] --- set_difference() -[] -[] -[] -[] -[4,5,6] -[4] -[1,3] -[1,2,3] -['d','s'] -['Chewbacca','Han Solo'] --- set_has_element() -0 -1 -0 -1 -0 --- set_intersect() -[] -[1,2,3] -[1,2,3] -[] -[5] -[] -['a'] -['Darth Vader'] --- set_union() -[] -[1,2,3] -[1,2,3,4,5,6] -[1,2,3,4] -[1,2,3,4,5] -[1,2,3] -['a','d','f','s'] -['Chewbacca','Darth Sidious','Darth Vader','Han Solo'] --- zip() -[] -[[1,2],[3,4],[5,6]] -[['Darth','Vader','has a suit'],['Master','Yoda','doesn\'t have a suit']] -[[1,10],[2,20],[3,NULL]] -[[NULL,1],[NULL,2],[NULL,3]] diff --git a/tests/queries/0_stateless/02366_kql_func_dynamic.sql b/tests/queries/0_stateless/02366_kql_func_dynamic.sql deleted file mode 100644 index b0956f032d0..00000000000 --- a/tests/queries/0_stateless/02366_kql_func_dynamic.sql +++ /dev/null @@ -1,161 +0,0 @@ -DROP TABLE IF EXISTS array_test; -CREATE TABLE array_test (floats Array(Float64), - strings Array(String), - nullable_strings Array(Nullable(String)) - ) ENGINE=Memory; -INSERT INTO array_test VALUES([1.0, 2.5], ['a', 'c'], ['A', NULL, 'C']); -set dialect = 'kusto'; -print '-- constant index value'; -array_test | project floats[0], strings[1], nullable_strings; -print '-- array_length()'; -print 
array_length(dynamic(['John', 'Denver', 'Bob', 'Marley'])) == 4; -print array_length(dynamic([1, 2, 3])) == 3; -print '-- array_sum()'; -print array_sum(dynamic([2, 5, 3])) == 10; -print array_sum(dynamic([2.5, 5.5, 3])) == 11; -print '-- array_index_of()'; -print array_index_of(dynamic(['John', 'Denver', 'Bob', 'Marley']), 'Marley'); -print array_index_of(dynamic([1, 2, 3]), 2); -print '-- array_iif()'; -print array_iif(dynamic([true,false,true]), dynamic([1,2,3]), dynamic([4,5,6])); -print array_iif(dynamic([1,0,1]), dynamic([1,2,3]), dynamic([4,5,6])); -print array_iif(dynamic([true,false,true]), dynamic([1,2]), dynamic([4,5,6])); -print array_iif(dynamic(['a','b','c']), dynamic([1,2,3]), dynamic([4,5,6])); -print '-- array_concat()'; -print array_concat(dynamic([1,2,3]),dynamic([4,5,6])); -print '-- array_reverse()'; -print array_reverse(dynamic([])); -print array_reverse(dynamic([1])); -print array_reverse(dynamic([1,2,3,4])); -print array_reverse(dynamic(["this", "is", "an", "example"])); -print '-- array_rotate_left()'; -print array_rotate_left(dynamic([]), 0); -print array_rotate_left(dynamic([]), 500); -print array_rotate_left(dynamic([]), -500); -print array_rotate_left(dynamic([1,2,3,4,5]), 2); -print array_rotate_left(dynamic([1,2,3,4,5]), 5); -print array_rotate_left(dynamic([1,2,3,4,5]), 7); -print array_rotate_left(dynamic([1,2,3,4,5]), -2); -print array_rotate_left(dynamic([1,2,3,4,5]), -5); -print array_rotate_left(dynamic([1,2,3,4,5]), -7); -print '-- array_rotate_right()'; -print array_rotate_right(dynamic([]), 0); -print array_rotate_right(dynamic([]), 500); -print array_rotate_right(dynamic([]), -500); -print array_rotate_right(dynamic([1,2,3,4,5]), 2); -print array_rotate_right(dynamic([1,2,3,4,5]), 5); -print array_rotate_right(dynamic([1,2,3,4,5]), 7); -print array_rotate_right(dynamic([1,2,3,4,5]), -2); -print array_rotate_right(dynamic([1,2,3,4,5]), -5); -print array_rotate_right(dynamic([1,2,3,4,5]), -7); -print '-- array_shift_left()'; -print array_shift_left(dynamic([]), 0); -print array_shift_left(dynamic([]), 555); -print array_shift_left(dynamic([]), -555); -print array_shift_left(dynamic([1,2,3,4,5]), 2); -print array_shift_left(dynamic([1,2,3,4,5]), -2); -print array_shift_left(dynamic([1,2,3,4,5]), 2, -1); -print array_shift_left(dynamic(['a', 'b', 'c']), 2); -print '-- array_shift_right()'; -print array_shift_left(dynamic([]), 0); -print array_shift_left(dynamic([]), 555); -print array_shift_left(dynamic([]), -555); -print array_shift_right(dynamic([1,2,3,4,5]), -2); -print array_shift_right(dynamic([1,2,3,4,5]), 2); -print array_shift_right(dynamic([1,2,3,4,5]), -2, -1); -print array_shift_right(dynamic(['a', 'b', 'c']), -2); -print '-- array_slice()'; ---print array_slice(dynamic([1,2,3]), 1, 2); -- will enable whe analyzer dixed -print array_slice(dynamic([1,2,3,4,5]), -3, -2); -print '-- array_split()'; -print array_split(dynamic([1,2,3,4,5]), dynamic([1,-2])); -print array_split(dynamic([1,2,3,4,5]), 2); -print array_split(dynamic([1,2,3,4,5]), dynamic([1,3])); -print array_split(dynamic([1,2,3,4,5]), dynamic([-1,-2])); -print '-- array_sort_asc()'; -print array_sort_asc(dynamic([null, 'd', 'a', 'c', 'c'])); -print array_sort_asc(dynamic([4, 1, 3, 2])); -print array_sort_asc(dynamic(['b', 'a', 'c']), dynamic(['q', 'p', 'r']))[0]; -print array_sort_asc(dynamic(['q', 'p', 'r']), dynamic(['clickhouse','hello', 'world'])); -print array_sort_asc( dynamic(['d', null, 'a', 'c', 'c']) , false); -print array_sort_asc( dynamic(['d', null, 'a', 'c', 'c']) , 1 
> 2); -print array_sort_asc( dynamic([null, null, null]) , false); -print array_sort_asc(dynamic([2, 1, null,3, null]), dynamic([20, 10, 40, 30, 50]), 1 < 2)[0]; -print array_sort_asc(dynamic(['1','3','4','5','2']),dynamic(["a","b","c","d","e"]), dynamic(["a","b","c","d","e"]), dynamic(["a","b","c","d","e"]))[3]; -print array_sort_asc(split("John,Paul,George,Ringo", ",")); -print array_sort_asc(dynamic([null,"blue","yellow","green",null])); -print array_sort_asc(dynamic([null,"blue","yellow","green",null]), false); -print '-- array_sort_desc()'; -print array_sort_desc(dynamic([null, 'd', 'a', 'c', 'c'])); -print array_sort_desc(dynamic([4, 1, 3, 2])); -print array_sort_desc(dynamic(['b', 'a', 'c']), dynamic(['q', 'p', 'r']))[0]; -print array_sort_desc(dynamic(['q', 'p', 'r']), dynamic(['clickhouse','hello', 'world'])); -print array_sort_desc( dynamic(['d', null, 'a', 'c', 'c']) , false); -print array_sort_desc( dynamic(['d', null, 'a', 'c', 'c']) , 1 > 2); -print array_sort_desc( dynamic([null, null, null]) , false); -print array_sort_desc(dynamic([2, 1, null,3, null]), dynamic([20, 10, 40, 30, 50]), 1 < 2)[0]; -print array_sort_desc(dynamic(['1','3','4','5','2']),dynamic(["a","b","c","d","e"]), dynamic(["a","b","c","d","e"]), dynamic(["a","b","c","d","e"]))[3]; -print array_sort_desc(split("John,Paul,George,Ringo", ",")); -print array_sort_desc(dynamic([null,"blue","yellow","green",null])); -print array_sort_desc(dynamic([null,"blue","yellow","green",null]), false); -print '-- jaccard_index()'; -print jaccard_index(dynamic([1, 1, 2, 2, 3, 3]), dynamic([1, 2, 3, 4, 4, 4])); -print jaccard_index(dynamic([1, 2, 3]), dynamic([])); -print jaccard_index(dynamic([]), dynamic([1, 2, 3, 4])); -print jaccard_index(dynamic([]), dynamic([])); -print jaccard_index(dynamic([1, 2, 3]), dynamic([4, 5, 6, 7])); -print jaccard_index(dynamic(['a', 's', 'd']), dynamic(['f', 'd', 's', 'a'])); -print jaccard_index(dynamic(['Chewbacca', 'Darth Vader', 'Han Solo']), dynamic(['Darth Sidious', 'Darth Vader'])); -print '-- pack_array()'; -print pack_array(); -- { clientError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } -print x = 1 | extend y = x * 2 | extend z = y * 2 | extend pack_array(x,y,z); -print pack_array(strcat('a', 'b'), format_ipv4(42), tostring(4.2)); -print '-- repeat()'; -print repeat(1, 0); -print repeat(1, 3); -print repeat("asd", 3); -print repeat(timespan(1d), 3); -print repeat(true, 3); -print repeat(1, -3); -print repeat(6.7,-4); -print '-- set_difference()'; -print set_difference(dynamic([]), dynamic([])); -print set_difference(dynamic([]), dynamic([9])); -print set_difference(dynamic([]), dynamic(["asd"])); -print set_difference(dynamic([1, 1, 2, 2, 3, 3]), dynamic([1, 2, 3])); -print array_sort_asc(set_difference(dynamic([1, 4, 2, 3, 5, 4, 6]), dynamic([1, 2, 3])))[0]; -print set_difference(dynamic([4]), dynamic([1, 2, 3])); -print array_sort_asc(set_difference(dynamic([1, 2, 3, 4, 5]), dynamic([5]), dynamic([2, 4])))[0]; -print array_sort_asc(set_difference(dynamic([1, 2, 3]), dynamic([])))[0]; -print array_sort_asc(set_difference(dynamic(['a', 's', 'd']), dynamic(['a', 'f'])))[0]; -print array_sort_asc(set_difference(dynamic(['Chewbacca', 'Darth Vader', 'Han Solo']), dynamic(['Darth Sidious', 'Darth Vader'])))[0]; -print '-- set_has_element()'; -print set_has_element(dynamic([]), 9); -print set_has_element(dynamic(["this", "is", "an", "example"]), "example"); -print set_has_element(dynamic(["this", "is", "an", "example"]), "examplee"); -print set_has_element(dynamic([1, 2, 3]), 2); -print 
set_has_element(dynamic([1, 2, 3, 4.2]), 4); -print '-- set_intersect()'; -print set_intersect(dynamic([]), dynamic([])); -print array_sort_asc(set_intersect(dynamic([1, 1, 2, 2, 3, 3]), dynamic([1, 2, 3])))[0]; -print array_sort_asc(set_intersect(dynamic([1, 4, 2, 3, 5, 4, 6]), dynamic([1, 2, 3])))[0]; -print set_intersect(dynamic([4]), dynamic([1, 2, 3])); -print set_intersect(dynamic([1, 2, 3, 4, 5]), dynamic([1, 3, 5]), dynamic([2, 5])); -print set_intersect(dynamic([1, 2, 3]), dynamic([])); -print set_intersect(dynamic(['a', 's', 'd']), dynamic(['a', 'f'])); -print set_intersect(dynamic(['Chewbacca', 'Darth Vader', 'Han Solo']), dynamic(['Darth Sidious', 'Darth Vader'])); -print '-- set_union()'; -print set_union(dynamic([]), dynamic([])); -print array_sort_asc(set_union(dynamic([1, 1, 2, 2, 3, 3]), dynamic([1, 2, 3])))[0]; -print array_sort_asc(set_union(dynamic([1, 4, 2, 3, 5, 4, 6]), dynamic([1, 2, 3])))[0]; -print array_sort_asc(set_union(dynamic([4]), dynamic([1, 2, 3])))[0]; -print array_sort_asc(set_union(dynamic([1, 3, 4]), dynamic([5]), dynamic([2, 4])))[0]; -print array_sort_asc(set_union(dynamic([1, 2, 3]), dynamic([])))[0]; -print array_sort_asc(set_union(dynamic(['a', 's', 'd']), dynamic(['a', 'f'])))[0]; -print array_sort_asc(set_union(dynamic(['Chewbacca', 'Darth Vader', 'Han Solo']), dynamic(['Darth Sidious', 'Darth Vader'])))[0]; -print '-- zip()'; -print zip(dynamic([]), dynamic([])); -print zip(dynamic([1,3,5]), dynamic([2,4,6])); -print zip(dynamic(['Darth','Master']), dynamic(['Vader','Yoda']), dynamic(['has a suit','doesn\'t have a suit'])); -print zip(dynamic([1,2,3]), dynamic([10,20])); -print zip(dynamic([]), dynamic([1,2,3])); \ No newline at end of file diff --git a/tests/queries/0_stateless/02366_kql_func_ip.reference b/tests/queries/0_stateless/02366_kql_func_ip.reference deleted file mode 100644 index 2a0bbf53fff..00000000000 --- a/tests/queries/0_stateless/02366_kql_func_ip.reference +++ /dev/null @@ -1,123 +0,0 @@ --- ipv4_is_private(\'127.0.0.1\') -0 --- ipv4_is_private(\'10.1.2.3\') -1 --- ipv4_is_private(\'192.168.1.1/24\') -1 -ipv4_is_private(strcat(\'192.\',\'168.\',\'1.\',\'1\',\'/24\')) -1 --- ipv4_is_private(\'abc\') -\N --- ipv4_netmask_suffix(\'192.168.1.1/24\') -24 --- ipv4_netmask_suffix(\'192.168.1.1\') -32 --- ipv4_netmask_suffix(\'127.0.0.1/16\') -16 --- ipv4_netmask_suffix(\'abc\') -\N -ipv4_netmask_suffix(strcat(\'127.\', \'0.\', \'0.1/16\')) -16 --- ipv4_is_in_range(\'127.0.0.1\', \'127.0.0.1\') -1 --- ipv4_is_in_range(\'192.168.1.6\', \'192.168.1.1/24\') -1 --- ipv4_is_in_range(\'192.168.1.1\', \'192.168.2.1/24\') -0 --- ipv4_is_in_range(strcat(\'192.\',\'168.\', \'1.1\'), \'192.168.2.1/24\') -0 --- ipv4_is_in_range(\'abc\', \'127.0.0.1\') -\N --- parse_ipv6(127.0.0.1) -0000:0000:0000:0000:0000:ffff:7f00:0001 --- parse_ipv6(fe80::85d:e82c:9446:7994) -fe80:0000:0000:0000:085d:e82c:9446:7994 --- parse_ipv4(\'127.0.0.1\') -2130706433 --- parse_ipv4(\'192.1.168.1\') < parse_ipv4(\'192.1.168.2\') -1 --- parse_ipv4(arrayStringConcat([\'127\', \'0\', \'0\', \'1\'], \'.\')) --- parse_ipv4_mask(\'127.0.0.1\', 24) == 2130706432 -2130706432 --- parse_ipv4_mask(\'abc\', 31) -\N -\N --- parse_ipv4_mask(\'192.1.168.2\', 31) == parse_ipv4_mask(\'192.1.168.3\', 31) -3221334018 -3221334018 --- ipv4_is_match(\'127.0.0.1\', \'127.0.0.1\') -1 --- ipv4_is_match(\'192.168.1.1\', \'192.168.1.255\') -0 --- ipv4_is_match(\'192.168.1.1/24\', \'192.168.1.255/24\') -1 --- ipv4_is_match(\'192.168.1.1\', \'192.168.1.255\', 24) -1 --- ipv4_is_match(\'abc\', 
\'def\', 24) -\N --- ipv4_compare() -0 --1 -1 -0 -0 -0 -0 -0 -0 -0 -0 --- format_ipv4() -192.168.1.0 -192.168.1.1 -192.168.1.0 -192.168.1.0 -1 -1 -127.0.0.0 --- format_ipv4_mask() -192.168.1.0/24 -192.168.1.0/24 -192.168.1.0/24 -192.168.1.1/32 -192.168.1.0/24 -1 -1 -127.0.0.0/24 --- parse_ipv6_mask() -0000:0000:0000:0000:0000:0000:0000:0000 -fe80:0000:0000:0000:085d:e82c:9446:7900 -0000:0000:0000:0000:0000:ffff:c0a8:ff00 -0000:0000:0000:0000:0000:ffff:c0a8:ff00 -0000:0000:0000:0000:0000:ffff:ffff:ffff -fe80:0000:0000:0000:085d:e82c:9446:7994 -fe80:0000:0000:0000:085d:e82c:9446:7900 -0000:0000:0000:0000:0000:ffff:c0a8:ffff -0000:0000:0000:0000:0000:ffff:c0a8:ff00 --- ipv6_is_match() -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 diff --git a/tests/queries/0_stateless/02366_kql_func_ip.sql b/tests/queries/0_stateless/02366_kql_func_ip.sql deleted file mode 100644 index c9b335f203a..00000000000 --- a/tests/queries/0_stateless/02366_kql_func_ip.sql +++ /dev/null @@ -1,131 +0,0 @@ -set dialect='kusto'; -print '-- ipv4_is_private(\'127.0.0.1\')'; -print ipv4_is_private('127.0.0.1'); -print '-- ipv4_is_private(\'10.1.2.3\')'; -print ipv4_is_private('10.1.2.3'); -print '-- ipv4_is_private(\'192.168.1.1/24\')'; -print ipv4_is_private('192.168.1.1/24'); -print 'ipv4_is_private(strcat(\'192.\',\'168.\',\'1.\',\'1\',\'/24\'))'; -print ipv4_is_private(strcat('192.','168.','1.','1','/24')); -print '-- ipv4_is_private(\'abc\')'; -print ipv4_is_private('abc'); -- == null - -print '-- ipv4_netmask_suffix(\'192.168.1.1/24\')'; -print ipv4_netmask_suffix('192.168.1.1/24'); -- == 24 -print '-- ipv4_netmask_suffix(\'192.168.1.1\')'; -print ipv4_netmask_suffix('192.168.1.1'); -- == 32 -print '-- ipv4_netmask_suffix(\'127.0.0.1/16\')'; -print ipv4_netmask_suffix('127.0.0.1/16'); -- == 16 -print '-- ipv4_netmask_suffix(\'abc\')'; -print ipv4_netmask_suffix('abc'); -- == null -print 'ipv4_netmask_suffix(strcat(\'127.\', \'0.\', \'0.1/16\'))'; -print ipv4_netmask_suffix(strcat('127.', '0.', '0.1/16')); -- == 16 - -print '-- ipv4_is_in_range(\'127.0.0.1\', \'127.0.0.1\')'; -print ipv4_is_in_range('127.0.0.1', '127.0.0.1'); -- == true -print '-- ipv4_is_in_range(\'192.168.1.6\', \'192.168.1.1/24\')'; -print ipv4_is_in_range('192.168.1.6', '192.168.1.1/24'); -- == true -print '-- ipv4_is_in_range(\'192.168.1.1\', \'192.168.2.1/24\')'; -print ipv4_is_in_range('192.168.1.1', '192.168.2.1/24'); -- == false -print '-- ipv4_is_in_range(strcat(\'192.\',\'168.\', \'1.1\'), \'192.168.2.1/24\')'; -print ipv4_is_in_range(strcat('192.','168.', '1.1'), '192.168.2.1/24'); -- == false -print '-- ipv4_is_in_range(\'abc\', \'127.0.0.1\')'; -- == null -print ipv4_is_in_range('abc', '127.0.0.1'); - -print '-- parse_ipv6(127.0.0.1)'; -print parse_ipv6('127.0.0.1'); -print '-- parse_ipv6(fe80::85d:e82c:9446:7994)'; -print parse_ipv6('fe80::85d:e82c:9446:7994'); -print '-- parse_ipv4(\'127.0.0.1\')'; -print parse_ipv4('127.0.0.1'); -print '-- parse_ipv4(\'192.1.168.1\') < parse_ipv4(\'192.1.168.2\')'; -print parse_ipv4('192.1.168.1') < parse_ipv4('192.1.168.2'); -print '-- parse_ipv4(arrayStringConcat([\'127\', \'0\', \'0\', \'1\'], \'.\'))'; -print parse_ipv4(arrayStringConcat(['127', '0', '0', '1'], '.')); -- { clientError UNKNOWN_FUNCTION } - -print '-- parse_ipv4_mask(\'127.0.0.1\', 24) == 2130706432'; -print parse_ipv4_mask('127.0.0.1', 24); -print '-- parse_ipv4_mask(\'abc\', 31)'; -print parse_ipv4_mask('abc', 31) -print '-- parse_ipv4_mask(\'192.1.168.2\', 1000)'; -print 
parse_ipv4_mask('192.1.168.2', 1000); -print '-- parse_ipv4_mask(\'192.1.168.2\', 31) == parse_ipv4_mask(\'192.1.168.3\', 31)'; ---print parse_ipv4_mask('192.1.168.2', 31) == parse_ipv4_mask('192.1.168.3', 31); // this qual failed in analyzer 3221334018 -print parse_ipv4_mask('192.1.168.2', 31); -print parse_ipv4_mask('192.1.168.3', 31); -print '-- ipv4_is_match(\'127.0.0.1\', \'127.0.0.1\')'; -print ipv4_is_match('127.0.0.1', '127.0.0.1'); -print '-- ipv4_is_match(\'192.168.1.1\', \'192.168.1.255\')'; -print ipv4_is_match('192.168.1.1', '192.168.1.255'); -print '-- ipv4_is_match(\'192.168.1.1/24\', \'192.168.1.255/24\')'; -print ipv4_is_match('192.168.1.1/24', '192.168.1.255/24'); -print '-- ipv4_is_match(\'192.168.1.1\', \'192.168.1.255\', 24)'; -print ipv4_is_match('192.168.1.1', '192.168.1.255', 24); -print '-- ipv4_is_match(\'abc\', \'def\', 24)'; -print ipv4_is_match('abc', 'dev', 24); -print '-- ipv4_compare()'; -print ipv4_compare('127.0.0.1', '127.0.0.1'); -print ipv4_compare('192.168.1.1', '192.168.1.255'); -print ipv4_compare('192.168.1.255', '192.168.1.1'); -print ipv4_compare('192.168.1.1/24', '192.168.1.255/24'); -print ipv4_compare('192.168.1.1', '192.168.1.255', 24); -print ipv4_compare('192.168.1.1/24', '192.168.1.255'); -print ipv4_compare('192.168.1.1', '192.168.1.255/24'); -print ipv4_compare('192.168.1.1/30', '192.168.1.255/24'); -print ipv4_compare('192.168.1.1', '192.168.1.0', 31); -print ipv4_compare('192.168.1.1/24', '192.168.1.255', 31); -print ipv4_compare('192.168.1.1', '192.168.1.255', 24); -print '-- format_ipv4()'; -print format_ipv4('192.168.1.255', 24); -print format_ipv4('192.168.1.1', 32); -print format_ipv4('192.168.1.1/24', 32); -print format_ipv4(3232236031, 24); -print format_ipv4('192.168.1.1/24', -1) == ''; -print format_ipv4('abc', 24) == ''; -print format_ipv4(strcat('127.0', '.0.', '1', '/32'), 12 + 12); -print '-- format_ipv4_mask()'; -print format_ipv4_mask('192.168.1.255', 24); -print format_ipv4_mask(3232236031, 24); -print format_ipv4_mask('192.168.1.1', 24); -print format_ipv4_mask('192.168.1.1', 32); -print format_ipv4_mask('192.168.1.1/24', 32); -print format_ipv4_mask('192.168.1.1/24', -1) == ''; -print format_ipv4_mask('abc', 24) == ''; -print format_ipv4_mask(strcat('127.0', '.0.', '1', '/32'), 12 + 12); -print '-- parse_ipv6_mask()'; -print parse_ipv6_mask("127.0.0.1", 24); -print parse_ipv6_mask("fe80::85d:e82c:9446:7994", 120); -print parse_ipv6_mask("192.168.255.255", 120); -print parse_ipv6_mask("192.168.255.255/24", 124); -print parse_ipv6_mask("255.255.255.255", 128); -print parse_ipv6_mask("fe80::85d:e82c:9446:7994", 128); -print parse_ipv6_mask("fe80::85d:e82c:9446:7994/120", 124); -print parse_ipv6_mask("::192.168.255.255", 128); -print parse_ipv6_mask("::192.168.255.255/24", 128); -print '-- ipv6_is_match()'; -print ipv6_is_match('::ffff:7f00:1', '127.0.0.1') == true; -print ipv6_is_match('fe80::85d:e82c:9446:7994', 'fe80::85d:e82c:9446:7995') == false; -print ipv6_is_match('192.168.1.1/24', '192.168.1.255/24') == true; -print ipv6_is_match('fe80::85d:e82c:9446:7994/127', 'fe80::85d:e82c:9446:7995/127') == true; -print ipv6_is_match('fe80::85d:e82c:9446:7994', 'fe80::85d:e82c:9446:7995', 127) == true; -print ipv6_is_match('192.168.1.1', '192.168.1.1'); -- // Equal IPs -print ipv6_is_match('192.168.1.1/24', '192.168.1.255'); -- // 24 bit IP4-prefix is used for comparison -print ipv6_is_match('192.168.1.1', '192.168.1.255/24'); -- // 24 bit IP4-prefix is used for comparison -print ipv6_is_match('192.168.1.1/30', 
'192.168.1.255/24'); -- // 24 bit IP4-prefix is used for comparison -print ipv6_is_match('fe80::85d:e82c:9446:7994', 'fe80::85d:e82c:9446:7994'); -- // Equal IPs -print ipv6_is_match('fe80::85d:e82c:9446:7994/120', 'fe80::85d:e82c:9446:7998'); -- // 120 bit IP6-prefix is used for comparison -print ipv6_is_match('fe80::85d:e82c:9446:7994', 'fe80::85d:e82c:9446:7998/120'); -- // 120 bit IP6-prefix is used for comparison -print ipv6_is_match('fe80::85d:e82c:9446:7994/120', 'fe80::85d:e82c:9446:7998/120'); -- // 120 bit IP6-prefix is used for comparison -print ipv6_is_match('192.168.1.1', '::ffff:c0a8:0101'); -- // Equal IPs -print ipv6_is_match('192.168.1.1/24', '::ffff:c0a8:01ff'); -- // 24 bit IP-prefix is used for comparison -print ipv6_is_match('::ffff:c0a8:0101', '192.168.1.255/24'); -- // 24 bit IP-prefix is used for comparison -print ipv6_is_match('::192.168.1.1/30', '192.168.1.255/24'); -- // 24 bit IP-prefix is used for comparison -print ipv6_is_match('192.168.1.1', '192.168.1.0', 31); -- // 31 bit IP4-prefix is used for comparison -print ipv6_is_match('192.168.1.1/24', '192.168.1.255', 31); -- // 24 bit IP4-prefix is used for comparison -print ipv6_is_match('192.168.1.1', '192.168.1.255', 24); -- // 24 bit IP4-prefix is used for comparison -print ipv6_is_match('fe80::85d:e82c:9446:7994', 'fe80::85d:e82c:9446:7995', 127); -- // 127 bit IP6-prefix is used for comparison -print ipv6_is_match('fe80::85d:e82c:9446:7994/127', 'fe80::85d:e82c:9446:7998', 120); -- // 120 bit IP6-prefix is used for comparison -print ipv6_is_match('fe80::85d:e82c:9446:7994/120', 'fe80::85d:e82c:9446:7998', 127); -- // 120 bit IP6-prefix is used for comparison -print ipv6_is_match('192.168.1.1/24', '::ffff:c0a8:01ff', 127); -- // 127 bit IP6-prefix is used for comparison -print ipv6_is_match('::ffff:c0a8:0101', '192.168.1.255', 120); -- // 120 bit IP6-prefix is used for comparison -print ipv6_is_match('::192.168.1.1/30', '192.168.1.255/24', 127); -- // 120 bit IP6-prefix is used for comparison \ No newline at end of file diff --git a/tests/queries/0_stateless/02366_kql_func_math.reference b/tests/queries/0_stateless/02366_kql_func_math.reference deleted file mode 100644 index 92f283abcb6..00000000000 --- a/tests/queries/0_stateless/02366_kql_func_math.reference +++ /dev/null @@ -1,4 +0,0 @@ --- isnan -- -1 -0 -0 diff --git a/tests/queries/0_stateless/02366_kql_func_math.sql b/tests/queries/0_stateless/02366_kql_func_math.sql deleted file mode 100644 index 4e83622eb6b..00000000000 --- a/tests/queries/0_stateless/02366_kql_func_math.sql +++ /dev/null @@ -1,7 +0,0 @@ -set dialect = 'kusto'; -print '-- isnan --'; -print isnan(double(nan)); -print isnan(4.2); -print isnan(4); -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } -print isnan(real(+inf)); -print isnan(dynamic(null)); -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } diff --git a/tests/queries/0_stateless/02366_kql_func_scalar.reference b/tests/queries/0_stateless/02366_kql_func_scalar.reference deleted file mode 100644 index b7fa62c5d43..00000000000 --- a/tests/queries/0_stateless/02366_kql_func_scalar.reference +++ /dev/null @@ -1,16 +0,0 @@ --- bin_at() -4.5 --12:0:0 -2017-05-14 12:00:00.000000000 -2017-05-14 00:00:00.000000000 -2018-02-25 15:14:00.000000000 5 -2018-02-24 15:14:00.000000000 3 -2018-02-23 15:14:00.000000000 4 --- bin() -4 -1970-05-11 00:00:00.000000000 -336:0:0 -1970-05-11 13:45:07.345000000 -1970-05-11 13:45:07.345623000 -2022-09-26 10:13:23.987232000 -1970-05-11 13:45:07.456336000 diff --git 
a/tests/queries/0_stateless/02366_kql_func_scalar.sql b/tests/queries/0_stateless/02366_kql_func_scalar.sql deleted file mode 100644 index d7e94cfd9d1..00000000000 --- a/tests/queries/0_stateless/02366_kql_func_scalar.sql +++ /dev/null @@ -1,26 +0,0 @@ -DROP TABLE IF EXISTS Bin_at_test; -CREATE TABLE Bin_at_test -( - `Date` DateTime('UTC'), - Num Nullable(UInt8) -) ENGINE = Memory; -INSERT INTO Bin_at_test VALUES ('2018-02-24T15:14:01',3), ('2018-02-23T16:14:01',4), ('2018-02-26T15:14:01',5); - -set dialect = 'kusto'; -print '-- bin_at()'; -print bin_at(6.5, 2.5, 7); -print bin_at(1h, 1d, 12h); -print bin_at(datetime(2017-05-15 10:20:00.0), 1d, datetime(1970-01-01 12:00:00.0)); -print bin_at(datetime(2017-05-17 10:20:00.0), 7d, datetime(2017-06-04 00:00:00.0)); -Bin_at_test | summarize sum(Num) by d = todatetime(bin_at(Date, 1d, datetime('2018-02-24 15:14:00'))) | order by d; -print '-- bin()'; -print bin(4.5, 1); -print bin(datetime(1970-05-11 13:45:07), 1d); -print bin(16d, 7d); -print bin(datetime(1970-05-11 13:45:07.345623), 1ms); --- print bin(datetime(2022-09-26 10:13:23.987234), 6ms); -> 2022-09-26 10:13:23.982000000 -print bin(datetime(1970-05-11 13:45:07.345623), 1microsecond); -print bin(datetime(2022-09-26 10:13:23.987234), 6microseconds); -print bin(datetime(1970-05-11 13:45:07.456345672), 16microseconds); --- print bin(datetime(2022-09-26 10:13:23.987234128), 1tick); -> 2022-09-26 10:13:23.987234100 --- print bin(datetime(2022-09-26 10:13:23.987234128), 99nanosecond); -> null diff --git a/tests/queries/0_stateless/02366_kql_func_string.reference b/tests/queries/0_stateless/02366_kql_func_string.reference deleted file mode 100644 index 9bdd38ca5db..00000000000 --- a/tests/queries/0_stateless/02366_kql_func_string.reference +++ /dev/null @@ -1,360 +0,0 @@ --- test String Functions -- --- Customers |where Education contains \'degree\' -Latoya Shen Professional Graduate Degree 25 -Peter Nara Skilled Manual Graduate Degree 26 - --- Customers |where Education !contains \'degree\' -\N why Professional Partial College 38 -Theodore Diaz Skilled Manual Bachelors 28 -Stephanie Cox Management abcd defg Bachelors 33 -Apple Skilled Manual Bachelors 28 - --- Customers |where Education contains \'Degree\' -Latoya Shen Professional Graduate Degree 25 -Peter Nara Skilled Manual Graduate Degree 26 - --- Customers |where Education !contains \'Degree\' -\N why Professional Partial College 38 -Theodore Diaz Skilled Manual Bachelors 28 -Stephanie Cox Management abcd defg Bachelors 33 -Apple Skilled Manual Bachelors 28 - --- Customers | where FirstName endswith \'RE\' -Theodore Diaz Skilled Manual Bachelors 28 - --- Customers | where ! 
FirstName endswith \'RE\' -Latoya Shen Professional Graduate Degree 25 -Peter Nara Skilled Manual Graduate Degree 26 -Stephanie Cox Management abcd defg Bachelors 33 -Apple Skilled Manual Bachelors 28 - ---Customers | where FirstName endswith_cs \'re\' -Theodore Diaz Skilled Manual Bachelors 28 - --- Customers | where FirstName !endswith_cs \'re\' -Latoya Shen Professional Graduate Degree 25 -Peter Nara Skilled Manual Graduate Degree 26 -Stephanie Cox Management abcd defg Bachelors 33 -Apple Skilled Manual Bachelors 28 - --- Customers | where Occupation == \'Skilled Manual\' -Peter Nara Skilled Manual Graduate Degree 26 -Theodore Diaz Skilled Manual Bachelors 28 -Apple Skilled Manual Bachelors 28 - --- Customers | where Occupation != \'Skilled Manual\' -\N why Professional Partial College 38 -Latoya Shen Professional Graduate Degree 25 -Stephanie Cox Management abcd defg Bachelors 33 - --- Customers | where Occupation has \'skilled\' -Peter Nara Skilled Manual Graduate Degree 26 -Theodore Diaz Skilled Manual Bachelors 28 -Apple Skilled Manual Bachelors 28 - --- Customers | where Occupation !has \'skilled\' -\N why Professional Partial College 38 -Latoya Shen Professional Graduate Degree 25 -Stephanie Cox Management abcd defg Bachelors 33 - --- Customers | where Occupation has \'Skilled\' -Peter Nara Skilled Manual Graduate Degree 26 -Theodore Diaz Skilled Manual Bachelors 28 -Apple Skilled Manual Bachelors 28 - --- Customers | where Occupation !has \'Skilled\' -\N why Professional Partial College 38 -Latoya Shen Professional Graduate Degree 25 -Stephanie Cox Management abcd defg Bachelors 33 - --- Customers | where Occupation hasprefix_cs \'Ab\' - --- Customers | where Occupation !hasprefix_cs \'Ab\' -\N why Professional Partial College 38 -Latoya Shen Professional Graduate Degree 25 -Peter Nara Skilled Manual Graduate Degree 26 -Theodore Diaz Skilled Manual Bachelors 28 -Stephanie Cox Management abcd defg Bachelors 33 -Apple Skilled Manual Bachelors 28 - --- Customers | where Occupation hasprefix_cs \'ab\' -Stephanie Cox Management abcd defg Bachelors 33 - --- Customers | where Occupation !hasprefix_cs \'ab\' -\N why Professional Partial College 38 -Latoya Shen Professional Graduate Degree 25 -Peter Nara Skilled Manual Graduate Degree 26 -Theodore Diaz Skilled Manual Bachelors 28 -Apple Skilled Manual Bachelors 28 - --- Customers | where Occupation hassuffix \'Ent\' -Stephanie Cox Management abcd defg Bachelors 33 - --- Customers | where Occupation !hassuffix \'Ent\' -\N why Professional Partial College 38 -Latoya Shen Professional Graduate Degree 25 -Peter Nara Skilled Manual Graduate Degree 26 -Theodore Diaz Skilled Manual Bachelors 28 -Apple Skilled Manual Bachelors 28 - --- Customers | where Occupation hassuffix \'ent\' -Stephanie Cox Management abcd defg Bachelors 33 - --- Customers | where Occupation hassuffix \'ent\' -Stephanie Cox Management abcd defg Bachelors 33 - --- Customers |where Education in (\'Bachelors\',\'High School\') -Theodore Diaz Skilled Manual Bachelors 28 -Stephanie Cox Management abcd defg Bachelors 33 -Apple Skilled Manual Bachelors 28 - --- Customers | where Education !in (\'Bachelors\',\'High School\') -\N why Professional Partial College 38 -Latoya Shen Professional Graduate Degree 25 -Peter Nara Skilled Manual Graduate Degree 26 - --- Customers | where FirstName matches regex \'P.*r\' -Peter Nara Skilled Manual Graduate Degree 26 - --- Customers | where FirstName startswith \'pet\' -Peter Nara Skilled Manual Graduate Degree 26 - --- Customers | where 
FirstName !startswith \'pet\' -Latoya Shen Professional Graduate Degree 25 -Theodore Diaz Skilled Manual Bachelors 28 -Stephanie Cox Management abcd defg Bachelors 33 -Apple Skilled Manual Bachelors 28 - --- Customers | where FirstName startswith_cs \'pet\' - --- Customers | where FirstName !startswith_cs \'pet\' -Latoya Shen Professional Graduate Degree 25 -Peter Nara Skilled Manual Graduate Degree 26 -Theodore Diaz Skilled Manual Bachelors 28 -Stephanie Cox Management abcd defg Bachelors 33 -Apple Skilled Manual Bachelors 28 - --- Customers | where isempty(LastName) -Apple Skilled Manual Bachelors 28 - --- Customers | where isnotempty(LastName) -Theodore Diaz Skilled Manual Bachelors 28 -Stephanie Cox Management abcd defg Bachelors 33 -Peter Nara Skilled Manual Graduate Degree 26 -Latoya Shen Professional Graduate Degree 25 -\N why Professional Partial College 38 - --- Customers | where isnotnull(FirstName) -Latoya Shen Professional Graduate Degree 25 -Peter Nara Skilled Manual Graduate Degree 26 -Theodore Diaz Skilled Manual Bachelors 28 -Stephanie Cox Management abcd defg Bachelors 33 -Apple Skilled Manual Bachelors 28 - --- Customers | where isnull(FirstName) -\N why Professional Partial College 38 - --- Customers | project url_decode(\'https%3A%2F%2Fwww.test.com%2Fhello%20word\') | take 1 -https://www.test.com/hello word - --- Customers | project url_encode(\'https://www.test.com/hello word\') | take 1 -https%3A%2F%2Fwww.test.com%2Fhello%20word - --- Customers | project name_abbr = strcat(substring(FirstName,0,3), \' \', substring(LastName,2)) -\N -Lat en -Pet ra -The az -Ste x -App - --- Customers | project name = strcat(FirstName, \' \', LastName) -\N -Latoya Shen -Peter Nara -Theodore Diaz -Stephanie Cox -Apple - --- Customers | project FirstName, strlen(FirstName) -\N \N -Latoya 6 -Peter 5 -Theodore 8 -Stephanie 9 -Apple 5 - --- Customers | project strrep(FirstName,2,\'_\') -\N -Latoya_Latoya -Peter_Peter -Theodore_Theodore -Stephanie_Stephanie -Apple_Apple - --- Customers | project toupper(FirstName) -\N -LATOYA -PETER -THEODORE -STEPHANIE -APPLE - --- Customers | project tolower(FirstName) -\N -latoya -peter -theodore -stephanie -apple - --- support subquery for in orerator (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/in-cs-operator) (subquery need to be wraped with bracket inside bracket); TODO: case-insensitive not supported yet -Latoya Shen Professional Graduate Degree 25 -Peter Nara Skilled Manual Graduate Degree 26 -Theodore Diaz Skilled Manual Bachelors 28 -Apple Skilled Manual Bachelors 28 - --- has_all (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/has-all-operator); TODO: subquery not supported yet -Peter Nara Skilled Manual Graduate Degree 26 -Theodore Diaz Skilled Manual Bachelors 28 -Apple Skilled Manual Bachelors 28 - --- has_any (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/has-anyoperator); TODO: subquery not supported yet -Theodore Diaz Skilled Manual Bachelors 28 -Stephanie Cox Management abcd defg Bachelors 33 -Peter Nara Skilled Manual Graduate Degree 26 -Apple Skilled Manual Bachelors 28 - --- countof (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/countoffunction) -3 -3 -1 - --- extract ( https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/extractfunction) -PINEAPPLE ice cream is 20 -PINEAPPLE -20 - -20 -\N -\N -\N -\N -\N -45.6 -45.6 - --- extract_all (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/extractallfunction); TODO: captureGroups not 
supported yet -[['T','h','e'],['p','ric','e'],['P','INEAPPL','E'],['i','c','e'],['c','rea','m']] - --- extract_json (https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/extractjsonfunction) - - -John -iPhone -\N -26 -26 -26 -26 -\N - --- split (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/splitfunction) -['aa','bb'] -['bbb'] -[''] -['a','','b'] -['aa','cc'] -['aabbcc'] -['aaa','bbb','ccc'] -[NULL] - --- strcat_delim (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/strcat-delimfunction); TODO: only support string now. -1-2-Ab - --- indexof (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/indexoffunction); TODO: length and occurrence not supported yet -2 -2 --1 --- base64_encode_fromguid() -8jMxriJurkmwahbmqbIS6w== --- base64_decode_toarray() -[] -[75,117,115,116,111] --- base64_decode_toguid() -10e99626-bc2b-4c75-bb3e-fe606de25700 -1 --- base64_encode_tostring - -S3VzdG8x --- base64_decode_tostring - -Kusto1 --- parse_url() -{"Scheme":"scheme","Host":"","Port":"0","Path":"/this/is/a/path","Username":"username","Password":"password","Query Parameters":{"k1":"v1","k2":"v2"},"Fragment":"fragment"} --- parse_urlquery() -{"Query Parameters":{"k1":"v1","k2":"v2","k3":"v3"}} --- strcmp() -0 1 -1 1 --- substring() -CD --- translate() -kusto xxx --- trim() -https://www.ibm.com -Te st1 - asd -asd -sd --- trim_start() -www.ibm.com -Te st1// $ -asdw - -asd --- trim_end() -https -- Te st1 -wasd - -asd --- trim, trim_start, trim_end all at once ---https://bing.com-- -- https://bing.com-- --https://bing.com https://bing.com --- replace_regex -Number was: 1 --- has_any_index() -0 1 -1 -1 --- parse_version() -1000000020000000300000040 -1000000020000000000000000 -1000000020000000000000000 -\N -\N -\N -\N -1000000020000000300000004 -1000000020000000000000000 -1000000020000000300000000 -1000000000000000000000000 --- parse_json() -[1,2,3] -[{"a":123.5,"b":"{\\"c\\":456}"}] --- parse_command_line() -[NULL] -[NULL] --- reverse() -321 -43.321 - -dsa -][ -]3,2,1[ -]\'redaV\',\'htraD\'[ -000000000.00:00:21 51-01-7102 -Peter Nara Skilled Manual Graduate Degree 26 -Latoya Shen Professional Graduate Degree 25 --- parse_csv() -[''] -['aaa'] -['aa','b','cc'] -['record1','a','b','c'] diff --git a/tests/queries/0_stateless/02366_kql_func_string.sql b/tests/queries/0_stateless/02366_kql_func_string.sql deleted file mode 100644 index d251b04e08b..00000000000 --- a/tests/queries/0_stateless/02366_kql_func_string.sql +++ /dev/null @@ -1,313 +0,0 @@ --- Tags: no-fasttest - -DROP TABLE IF EXISTS Customers; -CREATE TABLE Customers -( - FirstName Nullable(String), - LastName String, - Occupation String, - Education String, - Age Nullable(UInt8) -) ENGINE = Memory; - -INSERT INTO Customers VALUES ('Theodore','Diaz','Skilled Manual','Bachelors',28), ('Stephanie','Cox','Management abcd defg','Bachelors',33),('Peter','Nara','Skilled Manual','Graduate Degree',26),('Latoya','Shen','Professional','Graduate Degree',25),('Apple','','Skilled Manual','Bachelors',28),(NULL,'why','Professional','Partial College',38); - --- datatable (Version:string) [ --- '1.2.3.4', --- '1.2', --- '1.2.3', --- '1' --- ] - -DROP TABLE IF EXISTS Versions; -CREATE TABLE Versions -( - Version String -) ENGINE = Memory; -INSERT INTO Versions VALUES ('1.2.3.4'),('1.2'),('1.2.3'),('1'); - - -set dialect='kusto'; -print '-- test String Functions --'; - -print '-- Customers |where Education contains \'degree\''; -Customers |where Education contains 'degree' | order by LastName; -print ''; -print '-- 
Customers |where Education !contains \'degree\''; -Customers |where Education !contains 'degree' | order by LastName; -print ''; -print '-- Customers |where Education contains \'Degree\''; -Customers |where Education contains 'Degree' | order by LastName; -print ''; -print '-- Customers |where Education !contains \'Degree\''; -Customers |where Education !contains 'Degree' | order by LastName; -print ''; -print '-- Customers | where FirstName endswith \'RE\''; -Customers | where FirstName endswith 'RE' | order by LastName; -print ''; -print '-- Customers | where ! FirstName endswith \'RE\''; -Customers | where FirstName ! endswith 'RE' | order by LastName; -print ''; -print '--Customers | where FirstName endswith_cs \'re\''; -Customers | where FirstName endswith_cs 're' | order by LastName; -print ''; -print '-- Customers | where FirstName !endswith_cs \'re\''; -Customers | where FirstName !endswith_cs 're' | order by LastName; -print ''; -print '-- Customers | where Occupation == \'Skilled Manual\''; -Customers | where Occupation == 'Skilled Manual' | order by LastName; -print ''; -print '-- Customers | where Occupation != \'Skilled Manual\''; -Customers | where Occupation != 'Skilled Manual' | order by LastName; -print ''; -print '-- Customers | where Occupation has \'skilled\''; -Customers | where Occupation has 'skilled' | order by LastName; -print ''; -print '-- Customers | where Occupation !has \'skilled\''; -Customers | where Occupation !has 'skilled' | order by LastName; -print ''; -print '-- Customers | where Occupation has \'Skilled\''; -Customers | where Occupation has 'Skilled'| order by LastName; -print ''; -print '-- Customers | where Occupation !has \'Skilled\''; -Customers | where Occupation !has 'Skilled'| order by LastName; -print ''; -print '-- Customers | where Occupation hasprefix_cs \'Ab\''; -Customers | where Occupation hasprefix_cs 'Ab'| order by LastName; -print ''; -print '-- Customers | where Occupation !hasprefix_cs \'Ab\''; -Customers | where Occupation !hasprefix_cs 'Ab'| order by LastName; -print ''; -print '-- Customers | where Occupation hasprefix_cs \'ab\''; -Customers | where Occupation hasprefix_cs 'ab'| order by LastName; -print ''; -print '-- Customers | where Occupation !hasprefix_cs \'ab\''; -Customers | where Occupation !hasprefix_cs 'ab'| order by LastName; -print ''; -print '-- Customers | where Occupation hassuffix \'Ent\''; -Customers | where Occupation hassuffix 'Ent'| order by LastName; -print ''; -print '-- Customers | where Occupation !hassuffix \'Ent\''; -Customers | where Occupation !hassuffix 'Ent'| order by LastName; -print ''; -print '-- Customers | where Occupation hassuffix \'ent\''; -Customers | where Occupation hassuffix 'ent'| order by LastName; -print ''; -print '-- Customers | where Occupation hassuffix \'ent\''; -Customers | where Occupation hassuffix 'ent'| order by LastName; -print ''; -print '-- Customers |where Education in (\'Bachelors\',\'High School\')'; -Customers |where Education in ('Bachelors','High School')| order by LastName; -print ''; -print '-- Customers | where Education !in (\'Bachelors\',\'High School\')'; -Customers | where Education !in ('Bachelors','High School')| order by LastName; -print ''; -print '-- Customers | where FirstName matches regex \'P.*r\''; -Customers | where FirstName matches regex 'P.*r'| order by LastName; -print ''; -print '-- Customers | where FirstName startswith \'pet\''; -Customers | where FirstName startswith 'pet'| order by LastName; -print ''; -print '-- Customers | where FirstName 
!startswith \'pet\''; -Customers | where FirstName !startswith 'pet'| order by LastName; -print ''; -print '-- Customers | where FirstName startswith_cs \'pet\''; -Customers | where FirstName startswith_cs 'pet'| order by LastName; -print ''; -print '-- Customers | where FirstName !startswith_cs \'pet\''; -Customers | where FirstName !startswith_cs 'pet'| order by LastName; -print ''; -print '-- Customers | where isempty(LastName)'; -Customers | where isempty(LastName); -print ''; -print '-- Customers | where isnotempty(LastName)'; -Customers | where isnotempty(LastName); -print ''; -print '-- Customers | where isnotnull(FirstName)'; -Customers | where isnotnull(FirstName)| order by LastName; -print ''; -print '-- Customers | where isnull(FirstName)'; -Customers | where isnull(FirstName)| order by LastName; -print ''; -print '-- Customers | project url_decode(\'https%3A%2F%2Fwww.test.com%2Fhello%20word\') | take 1'; -Customers | project url_decode('https%3A%2F%2Fwww.test.com%2Fhello%20word') | take 1; -print ''; -print '-- Customers | project url_encode(\'https://www.test.com/hello word\') | take 1'; -Customers | project url_encode('https://www.test.com/hello word') | take 1; -print ''; -print '-- Customers | project name_abbr = strcat(substring(FirstName,0,3), \' \', substring(LastName,2))'; -Customers | project name_abbr = strcat(substring(FirstName,0,3), ' ', substring(LastName,2))| order by LastName; -print ''; -print '-- Customers | project name = strcat(FirstName, \' \', LastName)'; -Customers | project name = strcat(FirstName, ' ', LastName)| order by LastName; -print ''; -print '-- Customers | project FirstName, strlen(FirstName)'; -Customers | project FirstName, strlen(FirstName)| order by LastName; -print ''; -print '-- Customers | project strrep(FirstName,2,\'_\')'; -Customers | project strrep(FirstName,2,'_')| order by LastName; -print ''; -print '-- Customers | project toupper(FirstName)'; -Customers | project toupper(FirstName)| order by LastName; -print ''; -print '-- Customers | project tolower(FirstName)'; -Customers | project tolower(FirstName)| order by LastName; -print ''; -print '-- support subquery for in orerator (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/in-cs-operator) (subquery need to be wraped with bracket inside bracket); TODO: case-insensitive not supported yet'; -Customers | where Age in ((Customers|project Age|where Age < 30)) | order by LastName; --- Customer | where LastName in~ ("diaz", "cox") -print ''; -print '-- has_all (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/has-all-operator); TODO: subquery not supported yet'; -Customers | where Occupation has_all ('manual', 'skilled') | order by LastName; -print ''; -print '-- has_any (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/has-anyoperator); TODO: subquery not supported yet'; -Customers|where Occupation has_any ('Skilled','abcd'); -print ''; -print '-- countof (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/countoffunction)'; -Customers | project countof('The cat sat on the mat', 'at') | take 1; -Customers | project countof('The cat sat on the mat', 'at', 'normal') | take 1; -Customers | project countof('The cat sat on the mat', '\\s.he', 'regex') | take 1; -print ''; -print '-- extract ( https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/extractfunction)'; -print extract('(\\b[A-Z]+\\b).+(\\b\\d+)', 0, 'The price of PINEAPPLE ice cream is 20'); -print extract('(\\b[A-Z]+\\b).+(\\b\\d+)', 1, 'The price of 
PINEAPPLE ice cream is 20'); -print extract('(\\b[A-Z]+\\b).+(\\b\\d+)', 2, 'The price of PINEAPPLE ice cream is 20'); -print extract('(\\b[A-Z]+\\b).+(\\b\\d+)', 3, 'The price of PINEAPPLE ice cream is 20'); -print extract('(\\b[A-Z]+\\b).+(\\b\\d+)', 2, 'The price of PINEAPPLE ice cream is 20', typeof(real)); -print extract("x=([0-9.]+)", 1, "hello x=45.6|wo" , typeof(bool)); -print extract("x=([0-9.]+)", 1, "hello x=45.6|wo" , typeof(date)); -print extract("x=([0-9.]+)", 1, "hello x=45.6|wo" , typeof(guid)); -print extract("x=([0-9.]+)", 1, "hello x=45.6|wo" , typeof(int)); -print extract("x=([0-9.]+)", 1, "hello x=45.6|wo" , typeof(long)); -print extract("x=([0-9.]+)", 1, "hello x=45.6|wo" , typeof(real)); -print extract("x=([0-9.]+)", 1, "hello x=45.6|wo" , typeof(decimal)); -print ''; -print '-- extract_all (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/extractallfunction); TODO: captureGroups not supported yet'; -Customers | project extract_all('(\\w)(\\w+)(\\w)','The price of PINEAPPLE ice cream is 20') | take 1; -print ''; -print '-- extract_json (https://learn.microsoft.com/en-us/azure/data-explorer/kusto/query/extractjsonfunction)'; -print extract_json('', ''); -- { serverError BAD_ARGUMENTS } -print extract_json('a', ''); -- { serverError BAD_ARGUMENTS } -print extract_json('$.firstName', ''); -print extract_json('$.phoneNumbers[0].type', ''); -print extractjson('$.firstName', '{"firstName":"John","lastName":"doe","age":26,"address":{"streetAddress":"naist street","city":"Nara","postalCode":"630-0192"},"phoneNumbers":[{"type":"iPhone","number":"0123-4567-8888"},{"type":"home","number":"0123-4567-8910"}]}'); -print extract_json('$.phoneNumbers[0].type', '{"firstName":"John","lastName":"doe","age":26,"address":{"streetAddress":"naist street","city":"Nara","postalCode":"630-0192"},"phoneNumbers":[{"type":"iPhone","number":"0123-4567-8888"},{"type":"home","number":"0123-4567-8910"}]}', typeof(string)); -print extract_json('$.phoneNumbers[0].type', '{"firstName":"John","lastName":"doe","age":26,"address":{"streetAddress":"naist street","city":"Nara","postalCode":"630-0192"},"phoneNumbers":[{"type":"iPhone","number":"0123-4567-8888"},{"type":"home","number":"0123-4567-8910"}]}', typeof(int)); -print extract_json('$.age', '{"firstName":"John","lastName":"doe","age":26,"address":{"streetAddress":"naist street","city":"Nara","postalCode":"630-0192"},"phoneNumbers":[{"type":"iPhone","number":"0123-4567-8888"},{"type":"home","number":"0123-4567-8910"}]}'); -print extract_json('$.age', '{"firstName":"John","lastName":"doe","age":26,"address":{"streetAddress":"naist street","city":"Nara","postalCode":"630-0192"},"phoneNumbers":[{"type":"iPhone","number":"0123-4567-8888"},{"type":"home","number":"0123-4567-8910"}]}', typeof(int)); -print extract_json('$.age', '{"firstName":"John","lastName":"doe","age":26,"address":{"streetAddress":"naist street","city":"Nara","postalCode":"630-0192"},"phoneNumbers":[{"type":"iPhone","number":"0123-4567-8888"},{"type":"home","number":"0123-4567-8910"}]}', typeof(long)); --- print extract_json('$.age', '{"firstName":"John","lastName":"doe","age":26,"address":{"streetAddress":"naist street","city":"Nara","postalCode":"630-0192"},"phoneNumbers":[{"type":"iPhone","number":"0123-4567-8888"},{"type":"home","number":"0123-4567-8910"}]}', typeof(bool)); -> true -print extract_json('$.age', '{"firstName":"John","lastName":"doe","age":26,"address":{"streetAddress":"naist 
street","city":"Nara","postalCode":"630-0192"},"phoneNumbers":[{"type":"iPhone","number":"0123-4567-8888"},{"type":"home","number":"0123-4567-8910"}]}', typeof(double)); -print extract_json('$.age', '{"firstName":"John","lastName":"doe","age":26,"address":{"streetAddress":"naist street","city":"Nara","postalCode":"630-0192"},"phoneNumbers":[{"type":"iPhone","number":"0123-4567-8888"},{"type":"home","number":"0123-4567-8910"}]}', typeof(guid)); --- print extract_json('$.phoneNumbers', '{"firstName":"John","lastName":"doe","age":26,"address":{"streetAddress":"naist street","city":"Nara","postalCode":"630-0192"},"phoneNumbers":[{"type":"iPhone","number":"0123-4567-8888"},{"type":"home","number":"0123-4567-8910"}]}', typeof(dynamic)); we won't be able to handle this particular case for a while, because it should return a dictionary -print ''; -print '-- split (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/splitfunction)'; -Customers | project split('aa_bb', '_') | take 1; -Customers | project split('aaa_bbb_ccc', '_', 1) | take 1; -Customers | project split('', '_') | take 1; -Customers | project split('a__b', '_') | take 1; -Customers | project split('aabbcc', 'bb') | take 1; -Customers | project split('aabbcc', '') | take 1; -Customers | project split('aaa_bbb_ccc', '_', -1) | take 1; -Customers | project split('aaa_bbb_ccc', '_', 10) | take 1; -print ''; -print '-- strcat_delim (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/strcat-delimfunction); TODO: only support string now.'; -Customers | project strcat_delim('-', '1', '2', strcat('A','b')) | take 1; --- Customers | project strcat_delim('-', '1', '2', 'A' , 1s); -print ''; -print '-- indexof (https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/indexoffunction); TODO: length and occurrence not supported yet'; -Customers | project indexof('abcdefg','cde') | take 1; -Customers | project indexof('abcdefg','cde',2) | take 1; -Customers | project indexof('abcdefg','cde',6) | take 1; -print '-- base64_encode_fromguid()'; --- print base64_encode_fromguid(guid(null)); -print base64_encode_fromguid(guid('ae3133f2-6e22-49ae-b06a-16e6a9b212eb')); -print base64_encode_fromguid(dynamic(null)); -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } -print base64_encode_fromguid("abcd1231"); -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } -print '-- base64_decode_toarray()'; -print base64_decode_toarray(''); -print base64_decode_toarray('S3VzdG8='); -print '-- base64_decode_toguid()'; -print base64_decode_toguid("JpbpECu8dUy7Pv5gbeJXAA=="); -print base64_decode_toguid(base64_encode_fromguid(guid('ae3133f2-6e22-49ae-b06a-16e6a9b212eb'))) == guid('ae3133f2-6e22-49ae-b06a-16e6a9b212eb'); -print '-- base64_encode_tostring'; -print base64_encode_tostring(''); -print base64_encode_tostring('Kusto1'); -print '-- base64_decode_tostring'; -print base64_decode_tostring(''); -print base64_decode_tostring('S3VzdG8x'); -print '-- parse_url()'; -print parse_url('scheme://username:password@host:1234/this/is/a/path?k1=v1&k2=v2#fragment'); -print '-- parse_urlquery()'; -print parse_urlquery('k1=v1&k2=v2&k3=v3'); -print '-- strcmp()'; -print strcmp('ABC','ABC'), strcmp('abc','ABC'), strcmp('ABC','abc'), strcmp('abcde','abc'); -print '-- substring()'; -print substring("ABCD", -2, 2); -print '-- translate()'; -print translate('krasp', 'otsku', 'spark'), translate('abc', '', 'ab'), translate('abc', 'x', 'abc'); -print '-- trim()'; -print trim("--", "--https://www.ibm.com--"); -print trim("[^\w]+", strcat("- ","Te 
st", "1", "// $")); -print trim("", " asd "); -print trim("a$", "asd"); -print trim("^a", "asd"); -print '-- trim_start()'; -print trim_start("https://", "https://www.ibm.com"); -print trim_start("[^\w]+", strcat("- ","Te st", "1", "// $")); -print trim_start("asd$", "asdw"); -print trim_start("asd$", "asd"); -print trim_start("d$", "asd"); -print '-- trim_end()'; -print trim_end("://www.ibm.com", "https://www.ibm.com"); -print trim_end("[^\w]+", strcat("- ","Te st", "1", "// $")); -print trim_end("^asd", "wasd"); -print trim_end("^asd", "asd"); -print trim_end("^a", "asd"); -print '-- trim, trim_start, trim_end all at once'; -print str = "--https://bing.com--", pattern = '--' | extend start = trim_start(pattern, str), end = trim_end(pattern, str), both = trim(pattern, str); -print '-- replace_regex'; -print replace_regex(strcat('Number is ', '1'), 'is (\d+)', 'was: \1'); -print '-- has_any_index()'; -print has_any_index('this is an example', dynamic(['this', 'example'])), has_any_index("this is an example", dynamic(['not', 'example'])), has_any_index("this is an example", dynamic(['not', 'found'])), has_any_index("this is an example", dynamic([])); -print '-- parse_version()'; -print parse_version(42); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } --- print parse_version(''); -> NULL -print parse_version('1.2.3.40'); -print parse_version('1.2'); -print parse_version(strcat('1.', '2')); -print parse_version('1.2.4.5.6'); -print parse_version('moo'); -print parse_version('moo.boo.foo'); -print parse_version(strcat_delim('.', 'moo', 'boo', 'foo')); -Versions | project parse_version(Version); -print '-- parse_json()'; -print parse_json(dynamic([1, 2, 3])); -print parse_json('{"a":123.5, "b":"{\\"c\\":456}"}'); -print '-- parse_command_line()'; -print parse_command_line(55, 'windows'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } --- print parse_command_line((52 + 3) * 4 % 2, 'windows'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } -print parse_command_line('', 'windows'); -print parse_command_line(strrep(' ', 6), 'windows'); --- print parse_command_line('echo \"hello world!\" print$?', 'windows'); -> ["echo","hello world!","print$?"] --- print parse_command_line("yolo swag 'asd bcd' \"moo moo \"", 'windows'); -> ["yolo","swag","'asd","bcd'","moo moo "] --- print parse_command_line(strcat_delim(' ', "yolo", "swag", "\'asd bcd\'", "\"moo moo \""), 'windows'); -> ["yolo","swag","'asd","bcd'","moo moo "] -print '-- reverse()'; -print reverse(123); -print reverse(123.34); -print reverse(''); -print reverse("asd"); -print reverse(dynamic([])); -print reverse(dynamic([1, 2, 3])); -print reverse(dynamic(['Darth', "Vader"])); -print reverse(datetime(2017-10-15 12:00)); --- print reverse(timespan(3h)); -> 00:00:30 -Customers | where Education contains 'degree' | order by reverse(FirstName); -print '-- parse_csv()'; -print parse_csv(''); -print parse_csv(65); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } -print parse_csv('aaa'); -print result=parse_csv('aa,b,cc'); -print result_multi_record=parse_csv('record1,a,b,c\nrecord2,x,y,z'); --- print result=parse_csv('aa,"b,b,b",cc,"Escaping quotes: ""Title""","line1\nline2"'); -> ["aa","b,b,b","cc","Escaping quotes: \"Title\"","line1\nline2"] --- print parse_csv(strcat(strcat_delim(',', 'aa', '"b,b,b"', 'cc', '"Escaping quotes: ""Title"""', '"line1\nline2"'), '\r\n', strcat_delim(',', 'asd', 'qcf'))); -> ["aa","b,b,b","cc","Escaping quotes: \"Title\"","line1\nline2"] diff --git a/tests/queries/0_stateless/02366_kql_makeseries.reference 
b/tests/queries/0_stateless/02366_kql_makeseries.reference deleted file mode 100644 index 8e7fde997bf..00000000000 --- a/tests/queries/0_stateless/02366_kql_makeseries.reference +++ /dev/null @@ -1,60 +0,0 @@ --- from to -Costco Snargaluff ['2016-09-10 00:00:00.000000000','2016-09-11 00:00:00.000000000','2016-09-12 00:00:00.000000000'] [200,0,102] -Costco Apple ['2016-09-10 00:00:00.000000000','2016-09-11 00:00:00.000000000','2016-09-12 00:00:00.000000000'] [0,2,0] -Aldi Snargaluff ['2016-09-10 00:00:00.000000000','2016-09-11 00:00:00.000000000','2016-09-12 00:00:00.000000000'] [0,500,0] -Aldi Apple ['2016-09-10 00:00:00.000000000','2016-09-11 00:00:00.000000000','2016-09-12 00:00:00.000000000'] [5,0,6] --- from -Costco Snargaluff ['2016-09-10 00:00:00.000000000','2016-09-11 00:00:00.000000000','2016-09-12 00:00:00.000000000'] [200,0,102] -Costco Apple ['2016-09-10 00:00:00.000000000','2016-09-11 00:00:00.000000000'] [0,2] -Aldi Snargaluff ['2016-09-10 00:00:00.000000000','2016-09-11 00:00:00.000000000'] [0,500] -Aldi Apple ['2016-09-10 00:00:00.000000000','2016-09-11 00:00:00.000000000','2016-09-12 00:00:00.000000000'] [5,0,6] --- to -Costco Snargaluff ['2016-09-10 00:00:00.000000000','2016-09-11 00:00:00.000000000','2016-09-12 00:00:00.000000000'] [200,0,102] -Costco Apple ['2016-09-11 00:00:00.000000000','2016-09-12 00:00:00.000000000'] [2,0] -Aldi Snargaluff ['2016-09-11 00:00:00.000000000','2016-09-12 00:00:00.000000000'] [500,0] -Aldi Apple ['2016-09-10 00:00:00.000000000','2016-09-11 00:00:00.000000000','2016-09-12 00:00:00.000000000'] [5,0,6] --- without from/to -Costco Snargaluff ['2016-09-10 00:00:00.000000000','2016-09-11 00:00:00.000000000','2016-09-12 00:00:00.000000000'] [200,0,102] -Costco Apple ['2016-09-11 00:00:00.000000000'] [2] -Aldi Snargaluff ['2016-09-11 00:00:00.000000000'] [500] -Aldi Apple ['2016-09-10 00:00:00.000000000','2016-09-11 00:00:00.000000000','2016-09-12 00:00:00.000000000'] [5,0,6] --- without by -['2016-09-10 00:00:00.000000000','2016-09-11 00:00:00.000000000','2016-09-12 00:00:00.000000000'] [70,334,54] --- without aggregation alias -Costco Snargaluff ['2016-09-10 00:00:00.000000000','2016-09-11 00:00:00.000000000','2016-09-12 00:00:00.000000000'] [200,0,102] -Aldi Snargaluff ['2016-09-11 00:00:00.000000000'] [500] -Aldi Apple ['2016-09-10 00:00:00.000000000','2016-09-11 00:00:00.000000000','2016-09-12 00:00:00.000000000'] [5,0,6] -Costco Apple ['2016-09-11 00:00:00.000000000'] [2] --- assign group alias -Costco Snargaluff ['2016-09-10 00:00:00.000000000','2016-09-11 00:00:00.000000000','2016-09-12 00:00:00.000000000'] [200,0,102] -Aldi Snargaluff ['2016-09-11 00:00:00.000000000'] [500] -Aldi Apple ['2016-09-10 00:00:00.000000000','2016-09-11 00:00:00.000000000','2016-09-12 00:00:00.000000000'] [5,0,6] -Costco Apple ['2016-09-11 00:00:00.000000000'] [2] --- 3d step -Costco Snargaluff ['2016-09-10 00:00:00.000000000'] [134.66666666666666] -Costco Apple ['2016-09-10 00:00:00.000000000'] [2] -Aldi Snargaluff ['2016-09-10 00:00:00.000000000'] [500] -Aldi Apple ['2016-09-10 00:00:00.000000000'] [5.5] --- numeric column -Costco Snargaluff [10,11,12,13,14] [200,0,102,0,0] -Aldi Snargaluff [10,11,12,13,14] [0,500,0,0,0] -Aldi Apple [10,11,12,13,14] [5,0,6,0,0] -Costco Apple [10,11,12,13,14] [0,2,0,0,0] --- from -Costco Snargaluff [10,11,12] [200,0,102] -Aldi Snargaluff [10,11] [0,500] -Aldi Apple [10,11,12] [5,0,6] -Costco Apple [10,11] [0,2] --- to -Costco Snargaluff [8,12,16] [200,102,0] -Aldi Snargaluff [8,12,16] [500,0,0] -Aldi Apple [8,12,16] [5,6,0] 
-Costco Apple [8,12,16] [2,0,0] --- without from/to -Costco Snargaluff [10,12] [200,102] -Aldi Snargaluff [10] [500] -Aldi Apple [10,12] [5,6] -Costco Apple [10] [2] --- without by -[10,12] [202,54] -['2017-01-01 00:00:00.000000000','2017-01-02 00:00:00.000000000','2017-01-03 00:00:00.000000000','2017-01-04 00:00:00.000000000','2017-01-05 00:00:00.000000000','2017-01-06 00:00:00.000000000','2017-01-07 00:00:00.000000000','2017-01-08 00:00:00.000000000','2017-01-09 00:00:00.000000000'] [4,3,5,0,10.5,4,3,8,6.5] diff --git a/tests/queries/0_stateless/02366_kql_makeseries.sql b/tests/queries/0_stateless/02366_kql_makeseries.sql deleted file mode 100644 index c9ca91c0be0..00000000000 --- a/tests/queries/0_stateless/02366_kql_makeseries.sql +++ /dev/null @@ -1,77 +0,0 @@ --- Azure Data Explore Test Data --- let make_series_test_table = datatable (Supplier:string, Fruit:string, Price: real, Purchase:datetime) --- [ --- 'Aldi','Apple',4,'2016-09-10', --- 'Costco','Apple',2,'2016-09-11', --- 'Aldi','Apple',6,'2016-09-10', --- 'Costco','Snargaluff',100,'2016-09-12', --- 'Aldi','Apple',7,'2016-09-12', --- 'Aldi','Snargaluff',400,'2016-09-11', --- 'Costco','Snargaluff',104,'2016-09-12', --- 'Aldi','Apple',5,'2016-09-12', --- 'Aldi','Snargaluff',600,'2016-09-11', --- 'Costco','Snargaluff',200,'2016-09-10', --- ]; -DROP TABLE IF EXISTS make_series_test_table; -CREATE TABLE make_series_test_table -( - Supplier Nullable(String), - Fruit String , - Price Float64, - Purchase Date -) ENGINE = Memory; -INSERT INTO make_series_test_table VALUES ('Aldi','Apple',4,'2016-09-10'), ('Costco','Apple',2,'2016-09-11'), ('Aldi','Apple',6,'2016-09-10'), ('Costco','Snargaluff',100,'2016-09-12'), ('Aldi','Apple',7,'2016-09-12'), ('Aldi','Snargaluff',400,'2016-09-11'),('Costco','Snargaluff',104,'2016-09-12'),('Aldi','Apple',5,'2016-09-12'),('Aldi','Snargaluff',600,'2016-09-11'),('Costco','Snargaluff',200,'2016-09-10'); -DROP TABLE IF EXISTS make_series_test_table2; -CREATE TABLE make_series_test_table2 -( - Supplier Nullable(String), - Fruit String , - Price Int32, - Purchase Int32 -) ENGINE = Memory; -INSERT INTO make_series_test_table2 VALUES ('Aldi','Apple',4,10),('Costco','Apple',2,11),('Aldi','Apple',6,10),('Costco','Snargaluff',100,12),('Aldi','Apple',7,12),('Aldi','Snargaluff',400,11),('Costco','Snargaluff',104,12),('Aldi','Apple',5,12),('Aldi','Snargaluff',600,11),('Costco','Snargaluff',200,10); -DROP TABLE IF EXISTS make_series_test_table3; -CREATE TABLE make_series_test_table3 -( - timestamp datetime, - metric Float64, -) ENGINE = Memory; -INSERT INTO make_series_test_table3 VALUES (parseDateTimeBestEffort('2016-12-31T06:00', 'UTC'), 50), (parseDateTimeBestEffort('2017-01-01', 'UTC'), 4), (parseDateTimeBestEffort('2017-01-02', 'UTC'), 3), (parseDateTimeBestEffort('2017-01-03', 'UTC'), 4), (parseDateTimeBestEffort('2017-01-03T03:00', 'UTC'), 6), (parseDateTimeBestEffort('2017-01-05', 'UTC'), 8), (parseDateTimeBestEffort('2017-01-05T13:40', 'UTC'), 13), (parseDateTimeBestEffort('2017-01-06', 'UTC'), 4), (parseDateTimeBestEffort('2017-01-07', 'UTC'), 3), (parseDateTimeBestEffort('2017-01-08', 'UTC'), 8), (parseDateTimeBestEffort('2017-01-08T21:00', 'UTC'), 8), (parseDateTimeBestEffort('2017-01-09', 'UTC'), 2), (parseDateTimeBestEffort('2017-01-09T12:00', 'UTC'), 11), (parseDateTimeBestEffort('2017-01-10T05:00', 'UTC'), 5); - --- This test requies sorting after some of aggregations but I don't know KQL, sorry -set max_bytes_before_external_group_by = 0; -set dialect = 'kusto'; - -print '-- from to'; 
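-- A minimal worked illustration of the '-- from to' case that follows (a sketch reconstructed
-- from the inserted data and the reference output above): with step 1d and the range
-- [2016-09-10, 2016-09-13), Purchase is bucketed into the three days 2016-09-10 .. 2016-09-12,
-- avg(Price) is computed per (Supplier, Fruit) bucket, and empty buckets are filled with
-- default=0, e.g. Costco/Snargaluff -> [200, 0, 102]
-- (200 on the 10th, no rows on the 11th, avg(100, 104) = 102 on the 12th).
-- print bin(datetime(2016-09-12 09:00), 1d); -- should floor a timestamp to its daily bucket, i.e. 2016-09-12 00:00:00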
-make_series_test_table | make-series PriceAvg = avg(Price) default=0 on Purchase from datetime(2016-09-10) to datetime(2016-09-13) step 1d by Supplier, Fruit | order by Supplier, Fruit; -print '-- from'; -make_series_test_table | make-series PriceAvg = avg(Price) default=0 on Purchase from datetime(2016-09-10) step 1d by Supplier, Fruit | order by Supplier, Fruit; -print '-- to'; -make_series_test_table | make-series PriceAvg = avg(Price) default=0 on Purchase to datetime(2016-09-13) step 1d by Supplier, Fruit | order by Supplier, Fruit; -print '-- without from/to'; -make_series_test_table | make-series PriceAvg = avg(Price) default=0 on Purchase step 1d by Supplier, Fruit | order by Supplier, Fruit; -print '-- without by'; -make_series_test_table | make-series PriceAvg = avg(Price) default=0 on Purchase step 1d; -print '-- without aggregation alias'; -make_series_test_table | make-series avg(Price) default=0 on Purchase step 1d by Supplier, Fruit; -print '-- assign group alias'; -make_series_test_table | make-series avg(Price) default=0 on Purchase step 1d by Supplier_Name = Supplier, Fruit; -print '-- 3d step'; -make_series_test_table | make-series PriceAvg = avg(Price) default=0 on Purchase from datetime(2016-09-10) to datetime(2016-09-13) step 3d by Supplier, Fruit | order by Supplier, Fruit; - -print '-- numeric column' -print '-- from to'; -make_series_test_table2 | make-series PriceAvg=avg(Price) default=0 on Purchase from 10 to 15 step 1.0 by Supplier, Fruit; -print '-- from'; -make_series_test_table2 | make-series PriceAvg=avg(Price) default=0 on Purchase from 10 step 1.0 by Supplier, Fruit; -print '-- to'; -make_series_test_table2 | make-series PriceAvg=avg(Price) default=0 on Purchase to 18 step 4.0 by Supplier, Fruit; -print '-- without from/to'; -make_series_test_table2 | make-series PriceAvg=avg(Price) default=0 on Purchase step 2.0 by Supplier, Fruit; -print '-- without by'; -make_series_test_table2 | make-series PriceAvg=avg(Price) default=0 on Purchase step 2.0; - -make_series_test_table3 | make-series avg(metric) default=0 on timestamp from datetime(2017-01-01) to datetime(2017-01-10) step 1d - --- print '-- summarize --' --- make_series_test_table | summarize count() by format_datetime(bin(Purchase, 1d), 'yy-MM-dd'); diff --git a/tests/queries/0_stateless/02366_kql_mvexpand.reference b/tests/queries/0_stateless/02366_kql_mvexpand.reference deleted file mode 100644 index 25be070eb0b..00000000000 --- a/tests/queries/0_stateless/02366_kql_mvexpand.reference +++ /dev/null @@ -1,65 +0,0 @@ --- mv-expand -- --- mv_expand_test_table | mv-expand c -- -1 ['Salmon','Steak','Chicken'] 1 [5,6,7,8] -1 ['Salmon','Steak','Chicken'] 2 [5,6,7,8] -1 ['Salmon','Steak','Chicken'] 3 [5,6,7,8] -1 ['Salmon','Steak','Chicken'] 4 [5,6,7,8] --- mv_expand_test_table | mv-expand c, d -- -1 ['Salmon','Steak','Chicken'] 1 5 -1 ['Salmon','Steak','Chicken'] 2 6 -1 ['Salmon','Steak','Chicken'] 3 7 -1 ['Salmon','Steak','Chicken'] 4 8 --- mv_expand_test_table | mv-expand b | mv-expand c -- -1 Salmon 1 [5,6,7,8] -1 Salmon 2 [5,6,7,8] -1 Salmon 3 [5,6,7,8] -1 Salmon 4 [5,6,7,8] -1 Steak 1 [5,6,7,8] -1 Steak 2 [5,6,7,8] -1 Steak 3 [5,6,7,8] -1 Steak 4 [5,6,7,8] -1 Chicken 1 [5,6,7,8] -1 Chicken 2 [5,6,7,8] -1 Chicken 3 [5,6,7,8] -1 Chicken 4 [5,6,7,8] --- mv_expand_test_table | mv-expand with_itemindex=index b, c, d -- -0 1 Salmon 1 5 -1 1 Steak 2 6 -2 1 Chicken 3 7 -3 1 4 8 --- mv_expand_test_table | mv-expand array_concat(c,d) -- -1 ['Salmon','Steak','Chicken'] [1,2,3,4] [5,6,7,8] 1 -1 
['Salmon','Steak','Chicken'] [1,2,3,4] [5,6,7,8] 2 -1 ['Salmon','Steak','Chicken'] [1,2,3,4] [5,6,7,8] 3 -1 ['Salmon','Steak','Chicken'] [1,2,3,4] [5,6,7,8] 4 -1 ['Salmon','Steak','Chicken'] [1,2,3,4] [5,6,7,8] 5 -1 ['Salmon','Steak','Chicken'] [1,2,3,4] [5,6,7,8] 6 -1 ['Salmon','Steak','Chicken'] [1,2,3,4] [5,6,7,8] 7 -1 ['Salmon','Steak','Chicken'] [1,2,3,4] [5,6,7,8] 8 --- mv_expand_test_table | mv-expand x = c, y = d -- -1 ['Salmon','Steak','Chicken'] [1,2,3,4] [5,6,7,8] 1 5 -1 ['Salmon','Steak','Chicken'] [1,2,3,4] [5,6,7,8] 2 6 -1 ['Salmon','Steak','Chicken'] [1,2,3,4] [5,6,7,8] 3 7 -1 ['Salmon','Steak','Chicken'] [1,2,3,4] [5,6,7,8] 4 8 --- mv_expand_test_table | mv-expand xy = array_concat(c, d) -- -1 ['Salmon','Steak','Chicken'] [1,2,3,4] [5,6,7,8] 1 -1 ['Salmon','Steak','Chicken'] [1,2,3,4] [5,6,7,8] 2 -1 ['Salmon','Steak','Chicken'] [1,2,3,4] [5,6,7,8] 3 -1 ['Salmon','Steak','Chicken'] [1,2,3,4] [5,6,7,8] 4 -1 ['Salmon','Steak','Chicken'] [1,2,3,4] [5,6,7,8] 5 -1 ['Salmon','Steak','Chicken'] [1,2,3,4] [5,6,7,8] 6 -1 ['Salmon','Steak','Chicken'] [1,2,3,4] [5,6,7,8] 7 -1 ['Salmon','Steak','Chicken'] [1,2,3,4] [5,6,7,8] 8 --- mv_expand_test_table | mv-expand xy = array_concat(c, d) limit 2| summarize count() by xy -- -1 1 -2 1 --- mv_expand_test_table | mv-expand with_itemindex=index c,d to typeof(bool) -- -0 1 ['Salmon','Steak','Chicken'] 1 true -1 1 ['Salmon','Steak','Chicken'] 2 true -2 1 ['Salmon','Steak','Chicken'] 3 true -3 1 ['Salmon','Steak','Chicken'] 4 true --- mv_expand_test_table | mv-expand c to typeof(bool) -- -1 ['Salmon','Steak','Chicken'] [5,6,7,8] true -1 ['Salmon','Steak','Chicken'] [5,6,7,8] true -1 ['Salmon','Steak','Chicken'] [5,6,7,8] true -1 ['Salmon','Steak','Chicken'] [5,6,7,8] true diff --git a/tests/queries/0_stateless/02366_kql_mvexpand.sql b/tests/queries/0_stateless/02366_kql_mvexpand.sql deleted file mode 100644 index e7798609646..00000000000 --- a/tests/queries/0_stateless/02366_kql_mvexpand.sql +++ /dev/null @@ -1,35 +0,0 @@ --- datatable(a: int, b: dynamic, c: dynamic, d: dynamic) [ --- 1, dynamic(['Salmon', 'Steak', 'Chicken']), dynamic([1, 2, 3, 4]), dynamic([5, 6, 7, 8]) --- ] - -DROP TABLE IF EXISTS mv_expand_test_table; -CREATE TABLE mv_expand_test_table -( - a UInt8, - b Array(String), - c Array(Int8), - d Array(Int8) -) ENGINE = Memory; -INSERT INTO mv_expand_test_table VALUES (1, ['Salmon', 'Steak','Chicken'],[1,2,3,4],[5,6,7,8]); -set dialect='kusto'; -print '-- mv-expand --'; -print '-- mv_expand_test_table | mv-expand c --'; -mv_expand_test_table | mv-expand c; -print '-- mv_expand_test_table | mv-expand c, d --'; -mv_expand_test_table | mv-expand c, d; -print '-- mv_expand_test_table | mv-expand b | mv-expand c --'; -mv_expand_test_table | mv-expand b | mv-expand c; -print '-- mv_expand_test_table | mv-expand with_itemindex=index b, c, d --'; -mv_expand_test_table | mv-expand with_itemindex=index b, c, d; -print '-- mv_expand_test_table | mv-expand array_concat(c,d) --'; -mv_expand_test_table | mv-expand array_concat(c,d); -print '-- mv_expand_test_table | mv-expand x = c, y = d --'; -mv_expand_test_table | mv-expand x = c, y = d; -print '-- mv_expand_test_table | mv-expand xy = array_concat(c, d) --'; -mv_expand_test_table | mv-expand xy = array_concat(c, d); -print '-- mv_expand_test_table | mv-expand xy = array_concat(c, d) limit 2| summarize count() by xy --'; -mv_expand_test_table | mv-expand xy = array_concat(c, d) limit 2| summarize count() by xy; -print '-- mv_expand_test_table | mv-expand with_itemindex=index c,d to 
typeof(bool) --'; -mv_expand_test_table | mv-expand with_itemindex=index c,d to typeof(bool); -print '-- mv_expand_test_table | mv-expand c to typeof(bool) --'; -mv_expand_test_table | mv-expand c to typeof(bool); diff --git a/tests/queries/0_stateless/02366_kql_native_interval_format.reference b/tests/queries/0_stateless/02366_kql_native_interval_format.reference deleted file mode 100644 index 8a12c6885c4..00000000000 --- a/tests/queries/0_stateless/02366_kql_native_interval_format.reference +++ /dev/null @@ -1,23 +0,0 @@ -numeric -kusto -00:00:00 -00:00:00.0000001 -00:00:00.0010000 -00:00:42 -01:06:00 -2.18:00:00 -5.00:00:00 -7.00:00:00 -14.00:00:00 -('00:01:12','21.00:00:00','00:00:00.0000002') -numeric -99 -100 -1 -42 -66 -66 -5 -1 -2 -(72,3,200) diff --git a/tests/queries/0_stateless/02366_kql_native_interval_format.sql.j2 b/tests/queries/0_stateless/02366_kql_native_interval_format.sql.j2 deleted file mode 100644 index 0731687222d..00000000000 --- a/tests/queries/0_stateless/02366_kql_native_interval_format.sql.j2 +++ /dev/null @@ -1,16 +0,0 @@ -select value from system.settings where name = 'interval_output_format'; - -{% for format in ['kusto', 'numeric'] -%} -select '{{ format }}'; -set interval_output_format = '{{ format }}'; -select toIntervalNanosecond(99); -select toIntervalNanosecond(100); -select toIntervalMillisecond(1); -select toIntervalSecond(42); -select toIntervalMinute(66); -select toIntervalHour(66); -select toIntervalDay(5); -select toIntervalWeek(1); -select toIntervalWeek(2); -select toIntervalSecond(72) + toIntervalWeek(3) + toIntervalNanosecond(200); -{% endfor -%} diff --git a/tests/queries/0_stateless/02366_kql_operator_in_sql.reference b/tests/queries/0_stateless/02366_kql_operator_in_sql.reference deleted file mode 100644 index 4e0987aa5c3..00000000000 --- a/tests/queries/0_stateless/02366_kql_operator_in_sql.reference +++ /dev/null @@ -1,60 +0,0 @@ --- #1 -- -Theodore Diaz Skilled Manual Bachelors 28 -Stephanie Cox Management abcd defg Bachelors 33 -Apple Skilled Manual Bachelors 28 --- #2 -- -Theodore Diaz Skilled Manual Bachelors 28 -Stephanie Cox Management abcd defg Bachelors 33 -Peter Nara Skilled Manual Graduate Degree 26 -Latoya Shen Professional Graduate Degree 25 -Apple Skilled Manual Bachelors 28 --- #3 -- -Theodore Diaz Skilled Manual Bachelors 28 -Stephanie Cox Management abcd defg Bachelors 33 -Latoya Shen Professional Graduate Degree 25 -Apple Skilled Manual Bachelors 28 --- #4 -- -Theodore Diaz Skilled Manual Bachelors 28 -Stephanie Cox Management abcd defg Bachelors 33 -Latoya Shen Professional Graduate Degree 25 -Apple Skilled Manual Bachelors 28 --- #5 -- -Theodore Diaz Skilled Manual Bachelors 28 -Stephanie Cox Management abcd defg Bachelors 33 -Latoya Shen Professional Graduate Degree 25 -Apple Skilled Manual Bachelors 28 --- #6 -- -Theodore Diaz Skilled Manual Bachelors 28 -Stephanie Cox Management abcd defg Bachelors 33 -Latoya Shen Professional Graduate Degree 25 -Apple Skilled Manual Bachelors 28 --- #7 -- -Theodore Diaz Skilled Manual Bachelors 28 -Stephanie Cox Management abcd defg Bachelors 33 -Latoya Shen Professional Graduate Degree 25 -Apple Skilled Manual Bachelors 28 --- #8 -- -Theodore Diaz Skilled Manual Bachelors 28 -Stephanie Cox Management abcd defg Bachelors 33 -Latoya Shen Professional Graduate Degree 25 -Apple Skilled Manual Bachelors 28 --- #9 -- -Theodore Diaz Skilled Manual Bachelors 28 -Stephanie Cox Management abcd defg Bachelors 33 -Peter Nara Skilled Manual Graduate Degree 26 -Latoya Shen Professional Graduate 
Degree 25 -Apple Skilled Manual Bachelors 28 --- #10 -- --- #11 -- --- #12 -- --- #13 -- --- #14 -- -Theodore Diaz Skilled Manual Bachelors 28 -Stephanie Cox Management abcd defg Bachelors 33 -Latoya Shen Professional Graduate Degree 25 -Apple Skilled Manual Bachelors 28 --- #15 -- -Theodore Diaz Skilled Manual Bachelors 28 -Stephanie Cox Management abcd defg Bachelors 33 -Latoya Shen Professional Graduate Degree 25 -Apple Skilled Manual Bachelors 28 diff --git a/tests/queries/0_stateless/02366_kql_operator_in_sql.sql b/tests/queries/0_stateless/02366_kql_operator_in_sql.sql deleted file mode 100644 index 0b02faa0680..00000000000 --- a/tests/queries/0_stateless/02366_kql_operator_in_sql.sql +++ /dev/null @@ -1,42 +0,0 @@ -DROP TABLE IF EXISTS Customers; -CREATE TABLE Customers -( - FirstName Nullable(String), - LastName String, - Occupation String, - Education String, - Age Nullable(UInt8) -) ENGINE = Memory; - -INSERT INTO Customers VALUES ('Theodore','Diaz','Skilled Manual','Bachelors',28),('Stephanie','Cox','Management abcd defg','Bachelors',33),('Peter','Nara','Skilled Manual','Graduate Degree',26),('Latoya','Shen','Professional','Graduate Degree',25),('Apple','','Skilled Manual','Bachelors',28),(NULL,'why','Professional','Partial College',38); -Select '-- #1 --' ; -select * from kql($$Customers | where FirstName !in ('Peter', 'Latoya')$$); -Select '-- #2 --' ; -select * from kql($$Customers | where FirstName !in ("test", "test2")$$); -Select '-- #3 --' ; -select * from kql($$Customers | where FirstName !contains 'Pet'$$); -Select '-- #4 --' ; -select * from kql($$Customers | where FirstName !contains_cs 'Pet'$$); -Select '-- #5 --' ; -select * from kql($$Customers | where FirstName !endswith 'ter'$$); -Select '-- #6 --' ; -select * from kql($$Customers | where FirstName !endswith_cs 'ter'$$); -Select '-- #7 --' ; -select * from kql($$Customers | where FirstName != 'Peter'$$); -Select '-- #8 --' ; -select * from kql($$Customers | where FirstName !has 'Peter'$$); -Select '-- #9 --' ; -select * from kql($$Customers | where FirstName !has_cs 'peter'$$); -Select '-- #10 --' ; --- select * from kql($$Customers | where FirstName !hasprefix 'Peter'$$); -- will enable when analyzer fixed `and` issue -Select '-- #11 --' ; ---select * from kql($$Customers | where FirstName !hasprefix_cs 'Peter'$$); -Select '-- #12 --' ; ---select * from kql($$Customers | where FirstName !hassuffix 'Peter'$$); -Select '-- #13 --' ; ---select * from kql($$Customers | where FirstName !hassuffix_cs 'Peter'$$); -Select '-- #14 --' ; -select * from kql($$Customers | where FirstName !startswith 'Peter'$$); -Select '-- #15 --' ; -select * from kql($$Customers | where FirstName !startswith_cs 'Peter'$$); -DROP TABLE IF EXISTS Customers; diff --git a/tests/queries/0_stateless/02366_kql_summarize.reference b/tests/queries/0_stateless/02366_kql_summarize.reference deleted file mode 100644 index aeb42feb6be..00000000000 --- a/tests/queries/0_stateless/02366_kql_summarize.reference +++ /dev/null @@ -1,92 +0,0 @@ --- test summarize -- -12 25 46 32.416666666666664 389 -Skilled Manual 5 26 36 30.2 151 -Professional 6 25 46 34.166666666666664 205 -Management abcd defg 1 33 33 33 33 -Skilled Manual 0 -Professional 2 -Management abcd defg 0 -Skilled Manual 36 -Professional 38 -Management abcd defg 33 -Skilled Manual 26 -Professional 25 -Management abcd defg 33 -Skilled Manual 30.2 -Professional 29.25 -Management abcd defg 33 -Skilled Manual 151 -Professional 117 -Management abcd defg 33 -4 -2 -40 2 -30 4 -20 6 -Skilled Manual 5 
-Professional 6 -Management abcd defg 1 --- make_list() -- -Skilled Manual ['Bachelors','Graduate Degree','High School','Partial College','Bachelors'] -Professional ['Graduate Degree','Partial College','Partial College','Partial College','Partial College','Partial College'] -Management abcd defg ['Bachelors'] -Skilled Manual ['Bachelors','Graduate Degree'] -Professional ['Graduate Degree','Partial College'] -Management abcd defg ['Bachelors'] --- make_list_if() -- -Skilled Manual ['Edward','Christine'] -Professional ['Dalton','Angel'] -Management abcd defg ['Stephanie'] -Skilled Manual ['Edward'] -Professional ['Dalton'] -Management abcd defg ['Stephanie'] --- make_set() -- -Skilled Manual ['Graduate Degree','High School','Partial College','Bachelors'] -Professional ['Graduate Degree','Partial College'] -Management abcd defg ['Bachelors'] -Skilled Manual ['Graduate Degree','Bachelors'] -Professional ['Graduate Degree','Partial College'] -Management abcd defg ['Bachelors'] --- make_set_if() -- -Skilled Manual ['Partial College','High School'] -Professional ['Partial College'] -Management abcd defg ['Bachelors'] -Skilled Manual ['High School'] -Professional ['Partial College'] -Management abcd defg ['Bachelors'] --- stdev() -- -6.855102059227432 --- stdevif() -- -7.557189365836421 --- binary_all_and -- -42 --- binary_all_or -- -46 --- binary_all_xor -- -4 -43.8 -25.55 30.5 43.8 -30.5 -35 -[25,35,45] --- Summarize following sort -- -Skilled Manual 5 -Professional 6 -Management abcd defg 1 --- summarize with bin -- -0 1 -245000 2 -0 1 -245 2 -0 1 -245 2 -2015-10-12 00:00:00.000000000 -2016-10-12 00:00:00.000000000 --- make_list_with_nulls -- -['Theodore','Stephanie','Peter','Latoya','Joshua','Edward','Dalton','Christine','Cameron','Angel','Apple',NULL] -Skilled Manual ['Theodore','Peter','Edward','Christine','Apple'] -Professional ['Latoya','Joshua','Dalton','Cameron','Angel',NULL] -Management abcd defg ['Stephanie'] -Skilled Manual ['Theodore','Peter','Edward','Christine','Apple'] [28,26,36,33,28] -Professional ['Latoya','Joshua','Dalton','Cameron','Angel',NULL] [25,26,42,28,46,38] -Management abcd defg ['Stephanie'] [33] diff --git a/tests/queries/0_stateless/02366_kql_summarize.sql b/tests/queries/0_stateless/02366_kql_summarize.sql deleted file mode 100644 index bb12d1f251f..00000000000 --- a/tests/queries/0_stateless/02366_kql_summarize.sql +++ /dev/null @@ -1,102 +0,0 @@ --- datatable(FirstName:string, LastName:string, Occupation:string, Education:string, Age:int) [ --- 'Theodore', 'Diaz', 'Skilled Manual', 'Bachelors', 28, --- 'Stephanie', 'Cox', 'Management abcd defg', 'Bachelors', 33, --- 'Peter', 'Nara', 'Skilled Manual', 'Graduate Degree', 26, --- 'Latoya', 'Shen', 'Professional', 'Graduate Degree', 25, --- 'Joshua', 'Lee', 'Professional', 'Partial College', 26, --- 'Edward', 'Hernandez', 'Skilled Manual', 'High School', 36, --- 'Dalton', 'Wood', 'Professional', 'Partial College', 42, --- 'Christine', 'Nara', 'Skilled Manual', 'Partial College', 33, --- 'Cameron', 'Rodriguez', 'Professional', 'Partial College', 28, --- 'Angel', 'Stewart', 'Professional', 'Partial College', 46, --- 'Apple', '', 'Skilled Manual', 'Bachelors', 28, --- dynamic(null), 'why', 'Professional', 'Partial College', 38 --- ] - -DROP TABLE IF EXISTS Customers; -CREATE TABLE Customers -( - FirstName Nullable(String), - LastName String, - Occupation String, - Education String, - Age Nullable(UInt8) -) ENGINE = Memory; - -INSERT INTO Customers VALUES ('Theodore','Diaz','Skilled 
Manual','Bachelors',28),('Stephanie','Cox','Management abcd defg','Bachelors',33),('Peter','Nara','Skilled Manual','Graduate Degree',26),('Latoya','Shen','Professional','Graduate Degree',25),('Joshua','Lee','Professional','Partial College',26),('Edward','Hernandez','Skilled Manual','High School',36),('Dalton','Wood','Professional','Partial College',42),('Christine','Nara','Skilled Manual','Partial College',33),('Cameron','Rodriguez','Professional','Partial College',28),('Angel','Stewart','Professional','Partial College',46),('Apple','','Skilled Manual','Bachelors',28),(NULL,'why','Professional','Partial College',38); - -drop table if exists EventLog; -create table EventLog -( - LogEntry String, - Created Int64 -) ENGINE = Memory; - -insert into EventLog values ('Darth Vader has entered the room.', 546), ('Rambo is suspciously looking at Darth Vader.', 245234), ('Darth Sidious electrocutes both using Force Lightning.', 245554); - -drop table if exists Dates; -create table Dates -( - EventTime DateTime, -) ENGINE = Memory; - -Insert into Dates VALUES ('2015-10-12') , ('2016-10-12') -Select '-- test summarize --' ; -set dialect='kusto'; -Customers | summarize count(), min(Age), max(Age), avg(Age), sum(Age); -Customers | summarize count(), min(Age), max(Age), avg(Age), sum(Age) by Occupation | order by Occupation; -Customers | summarize countif(Age>40) by Occupation | order by Occupation; -Customers | summarize MyMax = maxif(Age, Age<40) by Occupation | order by Occupation; -Customers | summarize MyMin = minif(Age, Age<40) by Occupation | order by Occupation; -Customers | summarize MyAvg = avgif(Age, Age<40) by Occupation | order by Occupation; -Customers | summarize MySum = sumif(Age, Age<40) by Occupation | order by Occupation; -Customers | summarize dcount(Education); -Customers | summarize dcountif(Education, Occupation=='Professional'); -Customers | summarize count_ = count() by bin(Age, 10) | order by count_ asc; -Customers | summarize job_count = count() by Occupation | where job_count > 0 | order by Occupation; -Customers | summarize 'Edu Count'=count() by Education | sort by 'Edu Count' desc; -- { clientError 62 } - -print '-- make_list() --'; -Customers | summarize f_list = make_list(Education) by Occupation | sort by Occupation; -Customers | summarize f_list = make_list(Education, 2) by Occupation | sort by Occupation; -print '-- make_list_if() --'; -Customers | summarize f_list = make_list_if(FirstName, Age>30) by Occupation | sort by Occupation; -Customers | summarize f_list = make_list_if(FirstName, Age>30, 1) by Occupation | sort by Occupation; -print '-- make_set() --'; -Customers | summarize f_list = make_set(Education) by Occupation | sort by Occupation; -Customers | summarize f_list = make_set(Education, 2) by Occupation | sort by Occupation; -print '-- make_set_if() --'; -Customers | summarize f_list = make_set_if(Education, Age>30) by Occupation | sort by Occupation; -Customers | summarize f_list = make_set_if(Education, Age>30, 1) by Occupation | sort by Occupation; -print '-- stdev() --'; -Customers | project Age | summarize stdev(Age); -print '-- stdevif() --'; -Customers | project Age | summarize stdevif(Age, Age%2==0); -print '-- binary_all_and --'; -Customers | project Age | where Age > 40 | summarize binary_all_and(Age); -print '-- binary_all_or --'; -Customers | project Age | where Age > 40 | summarize binary_all_or(Age); -print '-- binary_all_xor --'; -Customers | project Age | where Age > 40 | summarize binary_all_xor(Age); - -Customers | project Age | summarize 
percentile(Age, 95); -Customers | project Age | summarize percentiles(Age, 5, 50, 95)|project round(percentiles_Age[0],2),round(percentiles_Age[1],2),round(percentiles_Age[2],2); -Customers | project Age | summarize percentiles(Age, 5, 50, 95)[1]; -Customers | summarize w=count() by AgeBucket=bin(Age, 5) | summarize percentilew(AgeBucket, w, 75); -Customers | summarize w=count() by AgeBucket=bin(Age, 5) | summarize percentilesw(AgeBucket, w, 50, 75, 99.9); - -print '-- Summarize following sort --'; -Customers | sort by FirstName | summarize count() by Occupation | sort by Occupation; - -print '-- summarize with bin --'; -EventLog | summarize count=count() by bin(Created, 1000) | sort by count asc; -EventLog | summarize count=count() by bin(unixtime_seconds_todatetime(Created/1000), 1s) | sort by count asc; -EventLog | summarize count=count() by time_label=bin(Created/1000, 1s) | sort by count asc; -Dates | project bin(datetime(EventTime), 1m); -print '-- make_list_with_nulls --'; -Customers | summarize t = make_list_with_nulls(FirstName); -Customers | summarize f_list = make_list_with_nulls(FirstName) by Occupation | sort by Occupation; -Customers | summarize f_list = make_list_with_nulls(FirstName), a_list = make_list_with_nulls(Age) by Occupation | sort by Occupation; --- TODO: --- arg_max() --- arg_min() diff --git a/tests/queries/0_stateless/02366_kql_tabular.reference b/tests/queries/0_stateless/02366_kql_tabular.reference deleted file mode 100644 index e70c02ce34f..00000000000 --- a/tests/queries/0_stateless/02366_kql_tabular.reference +++ /dev/null @@ -1,139 +0,0 @@ --- test Query only has table name: -- -Theodore Diaz Skilled Manual Bachelors 28 -Stephanie Cox Management Bachelors 33 -Peter Nara Skilled Manual Graduate Degree 26 -Latoya Shen Professional Graduate Degree 25 -Joshua Lee Professional Partial College 26 -Edward Hernandez Skilled Manual High School 36 -Dalton Wood Professional Partial College 42 -Christine Nara Skilled Manual Partial College 33 -Cameron Rodriguez Professional Partial College 28 -Angel Stewart Professional Partial College 46 --- Query has Column Selection -- -Theodore Diaz Skilled Manual -Stephanie Cox Management -Peter Nara Skilled Manual -Latoya Shen Professional -Joshua Lee Professional -Edward Hernandez Skilled Manual -Dalton Wood Professional -Christine Nara Skilled Manual -Cameron Rodriguez Professional -Angel Stewart Professional --- Query has limit -- -Theodore Diaz Skilled Manual -Stephanie Cox Management -Peter Nara Skilled Manual -Latoya Shen Professional -Joshua Lee Professional -Theodore Diaz Skilled Manual -Stephanie Cox Management -Peter Nara Skilled Manual -Latoya Shen Professional -Joshua Lee Professional --- Query has second limit with bigger value -- -Theodore Diaz Skilled Manual -Stephanie Cox Management -Peter Nara Skilled Manual -Latoya Shen Professional -Joshua Lee Professional --- Query has second limit with smaller value -- -Theodore Diaz Skilled Manual -Stephanie Cox Management -Peter Nara Skilled Manual --- Query has second Column selection -- -Theodore Diaz -Stephanie Cox -Peter Nara --- Query has second Column selection with extra column -- --- Query with desc sort -- -Theodore -Stephanie -Peter -Latoya -Joshua -Skilled Manual -Skilled Manual -Professional -Professional -Management --- Query with asc sort -- -Management -Professional -Professional -Skilled Manual -Skilled Manual --- Query with sort (without keyword asc desc) -- -Theodore -Stephanie -Peter -Latoya -Joshua -Skilled Manual -Skilled Manual -Professional 
-Professional -Management --- Query with sort 2 Columns with different direction -- -Stephanie Cox Management -Latoya Shen Professional -Joshua Lee Professional -Peter Nara Skilled Manual -Theodore Diaz Skilled Manual --- Query with second sort -- -Stephanie Cox Management -Latoya Shen Professional -Joshua Lee Professional -Peter Nara Skilled Manual -Theodore Diaz Skilled Manual --- Test String Equals (==) -- -Theodore Diaz Skilled Manual -Peter Nara Skilled Manual -Edward Hernandez Skilled Manual -Christine Nara Skilled Manual --- Test String Not equals (!=) -- -Stephanie Cox Management -Latoya Shen Professional -Joshua Lee Professional -Dalton Wood Professional -Cameron Rodriguez Professional -Angel Stewart Professional --- Test Filter using a list (in) -- -Theodore Diaz Skilled Manual Bachelors -Stephanie Cox Management Bachelors -Edward Hernandez Skilled Manual High School --- Test Filter using a list (!in) -- -Peter Nara Skilled Manual Graduate Degree -Latoya Shen Professional Graduate Degree -Joshua Lee Professional Partial College -Dalton Wood Professional Partial College -Christine Nara Skilled Manual Partial College -Cameron Rodriguez Professional Partial College -Angel Stewart Professional Partial College --- Test Filter using common string operations (contains_cs) -- -Joshua Lee Professional Partial College -Dalton Wood Professional Partial College -Christine Nara Skilled Manual Partial College -Cameron Rodriguez Professional Partial College -Angel Stewart Professional Partial College --- Test Filter using common string operations (startswith_cs) -- -Latoya Shen Professional Graduate Degree -Joshua Lee Professional Partial College -Dalton Wood Professional Partial College -Cameron Rodriguez Professional Partial College -Angel Stewart Professional Partial College --- Test Filter using common string operations (endswith_cs) -- -Latoya Shen Professional Graduate Degree -Joshua Lee Professional Partial College --- Test Filter using numerical equal (==) -- -Peter Nara Skilled Manual Graduate Degree 26 -Joshua Lee Professional Partial College 26 --- Test Filter using numerical great and less (> , <) -- -Stephanie Cox Management Bachelors 33 -Edward Hernandez Skilled Manual High School 36 -Christine Nara Skilled Manual Partial College 33 --- Test Filter using multi where -- -Dalton Wood Professional Partial College 42 -Angel Stewart Professional Partial College 46 --- Complex query with unknown function -- --- Missing column in front of startsWith -- diff --git a/tests/queries/0_stateless/02366_kql_tabular.sql b/tests/queries/0_stateless/02366_kql_tabular.sql deleted file mode 100644 index f73c4c09cca..00000000000 --- a/tests/queries/0_stateless/02366_kql_tabular.sql +++ /dev/null @@ -1,88 +0,0 @@ -DROP TABLE IF EXISTS Customers; -CREATE TABLE Customers -( - FirstName Nullable(String), - LastName String, - Occupation String, - Education String, - Age Nullable(UInt8) -) ENGINE = Memory; - -INSERT INTO Customers VALUES ('Theodore','Diaz','Skilled Manual','Bachelors',28), ('Stephanie','Cox','Management','Bachelors',33), ('Peter','Nara','Skilled Manual','Graduate Degree',26), ('Latoya','Shen','Professional','Graduate Degree',25), ('Joshua','Lee','Professional','Partial College',26), ('Edward','Hernandez','Skilled Manual','High School',36), ('Dalton','Wood','Professional','Partial College',42), ('Christine','Nara','Skilled Manual','Partial College',33), ('Cameron','Rodriguez','Professional','Partial College',28), ('Angel','Stewart','Professional','Partial College',46); - -set 
dialect='kusto'; -print '-- test Query only has table name: --'; -Customers; - -print '-- Query has Column Selection --'; -Customers | project FirstName,LastName,Occupation; - -print '-- Query has limit --'; -Customers | project FirstName,LastName,Occupation | take 5; -Customers | project FirstName,LastName,Occupation | limit 5; - -print '-- Query has second limit with bigger value --'; -Customers | project FirstName,LastName,Occupation | take 5 | take 7; - -print '-- Query has second limit with smaller value --'; -Customers | project FirstName,LastName,Occupation | take 5 | take 3; - -print '-- Query has second Column selection --'; -Customers | project FirstName,LastName,Occupation | take 3 | project FirstName,LastName; - -print '-- Query has second Column selection with extra column --'; -Customers| project FirstName,LastName,Occupation | take 3 | project FirstName,LastName,Education;-- { serverError 47 } - -print '-- Query with desc sort --'; -Customers | project FirstName | take 5 | sort by FirstName desc; -Customers | project Occupation | take 5 | order by Occupation desc; - -print '-- Query with asc sort --'; -Customers | project Occupation | take 5 | sort by Occupation asc; - -print '-- Query with sort (without keyword asc desc) --'; -Customers | project FirstName | take 5 | sort by FirstName; -Customers | project Occupation | take 5 | order by Occupation; - -print '-- Query with sort 2 Columns with different direction --'; -Customers | project FirstName,LastName,Occupation | take 5 | sort by Occupation asc, LastName desc; - -print '-- Query with second sort --'; -Customers | project FirstName,LastName,Occupation | take 5 | sort by Occupation desc |sort by Occupation asc, LastName desc; - -print '-- Test String Equals (==) --'; -Customers | project FirstName,LastName,Occupation | where Occupation == 'Skilled Manual'; - -print '-- Test String Not equals (!=) --'; -Customers | project FirstName,LastName,Occupation | where Occupation != 'Skilled Manual'; - -print '-- Test Filter using a list (in) --'; -Customers | project FirstName,LastName,Occupation,Education | where Education in ('Bachelors','High School'); - -print '-- Test Filter using a list (!in) --'; -set dialect='kusto'; -Customers | project FirstName,LastName,Occupation,Education | where Education !in ('Bachelors','High School'); - -print '-- Test Filter using common string operations (contains_cs) --'; -Customers | project FirstName,LastName,Occupation,Education | where Education contains_cs 'Coll'; - -print '-- Test Filter using common string operations (startswith_cs) --'; -Customers | project FirstName,LastName,Occupation,Education | where Occupation startswith_cs 'Prof'; - -print '-- Test Filter using common string operations (endswith_cs) --'; -Customers | project FirstName,LastName,Occupation,Education | where FirstName endswith_cs 'a'; - -print '-- Test Filter using numerical equal (==) --'; -Customers | project FirstName,LastName,Occupation,Education,Age | where Age == 26; - -print '-- Test Filter using numerical great and less (> , <) --'; -Customers | project FirstName,LastName,Occupation,Education,Age | where Age > 30 and Age < 40; - -print '-- Test Filter using multi where --'; -Customers | project FirstName,LastName,Occupation,Education,Age | where Age > 30 | where Occupation == 'Professional'; - -print '-- Complex query with unknown function --'; -hits | where CounterID == 62 and EventDate >= '2013-07-14' and EventDate <= '2013-07-15' and IsRefresh == 0 and DontCountHits == 0 | summarize count() by 
d=bin(poopoo(EventTime), 1m) | order by d | limit 10; -- { clientError UNKNOWN_FUNCTION } - -print '-- Missing column in front of startsWith --'; -StormEvents | where startswith "W" | summarize Count=count() by State; -- { clientError SYNTAX_ERROR } diff --git a/tests/queries/0_stateless/02375_pretty_formats.sql.j2 b/tests/queries/0_stateless/02375_pretty_formats.sql.j2 index cc61346d267..55462ea6b61 100644 --- a/tests/queries/0_stateless/02375_pretty_formats.sql.j2 +++ b/tests/queries/0_stateless/02375_pretty_formats.sql.j2 @@ -3,6 +3,6 @@ 'PrettySpaceNoEscapesMonoBlock'] -%} select '{{ format }}'; -select number as x, number + 1 as y from numbers(4) settings max_block_size=2 format {{ format }}; +select number as x, number + 1 as y from numbers(4) settings max_block_size=2, output_format_pretty_color=1 format {{ format }}; {% endfor -%} diff --git a/tests/queries/0_stateless/02375_system_schema_inference_cache.reference b/tests/queries/0_stateless/02375_system_schema_inference_cache.reference index 676fb441f53..e08bc754a71 100644 --- a/tests/queries/0_stateless/02375_system_schema_inference_cache.reference +++ b/tests/queries/0_stateless/02375_system_schema_inference_cache.reference @@ -1,11 +1,3 @@ -storage String -source String -format String -additional_format_info String -registration_time DateTime -schema Nullable(String) -number_of_rows Nullable(UInt64) -schema_inference_mode Nullable(String) x Nullable(Int64) s Nullable(String) x Nullable(Int64) diff --git a/tests/queries/0_stateless/02375_system_schema_inference_cache.sql b/tests/queries/0_stateless/02375_system_schema_inference_cache.sql index 310e22ed31f..64b6cd86fc7 100644 --- a/tests/queries/0_stateless/02375_system_schema_inference_cache.sql +++ b/tests/queries/0_stateless/02375_system_schema_inference_cache.sql @@ -4,7 +4,6 @@ set input_format_json_try_infer_numbers_from_strings=1; insert into function file('02374_data1.jsonl') select number as x, 'str' as s from numbers(10); insert into function file('02374_data2.jsonl') select number as x, 'str' as s from numbers(10); -desc system.schema_inference_cache; system drop schema cache for file; desc file('02374_data1.jsonl'); diff --git a/tests/queries/0_stateless/02381_join_dup_columns_in_plan.reference b/tests/queries/0_stateless/02381_join_dup_columns_in_plan.reference index 5dd39c39852..365725f8ffe 100644 --- a/tests/queries/0_stateless/02381_join_dup_columns_in_plan.reference +++ b/tests/queries/0_stateless/02381_join_dup_columns_in_plan.reference @@ -1,3 +1,10 @@ +-- { echoOn } + +SET join_algorithm = 'hash'; +EXPLAIN actions=0, description=0, header=1 +SELECT * FROM ( SELECT 'key2' AS key ) AS s1 +JOIN ( SELECT 'key1' AS key, '1' AS value UNION ALL SELECT 'key2' AS key, '1' AS value ) AS s2 +USING (key); Expression Header: key String value String @@ -21,6 +28,121 @@ Header: key String __table3.value String ReadFromStorage Header: dummy UInt8 +SELECT * FROM ( SELECT 1 AS k ) as t1 FULL JOIN ( SELECT 1 AS k, k ) as t2 ON t1.k = t2.k ORDER BY 1; +1 1 1 +SELECT * FROM ( SELECT 1 AS k, 1 as n ) as t1 FULL JOIN ( SELECT 1 AS k, k, 1 as n ) as t2 ON t1.n = t2.n ORDER BY 1; +1 1 1 1 1 +SELECT * +FROM ( SELECT number, number, number, number, number FROM numbers(5) ) as t1 +FULL JOIN ( SELECT number, number, number, FROM numbers(3, 8) ) as t2 +ON t1.number = t2.number +ORDER BY t1.number, t2.number +; +0 0 0 0 0 0 0 0 +0 0 0 0 0 5 5 5 +0 0 0 0 0 6 6 6 +0 0 0 0 0 7 7 7 +0 0 0 0 0 8 8 8 +0 0 0 0 0 9 9 9 +0 0 0 0 0 10 10 10 +1 1 1 1 1 0 0 0 +2 2 2 2 2 0 0 0 +3 3 3 3 3 3 3 3 +4 4 4 4 
4 4 4 4 +SELECT * +FROM ( SELECT number, number, number, number, number, number + 1 as k, k, k FROM numbers(5) ) as t1 +FULL JOIN ( SELECT number, number, number, number + 1 as k, k, k, k, k FROM numbers(3, 8) ) as t2 +ON t1.number = t2.number +ORDER BY t1.number, t2.number +; +0 0 0 0 0 1 1 1 0 0 0 0 0 0 0 0 +0 0 0 0 0 0 0 0 5 5 5 6 6 6 6 6 +0 0 0 0 0 0 0 0 6 6 6 7 7 7 7 7 +0 0 0 0 0 0 0 0 7 7 7 8 8 8 8 8 +0 0 0 0 0 0 0 0 8 8 8 9 9 9 9 9 +0 0 0 0 0 0 0 0 9 9 9 10 10 10 10 10 +0 0 0 0 0 0 0 0 10 10 10 11 11 11 11 11 +1 1 1 1 1 2 2 2 0 0 0 0 0 0 0 0 +2 2 2 2 2 3 3 3 0 0 0 0 0 0 0 0 +3 3 3 3 3 4 4 4 3 3 3 4 4 4 4 4 +4 4 4 4 4 5 5 5 4 4 4 5 5 5 5 5 +SELECT * +FROM ( SELECT number, number, number, number, number, number + 1 as k, k, k FROM numbers(5) ) as t1 +FULL JOIN ( SELECT number, number, number, number + 1 as k, k, k, k, k FROM numbers(3, 8) ) as t2 +ON t1.k = t2.k +ORDER BY t1.k, t2.k +; +0 0 0 0 0 0 0 0 5 5 5 6 6 6 6 6 +0 0 0 0 0 0 0 0 6 6 6 7 7 7 7 7 +0 0 0 0 0 0 0 0 7 7 7 8 8 8 8 8 +0 0 0 0 0 0 0 0 8 8 8 9 9 9 9 9 +0 0 0 0 0 0 0 0 9 9 9 10 10 10 10 10 +0 0 0 0 0 0 0 0 10 10 10 11 11 11 11 11 +0 0 0 0 0 1 1 1 0 0 0 0 0 0 0 0 +1 1 1 1 1 2 2 2 0 0 0 0 0 0 0 0 +2 2 2 2 2 3 3 3 0 0 0 0 0 0 0 0 +3 3 3 3 3 4 4 4 3 3 3 4 4 4 4 4 +4 4 4 4 4 5 5 5 4 4 4 5 5 5 5 5 +SELECT * FROM ( SELECT 1 AS k ) as t1 FULL JOIN ( SELECT 1 AS k, k ) as t2 USING (k) ORDER BY 1; +1 +SELECT * FROM ( SELECT 1 AS k, 1 as n ) as t1 FULL JOIN ( SELECT 1 AS k, k, 1 as n ) as t2 USING (n) ORDER BY 1; +1 1 1 1 +SELECT * +FROM ( SELECT number, number, number, number, number FROM numbers(5) ) as t1 +FULL JOIN ( SELECT number, number, number, FROM numbers(3, 8) ) as t2 +USING (number) +ORDER BY number +; +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +SELECT * +FROM ( SELECT number, number, number, number, number, number + 1 as k, k, k FROM numbers(5) ) as t1 +FULL JOIN ( SELECT number, number, number, number + 1 as k, k, k, k, k FROM numbers(3, 8) ) as t2 +USING (number) +ORDER BY number +; +0 1 1 1 0 0 0 0 0 +1 2 2 2 0 0 0 0 0 +2 3 3 3 0 0 0 0 0 +3 4 4 4 4 4 4 4 4 +4 5 5 5 5 5 5 5 5 +5 0 0 0 6 6 6 6 6 +6 0 0 0 7 7 7 7 7 +7 0 0 0 8 8 8 8 8 +8 0 0 0 9 9 9 9 9 +9 0 0 0 10 10 10 10 10 +10 0 0 0 11 11 11 11 11 +SELECT * +FROM ( SELECT number, number, number, number, number, number + 1 as k, k, k FROM numbers(5) ) as t1 +FULL JOIN ( SELECT number, number, number, number + 1 as k, k, k, k, k FROM numbers(3, 8) ) as t2 +USING (k) +ORDER BY k +; +1 0 0 0 0 0 0 0 0 +2 1 1 1 1 1 0 0 0 +3 2 2 2 2 2 0 0 0 +4 3 3 3 3 3 3 3 3 +5 4 4 4 4 4 4 4 4 +6 0 0 0 0 0 5 5 5 +7 0 0 0 0 0 6 6 6 +8 0 0 0 0 0 7 7 7 +9 0 0 0 0 0 8 8 8 +10 0 0 0 0 0 9 9 9 +11 0 0 0 0 0 10 10 10 +SET join_algorithm = 'full_sorting_merge', max_rows_in_set_to_optimize_join = 0; +EXPLAIN actions=0, description=0, header=1 +SELECT * FROM ( SELECT 'key2' AS key ) AS s1 +JOIN ( SELECT 'key1' AS key, '1' AS value UNION ALL SELECT 'key2' AS key, '1' AS value ) AS s2 +USING (key); Expression Header: key String value String @@ -50,3 +172,113 @@ Header: key String __table3.value String ReadFromStorage Header: dummy UInt8 +SELECT * FROM ( SELECT 1 AS k ) as t1 FULL JOIN ( SELECT 1 AS k, k ) as t2 ON t1.k = t2.k ORDER BY 1; +1 1 1 +SELECT * FROM ( SELECT 1 AS k, 1 as n ) as t1 FULL JOIN ( SELECT 1 AS k, k, 1 as n ) as t2 ON t1.n = t2.n ORDER BY 1; +1 1 1 1 1 +SELECT * +FROM ( SELECT number, number, number, number, number FROM numbers(5) ) as t1 +FULL JOIN ( SELECT number, number, number, FROM numbers(3, 8) ) as t2 +ON t1.number = t2.number +ORDER BY t1.number, t2.number +; +0 0 0 0 0 0 0 0 +0 0 0 
0 0 5 5 5 +0 0 0 0 0 6 6 6 +0 0 0 0 0 7 7 7 +0 0 0 0 0 8 8 8 +0 0 0 0 0 9 9 9 +0 0 0 0 0 10 10 10 +1 1 1 1 1 0 0 0 +2 2 2 2 2 0 0 0 +3 3 3 3 3 3 3 3 +4 4 4 4 4 4 4 4 +SELECT * +FROM ( SELECT number, number, number, number, number, number + 1 as k, k, k FROM numbers(5) ) as t1 +FULL JOIN ( SELECT number, number, number, number + 1 as k, k, k, k, k FROM numbers(3, 8) ) as t2 +ON t1.number = t2.number +ORDER BY t1.number, t2.number +; +0 0 0 0 0 1 1 1 0 0 0 0 0 0 0 0 +0 0 0 0 0 0 0 0 5 5 5 6 6 6 6 6 +0 0 0 0 0 0 0 0 6 6 6 7 7 7 7 7 +0 0 0 0 0 0 0 0 7 7 7 8 8 8 8 8 +0 0 0 0 0 0 0 0 8 8 8 9 9 9 9 9 +0 0 0 0 0 0 0 0 9 9 9 10 10 10 10 10 +0 0 0 0 0 0 0 0 10 10 10 11 11 11 11 11 +1 1 1 1 1 2 2 2 0 0 0 0 0 0 0 0 +2 2 2 2 2 3 3 3 0 0 0 0 0 0 0 0 +3 3 3 3 3 4 4 4 3 3 3 4 4 4 4 4 +4 4 4 4 4 5 5 5 4 4 4 5 5 5 5 5 +SELECT * +FROM ( SELECT number, number, number, number, number, number + 1 as k, k, k FROM numbers(5) ) as t1 +FULL JOIN ( SELECT number, number, number, number + 1 as k, k, k, k, k FROM numbers(3, 8) ) as t2 +ON t1.k = t2.k +ORDER BY t1.k, t2.k +; +0 0 0 0 0 0 0 0 5 5 5 6 6 6 6 6 +0 0 0 0 0 0 0 0 6 6 6 7 7 7 7 7 +0 0 0 0 0 0 0 0 7 7 7 8 8 8 8 8 +0 0 0 0 0 0 0 0 8 8 8 9 9 9 9 9 +0 0 0 0 0 0 0 0 9 9 9 10 10 10 10 10 +0 0 0 0 0 0 0 0 10 10 10 11 11 11 11 11 +0 0 0 0 0 1 1 1 0 0 0 0 0 0 0 0 +1 1 1 1 1 2 2 2 0 0 0 0 0 0 0 0 +2 2 2 2 2 3 3 3 0 0 0 0 0 0 0 0 +3 3 3 3 3 4 4 4 3 3 3 4 4 4 4 4 +4 4 4 4 4 5 5 5 4 4 4 5 5 5 5 5 +SELECT * FROM ( SELECT 1 AS k ) as t1 FULL JOIN ( SELECT 1 AS k, k ) as t2 USING (k) ORDER BY 1; +1 +SELECT * FROM ( SELECT 1 AS k, 1 as n ) as t1 FULL JOIN ( SELECT 1 AS k, k, 1 as n ) as t2 USING (n) ORDER BY 1; +1 1 1 1 +SELECT * +FROM ( SELECT number, number, number, number, number FROM numbers(5) ) as t1 +FULL JOIN ( SELECT number, number, number, FROM numbers(3, 8) ) as t2 +USING (number) +ORDER BY number +; +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +SELECT * +FROM ( SELECT number, number, number, number, number, number + 1 as k, k, k FROM numbers(5) ) as t1 +FULL JOIN ( SELECT number, number, number, number + 1 as k, k, k, k, k FROM numbers(3, 8) ) as t2 +USING (number) +ORDER BY number +; +0 1 1 1 0 0 0 0 0 +1 2 2 2 0 0 0 0 0 +2 3 3 3 0 0 0 0 0 +3 4 4 4 4 4 4 4 4 +4 5 5 5 5 5 5 5 5 +5 0 0 0 6 6 6 6 6 +6 0 0 0 7 7 7 7 7 +7 0 0 0 8 8 8 8 8 +8 0 0 0 9 9 9 9 9 +9 0 0 0 10 10 10 10 10 +10 0 0 0 11 11 11 11 11 +SELECT * +FROM ( SELECT number, number, number, number, number, number + 1 as k, k, k FROM numbers(5) ) as t1 +FULL JOIN ( SELECT number, number, number, number + 1 as k, k, k, k, k FROM numbers(3, 8) ) as t2 +USING (k) +ORDER BY k +; +1 0 0 0 0 0 0 0 0 +2 1 1 1 1 1 0 0 0 +3 2 2 2 2 2 0 0 0 +4 3 3 3 3 3 3 3 3 +5 4 4 4 4 4 4 4 4 +6 0 0 0 0 0 5 5 5 +7 0 0 0 0 0 6 6 6 +8 0 0 0 0 0 7 7 7 +9 0 0 0 0 0 8 8 8 +10 0 0 0 0 0 9 9 9 +11 0 0 0 0 0 10 10 10 diff --git a/tests/queries/0_stateless/02381_join_dup_columns_in_plan.sql b/tests/queries/0_stateless/02381_join_dup_columns_in_plan.sql deleted file mode 100644 index dfcd8c12e11..00000000000 --- a/tests/queries/0_stateless/02381_join_dup_columns_in_plan.sql +++ /dev/null @@ -1,16 +0,0 @@ -SET allow_experimental_analyzer = 1; -SET join_algorithm = 'hash'; - -EXPLAIN actions=0, description=0, header=1 -SELECT * FROM ( SELECT 'key2' AS key ) AS s1 -JOIN ( SELECT 'key1' AS key, '1' AS value UNION ALL SELECT 'key2' AS key, '1' AS value ) AS s2 -USING (key); - -SET join_algorithm = 'full_sorting_merge'; - -SET max_rows_in_set_to_optimize_join = 0; - -EXPLAIN actions=0, description=0, header=1 -SELECT * FROM ( SELECT 'key2' AS key ) AS s1 
-JOIN ( SELECT 'key1' AS key, '1' AS value UNION ALL SELECT 'key2' AS key, '1' AS value ) AS s2 -USING (key); diff --git a/tests/queries/0_stateless/02381_join_dup_columns_in_plan.sql.j2 b/tests/queries/0_stateless/02381_join_dup_columns_in_plan.sql.j2 new file mode 100644 index 00000000000..ca4af4df6b6 --- /dev/null +++ b/tests/queries/0_stateless/02381_join_dup_columns_in_plan.sql.j2 @@ -0,0 +1,62 @@ +SET allow_experimental_analyzer = 1; + +-- { echoOn } + +{% for query_settings in ['join_algorithm = \'hash\'', 'join_algorithm = \'full_sorting_merge\', max_rows_in_set_to_optimize_join = 0'] -%} + +SET {{ query_settings }}; + +EXPLAIN actions=0, description=0, header=1 +SELECT * FROM ( SELECT 'key2' AS key ) AS s1 +JOIN ( SELECT 'key1' AS key, '1' AS value UNION ALL SELECT 'key2' AS key, '1' AS value ) AS s2 +USING (key); + +SELECT * FROM ( SELECT 1 AS k ) as t1 FULL JOIN ( SELECT 1 AS k, k ) as t2 ON t1.k = t2.k ORDER BY 1; +SELECT * FROM ( SELECT 1 AS k, 1 as n ) as t1 FULL JOIN ( SELECT 1 AS k, k, 1 as n ) as t2 ON t1.n = t2.n ORDER BY 1; + +SELECT * +FROM ( SELECT number, number, number, number, number FROM numbers(5) ) as t1 +FULL JOIN ( SELECT number, number, number, FROM numbers(3, 8) ) as t2 +ON t1.number = t2.number +ORDER BY t1.number, t2.number +; + +SELECT * +FROM ( SELECT number, number, number, number, number, number + 1 as k, k, k FROM numbers(5) ) as t1 +FULL JOIN ( SELECT number, number, number, number + 1 as k, k, k, k, k FROM numbers(3, 8) ) as t2 +ON t1.number = t2.number +ORDER BY t1.number, t2.number +; + +SELECT * +FROM ( SELECT number, number, number, number, number, number + 1 as k, k, k FROM numbers(5) ) as t1 +FULL JOIN ( SELECT number, number, number, number + 1 as k, k, k, k, k FROM numbers(3, 8) ) as t2 +ON t1.k = t2.k +ORDER BY t1.k, t2.k +; + +SELECT * FROM ( SELECT 1 AS k ) as t1 FULL JOIN ( SELECT 1 AS k, k ) as t2 USING (k) ORDER BY 1; +SELECT * FROM ( SELECT 1 AS k, 1 as n ) as t1 FULL JOIN ( SELECT 1 AS k, k, 1 as n ) as t2 USING (n) ORDER BY 1; + +SELECT * +FROM ( SELECT number, number, number, number, number FROM numbers(5) ) as t1 +FULL JOIN ( SELECT number, number, number, FROM numbers(3, 8) ) as t2 +USING (number) +ORDER BY number +; + +SELECT * +FROM ( SELECT number, number, number, number, number, number + 1 as k, k, k FROM numbers(5) ) as t1 +FULL JOIN ( SELECT number, number, number, number + 1 as k, k, k, k, k FROM numbers(3, 8) ) as t2 +USING (number) +ORDER BY number +; + +SELECT * +FROM ( SELECT number, number, number, number, number, number + 1 as k, k, k FROM numbers(5) ) as t1 +FULL JOIN ( SELECT number, number, number, number + 1 as k, k, k, k, k FROM numbers(3, 8) ) as t2 +USING (k) +ORDER BY k +; + +{% endfor -%} diff --git a/tests/queries/0_stateless/02418_do_not_return_empty_blocks_from_ConvertingAggregatedToChunksTransform.sh b/tests/queries/0_stateless/02418_do_not_return_empty_blocks_from_ConvertingAggregatedToChunksTransform.sh index 32693adff24..847e9682bf5 100755 --- a/tests/queries/0_stateless/02418_do_not_return_empty_blocks_from_ConvertingAggregatedToChunksTransform.sh +++ b/tests/queries/0_stateless/02418_do_not_return_empty_blocks_from_ConvertingAggregatedToChunksTransform.sh @@ -13,7 +13,7 @@ ${CLICKHOUSE_CURL} \ from numbers_mt(1e6) where number = 42 group by number - settings max_threads = 10, max_bytes_before_external_group_by = 1, group_by_two_level_threshold = 1 + settings max_threads = 10, max_bytes_before_external_group_by = 1, group_by_two_level_threshold = 1, output_format_pretty_color=1 format PrettyCompact" 
${CLICKHOUSE_CURL} \ @@ -24,5 +24,5 @@ ${CLICKHOUSE_CURL} \ from numbers_mt(1e6) where number = 42 group by number - settings max_threads = 10, max_bytes_before_external_group_by = 0, group_by_two_level_threshold = 1 + settings max_threads = 10, max_bytes_before_external_group_by = 0, group_by_two_level_threshold = 1, output_format_pretty_color=1 format PrettyCompact" diff --git a/tests/queries/0_stateless/02421_formats_with_totals_and_extremes.sql.j2 b/tests/queries/0_stateless/02421_formats_with_totals_and_extremes.sql.j2 index 32738766199..f936501e72a 100644 --- a/tests/queries/0_stateless/02421_formats_with_totals_and_extremes.sql.j2 +++ b/tests/queries/0_stateless/02421_formats_with_totals_and_extremes.sql.j2 @@ -1,5 +1,6 @@ -- Tags: no-fasttest +set output_format_pretty_color=1; set output_format_write_statistics=0; {% for format in ['CSV', 'TSV', 'XML', 'Vertical', 'Pretty', 'JSON', 'JSONCompact'] -%} diff --git a/tests/queries/0_stateless/02456_test_attach_partition_different_partition_exp.reference b/tests/queries/0_stateless/02456_test_attach_partition_different_partition_exp.reference new file mode 100644 index 00000000000..f1d036b08bf --- /dev/null +++ b/tests/queries/0_stateless/02456_test_attach_partition_different_partition_exp.reference @@ -0,0 +1,467 @@ +-- { echoOn } +-- Should be allowed since destination partition expr is monotonically increasing and compatible +DROP TABLE IF EXISTS source; +DROP TABLE IF EXISTS destination; +CREATE TABLE source (timestamp DateTime) engine=MergeTree ORDER BY tuple() PARTITION BY toYYYYMMDD(timestamp); +CREATE TABLE destination (timestamp DateTime) engine=MergeTree ORDER BY tuple() PARTITION BY toYYYYMM(timestamp); +INSERT INTO TABLE source VALUES ('2010-03-02 02:01:01'), ('2010-03-02 02:01:03'); +ALTER TABLE destination ATTACH PARTITION ID '20100302' FROM source; +SELECT * FROM source ORDER BY timestamp; +2010-03-02 02:01:01 +2010-03-02 02:01:03 +SELECT * FROM destination ORDER BY timestamp; +2010-03-02 02:01:01 +2010-03-02 02:01:03 +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; +201003 +TRUNCATE TABLE destination; +ALTER TABLE destination ATTACH PARTITION '20100302' FROM source; +SELECT * FROM source ORDER BY timestamp; +2010-03-02 02:01:01 +2010-03-02 02:01:03 +SELECT * FROM destination ORDER BY timestamp; +2010-03-02 02:01:01 +2010-03-02 02:01:03 +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; +201003 +-- Should be allowed since destination partition expr is monotonically increasing and compatible. Note that even though +-- the destination partition expression is more granular, the data would still fall in the same partition. 
Thus, it is valid +DROP TABLE IF EXISTS source; +DROP TABLE IF EXISTS destination; +CREATE TABLE source (timestamp DateTime) engine=MergeTree ORDER BY tuple() PARTITION BY toYYYYMM(timestamp); +CREATE TABLE destination (timestamp DateTime) engine=MergeTree ORDER BY tuple() PARTITION BY toYYYYMMDD(timestamp); +INSERT INTO TABLE source VALUES ('2010-03-02 02:01:01'), ('2010-03-02 02:01:03'); +ALTER TABLE destination ATTACH PARTITION ID '201003' FROM source; +SELECT * FROM source ORDER BY timestamp; +2010-03-02 02:01:01 +2010-03-02 02:01:03 +SELECT * FROM destination ORDER BY timestamp; +2010-03-02 02:01:01 +2010-03-02 02:01:03 +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; +20100302 +TRUNCATE TABLE destination; +ALTER TABLE destination ATTACH PARTITION '201003' FROM source; +SELECT * FROM source ORDER BY timestamp; +2010-03-02 02:01:01 +2010-03-02 02:01:03 +SELECT * FROM destination ORDER BY timestamp; +2010-03-02 02:01:01 +2010-03-02 02:01:03 +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; +20100302 +-- Should be allowed since destination partition expr is monotonically increasing and compatible for those specific values +DROP TABLE IF EXISTS source; +DROP TABLE IF EXISTS destination; +CREATE TABLE source (timestamp DateTime, A Int64) engine=MergeTree ORDER BY timestamp PARTITION BY intDiv(A, 6); +CREATE TABLE destination (timestamp DateTime, A Int64) engine=MergeTree ORDER BY timestamp PARTITION BY A; +INSERT INTO TABLE source VALUES ('2010-03-02 02:01:01', 1), ('2010-03-02 02:01:03', 1); +ALTER TABLE destination ATTACH PARTITION ID '0' FROM source; +SELECT * FROM source ORDER BY timestamp; +2010-03-02 02:01:01 1 +2010-03-02 02:01:03 1 +SELECT * FROM destination ORDER BY timestamp; +2010-03-02 02:01:01 1 +2010-03-02 02:01:03 1 +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; +1 +TRUNCATE TABLE destination; +ALTER TABLE destination ATTACH PARTITION 0 FROM source; +SELECT * FROM source ORDER BY timestamp; +2010-03-02 02:01:01 1 +2010-03-02 02:01:03 1 +SELECT * FROM destination ORDER BY timestamp; +2010-03-02 02:01:01 1 +2010-03-02 02:01:03 1 +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; +1 +-- Should be allowed because dst partition exp is monot inc and data is not split +DROP TABLE IF EXISTS source; +DROP TABLE IF EXISTS destination; +CREATE TABLE source (productName String, category String) engine=MergeTree ORDER BY tuple() PARTITION BY cityHash64(category); +CREATE TABLE destination (productName String, category String) engine=MergeTree ORDER BY tuple() PARTITION BY toString(category); +INSERT INTO TABLE source VALUES ('spaghetti', 'food'), ('mop', 'general'); +INSERT INTO TABLE source VALUES ('rice', 'food'); +ALTER TABLE destination ATTACH PARTITION ID '17908065610379824077' from source; +SELECT * FROM source ORDER BY productName; +mop general +rice food +spaghetti food +SELECT * FROM destination ORDER BY productName; +rice food +spaghetti food +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; +59532f3c39a412a413f0f014c7750a9d +59532f3c39a412a413f0f014c7750a9d +TRUNCATE TABLE destination; +ALTER TABLE destination ATTACH PARTITION '17908065610379824077' from source; +SELECT * FROM source ORDER BY productName; +mop general +rice food +spaghetti food +SELECT * FROM 
destination ORDER BY productName; +rice food +spaghetti food +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; +59532f3c39a412a413f0f014c7750a9d +59532f3c39a412a413f0f014c7750a9d +-- Should be allowed, extra test case to validate https://github.com/ClickHouse/ClickHouse/pull/39507#issuecomment-1747574133 + +DROP TABLE IF EXISTS source; +DROP TABLE IF EXISTS destination; +CREATE TABLE source (timestamp Int64) engine=MergeTree ORDER BY (timestamp) PARTITION BY intDiv(timestamp, 86400000); +CREATE TABLE destination (timestamp Int64) engine=MergeTree ORDER BY (timestamp) PARTITION BY toYear(toDateTime(intDiv(timestamp, 1000))); +INSERT INTO TABLE source VALUES (1267495261123); +ALTER TABLE destination ATTACH PARTITION ID '14670' FROM source; +SELECT * FROM source ORDER BY timestamp; +1267495261123 +SELECT * FROM destination ORDER BY timestamp; +1267495261123 +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; +2010 +TRUNCATE TABLE destination; +ALTER TABLE destination ATTACH PARTITION '14670' from source; +SELECT * FROM source ORDER BY timestamp; +1267495261123 +SELECT * FROM destination ORDER BY timestamp; +1267495261123 +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; +2010 +-- Should be allowed, extra test case to validate https://github.com/ClickHouse/ClickHouse/pull/39507#issuecomment-1747511726 + +DROP TABLE IF EXISTS source; +DROP TABLE IF EXISTS destination; +CREATE TABLE source (timestamp DateTime('UTC'), key Int64, f Float64) engine=MergeTree ORDER BY (key, timestamp) PARTITION BY toYear(timestamp); +CREATE TABLE destination (timestamp DateTime('UTC'), key Int64, f Float64) engine=MergeTree ORDER BY (key, timestamp) PARTITION BY (intDiv(toUInt32(timestamp),86400)); +INSERT INTO TABLE source VALUES ('2010-03-02 02:01:01',1,1),('2010-03-02 02:01:01',1,1),('2011-02-02 02:01:03',1,1); +ALTER TABLE destination ATTACH PARTITION ID '2010' FROM source; +SELECT * FROM source ORDER BY timestamp; +2010-03-02 02:01:01 1 1 +2010-03-02 02:01:01 1 1 +2011-02-02 02:01:03 1 1 +SELECT * FROM destination ORDER BY timestamp; +2010-03-02 02:01:01 1 1 +2010-03-02 02:01:01 1 1 +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; +14670 +TRUNCATE TABLE destination; +ALTER TABLE destination ATTACH PARTITION '2010' from source; +SELECT * FROM source ORDER BY timestamp; +2010-03-02 02:01:01 1 1 +2010-03-02 02:01:01 1 1 +2011-02-02 02:01:03 1 1 +SELECT * FROM destination ORDER BY timestamp; +2010-03-02 02:01:01 1 1 +2010-03-02 02:01:01 1 1 +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; +14670 +-- Should be allowed, partitioned table to unpartitioned. Since the destination is unpartitioned, parts would ultimately +-- fall into the same partition. +-- Destination partition by expression is omitted, which causes StorageMetadata::getPartitionKeyAST() to be nullptr. 
+DROP TABLE IF EXISTS source; +DROP TABLE IF EXISTS destination; +CREATE TABLE source (timestamp DateTime) engine=MergeTree ORDER BY tuple() PARTITION BY toYYYYMM(timestamp); +CREATE TABLE destination (timestamp DateTime) engine=MergeTree ORDER BY tuple(); +INSERT INTO TABLE source VALUES ('2010-03-02 02:01:01'), ('2010-03-02 02:01:03'); +ALTER TABLE destination ATTACH PARTITION ID '201003' FROM source; +SELECT * FROM source ORDER BY timestamp; +2010-03-02 02:01:01 +2010-03-02 02:01:03 +SELECT * FROM destination ORDER BY timestamp; +2010-03-02 02:01:01 +2010-03-02 02:01:03 +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; +all +TRUNCATE TABLE destination; +ALTER TABLE destination ATTACH PARTITION '201003' from source; +SELECT * FROM source ORDER BY timestamp; +2010-03-02 02:01:01 +2010-03-02 02:01:03 +SELECT * FROM destination ORDER BY timestamp; +2010-03-02 02:01:01 +2010-03-02 02:01:03 +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; +all +-- Same as above, but destination partition by expression is explicitly defined. Test case required to validate that +-- partition by tuple() is accepted. +DROP TABLE IF EXISTS source; +DROP TABLE IF EXISTS destination; +CREATE TABLE source (timestamp DateTime) engine=MergeTree ORDER BY tuple() PARTITION BY toYYYYMM(timestamp); +CREATE TABLE destination (timestamp DateTime) engine=MergeTree ORDER BY tuple() PARTITION BY tuple(); +INSERT INTO TABLE source VALUES ('2010-03-02 02:01:01'), ('2010-03-02 02:01:03'); +ALTER TABLE destination ATTACH PARTITION ID '201003' FROM source; +SELECT * FROM source ORDER BY timestamp; +2010-03-02 02:01:01 +2010-03-02 02:01:03 +SELECT * FROM destination ORDER BY timestamp; +2010-03-02 02:01:01 +2010-03-02 02:01:03 +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; +all +TRUNCATE TABLE destination; +ALTER TABLE destination ATTACH PARTITION '201003' from source; +SELECT * FROM source ORDER BY timestamp; +2010-03-02 02:01:01 +2010-03-02 02:01:03 +SELECT * FROM destination ORDER BY timestamp; +2010-03-02 02:01:01 +2010-03-02 02:01:03 +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; +all +-- Should be allowed because the destination partition expression columns are a subset of the source partition expression columns +-- Columns in this case refer to the expression elements, not to the actual table columns +DROP TABLE IF EXISTS source; +DROP TABLE IF EXISTS destination; +CREATE TABLE source (a Int, b Int, c Int) engine=MergeTree ORDER BY tuple() PARTITION BY (a, b, c); +CREATE TABLE destination (a Int, b Int, c Int) engine=MergeTree ORDER BY tuple() PARTITION BY (a, b); +INSERT INTO TABLE source VALUES (1, 2, 3), (1, 2, 4); +ALTER TABLE destination ATTACH PARTITION ID '1-2-3' FROM source; +SELECT * FROM source ORDER BY (a, b, c); +1 2 3 +1 2 4 +SELECT * FROM destination ORDER BY (a, b, c); +1 2 3 +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; +1-2 +TRUNCATE TABLE destination; +ALTER TABLE destination ATTACH PARTITION (1, 2, 3) from source; +SELECT * FROM source ORDER BY (a, b, c); +1 2 3 +1 2 4 +SELECT * FROM destination ORDER BY (a, b, c); +1 2 3 +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; +1-2 +-- Should be allowed because the destination 
partition expression columns are a subset of the source partition expression columns +-- Columns in this case refer to the expression elements, not to the actual table columns +DROP TABLE IF EXISTS source; +DROP TABLE IF EXISTS destination; +CREATE TABLE source (a Int, b Int, c Int) engine=MergeTree ORDER BY tuple() PARTITION BY (a, b, c); +CREATE TABLE destination (a Int, b Int, c Int) engine=MergeTree ORDER BY tuple() PARTITION BY a; +INSERT INTO TABLE source VALUES (1, 2, 3), (1, 2, 4); +ALTER TABLE destination ATTACH PARTITION ID '1-2-3' FROM source; +SELECT * FROM source ORDER BY (a, b, c); +1 2 3 +1 2 4 +SELECT * FROM destination ORDER BY (a, b, c); +1 2 3 +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; +1 +TRUNCATE TABLE destination; +ALTER TABLE destination ATTACH PARTITION (1, 2, 3) from source; +SELECT * FROM source ORDER BY (a, b, c); +1 2 3 +1 2 4 +SELECT * FROM destination ORDER BY (a, b, c); +1 2 3 +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; +1 +-- Should be allowed. Special test case, tricky to explain. First column of source partition expression is +-- timestamp, while first column of destination partition expression is `A`. One of the previous implementations +-- would not match the columns, which could lead to `timestamp` min max being used to calculate monotonicity of `A`. +DROP TABLE IF EXISTS source; +DROP TABLE IF EXISTS destination; +CREATE TABLE source (`timestamp` DateTime, `A` Int64) ENGINE = MergeTree PARTITION BY tuple(toYYYYMM(timestamp), intDiv(A, 6)) ORDER BY timestamp; +CREATE TABLE destination (`timestamp` DateTime, `A` Int64) ENGINE = MergeTree PARTITION BY A ORDER BY timestamp; +INSERT INTO TABLE source VALUES ('2010-03-02 02:01:01', 5); +ALTER TABLE destination ATTACH PARTITION ID '201003-0' FROM source; +SELECT * FROM source ORDER BY timestamp; +2010-03-02 02:01:01 5 +SELECT * FROM destination ORDER BY timestamp; +2010-03-02 02:01:01 5 +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; +5 +TRUNCATE TABLE destination; +ALTER TABLE destination ATTACH PARTITION (201003, 0) from source; +SELECT * FROM source ORDER BY timestamp; +2010-03-02 02:01:01 5 +SELECT * FROM destination ORDER BY timestamp; +2010-03-02 02:01:01 5 +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; +5 +-- Should be allowed. Destination partition expression contains multiple expressions, but all of them are monotonically +-- increasing in the source partition min max indexes. 
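The expected '3-6' partition id in the case below can be verified by hand: each element of the destination expression tuple(intDiv(A, 2), intDiv(B, 2)) is applied to the single value (min = max) covered by the source part, so for the inserted row (6, 12):

SELECT intDiv(6, 2) AS first_element, intDiv(12, 2) AS second_element;
-- 3	6   -> destination partition id '3-6'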
+DROP TABLE IF EXISTS source; +DROP TABLE IF EXISTS destination; +CREATE TABLE source (A Int, B Int) ENGINE = MergeTree PARTITION BY tuple(A, B) ORDER BY tuple(); +CREATE TABLE destination (A Int, B Int) ENGINE = MergeTree PARTITION BY tuple(intDiv(A, 2), intDiv(B, 2)) ORDER BY tuple(); +INSERT INTO TABLE source VALUES (6, 12); +ALTER TABLE destination ATTACH PARTITION ID '6-12' FROM source; +SELECT * FROM source ORDER BY A; +6 12 +SELECT * FROM destination ORDER BY A; +6 12 +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; +3-6 +TRUNCATE TABLE destination; +ALTER TABLE destination ATTACH PARTITION (6, 12) from source; +SELECT * FROM source ORDER BY A; +6 12 +SELECT * FROM destination ORDER BY A; +6 12 +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; +3-6 +-- Should be allowed. The same scenario as above, but partition expressions inverted. +DROP TABLE IF EXISTS source; +DROP TABLE IF EXISTS destination; +CREATE TABLE source (A Int, B Int) ENGINE = MergeTree PARTITION BY tuple(intDiv(A, 2), intDiv(B, 2)) ORDER BY tuple(); +CREATE TABLE destination (A Int, B Int) ENGINE = MergeTree PARTITION BY tuple(A, B) ORDER BY tuple(); +INSERT INTO TABLE source VALUES (6, 12); +ALTER TABLE destination ATTACH PARTITION ID '3-6' FROM source; +SELECT * FROM source ORDER BY A; +6 12 +SELECT * FROM destination ORDER BY A; +6 12 +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; +6-12 +TRUNCATE TABLE destination; +ALTER TABLE destination ATTACH PARTITION (3, 6) from source; +SELECT * FROM source ORDER BY A; +6 12 +SELECT * FROM destination ORDER BY A; +6 12 +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; +6-12 +-- Should be allowed, it is a local operation, no different than regular attach. Replicated to replicated. +DROP TABLE IF EXISTS source; +DROP TABLE IF EXISTS destination; +CREATE TABLE + source(timestamp DateTime) + ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/test/source_replicated_to_replicated_distinct_expression', '1') + PARTITION BY toYYYYMMDD(timestamp) + ORDER BY tuple(); +CREATE TABLE + destination(timestamp DateTime) + ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/test/destination_replicated_to_replicated_distinct_expression', '1') + PARTITION BY toYYYYMM(timestamp) + ORDER BY tuple(); +INSERT INTO TABLE source VALUES ('2010-03-02 02:01:01'), ('2010-03-02 02:01:03'); +ALTER TABLE destination ATTACH PARTITION ID '20100302' FROM source; +SELECT * FROM source ORDER BY timestamp; +2010-03-02 02:01:01 +2010-03-02 02:01:03 +SELECT * FROM destination ORDER BY timestamp; +2010-03-02 02:01:01 +2010-03-02 02:01:03 +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; +201003 +TRUNCATE TABLE destination; +ALTER TABLE destination ATTACH PARTITION '20100302' from source; +SELECT * FROM source ORDER BY timestamp; +2010-03-02 02:01:01 +2010-03-02 02:01:03 +SELECT * FROM destination ORDER BY timestamp; +2010-03-02 02:01:01 +2010-03-02 02:01:03 +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; +201003 +-- Should be allowed, it is a local operation, no different than regular attach. 
Non replicated to replicated +DROP TABLE IF EXISTS source SYNC; +DROP TABLE IF EXISTS destination SYNC; +CREATE TABLE source(timestamp DateTime) ENGINE = MergeTree() PARTITION BY toYYYYMMDD(timestamp) ORDER BY tuple(); +CREATE TABLE + destination(timestamp DateTime) + ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/test/destination_non_replicated_to_replicated_distinct_expression', '1') + PARTITION BY toYYYYMM(timestamp) + ORDER BY tuple(); +INSERT INTO TABLE source VALUES ('2010-03-02 02:01:01'), ('2010-03-02 02:01:03'); +ALTER TABLE destination ATTACH PARTITION ID '20100302' FROM source; +SELECT * FROM source ORDER BY timestamp; +2010-03-02 02:01:01 +2010-03-02 02:01:03 +SELECT * FROM destination ORDER BY timestamp; +2010-03-02 02:01:01 +2010-03-02 02:01:03 +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; +201003 +TRUNCATE TABLE destination; +ALTER TABLE destination ATTACH PARTITION '20100302' from source; +SELECT * FROM source ORDER BY timestamp; +2010-03-02 02:01:01 +2010-03-02 02:01:03 +SELECT * FROM destination ORDER BY timestamp; +2010-03-02 02:01:01 +2010-03-02 02:01:03 +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; +201003 +-- Should not be allowed because data would be split into two different partitions +DROP TABLE IF EXISTS source SYNC; +DROP TABLE IF EXISTS destination SYNC; +CREATE TABLE source (timestamp DateTime) engine=MergeTree ORDER BY tuple() PARTITION BY toYYYYMM(timestamp); +CREATE TABLE destination (timestamp DateTime) engine=MergeTree ORDER BY tuple() PARTITION BY toYYYYMMDD(timestamp); +INSERT INTO TABLE source VALUES ('2010-03-02 02:01:01'), ('2010-03-03 02:01:03'); +ALTER TABLE destination ATTACH PARTITION ID '201003' FROM source; -- { serverError 248 } +ALTER TABLE destination ATTACH PARTITION '201003' from source; -- { serverError 248 } +-- Should not be allowed because data would be split into two different partitions +DROP TABLE IF EXISTS source; +DROP TABLE IF EXISTS destination; +CREATE TABLE source (timestamp DateTime, A Int64) engine=MergeTree ORDER BY timestamp PARTITION BY intDiv(A, 6); +CREATE TABLE destination (timestamp DateTime, A Int64) engine=MergeTree ORDER BY timestamp PARTITION BY A; +INSERT INTO TABLE source VALUES ('2010-03-02 02:01:01', 1), ('2010-03-02 02:01:03', 2); +ALTER TABLE destination ATTACH PARTITION ID '0' FROM source; -- { serverError 248 } +ALTER TABLE destination ATTACH PARTITION 0 FROM source; -- { serverError 248 } +-- Should not be allowed because dst partition exp takes more than two arguments, so it's not considered monotonically inc +DROP TABLE IF EXISTS source; +DROP TABLE IF EXISTS destination; +CREATE TABLE source (productName String, category String) engine=MergeTree ORDER BY tuple() PARTITION BY toString(category); +CREATE TABLE destination (productName String, category String) engine=MergeTree ORDER BY tuple() PARTITION BY substring(category, 1, 2); +INSERT INTO TABLE source VALUES ('spaghetti', 'food'), ('mop', 'general'); +INSERT INTO TABLE source VALUES ('rice', 'food'); +ALTER TABLE destination ATTACH PARTITION ID '4590ba78048910b74a47d5bfb308abed' from source; -- { serverError 36 } +ALTER TABLE destination ATTACH PARTITION 'food' from source; -- { serverError 36 } +-- Should not be allowed because dst partition exp depends on a different set of columns +DROP TABLE IF EXISTS source; +DROP TABLE IF EXISTS destination; +CREATE TABLE source (productName String, category 
String) engine=MergeTree ORDER BY tuple() PARTITION BY toString(category); +CREATE TABLE destination (productName String, category String) engine=MergeTree ORDER BY tuple() PARTITION BY toString(productName); +INSERT INTO TABLE source VALUES ('spaghetti', 'food'), ('mop', 'general'); +INSERT INTO TABLE source VALUES ('rice', 'food'); +ALTER TABLE destination ATTACH PARTITION ID '4590ba78048910b74a47d5bfb308abed' from source; -- { serverError 36 } +ALTER TABLE destination ATTACH PARTITION 'food' from source; -- { serverError 36 } +-- Should not be allowed because dst partition exp is not monotonically increasing +DROP TABLE IF EXISTS source; +DROP TABLE IF EXISTS destination; +CREATE TABLE source (productName String) engine=MergeTree ORDER BY tuple() PARTITION BY left(productName, 2); +CREATE TABLE destination (productName String) engine=MergeTree ORDER BY tuple() PARTITION BY cityHash64(productName); +INSERT INTO TABLE source VALUES ('bread'), ('mop'); +INSERT INTO TABLE source VALUES ('broccoli'); +ALTER TABLE destination ATTACH PARTITION ID '4589453b7ee96ce9de1265bd57674496' from source; -- { serverError 36 } +ALTER TABLE destination ATTACH PARTITION 'br' from source; -- { serverError 36 } +-- Empty/ non-existent partition, same partition expression. Nothing should happen +DROP TABLE IF EXISTS source; +DROP TABLE IF EXISTS destination; +CREATE TABLE source (timestamp DateTime) engine=MergeTree ORDER BY tuple() PARTITION BY toYYYYMM(timestamp); +CREATE TABLE destination (timestamp DateTime) engine=MergeTree ORDER BY tuple() PARTITION BY toYYYYMM(timestamp); +ALTER TABLE destination ATTACH PARTITION ID '1' FROM source; +ALTER TABLE destination ATTACH PARTITION 1 FROM source; +SELECT * FROM destination; +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; +-- Empty/ non-existent partition, different partition expression. Nothing should happen +-- https://github.com/ClickHouse/ClickHouse/pull/39507#discussion_r1399839045 +DROP TABLE IF EXISTS source; +DROP TABLE IF EXISTS destination; +CREATE TABLE source (timestamp DateTime) engine=MergeTree ORDER BY tuple() PARTITION BY toYYYYMMDD(timestamp); +CREATE TABLE destination (timestamp DateTime) engine=MergeTree ORDER BY tuple() PARTITION BY toYYYYMM(timestamp); +ALTER TABLE destination ATTACH PARTITION ID '1' FROM source; +ALTER TABLE destination ATTACH PARTITION 1 FROM source; +SELECT * FROM destination; +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; +-- Replace instead of attach. Empty/ non-existent partition, same partition expression. Nothing should happen +-- https://github.com/ClickHouse/ClickHouse/pull/39507#discussion_r1399839045 +DROP TABLE IF EXISTS source; +DROP TABLE IF EXISTS destination; +CREATE TABLE source (timestamp DateTime) engine=MergeTree ORDER BY tuple() PARTITION BY toYYYYMM(timestamp); +CREATE TABLE destination (timestamp DateTime) engine=MergeTree ORDER BY tuple() PARTITION BY toYYYYMM(timestamp); +ALTER TABLE destination REPLACE PARTITION '1' FROM source; +SELECT * FROM destination; +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; +-- Replace instead of attach. Empty/ non-existent partition to non-empty partition, same partition id. 
+-- https://github.com/ClickHouse/ClickHouse/pull/39507#discussion_r1399839045 +DROP TABLE IF EXISTS source; +DROP TABLE IF EXISTS destination; +CREATE TABLE source (A Int) engine=MergeTree ORDER BY tuple() PARTITION BY A; +CREATE TABLE destination (A Int) engine=MergeTree ORDER BY tuple() PARTITION BY A; +INSERT INTO TABLE destination VALUES (1); +ALTER TABLE destination REPLACE PARTITION '1' FROM source; +SELECT * FROM destination; +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; diff --git a/tests/queries/0_stateless/02456_test_attach_partition_different_partition_exp.sql b/tests/queries/0_stateless/02456_test_attach_partition_different_partition_exp.sql new file mode 100644 index 00000000000..9547d6ae249 --- /dev/null +++ b/tests/queries/0_stateless/02456_test_attach_partition_different_partition_exp.sql @@ -0,0 +1,485 @@ +-- { echoOn } +-- Should be allowed since destination partition expr is monotonically increasing and compatible +DROP TABLE IF EXISTS source; +DROP TABLE IF EXISTS destination; + +CREATE TABLE source (timestamp DateTime) engine=MergeTree ORDER BY tuple() PARTITION BY toYYYYMMDD(timestamp); +CREATE TABLE destination (timestamp DateTime) engine=MergeTree ORDER BY tuple() PARTITION BY toYYYYMM(timestamp); + +INSERT INTO TABLE source VALUES ('2010-03-02 02:01:01'), ('2010-03-02 02:01:03'); + +ALTER TABLE destination ATTACH PARTITION ID '20100302' FROM source; + +SELECT * FROM source ORDER BY timestamp; +SELECT * FROM destination ORDER BY timestamp; +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; + +TRUNCATE TABLE destination; + +ALTER TABLE destination ATTACH PARTITION '20100302' FROM source; + +SELECT * FROM source ORDER BY timestamp; +SELECT * FROM destination ORDER BY timestamp; +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; + +-- Should be allowed since destination partition expr is monotonically increasing and compatible. Note that even though +-- the destination partition expression is more granular, the data would still fall in the same partition. 
Thus, it is valid +DROP TABLE IF EXISTS source; +DROP TABLE IF EXISTS destination; + +CREATE TABLE source (timestamp DateTime) engine=MergeTree ORDER BY tuple() PARTITION BY toYYYYMM(timestamp); +CREATE TABLE destination (timestamp DateTime) engine=MergeTree ORDER BY tuple() PARTITION BY toYYYYMMDD(timestamp); + +INSERT INTO TABLE source VALUES ('2010-03-02 02:01:01'), ('2010-03-02 02:01:03'); + +ALTER TABLE destination ATTACH PARTITION ID '201003' FROM source; + +SELECT * FROM source ORDER BY timestamp; +SELECT * FROM destination ORDER BY timestamp; +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; + +TRUNCATE TABLE destination; + +ALTER TABLE destination ATTACH PARTITION '201003' FROM source; + +SELECT * FROM source ORDER BY timestamp; +SELECT * FROM destination ORDER BY timestamp; +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; + +-- Should be allowed since destination partition expr is monotonically increasing and compatible for those specific values +DROP TABLE IF EXISTS source; +DROP TABLE IF EXISTS destination; + +CREATE TABLE source (timestamp DateTime, A Int64) engine=MergeTree ORDER BY timestamp PARTITION BY intDiv(A, 6); + +CREATE TABLE destination (timestamp DateTime, A Int64) engine=MergeTree ORDER BY timestamp PARTITION BY A; + +INSERT INTO TABLE source VALUES ('2010-03-02 02:01:01', 1), ('2010-03-02 02:01:03', 1); + +ALTER TABLE destination ATTACH PARTITION ID '0' FROM source; + +SELECT * FROM source ORDER BY timestamp; +SELECT * FROM destination ORDER BY timestamp; +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; + +TRUNCATE TABLE destination; + +ALTER TABLE destination ATTACH PARTITION 0 FROM source; + +SELECT * FROM source ORDER BY timestamp; +SELECT * FROM destination ORDER BY timestamp; +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; + +-- Should be allowed because dst partition exp is monot inc and data is not split +DROP TABLE IF EXISTS source; +DROP TABLE IF EXISTS destination; + +CREATE TABLE source (productName String, category String) engine=MergeTree ORDER BY tuple() PARTITION BY cityHash64(category); +CREATE TABLE destination (productName String, category String) engine=MergeTree ORDER BY tuple() PARTITION BY toString(category); + +INSERT INTO TABLE source VALUES ('spaghetti', 'food'), ('mop', 'general'); +INSERT INTO TABLE source VALUES ('rice', 'food'); + +ALTER TABLE destination ATTACH PARTITION ID '17908065610379824077' from source; + +SELECT * FROM source ORDER BY productName; +SELECT * FROM destination ORDER BY productName; +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; + +TRUNCATE TABLE destination; + +ALTER TABLE destination ATTACH PARTITION '17908065610379824077' from source; + +SELECT * FROM source ORDER BY productName; +SELECT * FROM destination ORDER BY productName; +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; + +-- Should be allowed, extra test case to validate https://github.com/ClickHouse/ClickHouse/pull/39507#issuecomment-1747574133 + +DROP TABLE IF EXISTS source; +DROP TABLE IF EXISTS destination; + +CREATE TABLE source (timestamp Int64) engine=MergeTree ORDER BY (timestamp) PARTITION BY intDiv(timestamp, 86400000); +CREATE TABLE destination (timestamp 
Int64) engine=MergeTree ORDER BY (timestamp) PARTITION BY toYear(toDateTime(intDiv(timestamp, 1000))); + +INSERT INTO TABLE source VALUES (1267495261123); + +ALTER TABLE destination ATTACH PARTITION ID '14670' FROM source; + +SELECT * FROM source ORDER BY timestamp; +SELECT * FROM destination ORDER BY timestamp; +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; + +TRUNCATE TABLE destination; + +ALTER TABLE destination ATTACH PARTITION '14670' from source; + +SELECT * FROM source ORDER BY timestamp; +SELECT * FROM destination ORDER BY timestamp; +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; + +-- Should be allowed, extra test case to validate https://github.com/ClickHouse/ClickHouse/pull/39507#issuecomment-1747511726 + +DROP TABLE IF EXISTS source; +DROP TABLE IF EXISTS destination; + +CREATE TABLE source (timestamp DateTime('UTC'), key Int64, f Float64) engine=MergeTree ORDER BY (key, timestamp) PARTITION BY toYear(timestamp); +CREATE TABLE destination (timestamp DateTime('UTC'), key Int64, f Float64) engine=MergeTree ORDER BY (key, timestamp) PARTITION BY (intDiv(toUInt32(timestamp),86400)); + +INSERT INTO TABLE source VALUES ('2010-03-02 02:01:01',1,1),('2010-03-02 02:01:01',1,1),('2011-02-02 02:01:03',1,1); + +ALTER TABLE destination ATTACH PARTITION ID '2010' FROM source; + +SELECT * FROM source ORDER BY timestamp; +SELECT * FROM destination ORDER BY timestamp; +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; + +TRUNCATE TABLE destination; + +ALTER TABLE destination ATTACH PARTITION '2010' from source; + +SELECT * FROM source ORDER BY timestamp; +SELECT * FROM destination ORDER BY timestamp; +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; + +-- Should be allowed, partitioned table to unpartitioned. Since the destination is unpartitioned, parts would ultimately +-- fall into the same partition. +-- Destination partition by expression is omitted, which causes StorageMetadata::getPartitionKeyAST() to be nullptr. +DROP TABLE IF EXISTS source; +DROP TABLE IF EXISTS destination; +CREATE TABLE source (timestamp DateTime) engine=MergeTree ORDER BY tuple() PARTITION BY toYYYYMM(timestamp); +CREATE TABLE destination (timestamp DateTime) engine=MergeTree ORDER BY tuple(); + +INSERT INTO TABLE source VALUES ('2010-03-02 02:01:01'), ('2010-03-02 02:01:03'); + +ALTER TABLE destination ATTACH PARTITION ID '201003' FROM source; + +SELECT * FROM source ORDER BY timestamp; +SELECT * FROM destination ORDER BY timestamp; +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; + +TRUNCATE TABLE destination; + +ALTER TABLE destination ATTACH PARTITION '201003' from source; + +SELECT * FROM source ORDER BY timestamp; +SELECT * FROM destination ORDER BY timestamp; +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; + +-- Same as above, but destination partition by expression is explicitly defined. Test case required to validate that +-- partition by tuple() is accepted. 
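Both the omitted PARTITION BY and the explicit PARTITION BY tuple() variant behave the same way: every part of an unpartitioned MergeTree table lands in the single partition with id 'all', which is what the system.parts checks in these two cases assert. A minimal standalone sketch (the table name 'unpartitioned' is only illustrative):

CREATE TABLE unpartitioned (x Int) ENGINE = MergeTree ORDER BY tuple();
INSERT INTO unpartitioned VALUES (1);
SELECT partition_id FROM system.parts WHERE table = 'unpartitioned' AND active;
-- all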
+DROP TABLE IF EXISTS source; +DROP TABLE IF EXISTS destination; +CREATE TABLE source (timestamp DateTime) engine=MergeTree ORDER BY tuple() PARTITION BY toYYYYMM(timestamp); +CREATE TABLE destination (timestamp DateTime) engine=MergeTree ORDER BY tuple() PARTITION BY tuple(); + +INSERT INTO TABLE source VALUES ('2010-03-02 02:01:01'), ('2010-03-02 02:01:03'); + +ALTER TABLE destination ATTACH PARTITION ID '201003' FROM source; + +SELECT * FROM source ORDER BY timestamp; +SELECT * FROM destination ORDER BY timestamp; +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; + +TRUNCATE TABLE destination; + +ALTER TABLE destination ATTACH PARTITION '201003' from source; + +SELECT * FROM source ORDER BY timestamp; +SELECT * FROM destination ORDER BY timestamp; +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; + +-- Should be allowed because the destination partition expression columns are a subset of the source partition expression columns +-- Columns in this case refer to the expression elements, not to the actual table columns +DROP TABLE IF EXISTS source; +DROP TABLE IF EXISTS destination; +CREATE TABLE source (a Int, b Int, c Int) engine=MergeTree ORDER BY tuple() PARTITION BY (a, b, c); +CREATE TABLE destination (a Int, b Int, c Int) engine=MergeTree ORDER BY tuple() PARTITION BY (a, b); + +INSERT INTO TABLE source VALUES (1, 2, 3), (1, 2, 4); + +ALTER TABLE destination ATTACH PARTITION ID '1-2-3' FROM source; + +SELECT * FROM source ORDER BY (a, b, c); +SELECT * FROM destination ORDER BY (a, b, c); +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; + +TRUNCATE TABLE destination; + +ALTER TABLE destination ATTACH PARTITION (1, 2, 3) from source; + +SELECT * FROM source ORDER BY (a, b, c); +SELECT * FROM destination ORDER BY (a, b, c); +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; + +-- Should be allowed because the destination partition expression columns are a subset of the source partition expression columns +-- Columns in this case refer to the expression elements, not to the actual table columns +DROP TABLE IF EXISTS source; +DROP TABLE IF EXISTS destination; +CREATE TABLE source (a Int, b Int, c Int) engine=MergeTree ORDER BY tuple() PARTITION BY (a, b, c); +CREATE TABLE destination (a Int, b Int, c Int) engine=MergeTree ORDER BY tuple() PARTITION BY a; + +INSERT INTO TABLE source VALUES (1, 2, 3), (1, 2, 4); + +ALTER TABLE destination ATTACH PARTITION ID '1-2-3' FROM source; + +SELECT * FROM source ORDER BY (a, b, c); +SELECT * FROM destination ORDER BY (a, b, c); +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; + +TRUNCATE TABLE destination; + +ALTER TABLE destination ATTACH PARTITION (1, 2, 3) from source; + +SELECT * FROM source ORDER BY (a, b, c); +SELECT * FROM destination ORDER BY (a, b, c); +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; + +-- Should be allowed. Special test case, tricky to explain. First column of source partition expression is +-- timestamp, while first column of destination partition expression is `A`. One of the previous implementations +-- would not match the columns, which could lead to `timestamp` min max being used to calculate monotonicity of `A`. 
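In the case below the destination expression A must be evaluated over the min/max of column A (5..5), not over the min/max of timestamp; with the single inserted row both partition ids are easy to recompute:

SELECT toYYYYMM(toDateTime('2010-03-02 02:01:01')) AS source_elem_1,
       intDiv(5, 6) AS source_elem_2,
       5 AS destination_value;
-- 201003	0	5   -> source partition id '201003-0', destination partition id '5'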
+DROP TABLE IF EXISTS source; +DROP TABLE IF EXISTS destination; + +CREATE TABLE source (`timestamp` DateTime, `A` Int64) ENGINE = MergeTree PARTITION BY tuple(toYYYYMM(timestamp), intDiv(A, 6)) ORDER BY timestamp; +CREATE TABLE destination (`timestamp` DateTime, `A` Int64) ENGINE = MergeTree PARTITION BY A ORDER BY timestamp; + +INSERT INTO TABLE source VALUES ('2010-03-02 02:01:01', 5); + +ALTER TABLE destination ATTACH PARTITION ID '201003-0' FROM source; + +SELECT * FROM source ORDER BY timestamp; +SELECT * FROM destination ORDER BY timestamp; +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; + +TRUNCATE TABLE destination; + +ALTER TABLE destination ATTACH PARTITION (201003, 0) from source; + +SELECT * FROM source ORDER BY timestamp; +SELECT * FROM destination ORDER BY timestamp; +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; + +-- Should be allowed. Destination partition expression contains multiple expressions, but all of them are monotonically +-- increasing in the source partition min max indexes. +DROP TABLE IF EXISTS source; +DROP TABLE IF EXISTS destination; + +CREATE TABLE source (A Int, B Int) ENGINE = MergeTree PARTITION BY tuple(A, B) ORDER BY tuple(); +CREATE TABLE destination (A Int, B Int) ENGINE = MergeTree PARTITION BY tuple(intDiv(A, 2), intDiv(B, 2)) ORDER BY tuple(); + +INSERT INTO TABLE source VALUES (6, 12); + +ALTER TABLE destination ATTACH PARTITION ID '6-12' FROM source; + +SELECT * FROM source ORDER BY A; +SELECT * FROM destination ORDER BY A; +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; + +TRUNCATE TABLE destination; + +ALTER TABLE destination ATTACH PARTITION (6, 12) from source; + +SELECT * FROM source ORDER BY A; +SELECT * FROM destination ORDER BY A; +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; + +-- Should be allowed. The same scenario as above, but partition expressions inverted. +DROP TABLE IF EXISTS source; +DROP TABLE IF EXISTS destination; + +CREATE TABLE source (A Int, B Int) ENGINE = MergeTree PARTITION BY tuple(intDiv(A, 2), intDiv(B, 2)) ORDER BY tuple(); +CREATE TABLE destination (A Int, B Int) ENGINE = MergeTree PARTITION BY tuple(A, B) ORDER BY tuple(); + +INSERT INTO TABLE source VALUES (6, 12); + +ALTER TABLE destination ATTACH PARTITION ID '3-6' FROM source; + +SELECT * FROM source ORDER BY A; +SELECT * FROM destination ORDER BY A; +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; + +TRUNCATE TABLE destination; + +ALTER TABLE destination ATTACH PARTITION (3, 6) from source; + +SELECT * FROM source ORDER BY A; +SELECT * FROM destination ORDER BY A; +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; + +-- Should be allowed, it is a local operation, no different than regular attach. Replicated to replicated. 
+DROP TABLE IF EXISTS source; +DROP TABLE IF EXISTS destination; +CREATE TABLE + source(timestamp DateTime) + ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/test/source_replicated_to_replicated_distinct_expression', '1') + PARTITION BY toYYYYMMDD(timestamp) + ORDER BY tuple(); + +CREATE TABLE + destination(timestamp DateTime) + ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/test/destination_replicated_to_replicated_distinct_expression', '1') + PARTITION BY toYYYYMM(timestamp) + ORDER BY tuple(); + +INSERT INTO TABLE source VALUES ('2010-03-02 02:01:01'), ('2010-03-02 02:01:03'); + +ALTER TABLE destination ATTACH PARTITION ID '20100302' FROM source; + +SELECT * FROM source ORDER BY timestamp; +SELECT * FROM destination ORDER BY timestamp; +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; + +TRUNCATE TABLE destination; + +ALTER TABLE destination ATTACH PARTITION '20100302' from source; + +SELECT * FROM source ORDER BY timestamp; +SELECT * FROM destination ORDER BY timestamp; +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; + +-- Should be allowed, it is a local operation, no different than regular attach. Non replicated to replicated +DROP TABLE IF EXISTS source SYNC; +DROP TABLE IF EXISTS destination SYNC; +CREATE TABLE source(timestamp DateTime) ENGINE = MergeTree() PARTITION BY toYYYYMMDD(timestamp) ORDER BY tuple(); + +CREATE TABLE + destination(timestamp DateTime) + ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/test/destination_non_replicated_to_replicated_distinct_expression', '1') + PARTITION BY toYYYYMM(timestamp) + ORDER BY tuple(); + +INSERT INTO TABLE source VALUES ('2010-03-02 02:01:01'), ('2010-03-02 02:01:03'); + +ALTER TABLE destination ATTACH PARTITION ID '20100302' FROM source; + +SELECT * FROM source ORDER BY timestamp; +SELECT * FROM destination ORDER BY timestamp; +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; + +TRUNCATE TABLE destination; + +ALTER TABLE destination ATTACH PARTITION '20100302' from source; + +SELECT * FROM source ORDER BY timestamp; +SELECT * FROM destination ORDER BY timestamp; +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; + +-- Should not be allowed because data would be split into two different partitions +DROP TABLE IF EXISTS source SYNC; +DROP TABLE IF EXISTS destination SYNC; + +CREATE TABLE source (timestamp DateTime) engine=MergeTree ORDER BY tuple() PARTITION BY toYYYYMM(timestamp); +CREATE TABLE destination (timestamp DateTime) engine=MergeTree ORDER BY tuple() PARTITION BY toYYYYMMDD(timestamp); + +INSERT INTO TABLE source VALUES ('2010-03-02 02:01:01'), ('2010-03-03 02:01:03'); + +ALTER TABLE destination ATTACH PARTITION ID '201003' FROM source; -- { serverError 248 } +ALTER TABLE destination ATTACH PARTITION '201003' from source; -- { serverError 248 } + +-- Should not be allowed because data would be split into two different partitions +DROP TABLE IF EXISTS source; +DROP TABLE IF EXISTS destination; + +CREATE TABLE source (timestamp DateTime, A Int64) engine=MergeTree ORDER BY timestamp PARTITION BY intDiv(A, 6); + +CREATE TABLE destination (timestamp DateTime, A Int64) engine=MergeTree ORDER BY timestamp PARTITION BY A; + +INSERT INTO TABLE source VALUES ('2010-03-02 02:01:01', 1), ('2010-03-02 02:01:03', 2); + +ALTER TABLE destination 
ATTACH PARTITION ID '0' FROM source; -- { serverError 248 } +ALTER TABLE destination ATTACH PARTITION 0 FROM source; -- { serverError 248 } + +-- Should not be allowed because dst partition exp takes more than two arguments, so it's not considered monotonically inc +DROP TABLE IF EXISTS source; +DROP TABLE IF EXISTS destination; + +CREATE TABLE source (productName String, category String) engine=MergeTree ORDER BY tuple() PARTITION BY toString(category); +CREATE TABLE destination (productName String, category String) engine=MergeTree ORDER BY tuple() PARTITION BY substring(category, 1, 2); + +INSERT INTO TABLE source VALUES ('spaghetti', 'food'), ('mop', 'general'); +INSERT INTO TABLE source VALUES ('rice', 'food'); + +ALTER TABLE destination ATTACH PARTITION ID '4590ba78048910b74a47d5bfb308abed' from source; -- { serverError 36 } +ALTER TABLE destination ATTACH PARTITION 'food' from source; -- { serverError 36 } + +-- Should not be allowed because dst partition exp depends on a different set of columns +DROP TABLE IF EXISTS source; +DROP TABLE IF EXISTS destination; + +CREATE TABLE source (productName String, category String) engine=MergeTree ORDER BY tuple() PARTITION BY toString(category); +CREATE TABLE destination (productName String, category String) engine=MergeTree ORDER BY tuple() PARTITION BY toString(productName); + +INSERT INTO TABLE source VALUES ('spaghetti', 'food'), ('mop', 'general'); +INSERT INTO TABLE source VALUES ('rice', 'food'); + +ALTER TABLE destination ATTACH PARTITION ID '4590ba78048910b74a47d5bfb308abed' from source; -- { serverError 36 } +ALTER TABLE destination ATTACH PARTITION 'food' from source; -- { serverError 36 } + +-- Should not be allowed because dst partition exp is not monotonically increasing +DROP TABLE IF EXISTS source; +DROP TABLE IF EXISTS destination; + +CREATE TABLE source (productName String) engine=MergeTree ORDER BY tuple() PARTITION BY left(productName, 2); +CREATE TABLE destination (productName String) engine=MergeTree ORDER BY tuple() PARTITION BY cityHash64(productName); + +INSERT INTO TABLE source VALUES ('bread'), ('mop'); +INSERT INTO TABLE source VALUES ('broccoli'); + +ALTER TABLE destination ATTACH PARTITION ID '4589453b7ee96ce9de1265bd57674496' from source; -- { serverError 36 } +ALTER TABLE destination ATTACH PARTITION 'br' from source; -- { serverError 36 } + +-- Empty/ non-existent partition, same partition expression. Nothing should happen +DROP TABLE IF EXISTS source; +DROP TABLE IF EXISTS destination; + +CREATE TABLE source (timestamp DateTime) engine=MergeTree ORDER BY tuple() PARTITION BY toYYYYMM(timestamp); +CREATE TABLE destination (timestamp DateTime) engine=MergeTree ORDER BY tuple() PARTITION BY toYYYYMM(timestamp); + +ALTER TABLE destination ATTACH PARTITION ID '1' FROM source; +ALTER TABLE destination ATTACH PARTITION 1 FROM source; + +SELECT * FROM destination; +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; + +-- Empty/ non-existent partition, different partition expression. 
Nothing should happen +-- https://github.com/ClickHouse/ClickHouse/pull/39507#discussion_r1399839045 +DROP TABLE IF EXISTS source; +DROP TABLE IF EXISTS destination; + +CREATE TABLE source (timestamp DateTime) engine=MergeTree ORDER BY tuple() PARTITION BY toYYYYMMDD(timestamp); +CREATE TABLE destination (timestamp DateTime) engine=MergeTree ORDER BY tuple() PARTITION BY toYYYYMM(timestamp); + +ALTER TABLE destination ATTACH PARTITION ID '1' FROM source; +ALTER TABLE destination ATTACH PARTITION 1 FROM source; + +SELECT * FROM destination; +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; + +-- Replace instead of attach. Empty/ non-existent partition, same partition expression. Nothing should happen +-- https://github.com/ClickHouse/ClickHouse/pull/39507#discussion_r1399839045 +DROP TABLE IF EXISTS source; +DROP TABLE IF EXISTS destination; + +CREATE TABLE source (timestamp DateTime) engine=MergeTree ORDER BY tuple() PARTITION BY toYYYYMM(timestamp); +CREATE TABLE destination (timestamp DateTime) engine=MergeTree ORDER BY tuple() PARTITION BY toYYYYMM(timestamp); + +ALTER TABLE destination REPLACE PARTITION '1' FROM source; + +SELECT * FROM destination; +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; + +-- Replace instead of attach. Empty/ non-existent partition to non-empty partition, same partition id. +-- https://github.com/ClickHouse/ClickHouse/pull/39507#discussion_r1399839045 +DROP TABLE IF EXISTS source; +DROP TABLE IF EXISTS destination; + +CREATE TABLE source (A Int) engine=MergeTree ORDER BY tuple() PARTITION BY A; +CREATE TABLE destination (A Int) engine=MergeTree ORDER BY tuple() PARTITION BY A; + +INSERT INTO TABLE destination VALUES (1); + +ALTER TABLE destination REPLACE PARTITION '1' FROM source; + +SELECT * FROM destination; +SELECT partition_id FROM system.parts where table='destination' AND database = currentDatabase() AND active = 1; diff --git a/tests/queries/0_stateless/02475_bson_each_row_format.sh b/tests/queries/0_stateless/02475_bson_each_row_format.sh index aa58d27fa50..f5c48608639 100755 --- a/tests/queries/0_stateless/02475_bson_each_row_format.sh +++ b/tests/queries/0_stateless/02475_bson_each_row_format.sh @@ -5,6 +5,12 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh +# In case of parallel parsing and small block +# (--min_chunk_bytes_for_parallel_parsing) we may have multiple blocks, and +# this will break sorting order, so let's limit number of threads to avoid +# reordering. 
+CLICKHOUSE_CLIENT+="--allow_repeated_settings --max_threads 1" + echo "Integers" $CLICKHOUSE_CLIENT -q "insert into function file(02475_data.bsonEachRow) select number::Bool as bool, number::Int8 as int8, number::UInt8 as uint8, number::Int16 as int16, number::UInt16 as uint16, number::Int32 as int32, number::UInt32 as uint32, number::Int64 as int64, number::UInt64 as uint64 from numbers(5) settings engine_file_truncate_on_insert=1" $CLICKHOUSE_CLIENT -q "select * from file(02475_data.bsonEachRow, auto, 'bool Bool, int8 Int8, uint8 UInt8, int16 Int16, uint16 UInt16, int32 Int32, uint32 UInt32, int64 Int64, uint64 UInt64')" diff --git a/tests/queries/0_stateless/02494_query_cache_user_quotas_after_drop.reference b/tests/queries/0_stateless/02494_query_cache_user_quotas_after_drop.reference new file mode 100644 index 00000000000..5bfc400b254 --- /dev/null +++ b/tests/queries/0_stateless/02494_query_cache_user_quotas_after_drop.reference @@ -0,0 +1,13 @@ +a +b +1 +c +d +3 +-- +a +b +1 +c +d +3 diff --git a/tests/queries/0_stateless/02494_query_cache_user_quotas_after_drop.sql b/tests/queries/0_stateless/02494_query_cache_user_quotas_after_drop.sql new file mode 100644 index 00000000000..f09e43ee052 --- /dev/null +++ b/tests/queries/0_stateless/02494_query_cache_user_quotas_after_drop.sql @@ -0,0 +1,41 @@ +-- Tags: no-parallel +-- Tag no-parallel: Messes with internal cache + +-- Tests per-user quotas of the query cache. Settings 'query_cache_max_size_in_bytes' and 'query_cache_max_entries' are actually supposed to +-- be used in a settings profile, together with a readonly constraint. For simplicity, test both settings stand-alone in a stateless test +-- instead of an integration test - the relevant logic will still be covered by that. + +SYSTEM DROP QUERY CACHE; + +-- Run SELECT with quota that current user may write only 1 entry in the query cache +SET query_cache_max_entries = 1; +SELECT 'a' SETTINGS use_query_cache = true; +SELECT 'b' SETTINGS use_query_cache = true; +SELECT count(*) FROM system.query_cache; -- expect 1 entry + +-- Run SELECTs again but w/o quota +SET query_cache_max_entries = DEFAULT; +SELECT 'c' SETTINGS use_query_cache = true; +SELECT 'd' SETTINGS use_query_cache = true; +SELECT count(*) FROM system.query_cache; -- expect 3 entries + +SYSTEM DROP QUERY CACHE; + +-- Run the same as above after a DROP QUERY CACHE. 
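As the comment at the top of 02494_query_cache_user_quotas_after_drop.sql notes, query_cache_max_entries and query_cache_max_size_in_bytes are intended to be pinned per user through a settings profile with a readonly constraint rather than set ad hoc; a rough sketch of such a profile (the profile and user names are made up for illustration, and the byte limit is arbitrary):

CREATE SETTINGS PROFILE query_cache_quota
    SETTINGS query_cache_max_entries = 1 READONLY,
             query_cache_max_size_in_bytes = 10000000 READONLY
    TO some_user;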
+SELECT '--'; + +SET query_cache_max_entries = 1; +SELECT 'a' SETTINGS use_query_cache = true; +SELECT 'b' SETTINGS use_query_cache = true; +SELECT count(*) FROM system.query_cache; -- expect 1 entry + +-- Run SELECTs again but w/o quota +SET query_cache_max_entries = DEFAULT; +SELECT 'c' SETTINGS use_query_cache = true; +SELECT 'd' SETTINGS use_query_cache = true; +SELECT count(*) FROM system.query_cache; -- expect 3 entries + +SYSTEM DROP QUERY CACHE; + +-- SELECT '---'; + diff --git a/tests/queries/0_stateless/02495_concat_with_separator.reference b/tests/queries/0_stateless/02495_concat_with_separator.reference index 8f0ea917f4b..ebff5deb6aa 100644 --- a/tests/queries/0_stateless/02495_concat_with_separator.reference +++ b/tests/queries/0_stateless/02495_concat_with_separator.reference @@ -14,6 +14,45 @@ 1 1 1 +1 +1 \N \N \N +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +0 diff --git a/tests/queries/0_stateless/02495_concat_with_separator.sql b/tests/queries/0_stateless/02495_concat_with_separator.sql index 916c4cda1b7..7167d48a1da 100644 --- a/tests/queries/0_stateless/02495_concat_with_separator.sql +++ b/tests/queries/0_stateless/02495_concat_with_separator.sql @@ -1,27 +1,72 @@ -select concatWithSeparator('|', 'a', 'b') == 'a|b'; -select concatWithSeparator('|', 'a', materialize('b')) == 'a|b'; -select concatWithSeparator('|', materialize('a'), 'b') == 'a|b'; -select concatWithSeparator('|', materialize('a'), materialize('b')) == 'a|b'; +SET allow_suspicious_low_cardinality_types=1; -select concatWithSeparator('|', 'a', toFixedString('b', 1)) == 'a|b'; -select concatWithSeparator('|', 'a', materialize(toFixedString('b', 1))) == 'a|b'; -select concatWithSeparator('|', materialize('a'), toFixedString('b', 1)) == 'a|b'; -select concatWithSeparator('|', materialize('a'), materialize(toFixedString('b', 1))) == 'a|b'; +-- negative tests +SELECT concatWithSeparator(materialize('|'), 'a', 'b'); -- { serverError ILLEGAL_COLUMN } +SELECT concatWithSeparator(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } -select concatWithSeparator('|', toFixedString('a', 1), 'b') == 'a|b'; -select concatWithSeparator('|', toFixedString('a', 1), materialize('b')) == 'a|b'; -select concatWithSeparator('|', materialize(toFixedString('a', 1)), 'b') == 'a|b'; -select concatWithSeparator('|', materialize(toFixedString('a', 1)), materialize('b')) == 'a|b'; +-- special cases +SELECT concatWithSeparator('|') = ''; +SELECT concatWithSeparator('|', 'a') == 'a'; -select concatWithSeparator('|', toFixedString('a', 1), toFixedString('b', 1)) == 'a|b'; -select concatWithSeparator('|', toFixedString('a', 1), materialize(toFixedString('b', 1))) == 'a|b'; -select concatWithSeparator('|', materialize(toFixedString('a', 1)), toFixedString('b', 1)) == 'a|b'; -select concatWithSeparator('|', materialize(toFixedString('a', 1)), materialize(toFixedString('b', 1))) == 'a|b'; +SELECT concatWithSeparator('|', 'a', 'b') == 'a|b'; +SELECT concatWithSeparator('|', 'a', materialize('b')) == 'a|b'; +SELECT concatWithSeparator('|', materialize('a'), 'b') == 'a|b'; +SELECT concatWithSeparator('|', materialize('a'), materialize('b')) == 'a|b'; -select concatWithSeparator(null, 'a', 'b') == null; -select concatWithSeparator('1', null, 'b') == null; -select concatWithSeparator('1', 'a', null) == null; +SELECT concatWithSeparator('|', 'a', toFixedString('b', 1)) == 'a|b'; +SELECT concatWithSeparator('|', 'a', materialize(toFixedString('b', 1))) == 'a|b'; +SELECT 
concatWithSeparator('|', materialize('a'), toFixedString('b', 1)) == 'a|b'; +SELECT concatWithSeparator('|', materialize('a'), materialize(toFixedString('b', 1))) == 'a|b'; -select concatWithSeparator(materialize('|'), 'a', 'b'); -- { serverError 44 } -select concatWithSeparator(); -- { serverError 42 } -select concatWithSeparator('|', 'a', 100); -- { serverError 43 } +SELECT concatWithSeparator('|', toFixedString('a', 1), 'b') == 'a|b'; +SELECT concatWithSeparator('|', toFixedString('a', 1), materialize('b')) == 'a|b'; +SELECT concatWithSeparator('|', materialize(toFixedString('a', 1)), 'b') == 'a|b'; +SELECT concatWithSeparator('|', materialize(toFixedString('a', 1)), materialize('b')) == 'a|b'; + +SELECT concatWithSeparator('|', toFixedString('a', 1), toFixedString('b', 1)) == 'a|b'; +SELECT concatWithSeparator('|', toFixedString('a', 1), materialize(toFixedString('b', 1))) == 'a|b'; +SELECT concatWithSeparator('|', materialize(toFixedString('a', 1)), toFixedString('b', 1)) == 'a|b'; +SELECT concatWithSeparator('|', materialize(toFixedString('a', 1)), materialize(toFixedString('b', 1))) == 'a|b'; + +SELECT concatWithSeparator(null, 'a', 'b') == null; +SELECT concatWithSeparator('1', null, 'b') == null; +SELECT concatWithSeparator('1', 'a', null) == null; + +-- Const String + non-const non-String/non-FixedString type' +SELECT concatWithSeparator('|', 'a', materialize(42 :: Int8)) == 'a|42'; +SELECT concatWithSeparator('|', 'a', materialize(43 :: Int16)) == 'a|43'; +SELECT concatWithSeparator('|', 'a', materialize(44 :: Int32)) == 'a|44'; +SELECT concatWithSeparator('|', 'a', materialize(45 :: Int64)) == 'a|45'; +SELECT concatWithSeparator('|', 'a', materialize(46 :: Int128)) == 'a|46'; +SELECT concatWithSeparator('|', 'a', materialize(47 :: Int256)) == 'a|47'; +SELECT concatWithSeparator('|', 'a', materialize(48 :: UInt8)) == 'a|48'; +SELECT concatWithSeparator('|', 'a', materialize(49 :: UInt16)) == 'a|49'; +SELECT concatWithSeparator('|', 'a', materialize(50 :: UInt32)) == 'a|50'; +SELECT concatWithSeparator('|', 'a', materialize(51 :: UInt64)) == 'a|51'; +SELECT concatWithSeparator('|', 'a', materialize(52 :: UInt128)) == 'a|52'; +SELECT concatWithSeparator('|', 'a', materialize(53 :: UInt256)) == 'a|53'; +SELECT concatWithSeparator('|', 'a', materialize(42.42 :: Float32)) == 'a|42.42'; +SELECT concatWithSeparator('|', 'a', materialize(43.43 :: Float64)) == 'a|43.43'; +SELECT concatWithSeparator('|', 'a', materialize(44.44 :: Decimal(2))) == 'a|44'; +SELECT concatWithSeparator('|', 'a', materialize(true :: Bool)) == 'a|true'; +SELECT concatWithSeparator('|', 'a', materialize(false :: Bool)) == 'a|false'; +SELECT concatWithSeparator('|', 'a', materialize('foo' :: String)) == 'a|foo'; +SELECT concatWithSeparator('|', 'a', materialize('bar' :: FixedString(3))) == 'a|bar'; +SELECT concatWithSeparator('|', 'a', materialize('foo' :: Nullable(String))) == 'a|foo'; +SELECT concatWithSeparator('|', 'a', materialize('bar' :: Nullable(FixedString(3)))) == 'a|bar'; +SELECT concatWithSeparator('|', 'a', materialize('foo' :: LowCardinality(String))) == 'a|foo'; +SELECT concatWithSeparator('|', 'a', materialize('bar' :: LowCardinality(FixedString(3)))) == 'a|bar'; +SELECT concatWithSeparator('|', 'a', materialize('foo' :: LowCardinality(Nullable(String)))) == 'a|foo'; +SELECT concatWithSeparator('|', 'a', materialize('bar' :: LowCardinality(Nullable(FixedString(3))))) == 'a|bar'; +SELECT concatWithSeparator('|', 'a', materialize(42 :: LowCardinality(Nullable(UInt32)))) == 'a|42'; +SELECT 
concatWithSeparator('|', 'a', materialize(42 :: LowCardinality(UInt32))) == 'a|42'; +SELECT concatWithSeparator('|', 'a', materialize('fae310ca-d52a-4923-9e9b-02bf67f4b009' :: UUID)) == 'a|fae310ca-d52a-4923-9e9b-02bf67f4b009'; +SELECT concatWithSeparator('|', 'a', materialize('2023-11-14' :: Date)) == 'a|2023-11-14'; +SELECT concatWithSeparator('|', 'a', materialize('2123-11-14' :: Date32)) == 'a|2123-11-14'; +SELECT concatWithSeparator('|', 'a', materialize('2023-11-14 05:50:12' :: DateTime('Europe/Amsterdam'))) == 'a|2023-11-14 05:50:12'; +SELECT concatWithSeparator('|', 'a', materialize('hallo' :: Enum('hallo' = 1))) == 'a|hallo'; +SELECT concatWithSeparator('|', 'a', materialize(['foo', 'bar'] :: Array(String))) == 'a|[\'foo\',\'bar\']'; +SELECT concatWithSeparator('|', 'a', materialize((42, 'foo') :: Tuple(Int32, String))) == 'a|(42,\'foo\')'; +SELECT concatWithSeparator('|', 'a', materialize(map(42, 'foo') :: Map(Int32, String))) == 'a|{42:\'foo\'}'; +SELECT concatWithSeparator('|', 'a', materialize('122.233.64.201' :: IPv4)) == 'a|122.233.64.201'; +SELECT concatWithSeparator('|', 'a', materialize('2001:0001:130F:0002:0003:09C0:876A:130B' :: IPv6)) == 'a|2001:0001:130F:0002:0003:09C0:876A:130B'; diff --git a/tests/queries/0_stateless/02564_read_in_order_final_desc.sql b/tests/queries/0_stateless/02564_read_in_order_final_desc.sql index 627fd834101..c1a8ba90722 100644 --- a/tests/queries/0_stateless/02564_read_in_order_final_desc.sql +++ b/tests/queries/0_stateless/02564_read_in_order_final_desc.sql @@ -18,13 +18,12 @@ FROM mytable FINAL WHERE key = 5 ORDER BY timestamp DESC; - SELECT if(explain like '%ReadType: InOrder%', 'Ok', 'Error: ' || explain) FROM ( EXPLAIN PLAN actions = 1 SELECT timestamp, value FROM mytable FINAL WHERE key = 5 - ORDER BY timestamp + ORDER BY timestamp SETTINGS enable_vertical_final = 0 ) WHERE explain like '%ReadType%'; diff --git a/tests/queries/0_stateless/02698_marked_dropped_tables.reference b/tests/queries/0_stateless/02698_marked_dropped_tables.reference index 84e478cc383..a9ac6089fa9 100644 --- a/tests/queries/0_stateless/02698_marked_dropped_tables.reference +++ b/tests/queries/0_stateless/02698_marked_dropped_tables.reference @@ -1,76 +1,3 @@ 25400_dropped_tables MergeTree -index UInt32 -database String -table String -uuid UUID -engine String -metadata_dropped_path String -table_dropped_time DateTime default 25400_dropped_tables all_1_1_0 default 25400_dropped_tables all_2_2_0 -partition String -name String -uuid UUID -part_type String -active UInt8 -marks UInt64 -rows UInt64 -bytes_on_disk UInt64 -data_compressed_bytes UInt64 -data_uncompressed_bytes UInt64 -primary_key_size UInt64 -marks_bytes UInt64 -secondary_indices_compressed_bytes UInt64 -secondary_indices_uncompressed_bytes UInt64 -secondary_indices_marks_bytes UInt64 -modification_time DateTime -remove_time DateTime -refcount UInt32 -min_date Date -max_date Date -min_time DateTime -max_time DateTime -partition_id String -min_block_number Int64 -max_block_number Int64 -level UInt32 -data_version UInt64 -primary_key_bytes_in_memory UInt64 -primary_key_bytes_in_memory_allocated UInt64 -is_frozen UInt8 -database String -table String -engine String -disk_name String -path String -hash_of_all_files String -hash_of_uncompressed_files String -uncompressed_hash_of_compressed_files String -delete_ttl_info_min DateTime -delete_ttl_info_max DateTime -move_ttl_info.expression Array(String) -move_ttl_info.min Array(DateTime) -move_ttl_info.max Array(DateTime) -default_compression_codec String 
-recompression_ttl_info.expression Array(String) -recompression_ttl_info.min Array(DateTime) -recompression_ttl_info.max Array(DateTime) -group_by_ttl_info.expression Array(String) -group_by_ttl_info.min Array(DateTime) -group_by_ttl_info.max Array(DateTime) -rows_where_ttl_info.expression Array(String) -rows_where_ttl_info.min Array(DateTime) -rows_where_ttl_info.max Array(DateTime) -projections Array(String) -visible UInt8 -creation_tid Tuple(UInt64, UInt64, UUID) -removal_tid_lock UInt64 -removal_tid Tuple(UInt64, UInt64, UUID) -creation_csn UInt64 -removal_csn UInt64 -has_lightweight_delete UInt8 -last_removal_attempt_time DateTime -removal_state String -bytes UInt64 ALIAS bytes_on_disk -marks_size UInt64 ALIAS marks_bytes -part_name String ALIAS name diff --git a/tests/queries/0_stateless/02698_marked_dropped_tables.sql b/tests/queries/0_stateless/02698_marked_dropped_tables.sql index a3686159a78..3a11603956b 100644 --- a/tests/queries/0_stateless/02698_marked_dropped_tables.sql +++ b/tests/queries/0_stateless/02698_marked_dropped_tables.sql @@ -9,6 +9,4 @@ INSERT INTO 25400_dropped_tables VALUES (3),(4); DROP TABLE 25400_dropped_tables; SELECT table, engine FROM system.dropped_tables WHERE database = currentDatabase() LIMIT 1; -DESCRIBE TABLE system.dropped_tables; SELECT database, table, name FROM system.dropped_tables_parts WHERE database = currentDatabase() and table = '25400_dropped_tables'; -DESCRIBE TABLE system.dropped_tables_parts; diff --git a/tests/queries/0_stateless/02731_analyzer_join_resolve_nested.reference b/tests/queries/0_stateless/02731_analyzer_join_resolve_nested.reference new file mode 100644 index 00000000000..5f1b7e8b1fe --- /dev/null +++ b/tests/queries/0_stateless/02731_analyzer_join_resolve_nested.reference @@ -0,0 +1,314 @@ +[(1,'a')] +[1] +[(1,'a')] [(1,'a')] +[(1,'a')] +[1] +[(1,'a')] 1 +[(1,'a')] [1] +[1] +[1] +[1] +[1] 1 ['a'] 1 ['a'] +[(1,'a')] +[1] +[1] +[(1,'a')] +[1] +[('a',1)] +1 [1] ['a'] ['a'] [1] +[(1,'a')] +[1] +[1] +[(1,'a')] +[1] +[('a',1)] +1 [1] ['a'] 1 ['a'] [1] +(1,'s') +(1,'s') 1 +1 +((((1,'s'),'s'),'s'),'s') Tuple(Tuple(Tuple(Tuple(Int64, String), String), String), String) +((((2,'s'),'s'),'s'),'s') Tuple(Tuple(Tuple(Tuple(Int64, String), String), String), String) +(1,'a') +1 +1 +(1,'a') +1 +('a',1) +1 1 (1,'a') 1 ('a',1) +(1,'a') +1 +1 +(1,'a') +1 +('a',1) +1 (1,'a') ('a',1) +(1,'a') +1 +1 +(1,'a') +1 +('a',1) +1 (1,'a') 1 ('a',1) +((((1,'s'),'s'),'s'),'s') +((((1,'s'),'s'),'s'),'s') +((((2,'s'),'s'),'s'),'s') +((((2,'s'),'s'),'s'),'s') +((1,'s'),'s') +((1,'s'),'s') +((2,'s'),'s') +((2,'s'),'s') +1 +1 +2 +2 +((((1,'s'),'s'),'s'),'s') Tuple(Tuple(Tuple(Tuple(Int64, String), String), String), String) +((((2,'s'),'s'),'s'),'s') Tuple(Tuple(Tuple(Tuple(Int64, String), String), String), String) +((((3,'s'),'s'),'s'),'s') Tuple(Tuple(Tuple(Tuple(Int64, String), String), String), String) +((1,'s'),'s') Tuple(\n t Tuple(\n t UInt32,\n s String),\n s String) +((2,'s'),'s') Tuple(\n t Tuple(\n t UInt32,\n s String),\n s String) +((0,''),'') Tuple(\n t Tuple(\n t UInt32,\n s String),\n s String) +1 UInt32 +2 UInt32 +0 UInt32 +(((1,'s'),'s'),'s') s Tuple(Tuple(Tuple(Int64, String), String), String) String +(((2,'s'),'s'),'s') s Tuple(Tuple(Tuple(Int64, String), String), String) String +(((3,'s'),'s'),'s') s Tuple(Tuple(Tuple(Int64, String), String), String) String +((1,'s'),'s') s Tuple(\n t Tuple(\n t UInt32,\n s String),\n s String) String +((2,'s'),'s') s Tuple(\n t Tuple(\n t UInt32,\n s String),\n s String) String +((0,''),'') Tuple(\n t 
Tuple(\n t UInt32,\n s String),\n s String) String +(1,'s') s Tuple(\n t UInt32,\n s String) String +(2,'s') s Tuple(\n t UInt32,\n s String) String +(0,'') Tuple(\n t UInt32,\n s String) String +1 s UInt32 String +2 s UInt32 String +0 UInt32 String +((((1,'s'),'s'),'s'),'s') Tuple(\n t Tuple(\n t Tuple(\n t Tuple(\n t UInt32,\n s String),\n s String),\n s String),\n s String) +((((2,'s'),'s'),'s'),'s') Tuple(\n t Tuple(\n t Tuple(\n t Tuple(\n t UInt32,\n s String),\n s String),\n s String),\n s String) +((((0,''),''),''),'') Tuple(\n t Tuple(\n t Tuple(\n t Tuple(\n t UInt32,\n s String),\n s String),\n s String),\n s String) +((1,'s'),'s') Tuple(\n t Tuple(\n t UInt32,\n s String),\n s String) +((2,'s'),'s') Tuple(\n t Tuple(\n t UInt32,\n s String),\n s String) +((0,''),'') Tuple(\n t Tuple(\n t UInt32,\n s String),\n s String) +1 UInt32 +2 UInt32 +0 UInt32 +(((1,'s'),'s'),'s') s Tuple(\n t Tuple(\n t Tuple(\n t UInt32,\n s String),\n s String),\n s String) String +(((2,'s'),'s'),'s') s Tuple(\n t Tuple(\n t Tuple(\n t UInt32,\n s String),\n s String),\n s String) String +(((0,''),''),'') Tuple(\n t Tuple(\n t Tuple(\n t UInt32,\n s String),\n s String),\n s String) String +((1,'s'),'s') s Tuple(Tuple(Int64, String), String) String +((2,'s'),'s') s Tuple(Tuple(Int64, String), String) String +((3,'s'),'s') s Tuple(Tuple(Int64, String), String) String +(1,'s') s Tuple(\n t UInt32,\n s String) String +(2,'s') s Tuple(\n t UInt32,\n s String) String +(0,'') Tuple(\n t UInt32,\n s String) String +1 s UInt32 String +2 s UInt32 String +0 UInt32 String +((((1,'s'),'s'),'s'),'s') Tuple(\n t Tuple(\n t Tuple(\n t Tuple(\n t UInt32,\n s String),\n s String),\n s String),\n s String) +((((2,'s'),'s'),'s'),'s') Tuple(\n t Tuple(\n t Tuple(\n t Tuple(\n t UInt32,\n s String),\n s String),\n s String),\n s String) +((((0,''),''),''),'') Tuple(\n t Tuple(\n t Tuple(\n t Tuple(\n t UInt32,\n s String),\n s String),\n s String),\n s String) +((1,'s'),'s') Tuple(Tuple(Int64, String), String) +((2,'s'),'s') Tuple(Tuple(Int64, String), String) +((3,'s'),'s') Tuple(Tuple(Int64, String), String) +1 UInt32 +2 UInt32 +0 UInt32 +((((1,'s'),'s'),'s'),'s') Tuple(\n t Tuple(\n t Tuple(\n t Tuple(\n t UInt32,\n s String),\n s String),\n s String),\n s String) +((((2,'s'),'s'),'s'),'s') Tuple(\n t Tuple(\n t Tuple(\n t Tuple(\n t UInt32,\n s String),\n s String),\n s String),\n s String) +((((0,''),''),''),'') Tuple(\n t Tuple(\n t Tuple(\n t Tuple(\n t UInt32,\n s String),\n s String),\n s String),\n s String) +((1,'s'),'s') Tuple(\n t Tuple(\n t UInt32,\n s String),\n s String) +((2,'s'),'s') Tuple(\n t Tuple(\n t UInt32,\n s String),\n s String) +((0,''),'') Tuple(\n t Tuple(\n t UInt32,\n s String),\n s String) +1 Int64 +2 Int64 +3 Int64 +(((1,'s'),'s'),'s') s Tuple(\n t Tuple(\n t Tuple(\n t UInt32,\n s String),\n s String),\n s String) String +(((2,'s'),'s'),'s') s Tuple(\n t Tuple(\n t Tuple(\n t UInt32,\n s String),\n s String),\n s String) String +(((0,''),''),'') Tuple(\n t Tuple(\n t Tuple(\n t UInt32,\n s String),\n s String),\n s String) String +((1,'s'),'s') s Tuple(\n t Tuple(\n t UInt32,\n s String),\n s String) String +((2,'s'),'s') s Tuple(\n t Tuple(\n t UInt32,\n s String),\n s String) String +((0,''),'') Tuple(\n t Tuple(\n t UInt32,\n s String),\n s String) String +(1,'s') s Tuple(\n t UInt32,\n s String) String +(2,'s') s Tuple(\n t UInt32,\n s String) String +(0,'') Tuple(\n t UInt32,\n s String) String +1 s UInt32 String +2 s UInt32 String +0 UInt32 String +[([([([(1,'d')],'d')],'s')],'s')] 
+[[[([(1,'d')],'d')]]] +[[[[1]]]] +[[([([(1,'d')],'d')],'s')]] Array(Array(Tuple(Array(Tuple(Array(Tuple(Int64, String)), String)), String))) +[[[([(1,'d')],'d')]]] Array(Array(Nested(t Nested(t Int32, s String), s String))) +[[[[1]]]] Array(Array(Array(Array(Int32)))) +[[[([(1,'d')],'d')]]] [['s']] Array(Array(Array(Tuple(Array(Tuple(Int64, String)), String)))) Array(Array(String)) +[[[[(1,'d')]]]] [[['d']]] Array(Array(Array(Array(Tuple(\n t Int32,\n s String))))) Array(Array(Array(String))) +[[[[1]]]] [[[['d']]]] Array(Array(Array(Array(Int32)))) Array(Array(Array(Array(String)))) +[([([([(1,'d')],'d')],'s')],'s')] Array(Tuple(\n t Array(Tuple(\n t Array(Tuple(\n t Array(Tuple(\n t Int32,\n s String)),\n s String)),\n s String)),\n s String)) +[[([([(1,'d')],'d')],'s')]] Array(Nested(t Nested(t Nested(t Int32, s String), s String), s String)) +[[[([(1,'d')],'d')]]] Array(Array(Array(Tuple(Array(Tuple(Int64, String)), String)))) +[[[[1]]]] Array(Array(Array(Array(Int32)))) +[[([([(1,'d')],'d')],'s')]] ['s'] Array(Array(Tuple(\n t Array(Tuple(\n t Array(Tuple(\n t Int32,\n s String)),\n s String)),\n s String))) Array(String) +[[[([(1,'d')],'d')]]] [['s']] Array(Array(Array(Tuple(\n t Array(Tuple(\n t Int32,\n s String)),\n s String)))) Array(Array(String)) +[[[[(1,'d')]]]] [[['d']]] Array(Array(Array(Array(Tuple(Int64, String))))) Array(Array(Array(String))) +[[[[1]]]] [[[['d']]]] Array(Array(Array(Array(Int32)))) Array(Array(Array(Array(String)))) +[([([([(1,'d')],'d')],'s')],'s')] Array(Tuple(\n t Array(Tuple(\n t Array(Tuple(\n t Array(Tuple(\n t Int32,\n s String)),\n s String)),\n s String)),\n s String)) +[[([([(1,'d')],'d')],'s')]] Array(Nested(t Nested(t Nested(t Int32, s String), s String), s String)) +[[[([(1,'d')],'d')]]] Array(Array(Nested(t Nested(t Int32, s String), s String))) +[[[[1]]]] Array(Array(Array(Array(Int64)))) +[[[[1]]]] [[[['d']]]] +[(1,'a')] +[1] +[(1,'a')] [(1,'a')] +[(1,'a')] +[1] +[(1,'a')] 1 +[(1,'a')] [1] +[1] +[1] +[1] +[1] 1 ['a'] 1 ['a'] +[(1,'a')] +[1] +[1] +[(1,'a')] +[1] +[('a',1)] +1 [1] ['a'] ['a'] [1] +[(1,'a')] +[1] +[1] +[(1,'a')] +[1] +[('a',1)] +1 [1] ['a'] 1 ['a'] [1] +(1,'s') +(1,'s') 1 +1 +((((1,'s'),'s'),'s'),'s') Tuple(Tuple(Tuple(Tuple(Int64, String), String), String), String) +((((2,'s'),'s'),'s'),'s') Tuple(Tuple(Tuple(Tuple(Int64, String), String), String), String) +(1,'a') +1 +1 +(1,'a') +1 +('a',1) +1 1 (1,'a') 1 ('a',1) +(1,'a') +1 +1 +(1,'a') +1 +('a',1) +1 (1,'a') ('a',1) +(1,'a') +1 +1 +(1,'a') +1 +('a',1) +1 (1,'a') 1 ('a',1) +((((1,'s'),'s'),'s'),'s') +((((1,'s'),'s'),'s'),'s') +((((2,'s'),'s'),'s'),'s') +((((2,'s'),'s'),'s'),'s') +((1,'s'),'s') +((1,'s'),'s') +((2,'s'),'s') +((2,'s'),'s') +1 +1 +2 +2 +((((1,'s'),'s'),'s'),'s') Tuple(Tuple(Tuple(Tuple(Int64, String), String), String), String) +((((2,'s'),'s'),'s'),'s') Tuple(Tuple(Tuple(Tuple(Int64, String), String), String), String) +((((3,'s'),'s'),'s'),'s') Tuple(Tuple(Tuple(Tuple(Int64, String), String), String), String) +((1,'s'),'s') Tuple(\n t Tuple(\n t UInt32,\n s String),\n s String) +((2,'s'),'s') Tuple(\n t Tuple(\n t UInt32,\n s String),\n s String) +((0,''),'') Tuple(\n t Tuple(\n t UInt32,\n s String),\n s String) +1 Nullable(UInt32) +2 Nullable(UInt32) +\N Nullable(UInt32) +(((1,'s'),'s'),'s') s Tuple(Tuple(Tuple(Int64, String), String), String) String +(((2,'s'),'s'),'s') s Tuple(Tuple(Tuple(Int64, String), String), String) String +(((3,'s'),'s'),'s') s Tuple(Tuple(Tuple(Int64, String), String), String) String +((1,'s'),'s') s Tuple(\n t Tuple(\n t UInt32,\n 
s String),\n s String) String +((2,'s'),'s') s Tuple(\n t Tuple(\n t UInt32,\n s String),\n s String) String +((0,''),'') Tuple(\n t Tuple(\n t UInt32,\n s String),\n s String) String +(1,'s') s Tuple(\n t UInt32,\n s String) String +(2,'s') s Tuple(\n t UInt32,\n s String) String +(0,'') Tuple(\n t UInt32,\n s String) String +1 s UInt32 String +2 s UInt32 String +0 UInt32 String +((((1,'s'),'s'),'s'),'s') Tuple(\n t Tuple(\n t Tuple(\n t Tuple(\n t UInt32,\n s String),\n s String),\n s String),\n s String) +((((2,'s'),'s'),'s'),'s') Tuple(\n t Tuple(\n t Tuple(\n t Tuple(\n t UInt32,\n s String),\n s String),\n s String),\n s String) +((((0,''),''),''),'') Tuple(\n t Tuple(\n t Tuple(\n t Tuple(\n t UInt32,\n s String),\n s String),\n s String),\n s String) +((1,'s'),'s') Tuple(\n t Tuple(\n t UInt32,\n s String),\n s String) +((2,'s'),'s') Tuple(\n t Tuple(\n t UInt32,\n s String),\n s String) +((0,''),'') Tuple(\n t Tuple(\n t UInt32,\n s String),\n s String) +1 Nullable(UInt32) +2 Nullable(UInt32) +\N Nullable(UInt32) +(((1,'s'),'s'),'s') s Tuple(\n t Tuple(\n t Tuple(\n t UInt32,\n s String),\n s String),\n s String) String +(((2,'s'),'s'),'s') s Tuple(\n t Tuple(\n t Tuple(\n t UInt32,\n s String),\n s String),\n s String) String +(((0,''),''),'') Tuple(\n t Tuple(\n t Tuple(\n t UInt32,\n s String),\n s String),\n s String) String +((1,'s'),'s') s Tuple(Tuple(Int64, String), String) String +((2,'s'),'s') s Tuple(Tuple(Int64, String), String) String +((3,'s'),'s') s Tuple(Tuple(Int64, String), String) String +(1,'s') s Tuple(\n t UInt32,\n s String) String +(2,'s') s Tuple(\n t UInt32,\n s String) String +(0,'') Tuple(\n t UInt32,\n s String) String +1 s UInt32 String +2 s UInt32 String +0 UInt32 String +((((1,'s'),'s'),'s'),'s') Tuple(\n t Tuple(\n t Tuple(\n t Tuple(\n t UInt32,\n s String),\n s String),\n s String),\n s String) +((((2,'s'),'s'),'s'),'s') Tuple(\n t Tuple(\n t Tuple(\n t Tuple(\n t UInt32,\n s String),\n s String),\n s String),\n s String) +((((0,''),''),''),'') Tuple(\n t Tuple(\n t Tuple(\n t Tuple(\n t UInt32,\n s String),\n s String),\n s String),\n s String) +((1,'s'),'s') Tuple(Tuple(Int64, String), String) +((2,'s'),'s') Tuple(Tuple(Int64, String), String) +((3,'s'),'s') Tuple(Tuple(Int64, String), String) +1 Nullable(UInt32) +2 Nullable(UInt32) +\N Nullable(UInt32) +((((1,'s'),'s'),'s'),'s') Tuple(\n t Tuple(\n t Tuple(\n t Tuple(\n t UInt32,\n s String),\n s String),\n s String),\n s String) +((((2,'s'),'s'),'s'),'s') Tuple(\n t Tuple(\n t Tuple(\n t Tuple(\n t UInt32,\n s String),\n s String),\n s String),\n s String) +((((0,''),''),''),'') Tuple(\n t Tuple(\n t Tuple(\n t Tuple(\n t UInt32,\n s String),\n s String),\n s String),\n s String) +((1,'s'),'s') Tuple(\n t Tuple(\n t UInt32,\n s String),\n s String) +((2,'s'),'s') Tuple(\n t Tuple(\n t UInt32,\n s String),\n s String) +((0,''),'') Tuple(\n t Tuple(\n t UInt32,\n s String),\n s String) +1 Nullable(Int64) +2 Nullable(Int64) +3 Nullable(Int64) +(((1,'s'),'s'),'s') s Tuple(\n t Tuple(\n t Tuple(\n t UInt32,\n s String),\n s String),\n s String) String +(((2,'s'),'s'),'s') s Tuple(\n t Tuple(\n t Tuple(\n t UInt32,\n s String),\n s String),\n s String) String +(((0,''),''),'') Tuple(\n t Tuple(\n t Tuple(\n t UInt32,\n s String),\n s String),\n s String) String +((1,'s'),'s') s Tuple(\n t Tuple(\n t UInt32,\n s String),\n s String) String +((2,'s'),'s') s Tuple(\n t Tuple(\n t UInt32,\n s String),\n s String) String +((0,''),'') Tuple(\n t Tuple(\n t UInt32,\n s String),\n s String) String +(1,'s') 
s Tuple(\n t UInt32,\n s String) String +(2,'s') s Tuple(\n t UInt32,\n s String) String +(0,'') Tuple(\n t UInt32,\n s String) String +1 s UInt32 String +2 s UInt32 String +0 UInt32 String +[([([([(1,'d')],'d')],'s')],'s')] +[[[([(1,'d')],'d')]]] +[[[[1]]]] +[[([([(1,'d')],'d')],'s')]] Array(Array(Tuple(Array(Tuple(Array(Tuple(Int64, String)), String)), String))) +[[[([(1,'d')],'d')]]] Array(Array(Nested(t Nested(t Int32, s String), s String))) +[[[[1]]]] Array(Array(Array(Array(Int32)))) +[[[([(1,'d')],'d')]]] [['s']] Array(Array(Array(Tuple(Array(Tuple(Int64, String)), String)))) Array(Array(String)) +[[[[(1,'d')]]]] [[['d']]] Array(Array(Array(Array(Tuple(\n t Int32,\n s String))))) Array(Array(Array(String))) +[[[[1]]]] [[[['d']]]] Array(Array(Array(Array(Int32)))) Array(Array(Array(Array(String)))) +[([([([(1,'d')],'d')],'s')],'s')] Array(Tuple(\n t Array(Tuple(\n t Array(Tuple(\n t Array(Tuple(\n t Int32,\n s String)),\n s String)),\n s String)),\n s String)) +[[([([(1,'d')],'d')],'s')]] Array(Nested(t Nested(t Nested(t Int32, s String), s String), s String)) +[[[([(1,'d')],'d')]]] Array(Array(Array(Tuple(Array(Tuple(Int64, String)), String)))) +[[[[1]]]] Array(Array(Array(Array(Int32)))) +[[([([(1,'d')],'d')],'s')]] ['s'] Array(Array(Tuple(\n t Array(Tuple(\n t Array(Tuple(\n t Int32,\n s String)),\n s String)),\n s String))) Array(String) +[[[([(1,'d')],'d')]]] [['s']] Array(Array(Array(Tuple(\n t Array(Tuple(\n t Int32,\n s String)),\n s String)))) Array(Array(String)) +[[[[(1,'d')]]]] [[['d']]] Array(Array(Array(Array(Tuple(Int64, String))))) Array(Array(Array(String))) +[[[[1]]]] [[[['d']]]] Array(Array(Array(Array(Int32)))) Array(Array(Array(Array(String)))) +[([([([(1,'d')],'d')],'s')],'s')] Array(Tuple(\n t Array(Tuple(\n t Array(Tuple(\n t Array(Tuple(\n t Int32,\n s String)),\n s String)),\n s String)),\n s String)) +[[([([(1,'d')],'d')],'s')]] Array(Nested(t Nested(t Nested(t Int32, s String), s String), s String)) +[[[([(1,'d')],'d')]]] Array(Array(Nested(t Nested(t Int32, s String), s String))) +[[[[1]]]] Array(Array(Array(Array(Int64)))) +[[[[1]]]] [[[['d']]]] diff --git a/tests/queries/0_stateless/02731_analyzer_join_resolve_nested.sql.j2 b/tests/queries/0_stateless/02731_analyzer_join_resolve_nested.sql.j2 new file mode 100644 index 00000000000..c2f3c51b17a --- /dev/null +++ b/tests/queries/0_stateless/02731_analyzer_join_resolve_nested.sql.j2 @@ -0,0 +1,219 @@ +DROP TABLE IF EXISTS ta; +DROP TABLE IF EXISTS tb; +DROP TABLE IF EXISTS ttta; +DROP TABLE IF EXISTS tttb; +DROP TABLE IF EXISTS na; +DROP TABLE IF EXISTS nb; +DROP TABLE IF EXISTS nnna; +DROP TABLE IF EXISTS nnnb; + +CREATE table ta (x Int32, t Tuple(t UInt32, s String)) ENGINE = MergeTree ORDER BY x; +INSERT INTO ta VALUES (1, (1, 'a')); + +CREATE table tb (x Int32, t Tuple(s String, t Int32)) ENGINE = MergeTree ORDER BY x; +INSERT INTO tb VALUES (1, ('a', 1)); + +CREATE table ttta (x Int32, t Tuple(t Tuple(t Tuple(t Tuple(t UInt32, s String), s String), s String), s String)) ENGINE = MergeTree ORDER BY x; +INSERT INTO ttta VALUES (1, ((((1, 's'), 's'), 's'), 's')), (2, ((((2, 's'), 's'), 's'), 's')); + +CREATE table tttb (x Int32, t Tuple(t Tuple(t Tuple(t Tuple(t Int32, s String), s String), s String), s String)) ENGINE = MergeTree ORDER BY x; +INSERT INTO tttb VALUES (2, ((((2, 's'), 's'), 's'), 's')), (3, ((((3, 's'), 's'), 's'), 's')); + +CREATE table na (x Int32, t Nested(t UInt32, s String)) ENGINE = MergeTree ORDER BY x; +INSERT INTO na VALUES (1, [1], ['a']); + +CREATE table nb (x Int32, t Nested(s 
String, t Int32)) ENGINE = MergeTree ORDER BY x; +INSERT INTO nb VALUES (1, ['a'], [1]); + +CREATE TABLE nnna ( x UInt64, t Nested(t Nested(t Nested(t Nested(t Int32, s String), s String), s String), s String) ) ENGINE = MergeTree ORDER BY x; +INSERT INTO nnna VALUES (1, [[([([(1,'d')],'d')], 's')]], ['s']); + +CREATE TABLE nnnb ( x UInt64, t Nested(t Nested(t Nested(t Nested(t UInt32, s String), s String), s String), s String) ) ENGINE = MergeTree ORDER BY x; +INSERT INTO nnnb VALUES (1, [[([([(1,'d')],'d')], 's')]], ['s']); + +SET allow_experimental_analyzer = 1; + +{% for join_use_nulls in [0, 1] -%} + +SET join_use_nulls = {{ join_use_nulls }}; + +SELECT t FROM (SELECT [(1, 'a')] :: Nested(t UInt32, s String) AS t) AS na, (SELECT [(1, 'a')] :: Nested(t Int32, s String) AS t) AS t; +SELECT na.t.t FROM (SELECT [(1, 'a')] :: Nested(t UInt32, s String) AS t) AS na, (SELECT [(1, 'a')] :: Nested(t Int32, s String) AS t) AS t; +SELECT * FROM (SELECT [(1, 'a')] :: Nested(t UInt32, s String) AS t) AS na, (SELECT [(1, 'a')] :: Nested(t Int32, s String) AS t) AS t; + +SELECT t FROM (SELECT [(1, 'a')] :: Nested(t UInt32, s String) AS t) AS na, (SELECT 1 AS t) AS t; +SELECT na.t.t FROM (SELECT [(1, 'a')] :: Nested(t UInt32, s String) AS t) AS na, (SELECT 1 AS t) AS t; +SELECT * FROM (SELECT [(1, 'a')] :: Nested(t UInt32, s String) AS t) AS na, (SELECT 1 AS t) AS t; + +SELECT * FROM (SELECT [(1, 'a')] :: Nested(t UInt32, s String) AS t) AS na FULL JOIN (SELECT [1] :: Array(Int32) AS t) AS nb ON nb.t = na.t.t; + +SELECT t FROM na FULL JOIN nb USING (t.t); -- { serverError AMBIGUOUS_IDENTIFIER } +SELECT t.t FROM na FULL JOIN nb USING (t.t); +SELECT na.t.t FROM na FULL JOIN nb USING (t.t); +SELECT na.t FROM na FULL JOIN nb USING (t.t); -- { serverError AMBIGUOUS_IDENTIFIER } +SELECT nb.t.t FROM na FULL JOIN nb USING (t.t); +SELECT nb.t FROM na FULL JOIN nb USING (t.t); -- { serverError AMBIGUOUS_IDENTIFIER } +SELECT * FROM na FULL JOIN nb USING (t.t); + +SELECT t FROM na FULL JOIN nb USING (x); +SELECT t.t FROM na FULL JOIN nb USING (x); +SELECT na.t.t FROM na FULL JOIN nb USING (x); +SELECT na.t FROM na FULL JOIN nb USING (x); +SELECT nb.t.t FROM na FULL JOIN nb USING (x); +SELECT nb.t FROM na FULL JOIN nb USING (x); +SELECT * FROM na FULL JOIN nb USING (x); + +SELECT t FROM na, nb; +SELECT t.t FROM na, nb; +SELECT na.t.t FROM na, nb; +SELECT na.t FROM na, nb; +SELECT nb.t.t FROM na, nb; +SELECT nb.t FROM na, nb; +SELECT * FROM na, nb; + +--- + +SELECT * FROM (SELECT (1, 's') :: Tuple(t Int32, s String) as t ) as na FULL JOIN (SELECT (1, 's') :: Tuple(t UInt32, s String) as t ) as nb USING (t); +SELECT * FROM (SELECT (1, 's') :: Tuple(t Int32, s String) as t ) as na, (SELECT 1 as t ) as t; + +SELECT t.t FROM (SELECT (1, 's') :: Tuple(t Int32, s String) as t ) as na, (SELECT 1 as t ) as t; + +SELECT t.t FROM (SELECT (1, 's') :: Tuple(t Int32, s String) as t ) as na FULL JOIN (SELECT (1, 's') :: Tuple(t UInt32, s String) as t ) as nb USING (t); -- { serverError AMBIGUOUS_IDENTIFIER } +SELECT * FROM (SELECT (1, 's') :: Tuple(t Int32, s String) as t ) as na FULL JOIN (SELECT (1, 's') :: Tuple(t UInt32, s String) as t ) as nb USING (t.t); -- { serverError UNSUPPORTED_METHOD } + +SELECT t as e, toTypeName(e) FROM ( + SELECT ((((1, 's'), 's'), 's'), 's') :: Tuple(t Tuple(t Tuple(t Tuple(t UInt32, s String), s String), s String), s String) as t +) ttta FULL JOIN ( + SELECT ((((2, 's'), 's'), 's'), 's') :: Tuple(t Tuple(t Tuple(t Tuple(t Int32, s String), s String), s String), s String) as t +) tttb USING 
(t.t); -- { serverError UNSUPPORTED_METHOD } + +SELECT t.t as e, toTypeName(e) FROM ( + SELECT ((((1, 's'), 's'), 's'), 's') :: Tuple(t Tuple(t Tuple(t Tuple(t UInt32, s String), s String), s String), s String) as t +) ttta FULL JOIN ( + SELECT ((((2, 's'), 's'), 's'), 's') :: Tuple(t Tuple(t Tuple(t Tuple(t Int32, s String), s String), s String), s String) as t +) tttb USING (t.t); -- { serverError UNSUPPORTED_METHOD } + +SELECT t.t.t as e, toTypeName(e) FROM ( + SELECT ((((1, 's'), 's'), 's'), 's') :: Tuple(t Tuple(t Tuple(t Tuple(t UInt32, s String), s String), s String), s String) as t +) ttta FULL JOIN ( + SELECT ((((2, 's'), 's'), 's'), 's') :: Tuple(t Tuple(t Tuple(t Tuple(t Int32, s String), s String), s String), s String) as t +) tttb USING (t.t); -- { serverError UNSUPPORTED_METHOD } + +SELECT t as e, toTypeName(e) FROM ( + SELECT ((((1, 's'), 's'), 's'), 's') :: Tuple(t Tuple(t Tuple(t Tuple(t UInt32, s String), s String), s String), s String) as t +) ttta FULL JOIN ( + SELECT ((((2, 's'), 's'), 's'), 's') :: Tuple(t Tuple(t Tuple(t Tuple(t Int32, s String), s String), s String), s String) as t +) tttb USING (t); + +SELECT t.t as e, toTypeName(e) FROM ( + SELECT ((((1, 's'), 's'), 's'), 's') :: Tuple(t Tuple(t Tuple(t Tuple(t UInt32, s String), s String), s String), s String) as t +) ttta FULL JOIN ( + SELECT ((((2, 's'), 's'), 's'), 's') :: Tuple(t Tuple(t Tuple(t Tuple(t Int32, s String), s String), s String), s String) as t +) tttb USING (t); -- { serverError AMBIGUOUS_IDENTIFIER } + +SELECT t FROM ta FULL JOIN tb USING (t.t); +SELECT t.t FROM ta FULL JOIN tb USING (t.t); +SELECT ta.t.t FROM ta FULL JOIN tb USING (t.t); +SELECT ta.t FROM ta FULL JOIN tb USING (t.t); +SELECT tb.t.t FROM ta FULL JOIN tb USING (t.t); +SELECT tb.t FROM ta FULL JOIN tb USING (t.t); +SELECT * FROM ta FULL JOIN tb USING (t.t); + +SELECT t FROM ta FULL JOIN tb USING (x); +SELECT t.t FROM ta FULL JOIN tb USING (x); +SELECT ta.t.t FROM ta FULL JOIN tb USING (x); +SELECT ta.t FROM ta FULL JOIN tb USING (x); +SELECT tb.t.t FROM ta FULL JOIN tb USING (x); +SELECT tb.t FROM ta FULL JOIN tb USING (x); +SELECT * FROM ta FULL JOIN tb USING (x); + +SELECT t FROM ta, tb; +SELECT t.t FROM ta, tb; +SELECT ta.t.t FROM ta, tb; +SELECT ta.t FROM ta, tb; +SELECT tb.t.t FROM ta, tb; +SELECT tb.t FROM ta, tb; +SELECT * FROM ta, tb; + +SELECT t FROM ttta, tttb; +SELECT t.t.t FROM ttta, tttb; +SELECT t.t.t.t.t FROM ttta, tttb; + +SELECT t as e, toTypeName(e) FROM ttta FULL JOIN tttb USING (t); +SELECT t.t.t as e, toTypeName(e) FROM ttta FULL JOIN tttb USING (t); +SELECT t.t.t.t.t as e, toTypeName(e) FROM ttta FULL JOIN tttb USING (t); + +SELECT t.*, t.* APPLY toTypeName FROM ttta FULL JOIN tttb USING (t); +SELECT t.t.*, t.t.* APPLY toTypeName FROM ttta FULL JOIN tttb USING (t); +SELECT t.t.t.*, t.t.t.* APPLY toTypeName FROM ttta FULL JOIN tttb USING (t); +SELECT t.t.t.t.*, t.t.t.t.* APPLY toTypeName FROM ttta FULL JOIN tttb USING (t); + +SELECT t as e, toTypeName(e) FROM ttta FULL JOIN tttb USING (t.t); +SELECT t.t.t as e, toTypeName(e) FROM ttta FULL JOIN tttb USING (t.t); +SELECT t.t.t.t.t as e, toTypeName(e) FROM ttta FULL JOIN tttb USING (t.t); + +SELECT t.*, t.* APPLY toTypeName FROM ttta FULL JOIN tttb USING (t.t); +SELECT t.t.*, t.t.* APPLY toTypeName FROM ttta FULL JOIN tttb USING (t.t); +SELECT t.t.t.*, t.t.t.* APPLY toTypeName FROM ttta FULL JOIN tttb USING (t.t); +SELECT t.t.t.t.*, t.t.t.t.* APPLY toTypeName FROM ttta FULL JOIN tttb USING (t.t); + +SELECT t as e, toTypeName(e) FROM ttta FULL JOIN tttb USING 
(t.t.t); +SELECT t.t.t as e, toTypeName(e) FROM ttta FULL JOIN tttb USING (t.t.t); +SELECT t.t.t.t.t as e, toTypeName(e) FROM ttta FULL JOIN tttb USING (t.t.t); + +SELECT t as e, toTypeName(e) FROM ttta FULL JOIN tttb USING (t.t.t.t.t); +SELECT t.t.t as e, toTypeName(e) FROM ttta FULL JOIN tttb USING (t.t.t.t.t); +SELECT t.t.t.t.t as e, toTypeName(e) FROM ttta FULL JOIN tttb USING (t.t.t.t.t); + +SELECT t.*, t.* APPLY toTypeName FROM ttta FULL JOIN tttb USING (t.t.t.t.t); +SELECT t.t.*, t.t.* APPLY toTypeName FROM ttta FULL JOIN tttb USING (t.t.t.t.t); +SELECT t.t.t.*, t.t.t.* APPLY toTypeName FROM ttta FULL JOIN tttb USING (t.t.t.t.t); +SELECT t.t.t.t.*, t.t.t.t.* APPLY toTypeName FROM ttta FULL JOIN tttb USING (t.t.t.t.t); + +SELECT t FROM nnna, nnnb; +SELECT t.t.t FROM nnna, nnnb; +SELECT t.t.t.t.t FROM nnna, nnnb; + +SELECT t as e, toTypeName(e) FROM nnna FULL JOIN nnnb USING (t); -- { serverError UNSUPPORTED_METHOD } +SELECT t.t as e, toTypeName(e) FROM nnna FULL JOIN nnnb USING (t); -- { serverError UNSUPPORTED_METHOD } +SELECT t.t.t as e, toTypeName(e) FROM nnna FULL JOIN nnnb USING (t); -- { serverError UNSUPPORTED_METHOD } +SELECT t.t.t.t.t as e, toTypeName(e) FROM nnna FULL JOIN nnnb USING (t); -- { serverError UNSUPPORTED_METHOD } + +SELECT t as e, toTypeName(e) FROM nnna FULL JOIN nnnb USING (t.t); -- { serverError AMBIGUOUS_IDENTIFIER } +SELECT t.t as e, toTypeName(e) FROM nnna FULL JOIN nnnb USING (t.t); +SELECT t.t.t as e, toTypeName(e) FROM nnna FULL JOIN nnnb USING (t.t); +SELECT t.t.t.t.t as e, toTypeName(e) FROM nnna FULL JOIN nnnb USING (t.t); + +SELECT t.*, t.* APPLY toTypeName FROM nnna FULL JOIN nnnb USING (t.t); -- { serverError AMBIGUOUS_IDENTIFIER } +SELECT t.t.*, t.t.* APPLY toTypeName FROM nnna FULL JOIN nnnb USING (t.t); +SELECT t.t.t.*, t.t.t.* APPLY toTypeName FROM nnna FULL JOIN nnnb USING (t.t); +SELECT t.t.t.t.*, t.t.t.t.* APPLY toTypeName FROM nnna FULL JOIN nnnb USING (t.t); + +SELECT t as e, toTypeName(e) FROM nnna FULL JOIN nnnb USING (t.t.t); +SELECT t.t as e, toTypeName(e) FROM nnna FULL JOIN nnnb USING (t.t.t); +SELECT t.t.t as e, toTypeName(e) FROM nnna FULL JOIN nnnb USING (t.t.t); +SELECT t.t.t.t.t as e, toTypeName(e) FROM nnna FULL JOIN nnnb USING (t.t.t); + +SELECT t.*, t.* APPLY toTypeName FROM nnna FULL JOIN nnnb USING (t.t.t); +SELECT t.t.*, t.t.* APPLY toTypeName FROM nnna FULL JOIN nnnb USING (t.t.t); +SELECT t.t.t.*, t.t.t.* APPLY toTypeName FROM nnna FULL JOIN nnnb USING (t.t.t); +SELECT t.t.t.t.*, t.t.t.t.* APPLY toTypeName FROM nnna FULL JOIN nnnb USING (t.t.t); + +SELECT t as e, toTypeName(e) FROM nnna FULL JOIN nnnb USING (t.t.t.t.t); +SELECT t.t as e, toTypeName(e) FROM nnna FULL JOIN nnnb USING (t.t.t.t.t); +SELECT t.t.t as e, toTypeName(e) FROM nnna FULL JOIN nnnb USING (t.t.t.t.t); +SELECT t.t.t.t.t as e, toTypeName(e) FROM nnna FULL JOIN nnnb USING (t.t.t.t.t); +SELECT t.t.t.t.* FROM nnna FULL JOIN nnnb USING (t.t.t.t.t); + +SELECT 1 FROM na FULL JOIN nb USING (t); -- { serverError UNSUPPORTED_METHOD } + +{% endfor -%} + +DROP TABLE IF EXISTS ta; +DROP TABLE IF EXISTS tb; +DROP TABLE IF EXISTS ttta; +DROP TABLE IF EXISTS tttb; +DROP TABLE IF EXISTS na; +DROP TABLE IF EXISTS nb; +DROP TABLE IF EXISTS nnna; +DROP TABLE IF EXISTS nnnb; diff --git a/tests/queries/0_stateless/02731_parallel_replicas_join_subquery.reference b/tests/queries/0_stateless/02731_parallel_replicas_join_subquery.reference index 608fac626fa..ec4928bc325 100644 --- a/tests/queries/0_stateless/02731_parallel_replicas_join_subquery.reference +++ 
b/tests/queries/0_stateless/02731_parallel_replicas_join_subquery.reference @@ -20,9 +20,21 @@ 23 Sx>b:^UG XpedE)Q: 7433019734386307503 29 2j&S)ba?XG QuQj 17163829389637435056 3 UlI+1 14144472852965836438 +0 PJFiUe#J2O _s\' 14427935816175499794 +1 >T%O ,z< 17537932797009027240 +12 D[6,P #}Lmb[ ZzU 6394957109822140795 +18 $_N- 24422838680427462 +2 bX?}ix [ Ny]2 G 16242612901291874718 +20 VE] Y 15120036904703536841 +22 Ti~3)N)< A!( 3 18361093572663329113 +23 Sx>b:^UG XpedE)Q: 7433019734386307503 +29 2j&S)ba?XG QuQj 17163829389637435056 +3 UlI+1 14144472852965836438 =============== QUERIES EXECUTED BY PARALLEL INNER QUERY ALONE =============== +0 3 SELECT `__table1`.`key` AS `key`, `__table1`.`value1` AS `value1`, `__table1`.`value2` AS `value2`, toUInt64(min(`__table1`.`time`)) AS `start_ts` FROM `default`.`join_inner_table` AS `__table1` PREWHERE (`__table1`.`id` = \'833c9e22-c245-4eb5-8745-117a9a1f26b1\') AND (`__table1`.`number` > 1610517366120) GROUP BY `__table1`.`key`, `__table1`.`value1`, `__table1`.`value2` ORDER BY `__table1`.`key` ASC, `__table1`.`value1` ASC, `__table1`.`value2` ASC LIMIT _CAST(10, \'UInt64\') SETTINGS allow_experimental_parallel_reading_from_replicas = 1, allow_experimental_analyzer = 1 0 3 SELECT `key`, `value1`, `value2`, toUInt64(min(`time`)) AS `start_ts` FROM `default`.`join_inner_table` PREWHERE (`id` = \'833c9e22-c245-4eb5-8745-117a9a1f26b1\') AND (`number` > toUInt64(\'1610517366120\')) GROUP BY `key`, `value1`, `value2` ORDER BY `key` ASC, `value1` ASC, `value2` ASC LIMIT 10 -1 1 -- Parallel inner query alone\nSELECT\n key,\n value1,\n value2,\n toUInt64(min(time)) AS start_ts\nFROM join_inner_table\nPREWHERE (id = \'833c9e22-c245-4eb5-8745-117a9a1f26b1\') AND (number > toUInt64(\'1610517366120\'))\nGROUP BY key, value1, value2\nORDER BY key, value1, value2\nLIMIT 10\nSETTINGS allow_experimental_parallel_reading_from_replicas = 1; +1 1 -- Parallel inner query alone\nSELECT\n key,\n value1,\n value2,\n toUInt64(min(time)) AS start_ts\nFROM join_inner_table\nPREWHERE (id = \'833c9e22-c245-4eb5-8745-117a9a1f26b1\') AND (number > toUInt64(\'1610517366120\'))\nGROUP BY key, value1, value2\nORDER BY key, value1, value2\nLIMIT 10\nSETTINGS allow_experimental_parallel_reading_from_replicas = 1, allow_experimental_analyzer=0; +1 1 -- Parallel inner query alone\nSELECT\n key,\n value1,\n value2,\n toUInt64(min(time)) AS start_ts\nFROM join_inner_table\nPREWHERE (id = \'833c9e22-c245-4eb5-8745-117a9a1f26b1\') AND (number > toUInt64(\'1610517366120\'))\nGROUP BY key, value1, value2\nORDER BY key, value1, value2\nLIMIT 10\nSETTINGS allow_experimental_parallel_reading_from_replicas = 1, allow_experimental_analyzer=1; =============== OUTER QUERY (NO PARALLEL) =============== >T%O ,z< 10 NQTpY# W\\Xx4 10 @@ -39,6 +51,16 @@ U c 10 UlI+1 10 bX?}ix [ Ny]2 G 10 tT%O ,z< 10 +NQTpY# W\\Xx4 10 +PJFiUe#J2O _s\' 10 +U c 10 +UlI+1 10 +bX?}ix [ Ny]2 G 10 +t 1610517366120) GROUP BY `__table1`.`key`, `__table1`.`value1`, `__table1`.`value2` +0 3 SELECT `__table2`.`value1` AS `value1`, `__table2`.`value2` AS `value2`, count() AS `count` FROM `default`.`join_outer_table` AS `__table1` GLOBAL ALL INNER JOIN `_data_` AS `__table2` USING (`key`) GROUP BY `__table1`.`key`, `__table2`.`value1`, `__table2`.`value2` 0 3 SELECT `key`, `value1`, `value2` FROM `default`.`join_inner_table` PREWHERE (`id` = \'833c9e22-c245-4eb5-8745-117a9a1f26b1\') AND (`number` > toUInt64(\'1610517366120\')) GROUP BY `key`, `value1`, `value2` -0 3 SELECT `value1`, `value2`, count() AS `count` FROM 
`default`.`join_outer_table` ALL INNER JOIN `_data_7105554115296635472_12427301373021079614` USING (`key`) GROUP BY `key`, `value1`, `value2` -1 1 -- Parallel full query\nSELECT\n value1,\n value2,\n avg(count) AS avg\nFROM\n (\n SELECT\n key,\n value1,\n value2,\n count() AS count\n FROM join_outer_table\n INNER JOIN\n (\n SELECT\n key,\n value1,\n value2,\n toUInt64(min(time)) AS start_ts\n FROM join_inner_table\n PREWHERE (id = \'833c9e22-c245-4eb5-8745-117a9a1f26b1\') AND (number > toUInt64(\'1610517366120\'))\n GROUP BY key, value1, value2\n ) USING (key)\n GROUP BY key, value1, value2\n )\nGROUP BY value1, value2\nORDER BY value1, value2\nSETTINGS allow_experimental_parallel_reading_from_replicas = 1; +0 3 SELECT `value1`, `value2`, count() AS `count` FROM `default`.`join_outer_table` ALL INNER JOIN `_data_` USING (`key`) GROUP BY `key`, `value1`, `value2` +1 1 -- Parallel full query\nSELECT\n value1,\n value2,\n avg(count) AS avg\nFROM\n (\n SELECT\n key,\n value1,\n value2,\n count() AS count\n FROM join_outer_table\n INNER JOIN\n (\n SELECT\n key,\n value1,\n value2,\n toUInt64(min(time)) AS start_ts\n FROM join_inner_table\n PREWHERE (id = \'833c9e22-c245-4eb5-8745-117a9a1f26b1\') AND (number > toUInt64(\'1610517366120\'))\n GROUP BY key, value1, value2\n ) USING (key)\n GROUP BY key, value1, value2\n )\nGROUP BY value1, value2\nORDER BY value1, value2\nSETTINGS allow_experimental_parallel_reading_from_replicas = 1, allow_experimental_analyzer=0; +1 1 -- Parallel full query\nSELECT\n value1,\n value2,\n avg(count) AS avg\nFROM\n (\n SELECT\n key,\n value1,\n value2,\n count() AS count\n FROM join_outer_table\n INNER JOIN\n (\n SELECT\n key,\n value1,\n value2,\n toUInt64(min(time)) AS start_ts\n FROM join_inner_table\n PREWHERE (id = \'833c9e22-c245-4eb5-8745-117a9a1f26b1\') AND (number > toUInt64(\'1610517366120\'))\n GROUP BY key, value1, value2\n ) USING (key)\n GROUP BY key, value1, value2\n )\nGROUP BY value1, value2\nORDER BY value1, value2\nSETTINGS allow_experimental_parallel_reading_from_replicas = 1, allow_experimental_analyzer=1; diff --git a/tests/queries/0_stateless/02731_parallel_replicas_join_subquery.sql b/tests/queries/0_stateless/02731_parallel_replicas_join_subquery.sql index a117378b0bf..7693d0da295 100644 --- a/tests/queries/0_stateless/02731_parallel_replicas_join_subquery.sql +++ b/tests/queries/0_stateless/02731_parallel_replicas_join_subquery.sql @@ -21,7 +21,6 @@ SELECT * FROM generateRandom('number Int64, value1 String, value2 String, time Int64', 1, 10, 2) LIMIT 100; -SET allow_experimental_analyzer = 0; SET max_parallel_replicas = 3; SET prefer_localhost_replica = 1; SET cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost'; @@ -39,6 +38,18 @@ FROM join_inner_table GROUP BY key, value1, value2 ORDER BY key, value1, value2 LIMIT 10; +-- settings allow_experimental_analyzer=0; + +-- SELECT +-- key, +-- value1, +-- value2, +-- toUInt64(min(time)) AS start_ts +-- FROM join_inner_table +-- PREWHERE (id = '833c9e22-c245-4eb5-8745-117a9a1f26b1') AND (number > toUInt64('1610517366120')) +-- GROUP BY key, value1, value2 +-- ORDER BY key, value1, value2 +-- LIMIT 10 settings allow_experimental_analyzer=1; SELECT '=============== INNER QUERY (PARALLEL) ==============='; @@ -53,18 +64,31 @@ PREWHERE (id = '833c9e22-c245-4eb5-8745-117a9a1f26b1') AND (number > toUInt64('1 GROUP BY key, value1, value2 ORDER BY key, value1, value2 LIMIT 10 -SETTINGS allow_experimental_parallel_reading_from_replicas = 1; +SETTINGS 
allow_experimental_parallel_reading_from_replicas = 1, allow_experimental_analyzer=0; + +-- Parallel inner query alone +SELECT + key, + value1, + value2, + toUInt64(min(time)) AS start_ts +FROM join_inner_table +PREWHERE (id = '833c9e22-c245-4eb5-8745-117a9a1f26b1') AND (number > toUInt64('1610517366120')) +GROUP BY key, value1, value2 +ORDER BY key, value1, value2 +LIMIT 10 +SETTINGS allow_experimental_parallel_reading_from_replicas = 1, allow_experimental_analyzer=1; SELECT '=============== QUERIES EXECUTED BY PARALLEL INNER QUERY ALONE ==============='; SYSTEM FLUSH LOGS; -- There should be 4 queries. The main query as received by the initiator and the 3 equal queries sent to each replica -SELECT is_initial_query, count() as c, query, +SELECT is_initial_query, count() as c, replaceRegexpAll(query, '_data_(\d+)_(\d+)', '_data_') as query FROM system.query_log WHERE event_date >= yesterday() AND type = 'QueryFinish' - AND initial_query_id = + AND initial_query_id IN ( SELECT query_id FROM system.query_log @@ -160,18 +184,48 @@ FROM ) GROUP BY value1, value2 ORDER BY value1, value2 -SETTINGS allow_experimental_parallel_reading_from_replicas = 1; +SETTINGS allow_experimental_parallel_reading_from_replicas = 1, allow_experimental_analyzer=0; + +-- Parallel full query +SELECT + value1, + value2, + avg(count) AS avg +FROM + ( + SELECT + key, + value1, + value2, + count() AS count + FROM join_outer_table + INNER JOIN + ( + SELECT + key, + value1, + value2, + toUInt64(min(time)) AS start_ts + FROM join_inner_table + PREWHERE (id = '833c9e22-c245-4eb5-8745-117a9a1f26b1') AND (number > toUInt64('1610517366120')) + GROUP BY key, value1, value2 + ) USING (key) + GROUP BY key, value1, value2 + ) +GROUP BY value1, value2 +ORDER BY value1, value2 +SETTINGS allow_experimental_parallel_reading_from_replicas = 1, allow_experimental_analyzer=1; SYSTEM FLUSH LOGS; -- There should be 7 queries. 
The main query as received by the initiator, the 3 equal queries to execute the subquery -- in the inner join and the 3 queries executing the whole query (but replacing the subquery with a temp table) -SELECT is_initial_query, count() as c, query, +SELECT is_initial_query, count() as c, replaceRegexpAll(query, '_data_(\d+)_(\d+)', '_data_') as query FROM system.query_log WHERE event_date >= yesterday() AND type = 'QueryFinish' - AND initial_query_id = + AND initial_query_id IN ( SELECT query_id FROM system.query_log diff --git a/tests/queries/0_stateless/02784_parallel_replicas_automatic_decision_join.reference b/tests/queries/0_stateless/02784_parallel_replicas_automatic_decision_join.reference index 802d55f8ae3..521e3e2edbc 100644 --- a/tests/queries/0_stateless/02784_parallel_replicas_automatic_decision_join.reference +++ b/tests/queries/0_stateless/02784_parallel_replicas_automatic_decision_join.reference @@ -1,6 +1,4 @@ -02784_automatic_parallel_replicas_join-default_simple_join_10M_pure 0 estimated parallel replicas 02784_automatic_parallel_replicas_join-default_simple_join_10M_pure 1 estimated parallel replicas -02784_automatic_parallel_replicas_join-default_simple_join_5M_pure 0 estimated parallel replicas 02784_automatic_parallel_replicas_join-default_simple_join_5M_pure 2 estimated parallel replicas 02784_automatic_parallel_replicas_join-default_simple_join_1M_pure 1 estimated parallel replicas 02784_automatic_parallel_replicas_join-default_simple_join_1M_pure 10 estimated parallel replicas diff --git a/tests/queries/0_stateless/02784_parallel_replicas_automatic_decision_join.sh b/tests/queries/0_stateless/02784_parallel_replicas_automatic_decision_join.sh index baeeb820da5..1a74c3230c6 100755 --- a/tests/queries/0_stateless/02784_parallel_replicas_automatic_decision_join.sh +++ b/tests/queries/0_stateless/02784_parallel_replicas_automatic_decision_join.sh @@ -68,7 +68,7 @@ function run_query_with_pure_parallel_replicas () { --allow_experimental_parallel_reading_from_replicas 1 \ --parallel_replicas_for_non_replicated_merge_tree 1 \ --parallel_replicas_min_number_of_rows_per_replica "$2" \ - |& grep "It is enough work for" | awk '{ print substr($7, 2, length($7) - 2) "\t" $20 " estimated parallel replicas" }' + |& grep "It is enough work for" | awk '{ print substr($7, 2, length($7) - 2) "\t" $20 " estimated parallel replicas" }' | sort -n -k2 -b | grep -Pv "\t0 estimated parallel replicas" } query_id_base="02784_automatic_parallel_replicas_join-$CLICKHOUSE_DATABASE" diff --git a/tests/queries/0_stateless/02797_join_nested_lowcardinality_convert.reference b/tests/queries/0_stateless/02797_join_nested_lowcardinality_convert.reference new file mode 100644 index 00000000000..23ee1a7ce30 --- /dev/null +++ b/tests/queries/0_stateless/02797_join_nested_lowcardinality_convert.reference @@ -0,0 +1,4 @@ +[0] [0] +[0] [0] +[0] [0] +[0] [0] diff --git a/tests/queries/0_stateless/02797_join_nested_lowcardinality_convert.sql.j2 b/tests/queries/0_stateless/02797_join_nested_lowcardinality_convert.sql.j2 new file mode 100644 index 00000000000..79a7c654f10 --- /dev/null +++ b/tests/queries/0_stateless/02797_join_nested_lowcardinality_convert.sql.j2 @@ -0,0 +1,37 @@ + + +DROP TABLE IF EXISTS test1__fuzz_36; +DROP TABLE IF EXISTS test1__fuzz_38; +DROP TABLE IF EXISTS test1__fuzz_41; + +CREATE TABLE test1__fuzz_36 (`pt` Array(Array(LowCardinality(Int256))), `exposure_uv` UInt32) ENGINE = Memory; +CREATE TABLE test1__fuzz_38 (`pt` Array(Array(Int256)), `exposure_uv` UInt32) ENGINE = Memory; +CREATE 
TABLE test1__fuzz_41 (`pt` Array(Array(Int8)), `exposure_uv` Decimal(76, 39)) ENGINE = Memory; + +insert into test1__fuzz_36 select * from generateRandom() limit 10; +insert into test1__fuzz_38 select * from generateRandom() limit 10; +insert into test1__fuzz_41 select * from generateRandom() limit 10; + +{% for join_algorithm in ['default','grace_hash','partial_merge','full_sorting_merge'] -%} + +SET join_algorithm = '{{ join_algorithm }}'; + +SELECT * +FROM (SELECT materialize([0] :: Array(LowCardinality(Int64))) as pt) AS m0 +LEFT JOIN (SELECT materialize([0] :: Array(Int64)) as pt) AS m2 +ON m0.pt = m2.pt +; + +SELECT * FROM ( + SELECT m0.pt, m0.exposure_uv AS exposure_uv, round(m2.exposure_uv, 10) FROM (SELECT pt, exposure_uv FROM test1__fuzz_36) AS m0 + LEFT JOIN (SELECT pt, exposure_uv FROM test1__fuzz_38) AS m1 ON m0.pt = m1.pt LEFT JOIN (SELECT pt, exposure_uv FROM test1__fuzz_41) AS m2 + ON m0.pt = m2.pt +) AS c0 +ORDER BY exposure_uv ASC NULLS LAST +FORMAT Null SETTINGS join_use_nulls = 1; + +{% endfor -%} + +DROP TABLE IF EXISTS test1__fuzz_36; +DROP TABLE IF EXISTS test1__fuzz_38; +DROP TABLE IF EXISTS test1__fuzz_41; diff --git a/tests/queries/0_stateless/02798_explain_settings_not_applied_bug.sql b/tests/queries/0_stateless/02798_explain_settings_not_applied_bug.sql index 76f2129abfa..5b9976714ea 100644 --- a/tests/queries/0_stateless/02798_explain_settings_not_applied_bug.sql +++ b/tests/queries/0_stateless/02798_explain_settings_not_applied_bug.sql @@ -1,3 +1,4 @@ +SET output_format_pretty_color=1; SET read_in_order_two_level_merge_threshold=1000000; DROP TABLE IF EXISTS t; diff --git a/tests/queries/0_stateless/02813_seriesDecomposeSTL.reference b/tests/queries/0_stateless/02813_seriesDecomposeSTL.reference index dc30e7f8371..28dae705335 100644 --- a/tests/queries/0_stateless/02813_seriesDecomposeSTL.reference +++ b/tests/queries/0_stateless/02813_seriesDecomposeSTL.reference @@ -1,4 +1,4 @@ -[[-13.529999,-3.1799996,16.71,-13.53,-3.1799996,16.71,-13.53,-3.1799996,16.71,-13.530001,-3.18,16.710001,-13.530001,-3.1800003,16.710001,-13.530001,-3.1800003,16.710001,-13.530001,-3.1799994,16.71,-13.529999,-3.1799994,16.709997],[23.63,23.63,23.630003,23.630001,23.630001,23.630001,23.630001,23.630001,23.630001,23.630001,23.630001,23.63,23.630001,23.630001,23.63,23.630001,23.630001,23.63,23.630001,23.630001,23.630001,23.630001,23.630001,23.630003],[0,0.0000019073486,-0.0000019073486,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-0.0000019073486,0,0]] -[[4.04452e-8,-1.7846537e-8,-5.9488454e-9,0,0,0,0,0,0,-1.9868216e-8,-9.5297715e-8,2.2540547e-9,3.4229203e-8,8.573613e-8],[1.9999999,2,2,2,2,2,2,2,2,2,2,2,1.9999996,1.9999996],[1.1920929e-7,0,0,0,0,0,0,0,0,0,0,0,3.5762787e-7,2.3841858e-7]] -[[-13.529999,-3.1799996,16.71,-13.53,-3.1799996,16.71,-13.53,-3.1799996,16.71,-13.530001,-3.18,16.710001,-13.530001,-3.1800003,16.710001,-13.530001,-3.1800003,16.710001,-13.530001,-3.1799994,16.71,-13.529999,-3.1799994,16.709997],[23.63,23.63,23.630003,23.630001,23.630001,23.630001,23.630001,23.630001,23.630001,23.630001,23.630001,23.63,23.630001,23.630001,23.63,23.630001,23.630001,23.63,23.630001,23.630001,23.630001,23.630001,23.630001,23.630003],[0,0.0000019073486,-0.0000019073486,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-0.0000019073486,0,0]] 
-[[53.946846,-4.8119445,43.525013,-23.71359,-42.472305,-51.636955,-50.458298,-51.982674,37.62072,-15.9006605,56.65076,-5.809669,57.143845,-2.0370207,54.050922,-4.897961,43.954018,-23.808758,-42.651337,-51.86827,-50.709732,-52.18156,37.734905,-15.853402,56.91643,-5.8815174,57.253094,-2.012879,54.157806,-4.9817176,44.384747,-23.902956,-42.830154,-52.10025,-50.96271,-52.3829,37.84573,-15.81032,57.177113,-5.958963,57.356136,-1.9952412,54.27533,-5.066312,44.878296,-23.956438,-42.993656,-52.337124,-51.208073,-52.615646,37.91102,-15.8062525,57.49891,-6.056076,57.45604,-1.9797823,54.39525,-5.1483474,45.374573],[88.028534,88.95315,89.87776,90.802376,91.64913,92.49588,93.342636,94.19737,95.0521,95.90684,96.712975,97.51912,98.32526,98.36342,98.40158,98.43974,98.36777,98.29579,98.223816,98.536446,98.849075,99.161705,99.7552,100.348694,100.94219,101.53184,102.12149,102.711136,103.79921,104.88729,105.975365,107.50462,109.033875,110.56313,111.79767,113.032196,114.26673,115.02128,115.775826,116.53037,117.15541,117.78044,118.40548,118.86489,119.3243,119.783714,120.04031,120.29691,120.55351,120.78621,121.01891,121.25161,121.533585,121.81555,122.09753,122.41821,122.7389,123.059586,123.39267],[-2.97538,2.8587952,-23.402771,0.91121674,4.8231735,9.141075,8.115662,10.785301,0.32717896,5.99382,-12.363731,5.29055,0.53089905,-2.3264008,-3.4524994,1.4582214,-2.321785,2.51297,5.4275208,3.3318253,5.8606567,0.019859314,-4.4901123,-12.495293,-5.8586197,-1.650322,-11.374588,4.3017426,4.042984,1.094429,9.639885,3.3983307,-3.20372,-5.462883,-5.834961,-6.649292,-1.1124649,3.7890396,16.047066,-2.5714111,8.488449,-2.785202,2.319191,-0.79857635,13.797401,-5.827278,-6.0466614,-5.9597855,-7.3454437,-3.1705627,6.0700684,3.5546417,1.9675064,-0.7594757,2.446434,0.5615692,0.86585236,-3.9112396,1.2327576]] +[[-13.529999,-3.1799996,16.71,-13.53,-3.1799996,16.71,-13.53,-3.1799996,16.71,-13.530001,-3.18,16.710001,-13.530001,-3.1800003,16.710001,-13.530001,-3.1800003,16.710001,-13.530001,-3.1799994,16.71,-13.529999,-3.1799994,16.709997],[23.63,23.63,23.630003,23.630001,23.630001,23.630001,23.630001,23.630001,23.630001,23.630001,23.630001,23.63,23.630001,23.630001,23.63,23.630001,23.630001,23.63,23.630001,23.630001,23.630001,23.630001,23.630001,23.630003],[0,0.0000019073486,-0.0000019073486,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-0.0000019073486,0,0],[10.1,20.449999,40.340004,10.100001,20.45,40.34,10.100001,20.45,40.34,10.1,20.45,40.34,10.1,20.45,40.34,10.1,20.45,40.34,10.1,20.45,40.34,10.100002,20.45,40.34]] +[[4.04452e-8,-1.7846537e-8,-5.9488454e-9,0,0,0,0,0,0,-1.9868216e-8,-9.5297715e-8,2.2540547e-9,3.4229203e-8,8.573613e-8],[1.9999999,2,2,2,2,2,2,2,2,2,2,2,1.9999996,1.9999996],[1.1920929e-7,0,0,0,0,0,0,0,0,0,0,0,3.5762787e-7,2.3841858e-7],[1.9999999,2,2,2,2,2,2,2,2,2,1.9999999,2,1.9999996,1.9999998]] +[[-13.529999,-3.1799996,16.71,-13.53,-3.1799996,16.71,-13.53,-3.1799996,16.71,-13.530001,-3.18,16.710001,-13.530001,-3.1800003,16.710001,-13.530001,-3.1800003,16.710001,-13.530001,-3.1799994,16.71,-13.529999,-3.1799994,16.709997],[23.63,23.63,23.630003,23.630001,23.630001,23.630001,23.630001,23.630001,23.630001,23.630001,23.630001,23.63,23.630001,23.630001,23.63,23.630001,23.630001,23.63,23.630001,23.630001,23.630001,23.630001,23.630001,23.630003],[0,0.0000019073486,-0.0000019073486,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-0.0000019073486,0,0],[10.1,20.449999,40.340004,10.100001,20.45,40.34,10.100001,20.45,40.34,10.1,20.45,40.34,10.1,20.45,40.34,10.1,20.45,40.34,10.1,20.45,40.34,10.100002,20.45,40.34]] 
+[[53.946846,-4.8119445,43.525013,-23.71359,-42.472305,-51.636955,-50.458298,-51.982674,37.62072,-15.9006605,56.65076,-5.809669,57.143845,-2.0370207,54.050922,-4.897961,43.954018,-23.808758,-42.651337,-51.86827,-50.709732,-52.18156,37.734905,-15.853402,56.91643,-5.8815174,57.253094,-2.012879,54.157806,-4.9817176,44.384747,-23.902956,-42.830154,-52.10025,-50.96271,-52.3829,37.84573,-15.81032,57.177113,-5.958963,57.356136,-1.9952412,54.27533,-5.066312,44.878296,-23.956438,-42.993656,-52.337124,-51.208073,-52.615646,37.91102,-15.8062525,57.49891,-6.056076,57.45604,-1.9797823,54.39525,-5.1483474,45.374573],[88.028534,88.95315,89.87776,90.802376,91.64913,92.49588,93.342636,94.19737,95.0521,95.90684,96.712975,97.51912,98.32526,98.36342,98.40158,98.43974,98.36777,98.29579,98.223816,98.536446,98.849075,99.161705,99.7552,100.348694,100.94219,101.53184,102.12149,102.711136,103.79921,104.88729,105.975365,107.50462,109.033875,110.56313,111.79767,113.032196,114.26673,115.02128,115.775826,116.53037,117.15541,117.78044,118.40548,118.86489,119.3243,119.783714,120.04031,120.29691,120.55351,120.78621,121.01891,121.25161,121.533585,121.81555,122.09753,122.41821,122.7389,123.059586,123.39267],[-2.97538,2.8587952,-23.402771,0.91121674,4.8231735,9.141075,8.115662,10.785301,0.32717896,5.99382,-12.363731,5.29055,0.53089905,-2.3264008,-3.4524994,1.4582214,-2.321785,2.51297,5.4275208,3.3318253,5.8606567,0.019859314,-4.4901123,-12.495293,-5.8586197,-1.650322,-11.374588,4.3017426,4.042984,1.094429,9.639885,3.3983307,-3.20372,-5.462883,-5.834961,-6.649292,-1.1124649,3.7890396,16.047066,-2.5714111,8.488449,-2.785202,2.319191,-0.79857635,13.797401,-5.827278,-6.0466614,-5.9597855,-7.3454437,-3.1705627,6.0700684,3.5546417,1.9675064,-0.7594757,2.446434,0.5615692,0.86585236,-3.9112396,1.2327576],[141.97537,84.141205,133.40277,67.08878,49.176826,40.858925,42.88434,42.2147,132.67282,80.00618,153.36374,91.70945,155.4691,96.3264,152.4525,93.54178,142.32178,74.48703,55.57248,46.668175,48.139343,46.980145,137.49011,84.49529,157.85863,95.65032,159.37459,100.69826,157.95702,99.90557,150.3601,83.60167,66.20372,58.462883,60.834957,60.649296,152.11246,99.21096,172.95294,110.57141,174.51155,115.7852,172.68082,113.79858,164.2026,95.82728,77.04666,67.95979,69.34544,68.17056,158.92993,105.44536,179.0325,115.759476,179.55356,120.43843,177.13416,117.91124,168.76724]] diff --git a/tests/queries/0_stateless/02841_not_ready_set_bug.sh b/tests/queries/0_stateless/02841_not_ready_set_bug.sh index fd7f62d28bf..3aaffe51578 100755 --- a/tests/queries/0_stateless/02841_not_ready_set_bug.sh +++ b/tests/queries/0_stateless/02841_not_ready_set_bug.sh @@ -9,3 +9,4 @@ $CLICKHOUSE_CLIENT -q "create table t1 (number UInt64) engine = MergeTree order $CLICKHOUSE_CLIENT -q "insert into t1 select number from numbers(10);" $CLICKHOUSE_CLIENT --max_threads=2 --max_result_rows=1 --result_overflow_mode=break -q "with tab as (select min(number) from t1 prewhere number in (select number from view(select number, row_number() OVER (partition by number % 2 ORDER BY number DESC) from numbers_mt(1e4)) where number != 2 order by number)) select number from t1 union all select * from tab;" > /dev/null +$CLICKHOUSE_CLIENT -q "SELECT * FROM system.tables WHERE 1 in (SELECT number from numbers(2)) AND database = currentDatabase() format Null" diff --git a/tests/queries/0_stateless/02870_per_column_settings.reference b/tests/queries/0_stateless/02870_per_column_settings.reference new file mode 100644 index 00000000000..144c8c5ee2e --- /dev/null +++ 
b/tests/queries/0_stateless/02870_per_column_settings.reference @@ -0,0 +1,18 @@ +CREATE TABLE default.tab\n(\n `id` UInt64,\n `long_string` String SETTINGS (min_compress_block_size = 163840, max_compress_block_size = 163840),\n `v1` String,\n `v2` UInt64,\n `v3` Float32,\n `v4` Float64\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/default/tab/2870\', \'r1\')\nORDER BY id\nSETTINGS min_bytes_for_wide_part = 1, index_granularity = 8192 +1000 +CREATE TABLE default.tab\n(\n `id` UInt64,\n `long_string` String SETTINGS (min_compress_block_size = 8192, max_compress_block_size = 163840),\n `v1` String,\n `v2` UInt64,\n `v3` Float32,\n `v4` Float64\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/default/tab/2870\', \'r1\')\nORDER BY id\nSETTINGS min_bytes_for_wide_part = 1, index_granularity = 8192 +CREATE TABLE default.tab\n(\n `id` UInt64,\n `long_string` String SETTINGS (max_compress_block_size = 163840),\n `v1` String,\n `v2` UInt64,\n `v3` Float32,\n `v4` Float64\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/default/tab/2870\', \'r1\')\nORDER BY id\nSETTINGS min_bytes_for_wide_part = 1, index_granularity = 8192 +CREATE TABLE default.tab\n(\n `id` UInt64,\n `long_string` String,\n `v1` String,\n `v2` UInt64,\n `v3` Float32,\n `v4` Float64\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/default/tab/2870\', \'r1\')\nORDER BY id\nSETTINGS min_bytes_for_wide_part = 1, index_granularity = 8192 +CREATE TABLE default.tab\n(\n `id` UInt64,\n `long_string` String SETTINGS (min_compress_block_size = 163840, max_compress_block_size = 163840),\n `v1` String,\n `v2` UInt64,\n `v3` Float32,\n `v4` Float64\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/default/tab/2870\', \'r1\')\nORDER BY id\nSETTINGS min_bytes_for_wide_part = 1, index_granularity = 8192 +--- +(0,0) 0 +(1,1) 1 +(2,2) 2 +(3,3) 3 +(4,4) 4 +(5,5) 5 +(6,6) 6 +(7,7) 7 +(8,8) 8 +(9,9) 9 +--- diff --git a/tests/queries/0_stateless/02870_per_column_settings.sql b/tests/queries/0_stateless/02870_per_column_settings.sql new file mode 100644 index 00000000000..345cf5cc744 --- /dev/null +++ b/tests/queries/0_stateless/02870_per_column_settings.sql @@ -0,0 +1,69 @@ +-- Tags: no-random-merge-tree-settings, no-replicated-database +-- Tag no-replicated-database: Old syntax is not allowed +-- The test uses a replicated table to test serialization and deserialization of a column with a settings declaration through ZooKeeper +-- Tests column-level settings for MergeTree* tables + +DROP TABLE IF EXISTS tab; + +CREATE TABLE tab +( + id UInt64, + long_string String SETTINGS (min_compress_block_size = 163840, max_compress_block_size = 163840), + v1 String, + v2 UInt64, + v3 Float32, + v4 Float64 +) +ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/tab/2870', 'r1') +ORDER BY id +SETTINGS min_bytes_for_wide_part = 1; + +SHOW CREATE tab; + +INSERT INTO TABLE tab SELECT number, randomPrintableASCII(1000), randomPrintableASCII(10), rand(number), rand(number+1), rand(number+2) FROM numbers(1000); +SELECT count() FROM tab; + +ALTER TABLE tab MODIFY COLUMN long_string MODIFY SETTING min_compress_block_size = 8192; +SHOW CREATE tab; + +ALTER TABLE tab MODIFY COLUMN long_string RESET SETTING min_compress_block_size; +SHOW CREATE tab; + +ALTER TABLE tab MODIFY COLUMN long_string REMOVE SETTINGS; +SHOW CREATE tab; + +ALTER TABLE tab MODIFY COLUMN long_string String SETTINGS (min_compress_block_size = 163840, max_compress_block_size = 163840); +SHOW CREATE tab; + +DROP TABLE tab; + +SELECT '--- '; + +SET allow_experimental_object_type = 1; + +CREATE 
TABLE tab +( + id UInt64, + tup Tuple(UInt64, UInt64) SETTINGS (min_compress_block_size = 81920, max_compress_block_size = 163840), + json JSON SETTINGS (min_compress_block_size = 81920, max_compress_block_size = 163840), +) +ENGINE = MergeTree +ORDER BY id +SETTINGS min_bytes_for_wide_part = 1; + +INSERT INTO TABLE tab SELECT number, tuple(number, number), concat('{"key": ', toString(number), ' ,"value": ', toString(rand(number+1)), '}') FROM numbers(1000); +SELECT tup, json.key AS key FROM tab ORDER BY key LIMIT 10; + +DROP TABLE tab; + +SELECT '--- '; + +-- Unsupported column-level settings are rejected +CREATE TABLE tab +( + id UInt64, + long_string String SETTINGS (min_block_size = 81920, max_compress_block_size = 163840), +) +ENGINE = MergeTree +ORDER BY id +SETTINGS min_bytes_for_wide_part = 1; -- {serverError UNKNOWN_SETTING} diff --git a/tests/queries/0_stateless/02891_array_shingles.reference b/tests/queries/0_stateless/02891_array_shingles.reference new file mode 100644 index 00000000000..00bd9f6bb41 --- /dev/null +++ b/tests/queries/0_stateless/02891_array_shingles.reference @@ -0,0 +1,11 @@ +-- negative tests +-- const and non-const inputs +[1,2,3,4,5] 1 [[1],[2],[3],[4],[5]] [[1],[2],[3],[4],[5]] +[1,2,3,4,5] 3 [[1,2,3],[2,3,4],[3,4,5]] [[1,2,3],[2,3,4],[3,4,5]] +[1,2,3,4,5] 5 [[1,2,3,4,5]] [[1,2,3,4,5]] +['ab','c','de','','hi'] 1 [['ab'],['c'],['de'],[''],['hi']] [['ab'],['c'],['de'],[''],['hi']] +['ab','c','de','','hi'] 3 [['ab','c','de'],['c','de',''],['de','','hi']] [['ab','c','de'],['c','de',''],['de','','hi']] +['ab','c','de','','hi'] 5 [['ab','c','de','','hi']] [['ab','c','de','','hi']] +-- special cases +[[2],[1]] +[[2],[1]] diff --git a/tests/queries/0_stateless/02891_array_shingles.sql b/tests/queries/0_stateless/02891_array_shingles.sql new file mode 100644 index 00000000000..e2b5cde880f --- /dev/null +++ b/tests/queries/0_stateless/02891_array_shingles.sql @@ -0,0 +1,23 @@ +SELECT '-- negative tests'; +SELECT arrayShingles(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT arrayShingles([1, 2, 3, 4, 5]); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT arrayShingles([1, 2, 3, 4, 5], 2, 3); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT arrayShingles([1, 2, 3, 4, 5], 'str'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT arrayShingles((1, 2, 3, 4, 5), 0); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT arrayShingles([1, 2, 3, 4, 5], 0); -- { serverError BAD_ARGUMENTS } +SELECT arrayShingles([1, 2, 3, 4, 5], -2); -- { serverError BAD_ARGUMENTS } +SELECT arrayShingles([1, 2, 3, 4, 5], 6); -- { serverError BAD_ARGUMENTS } +SELECT arrayShingles([], 1); -- { serverError BAD_ARGUMENTS } + +SELECT '-- const and non-const inputs'; +SELECT [1, 2, 3, 4, 5] AS arr, 1 AS len, arrayShingles(arr, len), arrayShingles(materialize(arr), len); +SELECT [1, 2, 3, 4, 5] AS arr, 3 AS len, arrayShingles(arr, len), arrayShingles(materialize(arr), len); +SELECT [1, 2 ,3, 4, 5] AS arr, 5 AS len, arrayShingles(arr, len), arrayShingles(materialize(arr), len); + +SELECT ['ab', 'c', 'de', '', 'hi'] AS arr, 1 AS len, arrayShingles(arr, len), arrayShingles(materialize(arr), len); +SELECT ['ab', 'c', 'de', '', 'hi'] AS arr, 3 AS len, arrayShingles(arr, len), arrayShingles(materialize(arr), len); +SELECT ['ab', 'c', 'de', '', 'hi'] AS arr, 5 AS len, arrayShingles(arr, len), arrayShingles(materialize(arr), len); + +SELECT '-- special cases'; +SELECT arrayShingles([toNullable(2), toNullable(1)], 1); +SELECT arrayShingles([toLowCardinality(2), 
toLowCardinality(1)], 1); diff --git a/tests/queries/0_stateless/02893_vertical_final_array_join.reference b/tests/queries/0_stateless/02893_vertical_final_array_join.reference new file mode 100644 index 00000000000..27b54a2e42e --- /dev/null +++ b/tests/queries/0_stateless/02893_vertical_final_array_join.reference @@ -0,0 +1,15 @@ +-- { echo ON } +SELECT arrayJoin([(k1, v), (k2, v)]) AS row, row.1 as k FROM t FINAL WHERE k1 != 3 AND k = 1 ORDER BY row SETTINGS enable_vertical_final = 0; +(1,4) 1 +SELECT arrayJoin([(k1, v), (k2, v)]) AS row, row.1 as k FROM t FINAL WHERE k1 != 3 AND k = 1 ORDER BY row SETTINGS enable_vertical_final = 1; +(1,4) 1 +SELECT arrayJoin([(k1, v), (k2, v)]) AS row, row.1 as k FROM t FINAL WHERE k1 != 3 AND k = 2 ORDER BY row SETTINGS enable_vertical_final = 0; +(2,4) 2 +(2,5) 2 +SELECT arrayJoin([(k1, v), (k2, v)]) AS row, row.1 as k FROM t FINAL WHERE k1 != 3 AND k = 2 ORDER BY row SETTINGS enable_vertical_final = 1; +(2,4) 2 +(2,5) 2 +SELECT arrayJoin([(k1, v), (k2, v)]) AS row, row.1 as k FROM t FINAL WHERE k1 != 3 AND k = 3 ORDER BY row SETTINGS enable_vertical_final = 0; +(3,5) 3 +SELECT arrayJoin([(k1, v), (k2, v)]) AS row, row.1 as k FROM t FINAL WHERE k1 != 3 AND k = 3 ORDER BY row SETTINGS enable_vertical_final = 1; +(3,5) 3 diff --git a/tests/queries/0_stateless/02893_vertical_final_array_join.sql b/tests/queries/0_stateless/02893_vertical_final_array_join.sql new file mode 100644 index 00000000000..cc2e37fdc6e --- /dev/null +++ b/tests/queries/0_stateless/02893_vertical_final_array_join.sql @@ -0,0 +1,10 @@ +CREATE TABLE t (k1 UInt64, k2 UInt64, v UInt64) ENGINE = ReplacingMergeTree() ORDER BY (k1, k2); +SET optimize_on_insert = 0; +INSERT INTO t VALUES (1, 2, 3) (1, 2, 4) (2, 3, 4), (2, 3, 5); +-- { echo ON } +SELECT arrayJoin([(k1, v), (k2, v)]) AS row, row.1 as k FROM t FINAL WHERE k1 != 3 AND k = 1 ORDER BY row SETTINGS enable_vertical_final = 0; +SELECT arrayJoin([(k1, v), (k2, v)]) AS row, row.1 as k FROM t FINAL WHERE k1 != 3 AND k = 1 ORDER BY row SETTINGS enable_vertical_final = 1; +SELECT arrayJoin([(k1, v), (k2, v)]) AS row, row.1 as k FROM t FINAL WHERE k1 != 3 AND k = 2 ORDER BY row SETTINGS enable_vertical_final = 0; +SELECT arrayJoin([(k1, v), (k2, v)]) AS row, row.1 as k FROM t FINAL WHERE k1 != 3 AND k = 2 ORDER BY row SETTINGS enable_vertical_final = 1; +SELECT arrayJoin([(k1, v), (k2, v)]) AS row, row.1 as k FROM t FINAL WHERE k1 != 3 AND k = 3 ORDER BY row SETTINGS enable_vertical_final = 0; +SELECT arrayJoin([(k1, v), (k2, v)]) AS row, row.1 as k FROM t FINAL WHERE k1 != 3 AND k = 3 ORDER BY row SETTINGS enable_vertical_final = 1; diff --git a/tests/queries/0_stateless/02896_leading_zeroes_no_octal.reference b/tests/queries/0_stateless/02896_leading_zeroes_no_octal.reference deleted file mode 100644 index 5b932f50824..00000000000 --- a/tests/queries/0_stateless/02896_leading_zeroes_no_octal.reference +++ /dev/null @@ -1,35 +0,0 @@ -Leading zeroes into INTEGER -1 1 00000 0 0 -1 2 0 0 0 -1 3 00 0 0 -1 4 01 1 1 -1 5 +01 1 1 -1 6 -01 -1 -1 -1 7 0001 1 1 -1 8 0005 5 5 -1 9 0008 8 8 -1 10 0017 17 17 -1 11 0021 21 21 -1 12 0051 51 51 -1 13 00000123 123 123 -1 14 0b10000 16 16 -1 15 0x0abcd 43981 43981 -1 16 0000.008 0 0 -1 17 1000.0008 1000 1000 -1 18 0008.0008 8 8 -Leading zeroes into Float32 -1 1 00000 0 0 -1 2 00009.00009 9.00009 9.00009 -1 3 00009e9 9000000000 9000000000 -1 4 00009e09 9000000000 9000000000 -1 5 00009e0009 9000000000 9000000000 -1 6 -00000 -0.1 -0.1 -1 7 -00009.00009 -9.00009 -9.00009 -1 8 -00009e9 -9000000000 
-9000000000 -1 9 -00009e09 -9000000000 -9000000000 -1 10 -00009e0009 -9000000000 -9000000000 -1 11 +00000 0 0 -1 12 +00009.00009 9.00009 9.00009 -1 13 +00009e9 9000000000 9000000000 -1 14 +00009e09 9000000000 9000000000 -1 15 +00009e0009 9000000000 9000000000 diff --git a/tests/queries/0_stateless/02896_leading_zeroes_no_octal.sql b/tests/queries/0_stateless/02896_leading_zeroes_no_octal.sql deleted file mode 100644 index 69cc06a46f8..00000000000 --- a/tests/queries/0_stateless/02896_leading_zeroes_no_octal.sql +++ /dev/null @@ -1,28 +0,0 @@ -DROP TABLE IF EXISTS t_leading_zeroes; -DROP TABLE IF EXISTS t_leading_zeroes_f; - -CREATE TABLE t_leading_zeroes(id INTEGER, input String, val INTEGER, expected INTEGER) ENGINE=MergeTree ORDER BY id; - -INSERT INTO t_leading_zeroes VALUES (1, '00000', 00000, 0), (2, '0', 0, 0), (3, '00', 00, 0), (4, '01', 01, 1), (5, '+01', +01, 1); -INSERT INTO t_leading_zeroes VALUES (6, '-01', -01, -1), (7, '0001', 0001, 1), (8, '0005', 0005, 5), (9, '0008', 0008, 8); -INSERT INTO t_leading_zeroes VALUES (10, '0017', 0017, 17), (11, '0021', 0021, 21), (12, '0051', 0051, 51), (13, '00000123', 00000123, 123); -INSERT INTO t_leading_zeroes VALUES (14, '0b10000', 0b10000, 16), (15, '0x0abcd', 0x0abcd, 43981), (16, '0000.008', 0000.008, 0) -INSERT INTO t_leading_zeroes VALUES (17, '1000.0008', 1000.0008, 1000), (18, '0008.0008', 0008.0008, 8); - -SELECT 'Leading zeroes into INTEGER'; -SELECT t.val == t.expected AS ok, * FROM t_leading_zeroes t ORDER BY id; - --- Floats don't go via the weird octal path: -CREATE TABLE t_leading_zeroes_f(id INTEGER, input String, val Float32, expected Float32) ENGINE=MergeTree ORDER BY id; -INSERT INTO t_leading_zeroes_f VALUES (1, '00000', 00000, 0), (2, '00009.00009', 00009.00009, 9.00009), (3, '00009e9', 00009e9, 9e9), (4, '00009e09', 00009e09, 9e9), (5, '00009e0009', 00009e0009, 9e9); -INSERT INTO t_leading_zeroes_f VALUES (6, '-00000', -00000.1, -0.1), (7, '-00009.00009', -00009.00009, -9.00009), (8, '-00009e9', -00009e9, -9e9), (9, '-00009e09', -00009e09, -9e9), (10, '-00009e0009', -00009e0009, -9e9); -INSERT INTO t_leading_zeroes_f VALUES (11, '+00000', +00000., 0), (12, '+00009.00009', +00009.00009, 9.00009), (13, '+00009e9', +00009e9, 9e9), (14, '+00009e09', +00009e09, 9e9), (15, '+00009e0009', +00009e0009, 9e9); --- Coincidentally, the following result in 9 rather than 9e9 because of readFloatTextFastImpl --- using readUIntTextUpToNSignificantDigits<4>(exponent, in) --- INSERT INTO t_leading_zeroes_f VALUES (100, '00009e00009', 00009e00009, 9e9), (101, '-00009e00009', -00009e00009, -9e9), (102, '+00009e00009', +00009e00009, 9e9) - -SELECT 'Leading zeroes into Float32'; -SELECT t.val == t.expected AS ok, * FROM t_leading_zeroes_f t ORDER BY id; - -DROP TABLE IF EXISTS t_leading_zeroes; -DROP TABLE IF EXISTS t_leading_zeroes_f; \ No newline at end of file diff --git a/tests/queries/0_stateless/02896_max_execution_time_with_break_overflow_mode.sql b/tests/queries/0_stateless/02896_max_execution_time_with_break_overflow_mode.sql index 439b8b3f032..ec86a66c7dd 100644 --- a/tests/queries/0_stateless/02896_max_execution_time_with_break_overflow_mode.sql +++ b/tests/queries/0_stateless/02896_max_execution_time_with_break_overflow_mode.sql @@ -1,10 +1,10 @@ -- Tags: no-fasttest -- Query stops after timeout without an error -SELECT * FROM numbers(100000000) SETTINGS max_block_size=1, max_execution_time=13, timeout_overflow_mode='break' FORMAT Null; +SELECT * FROM numbers(100000000) SETTINGS max_block_size=1, max_execution_time=2, 
timeout_overflow_mode='break' FORMAT Null; --- Query returns an error when runtime is estimated after 10 sec of execution -SELECT * FROM numbers(100000000) SETTINGS max_block_size=1, max_execution_time=13, timeout_overflow_mode='throw' FORMAT Null; -- { serverError TOO_SLOW } +-- Query returns an error when the runtime is estimated after timeout_before_checking_execution_speed has passed +SELECT * FROM numbers(100000000) SETTINGS max_block_size=1, timeout_before_checking_execution_speed=1, max_estimated_execution_time=2, timeout_overflow_mode='throw' FORMAT Null; -- { serverError TOO_SLOW } -- Query returns timeout error before its full execution time is estimated -SELECT * FROM numbers(100000000) SETTINGS max_block_size=1, max_execution_time=2, timeout_overflow_mode='throw' FORMAT Null; -- { serverError TIMEOUT_EXCEEDED } +SELECT * FROM numbers(100000000) SETTINGS max_block_size=1, timeout_before_checking_execution_speed=1, max_execution_time=2, timeout_overflow_mode='throw' FORMAT Null; -- { serverError TIMEOUT_EXCEEDED } diff --git a/tests/queries/0_stateless/02896_union_distinct_http_format.sh b/tests/queries/0_stateless/02896_union_distinct_http_format.sh index bb35800e39d..9426321e62f 100755 --- a/tests/queries/0_stateless/02896_union_distinct_http_format.sh +++ b/tests/queries/0_stateless/02896_union_distinct_http_format.sh @@ -4,5 +4,5 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -curl -d@- -sS "${CLICKHOUSE_URL}" <<< 'SELECT 1 UNION DISTINCT SELECT 1 FORMAT PrettyCompactMonoBlock' -curl -d@- -sS "${CLICKHOUSE_URL}" <<< 'SELECT * FROM (SELECT 1 as a UNION DISTINCT SELECT 2 as a) ORDER BY a FORMAT PrettyCompactMonoBlock' +curl -d@- -sS "${CLICKHOUSE_URL}" <<< 'SELECT 1 UNION DISTINCT SELECT 1 SETTINGS output_format_pretty_color=1 FORMAT PrettyCompactMonoBlock' +curl -d@- -sS "${CLICKHOUSE_URL}" <<< 'SELECT * FROM (SELECT 1 as a UNION DISTINCT SELECT 2 as a) ORDER BY a SETTINGS output_format_pretty_color=1 FORMAT PrettyCompactMonoBlock' diff --git a/tests/queries/0_stateless/02901_parallel_replicas_rollup.reference b/tests/queries/0_stateless/02901_parallel_replicas_rollup.reference index 29004fde9d4..c7f78cd74c2 100644 --- a/tests/queries/0_stateless/02901_parallel_replicas_rollup.reference +++ b/tests/queries/0_stateless/02901_parallel_replicas_rollup.reference @@ -1,5 +1,7 @@ 1 02901_parallel_replicas_rollup-default Used parallel replicas: true +Distributed query with analyzer +1 0 0 0 6 2019 0 0 2 2019 1 0 2 diff --git a/tests/queries/0_stateless/02901_parallel_replicas_rollup.sh b/tests/queries/0_stateless/02901_parallel_replicas_rollup.sh index 029b4d07ee2..def813c17b4 100755 --- a/tests/queries/0_stateless/02901_parallel_replicas_rollup.sh +++ b/tests/queries/0_stateless/02901_parallel_replicas_rollup.sh @@ -39,6 +39,11 @@ $CLICKHOUSE_CLIENT \ ORDER BY max((SELECT 1 WHERE 0)); "; were_parallel_replicas_used $query_id + +# There was a bug in the analyzer distributed header.
+echo "Distributed query with analyzer" +$CLICKHOUSE_CLIENT --query "SELECT 1 FROM remote('127.0.0.{2,3}', currentDatabase(), nested) GROUP BY 1 WITH ROLLUP ORDER BY max((SELECT 1 WHERE 0))" + $CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS nested" diff --git a/tests/queries/0_stateless/02919_ddsketch_quantile.reference b/tests/queries/0_stateless/02919_ddsketch_quantile.reference new file mode 100644 index 00000000000..9615f717e85 --- /dev/null +++ b/tests/queries/0_stateless/02919_ddsketch_quantile.reference @@ -0,0 +1,22 @@ +1 +98.5 +344.02 +83.01 +503.02 +2 +-1.01 +0 +3 +-5.99 +4 +4.01 +5 +1.99 +6 +5.99 +7 +4.01 +8 +139082109.85 +9 +[1799.03] diff --git a/tests/queries/0_stateless/02919_ddsketch_quantile.sql b/tests/queries/0_stateless/02919_ddsketch_quantile.sql new file mode 100644 index 00000000000..d98978c117e --- /dev/null +++ b/tests/queries/0_stateless/02919_ddsketch_quantile.sql @@ -0,0 +1,73 @@ +SELECT '1'; -- simple test +SELECT round(quantileDD(0.01, 0.5)(number), 2) FROM numbers(200); +SELECT round(quantileDD(0.0001, 0.69)(number), 2) FROM numbers(500); +SELECT round(quantileDD(0.003, 0.42)(number), 2) FROM numbers(200); +SELECT round(quantileDD(0.02, 0.99)(number), 2) FROM numbers(500); + +SELECT '2'; -- median is close to 0 +SELECT round(quantileDD(0.01, 0.5)(number), 2) +FROM +( + SELECT arrayJoin([toInt64(number), number - 10]) AS number + FROM numbers(0, 10) +); +SELECT round(quantileDD(0.01, 0.5)(number - 10), 2) FROM numbers(21); + +SELECT '3'; -- all values are negative +SELECT round(quantileDD(0.01, 0.99)(-number), 2) FROM numbers(1, 500); + +SELECT '4'; -- min and max values of integer types (-2^63, 2^63-1) +SELECT round(quantileDD(0.01, 0.5)(number), 2) +FROM +( + SELECT arrayJoin([toInt64(number), number - 9223372036854775808, toInt64(number + 9223372036854775798)]) AS number + FROM numbers(0, 10) +); + +SELECT '5'; -- min and max values of floating point types +SELECT round(quantileDD(0.01, 0.42)(number), 2) +FROM +( + SELECT arrayJoin([toFloat32(number), number - 3.4028235e+38, toFloat32(number + 3.4028235e+38)]) AS number + FROM numbers(0, 10) +); + +SELECT '6'; -- denormalized floats +SELECT round(quantileDD(0.01, 0.69)(number), 2) +FROM +( + SELECT arrayJoin([toFloat32(number), number - 1.1754944e-38, toFloat32(number + 1.1754944e-38)]) AS number + FROM numbers(0, 10) +); + +SELECT '7'; -- NaNs +SELECT round(quantileDD(0.01, 0.5)(number), 2) +FROM +( + SELECT arrayJoin([toFloat32(number), NaN * number]) AS number + FROM numbers(0, 10) +); + +SELECT '8'; -- sparse sketch + +SELECT round(quantileDD(0.01, 0.75)(number), 2) +FROM +( + SELECT number * 1e7 AS number + FROM numbers(20) +); + +SELECT '9'; -- ser/deser + +DROP TABLE IF EXISTS `02919_ddsketch_quantile`; + +CREATE TABLE `02919_ddsketch_quantile` +ENGINE = Log AS +SELECT quantilesDDState(0.001, 0.9)(number) AS sketch +FROM numbers(1000); + +INSERT INTO `02919_ddsketch_quantile` SELECT quantilesDDState(0.001, 0.9)(number + 1000) +FROM numbers(1000); + +SELECT arrayMap(a -> round(a, 2), (quantilesDDMerge(0.001, 0.9)(sketch))) +FROM `02919_ddsketch_quantile`; diff --git a/tests/queries/0_stateless/02922_analyzer_aggregate_nothing_type.reference b/tests/queries/0_stateless/02922_analyzer_aggregate_nothing_type.reference index f9c7b26d245..feebf7dbf49 100644 --- a/tests/queries/0_stateless/02922_analyzer_aggregate_nothing_type.reference +++ b/tests/queries/0_stateless/02922_analyzer_aggregate_nothing_type.reference @@ -1,13 +1,97 @@ +\N +\N +[nan,nan] +\N [nan,nan] 0 \N 0 0 0 +\N +\N + +\N +[nan,nan] 
+[nan,nan] + +[nan,nan] +-- notinhgs: +\N Nullable(Nothing) +0 UInt8 +\N Nullable(Nothing) +0 UInt64 +0 UInt64 +0 UInt64 +\N Nullable(Nothing) +\N Nullable(Nothing) +\N Nullable(Nothing) +-- quantile: +\N +\N +\N +\N +1 +1 +1 +-- quantiles: +[nan,nan] +[nan,nan] +[nan,nan] +[nan,nan] +[1,1] +[1,1] +[1,1] +-- nothing: +\N +\N +\N +\N +0 +0 +0 +-- nothing(UInt64): +0 +0 +0 +0 +0 +0 +0 +-- nothing(Nullable(Nothing)): +\N +\N +\N +\N +\N +\N +\N +-- sum: +\N +\N +\N +\N +6 +6 +6 +-- count: +0 +0 +0 +0 +6 +6 +6 0 0 0 \N +0 \N +0 \N + 0 \N 0 0 +\N \N [nan,nan] + +\N \N [nan,nan] diff --git a/tests/queries/0_stateless/02922_analyzer_aggregate_nothing_type.sql b/tests/queries/0_stateless/02922_analyzer_aggregate_nothing_type.sql index a064c091df0..5b9343c6e13 100644 --- a/tests/queries/0_stateless/02922_analyzer_aggregate_nothing_type.sql +++ b/tests/queries/0_stateless/02922_analyzer_aggregate_nothing_type.sql @@ -1,7 +1,87 @@ -#!/usr/bin/env -S ${HOME}/clickhouse-client --progress --queries-file +select sum(NULL); +select quantile(0.5)(NULL); +select quantiles(0.1, 0.2)(NULL :: Nullable(UInt32)); +select quantile(0.5)(NULL), quantiles(0.1, 0.2)(NULL :: Nullable(UInt32)), count(NULL), sum(NULL); SELECT count(NULL) FROM remote('127.0.0.{1,2}', numbers(3)) GROUP BY number % 2 WITH TOTALS; +SELECT quantile(0.5)(NULL) FROM remote('127.0.0.{1,2}', numbers(3)) GROUP BY number % 2 WITH TOTALS; +SELECT quantiles(0.1, 0.2)(NULL :: Nullable(UInt32)) FROM remote('127.0.0.{1,2}', numbers(3)) GROUP BY number % 2 WITH TOTALS; + +SELECT '-- notinhgs:'; +SELECT nothing() as n, toTypeName(n); +SELECT nothing(1) as n, toTypeName(n); +SELECT nothing(NULL) as n, toTypeName(n); +SELECT nothingUInt64() as n, toTypeName(n); +SELECT nothingUInt64(1) as n, toTypeName(n); +SELECT nothingUInt64(NULL) as n, toTypeName(n); +SELECT nothingNull() as n, toTypeName(n); +SELECT nothingNull(1) as n, toTypeName(n); +SELECT nothingNull(NULL) as n, toTypeName(n); + +SELECT '-- quantile:'; +SELECT quantileArray(0.5)([NULL, NULL]) AS x FROM remote('127.0.0.{1,2}', numbers(3)); +SELECT quantileArrayIf(0.5)([NULL], 1) AS x FROM remote('127.0.0.{1,2}', numbers(3)); +SELECT quantileArrayIf(0.5)([NULL], 0) AS x FROM remote('127.0.0.{1,2}', numbers(3)); +SELECT quantileIfArray(0.5)([NULL, NULL], [1, 0]) AS x FROM remote('127.0.0.{1,2}', numbers(3)); +SELECT quantileIfArray(0.5)([1, NULL], [1, 0]) AS x FROM remote('127.0.0.{1,2}', numbers(3)); +SELECT quantileIfArrayIf(0.5)([1, NULL], [1, 0], 1) AS x FROM remote('127.0.0.{1,2}', numbers(3)); +SELECT quantileIfArrayArray(0.5)([[1, NULL]], [[1, 0]]) AS x FROM remote('127.0.0.{1,2}', numbers(3)); + +SELECT '-- quantiles:'; +select quantilesArray(0.5, 0.9)([NULL :: Nullable(UInt64), NULL]) AS x FROM remote('127.0.0.{1,2}', numbers(3)); +SELECT quantilesArrayIf(0.5, 0.9)([NULL :: Nullable(UInt64)], 1) AS x FROM remote('127.0.0.{1,2}', numbers(3)); +SELECT quantilesArrayIf(0.5, 0.9)([NULL :: Nullable(UInt64)], 0) AS x FROM remote('127.0.0.{1,2}', numbers(3)); +SELECT quantilesIfArray(0.5, 0.9)([NULL :: Nullable(UInt64), NULL], [1, 0]) AS x FROM remote('127.0.0.{1,2}', numbers(3)); +SELECT quantilesIfArray(0.5, 0.9)([1, NULL], [1, 0]) AS x FROM remote('127.0.0.{1,2}', numbers(3)); +SELECT quantilesIfArrayIf(0.5, 0.9)([1, NULL], [1, 0], 1) AS x FROM remote('127.0.0.{1,2}', numbers(3)); +SELECT quantilesIfArrayArray(0.5, 0.9)([[1, NULL]], [[1, 0]]) AS x FROM remote('127.0.0.{1,2}', numbers(3)); + +SELECT '-- nothing:'; +SELECT nothingArray([NULL, NULL]) AS x FROM remote('127.0.0.{1,2}', numbers(3)); 
+SELECT nothingArrayIf([NULL], 1) AS x FROM remote('127.0.0.{1,2}', numbers(3)); +SELECT nothingArrayIf([NULL], 0) AS x FROM remote('127.0.0.{1,2}', numbers(3)); +SELECT nothingIfArray([NULL, NULL], [1, 0]) AS x FROM remote('127.0.0.{1,2}', numbers(3)); +SELECT nothingIfArray([1, NULL], [1, 0]) AS x FROM remote('127.0.0.{1,2}', numbers(3)); +SELECT nothingIfArrayIf([1, NULL], [1, 0], 1) AS x FROM remote('127.0.0.{1,2}', numbers(3)); +SELECT nothingIfArrayArray([[1, NULL]], [[1, 0]]) AS x FROM remote('127.0.0.{1,2}', numbers(3)); + +SELECT '-- nothing(UInt64):'; +SELECT nothingUInt64Array([NULL, NULL]) AS x FROM remote('127.0.0.{1,2}', numbers(3)); +SELECT nothingUInt64ArrayIf([NULL], 1) AS x FROM remote('127.0.0.{1,2}', numbers(3)); +SELECT nothingUInt64ArrayIf([NULL], 0) AS x FROM remote('127.0.0.{1,2}', numbers(3)); +SELECT nothingUInt64IfArray([NULL, NULL], [1, 0]) AS x FROM remote('127.0.0.{1,2}', numbers(3)); +SELECT nothingUInt64IfArray([1, NULL], [1, 0]) AS x FROM remote('127.0.0.{1,2}', numbers(3)); +SELECT nothingUInt64IfArrayIf([1, NULL], [1, 0], 1) AS x FROM remote('127.0.0.{1,2}', numbers(3)); +SELECT nothingUInt64IfArrayArray([[1, NULL]], [[1, 0]]) AS x FROM remote('127.0.0.{1,2}', numbers(3)); + +SELECT '-- nothing(Nullable(Nothing)):'; +SELECT nothingNullArray([NULL, NULL]) AS x FROM remote('127.0.0.{1,2}', numbers(3)); +SELECT nothingNullArrayIf([NULL], 1) AS x FROM remote('127.0.0.{1,2}', numbers(3)); +SELECT nothingNullArrayIf([NULL], 0) AS x FROM remote('127.0.0.{1,2}', numbers(3)); +SELECT nothingNullIfArray([NULL, NULL], [1, 0]) AS x FROM remote('127.0.0.{1,2}', numbers(3)); +SELECT nothingNullIfArray([1, NULL], [1, 0]) AS x FROM remote('127.0.0.{1,2}', numbers(3)); +SELECT nothingNullIfArrayIf([1, NULL], [1, 0], 1) AS x FROM remote('127.0.0.{1,2}', numbers(3)); +SELECT nothingNullIfArrayArray([[1, NULL]], [[1, 0]]) AS x FROM remote('127.0.0.{1,2}', numbers(3)); + +SELECT '-- sum:'; +SELECT sumArray([NULL, NULL]) AS x FROM remote('127.0.0.{1,2}', numbers(3)); +SELECT sumArrayIf([NULL], 1) AS x FROM remote('127.0.0.{1,2}', numbers(3)); +SELECT sumArrayIf([NULL], 0) AS x FROM remote('127.0.0.{1,2}', numbers(3)); +SELECT sumIfArray([NULL, NULL], [1, 0]) AS x FROM remote('127.0.0.{1,2}', numbers(3)); +SELECT sumIfArray([1, NULL], [1, 0]) AS x FROM remote('127.0.0.{1,2}', numbers(3)); +SELECT sumIfArrayIf([1, NULL], [1, 0], 1) AS x FROM remote('127.0.0.{1,2}', numbers(3)); +SELECT sumIfArrayArray([[1, NULL]], [[1, 0]]) AS x FROM remote('127.0.0.{1,2}', numbers(3)); + +SELECT '-- count:'; +SELECT countArray([NULL, NULL]) AS x FROM remote('127.0.0.{1,2}', numbers(3)); +SELECT countArrayIf([NULL], 1) AS x FROM remote('127.0.0.{1,2}', numbers(3)); +SELECT countArrayIf([NULL], 0) AS x FROM remote('127.0.0.{1,2}', numbers(3)); +SELECT countIfArray([NULL, NULL], [1, 0]) AS x FROM remote('127.0.0.{1,2}', numbers(3)); +SELECT countIfArray([1, NULL], [1, 0]) AS x FROM remote('127.0.0.{1,2}', numbers(3)); +SELECT countIfArrayIf([1, NULL], [1, 0], 1) AS x FROM remote('127.0.0.{1,2}', numbers(3)); +SELECT countIfArrayArray([[1, NULL]], [[1, 0]]) AS x FROM remote('127.0.0.{1,2}', numbers(3)); + DROP TABLE IF EXISTS t1; CREATE TABLE t1 (`n` UInt64) ENGINE = MergeTree ORDER BY tuple(); @@ -18,7 +98,7 @@ SET SELECT count(NULL) FROM t1 WITH TOTALS; SELECT count(NULL as a), a FROM t1 WITH TOTALS; --- result differs in old and new analyzer: --- SELECT count(NULL as a), sum(a) FROM t1 WITH TOTALS; +SELECT count(NULL as a), sum(a) FROM t1 WITH TOTALS; SELECT uniq(NULL) FROM t1 WITH TOTALS; 
+SELECT quantile(0.5)(NULL), quantile(0.9)(NULL), quantiles(0.1, 0.2)(NULL :: Nullable(UInt32)) FROM t1 WITH TOTALS; diff --git a/tests/queries/0_stateless/02931_rewrite_sum_column_and_constant.reference b/tests/queries/0_stateless/02931_rewrite_sum_column_and_constant.reference new file mode 100644 index 00000000000..8f29910e9ae --- /dev/null +++ b/tests/queries/0_stateless/02931_rewrite_sum_column_and_constant.reference @@ -0,0 +1,476 @@ +-- { echoOn } +Select sum(number + 1) from numbers(10); +55 +Select sum(1 + number) from numbers(10); +55 +Select sum(number - 1) from numbers(10); +35 +Select sum(1 - number) from numbers(10); +-35 +EXPLAIN SYNTAX (Select sum(number + 1) from numbers(10)); +SELECT sum(number) + (1 * count(number)) +FROM numbers(10) +EXPLAIN SYNTAX (Select sum(1 + number) from numbers(10)); +SELECT (1 * count(number)) + sum(number) +FROM numbers(10) +EXPLAIN SYNTAX (Select sum(number - 1) from numbers(10)); +SELECT sum(number) - (1 * count(number)) +FROM numbers(10) +EXPLAIN SYNTAX (Select sum(1 - number) from numbers(10)); +SELECT (1 * count(number)) - sum(number) +FROM numbers(10) +WITH 1::Nullable(UInt64) as my_literal Select sum(number + my_literal) from numbers(0); +\N +WITH 1::Nullable(UInt64) as my_literal Select sum(number) + my_literal * count() from numbers(0); +0 +EXPLAIN SYNTAX (WITH 1::Nullable(UInt64) as my_literal Select sum(number + my_literal) from numbers(0)); +WITH CAST(\'1\', \'Nullable(UInt64)\') AS my_literal +SELECT sum(number + my_literal) +FROM numbers(0) +EXPLAIN SYNTAX (WITH 1::Nullable(UInt64) as my_literal Select sum(number) + my_literal * count() from numbers(0)); +WITH CAST(\'1\', \'Nullable(UInt64)\') AS my_literal +SELECT sum(number) + (my_literal * count()) +FROM numbers(0) +-- { echoOn } +SELECT sum(uint64 + 1 AS i) from test_table where i > 0; +20 +SELECT sum(uint64 + 1) AS j from test_table having j > 0; +20 +SELECT sum(uint64 + 1 AS i) j from test_table where i > 0 having j > 0; +20 +SELECT sum((uint64 AS m) + (1 AS n)) j from test_table where m > 0 and n > 0 having j > 0; +20 +SELECT sum(((uint64 AS m) + (1 AS n)) AS i) j from test_table where m > 0 and n > 0 and i > 0 having j > 0; +20 +EXPLAIN SYNTAX (SELECT sum(uint64 + 1 AS i) from test_table where i > 0); +SELECT sum(uint64) + (1 * count(uint64)) +FROM test_table +WHERE ((uint64 + 1) AS i) > 0 +EXPLAIN SYNTAX (SELECT sum(uint64 + 1) AS j from test_table having j > 0); +SELECT sum(uint64) + (1 * count(uint64)) +FROM test_table +HAVING (sum(uint64) + (1 * count(uint64))) > 0 +EXPLAIN SYNTAX (SELECT sum(uint64 + 1 AS i) j from test_table where i > 0 having j > 0); +SELECT sum(uint64) + (1 * count(uint64)) +FROM test_table +WHERE ((uint64 + 1) AS i) > 0 +HAVING (sum(uint64) + (1 * count(uint64))) > 0 +EXPLAIN SYNTAX (SELECT sum((uint64 AS m) + (1 AS n)) j from test_table where m > 0 and n > 0 having j > 0); +SELECT sum(uint64) + ((1 AS n) * count(uint64)) +FROM test_table +WHERE ((uint64 AS m) > 0) AND (n > 0) +HAVING (sum(uint64) + (n * count(uint64))) > 0 +EXPLAIN SYNTAX (SELECT sum(((uint64 AS m) + (1 AS n)) AS i) j from test_table where m > 0 and n > 0 and i > 0 having j > 0); +SELECT sum(uint64) + ((1 AS n) * count(uint64)) +FROM test_table +WHERE ((uint64 AS m) > 0) AND (n > 0) AND (((m + n) AS i) > 0) +HAVING (sum(uint64) + (n * count(uint64))) > 0 +SELECT sum(1 + uint64 AS i) from test_table where i > 0; +20 +SELECT sum(1 + uint64) AS j from test_table having j > 0; +20 +SELECT sum(1 + uint64 AS i) j from test_table where i > 0 having j > 0; +20 +SELECT sum((1 AS m) + 
(uint64 AS n)) j from test_table where m > 0 and n > 0 having j > 0; +20 +SELECT sum(((1 AS m) + (uint64 AS n)) AS i) j from test_table where m > 0 and n > 0 and i > 0 having j > 0; +20 +EXPLAIN SYNTAX (SELECT sum(1 + uint64 AS i) from test_table where i > 0); +SELECT (1 * count(uint64)) + sum(uint64) +FROM test_table +WHERE ((1 + uint64) AS i) > 0 +EXPLAIN SYNTAX (SELECT sum(1 + uint64) AS j from test_table having j > 0); +SELECT (1 * count(uint64)) + sum(uint64) +FROM test_table +HAVING ((1 * count(uint64)) + sum(uint64)) > 0 +EXPLAIN SYNTAX (SELECT sum(1 + uint64 AS i) j from test_table where i > 0 having j > 0); +SELECT (1 * count(uint64)) + sum(uint64) +FROM test_table +WHERE ((1 + uint64) AS i) > 0 +HAVING ((1 * count(uint64)) + sum(uint64)) > 0 +EXPLAIN SYNTAX (SELECT sum((1 AS m) + (uint64 AS n)) j from test_table where m > 0 and n > 0 having j > 0); +SELECT ((1 AS m) * count(uint64)) + sum(uint64) +FROM test_table +WHERE (m > 0) AND ((uint64 AS n) > 0) +HAVING ((m * count(uint64)) + sum(uint64)) > 0 +EXPLAIN SYNTAX (SELECT sum(((1 AS m) + (uint64 AS n)) AS i) j from test_table where m > 0 and n > 0 and i > 0 having j > 0); +SELECT ((1 AS m) * count(uint64)) + sum(uint64) +FROM test_table +WHERE (m > 0) AND ((uint64 AS n) > 0) AND (((m + n) AS i) > 0) +HAVING ((m * count(uint64)) + sum(uint64)) > 0 +SELECT sum(uint64 - 1 AS i) from test_table where i > 0; +10 +SELECT sum(uint64 - 1) AS j from test_table having j > 0; +10 +SELECT sum(uint64 - 1 AS i) j from test_table where i > 0 having j > 0; +10 +SELECT sum((uint64 AS m) - (1 AS n)) j from test_table where m > 0 and n > 0 having j > 0; +10 +SELECT sum(((uint64 AS m) - (1 AS n)) AS i) j from test_table where m > 0 and n > 0 and i > 0 having j > 0; +10 +EXPLAIN SYNTAX (SELECT sum(uint64 - 1 AS i) from test_table where i > 0); +SELECT sum(uint64) - (1 * count(uint64)) +FROM test_table +WHERE ((uint64 - 1) AS i) > 0 +EXPLAIN SYNTAX (SELECT sum(uint64 - 1) AS j from test_table having j > 0); +SELECT sum(uint64) - (1 * count(uint64)) +FROM test_table +HAVING (sum(uint64) - (1 * count(uint64))) > 0 +EXPLAIN SYNTAX (SELECT sum(uint64 - 1 AS i) j from test_table where i > 0 having j > 0); +SELECT sum(uint64) - (1 * count(uint64)) +FROM test_table +WHERE ((uint64 - 1) AS i) > 0 +HAVING (sum(uint64) - (1 * count(uint64))) > 0 +EXPLAIN SYNTAX (SELECT sum((uint64 AS m) - (1 AS n)) j from test_table where m > 0 and n > 0 having j > 0); +SELECT sum(uint64) - ((1 AS n) * count(uint64)) +FROM test_table +WHERE ((uint64 AS m) > 0) AND (n > 0) +HAVING (sum(uint64) - (n * count(uint64))) > 0 +EXPLAIN SYNTAX (SELECT sum(((uint64 AS m) - (1 AS n)) AS i) j from test_table where m > 0 and n > 0 and i > 0 having j > 0); +SELECT sum(uint64) - ((1 AS n) * count(uint64)) +FROM test_table +WHERE ((uint64 AS m) > 0) AND (n > 0) AND (((m - n) AS i) > 0) +HAVING (sum(uint64) - (n * count(uint64))) > 0 +SELECT sum(1 - uint64 AS i) from test_table; +-10 +SELECT sum(1 - uint64) AS j from test_table; +-10 +SELECT sum(1 - uint64 AS i) j from test_table; +-10 +SELECT sum((1 AS m) - (uint64 AS n)) j from test_table; +-10 +SELECT sum(((1 AS m) - (uint64 AS n)) AS i) j from test_table; +-10 +EXPLAIN SYNTAX (SELECT sum(1 - uint64 AS i) from test_table where i > 0); +SELECT (1 * count(uint64)) - sum(uint64) +FROM test_table +WHERE ((1 - uint64) AS i) > 0 +EXPLAIN SYNTAX (SELECT sum(1 - uint64) AS j from test_table having j < 0); +SELECT (1 * count(uint64)) - sum(uint64) +FROM test_table +HAVING ((1 * count(uint64)) - sum(uint64)) < 0 +EXPLAIN SYNTAX (SELECT sum(1 - uint64 
AS i) j from test_table where i > 0 having j < 0); +SELECT (1 * count(uint64)) - sum(uint64) +FROM test_table +WHERE ((1 - uint64) AS i) > 0 +HAVING ((1 * count(uint64)) - sum(uint64)) < 0 +EXPLAIN SYNTAX (SELECT sum((1 AS m) - (uint64 AS n)) j from test_table where m > 0 and n > 0 having j < 0); +SELECT ((1 AS m) * count(uint64)) - sum(uint64) +FROM test_table +WHERE (m > 0) AND ((uint64 AS n) > 0) +HAVING ((m * count(uint64)) - sum(uint64)) < 0 +EXPLAIN SYNTAX (SELECT sum(((1 AS m) - (uint64 AS n)) AS i) j from test_table where m > 0 and n > 0 and i < 0 having j < 0); +SELECT ((1 AS m) * count(uint64)) - sum(uint64) +FROM test_table +WHERE (m > 0) AND ((uint64 AS n) > 0) AND (((m - n) AS i) < 0) +HAVING ((m * count(uint64)) - sum(uint64)) < 0 +SELECT sum(uint64 + 2.11) From test_table; +25.549999999999997 +SELECT sum(2.11 + uint64) From test_table; +25.549999999999997 +SELECT sum(uint64 - 2.11) From test_table; +4.450000000000001 +SELECT sum(2.11 - uint64) From test_table; +-4.450000000000001 +SELECT sum(uint64) + 2.11 * count(uint64) From test_table; +25.549999999999997 +SELECT 2.11 * count(uint64) + sum(uint64) From test_table; +25.549999999999997 +SELECT sum(uint64) - 2.11 * count(uint64) From test_table; +4.450000000000001 +SELECT 2.11 * count(uint64) - sum(uint64) From test_table; +-4.450000000000001 +EXPLAIN SYNTAX (SELECT sum(uint64 + 2.11) From test_table); +SELECT sum(uint64) + (2.11 * count(uint64)) +FROM test_table +EXPLAIN SYNTAX (SELECT sum(2.11 + uint64) From test_table); +SELECT (2.11 * count(uint64)) + sum(uint64) +FROM test_table +EXPLAIN SYNTAX (SELECT sum(uint64 - 2.11) From test_table); +SELECT sum(uint64) - (2.11 * count(uint64)) +FROM test_table +EXPLAIN SYNTAX (SELECT sum(2.11 - uint64) From test_table); +SELECT (2.11 * count(uint64)) - sum(uint64) +FROM test_table +EXPLAIN SYNTAX (SELECT sum(uint64) + 2.11 * count(uint64) From test_table); +SELECT sum(uint64) + (2.11 * count(uint64)) +FROM test_table +EXPLAIN SYNTAX (SELECT 2.11 * count(uint64) + sum(uint64) From test_table); +SELECT (2.11 * count(uint64)) + sum(uint64) +FROM test_table +EXPLAIN SYNTAX (SELECT sum(uint64) - 2.11 * count(uint64) From test_table); +SELECT sum(uint64) - (2.11 * count(uint64)) +FROM test_table +EXPLAIN SYNTAX (SELECT 2.11 * count(uint64) - sum(uint64) From test_table); +SELECT (2.11 * count(uint64)) - sum(uint64) +FROM test_table +SELECT sum(uint64 + 2) From test_table; +25 +SELECT sum(2 + uint64) From test_table; +25 +SELECT sum(uint64 - 2) From test_table; +5 +SELECT sum(2 - uint64) From test_table; +-5 +SELECT sum(uint64) + 2 * count(uint64) From test_table; +25 +SELECT 2 * count(uint64) + sum(uint64) From test_table; +25 +SELECT sum(uint64) - 2 * count(uint64) From test_table; +5 +SELECT 2 * count(uint64) - sum(uint64) From test_table; +-5 +EXPLAIN SYNTAX (SELECT sum(uint64 + 2) From test_table); +SELECT sum(uint64) + (2 * count(uint64)) +FROM test_table +EXPLAIN SYNTAX (SELECT sum(2 + uint64) From test_table); +SELECT (2 * count(uint64)) + sum(uint64) +FROM test_table +EXPLAIN SYNTAX (SELECT sum(uint64 - 2) From test_table); +SELECT sum(uint64) - (2 * count(uint64)) +FROM test_table +EXPLAIN SYNTAX (SELECT sum(2 - uint64) From test_table); +SELECT (2 * count(uint64)) - sum(uint64) +FROM test_table +EXPLAIN SYNTAX (SELECT sum(uint64) + 2 * count(uint64) From test_table); +SELECT sum(uint64) + (2 * count(uint64)) +FROM test_table +EXPLAIN SYNTAX (SELECT 2 * count(uint64) + sum(uint64) From test_table); +SELECT (2 * count(uint64)) + sum(uint64) +FROM test_table +EXPLAIN SYNTAX 
(SELECT sum(uint64) - 2 * count(uint64) From test_table); +SELECT sum(uint64) - (2 * count(uint64)) +FROM test_table +EXPLAIN SYNTAX (SELECT 2 * count(uint64) - sum(uint64) From test_table); +SELECT (2 * count(uint64)) - sum(uint64) +FROM test_table +SELECT sum(float64 + 2) From test_table; +26.5 +SELECT sum(2 + float64) From test_table; +26.5 +SELECT sum(float64 - 2) From test_table; +6.5 +SELECT sum(2 - float64) From test_table; +-6.5 +SELECT sum(float64) + 2 * count(float64) From test_table; +26.5 +SELECT 2 * count(float64) + sum(float64) From test_table; +26.5 +SELECT sum(float64) - 2 * count(float64) From test_table; +6.5 +SELECT 2 * count(float64) - sum(float64) From test_table; +-6.5 +EXPLAIN SYNTAX (SELECT sum(float64 + 2) From test_table); +SELECT sum(float64) + (2 * count(float64)) +FROM test_table +EXPLAIN SYNTAX (SELECT sum(2 + float64) From test_table); +SELECT (2 * count(float64)) + sum(float64) +FROM test_table +EXPLAIN SYNTAX (SELECT sum(float64 - 2) From test_table); +SELECT sum(float64) - (2 * count(float64)) +FROM test_table +EXPLAIN SYNTAX (SELECT sum(2 - float64) From test_table); +SELECT (2 * count(float64)) - sum(float64) +FROM test_table +EXPLAIN SYNTAX (SELECT sum(float64) + 2 * count(float64) From test_table); +SELECT sum(float64) + (2 * count(float64)) +FROM test_table +EXPLAIN SYNTAX (SELECT 2 * count(float64) + sum(float64) From test_table); +SELECT (2 * count(float64)) + sum(float64) +FROM test_table +EXPLAIN SYNTAX (SELECT sum(float64) - 2 * count(float64) From test_table); +SELECT sum(float64) - (2 * count(float64)) +FROM test_table +EXPLAIN SYNTAX (SELECT 2 * count(float64) - sum(float64) From test_table); +SELECT (2 * count(float64)) - sum(float64) +FROM test_table +SELECT sum(decimal32 + 2) From test_table; +26.65 +SELECT sum(2 + decimal32) From test_table; +26.65 +SELECT sum(decimal32 - 2) From test_table; +6.65 +SELECT sum(2 - decimal32) From test_table; +-6.65 +SELECT sum(decimal32) + 2 * count(decimal32) From test_table; +26.65 +SELECT 2 * count(decimal32) + sum(decimal32) From test_table; +26.65 +SELECT sum(decimal32) - 2 * count(decimal32) From test_table; +6.65 +SELECT 2 * count(decimal32) - sum(decimal32) From test_table; +-6.65 +EXPLAIN SYNTAX (SELECT sum(decimal32 + 2) From test_table); +SELECT sum(decimal32) + (2 * count(decimal32)) +FROM test_table +EXPLAIN SYNTAX (SELECT sum(2 + decimal32) From test_table); +SELECT (2 * count(decimal32)) + sum(decimal32) +FROM test_table +EXPLAIN SYNTAX (SELECT sum(decimal32 - 2) From test_table); +SELECT sum(decimal32) - (2 * count(decimal32)) +FROM test_table +EXPLAIN SYNTAX (SELECT sum(2 - decimal32) From test_table); +SELECT (2 * count(decimal32)) - sum(decimal32) +FROM test_table +EXPLAIN SYNTAX (SELECT sum(decimal32) + 2 * count(decimal32) From test_table); +SELECT sum(decimal32) + (2 * count(decimal32)) +FROM test_table +EXPLAIN SYNTAX (SELECT 2 * count(decimal32) + sum(decimal32) From test_table); +SELECT (2 * count(decimal32)) + sum(decimal32) +FROM test_table +EXPLAIN SYNTAX (SELECT sum(decimal32) - 2 * count(decimal32) From test_table); +SELECT sum(decimal32) - (2 * count(decimal32)) +FROM test_table +EXPLAIN SYNTAX (SELECT 2 * count(decimal32) - sum(decimal32) From test_table); +SELECT (2 * count(decimal32)) - sum(decimal32) +FROM test_table +SELECT sum(uint64 + 2) + sum(uint64 + 3) From test_table; +55 +SELECT sum(uint64 + 2) - sum(uint64 + 3) From test_table; +-5 +SELECT sum(uint64 - 2) + sum(uint64 - 3) From test_table; +5 +SELECT sum(uint64 - 2) - sum(uint64 - 3) From test_table; +5 +SELECT 
sum(2 - uint64) - sum(3 - uint64) From test_table; +-5 +SELECT (sum(uint64) + 2 * count(uint64)) + (sum(uint64) + 3 * count(uint64)) From test_table; +55 +SELECT (sum(uint64) + 2 * count(uint64)) - (sum(uint64) + 3 * count(uint64)) From test_table; +-5 +SELECT (sum(uint64) - 2 * count(uint64)) + (sum(uint64) - 3 * count(uint64)) From test_table; +5 +SELECT (sum(uint64) - 2 * count(uint64)) - (sum(uint64) - 3 * count(uint64)) From test_table; +5 +SELECT (2 * count(uint64) - sum(uint64)) + (3 * count(uint64) - sum(uint64)) From test_table; +-5 +EXPLAIN SYNTAX (SELECT sum(uint64 + 2) + sum(uint64 + 3) From test_table); +SELECT (sum(uint64) + (2 * count(uint64))) + (sum(uint64) + (3 * count(uint64))) +FROM test_table +EXPLAIN SYNTAX (SELECT sum(uint64 + 2) - sum(uint64 + 3) From test_table); +SELECT (sum(uint64) + (2 * count(uint64))) - (sum(uint64) + (3 * count(uint64))) +FROM test_table +EXPLAIN SYNTAX (SELECT sum(uint64 - 2) + sum(uint64 - 3) From test_table); +SELECT (sum(uint64) - (2 * count(uint64))) + (sum(uint64) - (3 * count(uint64))) +FROM test_table +EXPLAIN SYNTAX (SELECT sum(uint64 - 2) - sum(uint64 - 3) From test_table); +SELECT (sum(uint64) - (2 * count(uint64))) - (sum(uint64) - (3 * count(uint64))) +FROM test_table +EXPLAIN SYNTAX (SELECT sum(2 - uint64) - sum(3 - uint64) From test_table); +SELECT ((2 * count(uint64)) - sum(uint64)) - ((3 * count(uint64)) - sum(uint64)) +FROM test_table +EXPLAIN SYNTAX (SELECT (sum(uint64) + 2 * count(uint64)) + (sum(uint64) + 3 * count(uint64)) From test_table); +SELECT (sum(uint64) + (2 * count(uint64))) + (sum(uint64) + (3 * count(uint64))) +FROM test_table +EXPLAIN SYNTAX (SELECT (sum(uint64) + 2 * count(uint64)) - (sum(uint64) + 3 * count(uint64)) From test_table); +SELECT (sum(uint64) + (2 * count(uint64))) - (sum(uint64) + (3 * count(uint64))) +FROM test_table +EXPLAIN SYNTAX (SELECT (sum(uint64) - 2 * count(uint64)) + (sum(uint64) - 3 * count(uint64)) From test_table); +SELECT (sum(uint64) - (2 * count(uint64))) + (sum(uint64) - (3 * count(uint64))) +FROM test_table +EXPLAIN SYNTAX (SELECT (sum(uint64) - 2 * count(uint64)) - (sum(uint64) - 3 * count(uint64)) From test_table); +SELECT (sum(uint64) - (2 * count(uint64))) - (sum(uint64) - (3 * count(uint64))) +FROM test_table +EXPLAIN SYNTAX (SELECT (2 * count(uint64) - sum(uint64)) + (3 * count(uint64) - sum(uint64)) From test_table); +SELECT ((2 * count(uint64)) - sum(uint64)) + ((3 * count(uint64)) - sum(uint64)) +FROM test_table +SELECT sum(float64 + 2) + sum(float64 + 3) From test_table; +58 +SELECT sum(float64 + 2) - sum(float64 + 3) From test_table; +-5 +SELECT sum(float64 - 2) + sum(float64 - 3) From test_table; +8 +SELECT sum(float64 - 2) - sum(float64 - 3) From test_table; +5 +SELECT sum(2 - float64) - sum(3 - float64) From test_table; +-5 +SELECT (sum(float64) + 2 * count(float64)) + (sum(float64) + 3 * count(float64)) From test_table; +58 +SELECT (sum(float64) + 2 * count(float64)) - (sum(float64) + 3 * count(float64)) From test_table; +-5 +SELECT (sum(float64) - 2 * count(float64)) + (sum(float64) - 3 * count(float64)) From test_table; +8 +SELECT (sum(float64) - 2 * count(float64)) - (sum(float64) - 3 * count(float64)) From test_table; +5 +SELECT (2 * count(float64) - sum(float64)) + (3 * count(float64) - sum(float64)) From test_table; +-8 +EXPLAIN SYNTAX (SELECT sum(float64 + 2) + sum(float64 + 3) From test_table); +SELECT (sum(float64) + (2 * count(float64))) + (sum(float64) + (3 * count(float64))) +FROM test_table +EXPLAIN SYNTAX (SELECT sum(float64 + 2) - sum(float64 + 3) 
From test_table); +SELECT (sum(float64) + (2 * count(float64))) - (sum(float64) + (3 * count(float64))) +FROM test_table +EXPLAIN SYNTAX (SELECT sum(float64 - 2) + sum(float64 - 3) From test_table); +SELECT (sum(float64) - (2 * count(float64))) + (sum(float64) - (3 * count(float64))) +FROM test_table +EXPLAIN SYNTAX (SELECT sum(float64 - 2) - sum(float64 - 3) From test_table); +SELECT (sum(float64) - (2 * count(float64))) - (sum(float64) - (3 * count(float64))) +FROM test_table +EXPLAIN SYNTAX (SELECT sum(2 - float64) - sum(3 - float64) From test_table); +SELECT ((2 * count(float64)) - sum(float64)) - ((3 * count(float64)) - sum(float64)) +FROM test_table +EXPLAIN SYNTAX (SELECT (sum(float64) + 2 * count(float64)) + (sum(float64) + 3 * count(float64)) From test_table); +SELECT (sum(float64) + (2 * count(float64))) + (sum(float64) + (3 * count(float64))) +FROM test_table +EXPLAIN SYNTAX (SELECT (sum(float64) + 2 * count(float64)) - (sum(float64) + 3 * count(float64)) From test_table); +SELECT (sum(float64) + (2 * count(float64))) - (sum(float64) + (3 * count(float64))) +FROM test_table +EXPLAIN SYNTAX (SELECT (sum(float64) - 2 * count(float64)) + (sum(float64) - 3 * count(float64)) From test_table); +SELECT (sum(float64) - (2 * count(float64))) + (sum(float64) - (3 * count(float64))) +FROM test_table +EXPLAIN SYNTAX (SELECT (sum(float64) - 2 * count(float64)) - (sum(float64) - 3 * count(float64)) From test_table); +SELECT (sum(float64) - (2 * count(float64))) - (sum(float64) - (3 * count(float64))) +FROM test_table +EXPLAIN SYNTAX (SELECT (2 * count(float64) - sum(float64)) + (3 * count(float64) - sum(float64)) From test_table); +SELECT ((2 * count(float64)) - sum(float64)) + ((3 * count(float64)) - sum(float64)) +FROM test_table +SELECT sum(decimal32 + 2) + sum(decimal32 + 3) From test_table; +58.3 +SELECT sum(decimal32 + 2) - sum(decimal32 + 3) From test_table; +-5 +SELECT sum(decimal32 - 2) + sum(decimal32 - 3) From test_table; +8.3 +SELECT sum(decimal32 - 2) - sum(decimal32 - 3) From test_table; +5 +SELECT sum(2 - decimal32) - sum(3 - decimal32) From test_table; +-5 +SELECT (sum(decimal32) + 2 * count(decimal32)) + (sum(decimal32) + 3 * count(decimal32)) From test_table; +58.3 +SELECT (sum(decimal32) + 2 * count(decimal32)) - (sum(decimal32) + 3 * count(decimal32)) From test_table; +-5 +SELECT (sum(decimal32) - 2 * count(decimal32)) + (sum(decimal32) - 3 * count(decimal32)) From test_table; +8.3 +SELECT (sum(decimal32) - 2 * count(decimal32)) - (sum(decimal32) - 3 * count(decimal32)) From test_table; +5 +SELECT (2 * count(decimal32) - sum(decimal32)) + (3 * count(decimal32) - sum(decimal32)) From test_table; +-8.3 +EXPLAIN SYNTAX (SELECT sum(decimal32 + 2) + sum(decimal32 + 3) From test_table); +SELECT (sum(decimal32) + (2 * count(decimal32))) + (sum(decimal32) + (3 * count(decimal32))) +FROM test_table +EXPLAIN SYNTAX (SELECT sum(decimal32 + 2) - sum(decimal32 + 3) From test_table); +SELECT (sum(decimal32) + (2 * count(decimal32))) - (sum(decimal32) + (3 * count(decimal32))) +FROM test_table +EXPLAIN SYNTAX (SELECT sum(decimal32 - 2) + sum(decimal32 - 3) From test_table); +SELECT (sum(decimal32) - (2 * count(decimal32))) + (sum(decimal32) - (3 * count(decimal32))) +FROM test_table +EXPLAIN SYNTAX (SELECT sum(decimal32 - 2) - sum(decimal32 - 3) From test_table); +SELECT (sum(decimal32) - (2 * count(decimal32))) - (sum(decimal32) - (3 * count(decimal32))) +FROM test_table +EXPLAIN SYNTAX (SELECT sum(2 - decimal32) - sum(3 - decimal32) From test_table); +SELECT ((2 * count(decimal32)) - 
sum(decimal32)) - ((3 * count(decimal32)) - sum(decimal32)) +FROM test_table +EXPLAIN SYNTAX (SELECT (sum(decimal32) + 2 * count(decimal32)) + (sum(decimal32) + 3 * count(decimal32)) From test_table); +SELECT (sum(decimal32) + (2 * count(decimal32))) + (sum(decimal32) + (3 * count(decimal32))) +FROM test_table +EXPLAIN SYNTAX (SELECT (sum(decimal32) + 2 * count(decimal32)) - (sum(decimal32) + 3 * count(decimal32)) From test_table); +SELECT (sum(decimal32) + (2 * count(decimal32))) - (sum(decimal32) + (3 * count(decimal32))) +FROM test_table +EXPLAIN SYNTAX (SELECT (sum(decimal32) - 2 * count(decimal32)) + (sum(decimal32) - 3 * count(decimal32)) From test_table); +SELECT (sum(decimal32) - (2 * count(decimal32))) + (sum(decimal32) - (3 * count(decimal32))) +FROM test_table +EXPLAIN SYNTAX (SELECT (sum(decimal32) - 2 * count(decimal32)) - (sum(decimal32) - 3 * count(decimal32)) From test_table); +SELECT (sum(decimal32) - (2 * count(decimal32))) - (sum(decimal32) - (3 * count(decimal32))) +FROM test_table +EXPLAIN SYNTAX (SELECT (2 * count(decimal32) - sum(decimal32)) + (3 * count(decimal32) - sum(decimal32)) From test_table); +SELECT ((2 * count(decimal32)) - sum(decimal32)) + ((3 * count(decimal32)) - sum(decimal32)) +FROM test_table diff --git a/tests/queries/0_stateless/02931_rewrite_sum_column_and_constant.sql b/tests/queries/0_stateless/02931_rewrite_sum_column_and_constant.sql new file mode 100644 index 00000000000..b29407d7208 --- /dev/null +++ b/tests/queries/0_stateless/02931_rewrite_sum_column_and_constant.sql @@ -0,0 +1,209 @@ +-- { echoOn } +Select sum(number + 1) from numbers(10); +Select sum(1 + number) from numbers(10); +Select sum(number - 1) from numbers(10); +Select sum(1 - number) from numbers(10); +EXPLAIN SYNTAX (Select sum(number + 1) from numbers(10)); +EXPLAIN SYNTAX (Select sum(1 + number) from numbers(10)); +EXPLAIN SYNTAX (Select sum(number - 1) from numbers(10)); +EXPLAIN SYNTAX (Select sum(1 - number) from numbers(10)); + +WITH 1::Nullable(UInt64) as my_literal Select sum(number + my_literal) from numbers(0); +WITH 1::Nullable(UInt64) as my_literal Select sum(number) + my_literal * count() from numbers(0); +EXPLAIN SYNTAX (WITH 1::Nullable(UInt64) as my_literal Select sum(number + my_literal) from numbers(0)); +EXPLAIN SYNTAX (WITH 1::Nullable(UInt64) as my_literal Select sum(number) + my_literal * count() from numbers(0)); +-- { echoOff } + +DROP TABLE IF EXISTS test_table; + +CREATE TABLE test_table +( + uint64 UInt64, + float64 Float64, + decimal32 Decimal32(5), +) ENGINE=MergeTree ORDER BY uint64; + +INSERT INTO test_table VALUES (1, 1.1, 1.11); +INSERT INTO test_table VALUES (2, 2.2, 2.22); +INSERT INTO test_table VALUES (3, 3.3, 3.33); +INSERT INTO test_table VALUES (4, 4.4, 4.44); +INSERT INTO test_table VALUES (5, 5.5, 5.55); + +-- { echoOn } +SELECT sum(uint64 + 1 AS i) from test_table where i > 0; +SELECT sum(uint64 + 1) AS j from test_table having j > 0; +SELECT sum(uint64 + 1 AS i) j from test_table where i > 0 having j > 0; +SELECT sum((uint64 AS m) + (1 AS n)) j from test_table where m > 0 and n > 0 having j > 0; +SELECT sum(((uint64 AS m) + (1 AS n)) AS i) j from test_table where m > 0 and n > 0 and i > 0 having j > 0; +EXPLAIN SYNTAX (SELECT sum(uint64 + 1 AS i) from test_table where i > 0); +EXPLAIN SYNTAX (SELECT sum(uint64 + 1) AS j from test_table having j > 0); +EXPLAIN SYNTAX (SELECT sum(uint64 + 1 AS i) j from test_table where i > 0 having j > 0); +EXPLAIN SYNTAX (SELECT sum((uint64 AS m) + (1 AS n)) j from test_table where m > 0 and n > 0 
having j > 0); +EXPLAIN SYNTAX (SELECT sum(((uint64 AS m) + (1 AS n)) AS i) j from test_table where m > 0 and n > 0 and i > 0 having j > 0); + +SELECT sum(1 + uint64 AS i) from test_table where i > 0; +SELECT sum(1 + uint64) AS j from test_table having j > 0; +SELECT sum(1 + uint64 AS i) j from test_table where i > 0 having j > 0; +SELECT sum((1 AS m) + (uint64 AS n)) j from test_table where m > 0 and n > 0 having j > 0; +SELECT sum(((1 AS m) + (uint64 AS n)) AS i) j from test_table where m > 0 and n > 0 and i > 0 having j > 0; +EXPLAIN SYNTAX (SELECT sum(1 + uint64 AS i) from test_table where i > 0); +EXPLAIN SYNTAX (SELECT sum(1 + uint64) AS j from test_table having j > 0); +EXPLAIN SYNTAX (SELECT sum(1 + uint64 AS i) j from test_table where i > 0 having j > 0); +EXPLAIN SYNTAX (SELECT sum((1 AS m) + (uint64 AS n)) j from test_table where m > 0 and n > 0 having j > 0); +EXPLAIN SYNTAX (SELECT sum(((1 AS m) + (uint64 AS n)) AS i) j from test_table where m > 0 and n > 0 and i > 0 having j > 0); + +SELECT sum(uint64 - 1 AS i) from test_table where i > 0; +SELECT sum(uint64 - 1) AS j from test_table having j > 0; +SELECT sum(uint64 - 1 AS i) j from test_table where i > 0 having j > 0; +SELECT sum((uint64 AS m) - (1 AS n)) j from test_table where m > 0 and n > 0 having j > 0; +SELECT sum(((uint64 AS m) - (1 AS n)) AS i) j from test_table where m > 0 and n > 0 and i > 0 having j > 0; +EXPLAIN SYNTAX (SELECT sum(uint64 - 1 AS i) from test_table where i > 0); +EXPLAIN SYNTAX (SELECT sum(uint64 - 1) AS j from test_table having j > 0); +EXPLAIN SYNTAX (SELECT sum(uint64 - 1 AS i) j from test_table where i > 0 having j > 0); +EXPLAIN SYNTAX (SELECT sum((uint64 AS m) - (1 AS n)) j from test_table where m > 0 and n > 0 having j > 0); +EXPLAIN SYNTAX (SELECT sum(((uint64 AS m) - (1 AS n)) AS i) j from test_table where m > 0 and n > 0 and i > 0 having j > 0); + +SELECT sum(1 - uint64 AS i) from test_table; +SELECT sum(1 - uint64) AS j from test_table; +SELECT sum(1 - uint64 AS i) j from test_table; +SELECT sum((1 AS m) - (uint64 AS n)) j from test_table; +SELECT sum(((1 AS m) - (uint64 AS n)) AS i) j from test_table; +EXPLAIN SYNTAX (SELECT sum(1 - uint64 AS i) from test_table where i > 0); +EXPLAIN SYNTAX (SELECT sum(1 - uint64) AS j from test_table having j < 0); +EXPLAIN SYNTAX (SELECT sum(1 - uint64 AS i) j from test_table where i > 0 having j < 0); +EXPLAIN SYNTAX (SELECT sum((1 AS m) - (uint64 AS n)) j from test_table where m > 0 and n > 0 having j < 0); +EXPLAIN SYNTAX (SELECT sum(((1 AS m) - (uint64 AS n)) AS i) j from test_table where m > 0 and n > 0 and i < 0 having j < 0); + +SELECT sum(uint64 + 2.11) From test_table; +SELECT sum(2.11 + uint64) From test_table; +SELECT sum(uint64 - 2.11) From test_table; +SELECT sum(2.11 - uint64) From test_table; +SELECT sum(uint64) + 2.11 * count(uint64) From test_table; +SELECT 2.11 * count(uint64) + sum(uint64) From test_table; +SELECT sum(uint64) - 2.11 * count(uint64) From test_table; +SELECT 2.11 * count(uint64) - sum(uint64) From test_table; +EXPLAIN SYNTAX (SELECT sum(uint64 + 2.11) From test_table); +EXPLAIN SYNTAX (SELECT sum(2.11 + uint64) From test_table); +EXPLAIN SYNTAX (SELECT sum(uint64 - 2.11) From test_table); +EXPLAIN SYNTAX (SELECT sum(2.11 - uint64) From test_table); +EXPLAIN SYNTAX (SELECT sum(uint64) + 2.11 * count(uint64) From test_table); +EXPLAIN SYNTAX (SELECT 2.11 * count(uint64) + sum(uint64) From test_table); +EXPLAIN SYNTAX (SELECT sum(uint64) - 2.11 * count(uint64) From test_table); +EXPLAIN SYNTAX (SELECT 2.11 * 
count(uint64) - sum(uint64) From test_table); + +SELECT sum(uint64 + 2) From test_table; +SELECT sum(2 + uint64) From test_table; +SELECT sum(uint64 - 2) From test_table; +SELECT sum(2 - uint64) From test_table; +SELECT sum(uint64) + 2 * count(uint64) From test_table; +SELECT 2 * count(uint64) + sum(uint64) From test_table; +SELECT sum(uint64) - 2 * count(uint64) From test_table; +SELECT 2 * count(uint64) - sum(uint64) From test_table; +EXPLAIN SYNTAX (SELECT sum(uint64 + 2) From test_table); +EXPLAIN SYNTAX (SELECT sum(2 + uint64) From test_table); +EXPLAIN SYNTAX (SELECT sum(uint64 - 2) From test_table); +EXPLAIN SYNTAX (SELECT sum(2 - uint64) From test_table); +EXPLAIN SYNTAX (SELECT sum(uint64) + 2 * count(uint64) From test_table); +EXPLAIN SYNTAX (SELECT 2 * count(uint64) + sum(uint64) From test_table); +EXPLAIN SYNTAX (SELECT sum(uint64) - 2 * count(uint64) From test_table); +EXPLAIN SYNTAX (SELECT 2 * count(uint64) - sum(uint64) From test_table); + +SELECT sum(float64 + 2) From test_table; +SELECT sum(2 + float64) From test_table; +SELECT sum(float64 - 2) From test_table; +SELECT sum(2 - float64) From test_table; +SELECT sum(float64) + 2 * count(float64) From test_table; +SELECT 2 * count(float64) + sum(float64) From test_table; +SELECT sum(float64) - 2 * count(float64) From test_table; +SELECT 2 * count(float64) - sum(float64) From test_table; +EXPLAIN SYNTAX (SELECT sum(float64 + 2) From test_table); +EXPLAIN SYNTAX (SELECT sum(2 + float64) From test_table); +EXPLAIN SYNTAX (SELECT sum(float64 - 2) From test_table); +EXPLAIN SYNTAX (SELECT sum(2 - float64) From test_table); +EXPLAIN SYNTAX (SELECT sum(float64) + 2 * count(float64) From test_table); +EXPLAIN SYNTAX (SELECT 2 * count(float64) + sum(float64) From test_table); +EXPLAIN SYNTAX (SELECT sum(float64) - 2 * count(float64) From test_table); +EXPLAIN SYNTAX (SELECT 2 * count(float64) - sum(float64) From test_table); + +SELECT sum(decimal32 + 2) From test_table; +SELECT sum(2 + decimal32) From test_table; +SELECT sum(decimal32 - 2) From test_table; +SELECT sum(2 - decimal32) From test_table; +SELECT sum(decimal32) + 2 * count(decimal32) From test_table; +SELECT 2 * count(decimal32) + sum(decimal32) From test_table; +SELECT sum(decimal32) - 2 * count(decimal32) From test_table; +SELECT 2 * count(decimal32) - sum(decimal32) From test_table; +EXPLAIN SYNTAX (SELECT sum(decimal32 + 2) From test_table); +EXPLAIN SYNTAX (SELECT sum(2 + decimal32) From test_table); +EXPLAIN SYNTAX (SELECT sum(decimal32 - 2) From test_table); +EXPLAIN SYNTAX (SELECT sum(2 - decimal32) From test_table); +EXPLAIN SYNTAX (SELECT sum(decimal32) + 2 * count(decimal32) From test_table); +EXPLAIN SYNTAX (SELECT 2 * count(decimal32) + sum(decimal32) From test_table); +EXPLAIN SYNTAX (SELECT sum(decimal32) - 2 * count(decimal32) From test_table); +EXPLAIN SYNTAX (SELECT 2 * count(decimal32) - sum(decimal32) From test_table); + +SELECT sum(uint64 + 2) + sum(uint64 + 3) From test_table; +SELECT sum(uint64 + 2) - sum(uint64 + 3) From test_table; +SELECT sum(uint64 - 2) + sum(uint64 - 3) From test_table; +SELECT sum(uint64 - 2) - sum(uint64 - 3) From test_table; +SELECT sum(2 - uint64) - sum(3 - uint64) From test_table; +SELECT (sum(uint64) + 2 * count(uint64)) + (sum(uint64) + 3 * count(uint64)) From test_table; +SELECT (sum(uint64) + 2 * count(uint64)) - (sum(uint64) + 3 * count(uint64)) From test_table; +SELECT (sum(uint64) - 2 * count(uint64)) + (sum(uint64) - 3 * count(uint64)) From test_table; +SELECT (sum(uint64) - 2 * count(uint64)) - (sum(uint64) - 3 * 
count(uint64)) From test_table; +SELECT (2 * count(uint64) - sum(uint64)) + (3 * count(uint64) - sum(uint64)) From test_table; +EXPLAIN SYNTAX (SELECT sum(uint64 + 2) + sum(uint64 + 3) From test_table); +EXPLAIN SYNTAX (SELECT sum(uint64 + 2) - sum(uint64 + 3) From test_table); +EXPLAIN SYNTAX (SELECT sum(uint64 - 2) + sum(uint64 - 3) From test_table); +EXPLAIN SYNTAX (SELECT sum(uint64 - 2) - sum(uint64 - 3) From test_table); +EXPLAIN SYNTAX (SELECT sum(2 - uint64) - sum(3 - uint64) From test_table); +EXPLAIN SYNTAX (SELECT (sum(uint64) + 2 * count(uint64)) + (sum(uint64) + 3 * count(uint64)) From test_table); +EXPLAIN SYNTAX (SELECT (sum(uint64) + 2 * count(uint64)) - (sum(uint64) + 3 * count(uint64)) From test_table); +EXPLAIN SYNTAX (SELECT (sum(uint64) - 2 * count(uint64)) + (sum(uint64) - 3 * count(uint64)) From test_table); +EXPLAIN SYNTAX (SELECT (sum(uint64) - 2 * count(uint64)) - (sum(uint64) - 3 * count(uint64)) From test_table); +EXPLAIN SYNTAX (SELECT (2 * count(uint64) - sum(uint64)) + (3 * count(uint64) - sum(uint64)) From test_table); + +SELECT sum(float64 + 2) + sum(float64 + 3) From test_table; +SELECT sum(float64 + 2) - sum(float64 + 3) From test_table; +SELECT sum(float64 - 2) + sum(float64 - 3) From test_table; +SELECT sum(float64 - 2) - sum(float64 - 3) From test_table; +SELECT sum(2 - float64) - sum(3 - float64) From test_table; +SELECT (sum(float64) + 2 * count(float64)) + (sum(float64) + 3 * count(float64)) From test_table; +SELECT (sum(float64) + 2 * count(float64)) - (sum(float64) + 3 * count(float64)) From test_table; +SELECT (sum(float64) - 2 * count(float64)) + (sum(float64) - 3 * count(float64)) From test_table; +SELECT (sum(float64) - 2 * count(float64)) - (sum(float64) - 3 * count(float64)) From test_table; +SELECT (2 * count(float64) - sum(float64)) + (3 * count(float64) - sum(float64)) From test_table; +EXPLAIN SYNTAX (SELECT sum(float64 + 2) + sum(float64 + 3) From test_table); +EXPLAIN SYNTAX (SELECT sum(float64 + 2) - sum(float64 + 3) From test_table); +EXPLAIN SYNTAX (SELECT sum(float64 - 2) + sum(float64 - 3) From test_table); +EXPLAIN SYNTAX (SELECT sum(float64 - 2) - sum(float64 - 3) From test_table); +EXPLAIN SYNTAX (SELECT sum(2 - float64) - sum(3 - float64) From test_table); +EXPLAIN SYNTAX (SELECT (sum(float64) + 2 * count(float64)) + (sum(float64) + 3 * count(float64)) From test_table); +EXPLAIN SYNTAX (SELECT (sum(float64) + 2 * count(float64)) - (sum(float64) + 3 * count(float64)) From test_table); +EXPLAIN SYNTAX (SELECT (sum(float64) - 2 * count(float64)) + (sum(float64) - 3 * count(float64)) From test_table); +EXPLAIN SYNTAX (SELECT (sum(float64) - 2 * count(float64)) - (sum(float64) - 3 * count(float64)) From test_table); +EXPLAIN SYNTAX (SELECT (2 * count(float64) - sum(float64)) + (3 * count(float64) - sum(float64)) From test_table); + +SELECT sum(decimal32 + 2) + sum(decimal32 + 3) From test_table; +SELECT sum(decimal32 + 2) - sum(decimal32 + 3) From test_table; +SELECT sum(decimal32 - 2) + sum(decimal32 - 3) From test_table; +SELECT sum(decimal32 - 2) - sum(decimal32 - 3) From test_table; +SELECT sum(2 - decimal32) - sum(3 - decimal32) From test_table; +SELECT (sum(decimal32) + 2 * count(decimal32)) + (sum(decimal32) + 3 * count(decimal32)) From test_table; +SELECT (sum(decimal32) + 2 * count(decimal32)) - (sum(decimal32) + 3 * count(decimal32)) From test_table; +SELECT (sum(decimal32) - 2 * count(decimal32)) + (sum(decimal32) - 3 * count(decimal32)) From test_table; +SELECT (sum(decimal32) - 2 * count(decimal32)) - (sum(decimal32) - 
3 * count(decimal32)) From test_table; +SELECT (2 * count(decimal32) - sum(decimal32)) + (3 * count(decimal32) - sum(decimal32)) From test_table; +EXPLAIN SYNTAX (SELECT sum(decimal32 + 2) + sum(decimal32 + 3) From test_table); +EXPLAIN SYNTAX (SELECT sum(decimal32 + 2) - sum(decimal32 + 3) From test_table); +EXPLAIN SYNTAX (SELECT sum(decimal32 - 2) + sum(decimal32 - 3) From test_table); +EXPLAIN SYNTAX (SELECT sum(decimal32 - 2) - sum(decimal32 - 3) From test_table); +EXPLAIN SYNTAX (SELECT sum(2 - decimal32) - sum(3 - decimal32) From test_table); +EXPLAIN SYNTAX (SELECT (sum(decimal32) + 2 * count(decimal32)) + (sum(decimal32) + 3 * count(decimal32)) From test_table); +EXPLAIN SYNTAX (SELECT (sum(decimal32) + 2 * count(decimal32)) - (sum(decimal32) + 3 * count(decimal32)) From test_table); +EXPLAIN SYNTAX (SELECT (sum(decimal32) - 2 * count(decimal32)) + (sum(decimal32) - 3 * count(decimal32)) From test_table); +EXPLAIN SYNTAX (SELECT (sum(decimal32) - 2 * count(decimal32)) - (sum(decimal32) - 3 * count(decimal32)) From test_table); +EXPLAIN SYNTAX (SELECT (2 * count(decimal32) - sum(decimal32)) + (3 * count(decimal32) - sum(decimal32)) From test_table); +-- { echoOff } + +DROP TABLE IF EXISTS test_table; diff --git a/tests/queries/0_stateless/02932_analyzer_rewrite_sum_column_and_constant.reference b/tests/queries/0_stateless/02932_analyzer_rewrite_sum_column_and_constant.reference new file mode 100644 index 00000000000..802d920aaef --- /dev/null +++ b/tests/queries/0_stateless/02932_analyzer_rewrite_sum_column_and_constant.reference @@ -0,0 +1,3324 @@ +-- { echoOn } +Select sum(number + 1) from numbers(10); +55 +Select sum(1 + number) from numbers(10); +55 +Select sum(number - 1) from numbers(10); +35 +Select sum(1 - number) from numbers(10); +-35 +EXPLAIN QUERY TREE (Select sum(number + 1) from numbers(10)); +QUERY id: 0 + PROJECTION COLUMNS + sum(plus(number, 1)) UInt64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: plus, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 5, nodes: 1 + COLUMN id: 6, column_name: number, result_type: UInt64, source_id: 7 + FUNCTION id: 8, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 9, nodes: 2 + CONSTANT id: 10, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 11, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 12, nodes: 1 + COLUMN id: 6, column_name: number, result_type: UInt64, source_id: 7 + JOIN TREE + TABLE_FUNCTION id: 7, alias: __table1, table_function_name: numbers + ARGUMENTS + LIST id: 13, nodes: 1 + CONSTANT id: 14, constant_value: UInt64_10, constant_value_type: UInt8 +EXPLAIN QUERY TREE (Select sum(1 + number) from numbers(10)); +QUERY id: 0 + PROJECTION COLUMNS + sum(plus(1, number)) UInt64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: plus, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 5, nodes: 2 + CONSTANT id: 6, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 7, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 8, nodes: 1 + COLUMN id: 9, column_name: number, result_type: UInt64, source_id: 10 + FUNCTION id: 11, function_name: sum, 
function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 12, nodes: 1 + COLUMN id: 9, column_name: number, result_type: UInt64, source_id: 10 + JOIN TREE + TABLE_FUNCTION id: 10, alias: __table1, table_function_name: numbers + ARGUMENTS + LIST id: 13, nodes: 1 + CONSTANT id: 14, constant_value: UInt64_10, constant_value_type: UInt8 +EXPLAIN QUERY TREE (Select sum(number - 1) from numbers(10)); +QUERY id: 0 + PROJECTION COLUMNS + sum(minus(number, 1)) Int64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: minus, function_type: ordinary, result_type: Int64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 5, nodes: 1 + COLUMN id: 6, column_name: number, result_type: UInt64, source_id: 7 + FUNCTION id: 8, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 9, nodes: 2 + CONSTANT id: 10, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 11, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 12, nodes: 1 + COLUMN id: 6, column_name: number, result_type: UInt64, source_id: 7 + JOIN TREE + TABLE_FUNCTION id: 7, alias: __table1, table_function_name: numbers + ARGUMENTS + LIST id: 13, nodes: 1 + CONSTANT id: 14, constant_value: UInt64_10, constant_value_type: UInt8 +EXPLAIN QUERY TREE (Select sum(1 - number) from numbers(10)); +QUERY id: 0 + PROJECTION COLUMNS + sum(minus(1, number)) Int64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: minus, function_type: ordinary, result_type: Int64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 5, nodes: 2 + CONSTANT id: 6, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 7, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 8, nodes: 1 + COLUMN id: 9, column_name: number, result_type: UInt64, source_id: 10 + FUNCTION id: 11, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 12, nodes: 1 + COLUMN id: 9, column_name: number, result_type: UInt64, source_id: 10 + JOIN TREE + TABLE_FUNCTION id: 10, alias: __table1, table_function_name: numbers + ARGUMENTS + LIST id: 13, nodes: 1 + CONSTANT id: 14, constant_value: UInt64_10, constant_value_type: UInt8 +WITH 1::Nullable(UInt64) as my_literal Select sum(number + my_literal) from numbers(0); +\N +WITH 1::Nullable(UInt64) as my_literal Select sum(number) + my_literal * count() from numbers(0); +0 +EXPLAIN QUERY TREE (WITH 1::Nullable(UInt64) as my_literal Select sum(number + my_literal) from numbers(0)); +QUERY id: 0 + PROJECTION COLUMNS + sum(plus(number, my_literal)) Nullable(UInt64) + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: sum, function_type: aggregate, result_type: Nullable(UInt64) + ARGUMENTS + LIST id: 3, nodes: 1 + FUNCTION id: 4, function_name: plus, function_type: ordinary, result_type: Nullable(UInt64) + ARGUMENTS + LIST id: 5, nodes: 2 + COLUMN id: 6, column_name: number, result_type: UInt64, source_id: 7 + CONSTANT id: 8, constant_value: UInt64_1, constant_value_type: Nullable(UInt64) + EXPRESSION + FUNCTION id: 9, alias: my_literal, function_name: CAST, function_type: ordinary, result_type: Nullable(UInt64) + ARGUMENTS + LIST id: 10, nodes: 2 + CONSTANT id: 11, constant_value: \'1\', constant_value_type: String + CONSTANT id: 12, constant_value: 
\'Nullable(UInt64)\', constant_value_type: String + JOIN TREE + TABLE_FUNCTION id: 7, alias: __table1, table_function_name: numbers + ARGUMENTS + LIST id: 13, nodes: 1 + CONSTANT id: 14, constant_value: UInt64_0, constant_value_type: UInt8 +EXPLAIN QUERY TREE (WITH 1::Nullable(UInt64) as my_literal Select sum(number) + my_literal * count() from numbers(0)); +QUERY id: 0 + PROJECTION COLUMNS + plus(sum(number), multiply(my_literal, count())) Nullable(UInt64) + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: plus, function_type: ordinary, result_type: Nullable(UInt64) + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 5, nodes: 1 + COLUMN id: 6, column_name: number, result_type: UInt64, source_id: 7 + FUNCTION id: 8, function_name: multiply, function_type: ordinary, result_type: Nullable(UInt64) + ARGUMENTS + LIST id: 9, nodes: 2 + CONSTANT id: 10, constant_value: UInt64_1, constant_value_type: Nullable(UInt64) + EXPRESSION + FUNCTION id: 11, alias: my_literal, function_name: CAST, function_type: ordinary, result_type: Nullable(UInt64) + ARGUMENTS + LIST id: 12, nodes: 2 + CONSTANT id: 13, constant_value: \'1\', constant_value_type: String + CONSTANT id: 14, constant_value: \'Nullable(UInt64)\', constant_value_type: String + FUNCTION id: 15, function_name: count, function_type: aggregate, result_type: UInt64 + JOIN TREE + TABLE_FUNCTION id: 7, alias: __table1, table_function_name: numbers + ARGUMENTS + LIST id: 16, nodes: 1 + CONSTANT id: 17, constant_value: UInt64_0, constant_value_type: UInt8 +-- { echoOn } +SELECT sum(uint64 + 1 AS i) from test_table where i > 0; +20 +SELECT sum(uint64 + 1) AS j from test_table having j > 0; +20 +SELECT sum(uint64 + 1 AS i) j from test_table where i > 0 having j > 0; +20 +SELECT sum((uint64 AS m) + (1 AS n)) j from test_table where m > 0 and n > 0 having j > 0; +20 +SELECT sum(((uint64 AS m) + (1 AS n)) AS i) j from test_table where m > 0 and n > 0 and i > 0 having j > 0; +20 +EXPLAIN QUERY TREE (SELECT sum(uint64 + 1 AS i) from test_table where i > 0); +QUERY id: 0 + PROJECTION COLUMNS + sum(i) UInt64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: plus, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 5, nodes: 1 + COLUMN id: 6, column_name: uint64, result_type: UInt64, source_id: 7 + FUNCTION id: 8, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 9, nodes: 2 + CONSTANT id: 10, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 11, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 12, nodes: 1 + COLUMN id: 6, column_name: uint64, result_type: UInt64, source_id: 7 + JOIN TREE + TABLE id: 7, alias: __table1, table_name: default.test_table + WHERE + FUNCTION id: 13, function_name: greater, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 14, nodes: 2 + FUNCTION id: 15, function_name: plus, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 16, nodes: 2 + COLUMN id: 6, column_name: uint64, result_type: UInt64, source_id: 7 + CONSTANT id: 10, constant_value: UInt64_1, constant_value_type: UInt8 + CONSTANT id: 17, constant_value: UInt64_0, constant_value_type: UInt8 +EXPLAIN QUERY TREE (SELECT sum(uint64 + 1) AS j from test_table having j > 0); +QUERY id: 0 + 
PROJECTION COLUMNS + j UInt64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: plus, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 5, nodes: 1 + COLUMN id: 6, column_name: uint64, result_type: UInt64, source_id: 7 + FUNCTION id: 8, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 9, nodes: 2 + CONSTANT id: 10, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 11, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 12, nodes: 1 + COLUMN id: 6, column_name: uint64, result_type: UInt64, source_id: 7 + JOIN TREE + TABLE id: 7, alias: __table1, table_name: default.test_table + HAVING + FUNCTION id: 13, function_name: greater, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 14, nodes: 2 + FUNCTION id: 15, function_name: plus, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 16, nodes: 2 + FUNCTION id: 17, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 18, nodes: 1 + COLUMN id: 6, column_name: uint64, result_type: UInt64, source_id: 7 + FUNCTION id: 19, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 20, nodes: 2 + CONSTANT id: 10, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 21, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 22, nodes: 1 + COLUMN id: 6, column_name: uint64, result_type: UInt64, source_id: 7 + CONSTANT id: 23, constant_value: UInt64_0, constant_value_type: UInt8 +EXPLAIN QUERY TREE (SELECT sum(uint64 + 1 AS i) j from test_table where i > 0 having j > 0); +QUERY id: 0 + PROJECTION COLUMNS + j UInt64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: plus, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 5, nodes: 1 + COLUMN id: 6, column_name: uint64, result_type: UInt64, source_id: 7 + FUNCTION id: 8, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 9, nodes: 2 + CONSTANT id: 10, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 11, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 12, nodes: 1 + COLUMN id: 6, column_name: uint64, result_type: UInt64, source_id: 7 + JOIN TREE + TABLE id: 7, alias: __table1, table_name: default.test_table + WHERE + FUNCTION id: 13, function_name: greater, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 14, nodes: 2 + FUNCTION id: 15, function_name: plus, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 16, nodes: 2 + COLUMN id: 6, column_name: uint64, result_type: UInt64, source_id: 7 + CONSTANT id: 10, constant_value: UInt64_1, constant_value_type: UInt8 + CONSTANT id: 17, constant_value: UInt64_0, constant_value_type: UInt8 + HAVING + FUNCTION id: 18, function_name: greater, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 19, nodes: 2 + FUNCTION id: 20, function_name: plus, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 21, nodes: 2 + FUNCTION id: 22, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 23, nodes: 1 + COLUMN id: 6, 
column_name: uint64, result_type: UInt64, source_id: 7 + FUNCTION id: 24, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 25, nodes: 2 + CONSTANT id: 10, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 26, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 27, nodes: 1 + COLUMN id: 6, column_name: uint64, result_type: UInt64, source_id: 7 + CONSTANT id: 28, constant_value: UInt64_0, constant_value_type: UInt8 +EXPLAIN QUERY TREE (SELECT sum((uint64 AS m) + (1 AS n)) j from test_table where m > 0 and n > 0 having j > 0); +QUERY id: 0 + PROJECTION COLUMNS + j UInt64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: plus, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 5, nodes: 1 + COLUMN id: 6, column_name: uint64, result_type: UInt64, source_id: 7 + FUNCTION id: 8, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 9, nodes: 2 + CONSTANT id: 10, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 11, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 12, nodes: 1 + COLUMN id: 6, column_name: uint64, result_type: UInt64, source_id: 7 + JOIN TREE + TABLE id: 7, alias: __table1, table_name: default.test_table + WHERE + FUNCTION id: 13, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 14, nodes: 2 + FUNCTION id: 15, function_name: greater, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 16, nodes: 2 + COLUMN id: 6, column_name: uint64, result_type: UInt64, source_id: 7 + CONSTANT id: 17, constant_value: UInt64_0, constant_value_type: UInt8 + CONSTANT id: 18, constant_value: UInt64_1, constant_value_type: UInt8 + EXPRESSION + FUNCTION id: 19, function_name: greater, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 20, nodes: 2 + CONSTANT id: 10, constant_value: UInt64_1, constant_value_type: UInt8 + CONSTANT id: 21, constant_value: UInt64_0, constant_value_type: UInt8 + HAVING + FUNCTION id: 22, function_name: greater, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 23, nodes: 2 + FUNCTION id: 24, function_name: plus, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 25, nodes: 2 + FUNCTION id: 26, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 27, nodes: 1 + COLUMN id: 6, column_name: uint64, result_type: UInt64, source_id: 7 + FUNCTION id: 28, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 29, nodes: 2 + CONSTANT id: 10, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 30, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 31, nodes: 1 + COLUMN id: 6, column_name: uint64, result_type: UInt64, source_id: 7 + CONSTANT id: 32, constant_value: UInt64_0, constant_value_type: UInt8 +EXPLAIN QUERY TREE (SELECT sum(((uint64 AS m) + (1 AS n)) AS i) j from test_table where m > 0 and n > 0 and i > 0 having j > 0); +QUERY id: 0 + PROJECTION COLUMNS + j UInt64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: plus, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: sum, function_type: aggregate, result_type: UInt64 + 
ARGUMENTS + LIST id: 5, nodes: 1 + COLUMN id: 6, column_name: uint64, result_type: UInt64, source_id: 7 + FUNCTION id: 8, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 9, nodes: 2 + CONSTANT id: 10, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 11, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 12, nodes: 1 + COLUMN id: 6, column_name: uint64, result_type: UInt64, source_id: 7 + JOIN TREE + TABLE id: 7, alias: __table1, table_name: default.test_table + WHERE + FUNCTION id: 13, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 14, nodes: 3 + FUNCTION id: 15, function_name: greater, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 16, nodes: 2 + COLUMN id: 6, column_name: uint64, result_type: UInt64, source_id: 7 + CONSTANT id: 17, constant_value: UInt64_0, constant_value_type: UInt8 + CONSTANT id: 18, constant_value: UInt64_1, constant_value_type: UInt8 + EXPRESSION + FUNCTION id: 19, function_name: greater, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 20, nodes: 2 + CONSTANT id: 10, constant_value: UInt64_1, constant_value_type: UInt8 + CONSTANT id: 21, constant_value: UInt64_0, constant_value_type: UInt8 + FUNCTION id: 22, function_name: greater, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 23, nodes: 2 + FUNCTION id: 24, function_name: plus, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 25, nodes: 2 + COLUMN id: 6, column_name: uint64, result_type: UInt64, source_id: 7 + CONSTANT id: 10, constant_value: UInt64_1, constant_value_type: UInt8 + CONSTANT id: 26, constant_value: UInt64_0, constant_value_type: UInt8 + HAVING + FUNCTION id: 27, function_name: greater, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 28, nodes: 2 + FUNCTION id: 29, function_name: plus, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 30, nodes: 2 + FUNCTION id: 31, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 32, nodes: 1 + COLUMN id: 6, column_name: uint64, result_type: UInt64, source_id: 7 + FUNCTION id: 33, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 34, nodes: 2 + CONSTANT id: 10, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 35, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 36, nodes: 1 + COLUMN id: 6, column_name: uint64, result_type: UInt64, source_id: 7 + CONSTANT id: 37, constant_value: UInt64_0, constant_value_type: UInt8 +SELECT sum(1 + uint64 AS i) from test_table where i > 0; +20 +SELECT sum(1 + uint64) AS j from test_table having j > 0; +20 +SELECT sum(1 + uint64 AS i) j from test_table where i > 0 having j > 0; +20 +SELECT sum((1 AS m) + (uint64 AS n)) j from test_table where m > 0 and n > 0 having j > 0; +20 +SELECT sum(((1 AS m) + (uint64 AS n)) AS i) j from test_table where m > 0 and n > 0 and i > 0 having j > 0; +20 +EXPLAIN QUERY TREE (SELECT sum(1 + uint64 AS i) from test_table where i > 0); +QUERY id: 0 + PROJECTION COLUMNS + sum(i) UInt64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: plus, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 5, nodes: 2 + CONSTANT id: 6, constant_value: UInt64_1, 
constant_value_type: UInt8 + FUNCTION id: 7, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 8, nodes: 1 + COLUMN id: 9, column_name: uint64, result_type: UInt64, source_id: 10 + FUNCTION id: 11, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 12, nodes: 1 + COLUMN id: 9, column_name: uint64, result_type: UInt64, source_id: 10 + JOIN TREE + TABLE id: 10, alias: __table1, table_name: default.test_table + WHERE + FUNCTION id: 13, function_name: greater, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 14, nodes: 2 + FUNCTION id: 15, function_name: plus, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 16, nodes: 2 + CONSTANT id: 6, constant_value: UInt64_1, constant_value_type: UInt8 + COLUMN id: 9, column_name: uint64, result_type: UInt64, source_id: 10 + CONSTANT id: 17, constant_value: UInt64_0, constant_value_type: UInt8 +EXPLAIN QUERY TREE (SELECT sum(1 + uint64) AS j from test_table having j > 0); +QUERY id: 0 + PROJECTION COLUMNS + j UInt64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: plus, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 5, nodes: 2 + CONSTANT id: 6, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 7, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 8, nodes: 1 + COLUMN id: 9, column_name: uint64, result_type: UInt64, source_id: 10 + FUNCTION id: 11, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 12, nodes: 1 + COLUMN id: 9, column_name: uint64, result_type: UInt64, source_id: 10 + JOIN TREE + TABLE id: 10, alias: __table1, table_name: default.test_table + HAVING + FUNCTION id: 13, function_name: greater, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 14, nodes: 2 + FUNCTION id: 15, function_name: plus, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 16, nodes: 2 + FUNCTION id: 17, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 18, nodes: 2 + CONSTANT id: 6, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 19, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 20, nodes: 1 + COLUMN id: 9, column_name: uint64, result_type: UInt64, source_id: 10 + FUNCTION id: 21, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 22, nodes: 1 + COLUMN id: 9, column_name: uint64, result_type: UInt64, source_id: 10 + CONSTANT id: 23, constant_value: UInt64_0, constant_value_type: UInt8 +EXPLAIN QUERY TREE (SELECT sum(1 + uint64 AS i) j from test_table where i > 0 having j > 0); +QUERY id: 0 + PROJECTION COLUMNS + j UInt64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: plus, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 5, nodes: 2 + CONSTANT id: 6, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 7, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 8, nodes: 1 + COLUMN id: 9, column_name: uint64, result_type: UInt64, source_id: 10 + FUNCTION id: 11, function_name: sum, function_type: aggregate, 
result_type: UInt64 + ARGUMENTS + LIST id: 12, nodes: 1 + COLUMN id: 9, column_name: uint64, result_type: UInt64, source_id: 10 + JOIN TREE + TABLE id: 10, alias: __table1, table_name: default.test_table + WHERE + FUNCTION id: 13, function_name: greater, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 14, nodes: 2 + FUNCTION id: 15, function_name: plus, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 16, nodes: 2 + CONSTANT id: 6, constant_value: UInt64_1, constant_value_type: UInt8 + COLUMN id: 9, column_name: uint64, result_type: UInt64, source_id: 10 + CONSTANT id: 17, constant_value: UInt64_0, constant_value_type: UInt8 + HAVING + FUNCTION id: 18, function_name: greater, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 19, nodes: 2 + FUNCTION id: 20, function_name: plus, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 21, nodes: 2 + FUNCTION id: 22, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 23, nodes: 2 + CONSTANT id: 6, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 24, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 25, nodes: 1 + COLUMN id: 9, column_name: uint64, result_type: UInt64, source_id: 10 + FUNCTION id: 26, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 27, nodes: 1 + COLUMN id: 9, column_name: uint64, result_type: UInt64, source_id: 10 + CONSTANT id: 28, constant_value: UInt64_0, constant_value_type: UInt8 +EXPLAIN QUERY TREE (SELECT sum((1 AS m) + (uint64 AS n)) j from test_table where m > 0 and n > 0 having j > 0); +QUERY id: 0 + PROJECTION COLUMNS + j UInt64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: plus, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 5, nodes: 2 + CONSTANT id: 6, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 7, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 8, nodes: 1 + COLUMN id: 9, column_name: uint64, result_type: UInt64, source_id: 10 + FUNCTION id: 11, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 12, nodes: 1 + COLUMN id: 9, column_name: uint64, result_type: UInt64, source_id: 10 + JOIN TREE + TABLE id: 10, alias: __table1, table_name: default.test_table + WHERE + FUNCTION id: 13, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 14, nodes: 2 + CONSTANT id: 15, constant_value: UInt64_1, constant_value_type: UInt8 + EXPRESSION + FUNCTION id: 16, function_name: greater, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 17, nodes: 2 + CONSTANT id: 6, constant_value: UInt64_1, constant_value_type: UInt8 + CONSTANT id: 18, constant_value: UInt64_0, constant_value_type: UInt8 + FUNCTION id: 19, function_name: greater, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 20, nodes: 2 + COLUMN id: 9, column_name: uint64, result_type: UInt64, source_id: 10 + CONSTANT id: 21, constant_value: UInt64_0, constant_value_type: UInt8 + HAVING + FUNCTION id: 22, function_name: greater, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 23, nodes: 2 + FUNCTION id: 24, function_name: plus, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 25, nodes: 2 + 
FUNCTION id: 26, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 27, nodes: 2 + CONSTANT id: 6, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 28, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 29, nodes: 1 + COLUMN id: 9, column_name: uint64, result_type: UInt64, source_id: 10 + FUNCTION id: 30, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 31, nodes: 1 + COLUMN id: 9, column_name: uint64, result_type: UInt64, source_id: 10 + CONSTANT id: 32, constant_value: UInt64_0, constant_value_type: UInt8 +EXPLAIN QUERY TREE (SELECT sum(((1 AS m) + (uint64 AS n)) AS i) j from test_table where m > 0 and n > 0 and i > 0 having j > 0); +QUERY id: 0 + PROJECTION COLUMNS + j UInt64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: plus, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 5, nodes: 2 + CONSTANT id: 6, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 7, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 8, nodes: 1 + COLUMN id: 9, column_name: uint64, result_type: UInt64, source_id: 10 + FUNCTION id: 11, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 12, nodes: 1 + COLUMN id: 9, column_name: uint64, result_type: UInt64, source_id: 10 + JOIN TREE + TABLE id: 10, alias: __table1, table_name: default.test_table + WHERE + FUNCTION id: 13, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 14, nodes: 3 + CONSTANT id: 15, constant_value: UInt64_1, constant_value_type: UInt8 + EXPRESSION + FUNCTION id: 16, function_name: greater, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 17, nodes: 2 + CONSTANT id: 6, constant_value: UInt64_1, constant_value_type: UInt8 + CONSTANT id: 18, constant_value: UInt64_0, constant_value_type: UInt8 + FUNCTION id: 19, function_name: greater, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 20, nodes: 2 + COLUMN id: 9, column_name: uint64, result_type: UInt64, source_id: 10 + CONSTANT id: 21, constant_value: UInt64_0, constant_value_type: UInt8 + FUNCTION id: 22, function_name: greater, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 23, nodes: 2 + FUNCTION id: 24, function_name: plus, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 25, nodes: 2 + CONSTANT id: 6, constant_value: UInt64_1, constant_value_type: UInt8 + COLUMN id: 9, column_name: uint64, result_type: UInt64, source_id: 10 + CONSTANT id: 26, constant_value: UInt64_0, constant_value_type: UInt8 + HAVING + FUNCTION id: 27, function_name: greater, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 28, nodes: 2 + FUNCTION id: 29, function_name: plus, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 30, nodes: 2 + FUNCTION id: 31, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 32, nodes: 2 + CONSTANT id: 6, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 33, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 34, nodes: 1 + COLUMN id: 9, column_name: uint64, result_type: UInt64, source_id: 10 + FUNCTION id: 35, function_name: sum, function_type: aggregate, 
result_type: UInt64 + ARGUMENTS + LIST id: 36, nodes: 1 + COLUMN id: 9, column_name: uint64, result_type: UInt64, source_id: 10 + CONSTANT id: 37, constant_value: UInt64_0, constant_value_type: UInt8 +SELECT sum(uint64 - 1 AS i) from test_table where i > 0; +10 +SELECT sum(uint64 - 1) AS j from test_table having j > 0; +10 +SELECT sum(uint64 - 1 AS i) j from test_table where i > 0 having j > 0; +10 +SELECT sum((uint64 AS m) - (1 AS n)) j from test_table where m > 0 and n > 0 having j > 0; +10 +SELECT sum(((uint64 AS m) - (1 AS n)) AS i) j from test_table where m > 0 and n > 0 and i > 0 having j > 0; +10 +EXPLAIN QUERY TREE (SELECT sum(uint64 - 1 AS i) from test_table where i > 0); +QUERY id: 0 + PROJECTION COLUMNS + sum(i) Int64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: minus, function_type: ordinary, result_type: Int64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 5, nodes: 1 + COLUMN id: 6, column_name: uint64, result_type: UInt64, source_id: 7 + FUNCTION id: 8, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 9, nodes: 2 + CONSTANT id: 10, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 11, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 12, nodes: 1 + COLUMN id: 6, column_name: uint64, result_type: UInt64, source_id: 7 + JOIN TREE + TABLE id: 7, alias: __table1, table_name: default.test_table + WHERE + FUNCTION id: 13, function_name: greater, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 14, nodes: 2 + FUNCTION id: 15, function_name: minus, function_type: ordinary, result_type: Int64 + ARGUMENTS + LIST id: 16, nodes: 2 + COLUMN id: 6, column_name: uint64, result_type: UInt64, source_id: 7 + CONSTANT id: 10, constant_value: UInt64_1, constant_value_type: UInt8 + CONSTANT id: 17, constant_value: UInt64_0, constant_value_type: UInt8 +EXPLAIN QUERY TREE (SELECT sum(uint64 - 1) AS j from test_table having j > 0); +QUERY id: 0 + PROJECTION COLUMNS + j Int64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: minus, function_type: ordinary, result_type: Int64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 5, nodes: 1 + COLUMN id: 6, column_name: uint64, result_type: UInt64, source_id: 7 + FUNCTION id: 8, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 9, nodes: 2 + CONSTANT id: 10, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 11, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 12, nodes: 1 + COLUMN id: 6, column_name: uint64, result_type: UInt64, source_id: 7 + JOIN TREE + TABLE id: 7, alias: __table1, table_name: default.test_table + HAVING + FUNCTION id: 13, function_name: greater, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 14, nodes: 2 + FUNCTION id: 15, function_name: minus, function_type: ordinary, result_type: Int64 + ARGUMENTS + LIST id: 16, nodes: 2 + FUNCTION id: 17, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 18, nodes: 1 + COLUMN id: 6, column_name: uint64, result_type: UInt64, source_id: 7 + FUNCTION id: 19, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 20, nodes: 2 + CONSTANT id: 10, constant_value: 
UInt64_1, constant_value_type: UInt8 + FUNCTION id: 21, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 22, nodes: 1 + COLUMN id: 6, column_name: uint64, result_type: UInt64, source_id: 7 + CONSTANT id: 23, constant_value: UInt64_0, constant_value_type: UInt8 +EXPLAIN QUERY TREE (SELECT sum(uint64 - 1 AS i) j from test_table where i > 0 having j > 0); +QUERY id: 0 + PROJECTION COLUMNS + j Int64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: minus, function_type: ordinary, result_type: Int64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 5, nodes: 1 + COLUMN id: 6, column_name: uint64, result_type: UInt64, source_id: 7 + FUNCTION id: 8, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 9, nodes: 2 + CONSTANT id: 10, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 11, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 12, nodes: 1 + COLUMN id: 6, column_name: uint64, result_type: UInt64, source_id: 7 + JOIN TREE + TABLE id: 7, alias: __table1, table_name: default.test_table + WHERE + FUNCTION id: 13, function_name: greater, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 14, nodes: 2 + FUNCTION id: 15, function_name: minus, function_type: ordinary, result_type: Int64 + ARGUMENTS + LIST id: 16, nodes: 2 + COLUMN id: 6, column_name: uint64, result_type: UInt64, source_id: 7 + CONSTANT id: 10, constant_value: UInt64_1, constant_value_type: UInt8 + CONSTANT id: 17, constant_value: UInt64_0, constant_value_type: UInt8 + HAVING + FUNCTION id: 18, function_name: greater, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 19, nodes: 2 + FUNCTION id: 20, function_name: minus, function_type: ordinary, result_type: Int64 + ARGUMENTS + LIST id: 21, nodes: 2 + FUNCTION id: 22, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 23, nodes: 1 + COLUMN id: 6, column_name: uint64, result_type: UInt64, source_id: 7 + FUNCTION id: 24, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 25, nodes: 2 + CONSTANT id: 10, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 26, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 27, nodes: 1 + COLUMN id: 6, column_name: uint64, result_type: UInt64, source_id: 7 + CONSTANT id: 28, constant_value: UInt64_0, constant_value_type: UInt8 +EXPLAIN QUERY TREE (SELECT sum((uint64 AS m) - (1 AS n)) j from test_table where m > 0 and n > 0 having j > 0); +QUERY id: 0 + PROJECTION COLUMNS + j Int64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: minus, function_type: ordinary, result_type: Int64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 5, nodes: 1 + COLUMN id: 6, column_name: uint64, result_type: UInt64, source_id: 7 + FUNCTION id: 8, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 9, nodes: 2 + CONSTANT id: 10, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 11, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 12, nodes: 1 + COLUMN id: 6, column_name: uint64, result_type: UInt64, source_id: 7 + JOIN TREE + TABLE id: 7, alias: 
__table1, table_name: default.test_table + WHERE + FUNCTION id: 13, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 14, nodes: 2 + FUNCTION id: 15, function_name: greater, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 16, nodes: 2 + COLUMN id: 6, column_name: uint64, result_type: UInt64, source_id: 7 + CONSTANT id: 17, constant_value: UInt64_0, constant_value_type: UInt8 + CONSTANT id: 18, constant_value: UInt64_1, constant_value_type: UInt8 + EXPRESSION + FUNCTION id: 19, function_name: greater, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 20, nodes: 2 + CONSTANT id: 10, constant_value: UInt64_1, constant_value_type: UInt8 + CONSTANT id: 21, constant_value: UInt64_0, constant_value_type: UInt8 + HAVING + FUNCTION id: 22, function_name: greater, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 23, nodes: 2 + FUNCTION id: 24, function_name: minus, function_type: ordinary, result_type: Int64 + ARGUMENTS + LIST id: 25, nodes: 2 + FUNCTION id: 26, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 27, nodes: 1 + COLUMN id: 6, column_name: uint64, result_type: UInt64, source_id: 7 + FUNCTION id: 28, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 29, nodes: 2 + CONSTANT id: 10, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 30, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 31, nodes: 1 + COLUMN id: 6, column_name: uint64, result_type: UInt64, source_id: 7 + CONSTANT id: 32, constant_value: UInt64_0, constant_value_type: UInt8 +EXPLAIN QUERY TREE (SELECT sum(((uint64 AS m) - (1 AS n)) AS i) j from test_table where m > 0 and n > 0 and i > 0 having j > 0); +QUERY id: 0 + PROJECTION COLUMNS + j Int64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: minus, function_type: ordinary, result_type: Int64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 5, nodes: 1 + COLUMN id: 6, column_name: uint64, result_type: UInt64, source_id: 7 + FUNCTION id: 8, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 9, nodes: 2 + CONSTANT id: 10, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 11, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 12, nodes: 1 + COLUMN id: 6, column_name: uint64, result_type: UInt64, source_id: 7 + JOIN TREE + TABLE id: 7, alias: __table1, table_name: default.test_table + WHERE + FUNCTION id: 13, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 14, nodes: 3 + FUNCTION id: 15, function_name: greater, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 16, nodes: 2 + COLUMN id: 6, column_name: uint64, result_type: UInt64, source_id: 7 + CONSTANT id: 17, constant_value: UInt64_0, constant_value_type: UInt8 + CONSTANT id: 18, constant_value: UInt64_1, constant_value_type: UInt8 + EXPRESSION + FUNCTION id: 19, function_name: greater, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 20, nodes: 2 + CONSTANT id: 10, constant_value: UInt64_1, constant_value_type: UInt8 + CONSTANT id: 21, constant_value: UInt64_0, constant_value_type: UInt8 + FUNCTION id: 22, function_name: greater, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 23, nodes: 2 + 
FUNCTION id: 24, function_name: minus, function_type: ordinary, result_type: Int64 + ARGUMENTS + LIST id: 25, nodes: 2 + COLUMN id: 6, column_name: uint64, result_type: UInt64, source_id: 7 + CONSTANT id: 10, constant_value: UInt64_1, constant_value_type: UInt8 + CONSTANT id: 26, constant_value: UInt64_0, constant_value_type: UInt8 + HAVING + FUNCTION id: 27, function_name: greater, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 28, nodes: 2 + FUNCTION id: 29, function_name: minus, function_type: ordinary, result_type: Int64 + ARGUMENTS + LIST id: 30, nodes: 2 + FUNCTION id: 31, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 32, nodes: 1 + COLUMN id: 6, column_name: uint64, result_type: UInt64, source_id: 7 + FUNCTION id: 33, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 34, nodes: 2 + CONSTANT id: 10, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 35, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 36, nodes: 1 + COLUMN id: 6, column_name: uint64, result_type: UInt64, source_id: 7 + CONSTANT id: 37, constant_value: UInt64_0, constant_value_type: UInt8 +SELECT sum(1 - uint64 AS i) from test_table; +-10 +SELECT sum(1 - uint64) AS j from test_table; +-10 +SELECT sum(1 - uint64 AS i) j from test_table; +-10 +SELECT sum((1 AS m) - (uint64 AS n)) j from test_table; +-10 +SELECT sum(((1 AS m) - (uint64 AS n)) AS i) j from test_table; +-10 +EXPLAIN QUERY TREE (SELECT sum(1 - uint64 AS i) from test_table where i > 0); +QUERY id: 0 + PROJECTION COLUMNS + sum(i) Int64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: minus, function_type: ordinary, result_type: Int64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 5, nodes: 2 + CONSTANT id: 6, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 7, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 8, nodes: 1 + COLUMN id: 9, column_name: uint64, result_type: UInt64, source_id: 10 + FUNCTION id: 11, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 12, nodes: 1 + COLUMN id: 9, column_name: uint64, result_type: UInt64, source_id: 10 + JOIN TREE + TABLE id: 10, alias: __table1, table_name: default.test_table + WHERE + FUNCTION id: 13, function_name: greater, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 14, nodes: 2 + FUNCTION id: 15, function_name: minus, function_type: ordinary, result_type: Int64 + ARGUMENTS + LIST id: 16, nodes: 2 + CONSTANT id: 6, constant_value: UInt64_1, constant_value_type: UInt8 + COLUMN id: 9, column_name: uint64, result_type: UInt64, source_id: 10 + CONSTANT id: 17, constant_value: UInt64_0, constant_value_type: UInt8 +EXPLAIN QUERY TREE (SELECT sum(1 - uint64) AS j from test_table having j < 0); +QUERY id: 0 + PROJECTION COLUMNS + j Int64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: minus, function_type: ordinary, result_type: Int64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 5, nodes: 2 + CONSTANT id: 6, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 7, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 8, nodes: 1 + COLUMN id: 9, 
column_name: uint64, result_type: UInt64, source_id: 10 + FUNCTION id: 11, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 12, nodes: 1 + COLUMN id: 9, column_name: uint64, result_type: UInt64, source_id: 10 + JOIN TREE + TABLE id: 10, alias: __table1, table_name: default.test_table + HAVING + FUNCTION id: 13, function_name: less, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 14, nodes: 2 + FUNCTION id: 15, function_name: minus, function_type: ordinary, result_type: Int64 + ARGUMENTS + LIST id: 16, nodes: 2 + FUNCTION id: 17, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 18, nodes: 2 + CONSTANT id: 6, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 19, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 20, nodes: 1 + COLUMN id: 9, column_name: uint64, result_type: UInt64, source_id: 10 + FUNCTION id: 21, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 22, nodes: 1 + COLUMN id: 9, column_name: uint64, result_type: UInt64, source_id: 10 + CONSTANT id: 23, constant_value: UInt64_0, constant_value_type: UInt8 +EXPLAIN QUERY TREE (SELECT sum(1 - uint64 AS i) j from test_table where i > 0 having j < 0); +QUERY id: 0 + PROJECTION COLUMNS + j Int64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: minus, function_type: ordinary, result_type: Int64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 5, nodes: 2 + CONSTANT id: 6, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 7, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 8, nodes: 1 + COLUMN id: 9, column_name: uint64, result_type: UInt64, source_id: 10 + FUNCTION id: 11, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 12, nodes: 1 + COLUMN id: 9, column_name: uint64, result_type: UInt64, source_id: 10 + JOIN TREE + TABLE id: 10, alias: __table1, table_name: default.test_table + WHERE + FUNCTION id: 13, function_name: greater, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 14, nodes: 2 + FUNCTION id: 15, function_name: minus, function_type: ordinary, result_type: Int64 + ARGUMENTS + LIST id: 16, nodes: 2 + CONSTANT id: 6, constant_value: UInt64_1, constant_value_type: UInt8 + COLUMN id: 9, column_name: uint64, result_type: UInt64, source_id: 10 + CONSTANT id: 17, constant_value: UInt64_0, constant_value_type: UInt8 + HAVING + FUNCTION id: 18, function_name: less, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 19, nodes: 2 + FUNCTION id: 20, function_name: minus, function_type: ordinary, result_type: Int64 + ARGUMENTS + LIST id: 21, nodes: 2 + FUNCTION id: 22, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 23, nodes: 2 + CONSTANT id: 6, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 24, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 25, nodes: 1 + COLUMN id: 9, column_name: uint64, result_type: UInt64, source_id: 10 + FUNCTION id: 26, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 27, nodes: 1 + COLUMN id: 9, column_name: uint64, result_type: UInt64, source_id: 10 + CONSTANT id: 28, constant_value: UInt64_0, constant_value_type: UInt8 
+EXPLAIN QUERY TREE (SELECT sum((1 AS m) - (uint64 AS n)) j from test_table where m > 0 and n > 0 having j < 0); +QUERY id: 0 + PROJECTION COLUMNS + j Int64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: minus, function_type: ordinary, result_type: Int64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 5, nodes: 2 + CONSTANT id: 6, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 7, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 8, nodes: 1 + COLUMN id: 9, column_name: uint64, result_type: UInt64, source_id: 10 + FUNCTION id: 11, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 12, nodes: 1 + COLUMN id: 9, column_name: uint64, result_type: UInt64, source_id: 10 + JOIN TREE + TABLE id: 10, alias: __table1, table_name: default.test_table + WHERE + FUNCTION id: 13, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 14, nodes: 2 + CONSTANT id: 15, constant_value: UInt64_1, constant_value_type: UInt8 + EXPRESSION + FUNCTION id: 16, function_name: greater, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 17, nodes: 2 + CONSTANT id: 6, constant_value: UInt64_1, constant_value_type: UInt8 + CONSTANT id: 18, constant_value: UInt64_0, constant_value_type: UInt8 + FUNCTION id: 19, function_name: greater, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 20, nodes: 2 + COLUMN id: 9, column_name: uint64, result_type: UInt64, source_id: 10 + CONSTANT id: 21, constant_value: UInt64_0, constant_value_type: UInt8 + HAVING + FUNCTION id: 22, function_name: less, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 23, nodes: 2 + FUNCTION id: 24, function_name: minus, function_type: ordinary, result_type: Int64 + ARGUMENTS + LIST id: 25, nodes: 2 + FUNCTION id: 26, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 27, nodes: 2 + CONSTANT id: 6, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 28, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 29, nodes: 1 + COLUMN id: 9, column_name: uint64, result_type: UInt64, source_id: 10 + FUNCTION id: 30, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 31, nodes: 1 + COLUMN id: 9, column_name: uint64, result_type: UInt64, source_id: 10 + CONSTANT id: 32, constant_value: UInt64_0, constant_value_type: UInt8 +EXPLAIN QUERY TREE (SELECT sum(((1 AS m) - (uint64 AS n)) AS i) j from test_table where m > 0 and n > 0 and i < 0 having j < 0); +QUERY id: 0 + PROJECTION COLUMNS + j Int64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: minus, function_type: ordinary, result_type: Int64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 5, nodes: 2 + CONSTANT id: 6, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 7, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 8, nodes: 1 + COLUMN id: 9, column_name: uint64, result_type: UInt64, source_id: 10 + FUNCTION id: 11, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 12, nodes: 1 + COLUMN id: 9, column_name: uint64, result_type: UInt64, source_id: 10 + JOIN TREE + TABLE id: 
10, alias: __table1, table_name: default.test_table + WHERE + FUNCTION id: 13, function_name: and, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 14, nodes: 3 + CONSTANT id: 15, constant_value: UInt64_1, constant_value_type: UInt8 + EXPRESSION + FUNCTION id: 16, function_name: greater, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 17, nodes: 2 + CONSTANT id: 6, constant_value: UInt64_1, constant_value_type: UInt8 + CONSTANT id: 18, constant_value: UInt64_0, constant_value_type: UInt8 + FUNCTION id: 19, function_name: greater, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 20, nodes: 2 + COLUMN id: 9, column_name: uint64, result_type: UInt64, source_id: 10 + CONSTANT id: 21, constant_value: UInt64_0, constant_value_type: UInt8 + FUNCTION id: 22, function_name: less, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 23, nodes: 2 + FUNCTION id: 24, function_name: minus, function_type: ordinary, result_type: Int64 + ARGUMENTS + LIST id: 25, nodes: 2 + CONSTANT id: 6, constant_value: UInt64_1, constant_value_type: UInt8 + COLUMN id: 9, column_name: uint64, result_type: UInt64, source_id: 10 + CONSTANT id: 26, constant_value: UInt64_0, constant_value_type: UInt8 + HAVING + FUNCTION id: 27, function_name: less, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 28, nodes: 2 + FUNCTION id: 29, function_name: minus, function_type: ordinary, result_type: Int64 + ARGUMENTS + LIST id: 30, nodes: 2 + FUNCTION id: 31, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 32, nodes: 2 + CONSTANT id: 6, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 33, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 34, nodes: 1 + COLUMN id: 9, column_name: uint64, result_type: UInt64, source_id: 10 + FUNCTION id: 35, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 36, nodes: 1 + COLUMN id: 9, column_name: uint64, result_type: UInt64, source_id: 10 + CONSTANT id: 37, constant_value: UInt64_0, constant_value_type: UInt8 +SELECT sum(uint64 + 2.11) From test_table; +25.549999999999997 +SELECT sum(2.11 + uint64) From test_table; +25.549999999999997 +SELECT sum(uint64 - 2.11) From test_table; +4.450000000000001 +SELECT sum(2.11 - uint64) From test_table; +-4.450000000000001 +SELECT sum(uint64) + 2.11 * count(uint64) From test_table; +25.549999999999997 +SELECT 2.11 * count(uint64) + sum(uint64) From test_table; +25.549999999999997 +SELECT sum(uint64) - 2.11 * count(uint64) From test_table; +4.450000000000001 +SELECT 2.11 * count(uint64) - sum(uint64) From test_table; +-4.450000000000001 +EXPLAIN QUERY TREE (SELECT sum(uint64 + 2.11) From test_table); +QUERY id: 0 + PROJECTION COLUMNS + sum(plus(uint64, 2.11)) Float64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: plus, function_type: ordinary, result_type: Float64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 5, nodes: 1 + COLUMN id: 6, column_name: uint64, result_type: UInt64, source_id: 7 + FUNCTION id: 8, function_name: multiply, function_type: ordinary, result_type: Float64 + ARGUMENTS + LIST id: 9, nodes: 2 + CONSTANT id: 10, constant_value: Float64_2.11, constant_value_type: Float64 + FUNCTION id: 11, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 12, nodes: 1 + COLUMN id: 6, 
column_name: uint64, result_type: UInt64, source_id: 7 + JOIN TREE + TABLE id: 7, alias: __table1, table_name: default.test_table +EXPLAIN QUERY TREE (SELECT sum(2.11 + uint64) From test_table); +QUERY id: 0 + PROJECTION COLUMNS + sum(plus(2.11, uint64)) Float64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: plus, function_type: ordinary, result_type: Float64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: multiply, function_type: ordinary, result_type: Float64 + ARGUMENTS + LIST id: 5, nodes: 2 + CONSTANT id: 6, constant_value: Float64_2.11, constant_value_type: Float64 + FUNCTION id: 7, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 8, nodes: 1 + COLUMN id: 9, column_name: uint64, result_type: UInt64, source_id: 10 + FUNCTION id: 11, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 12, nodes: 1 + COLUMN id: 9, column_name: uint64, result_type: UInt64, source_id: 10 + JOIN TREE + TABLE id: 10, alias: __table1, table_name: default.test_table +EXPLAIN QUERY TREE (SELECT sum(uint64 - 2.11) From test_table); +QUERY id: 0 + PROJECTION COLUMNS + sum(minus(uint64, 2.11)) Float64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: minus, function_type: ordinary, result_type: Float64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 5, nodes: 1 + COLUMN id: 6, column_name: uint64, result_type: UInt64, source_id: 7 + FUNCTION id: 8, function_name: multiply, function_type: ordinary, result_type: Float64 + ARGUMENTS + LIST id: 9, nodes: 2 + CONSTANT id: 10, constant_value: Float64_2.11, constant_value_type: Float64 + FUNCTION id: 11, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 12, nodes: 1 + COLUMN id: 6, column_name: uint64, result_type: UInt64, source_id: 7 + JOIN TREE + TABLE id: 7, alias: __table1, table_name: default.test_table +EXPLAIN QUERY TREE (SELECT sum(2.11 - uint64) From test_table); +QUERY id: 0 + PROJECTION COLUMNS + sum(minus(2.11, uint64)) Float64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: minus, function_type: ordinary, result_type: Float64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: multiply, function_type: ordinary, result_type: Float64 + ARGUMENTS + LIST id: 5, nodes: 2 + CONSTANT id: 6, constant_value: Float64_2.11, constant_value_type: Float64 + FUNCTION id: 7, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 8, nodes: 1 + COLUMN id: 9, column_name: uint64, result_type: UInt64, source_id: 10 + FUNCTION id: 11, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 12, nodes: 1 + COLUMN id: 9, column_name: uint64, result_type: UInt64, source_id: 10 + JOIN TREE + TABLE id: 10, alias: __table1, table_name: default.test_table +EXPLAIN QUERY TREE (SELECT sum(uint64) + 2.11 * count(uint64) From test_table); +QUERY id: 0 + PROJECTION COLUMNS + plus(sum(uint64), multiply(2.11, count(uint64))) Float64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: plus, function_type: ordinary, result_type: Float64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 5, nodes: 1 + COLUMN id: 6, column_name: uint64, result_type: UInt64, source_id: 7 + FUNCTION id: 8, function_name: multiply, 
function_type: ordinary, result_type: Float64 + ARGUMENTS + LIST id: 9, nodes: 2 + CONSTANT id: 10, constant_value: Float64_2.11, constant_value_type: Float64 + FUNCTION id: 11, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 12, nodes: 1 + COLUMN id: 6, column_name: uint64, result_type: UInt64, source_id: 7 + JOIN TREE + TABLE id: 7, alias: __table1, table_name: default.test_table +EXPLAIN QUERY TREE (SELECT 2.11 * count(uint64) + sum(uint64) From test_table); +QUERY id: 0 + PROJECTION COLUMNS + plus(multiply(2.11, count(uint64)), sum(uint64)) Float64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: plus, function_type: ordinary, result_type: Float64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: multiply, function_type: ordinary, result_type: Float64 + ARGUMENTS + LIST id: 5, nodes: 2 + CONSTANT id: 6, constant_value: Float64_2.11, constant_value_type: Float64 + FUNCTION id: 7, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 8, nodes: 1 + COLUMN id: 9, column_name: uint64, result_type: UInt64, source_id: 10 + FUNCTION id: 11, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 12, nodes: 1 + COLUMN id: 9, column_name: uint64, result_type: UInt64, source_id: 10 + JOIN TREE + TABLE id: 10, alias: __table1, table_name: default.test_table +EXPLAIN QUERY TREE (SELECT sum(uint64) - 2.11 * count(uint64) From test_table); +QUERY id: 0 + PROJECTION COLUMNS + minus(sum(uint64), multiply(2.11, count(uint64))) Float64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: minus, function_type: ordinary, result_type: Float64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 5, nodes: 1 + COLUMN id: 6, column_name: uint64, result_type: UInt64, source_id: 7 + FUNCTION id: 8, function_name: multiply, function_type: ordinary, result_type: Float64 + ARGUMENTS + LIST id: 9, nodes: 2 + CONSTANT id: 10, constant_value: Float64_2.11, constant_value_type: Float64 + FUNCTION id: 11, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 12, nodes: 1 + COLUMN id: 6, column_name: uint64, result_type: UInt64, source_id: 7 + JOIN TREE + TABLE id: 7, alias: __table1, table_name: default.test_table +EXPLAIN QUERY TREE (SELECT 2.11 * count(uint64) - sum(uint64) From test_table); +QUERY id: 0 + PROJECTION COLUMNS + minus(multiply(2.11, count(uint64)), sum(uint64)) Float64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: minus, function_type: ordinary, result_type: Float64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: multiply, function_type: ordinary, result_type: Float64 + ARGUMENTS + LIST id: 5, nodes: 2 + CONSTANT id: 6, constant_value: Float64_2.11, constant_value_type: Float64 + FUNCTION id: 7, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 8, nodes: 1 + COLUMN id: 9, column_name: uint64, result_type: UInt64, source_id: 10 + FUNCTION id: 11, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 12, nodes: 1 + COLUMN id: 9, column_name: uint64, result_type: UInt64, source_id: 10 + JOIN TREE + TABLE id: 10, alias: __table1, table_name: default.test_table +SELECT sum(uint64 + 2) From test_table; +25 +SELECT sum(2 + uint64) From test_table; +25 +SELECT sum(uint64 - 2) From test_table; +5 +SELECT 
sum(2 - uint64) From test_table; +-5 +SELECT sum(uint64) + 2 * count(uint64) From test_table; +25 +SELECT 2 * count(uint64) + sum(uint64) From test_table; +25 +SELECT sum(uint64) - 2 * count(uint64) From test_table; +5 +SELECT 2 * count(uint64) - sum(uint64) From test_table; +-5 +EXPLAIN QUERY TREE (SELECT sum(uint64 + 2) From test_table); +QUERY id: 0 + PROJECTION COLUMNS + sum(plus(uint64, 2)) UInt64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: plus, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 5, nodes: 1 + COLUMN id: 6, column_name: uint64, result_type: UInt64, source_id: 7 + FUNCTION id: 8, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 9, nodes: 2 + CONSTANT id: 10, constant_value: UInt64_2, constant_value_type: UInt8 + FUNCTION id: 11, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 12, nodes: 1 + COLUMN id: 6, column_name: uint64, result_type: UInt64, source_id: 7 + JOIN TREE + TABLE id: 7, alias: __table1, table_name: default.test_table +EXPLAIN QUERY TREE (SELECT sum(2 + uint64) From test_table); +QUERY id: 0 + PROJECTION COLUMNS + sum(plus(2, uint64)) UInt64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: plus, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 5, nodes: 2 + CONSTANT id: 6, constant_value: UInt64_2, constant_value_type: UInt8 + FUNCTION id: 7, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 8, nodes: 1 + COLUMN id: 9, column_name: uint64, result_type: UInt64, source_id: 10 + FUNCTION id: 11, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 12, nodes: 1 + COLUMN id: 9, column_name: uint64, result_type: UInt64, source_id: 10 + JOIN TREE + TABLE id: 10, alias: __table1, table_name: default.test_table +EXPLAIN QUERY TREE (SELECT sum(uint64 - 2) From test_table); +QUERY id: 0 + PROJECTION COLUMNS + sum(minus(uint64, 2)) Int64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: minus, function_type: ordinary, result_type: Int64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 5, nodes: 1 + COLUMN id: 6, column_name: uint64, result_type: UInt64, source_id: 7 + FUNCTION id: 8, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 9, nodes: 2 + CONSTANT id: 10, constant_value: UInt64_2, constant_value_type: UInt8 + FUNCTION id: 11, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 12, nodes: 1 + COLUMN id: 6, column_name: uint64, result_type: UInt64, source_id: 7 + JOIN TREE + TABLE id: 7, alias: __table1, table_name: default.test_table +EXPLAIN QUERY TREE (SELECT sum(2 - uint64) From test_table); +QUERY id: 0 + PROJECTION COLUMNS + sum(minus(2, uint64)) Int64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: minus, function_type: ordinary, result_type: Int64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 5, nodes: 2 + CONSTANT id: 6, constant_value: UInt64_2, 
constant_value_type: UInt8 + FUNCTION id: 7, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 8, nodes: 1 + COLUMN id: 9, column_name: uint64, result_type: UInt64, source_id: 10 + FUNCTION id: 11, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 12, nodes: 1 + COLUMN id: 9, column_name: uint64, result_type: UInt64, source_id: 10 + JOIN TREE + TABLE id: 10, alias: __table1, table_name: default.test_table +EXPLAIN QUERY TREE (SELECT sum(uint64) + 2 * count(uint64) From test_table); +QUERY id: 0 + PROJECTION COLUMNS + plus(sum(uint64), multiply(2, count(uint64))) UInt64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: plus, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 5, nodes: 1 + COLUMN id: 6, column_name: uint64, result_type: UInt64, source_id: 7 + FUNCTION id: 8, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 9, nodes: 2 + CONSTANT id: 10, constant_value: UInt64_2, constant_value_type: UInt8 + FUNCTION id: 11, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 12, nodes: 1 + COLUMN id: 6, column_name: uint64, result_type: UInt64, source_id: 7 + JOIN TREE + TABLE id: 7, alias: __table1, table_name: default.test_table +EXPLAIN QUERY TREE (SELECT 2 * count(uint64) + sum(uint64) From test_table); +QUERY id: 0 + PROJECTION COLUMNS + plus(multiply(2, count(uint64)), sum(uint64)) UInt64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: plus, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 5, nodes: 2 + CONSTANT id: 6, constant_value: UInt64_2, constant_value_type: UInt8 + FUNCTION id: 7, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 8, nodes: 1 + COLUMN id: 9, column_name: uint64, result_type: UInt64, source_id: 10 + FUNCTION id: 11, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 12, nodes: 1 + COLUMN id: 9, column_name: uint64, result_type: UInt64, source_id: 10 + JOIN TREE + TABLE id: 10, alias: __table1, table_name: default.test_table +EXPLAIN QUERY TREE (SELECT sum(uint64) - 2 * count(uint64) From test_table); +QUERY id: 0 + PROJECTION COLUMNS + minus(sum(uint64), multiply(2, count(uint64))) Int64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: minus, function_type: ordinary, result_type: Int64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 5, nodes: 1 + COLUMN id: 6, column_name: uint64, result_type: UInt64, source_id: 7 + FUNCTION id: 8, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 9, nodes: 2 + CONSTANT id: 10, constant_value: UInt64_2, constant_value_type: UInt8 + FUNCTION id: 11, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 12, nodes: 1 + COLUMN id: 6, column_name: uint64, result_type: UInt64, source_id: 7 + JOIN TREE + TABLE id: 7, alias: __table1, table_name: default.test_table +EXPLAIN QUERY TREE (SELECT 2 * count(uint64) - sum(uint64) From test_table); +QUERY id: 0 + PROJECTION COLUMNS + minus(multiply(2, 
count(uint64)), sum(uint64)) Int64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: minus, function_type: ordinary, result_type: Int64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 5, nodes: 2 + CONSTANT id: 6, constant_value: UInt64_2, constant_value_type: UInt8 + FUNCTION id: 7, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 8, nodes: 1 + COLUMN id: 9, column_name: uint64, result_type: UInt64, source_id: 10 + FUNCTION id: 11, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 12, nodes: 1 + COLUMN id: 9, column_name: uint64, result_type: UInt64, source_id: 10 + JOIN TREE + TABLE id: 10, alias: __table1, table_name: default.test_table +SELECT sum(float64 + 2) From test_table; +26.5 +SELECT sum(2 + float64) From test_table; +26.5 +SELECT sum(float64 - 2) From test_table; +6.5 +SELECT sum(2 - float64) From test_table; +-6.5 +SELECT sum(float64) + 2 * count(float64) From test_table; +26.5 +SELECT 2 * count(float64) + sum(float64) From test_table; +26.5 +SELECT sum(float64) - 2 * count(float64) From test_table; +6.5 +SELECT 2 * count(float64) - sum(float64) From test_table; +-6.5 +EXPLAIN QUERY TREE (SELECT sum(float64 + 2) From test_table); +QUERY id: 0 + PROJECTION COLUMNS + sum(plus(float64, 2)) Float64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: plus, function_type: ordinary, result_type: Float64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: sum, function_type: aggregate, result_type: Float64 + ARGUMENTS + LIST id: 5, nodes: 1 + COLUMN id: 6, column_name: float64, result_type: Float64, source_id: 7 + FUNCTION id: 8, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 9, nodes: 2 + CONSTANT id: 10, constant_value: UInt64_2, constant_value_type: UInt8 + FUNCTION id: 11, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 12, nodes: 1 + COLUMN id: 6, column_name: float64, result_type: Float64, source_id: 7 + JOIN TREE + TABLE id: 7, alias: __table1, table_name: default.test_table +EXPLAIN QUERY TREE (SELECT sum(2 + float64) From test_table); +QUERY id: 0 + PROJECTION COLUMNS + sum(plus(2, float64)) Float64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: plus, function_type: ordinary, result_type: Float64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 5, nodes: 2 + CONSTANT id: 6, constant_value: UInt64_2, constant_value_type: UInt8 + FUNCTION id: 7, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 8, nodes: 1 + COLUMN id: 9, column_name: float64, result_type: Float64, source_id: 10 + FUNCTION id: 11, function_name: sum, function_type: aggregate, result_type: Float64 + ARGUMENTS + LIST id: 12, nodes: 1 + COLUMN id: 9, column_name: float64, result_type: Float64, source_id: 10 + JOIN TREE + TABLE id: 10, alias: __table1, table_name: default.test_table +EXPLAIN QUERY TREE (SELECT sum(float64 - 2) From test_table); +QUERY id: 0 + PROJECTION COLUMNS + sum(minus(float64, 2)) Float64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: minus, function_type: ordinary, result_type: Float64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: sum, function_type: aggregate, result_type: 
Float64 + ARGUMENTS + LIST id: 5, nodes: 1 + COLUMN id: 6, column_name: float64, result_type: Float64, source_id: 7 + FUNCTION id: 8, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 9, nodes: 2 + CONSTANT id: 10, constant_value: UInt64_2, constant_value_type: UInt8 + FUNCTION id: 11, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 12, nodes: 1 + COLUMN id: 6, column_name: float64, result_type: Float64, source_id: 7 + JOIN TREE + TABLE id: 7, alias: __table1, table_name: default.test_table +EXPLAIN QUERY TREE (SELECT sum(2 - float64) From test_table); +QUERY id: 0 + PROJECTION COLUMNS + sum(minus(2, float64)) Float64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: minus, function_type: ordinary, result_type: Float64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 5, nodes: 2 + CONSTANT id: 6, constant_value: UInt64_2, constant_value_type: UInt8 + FUNCTION id: 7, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 8, nodes: 1 + COLUMN id: 9, column_name: float64, result_type: Float64, source_id: 10 + FUNCTION id: 11, function_name: sum, function_type: aggregate, result_type: Float64 + ARGUMENTS + LIST id: 12, nodes: 1 + COLUMN id: 9, column_name: float64, result_type: Float64, source_id: 10 + JOIN TREE + TABLE id: 10, alias: __table1, table_name: default.test_table +EXPLAIN QUERY TREE (SELECT sum(float64) + 2 * count(float64) From test_table); +QUERY id: 0 + PROJECTION COLUMNS + plus(sum(float64), multiply(2, count(float64))) Float64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: plus, function_type: ordinary, result_type: Float64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: sum, function_type: aggregate, result_type: Float64 + ARGUMENTS + LIST id: 5, nodes: 1 + COLUMN id: 6, column_name: float64, result_type: Float64, source_id: 7 + FUNCTION id: 8, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 9, nodes: 2 + CONSTANT id: 10, constant_value: UInt64_2, constant_value_type: UInt8 + FUNCTION id: 11, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 12, nodes: 1 + COLUMN id: 6, column_name: float64, result_type: Float64, source_id: 7 + JOIN TREE + TABLE id: 7, alias: __table1, table_name: default.test_table +EXPLAIN QUERY TREE (SELECT 2 * count(float64) + sum(float64) From test_table); +QUERY id: 0 + PROJECTION COLUMNS + plus(multiply(2, count(float64)), sum(float64)) Float64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: plus, function_type: ordinary, result_type: Float64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 5, nodes: 2 + CONSTANT id: 6, constant_value: UInt64_2, constant_value_type: UInt8 + FUNCTION id: 7, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 8, nodes: 1 + COLUMN id: 9, column_name: float64, result_type: Float64, source_id: 10 + FUNCTION id: 11, function_name: sum, function_type: aggregate, result_type: Float64 + ARGUMENTS + LIST id: 12, nodes: 1 + COLUMN id: 9, column_name: float64, result_type: Float64, source_id: 10 + JOIN TREE + TABLE id: 10, alias: __table1, table_name: default.test_table +EXPLAIN QUERY TREE (SELECT sum(float64) - 2 * 
count(float64) From test_table); +QUERY id: 0 + PROJECTION COLUMNS + minus(sum(float64), multiply(2, count(float64))) Float64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: minus, function_type: ordinary, result_type: Float64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: sum, function_type: aggregate, result_type: Float64 + ARGUMENTS + LIST id: 5, nodes: 1 + COLUMN id: 6, column_name: float64, result_type: Float64, source_id: 7 + FUNCTION id: 8, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 9, nodes: 2 + CONSTANT id: 10, constant_value: UInt64_2, constant_value_type: UInt8 + FUNCTION id: 11, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 12, nodes: 1 + COLUMN id: 6, column_name: float64, result_type: Float64, source_id: 7 + JOIN TREE + TABLE id: 7, alias: __table1, table_name: default.test_table +EXPLAIN QUERY TREE (SELECT 2 * count(float64) - sum(float64) From test_table); +QUERY id: 0 + PROJECTION COLUMNS + minus(multiply(2, count(float64)), sum(float64)) Float64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: minus, function_type: ordinary, result_type: Float64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 5, nodes: 2 + CONSTANT id: 6, constant_value: UInt64_2, constant_value_type: UInt8 + FUNCTION id: 7, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 8, nodes: 1 + COLUMN id: 9, column_name: float64, result_type: Float64, source_id: 10 + FUNCTION id: 11, function_name: sum, function_type: aggregate, result_type: Float64 + ARGUMENTS + LIST id: 12, nodes: 1 + COLUMN id: 9, column_name: float64, result_type: Float64, source_id: 10 + JOIN TREE + TABLE id: 10, alias: __table1, table_name: default.test_table +SELECT sum(decimal32 + 2) From test_table; +26.65 +SELECT sum(2 + decimal32) From test_table; +26.65 +SELECT sum(decimal32 - 2) From test_table; +6.65 +SELECT sum(2 - decimal32) From test_table; +-6.65 +SELECT sum(decimal32) + 2 * count(decimal32) From test_table; +26.65 +SELECT 2 * count(decimal32) + sum(decimal32) From test_table; +26.65 +SELECT sum(decimal32) - 2 * count(decimal32) From test_table; +6.65 +SELECT 2 * count(decimal32) - sum(decimal32) From test_table; +-6.65 +EXPLAIN QUERY TREE (SELECT sum(decimal32 + 2) From test_table); +QUERY id: 0 + PROJECTION COLUMNS + sum(plus(decimal32, 2)) Decimal(38, 5) + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: plus, function_type: ordinary, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: sum, function_type: aggregate, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 5, nodes: 1 + COLUMN id: 6, column_name: decimal32, result_type: Decimal(9, 5), source_id: 7 + FUNCTION id: 8, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 9, nodes: 2 + CONSTANT id: 10, constant_value: UInt64_2, constant_value_type: UInt8 + FUNCTION id: 11, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 12, nodes: 1 + COLUMN id: 6, column_name: decimal32, result_type: Decimal(9, 5), source_id: 7 + JOIN TREE + TABLE id: 7, alias: __table1, table_name: default.test_table +EXPLAIN QUERY TREE (SELECT sum(2 + decimal32) From test_table); +QUERY id: 0 + PROJECTION COLUMNS + sum(plus(2, decimal32)) Decimal(38, 5) + PROJECTION + 
LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: plus, function_type: ordinary, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 5, nodes: 2 + CONSTANT id: 6, constant_value: UInt64_2, constant_value_type: UInt8 + FUNCTION id: 7, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 8, nodes: 1 + COLUMN id: 9, column_name: decimal32, result_type: Decimal(9, 5), source_id: 10 + FUNCTION id: 11, function_name: sum, function_type: aggregate, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 12, nodes: 1 + COLUMN id: 9, column_name: decimal32, result_type: Decimal(9, 5), source_id: 10 + JOIN TREE + TABLE id: 10, alias: __table1, table_name: default.test_table +EXPLAIN QUERY TREE (SELECT sum(decimal32 - 2) From test_table); +QUERY id: 0 + PROJECTION COLUMNS + sum(minus(decimal32, 2)) Decimal(38, 5) + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: minus, function_type: ordinary, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: sum, function_type: aggregate, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 5, nodes: 1 + COLUMN id: 6, column_name: decimal32, result_type: Decimal(9, 5), source_id: 7 + FUNCTION id: 8, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 9, nodes: 2 + CONSTANT id: 10, constant_value: UInt64_2, constant_value_type: UInt8 + FUNCTION id: 11, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 12, nodes: 1 + COLUMN id: 6, column_name: decimal32, result_type: Decimal(9, 5), source_id: 7 + JOIN TREE + TABLE id: 7, alias: __table1, table_name: default.test_table +EXPLAIN QUERY TREE (SELECT sum(2 - decimal32) From test_table); +QUERY id: 0 + PROJECTION COLUMNS + sum(minus(2, decimal32)) Decimal(38, 5) + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: minus, function_type: ordinary, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 5, nodes: 2 + CONSTANT id: 6, constant_value: UInt64_2, constant_value_type: UInt8 + FUNCTION id: 7, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 8, nodes: 1 + COLUMN id: 9, column_name: decimal32, result_type: Decimal(9, 5), source_id: 10 + FUNCTION id: 11, function_name: sum, function_type: aggregate, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 12, nodes: 1 + COLUMN id: 9, column_name: decimal32, result_type: Decimal(9, 5), source_id: 10 + JOIN TREE + TABLE id: 10, alias: __table1, table_name: default.test_table +EXPLAIN QUERY TREE (SELECT sum(decimal32) + 2 * count(decimal32) From test_table); +QUERY id: 0 + PROJECTION COLUMNS + plus(sum(decimal32), multiply(2, count(decimal32))) Decimal(38, 5) + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: plus, function_type: ordinary, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: sum, function_type: aggregate, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 5, nodes: 1 + COLUMN id: 6, column_name: decimal32, result_type: Decimal(9, 5), source_id: 7 + FUNCTION id: 8, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 9, nodes: 2 + CONSTANT id: 10, constant_value: UInt64_2, constant_value_type: 
UInt8 + FUNCTION id: 11, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 12, nodes: 1 + COLUMN id: 6, column_name: decimal32, result_type: Decimal(9, 5), source_id: 7 + JOIN TREE + TABLE id: 7, alias: __table1, table_name: default.test_table +EXPLAIN QUERY TREE (SELECT 2 * count(decimal32) + sum(decimal32) From test_table); +QUERY id: 0 + PROJECTION COLUMNS + plus(multiply(2, count(decimal32)), sum(decimal32)) Decimal(38, 5) + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: plus, function_type: ordinary, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 5, nodes: 2 + CONSTANT id: 6, constant_value: UInt64_2, constant_value_type: UInt8 + FUNCTION id: 7, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 8, nodes: 1 + COLUMN id: 9, column_name: decimal32, result_type: Decimal(9, 5), source_id: 10 + FUNCTION id: 11, function_name: sum, function_type: aggregate, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 12, nodes: 1 + COLUMN id: 9, column_name: decimal32, result_type: Decimal(9, 5), source_id: 10 + JOIN TREE + TABLE id: 10, alias: __table1, table_name: default.test_table +EXPLAIN QUERY TREE (SELECT sum(decimal32) - 2 * count(decimal32) From test_table); +QUERY id: 0 + PROJECTION COLUMNS + minus(sum(decimal32), multiply(2, count(decimal32))) Decimal(38, 5) + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: minus, function_type: ordinary, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: sum, function_type: aggregate, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 5, nodes: 1 + COLUMN id: 6, column_name: decimal32, result_type: Decimal(9, 5), source_id: 7 + FUNCTION id: 8, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 9, nodes: 2 + CONSTANT id: 10, constant_value: UInt64_2, constant_value_type: UInt8 + FUNCTION id: 11, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 12, nodes: 1 + COLUMN id: 6, column_name: decimal32, result_type: Decimal(9, 5), source_id: 7 + JOIN TREE + TABLE id: 7, alias: __table1, table_name: default.test_table +EXPLAIN QUERY TREE (SELECT 2 * count(decimal32) - sum(decimal32) From test_table); +QUERY id: 0 + PROJECTION COLUMNS + minus(multiply(2, count(decimal32)), sum(decimal32)) Decimal(38, 5) + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: minus, function_type: ordinary, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 5, nodes: 2 + CONSTANT id: 6, constant_value: UInt64_2, constant_value_type: UInt8 + FUNCTION id: 7, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 8, nodes: 1 + COLUMN id: 9, column_name: decimal32, result_type: Decimal(9, 5), source_id: 10 + FUNCTION id: 11, function_name: sum, function_type: aggregate, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 12, nodes: 1 + COLUMN id: 9, column_name: decimal32, result_type: Decimal(9, 5), source_id: 10 + JOIN TREE + TABLE id: 10, alias: __table1, table_name: default.test_table +SELECT sum(uint64 + 2) + sum(uint64 + 3) From test_table; +55 +SELECT sum(uint64 + 2) - sum(uint64 + 3) From test_table; +-5 +SELECT sum(uint64 - 2) - sum(uint64 - 
3) From test_table; +5 +SELECT sum(2 - uint64) - sum(3 - uint64) From test_table; +-5 +SELECT (sum(uint64) + 2 * count(uint64)) + (sum(uint64) + 3 * count(uint64)) From test_table; +55 +SELECT (sum(uint64) + 2 * count(uint64)) - (sum(uint64) + 3 * count(uint64)) From test_table; +-5 +SELECT (sum(uint64) - 2 * count(uint64)) + (sum(uint64) - 3 * count(uint64)) From test_table; +5 +SELECT (sum(uint64) - 2 * count(uint64)) - (sum(uint64) - 3 * count(uint64)) From test_table; +5 +SELECT (2 * count(uint64) - sum(uint64)) + (3 * count(uint64) - sum(uint64)) From test_table; +-5 +EXPLAIN QUERY TREE (SELECT sum(uint64 + 2) + sum(uint64 + 3) From test_table); +QUERY id: 0 + PROJECTION COLUMNS + plus(sum(plus(uint64, 2)), sum(plus(uint64, 3))) UInt64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: plus, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: plus, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 5, nodes: 2 + FUNCTION id: 6, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 7, nodes: 1 + COLUMN id: 8, column_name: uint64, result_type: UInt64, source_id: 9 + FUNCTION id: 10, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 11, nodes: 2 + CONSTANT id: 12, constant_value: UInt64_2, constant_value_type: UInt8 + FUNCTION id: 13, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 14, nodes: 1 + COLUMN id: 8, column_name: uint64, result_type: UInt64, source_id: 9 + FUNCTION id: 15, function_name: plus, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 16, nodes: 2 + FUNCTION id: 17, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 18, nodes: 1 + COLUMN id: 8, column_name: uint64, result_type: UInt64, source_id: 9 + FUNCTION id: 19, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 20, nodes: 2 + CONSTANT id: 21, constant_value: UInt64_3, constant_value_type: UInt8 + FUNCTION id: 22, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 23, nodes: 1 + COLUMN id: 8, column_name: uint64, result_type: UInt64, source_id: 9 + JOIN TREE + TABLE id: 9, alias: __table1, table_name: default.test_table +EXPLAIN QUERY TREE (SELECT sum(uint64 + 2) - sum(uint64 + 3) From test_table); +QUERY id: 0 + PROJECTION COLUMNS + minus(sum(plus(uint64, 2)), sum(plus(uint64, 3))) Int64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: minus, function_type: ordinary, result_type: Int64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: plus, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 5, nodes: 2 + FUNCTION id: 6, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 7, nodes: 1 + COLUMN id: 8, column_name: uint64, result_type: UInt64, source_id: 9 + FUNCTION id: 10, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 11, nodes: 2 + CONSTANT id: 12, constant_value: UInt64_2, constant_value_type: UInt8 + FUNCTION id: 13, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 14, nodes: 1 + COLUMN id: 8, column_name: uint64, result_type: UInt64, source_id: 9 + FUNCTION id: 15, function_name: plus, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 16, nodes: 2 + FUNCTION id: 17, 
function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 18, nodes: 1 + COLUMN id: 8, column_name: uint64, result_type: UInt64, source_id: 9 + FUNCTION id: 19, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 20, nodes: 2 + CONSTANT id: 21, constant_value: UInt64_3, constant_value_type: UInt8 + FUNCTION id: 22, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 23, nodes: 1 + COLUMN id: 8, column_name: uint64, result_type: UInt64, source_id: 9 + JOIN TREE + TABLE id: 9, alias: __table1, table_name: default.test_table +EXPLAIN QUERY TREE (SELECT sum(uint64 - 2) + sum(uint64 - 3) From test_table); +QUERY id: 0 + PROJECTION COLUMNS + plus(sum(minus(uint64, 2)), sum(minus(uint64, 3))) Int64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: plus, function_type: ordinary, result_type: Int64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: minus, function_type: ordinary, result_type: Int64 + ARGUMENTS + LIST id: 5, nodes: 2 + FUNCTION id: 6, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 7, nodes: 1 + COLUMN id: 8, column_name: uint64, result_type: UInt64, source_id: 9 + FUNCTION id: 10, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 11, nodes: 2 + CONSTANT id: 12, constant_value: UInt64_2, constant_value_type: UInt8 + FUNCTION id: 13, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 14, nodes: 1 + COLUMN id: 8, column_name: uint64, result_type: UInt64, source_id: 9 + FUNCTION id: 15, function_name: minus, function_type: ordinary, result_type: Int64 + ARGUMENTS + LIST id: 16, nodes: 2 + FUNCTION id: 17, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 18, nodes: 1 + COLUMN id: 8, column_name: uint64, result_type: UInt64, source_id: 9 + FUNCTION id: 19, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 20, nodes: 2 + CONSTANT id: 21, constant_value: UInt64_3, constant_value_type: UInt8 + FUNCTION id: 22, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 23, nodes: 1 + COLUMN id: 8, column_name: uint64, result_type: UInt64, source_id: 9 + JOIN TREE + TABLE id: 9, alias: __table1, table_name: default.test_table +EXPLAIN QUERY TREE (SELECT sum(uint64 - 2) - sum(uint64 - 3) From test_table); +QUERY id: 0 + PROJECTION COLUMNS + minus(sum(minus(uint64, 2)), sum(minus(uint64, 3))) Int64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: minus, function_type: ordinary, result_type: Int64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: minus, function_type: ordinary, result_type: Int64 + ARGUMENTS + LIST id: 5, nodes: 2 + FUNCTION id: 6, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 7, nodes: 1 + COLUMN id: 8, column_name: uint64, result_type: UInt64, source_id: 9 + FUNCTION id: 10, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 11, nodes: 2 + CONSTANT id: 12, constant_value: UInt64_2, constant_value_type: UInt8 + FUNCTION id: 13, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 14, nodes: 1 + COLUMN id: 8, column_name: uint64, result_type: UInt64, source_id: 9 + FUNCTION id: 15, function_name: minus, function_type: ordinary, result_type: 
Int64 + ARGUMENTS + LIST id: 16, nodes: 2 + FUNCTION id: 17, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 18, nodes: 1 + COLUMN id: 8, column_name: uint64, result_type: UInt64, source_id: 9 + FUNCTION id: 19, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 20, nodes: 2 + CONSTANT id: 21, constant_value: UInt64_3, constant_value_type: UInt8 + FUNCTION id: 22, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 23, nodes: 1 + COLUMN id: 8, column_name: uint64, result_type: UInt64, source_id: 9 + JOIN TREE + TABLE id: 9, alias: __table1, table_name: default.test_table +EXPLAIN QUERY TREE (SELECT sum(2 - uint64) - sum(3 - uint64) From test_table); +QUERY id: 0 + PROJECTION COLUMNS + minus(sum(minus(2, uint64)), sum(minus(3, uint64))) Int64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: minus, function_type: ordinary, result_type: Int64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: minus, function_type: ordinary, result_type: Int64 + ARGUMENTS + LIST id: 5, nodes: 2 + FUNCTION id: 6, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 7, nodes: 2 + CONSTANT id: 8, constant_value: UInt64_2, constant_value_type: UInt8 + FUNCTION id: 9, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 10, nodes: 1 + COLUMN id: 11, column_name: uint64, result_type: UInt64, source_id: 12 + FUNCTION id: 13, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 14, nodes: 1 + COLUMN id: 11, column_name: uint64, result_type: UInt64, source_id: 12 + FUNCTION id: 15, function_name: minus, function_type: ordinary, result_type: Int64 + ARGUMENTS + LIST id: 16, nodes: 2 + FUNCTION id: 17, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 18, nodes: 2 + CONSTANT id: 19, constant_value: UInt64_3, constant_value_type: UInt8 + FUNCTION id: 20, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 21, nodes: 1 + COLUMN id: 11, column_name: uint64, result_type: UInt64, source_id: 12 + FUNCTION id: 22, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 23, nodes: 1 + COLUMN id: 11, column_name: uint64, result_type: UInt64, source_id: 12 + JOIN TREE + TABLE id: 12, alias: __table1, table_name: default.test_table +EXPLAIN QUERY TREE (SELECT (sum(uint64) + 2 * count(uint64)) + (sum(uint64) + 3 * count(uint64)) From test_table); +QUERY id: 0 + PROJECTION COLUMNS + plus(plus(sum(uint64), multiply(2, count(uint64))), plus(sum(uint64), multiply(3, count(uint64)))) UInt64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: plus, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: plus, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 5, nodes: 2 + FUNCTION id: 6, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 7, nodes: 1 + COLUMN id: 8, column_name: uint64, result_type: UInt64, source_id: 9 + FUNCTION id: 10, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 11, nodes: 2 + CONSTANT id: 12, constant_value: UInt64_2, constant_value_type: UInt8 + FUNCTION id: 13, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 14, nodes: 
1 + COLUMN id: 8, column_name: uint64, result_type: UInt64, source_id: 9 + FUNCTION id: 15, function_name: plus, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 16, nodes: 2 + FUNCTION id: 17, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 18, nodes: 1 + COLUMN id: 8, column_name: uint64, result_type: UInt64, source_id: 9 + FUNCTION id: 19, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 20, nodes: 2 + CONSTANT id: 21, constant_value: UInt64_3, constant_value_type: UInt8 + FUNCTION id: 22, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 23, nodes: 1 + COLUMN id: 8, column_name: uint64, result_type: UInt64, source_id: 9 + JOIN TREE + TABLE id: 9, alias: __table1, table_name: default.test_table +EXPLAIN QUERY TREE (SELECT (sum(uint64) + 2 * count(uint64)) - (sum(uint64) + 3 * count(uint64)) From test_table); +QUERY id: 0 + PROJECTION COLUMNS + minus(plus(sum(uint64), multiply(2, count(uint64))), plus(sum(uint64), multiply(3, count(uint64)))) Int64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: minus, function_type: ordinary, result_type: Int64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: plus, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 5, nodes: 2 + FUNCTION id: 6, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 7, nodes: 1 + COLUMN id: 8, column_name: uint64, result_type: UInt64, source_id: 9 + FUNCTION id: 10, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 11, nodes: 2 + CONSTANT id: 12, constant_value: UInt64_2, constant_value_type: UInt8 + FUNCTION id: 13, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 14, nodes: 1 + COLUMN id: 8, column_name: uint64, result_type: UInt64, source_id: 9 + FUNCTION id: 15, function_name: plus, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 16, nodes: 2 + FUNCTION id: 17, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 18, nodes: 1 + COLUMN id: 8, column_name: uint64, result_type: UInt64, source_id: 9 + FUNCTION id: 19, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 20, nodes: 2 + CONSTANT id: 21, constant_value: UInt64_3, constant_value_type: UInt8 + FUNCTION id: 22, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 23, nodes: 1 + COLUMN id: 8, column_name: uint64, result_type: UInt64, source_id: 9 + JOIN TREE + TABLE id: 9, alias: __table1, table_name: default.test_table +EXPLAIN QUERY TREE (SELECT (sum(uint64) - 2 * count(uint64)) + (sum(uint64) - 3 * count(uint64)) From test_table); +QUERY id: 0 + PROJECTION COLUMNS + plus(minus(sum(uint64), multiply(2, count(uint64))), minus(sum(uint64), multiply(3, count(uint64)))) Int64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: plus, function_type: ordinary, result_type: Int64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: minus, function_type: ordinary, result_type: Int64 + ARGUMENTS + LIST id: 5, nodes: 2 + FUNCTION id: 6, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 7, nodes: 1 + COLUMN id: 8, column_name: uint64, result_type: UInt64, source_id: 9 + FUNCTION id: 10, function_name: multiply, function_type: ordinary, result_type: UInt64 
+ ARGUMENTS + LIST id: 11, nodes: 2 + CONSTANT id: 12, constant_value: UInt64_2, constant_value_type: UInt8 + FUNCTION id: 13, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 14, nodes: 1 + COLUMN id: 8, column_name: uint64, result_type: UInt64, source_id: 9 + FUNCTION id: 15, function_name: minus, function_type: ordinary, result_type: Int64 + ARGUMENTS + LIST id: 16, nodes: 2 + FUNCTION id: 17, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 18, nodes: 1 + COLUMN id: 8, column_name: uint64, result_type: UInt64, source_id: 9 + FUNCTION id: 19, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 20, nodes: 2 + CONSTANT id: 21, constant_value: UInt64_3, constant_value_type: UInt8 + FUNCTION id: 22, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 23, nodes: 1 + COLUMN id: 8, column_name: uint64, result_type: UInt64, source_id: 9 + JOIN TREE + TABLE id: 9, alias: __table1, table_name: default.test_table +EXPLAIN QUERY TREE (SELECT (sum(uint64) - 2 * count(uint64)) - (sum(uint64) - 3 * count(uint64)) From test_table); +QUERY id: 0 + PROJECTION COLUMNS + minus(minus(sum(uint64), multiply(2, count(uint64))), minus(sum(uint64), multiply(3, count(uint64)))) Int64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: minus, function_type: ordinary, result_type: Int64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: minus, function_type: ordinary, result_type: Int64 + ARGUMENTS + LIST id: 5, nodes: 2 + FUNCTION id: 6, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 7, nodes: 1 + COLUMN id: 8, column_name: uint64, result_type: UInt64, source_id: 9 + FUNCTION id: 10, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 11, nodes: 2 + CONSTANT id: 12, constant_value: UInt64_2, constant_value_type: UInt8 + FUNCTION id: 13, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 14, nodes: 1 + COLUMN id: 8, column_name: uint64, result_type: UInt64, source_id: 9 + FUNCTION id: 15, function_name: minus, function_type: ordinary, result_type: Int64 + ARGUMENTS + LIST id: 16, nodes: 2 + FUNCTION id: 17, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 18, nodes: 1 + COLUMN id: 8, column_name: uint64, result_type: UInt64, source_id: 9 + FUNCTION id: 19, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 20, nodes: 2 + CONSTANT id: 21, constant_value: UInt64_3, constant_value_type: UInt8 + FUNCTION id: 22, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 23, nodes: 1 + COLUMN id: 8, column_name: uint64, result_type: UInt64, source_id: 9 + JOIN TREE + TABLE id: 9, alias: __table1, table_name: default.test_table +EXPLAIN QUERY TREE (SELECT (2 * count(uint64) - sum(uint64)) + (3 * count(uint64) - sum(uint64)) From test_table); +QUERY id: 0 + PROJECTION COLUMNS + plus(minus(multiply(2, count(uint64)), sum(uint64)), minus(multiply(3, count(uint64)), sum(uint64))) Int64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: plus, function_type: ordinary, result_type: Int64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: minus, function_type: ordinary, result_type: Int64 + ARGUMENTS + LIST id: 5, nodes: 2 + FUNCTION id: 6, function_name: multiply, 
function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 7, nodes: 2 + CONSTANT id: 8, constant_value: UInt64_2, constant_value_type: UInt8 + FUNCTION id: 9, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 10, nodes: 1 + COLUMN id: 11, column_name: uint64, result_type: UInt64, source_id: 12 + FUNCTION id: 13, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 14, nodes: 1 + COLUMN id: 11, column_name: uint64, result_type: UInt64, source_id: 12 + FUNCTION id: 15, function_name: minus, function_type: ordinary, result_type: Int64 + ARGUMENTS + LIST id: 16, nodes: 2 + FUNCTION id: 17, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 18, nodes: 2 + CONSTANT id: 19, constant_value: UInt64_3, constant_value_type: UInt8 + FUNCTION id: 20, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 21, nodes: 1 + COLUMN id: 11, column_name: uint64, result_type: UInt64, source_id: 12 + FUNCTION id: 22, function_name: sum, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 23, nodes: 1 + COLUMN id: 11, column_name: uint64, result_type: UInt64, source_id: 12 + JOIN TREE + TABLE id: 12, alias: __table1, table_name: default.test_table +SELECT sum(float64 + 2) + sum(float64 + 3) From test_table; +58 +SELECT sum(float64 + 2) - sum(float64 + 3) From test_table; +-5 +SELECT sum(float64 - 2) + sum(float64 - 3) From test_table; +8 +SELECT sum(float64 - 2) - sum(float64 - 3) From test_table; +5 +SELECT sum(2 - float64) - sum(3 - float64) From test_table; +-5 +SELECT (sum(float64) + 2 * count(float64)) + (sum(float64) + 3 * count(float64)) From test_table; +58 +SELECT (sum(float64) + 2 * count(float64)) - (sum(float64) + 3 * count(float64)) From test_table; +-5 +SELECT (sum(float64) - 2 * count(float64)) + (sum(float64) - 3 * count(float64)) From test_table; +8 +SELECT (sum(float64) - 2 * count(float64)) - (sum(float64) - 3 * count(float64)) From test_table; +5 +SELECT (2 * count(float64) - sum(float64)) + (3 * count(float64) - sum(float64)) From test_table; +-8 +EXPLAIN QUERY TREE (SELECT sum(float64 + 2) + sum(float64 + 3) From test_table); +QUERY id: 0 + PROJECTION COLUMNS + plus(sum(plus(float64, 2)), sum(plus(float64, 3))) Float64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: plus, function_type: ordinary, result_type: Float64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: plus, function_type: ordinary, result_type: Float64 + ARGUMENTS + LIST id: 5, nodes: 2 + FUNCTION id: 6, function_name: sum, function_type: aggregate, result_type: Float64 + ARGUMENTS + LIST id: 7, nodes: 1 + COLUMN id: 8, column_name: float64, result_type: Float64, source_id: 9 + FUNCTION id: 10, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 11, nodes: 2 + CONSTANT id: 12, constant_value: UInt64_2, constant_value_type: UInt8 + FUNCTION id: 13, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 14, nodes: 1 + COLUMN id: 8, column_name: float64, result_type: Float64, source_id: 9 + FUNCTION id: 15, function_name: plus, function_type: ordinary, result_type: Float64 + ARGUMENTS + LIST id: 16, nodes: 2 + FUNCTION id: 17, function_name: sum, function_type: aggregate, result_type: Float64 + ARGUMENTS + LIST id: 18, nodes: 1 + COLUMN id: 8, column_name: float64, result_type: Float64, source_id: 9 + FUNCTION id: 19, function_name: 
multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 20, nodes: 2 + CONSTANT id: 21, constant_value: UInt64_3, constant_value_type: UInt8 + FUNCTION id: 22, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 23, nodes: 1 + COLUMN id: 8, column_name: float64, result_type: Float64, source_id: 9 + JOIN TREE + TABLE id: 9, alias: __table1, table_name: default.test_table +EXPLAIN QUERY TREE (SELECT sum(float64 + 2) - sum(float64 + 3) From test_table); +QUERY id: 0 + PROJECTION COLUMNS + minus(sum(plus(float64, 2)), sum(plus(float64, 3))) Float64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: minus, function_type: ordinary, result_type: Float64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: plus, function_type: ordinary, result_type: Float64 + ARGUMENTS + LIST id: 5, nodes: 2 + FUNCTION id: 6, function_name: sum, function_type: aggregate, result_type: Float64 + ARGUMENTS + LIST id: 7, nodes: 1 + COLUMN id: 8, column_name: float64, result_type: Float64, source_id: 9 + FUNCTION id: 10, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 11, nodes: 2 + CONSTANT id: 12, constant_value: UInt64_2, constant_value_type: UInt8 + FUNCTION id: 13, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 14, nodes: 1 + COLUMN id: 8, column_name: float64, result_type: Float64, source_id: 9 + FUNCTION id: 15, function_name: plus, function_type: ordinary, result_type: Float64 + ARGUMENTS + LIST id: 16, nodes: 2 + FUNCTION id: 17, function_name: sum, function_type: aggregate, result_type: Float64 + ARGUMENTS + LIST id: 18, nodes: 1 + COLUMN id: 8, column_name: float64, result_type: Float64, source_id: 9 + FUNCTION id: 19, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 20, nodes: 2 + CONSTANT id: 21, constant_value: UInt64_3, constant_value_type: UInt8 + FUNCTION id: 22, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 23, nodes: 1 + COLUMN id: 8, column_name: float64, result_type: Float64, source_id: 9 + JOIN TREE + TABLE id: 9, alias: __table1, table_name: default.test_table +EXPLAIN QUERY TREE (SELECT sum(float64 - 2) + sum(float64 - 3) From test_table); +QUERY id: 0 + PROJECTION COLUMNS + plus(sum(minus(float64, 2)), sum(minus(float64, 3))) Float64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: plus, function_type: ordinary, result_type: Float64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: minus, function_type: ordinary, result_type: Float64 + ARGUMENTS + LIST id: 5, nodes: 2 + FUNCTION id: 6, function_name: sum, function_type: aggregate, result_type: Float64 + ARGUMENTS + LIST id: 7, nodes: 1 + COLUMN id: 8, column_name: float64, result_type: Float64, source_id: 9 + FUNCTION id: 10, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 11, nodes: 2 + CONSTANT id: 12, constant_value: UInt64_2, constant_value_type: UInt8 + FUNCTION id: 13, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 14, nodes: 1 + COLUMN id: 8, column_name: float64, result_type: Float64, source_id: 9 + FUNCTION id: 15, function_name: minus, function_type: ordinary, result_type: Float64 + ARGUMENTS + LIST id: 16, nodes: 2 + FUNCTION id: 17, function_name: sum, function_type: aggregate, result_type: Float64 + ARGUMENTS + LIST id: 18, nodes: 1 + 
COLUMN id: 8, column_name: float64, result_type: Float64, source_id: 9 + FUNCTION id: 19, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 20, nodes: 2 + CONSTANT id: 21, constant_value: UInt64_3, constant_value_type: UInt8 + FUNCTION id: 22, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 23, nodes: 1 + COLUMN id: 8, column_name: float64, result_type: Float64, source_id: 9 + JOIN TREE + TABLE id: 9, alias: __table1, table_name: default.test_table +EXPLAIN QUERY TREE (SELECT sum(float64 - 2) - sum(float64 - 3) From test_table); +QUERY id: 0 + PROJECTION COLUMNS + minus(sum(minus(float64, 2)), sum(minus(float64, 3))) Float64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: minus, function_type: ordinary, result_type: Float64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: minus, function_type: ordinary, result_type: Float64 + ARGUMENTS + LIST id: 5, nodes: 2 + FUNCTION id: 6, function_name: sum, function_type: aggregate, result_type: Float64 + ARGUMENTS + LIST id: 7, nodes: 1 + COLUMN id: 8, column_name: float64, result_type: Float64, source_id: 9 + FUNCTION id: 10, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 11, nodes: 2 + CONSTANT id: 12, constant_value: UInt64_2, constant_value_type: UInt8 + FUNCTION id: 13, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 14, nodes: 1 + COLUMN id: 8, column_name: float64, result_type: Float64, source_id: 9 + FUNCTION id: 15, function_name: minus, function_type: ordinary, result_type: Float64 + ARGUMENTS + LIST id: 16, nodes: 2 + FUNCTION id: 17, function_name: sum, function_type: aggregate, result_type: Float64 + ARGUMENTS + LIST id: 18, nodes: 1 + COLUMN id: 8, column_name: float64, result_type: Float64, source_id: 9 + FUNCTION id: 19, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 20, nodes: 2 + CONSTANT id: 21, constant_value: UInt64_3, constant_value_type: UInt8 + FUNCTION id: 22, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 23, nodes: 1 + COLUMN id: 8, column_name: float64, result_type: Float64, source_id: 9 + JOIN TREE + TABLE id: 9, alias: __table1, table_name: default.test_table +EXPLAIN QUERY TREE (SELECT sum(2 - float64) - sum(3 - float64) From test_table); +QUERY id: 0 + PROJECTION COLUMNS + minus(sum(minus(2, float64)), sum(minus(3, float64))) Float64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: minus, function_type: ordinary, result_type: Float64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: minus, function_type: ordinary, result_type: Float64 + ARGUMENTS + LIST id: 5, nodes: 2 + FUNCTION id: 6, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 7, nodes: 2 + CONSTANT id: 8, constant_value: UInt64_2, constant_value_type: UInt8 + FUNCTION id: 9, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 10, nodes: 1 + COLUMN id: 11, column_name: float64, result_type: Float64, source_id: 12 + FUNCTION id: 13, function_name: sum, function_type: aggregate, result_type: Float64 + ARGUMENTS + LIST id: 14, nodes: 1 + COLUMN id: 11, column_name: float64, result_type: Float64, source_id: 12 + FUNCTION id: 15, function_name: minus, function_type: ordinary, result_type: Float64 + ARGUMENTS + LIST id: 16, nodes: 2 + FUNCTION id: 
17, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 18, nodes: 2 + CONSTANT id: 19, constant_value: UInt64_3, constant_value_type: UInt8 + FUNCTION id: 20, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 21, nodes: 1 + COLUMN id: 11, column_name: float64, result_type: Float64, source_id: 12 + FUNCTION id: 22, function_name: sum, function_type: aggregate, result_type: Float64 + ARGUMENTS + LIST id: 23, nodes: 1 + COLUMN id: 11, column_name: float64, result_type: Float64, source_id: 12 + JOIN TREE + TABLE id: 12, alias: __table1, table_name: default.test_table +EXPLAIN QUERY TREE (SELECT (sum(float64) + 2 * count(float64)) + (sum(float64) + 3 * count(float64)) From test_table); +QUERY id: 0 + PROJECTION COLUMNS + plus(plus(sum(float64), multiply(2, count(float64))), plus(sum(float64), multiply(3, count(float64)))) Float64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: plus, function_type: ordinary, result_type: Float64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: plus, function_type: ordinary, result_type: Float64 + ARGUMENTS + LIST id: 5, nodes: 2 + FUNCTION id: 6, function_name: sum, function_type: aggregate, result_type: Float64 + ARGUMENTS + LIST id: 7, nodes: 1 + COLUMN id: 8, column_name: float64, result_type: Float64, source_id: 9 + FUNCTION id: 10, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 11, nodes: 2 + CONSTANT id: 12, constant_value: UInt64_2, constant_value_type: UInt8 + FUNCTION id: 13, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 14, nodes: 1 + COLUMN id: 8, column_name: float64, result_type: Float64, source_id: 9 + FUNCTION id: 15, function_name: plus, function_type: ordinary, result_type: Float64 + ARGUMENTS + LIST id: 16, nodes: 2 + FUNCTION id: 17, function_name: sum, function_type: aggregate, result_type: Float64 + ARGUMENTS + LIST id: 18, nodes: 1 + COLUMN id: 8, column_name: float64, result_type: Float64, source_id: 9 + FUNCTION id: 19, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 20, nodes: 2 + CONSTANT id: 21, constant_value: UInt64_3, constant_value_type: UInt8 + FUNCTION id: 22, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 23, nodes: 1 + COLUMN id: 8, column_name: float64, result_type: Float64, source_id: 9 + JOIN TREE + TABLE id: 9, alias: __table1, table_name: default.test_table +EXPLAIN QUERY TREE (SELECT (sum(float64) + 2 * count(float64)) - (sum(float64) + 3 * count(float64)) From test_table); +QUERY id: 0 + PROJECTION COLUMNS + minus(plus(sum(float64), multiply(2, count(float64))), plus(sum(float64), multiply(3, count(float64)))) Float64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: minus, function_type: ordinary, result_type: Float64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: plus, function_type: ordinary, result_type: Float64 + ARGUMENTS + LIST id: 5, nodes: 2 + FUNCTION id: 6, function_name: sum, function_type: aggregate, result_type: Float64 + ARGUMENTS + LIST id: 7, nodes: 1 + COLUMN id: 8, column_name: float64, result_type: Float64, source_id: 9 + FUNCTION id: 10, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 11, nodes: 2 + CONSTANT id: 12, constant_value: UInt64_2, constant_value_type: UInt8 + FUNCTION id: 13, function_name: count, 
function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 14, nodes: 1 + COLUMN id: 8, column_name: float64, result_type: Float64, source_id: 9 + FUNCTION id: 15, function_name: plus, function_type: ordinary, result_type: Float64 + ARGUMENTS + LIST id: 16, nodes: 2 + FUNCTION id: 17, function_name: sum, function_type: aggregate, result_type: Float64 + ARGUMENTS + LIST id: 18, nodes: 1 + COLUMN id: 8, column_name: float64, result_type: Float64, source_id: 9 + FUNCTION id: 19, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 20, nodes: 2 + CONSTANT id: 21, constant_value: UInt64_3, constant_value_type: UInt8 + FUNCTION id: 22, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 23, nodes: 1 + COLUMN id: 8, column_name: float64, result_type: Float64, source_id: 9 + JOIN TREE + TABLE id: 9, alias: __table1, table_name: default.test_table +EXPLAIN QUERY TREE (SELECT (sum(float64) - 2 * count(float64)) + (sum(float64) - 3 * count(float64)) From test_table); +QUERY id: 0 + PROJECTION COLUMNS + plus(minus(sum(float64), multiply(2, count(float64))), minus(sum(float64), multiply(3, count(float64)))) Float64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: plus, function_type: ordinary, result_type: Float64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: minus, function_type: ordinary, result_type: Float64 + ARGUMENTS + LIST id: 5, nodes: 2 + FUNCTION id: 6, function_name: sum, function_type: aggregate, result_type: Float64 + ARGUMENTS + LIST id: 7, nodes: 1 + COLUMN id: 8, column_name: float64, result_type: Float64, source_id: 9 + FUNCTION id: 10, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 11, nodes: 2 + CONSTANT id: 12, constant_value: UInt64_2, constant_value_type: UInt8 + FUNCTION id: 13, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 14, nodes: 1 + COLUMN id: 8, column_name: float64, result_type: Float64, source_id: 9 + FUNCTION id: 15, function_name: minus, function_type: ordinary, result_type: Float64 + ARGUMENTS + LIST id: 16, nodes: 2 + FUNCTION id: 17, function_name: sum, function_type: aggregate, result_type: Float64 + ARGUMENTS + LIST id: 18, nodes: 1 + COLUMN id: 8, column_name: float64, result_type: Float64, source_id: 9 + FUNCTION id: 19, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 20, nodes: 2 + CONSTANT id: 21, constant_value: UInt64_3, constant_value_type: UInt8 + FUNCTION id: 22, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 23, nodes: 1 + COLUMN id: 8, column_name: float64, result_type: Float64, source_id: 9 + JOIN TREE + TABLE id: 9, alias: __table1, table_name: default.test_table +EXPLAIN QUERY TREE (SELECT (sum(float64) - 2 * count(float64)) - (sum(float64) - 3 * count(float64)) From test_table); +QUERY id: 0 + PROJECTION COLUMNS + minus(minus(sum(float64), multiply(2, count(float64))), minus(sum(float64), multiply(3, count(float64)))) Float64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: minus, function_type: ordinary, result_type: Float64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: minus, function_type: ordinary, result_type: Float64 + ARGUMENTS + LIST id: 5, nodes: 2 + FUNCTION id: 6, function_name: sum, function_type: aggregate, result_type: Float64 + ARGUMENTS + LIST id: 7, nodes: 1 + COLUMN id: 8, column_name: 
float64, result_type: Float64, source_id: 9 + FUNCTION id: 10, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 11, nodes: 2 + CONSTANT id: 12, constant_value: UInt64_2, constant_value_type: UInt8 + FUNCTION id: 13, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 14, nodes: 1 + COLUMN id: 8, column_name: float64, result_type: Float64, source_id: 9 + FUNCTION id: 15, function_name: minus, function_type: ordinary, result_type: Float64 + ARGUMENTS + LIST id: 16, nodes: 2 + FUNCTION id: 17, function_name: sum, function_type: aggregate, result_type: Float64 + ARGUMENTS + LIST id: 18, nodes: 1 + COLUMN id: 8, column_name: float64, result_type: Float64, source_id: 9 + FUNCTION id: 19, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 20, nodes: 2 + CONSTANT id: 21, constant_value: UInt64_3, constant_value_type: UInt8 + FUNCTION id: 22, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 23, nodes: 1 + COLUMN id: 8, column_name: float64, result_type: Float64, source_id: 9 + JOIN TREE + TABLE id: 9, alias: __table1, table_name: default.test_table +EXPLAIN QUERY TREE (SELECT (2 * count(float64) - sum(float64)) + (3 * count(float64) - sum(float64)) From test_table); +QUERY id: 0 + PROJECTION COLUMNS + plus(minus(multiply(2, count(float64)), sum(float64)), minus(multiply(3, count(float64)), sum(float64))) Float64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: plus, function_type: ordinary, result_type: Float64 + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: minus, function_type: ordinary, result_type: Float64 + ARGUMENTS + LIST id: 5, nodes: 2 + FUNCTION id: 6, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 7, nodes: 2 + CONSTANT id: 8, constant_value: UInt64_2, constant_value_type: UInt8 + FUNCTION id: 9, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 10, nodes: 1 + COLUMN id: 11, column_name: float64, result_type: Float64, source_id: 12 + FUNCTION id: 13, function_name: sum, function_type: aggregate, result_type: Float64 + ARGUMENTS + LIST id: 14, nodes: 1 + COLUMN id: 11, column_name: float64, result_type: Float64, source_id: 12 + FUNCTION id: 15, function_name: minus, function_type: ordinary, result_type: Float64 + ARGUMENTS + LIST id: 16, nodes: 2 + FUNCTION id: 17, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 18, nodes: 2 + CONSTANT id: 19, constant_value: UInt64_3, constant_value_type: UInt8 + FUNCTION id: 20, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 21, nodes: 1 + COLUMN id: 11, column_name: float64, result_type: Float64, source_id: 12 + FUNCTION id: 22, function_name: sum, function_type: aggregate, result_type: Float64 + ARGUMENTS + LIST id: 23, nodes: 1 + COLUMN id: 11, column_name: float64, result_type: Float64, source_id: 12 + JOIN TREE + TABLE id: 12, alias: __table1, table_name: default.test_table +SELECT sum(decimal32 + 2) + sum(decimal32 + 3) From test_table; +58.3 +SELECT sum(decimal32 + 2) - sum(decimal32 + 3) From test_table; +-5 +SELECT sum(decimal32 - 2) + sum(decimal32 - 3) From test_table; +8.3 +SELECT sum(decimal32 - 2) - sum(decimal32 - 3) From test_table; +5 +SELECT sum(2 - decimal32) - sum(3 - decimal32) From test_table; +-5 +SELECT (sum(decimal32) + 2 * count(decimal32)) + 
(sum(decimal32) + 3 * count(decimal32)) From test_table; +58.3 +SELECT (sum(decimal32) + 2 * count(decimal32)) - (sum(decimal32) + 3 * count(decimal32)) From test_table; +-5 +SELECT (sum(decimal32) - 2 * count(decimal32)) + (sum(decimal32) - 3 * count(decimal32)) From test_table; +8.3 +SELECT (sum(decimal32) - 2 * count(decimal32)) - (sum(decimal32) - 3 * count(decimal32)) From test_table; +5 +SELECT (2 * count(decimal32) - sum(decimal32)) + (3 * count(decimal32) - sum(decimal32)) From test_table; +-8.3 +EXPLAIN QUERY TREE (SELECT sum(decimal32 + 2) + sum(decimal32 + 3) From test_table); +QUERY id: 0 + PROJECTION COLUMNS + plus(sum(plus(decimal32, 2)), sum(plus(decimal32, 3))) Decimal(38, 5) + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: plus, function_type: ordinary, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: plus, function_type: ordinary, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 5, nodes: 2 + FUNCTION id: 6, function_name: sum, function_type: aggregate, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 7, nodes: 1 + COLUMN id: 8, column_name: decimal32, result_type: Decimal(9, 5), source_id: 9 + FUNCTION id: 10, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 11, nodes: 2 + CONSTANT id: 12, constant_value: UInt64_2, constant_value_type: UInt8 + FUNCTION id: 13, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 14, nodes: 1 + COLUMN id: 8, column_name: decimal32, result_type: Decimal(9, 5), source_id: 9 + FUNCTION id: 15, function_name: plus, function_type: ordinary, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 16, nodes: 2 + FUNCTION id: 17, function_name: sum, function_type: aggregate, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 18, nodes: 1 + COLUMN id: 8, column_name: decimal32, result_type: Decimal(9, 5), source_id: 9 + FUNCTION id: 19, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 20, nodes: 2 + CONSTANT id: 21, constant_value: UInt64_3, constant_value_type: UInt8 + FUNCTION id: 22, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 23, nodes: 1 + COLUMN id: 8, column_name: decimal32, result_type: Decimal(9, 5), source_id: 9 + JOIN TREE + TABLE id: 9, alias: __table1, table_name: default.test_table +EXPLAIN QUERY TREE (SELECT sum(decimal32 + 2) - sum(decimal32 + 3) From test_table); +QUERY id: 0 + PROJECTION COLUMNS + minus(sum(plus(decimal32, 2)), sum(plus(decimal32, 3))) Decimal(38, 5) + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: minus, function_type: ordinary, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: plus, function_type: ordinary, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 5, nodes: 2 + FUNCTION id: 6, function_name: sum, function_type: aggregate, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 7, nodes: 1 + COLUMN id: 8, column_name: decimal32, result_type: Decimal(9, 5), source_id: 9 + FUNCTION id: 10, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 11, nodes: 2 + CONSTANT id: 12, constant_value: UInt64_2, constant_value_type: UInt8 + FUNCTION id: 13, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 14, nodes: 1 + COLUMN id: 8, column_name: decimal32, result_type: Decimal(9, 5), source_id: 9 + FUNCTION id: 15, function_name: plus, 
function_type: ordinary, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 16, nodes: 2 + FUNCTION id: 17, function_name: sum, function_type: aggregate, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 18, nodes: 1 + COLUMN id: 8, column_name: decimal32, result_type: Decimal(9, 5), source_id: 9 + FUNCTION id: 19, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 20, nodes: 2 + CONSTANT id: 21, constant_value: UInt64_3, constant_value_type: UInt8 + FUNCTION id: 22, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 23, nodes: 1 + COLUMN id: 8, column_name: decimal32, result_type: Decimal(9, 5), source_id: 9 + JOIN TREE + TABLE id: 9, alias: __table1, table_name: default.test_table +EXPLAIN QUERY TREE (SELECT sum(decimal32 - 2) + sum(decimal32 - 3) From test_table); +QUERY id: 0 + PROJECTION COLUMNS + plus(sum(minus(decimal32, 2)), sum(minus(decimal32, 3))) Decimal(38, 5) + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: plus, function_type: ordinary, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: minus, function_type: ordinary, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 5, nodes: 2 + FUNCTION id: 6, function_name: sum, function_type: aggregate, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 7, nodes: 1 + COLUMN id: 8, column_name: decimal32, result_type: Decimal(9, 5), source_id: 9 + FUNCTION id: 10, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 11, nodes: 2 + CONSTANT id: 12, constant_value: UInt64_2, constant_value_type: UInt8 + FUNCTION id: 13, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 14, nodes: 1 + COLUMN id: 8, column_name: decimal32, result_type: Decimal(9, 5), source_id: 9 + FUNCTION id: 15, function_name: minus, function_type: ordinary, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 16, nodes: 2 + FUNCTION id: 17, function_name: sum, function_type: aggregate, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 18, nodes: 1 + COLUMN id: 8, column_name: decimal32, result_type: Decimal(9, 5), source_id: 9 + FUNCTION id: 19, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 20, nodes: 2 + CONSTANT id: 21, constant_value: UInt64_3, constant_value_type: UInt8 + FUNCTION id: 22, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 23, nodes: 1 + COLUMN id: 8, column_name: decimal32, result_type: Decimal(9, 5), source_id: 9 + JOIN TREE + TABLE id: 9, alias: __table1, table_name: default.test_table +EXPLAIN QUERY TREE (SELECT sum(decimal32 - 2) - sum(decimal32 - 3) From test_table); +QUERY id: 0 + PROJECTION COLUMNS + minus(sum(minus(decimal32, 2)), sum(minus(decimal32, 3))) Decimal(38, 5) + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: minus, function_type: ordinary, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: minus, function_type: ordinary, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 5, nodes: 2 + FUNCTION id: 6, function_name: sum, function_type: aggregate, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 7, nodes: 1 + COLUMN id: 8, column_name: decimal32, result_type: Decimal(9, 5), source_id: 9 + FUNCTION id: 10, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 11, nodes: 2 + CONSTANT id: 12, constant_value: UInt64_2, 
constant_value_type: UInt8 + FUNCTION id: 13, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 14, nodes: 1 + COLUMN id: 8, column_name: decimal32, result_type: Decimal(9, 5), source_id: 9 + FUNCTION id: 15, function_name: minus, function_type: ordinary, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 16, nodes: 2 + FUNCTION id: 17, function_name: sum, function_type: aggregate, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 18, nodes: 1 + COLUMN id: 8, column_name: decimal32, result_type: Decimal(9, 5), source_id: 9 + FUNCTION id: 19, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 20, nodes: 2 + CONSTANT id: 21, constant_value: UInt64_3, constant_value_type: UInt8 + FUNCTION id: 22, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 23, nodes: 1 + COLUMN id: 8, column_name: decimal32, result_type: Decimal(9, 5), source_id: 9 + JOIN TREE + TABLE id: 9, alias: __table1, table_name: default.test_table +EXPLAIN QUERY TREE (SELECT sum(2 - decimal32) - sum(3 - decimal32) From test_table); +QUERY id: 0 + PROJECTION COLUMNS + minus(sum(minus(2, decimal32)), sum(minus(3, decimal32))) Decimal(38, 5) + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: minus, function_type: ordinary, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: minus, function_type: ordinary, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 5, nodes: 2 + FUNCTION id: 6, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 7, nodes: 2 + CONSTANT id: 8, constant_value: UInt64_2, constant_value_type: UInt8 + FUNCTION id: 9, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 10, nodes: 1 + COLUMN id: 11, column_name: decimal32, result_type: Decimal(9, 5), source_id: 12 + FUNCTION id: 13, function_name: sum, function_type: aggregate, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 14, nodes: 1 + COLUMN id: 11, column_name: decimal32, result_type: Decimal(9, 5), source_id: 12 + FUNCTION id: 15, function_name: minus, function_type: ordinary, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 16, nodes: 2 + FUNCTION id: 17, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 18, nodes: 2 + CONSTANT id: 19, constant_value: UInt64_3, constant_value_type: UInt8 + FUNCTION id: 20, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 21, nodes: 1 + COLUMN id: 11, column_name: decimal32, result_type: Decimal(9, 5), source_id: 12 + FUNCTION id: 22, function_name: sum, function_type: aggregate, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 23, nodes: 1 + COLUMN id: 11, column_name: decimal32, result_type: Decimal(9, 5), source_id: 12 + JOIN TREE + TABLE id: 12, alias: __table1, table_name: default.test_table +EXPLAIN QUERY TREE (SELECT (sum(decimal32) + 2 * count(decimal32)) + (sum(decimal32) + 3 * count(decimal32)) From test_table); +QUERY id: 0 + PROJECTION COLUMNS + plus(plus(sum(decimal32), multiply(2, count(decimal32))), plus(sum(decimal32), multiply(3, count(decimal32)))) Decimal(38, 5) + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: plus, function_type: ordinary, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: plus, function_type: ordinary, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 5, nodes: 2 + 
FUNCTION id: 6, function_name: sum, function_type: aggregate, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 7, nodes: 1 + COLUMN id: 8, column_name: decimal32, result_type: Decimal(9, 5), source_id: 9 + FUNCTION id: 10, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 11, nodes: 2 + CONSTANT id: 12, constant_value: UInt64_2, constant_value_type: UInt8 + FUNCTION id: 13, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 14, nodes: 1 + COLUMN id: 8, column_name: decimal32, result_type: Decimal(9, 5), source_id: 9 + FUNCTION id: 15, function_name: plus, function_type: ordinary, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 16, nodes: 2 + FUNCTION id: 17, function_name: sum, function_type: aggregate, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 18, nodes: 1 + COLUMN id: 8, column_name: decimal32, result_type: Decimal(9, 5), source_id: 9 + FUNCTION id: 19, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 20, nodes: 2 + CONSTANT id: 21, constant_value: UInt64_3, constant_value_type: UInt8 + FUNCTION id: 22, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 23, nodes: 1 + COLUMN id: 8, column_name: decimal32, result_type: Decimal(9, 5), source_id: 9 + JOIN TREE + TABLE id: 9, alias: __table1, table_name: default.test_table +EXPLAIN QUERY TREE (SELECT (sum(decimal32) + 2 * count(decimal32)) - (sum(decimal32) + 3 * count(decimal32)) From test_table); +QUERY id: 0 + PROJECTION COLUMNS + minus(plus(sum(decimal32), multiply(2, count(decimal32))), plus(sum(decimal32), multiply(3, count(decimal32)))) Decimal(38, 5) + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: minus, function_type: ordinary, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: plus, function_type: ordinary, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 5, nodes: 2 + FUNCTION id: 6, function_name: sum, function_type: aggregate, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 7, nodes: 1 + COLUMN id: 8, column_name: decimal32, result_type: Decimal(9, 5), source_id: 9 + FUNCTION id: 10, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 11, nodes: 2 + CONSTANT id: 12, constant_value: UInt64_2, constant_value_type: UInt8 + FUNCTION id: 13, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 14, nodes: 1 + COLUMN id: 8, column_name: decimal32, result_type: Decimal(9, 5), source_id: 9 + FUNCTION id: 15, function_name: plus, function_type: ordinary, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 16, nodes: 2 + FUNCTION id: 17, function_name: sum, function_type: aggregate, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 18, nodes: 1 + COLUMN id: 8, column_name: decimal32, result_type: Decimal(9, 5), source_id: 9 + FUNCTION id: 19, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 20, nodes: 2 + CONSTANT id: 21, constant_value: UInt64_3, constant_value_type: UInt8 + FUNCTION id: 22, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 23, nodes: 1 + COLUMN id: 8, column_name: decimal32, result_type: Decimal(9, 5), source_id: 9 + JOIN TREE + TABLE id: 9, alias: __table1, table_name: default.test_table +EXPLAIN QUERY TREE (SELECT (sum(decimal32) - 2 * count(decimal32)) + (sum(decimal32) - 3 * count(decimal32)) From 
test_table); +QUERY id: 0 + PROJECTION COLUMNS + plus(minus(sum(decimal32), multiply(2, count(decimal32))), minus(sum(decimal32), multiply(3, count(decimal32)))) Decimal(38, 5) + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: plus, function_type: ordinary, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: minus, function_type: ordinary, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 5, nodes: 2 + FUNCTION id: 6, function_name: sum, function_type: aggregate, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 7, nodes: 1 + COLUMN id: 8, column_name: decimal32, result_type: Decimal(9, 5), source_id: 9 + FUNCTION id: 10, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 11, nodes: 2 + CONSTANT id: 12, constant_value: UInt64_2, constant_value_type: UInt8 + FUNCTION id: 13, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 14, nodes: 1 + COLUMN id: 8, column_name: decimal32, result_type: Decimal(9, 5), source_id: 9 + FUNCTION id: 15, function_name: minus, function_type: ordinary, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 16, nodes: 2 + FUNCTION id: 17, function_name: sum, function_type: aggregate, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 18, nodes: 1 + COLUMN id: 8, column_name: decimal32, result_type: Decimal(9, 5), source_id: 9 + FUNCTION id: 19, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 20, nodes: 2 + CONSTANT id: 21, constant_value: UInt64_3, constant_value_type: UInt8 + FUNCTION id: 22, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 23, nodes: 1 + COLUMN id: 8, column_name: decimal32, result_type: Decimal(9, 5), source_id: 9 + JOIN TREE + TABLE id: 9, alias: __table1, table_name: default.test_table +EXPLAIN QUERY TREE (SELECT (sum(decimal32) - 2 * count(decimal32)) - (sum(decimal32) - 3 * count(decimal32)) From test_table); +QUERY id: 0 + PROJECTION COLUMNS + minus(minus(sum(decimal32), multiply(2, count(decimal32))), minus(sum(decimal32), multiply(3, count(decimal32)))) Decimal(38, 5) + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: minus, function_type: ordinary, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: minus, function_type: ordinary, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 5, nodes: 2 + FUNCTION id: 6, function_name: sum, function_type: aggregate, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 7, nodes: 1 + COLUMN id: 8, column_name: decimal32, result_type: Decimal(9, 5), source_id: 9 + FUNCTION id: 10, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 11, nodes: 2 + CONSTANT id: 12, constant_value: UInt64_2, constant_value_type: UInt8 + FUNCTION id: 13, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 14, nodes: 1 + COLUMN id: 8, column_name: decimal32, result_type: Decimal(9, 5), source_id: 9 + FUNCTION id: 15, function_name: minus, function_type: ordinary, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 16, nodes: 2 + FUNCTION id: 17, function_name: sum, function_type: aggregate, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 18, nodes: 1 + COLUMN id: 8, column_name: decimal32, result_type: Decimal(9, 5), source_id: 9 + FUNCTION id: 19, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 20, nodes: 2 + 
CONSTANT id: 21, constant_value: UInt64_3, constant_value_type: UInt8 + FUNCTION id: 22, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 23, nodes: 1 + COLUMN id: 8, column_name: decimal32, result_type: Decimal(9, 5), source_id: 9 + JOIN TREE + TABLE id: 9, alias: __table1, table_name: default.test_table +EXPLAIN QUERY TREE (SELECT (2 * count(decimal32) - sum(decimal32)) + (3 * count(decimal32) - sum(decimal32)) From test_table); +QUERY id: 0 + PROJECTION COLUMNS + plus(minus(multiply(2, count(decimal32)), sum(decimal32)), minus(multiply(3, count(decimal32)), sum(decimal32))) Decimal(38, 5) + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: plus, function_type: ordinary, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: minus, function_type: ordinary, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 5, nodes: 2 + FUNCTION id: 6, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 7, nodes: 2 + CONSTANT id: 8, constant_value: UInt64_2, constant_value_type: UInt8 + FUNCTION id: 9, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 10, nodes: 1 + COLUMN id: 11, column_name: decimal32, result_type: Decimal(9, 5), source_id: 12 + FUNCTION id: 13, function_name: sum, function_type: aggregate, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 14, nodes: 1 + COLUMN id: 11, column_name: decimal32, result_type: Decimal(9, 5), source_id: 12 + FUNCTION id: 15, function_name: minus, function_type: ordinary, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 16, nodes: 2 + FUNCTION id: 17, function_name: multiply, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 18, nodes: 2 + CONSTANT id: 19, constant_value: UInt64_3, constant_value_type: UInt8 + FUNCTION id: 20, function_name: count, function_type: aggregate, result_type: UInt64 + ARGUMENTS + LIST id: 21, nodes: 1 + COLUMN id: 11, column_name: decimal32, result_type: Decimal(9, 5), source_id: 12 + FUNCTION id: 22, function_name: sum, function_type: aggregate, result_type: Decimal(38, 5) + ARGUMENTS + LIST id: 23, nodes: 1 + COLUMN id: 11, column_name: decimal32, result_type: Decimal(9, 5), source_id: 12 + JOIN TREE + TABLE id: 12, alias: __table1, table_name: default.test_table diff --git a/tests/queries/0_stateless/02932_analyzer_rewrite_sum_column_and_constant.sql b/tests/queries/0_stateless/02932_analyzer_rewrite_sum_column_and_constant.sql new file mode 100644 index 00000000000..43dad8eb8e0 --- /dev/null +++ b/tests/queries/0_stateless/02932_analyzer_rewrite_sum_column_and_constant.sql @@ -0,0 +1,210 @@ +SET allow_experimental_analyzer=1; + +-- { echoOn } +Select sum(number + 1) from numbers(10); +Select sum(1 + number) from numbers(10); +Select sum(number - 1) from numbers(10); +Select sum(1 - number) from numbers(10); +EXPLAIN QUERY TREE (Select sum(number + 1) from numbers(10)); +EXPLAIN QUERY TREE (Select sum(1 + number) from numbers(10)); +EXPLAIN QUERY TREE (Select sum(number - 1) from numbers(10)); +EXPLAIN QUERY TREE (Select sum(1 - number) from numbers(10)); + +WITH 1::Nullable(UInt64) as my_literal Select sum(number + my_literal) from numbers(0); +WITH 1::Nullable(UInt64) as my_literal Select sum(number) + my_literal * count() from numbers(0); +EXPLAIN QUERY TREE (WITH 1::Nullable(UInt64) as my_literal Select sum(number + my_literal) from numbers(0)); +EXPLAIN QUERY TREE (WITH 1::Nullable(UInt64) as my_literal Select sum(number) + 
my_literal * count() from numbers(0)); +-- { echoOff } + +DROP TABLE IF EXISTS test_table; + +CREATE TABLE test_table +( + uint64 UInt64, + float64 Float64, + decimal32 Decimal32(5), +) ENGINE=MergeTree ORDER BY uint64; + +INSERT INTO test_table VALUES (1, 1.1, 1.11); +INSERT INTO test_table VALUES (2, 2.2, 2.22); +INSERT INTO test_table VALUES (3, 3.3, 3.33); +INSERT INTO test_table VALUES (4, 4.4, 4.44); +INSERT INTO test_table VALUES (5, 5.5, 5.55); + +-- { echoOn } +SELECT sum(uint64 + 1 AS i) from test_table where i > 0; +SELECT sum(uint64 + 1) AS j from test_table having j > 0; +SELECT sum(uint64 + 1 AS i) j from test_table where i > 0 having j > 0; +SELECT sum((uint64 AS m) + (1 AS n)) j from test_table where m > 0 and n > 0 having j > 0; +SELECT sum(((uint64 AS m) + (1 AS n)) AS i) j from test_table where m > 0 and n > 0 and i > 0 having j > 0; +EXPLAIN QUERY TREE (SELECT sum(uint64 + 1 AS i) from test_table where i > 0); +EXPLAIN QUERY TREE (SELECT sum(uint64 + 1) AS j from test_table having j > 0); +EXPLAIN QUERY TREE (SELECT sum(uint64 + 1 AS i) j from test_table where i > 0 having j > 0); +EXPLAIN QUERY TREE (SELECT sum((uint64 AS m) + (1 AS n)) j from test_table where m > 0 and n > 0 having j > 0); +EXPLAIN QUERY TREE (SELECT sum(((uint64 AS m) + (1 AS n)) AS i) j from test_table where m > 0 and n > 0 and i > 0 having j > 0); + +SELECT sum(1 + uint64 AS i) from test_table where i > 0; +SELECT sum(1 + uint64) AS j from test_table having j > 0; +SELECT sum(1 + uint64 AS i) j from test_table where i > 0 having j > 0; +SELECT sum((1 AS m) + (uint64 AS n)) j from test_table where m > 0 and n > 0 having j > 0; +SELECT sum(((1 AS m) + (uint64 AS n)) AS i) j from test_table where m > 0 and n > 0 and i > 0 having j > 0; +EXPLAIN QUERY TREE (SELECT sum(1 + uint64 AS i) from test_table where i > 0); +EXPLAIN QUERY TREE (SELECT sum(1 + uint64) AS j from test_table having j > 0); +EXPLAIN QUERY TREE (SELECT sum(1 + uint64 AS i) j from test_table where i > 0 having j > 0); +EXPLAIN QUERY TREE (SELECT sum((1 AS m) + (uint64 AS n)) j from test_table where m > 0 and n > 0 having j > 0); +EXPLAIN QUERY TREE (SELECT sum(((1 AS m) + (uint64 AS n)) AS i) j from test_table where m > 0 and n > 0 and i > 0 having j > 0); + +SELECT sum(uint64 - 1 AS i) from test_table where i > 0; +SELECT sum(uint64 - 1) AS j from test_table having j > 0; +SELECT sum(uint64 - 1 AS i) j from test_table where i > 0 having j > 0; +SELECT sum((uint64 AS m) - (1 AS n)) j from test_table where m > 0 and n > 0 having j > 0; +SELECT sum(((uint64 AS m) - (1 AS n)) AS i) j from test_table where m > 0 and n > 0 and i > 0 having j > 0; +EXPLAIN QUERY TREE (SELECT sum(uint64 - 1 AS i) from test_table where i > 0); +EXPLAIN QUERY TREE (SELECT sum(uint64 - 1) AS j from test_table having j > 0); +EXPLAIN QUERY TREE (SELECT sum(uint64 - 1 AS i) j from test_table where i > 0 having j > 0); +EXPLAIN QUERY TREE (SELECT sum((uint64 AS m) - (1 AS n)) j from test_table where m > 0 and n > 0 having j > 0); +EXPLAIN QUERY TREE (SELECT sum(((uint64 AS m) - (1 AS n)) AS i) j from test_table where m > 0 and n > 0 and i > 0 having j > 0); + +SELECT sum(1 - uint64 AS i) from test_table; +SELECT sum(1 - uint64) AS j from test_table; +SELECT sum(1 - uint64 AS i) j from test_table; +SELECT sum((1 AS m) - (uint64 AS n)) j from test_table; +SELECT sum(((1 AS m) - (uint64 AS n)) AS i) j from test_table; +EXPLAIN QUERY TREE (SELECT sum(1 - uint64 AS i) from test_table where i > 0); +EXPLAIN QUERY TREE (SELECT sum(1 - uint64) AS j from test_table having j 
< 0); +EXPLAIN QUERY TREE (SELECT sum(1 - uint64 AS i) j from test_table where i > 0 having j < 0); +EXPLAIN QUERY TREE (SELECT sum((1 AS m) - (uint64 AS n)) j from test_table where m > 0 and n > 0 having j < 0); +EXPLAIN QUERY TREE (SELECT sum(((1 AS m) - (uint64 AS n)) AS i) j from test_table where m > 0 and n > 0 and i < 0 having j < 0); + +SELECT sum(uint64 + 2.11) From test_table; +SELECT sum(2.11 + uint64) From test_table; +SELECT sum(uint64 - 2.11) From test_table; +SELECT sum(2.11 - uint64) From test_table; +SELECT sum(uint64) + 2.11 * count(uint64) From test_table; +SELECT 2.11 * count(uint64) + sum(uint64) From test_table; +SELECT sum(uint64) - 2.11 * count(uint64) From test_table; +SELECT 2.11 * count(uint64) - sum(uint64) From test_table; +EXPLAIN QUERY TREE (SELECT sum(uint64 + 2.11) From test_table); +EXPLAIN QUERY TREE (SELECT sum(2.11 + uint64) From test_table); +EXPLAIN QUERY TREE (SELECT sum(uint64 - 2.11) From test_table); +EXPLAIN QUERY TREE (SELECT sum(2.11 - uint64) From test_table); +EXPLAIN QUERY TREE (SELECT sum(uint64) + 2.11 * count(uint64) From test_table); +EXPLAIN QUERY TREE (SELECT 2.11 * count(uint64) + sum(uint64) From test_table); +EXPLAIN QUERY TREE (SELECT sum(uint64) - 2.11 * count(uint64) From test_table); +EXPLAIN QUERY TREE (SELECT 2.11 * count(uint64) - sum(uint64) From test_table); + +SELECT sum(uint64 + 2) From test_table; +SELECT sum(2 + uint64) From test_table; +SELECT sum(uint64 - 2) From test_table; +SELECT sum(2 - uint64) From test_table; +SELECT sum(uint64) + 2 * count(uint64) From test_table; +SELECT 2 * count(uint64) + sum(uint64) From test_table; +SELECT sum(uint64) - 2 * count(uint64) From test_table; +SELECT 2 * count(uint64) - sum(uint64) From test_table; +EXPLAIN QUERY TREE (SELECT sum(uint64 + 2) From test_table); +EXPLAIN QUERY TREE (SELECT sum(2 + uint64) From test_table); +EXPLAIN QUERY TREE (SELECT sum(uint64 - 2) From test_table); +EXPLAIN QUERY TREE (SELECT sum(2 - uint64) From test_table); +EXPLAIN QUERY TREE (SELECT sum(uint64) + 2 * count(uint64) From test_table); +EXPLAIN QUERY TREE (SELECT 2 * count(uint64) + sum(uint64) From test_table); +EXPLAIN QUERY TREE (SELECT sum(uint64) - 2 * count(uint64) From test_table); +EXPLAIN QUERY TREE (SELECT 2 * count(uint64) - sum(uint64) From test_table); + +SELECT sum(float64 + 2) From test_table; +SELECT sum(2 + float64) From test_table; +SELECT sum(float64 - 2) From test_table; +SELECT sum(2 - float64) From test_table; +SELECT sum(float64) + 2 * count(float64) From test_table; +SELECT 2 * count(float64) + sum(float64) From test_table; +SELECT sum(float64) - 2 * count(float64) From test_table; +SELECT 2 * count(float64) - sum(float64) From test_table; +EXPLAIN QUERY TREE (SELECT sum(float64 + 2) From test_table); +EXPLAIN QUERY TREE (SELECT sum(2 + float64) From test_table); +EXPLAIN QUERY TREE (SELECT sum(float64 - 2) From test_table); +EXPLAIN QUERY TREE (SELECT sum(2 - float64) From test_table); +EXPLAIN QUERY TREE (SELECT sum(float64) + 2 * count(float64) From test_table); +EXPLAIN QUERY TREE (SELECT 2 * count(float64) + sum(float64) From test_table); +EXPLAIN QUERY TREE (SELECT sum(float64) - 2 * count(float64) From test_table); +EXPLAIN QUERY TREE (SELECT 2 * count(float64) - sum(float64) From test_table); + +SELECT sum(decimal32 + 2) From test_table; +SELECT sum(2 + decimal32) From test_table; +SELECT sum(decimal32 - 2) From test_table; +SELECT sum(2 - decimal32) From test_table; +SELECT sum(decimal32) + 2 * count(decimal32) From test_table; +SELECT 2 * count(decimal32) + 
sum(decimal32) From test_table; +SELECT sum(decimal32) - 2 * count(decimal32) From test_table; +SELECT 2 * count(decimal32) - sum(decimal32) From test_table; +EXPLAIN QUERY TREE (SELECT sum(decimal32 + 2) From test_table); +EXPLAIN QUERY TREE (SELECT sum(2 + decimal32) From test_table); +EXPLAIN QUERY TREE (SELECT sum(decimal32 - 2) From test_table); +EXPLAIN QUERY TREE (SELECT sum(2 - decimal32) From test_table); +EXPLAIN QUERY TREE (SELECT sum(decimal32) + 2 * count(decimal32) From test_table); +EXPLAIN QUERY TREE (SELECT 2 * count(decimal32) + sum(decimal32) From test_table); +EXPLAIN QUERY TREE (SELECT sum(decimal32) - 2 * count(decimal32) From test_table); +EXPLAIN QUERY TREE (SELECT 2 * count(decimal32) - sum(decimal32) From test_table); + +SELECT sum(uint64 + 2) + sum(uint64 + 3) From test_table; +SELECT sum(uint64 + 2) - sum(uint64 + 3) From test_table; +SELECT sum(uint64 - 2) - sum(uint64 - 3) From test_table; +SELECT sum(2 - uint64) - sum(3 - uint64) From test_table; +SELECT (sum(uint64) + 2 * count(uint64)) + (sum(uint64) + 3 * count(uint64)) From test_table; +SELECT (sum(uint64) + 2 * count(uint64)) - (sum(uint64) + 3 * count(uint64)) From test_table; +SELECT (sum(uint64) - 2 * count(uint64)) + (sum(uint64) - 3 * count(uint64)) From test_table; +SELECT (sum(uint64) - 2 * count(uint64)) - (sum(uint64) - 3 * count(uint64)) From test_table; +SELECT (2 * count(uint64) - sum(uint64)) + (3 * count(uint64) - sum(uint64)) From test_table; +EXPLAIN QUERY TREE (SELECT sum(uint64 + 2) + sum(uint64 + 3) From test_table); +EXPLAIN QUERY TREE (SELECT sum(uint64 + 2) - sum(uint64 + 3) From test_table); +EXPLAIN QUERY TREE (SELECT sum(uint64 - 2) + sum(uint64 - 3) From test_table); +EXPLAIN QUERY TREE (SELECT sum(uint64 - 2) - sum(uint64 - 3) From test_table); +EXPLAIN QUERY TREE (SELECT sum(2 - uint64) - sum(3 - uint64) From test_table); +EXPLAIN QUERY TREE (SELECT (sum(uint64) + 2 * count(uint64)) + (sum(uint64) + 3 * count(uint64)) From test_table); +EXPLAIN QUERY TREE (SELECT (sum(uint64) + 2 * count(uint64)) - (sum(uint64) + 3 * count(uint64)) From test_table); +EXPLAIN QUERY TREE (SELECT (sum(uint64) - 2 * count(uint64)) + (sum(uint64) - 3 * count(uint64)) From test_table); +EXPLAIN QUERY TREE (SELECT (sum(uint64) - 2 * count(uint64)) - (sum(uint64) - 3 * count(uint64)) From test_table); +EXPLAIN QUERY TREE (SELECT (2 * count(uint64) - sum(uint64)) + (3 * count(uint64) - sum(uint64)) From test_table); + +SELECT sum(float64 + 2) + sum(float64 + 3) From test_table; +SELECT sum(float64 + 2) - sum(float64 + 3) From test_table; +SELECT sum(float64 - 2) + sum(float64 - 3) From test_table; +SELECT sum(float64 - 2) - sum(float64 - 3) From test_table; +SELECT sum(2 - float64) - sum(3 - float64) From test_table; +SELECT (sum(float64) + 2 * count(float64)) + (sum(float64) + 3 * count(float64)) From test_table; +SELECT (sum(float64) + 2 * count(float64)) - (sum(float64) + 3 * count(float64)) From test_table; +SELECT (sum(float64) - 2 * count(float64)) + (sum(float64) - 3 * count(float64)) From test_table; +SELECT (sum(float64) - 2 * count(float64)) - (sum(float64) - 3 * count(float64)) From test_table; +SELECT (2 * count(float64) - sum(float64)) + (3 * count(float64) - sum(float64)) From test_table; +EXPLAIN QUERY TREE (SELECT sum(float64 + 2) + sum(float64 + 3) From test_table); +EXPLAIN QUERY TREE (SELECT sum(float64 + 2) - sum(float64 + 3) From test_table); +EXPLAIN QUERY TREE (SELECT sum(float64 - 2) + sum(float64 - 3) From test_table); +EXPLAIN QUERY TREE (SELECT sum(float64 - 2) - sum(float64 
- 3) From test_table); +EXPLAIN QUERY TREE (SELECT sum(2 - float64) - sum(3 - float64) From test_table); +EXPLAIN QUERY TREE (SELECT (sum(float64) + 2 * count(float64)) + (sum(float64) + 3 * count(float64)) From test_table); +EXPLAIN QUERY TREE (SELECT (sum(float64) + 2 * count(float64)) - (sum(float64) + 3 * count(float64)) From test_table); +EXPLAIN QUERY TREE (SELECT (sum(float64) - 2 * count(float64)) + (sum(float64) - 3 * count(float64)) From test_table); +EXPLAIN QUERY TREE (SELECT (sum(float64) - 2 * count(float64)) - (sum(float64) - 3 * count(float64)) From test_table); +EXPLAIN QUERY TREE (SELECT (2 * count(float64) - sum(float64)) + (3 * count(float64) - sum(float64)) From test_table); + +SELECT sum(decimal32 + 2) + sum(decimal32 + 3) From test_table; +SELECT sum(decimal32 + 2) - sum(decimal32 + 3) From test_table; +SELECT sum(decimal32 - 2) + sum(decimal32 - 3) From test_table; +SELECT sum(decimal32 - 2) - sum(decimal32 - 3) From test_table; +SELECT sum(2 - decimal32) - sum(3 - decimal32) From test_table; +SELECT (sum(decimal32) + 2 * count(decimal32)) + (sum(decimal32) + 3 * count(decimal32)) From test_table; +SELECT (sum(decimal32) + 2 * count(decimal32)) - (sum(decimal32) + 3 * count(decimal32)) From test_table; +SELECT (sum(decimal32) - 2 * count(decimal32)) + (sum(decimal32) - 3 * count(decimal32)) From test_table; +SELECT (sum(decimal32) - 2 * count(decimal32)) - (sum(decimal32) - 3 * count(decimal32)) From test_table; +SELECT (2 * count(decimal32) - sum(decimal32)) + (3 * count(decimal32) - sum(decimal32)) From test_table; +EXPLAIN QUERY TREE (SELECT sum(decimal32 + 2) + sum(decimal32 + 3) From test_table); +EXPLAIN QUERY TREE (SELECT sum(decimal32 + 2) - sum(decimal32 + 3) From test_table); +EXPLAIN QUERY TREE (SELECT sum(decimal32 - 2) + sum(decimal32 - 3) From test_table); +EXPLAIN QUERY TREE (SELECT sum(decimal32 - 2) - sum(decimal32 - 3) From test_table); +EXPLAIN QUERY TREE (SELECT sum(2 - decimal32) - sum(3 - decimal32) From test_table); +EXPLAIN QUERY TREE (SELECT (sum(decimal32) + 2 * count(decimal32)) + (sum(decimal32) + 3 * count(decimal32)) From test_table); +EXPLAIN QUERY TREE (SELECT (sum(decimal32) + 2 * count(decimal32)) - (sum(decimal32) + 3 * count(decimal32)) From test_table); +EXPLAIN QUERY TREE (SELECT (sum(decimal32) - 2 * count(decimal32)) + (sum(decimal32) - 3 * count(decimal32)) From test_table); +EXPLAIN QUERY TREE (SELECT (sum(decimal32) - 2 * count(decimal32)) - (sum(decimal32) - 3 * count(decimal32)) From test_table); +EXPLAIN QUERY TREE (SELECT (2 * count(decimal32) - sum(decimal32)) + (3 * count(decimal32) - sum(decimal32)) From test_table); +-- { echoOff } + +DROP TABLE IF EXISTS test_table; diff --git a/tests/queries/0_stateless/02932_kill_query_sleep.sh b/tests/queries/0_stateless/02932_kill_query_sleep.sh index 84e84204aa1..5ae47755722 100755 --- a/tests/queries/0_stateless/02932_kill_query_sleep.sh +++ b/tests/queries/0_stateless/02932_kill_query_sleep.sh @@ -10,7 +10,7 @@ function wait_query_started() local query_id="$1" timeout=60 start=$EPOCHSECONDS - while [[ $($CLICKHOUSE_CLIENT --query="SELECT count() FROM system.processes WHERE query_id='$query_id'") == 0 ]]; do + while [[ $($CLICKHOUSE_CLIENT --query="SELECT count() FROM system.processes WHERE query_id='$query_id' SETTINGS use_query_cache=0") == 0 ]]; do if ((EPOCHSECONDS-start > timeout )); then echo "Timeout while waiting for query $query_id to start" exit 1 @@ -26,7 +26,7 @@ function kill_query() $CLICKHOUSE_CLIENT --query "KILL QUERY WHERE query_id='$query_id'" >/dev/null 
timeout=60 start=$EPOCHSECONDS - while [[ $($CLICKHOUSE_CLIENT --query="SELECT count() FROM system.processes WHERE query_id='$query_id'") != 0 ]]; do + while [[ $($CLICKHOUSE_CLIENT --query="SELECT count() FROM system.processes WHERE query_id='$query_id' SETTINGS use_query_cache=0") != 0 ]]; do if ((EPOCHSECONDS-start > timeout )); then echo "Timeout while waiting for query $query_id to cancel" exit 1 @@ -49,4 +49,4 @@ echo "Cancelling query" kill_query "$sleep_query_id" $CLICKHOUSE_CLIENT --query "SYSTEM FLUSH LOGS;" -$CLICKHOUSE_CLIENT --query "SELECT exception FROM system.query_log WHERE query_id='$sleep_query_id' AND current_database = currentDatabase()" | grep -oF "QUERY_WAS_CANCELLED" +$CLICKHOUSE_CLIENT --query "SELECT exception FROM system.query_log WHERE query_id='$sleep_query_id' AND current_database = '$CLICKHOUSE_DATABASE'" | grep -oF "QUERY_WAS_CANCELLED" diff --git a/tests/queries/0_stateless/02933_paste_join.reference b/tests/queries/0_stateless/02933_paste_join.reference index 5ff13917957..81a8ac22da4 100644 --- a/tests/queries/0_stateless/02933_paste_join.reference +++ b/tests/queries/0_stateless/02933_paste_join.reference @@ -82,3 +82,26 @@ UInt64 7 2 8 1 9 0 +0 0 +1 1 +2 2 +3 3 +4 4 +5 5 +0 0 +1 1 +2 2 +3 3 +4 4 +5 5 +0 0 +1 1 +2 2 +3 3 +4 4 +5 5 +1 2 3 +0 0 +1 1 +0 +1 diff --git a/tests/queries/0_stateless/02933_paste_join.sql b/tests/queries/0_stateless/02933_paste_join.sql index b103bf72160..604078d1c3a 100644 --- a/tests/queries/0_stateless/02933_paste_join.sql +++ b/tests/queries/0_stateless/02933_paste_join.sql @@ -1,6 +1,6 @@ select * from (SELECT number as a FROM numbers(10)) t1 PASTE JOIN (select number as a from numbers(10)) t2; select * from (SELECT number as a FROM numbers(10)) t1 PASTE JOIN (select number as a from numbers(10) order by a desc) t2; -create table if not exists test (num UInt64) engine=Memory; +create table if not exists test (number UInt64) engine=Memory; insert into test select number from numbers(6); insert into test select number from numbers(5); SELECT * FROM (SELECT 1) t1 PASTE JOIN (SELECT 2) SETTINGS joined_subquery_requires_alias=0; @@ -35,3 +35,21 @@ SET max_threads = 2; select * from (SELECT number as a FROM numbers_mt(10)) t1 PASTE JOIN (select number as a from numbers(10) ORDER BY a DESC) t2 SETTINGS max_block_size=10; select * from (SELECT number as a FROM numbers(10)) t1 ANY PASTE JOIN (select number as a from numbers(10)) t2; -- { clientError SYNTAX_ERROR } select * from (SELECT number as a FROM numbers(10)) t1 ALL PASTE JOIN (select number as a from numbers(10)) t2; -- { clientError SYNTAX_ERROR } + +TRUNCATE TABLE test; +INSERT INTO test SELECT number from numbers(6); +SELECT * FROM (SELECT number FROM test) PASTE JOIN (SELECT number FROM numbers(6) ORDER BY number) SETTINGS joined_subquery_requires_alias = 0; +SELECT * FROM (SELECT number FROM test PASTE JOIN (Select number FROM numbers(7))) PASTE JOIN (SELECT number FROM numbers(6) PASTE JOIN (SELECT number FROM test)) SETTINGS joined_subquery_requires_alias = 0; +SELECT * FROM (SELECT number FROM test PASTE JOIN (SELECT number FROM test PASTE JOIN (Select number FROM numbers(7)))) PASTE JOIN (SELECT number FROM numbers(6) PASTE JOIN (SELECT number FROM test)) SETTINGS joined_subquery_requires_alias = 0; +SELECT * FROM (SELECT 1 AS a) PASTE JOIN (SELECT 2 AS b) PASTE JOIN (SELECT 3 AS c) SETTINGS allow_experimental_analyzer = 1; +SELECT * FROM (SELECT 1 AS a) PASTE JOIN (SELECT 2 AS b) PASTE JOIN (SELECT 3 AS a) SETTINGS allow_experimental_analyzer = 1; -- { serverError 
AMBIGUOUS_COLUMN_NAME } + +SET allow_experimental_analyzer = 1; +CREATE TABLE test1 (a Int32) engine=MergeTree order by a; +INSERT INTO test1 SELECT * FROM numbers(2); +CREATE TABLE test2 (a Int32) engine=MergeTree order by a; +INSERT INTO test2 SELECT * FROM numbers(2); +SELECT * FROM test1 PASTE JOIN (SELECT * FROM test2); +SELECT a `test2.a` FROM test1 PASTE JOIN test2; +SELECT * FROM test1 `test2.a` PASTE JOIN test2 `test2.a`; -- { serverError MULTIPLE_EXPRESSIONS_FOR_ALIAS } +SELECT * FROM test1 PASTE JOIN (SELECT number AS a FROM numbers(2) ORDER BY number DESC); -- { serverError AMBIGUOUS_COLUMN_NAME } diff --git a/tests/queries/0_stateless/02940_variant_text_deserialization.reference b/tests/queries/0_stateless/02940_variant_text_deserialization.reference new file mode 100644 index 00000000000..8836e6c4e57 --- /dev/null +++ b/tests/queries/0_stateless/02940_variant_text_deserialization.reference @@ -0,0 +1,516 @@ +JSON +String +{"v":null,"variantElement(v, 'String')":null} +{"v":"string","variantElement(v, 'String')":"string"} +{"v":"42","variantElement(v, 'String')":null} +FixedString +{"v":null,"variantElement(v, 'FixedString(4)')":null} +{"v":"string","variantElement(v, 'FixedString(4)')":null} +{"v":"abcd","variantElement(v, 'FixedString(4)')":"abcd"} +Bool +{"v":null,"variantElement(v, 'Bool')":null} +{"v":"string","variantElement(v, 'Bool')":null} +{"v":true,"variantElement(v, 'Bool')":true} +Integers +{"v":null,"variantElement(v, 'Int8')":null} +{"v":"string","variantElement(v, 'Int8')":null} +{"v":-1,"variantElement(v, 'Int8')":-1} +{"v":0,"variantElement(v, 'Int8')":0} +{"v":"10000000000","variantElement(v, 'Int8')":null} +{"v":null,"variantElement(v, 'UInt8')":null} +{"v":"string","variantElement(v, 'UInt8')":null} +{"v":"-1","variantElement(v, 'UInt8')":null} +{"v":0,"variantElement(v, 'UInt8')":0} +{"v":"10000000000","variantElement(v, 'UInt8')":null} +{"v":null,"variantElement(v, 'Int16')":null} +{"v":"string","variantElement(v, 'Int16')":null} +{"v":-1,"variantElement(v, 'Int16')":-1} +{"v":0,"variantElement(v, 'Int16')":0} +{"v":"10000000000","variantElement(v, 'Int16')":null} +{"v":null,"variantElement(v, 'UInt16')":null} +{"v":"string","variantElement(v, 'UInt16')":null} +{"v":"-1","variantElement(v, 'UInt16')":null} +{"v":0,"variantElement(v, 'UInt16')":0} +{"v":"10000000000","variantElement(v, 'UInt16')":null} +{"v":null,"variantElement(v, 'Int32')":null} +{"v":"string","variantElement(v, 'Int32')":null} +{"v":-1,"variantElement(v, 'Int32')":-1} +{"v":0,"variantElement(v, 'Int32')":0} +{"v":"10000000000","variantElement(v, 'Int32')":null} +{"v":null,"variantElement(v, 'UInt32')":null} +{"v":"string","variantElement(v, 'UInt32')":null} +{"v":"-1","variantElement(v, 'UInt32')":null} +{"v":0,"variantElement(v, 'UInt32')":0} +{"v":"10000000000","variantElement(v, 'UInt32')":null} +{"v":null,"variantElement(v, 'Int64')":null} +{"v":"string","variantElement(v, 'Int64')":null} +{"v":"-1","variantElement(v, 'Int64')":"-1"} +{"v":"0","variantElement(v, 'Int64')":"0"} +{"v":"10000000000000000000000","variantElement(v, 'Int64')":null} +{"v":null,"variantElement(v, 'UInt64')":null} +{"v":"string","variantElement(v, 'UInt64')":null} +{"v":"-1","variantElement(v, 'UInt64')":null} +{"v":"0","variantElement(v, 'UInt64')":"0"} +{"v":"10000000000000000000000","variantElement(v, 'UInt64')":null} +{"v":null,"variantElement(v, 'Int128')":null} +{"v":"string","variantElement(v, 'Int128')":null} +{"v":"-1","variantElement(v, 'Int128')":"-1"} +{"v":"0","variantElement(v, 'Int128')":"0"} 
+{"v":null,"variantElement(v, 'UInt128')":null} +{"v":"string","variantElement(v, 'UInt128')":null} +{"v":"-1","variantElement(v, 'UInt128')":null} +{"v":"0","variantElement(v, 'UInt128')":"0"} +Floats +{"v":null,"variantElement(v, 'Float32')":null} +{"v":"string","variantElement(v, 'Float32')":null} +{"v":42.42,"variantElement(v, 'Float32')":42.42} +{"v":null,"variantElement(v, 'Float64')":null} +{"v":"string","variantElement(v, 'Float64')":null} +{"v":42.42,"variantElement(v, 'Float64')":42.42} +Decimals +{"v":null,"variantElement(v, 'Decimal32(6)')":null} +{"v":"string","variantElement(v, 'Decimal32(6)')":null} +{"v":42.42,"variantElement(v, 'Decimal32(6)')":42.42} +{"v":"4242424242424242424242424242424242424242424242424242424242424242424242424242424242424242424242.424242424242424242","variantElement(v, 'Decimal32(6)')":null} +{"v":null,"variantElement(v, 'Decimal64(6)')":null} +{"v":"string","variantElement(v, 'Decimal64(6)')":null} +{"v":42.42,"variantElement(v, 'Decimal64(6)')":42.42} +{"v":"4242424242424242424242424242424242424242424242424242424242424242424242424242424242424242424242.424242424242424242","variantElement(v, 'Decimal64(6)')":null} +{"v":null,"variantElement(v, 'Decimal128(6)')":null} +{"v":"string","variantElement(v, 'Decimal128(6)')":null} +{"v":42.42,"variantElement(v, 'Decimal128(6)')":42.42} +{"v":"4242424242424242424242424242424242424242424242424242424242424242424242424242424242424242424242.424242424242424242","variantElement(v, 'Decimal128(6)')":null} +{"v":null,"variantElement(v, 'Decimal256(6)')":null} +{"v":"string","variantElement(v, 'Decimal256(6)')":null} +{"v":42.42,"variantElement(v, 'Decimal256(6)')":42.42} +{"v":"4242424242424242424242424242424242424242424242424242424242424242424242424242424242424242424242.424242424242424242","variantElement(v, 'Decimal256(6)')":null} +Dates and DateTimes +{"v":null,"variantElement(v, 'Date')":null} +{"v":"string","variantElement(v, 'Date')":null} +{"v":"2020-01-01","variantElement(v, 'Date')":"2020-01-01"} +{"v":"2020-01-01 00:00:00.999","variantElement(v, 'Date')":null} +{"v":null,"variantElement(v, 'Date32')":null} +{"v":"string","variantElement(v, 'Date32')":null} +{"v":"1900-01-01","variantElement(v, 'Date32')":"1900-01-01"} +{"v":"2020-01-01 00:00:00.999","variantElement(v, 'Date32')":null} +{"v":null,"variantElement(v, 'DateTime')":null} +{"v":"string","variantElement(v, 'DateTime')":null} +{"v":"2020-01-01 00:00:00","variantElement(v, 'DateTime')":"2020-01-01 00:00:00"} +{"v":"2020-01-01 00:00:00.999","variantElement(v, 'DateTime')":null} +{"v":null,"variantElement(v, 'DateTime64')":null} +{"v":"string","variantElement(v, 'DateTime64')":null} +{"v":"2020-01-01 00:00:00.999","variantElement(v, 'DateTime64')":"2020-01-01 00:00:00.999"} +{"v":"2020-01-01 00:00:00.999999999 ABC","variantElement(v, 'DateTime64')":null} +UUID +{"v":null,"variantElement(v, 'UUID')":null} +{"v":"string","variantElement(v, 'UUID')":null} +{"v":"c8619cca-0caa-445e-ae76-1d4f6e0b3927","variantElement(v, 'UUID')":"c8619cca-0caa-445e-ae76-1d4f6e0b3927"} +IPv4 +{"v":null,"variantElement(v, 'IPv4')":null} +{"v":"string","variantElement(v, 'IPv4')":null} +{"v":"127.0.0.1","variantElement(v, 'IPv4')":"127.0.0.1"} +IPv6 +{"v":null,"variantElement(v, 'IPv6')":null} +{"v":"string","variantElement(v, 'IPv6')":null} +{"v":"2001:db8:85a3::8a2e:370:7334","variantElement(v, 'IPv6')":"2001:db8:85a3::8a2e:370:7334"} +Enum +{"v":null,"variantElement(v, 'Enum(\\'a\\' = 1)')":null} +{"v":"string","variantElement(v, 'Enum(\\'a\\' = 1)')":null} 
+{"v":"a","variantElement(v, 'Enum(\\'a\\' = 1)')":"a"} +{"v":"a","variantElement(v, 'Enum(\\'a\\' = 1)')":"a"} +{"v":2,"variantElement(v, 'Enum(\\'a\\' = 1)')":null} +Map +{"v":null,"variantElement(v, 'Map(String, UInt64)')":{}} +{"v":"string","variantElement(v, 'Map(String, UInt64)')":{}} +{"v":{"a":"42","b":"43","c":"0"},"variantElement(v, 'Map(String, UInt64)')":{"a":"42","b":"43","c":"0"}} +{"v":"{\"c\" : 44, \"d\" : [1,2,3]}","variantElement(v, 'Map(String, UInt64)')":{}} +Tuple +{"v":null,"variantElement(v, 'Tuple(a UInt64, b UInt64)')":{"a":"0","b":"0"}} +{"v":"string","variantElement(v, 'Tuple(a UInt64, b UInt64)')":{"a":"0","b":"0"}} +{"v":{"a":"42","b":"0"},"variantElement(v, 'Tuple(a UInt64, b UInt64)')":{"a":"42","b":"0"}} +{"v":{"a":"44","b":"0"},"variantElement(v, 'Tuple(a UInt64, b UInt64)')":{"a":"44","b":"0"}} +\N (0,0) +string (0,0) +(42,0) (42,0) +{"a" : 44, "d" : 32} (0,0) +Array +{"v":null,"variantElement(v, 'Array(UInt64)')":[]} +{"v":"string","variantElement(v, 'Array(UInt64)')":[]} +{"v":["1","2","3"],"variantElement(v, 'Array(UInt64)')":["1","2","3"]} +{"v":["0","0","0"],"variantElement(v, 'Array(UInt64)')":["0","0","0"]} +{"v":"[1, 2, \"hello\"]","variantElement(v, 'Array(UInt64)')":[]} +LowCardinality +{"v":null,"variantElement(v, 'LowCardinality(String)')":null} +{"v":"string","variantElement(v, 'LowCardinality(String)')":"string"} +{"v":"42","variantElement(v, 'LowCardinality(String)')":null} +{"v":null,"variantElement(v, 'Array(LowCardinality(Nullable(String)))')":[]} +{"v":["string",null],"variantElement(v, 'Array(LowCardinality(Nullable(String)))')":["string",null]} +{"v":"42","variantElement(v, 'Array(LowCardinality(Nullable(String)))')":[]} +Nullable +{"v":null,"variantElement(v, 'Array(Nullable(String))')":[]} +{"v":"string","variantElement(v, 'Array(Nullable(String))')":[]} +{"v":["hello",null,"world"],"variantElement(v, 'Array(Nullable(String))')":["hello",null,"world"]} +{"repeat('-', 80)":"--------------------------------------------------------------------------------"} +CSV +String +\N,\N +"string","string" +"string","string" +42,\N +FixedString +\N,\N +"string",\N +"string",\N +"abcd","abcd" +Bool +\N,\N +"Truee",\N +true,true +Integers +\N,\N +"string",\N +-1,-1 +0,0 +10000000000,\N +"42d42",\N +\N,\N +"string",\N +-1,\N +0,0 +10000000000,\N +"42d42",\N +\N,\N +"string",\N +-1,-1 +0,0 +10000000000,\N +"42d42",\N +\N,\N +"string",\N +-1,\N +0,0 +10000000000,\N +"42d42",\N +\N,\N +"string",\N +-1,-1 +0,0 +10000000000,\N +"42d42",\N +\N,\N +"string",\N +-1,\N +0,0 +10000000000,\N +"42d42",\N +\N,\N +"string",\N +-1,-1 +0,0 +10000000000000000000000,\N +"42d42",\N +\N,\N +"string",\N +-1,\N +0,0 +10000000000000000000000,\N +"42d42",\N +\N,\N +"string",\N +-1,-1 +0,0 +"42d42",\N +\N,\N +"string",\N +-1,\N +0,0 +"42d42",\N +Floats +\N,\N +"string",\N +42.42,42.42 +"42.d42",\N +\N,\N +"string",\N +42.42,42.42 +"42.d42",\N +Decimals +\N,\N +"string",\N +42.42,42.42 +"42d42",\N +"4242424242424242424242424242424242424242424242424242424242424242424242424242424242424242424242.424242424242424242",\N +\N,\N +"string",\N +42.42,42.42 +"42d42",\N +"4242424242424242424242424242424242424242424242424242424242424242424242424242424242424242424242.424242424242424242",\N +\N,\N +"string",\N +42.42,42.42 +"42d42",\N +"4242424242424242424242424242424242424242424242424242424242424242424242424242424242424242424242.424242424242424242",\N +\N,\N +"string",\N +42.42,42.42 +"42d42",\N 
+"4242424242424242424242424242424242424242424242424242424242424242424242424242424242424242424242.424242424242424242",\N +Dates and DateTimes +\N,\N +"string",\N +"2020-01-d1",\N +"2020-01-01","2020-01-01" +"2020-01-01 00:00:00.999",\N +\N,\N +"string",\N +"2020-01-d1",\N +"1900-01-01","1900-01-01" +"2020-01-01 00:00:00.999",\N +\N,\N +"string",\N +"2020-01-d1",\N +"2020-01-01 00:00:00","2020-01-01 00:00:00" +"2020-01-01 00:00:00.999",\N +\N,\N +"string",\N +"2020-01-d1",\N +"2020-01-01 00:00:00.999","2020-01-01 00:00:00.999" +"2020-01-01 00:00:00.999999999 ABC",\N +UUID +\N,\N +"string",\N +"c8619cca-0caa-445e-ae76-1d4f6e0b3927","c8619cca-0caa-445e-ae76-1d4f6e0b3927" +"c8619cca-0caa-445e-ae76-1d4f6e0b3927AAA",\N +IPv4 +\N,\N +"string",\N +"127.0.0.1","127.0.0.1" +"127.0.0.1AAA",\N +IPv6 +\N,\N +"string",\N +"2001:db8:85a3::8a2e:370:7334","2001:db8:85a3::8a2e:370:7334" +"2001:0db8:85a3:0000:0000:8a2e:0370:7334AAA",\N +Enum +\N,\N +"string",\N +"a","a" +"a","a" +2,\N +"aa",\N +Map +\N,"{}" +"string","{}" +"{'a':42,'b':43,'c':0}","{'a':42,'b':43,'c':0}" +"{'c' : 44, 'd' : [1,2,3]}","{}" +"{'c' : 44","{}" +Array +\N,"[]" +"string","[]" +"[1,2,3]","[1,2,3]" +"[0,0,0]","[0,0,0]" +"[1, 2, 'hello']","[]" +"[1, 2","[]" +LowCardinality +\N,\N +"string","string" +42,\N +\N,"[]" +"['string',NULL]","['string',NULL]" +"['string', nul]","[]" +42,"[]" +Nullable +\N,"[]" +"string","[]" +"['hello',NULL,'world']","['hello',NULL,'world']" +"['hello', nul]","[]" +{"repeat('-', 80)":"--------------------------------------------------------------------------------"} +TSV +String +\N \N +string string +42 \N +FixedString +\N \N +string \N +abcd abcd +Bool +\N \N +Truee \N +true true +Integers +\N \N +string \N +-1 -1 +0 0 +10000000000 \N +42d42 \N +\N \N +string \N +-1 \N +0 0 +10000000000 \N +42d42 \N +\N \N +string \N +-1 -1 +0 0 +10000000000 \N +42d42 \N +\N \N +string \N +-1 \N +0 0 +10000000000 \N +42d42 \N +\N \N +string \N +-1 -1 +0 0 +10000000000 \N +42d42 \N +\N \N +string \N +-1 \N +0 0 +10000000000 \N +42d42 \N +\N \N +string \N +-1 -1 +0 0 +10000000000000000000000 \N +42d42 \N +\N \N +string \N +-1 \N +0 0 +10000000000000000000000 \N +42d42 \N +\N \N +string \N +-1 -1 +0 0 +42d42 \N +\N \N +string \N +-1 \N +0 0 +42d42 \N +Floats +\N \N +string \N +42.42 42.42 +42.d42 \N +\N \N +string \N +42.42 42.42 +42.d42 \N +Decimals +\N \N +string \N +42.42 42.42 +42d42 \N +4242424242424242424242424242424242424242424242424242424242424242424242424242424242424242424242.424242424242424242 \N +\N \N +string \N +42.42 42.42 +42d42 \N +4242424242424242424242424242424242424242424242424242424242424242424242424242424242424242424242.424242424242424242 \N +\N \N +string \N +42.42 42.42 +42d42 \N +4242424242424242424242424242424242424242424242424242424242424242424242424242424242424242424242.424242424242424242 \N +\N \N +string \N +42.42 42.42 +42d42 \N +4242424242424242424242424242424242424242424242424242424242424242424242424242424242424242424242.424242424242424242 \N +Dates and DateTimes +\N \N +string \N +2020-01-d1 \N +2020-01-01 2020-01-01 +2020-01-01 00:00:00.999 \N +\N \N +string \N +2020-01-d1 \N +1900-01-01 1900-01-01 +2020-01-01 00:00:00.999 \N +\N \N +string \N +2020-01-d1 \N +2020-01-01 00:00:00 2020-01-01 00:00:00 +2020-01-01 00:00:00.999 \N +\N \N +string \N +2020-01-d1 \N +2020-01-01 00:00:00.999 2020-01-01 00:00:00.999 +2020-01-01 00:00:00.999999999 ABC \N +UUID +\N \N +string \N +c8619cca-0caa-445e-ae76-1d4f6e0b3927 c8619cca-0caa-445e-ae76-1d4f6e0b3927 +c8619cca-0caa-445e-ae76-1d4f6e0b3927AAA \N +IPv4 +\N \N 
+string \N +127.0.0.1 127.0.0.1 +127.0.0.1AAA \N +IPv6 +\N \N +string \N +2001:db8:85a3::8a2e:370:7334 2001:db8:85a3::8a2e:370:7334 +2001:0db8:85a3:0000:0000:8a2e:0370:7334AAA \N +Enum +\N \N +string \N +a a +a a +2 \N +aa \N +Map +\N {} +string {} +{'a':42,'b':43,'c':0} {'a':42,'b':43,'c':0} +{\'c\' : 44, \'d\' : [1,2,3]} {} +{\'c\' : 44 {} +Array +\N [] +string [] +[1,2,3] [1,2,3] +[0,0,0] [0,0,0] +[1, 2, \'hello\'] [] +[1, 2 [] +LowCardinality +\N \N +string string +42 \N +\N [] +['string',NULL] ['string',NULL] +[\'string\', nul] [] +42 [] +Nullable +\N [] +string [] +['hello',NULL,'world'] ['hello',NULL,'world'] +[\'hello\', nul] [] +{"repeat('-', 80)":"--------------------------------------------------------------------------------"} +Values +String +(NULL,NULL),('string','string'),(42,NULL)FixedString +(NULL,NULL),('string',NULL),('abcd','abcd')Bool +(NULL,NULL),(true,true)Integers +(NULL,NULL),('string',NULL),(-1,-1),(0,0),(10000000000,NULL)(NULL,NULL),('string',NULL),(-1,NULL),(0,0),(10000000000,NULL)(NULL,NULL),('string',NULL),(-1,-1),(0,0),(10000000000,NULL)(NULL,NULL),('string',NULL),(-1,NULL),(0,0),(10000000000,NULL)(NULL,NULL),('string',NULL),(-1,-1),(0,0),(10000000000,NULL)(NULL,NULL),('string',NULL),(-1,NULL),(0,0),(10000000000,NULL)(NULL,NULL),('string',NULL),(-1,-1),(0,0),(10000000000000000000000,NULL)(NULL,NULL),('string',NULL),(-1,NULL),(0,0),(10000000000000000000000,NULL)(NULL,NULL),('string',NULL),(-1,-1),(0,0)(NULL,NULL),('string',NULL),(-1,NULL),(0,0)Floats +(NULL,NULL),('string',NULL),(42.42,42.42)(NULL,NULL),('string',NULL),(42.42,42.42)Decimals +(NULL,NULL),('string',NULL),(42.42,42.42)(NULL,NULL),('string',NULL),(42.42,42.42)(NULL,NULL),('string',NULL),(42.42,42.42)(NULL,NULL),('string',NULL),(42.42,42.42)Dates and DateTimes +(NULL,NULL),('string',NULL),('2020-01-d1',NULL),('2020-01-01','2020-01-01'),('2020-01-01 00:00:00.999',NULL)(NULL,NULL),('string',NULL),('2020-01-d1',NULL),('1900-01-01','1900-01-01'),('2020-01-01 00:00:00.999',NULL)(NULL,NULL),('string',NULL),('2020-01-d1',NULL),('2020-01-01 00:00:00','2020-01-01 00:00:00'),('2020-01-01 00:00:00.999',NULL)(NULL,NULL),('string',NULL),('2020-01-d1',NULL),('2020-01-01 00:00:00.999','2020-01-01 00:00:00.999'),('2020-01-01 00:00:00.999999999 ABC',NULL)UUID +(NULL,NULL),('string',NULL),('c8619cca-0caa-445e-ae76-1d4f6e0b3927','c8619cca-0caa-445e-ae76-1d4f6e0b3927'),('c8619cca-0caa-445e-ae76-1d4f6e0b3927AAA',NULL)IPv4 +(NULL,NULL),('string',NULL),('127.0.0.1','127.0.0.1'),('127.0.0.1AAA',NULL)IPv6 +(NULL,NULL),('string',NULL),('2001:db8:85a3::8a2e:370:7334','2001:db8:85a3::8a2e:370:7334'),('2001:0db8:85a3:0000:0000:8a2e:0370:7334AAA',NULL)Enum +(NULL,NULL),('string',NULL),('a','a'),(1,NULL),(2,NULL),('aa',NULL)Map +(NULL,{}),('string',{}),({'a':42,'b':43,'c':0},{'a':42,'b':43,'c':0})Array +(NULL,[]),('string',[]),([1,2,3],[1,2,3]),([0,0,0],[0,0,0])LowCardinality +(NULL,NULL),('string','string'),(42,NULL)(NULL,[]),(['string',NULL],['string',NULL]),(42,[])Nullable +(NULL,[]),('string',[]),(['hello',NULL,'world'],['hello',NULL,'world']) diff --git a/tests/queries/0_stateless/02940_variant_text_deserialization.sql b/tests/queries/0_stateless/02940_variant_text_deserialization.sql new file mode 100644 index 00000000000..041d02088ef --- /dev/null +++ b/tests/queries/0_stateless/02940_variant_text_deserialization.sql @@ -0,0 +1,266 @@ +set allow_experimental_variant_type = 1; +set session_timezone = 'UTC'; + +select 'JSON'; +select 'String'; +select v, variantElement(v, 'String') from format(JSONEachRow, 'v 
Variant(String, UInt64)', '{"v" : null}, {"v" : "string"}, {"v" : 42}') format JSONEachRow; + +select 'FixedString'; +select v, variantElement(v, 'FixedString(4)') from format(JSONEachRow, 'v Variant(String, FixedString(4))', '{"v" : null}, {"v" : "string"}, {"v" : "abcd"}') format JSONEachRow; + +select 'Bool'; +select v, variantElement(v, 'Bool') from format(JSONEachRow, 'v Variant(String, Bool)', '{"v" : null}, {"v" : "string"}, {"v" : true}') format JSONEachRow; + +select 'Integers'; +select v, variantElement(v, 'Int8') from format(JSONEachRow, 'v Variant(String, Int8, UInt64)', '{"v" : null}, {"v" : "string"}, {"v" : -1}, {"v" : 0}, {"v" : 10000000000}') format JSONEachRow; +select v, variantElement(v, 'UInt8') from format(JSONEachRow, 'v Variant(String, UInt8, Int64)', '{"v" : null}, {"v" : "string"}, {"v" : -1}, {"v" : 0}, {"v" : 10000000000}') format JSONEachRow; +select v, variantElement(v, 'Int16') from format(JSONEachRow, 'v Variant(String, Int16, Int64)', '{"v" : null}, {"v" : "string"}, {"v" : -1}, {"v" : 0}, {"v" : 10000000000}') format JSONEachRow; +select v, variantElement(v, 'UInt16') from format(JSONEachRow, 'v Variant(String, UInt16, Int64)', '{"v" : null}, {"v" : "string"}, {"v" : -1}, {"v" : 0}, {"v" : 10000000000}') format JSONEachRow; +select v, variantElement(v, 'Int32') from format(JSONEachRow, 'v Variant(String, Int32, Int64)', '{"v" : null}, {"v" : "string"}, {"v" : -1}, {"v" : 0}, {"v" : 10000000000}') format JSONEachRow; +select v, variantElement(v, 'UInt32') from format(JSONEachRow, 'v Variant(String, UInt32, Int64)', '{"v" : null}, {"v" : "string"}, {"v" : -1}, {"v" : 0}, {"v" : 10000000000}') format JSONEachRow; +select v, variantElement(v, 'Int64') from format(JSONEachRow, 'v Variant(String, Int64, Int128)', '{"v" : null}, {"v" : "string"}, {"v" : -1}, {"v" : 0}, {"v" : 10000000000000000000000}') format JSONEachRow; +select v, variantElement(v, 'UInt64') from format(JSONEachRow, 'v Variant(String, UInt64, Int128)', '{"v" : null}, {"v" : "string"}, {"v" : -1}, {"v" : 0}, {"v" : 10000000000000000000000}') format JSONEachRow; +select v, variantElement(v, 'Int128') from format(JSONEachRow, 'v Variant(String, Int128, Int256)', '{"v" : null}, {"v" : "string"}, {"v" : -1}, {"v" : 0}') format JSONEachRow; +select v, variantElement(v, 'UInt128') from format(JSONEachRow, 'v Variant(String, UInt128, Int256)', '{"v" : null}, {"v" : "string"}, {"v" : -1}, {"v" : 0}') format JSONEachRow; + +select 'Floats'; +select v, variantElement(v, 'Float32') from format(JSONEachRow, 'v Variant(String, Float32)', '{"v" : null}, {"v" : "string"}, {"v" : 42.42}') format JSONEachRow; +select v, variantElement(v, 'Float64') from format(JSONEachRow, 'v Variant(String, Float64)', '{"v" : null}, {"v" : "string"}, {"v" : 42.42}') format JSONEachRow; + +select 'Decimals'; +select v, variantElement(v, 'Decimal32(6)') from format(JSONEachRow, 'v Variant(String, Decimal32(6))', '{"v" : null}, {"v" : "string"}, {"v" : 42.42}, {"v" : 4242424242424242424242424242424242424242424242424242424242424242424242424242424242424242424242.424242424242424242}') format JSONEachRow; +select v, variantElement(v, 'Decimal64(6)') from format(JSONEachRow, 'v Variant(String, Decimal64(6))', '{"v" : null}, {"v" : "string"}, {"v" : 42.42}, {"v" : 4242424242424242424242424242424242424242424242424242424242424242424242424242424242424242424242.424242424242424242}') format JSONEachRow; +select v, variantElement(v, 'Decimal128(6)') from format(JSONEachRow, 'v Variant(String, Decimal128(6))', '{"v" : null}, {"v" : "string"}, 
{"v" : 42.42}, {"v" : 4242424242424242424242424242424242424242424242424242424242424242424242424242424242424242424242.424242424242424242}') format JSONEachRow; +select v, variantElement(v, 'Decimal256(6)') from format(JSONEachRow, 'v Variant(String, Decimal256(6))', '{"v" : null}, {"v" : "string"}, {"v" : 42.42}, {"v" : 4242424242424242424242424242424242424242424242424242424242424242424242424242424242424242424242.424242424242424242}') format JSONEachRow; + +select 'Dates and DateTimes'; +select v, variantElement(v, 'Date') from format(JSONEachRow, 'v Variant(String, Date, DateTime64)', '{"v" : null}, {"v" : "string"}, {"v" : "2020-01-01"}, {"v" : "2020-01-01 00:00:00.999"}') format JSONEachRow; +select v, variantElement(v, 'Date32') from format(JSONEachRow, 'v Variant(String, Date32, DateTime64)', '{"v" : null}, {"v" : "string"}, {"v" : "1900-01-01"}, {"v" : "2020-01-01 00:00:00.999"}') format JSONEachRow; +select v, variantElement(v, 'DateTime') from format(JSONEachRow, 'v Variant(String, DateTime, DateTime64)', '{"v" : null}, {"v" : "string"}, {"v" : "2020-01-01 00:00:00"}, {"v" : "2020-01-01 00:00:00.999"}') format JSONEachRow; +select v, variantElement(v, 'DateTime64') from format(JSONEachRow, 'v Variant(String, DateTime64)', '{"v" : null}, {"v" : "string"}, {"v" : "2020-01-01 00:00:00.999"}, {"v" : "2020-01-01 00:00:00.999999999 ABC"}') format JSONEachRow; + +select 'UUID'; +select v, variantElement(v, 'UUID') from format(JSONEachRow, 'v Variant(String, UUID)', '{"v" : null}, {"v" : "string"}, {"v" : "c8619cca-0caa-445e-ae76-1d4f6e0b3927"}') format JSONEachRow; + +select 'IPv4'; +select v, variantElement(v, 'IPv4') from format(JSONEachRow, 'v Variant(String, IPv4)', '{"v" : null}, {"v" : "string"}, {"v" : "127.0.0.1"}') format JSONEachRow; + +select 'IPv6'; +select v, variantElement(v, 'IPv6') from format(JSONEachRow, 'v Variant(String, IPv6)', '{"v" : null}, {"v" : "string"}, {"v" : "2001:0db8:85a3:0000:0000:8a2e:0370:7334"}') format JSONEachRow; + +select 'Enum'; +select v, variantElement(v, 'Enum(''a'' = 1)') from format(JSONEachRow, 'v Variant(String, UInt32, Enum(''a'' = 1))', '{"v" : null}, {"v" : "string"}, {"v" : "a"}, {"v" : 1}, {"v" : 2}') format JSONEachRow; + +select 'Map'; +select v, variantElement(v, 'Map(String, UInt64)') from format(JSONEachRow, 'v Variant(String, Map(String, UInt64))', '{"v" : null}, {"v" : "string"}, {"v" : {"a" : 42, "b" : 43, "c" : null}}, {"v" : {"c" : 44, "d" : [1,2,3]}}') format JSONEachRow; + +select 'Tuple'; +select v, variantElement(v, 'Tuple(a UInt64, b UInt64)') from format(JSONEachRow, 'v Variant(String, Tuple(a UInt64, b UInt64))', '{"v" : null}, {"v" : "string"}, {"v" : {"a" : 42, "b" : null}}, {"v" : {"a" : 44, "d" : 32}}') format JSONEachRow; +select v, variantElement(v, 'Tuple(a UInt64, b UInt64)') from format(JSONEachRow, 'v Variant(String, Tuple(a UInt64, b UInt64))', '{"v" : null}, {"v" : "string"}, {"v" : {"a" : 42, "b" : null}}, {"v" : {"a" : 44, "d" : 32}}') settings input_format_json_defaults_for_missing_elements_in_named_tuple=0; + +select 'Array'; +select v, variantElement(v, 'Array(UInt64)') from format(JSONEachRow, 'v Variant(String, Array(UInt64))', '{"v" : null}, {"v" : "string"}, {"v" : [1, 2, 3]}, {"v" : [null, null, null]} {"v" : [1, 2, "hello"]}') format JSONEachRow; + +select 'LowCardinality'; +select v, variantElement(v, 'LowCardinality(String)') from format(JSONEachRow, 'v Variant(LowCardinality(String), UInt64)', '{"v" : null}, {"v" : "string"}, {"v" : 42}') format JSONEachRow; +select v, variantElement(v, 
'Array(LowCardinality(Nullable(String)))') from format(JSONEachRow, 'v Variant(Array(LowCardinality(Nullable(String))), UInt64)', '{"v" : null}, {"v" : ["string", null]}, {"v" : 42}') format JSONEachRow; + +select 'Nullable'; +select v, variantElement(v, 'Array(Nullable(String))') from format(JSONEachRow, 'v Variant(String, Array(Nullable(String)))', '{"v" : null}, {"v" : "string"}, {"v" : ["hello", null, "world"]}') format JSONEachRow; + +select repeat('-', 80) format JSONEachRow; + +select 'CSV'; +select 'String'; +select v, variantElement(v, 'String') from format(CSV, 'v Variant(String, UInt64)', '\\N\n"string"\nstring\n42') format CSV; + +select 'FixedString'; +select v, variantElement(v, 'FixedString(4)') from format(CSV, 'v Variant(String, FixedString(4))', '\\N\n"string"\nstring\n"abcd"') format CSV; + +select 'Bool'; +select v, variantElement(v, 'Bool') from format(CSV, 'v Variant(String, Bool)', '\\N\nTruee\nTrue') format CSV; + +select 'Integers'; +select v, variantElement(v, 'Int8') from format(CSV, 'v Variant(String, Int8, UInt64)', '\n"string"\n-1\n0\n10000000000\n42d42') format CSV; +select v, variantElement(v, 'UInt8') from format(CSV, 'v Variant(String, UInt8, Int64)', '\\N\n"string"\n-1\n0\n10000000000\n42d42') format CSV; +select v, variantElement(v, 'Int16') from format(CSV, 'v Variant(String, Int16, Int64)', '\\N\n"string"\n-1\n0\n10000000000\n42d42') format CSV; +select v, variantElement(v, 'UInt16') from format(CSV, 'v Variant(String, UInt16, Int64)', '\\N\n"string"\n-1\n0\n10000000000\n42d42') format CSV; +select v, variantElement(v, 'Int32') from format(CSV, 'v Variant(String, Int32, Int64)', '\\N\n"string"\n-1\n0\n10000000000\n42d42') format CSV; +select v, variantElement(v, 'UInt32') from format(CSV, 'v Variant(String, UInt32, Int64)', '\\N\n"string"\n-1\n0\n10000000000\n42d42') format CSV; +select v, variantElement(v, 'Int64') from format(CSV, 'v Variant(String, Int64, Int128)', '\\N\n"string"\n-1\n0\n10000000000000000000000\n42d42') format CSV; +select v, variantElement(v, 'UInt64') from format(CSV, 'v Variant(String, UInt64, Int128)', '\\N\n"string"\n-1\n0\n10000000000000000000000\n42d42') format CSV; +select v, variantElement(v, 'Int128') from format(CSV, 'v Variant(String, Int128, Int256)', '\\N\n"string"\n-1\n0\n42d42') format CSV; +select v, variantElement(v, 'UInt128') from format(CSV, 'v Variant(String, UInt128, Int256)', '\\N\n"string"\n-1\n0\n42d42') format CSV; + +select 'Floats'; +select v, variantElement(v, 'Float32') from format(CSV, 'v Variant(String, Float32)', '\\N\n"string"\n42.42\n42.d42') format CSV; +select v, variantElement(v, 'Float64') from format(CSV, 'v Variant(String, Float64)', '\\N\n"string"\n42.42\n42.d42') format CSV; + +select 'Decimals'; +select v, variantElement(v, 'Decimal32(6)') from format(CSV, 'v Variant(String, Decimal32(6))', '\\N\n"string"\n42.42\n42d42\n4242424242424242424242424242424242424242424242424242424242424242424242424242424242424242424242.424242424242424242') format CSV; +select v, variantElement(v, 'Decimal64(6)') from format(CSV, 'v Variant(String, Decimal64(6))', '\\N\n"string"\n42.42\n42d42\n4242424242424242424242424242424242424242424242424242424242424242424242424242424242424242424242.424242424242424242') format CSV; +select v, variantElement(v, 'Decimal128(6)') from format(CSV, 'v Variant(String, Decimal128(6))', '\\N\n"string"\n42.42\n42d42\n4242424242424242424242424242424242424242424242424242424242424242424242424242424242424242424242.424242424242424242') format CSV; +select v, variantElement(v, 
'Decimal256(6)') from format(CSV, 'v Variant(String, Decimal256(6))', '\\N\n"string"\n42.42\n42d42\n4242424242424242424242424242424242424242424242424242424242424242424242424242424242424242424242.424242424242424242') format CSV; + +select 'Dates and DateTimes'; +select v, variantElement(v, 'Date') from format(CSV, 'v Variant(String, Date, DateTime64)', '\\N\n"string"\n"2020-01-d1"\n"2020-01-01"\n"2020-01-01 00:00:00.999"') format CSV; +select v, variantElement(v, 'Date32') from format(CSV, 'v Variant(String, Date32, DateTime64)', '\\N\n"string"\n"2020-01-d1"\n"1900-01-01"\n"2020-01-01 00:00:00.999"') format CSV; +select v, variantElement(v, 'DateTime') from format(CSV, 'v Variant(String, DateTime, DateTime64)', '\\N\n"string"\n"2020-01-d1"\n"2020-01-01 00:00:00"\n"2020-01-01 00:00:00.999"') format CSV; +select v, variantElement(v, 'DateTime64') from format(CSV, 'v Variant(String, DateTime64)', '\\N\n"string"\n"2020-01-d1"\n"2020-01-01 00:00:00.999"\n"2020-01-01 00:00:00.999999999 ABC"') format CSV; + +select 'UUID'; +select v, variantElement(v, 'UUID') from format(CSV, 'v Variant(String, UUID)', '\\N\n"string"\n"c8619cca-0caa-445e-ae76-1d4f6e0b3927"\nc8619cca-0caa-445e-ae76-1d4f6e0b3927AAA') format CSV; + +select 'IPv4'; +select v, variantElement(v, 'IPv4') from format(CSV, 'v Variant(String, IPv4)', '\\N\n"string"\n"127.0.0.1"\n"127.0.0.1AAA"') format CSV; + +select 'IPv6'; +select v, variantElement(v, 'IPv6') from format(CSV, 'v Variant(String, IPv6)', '\\N\n"string"\n"2001:0db8:85a3:0000:0000:8a2e:0370:7334"\n2001:0db8:85a3:0000:0000:8a2e:0370:7334AAA') format CSV; + +select 'Enum'; +select v, variantElement(v, 'Enum(''a'' = 1)') from format(CSV, 'v Variant(String, UInt32, Enum(''a'' = 1))', '\\N\n"string"\n"a"\n1\n2\naa') format CSV; + +select 'Map'; +select v, variantElement(v, 'Map(String, UInt64)') from format(CSV, 'v Variant(String, Map(String, UInt64))', '\\N\n"string"\n"{''a'' : 42, ''b'' : 43, ''c'' : null}"\n"{''c'' : 44, ''d'' : [1,2,3]}"\n"{''c'' : 44"') format CSV; + +select 'Array'; +select v, variantElement(v, 'Array(UInt64)') from format(CSV, 'v Variant(String, Array(UInt64))', '\\N\n"string"\n"[1, 2, 3]"\n"[null, null, null]"\n"[1, 2, ''hello'']"\n"[1, 2"') format CSV; + +select 'LowCardinality'; +select v, variantElement(v, 'LowCardinality(String)') from format(CSV, 'v Variant(LowCardinality(String), UInt64)', '\\N\n"string"\n42') format CSV; +select v, variantElement(v, 'Array(LowCardinality(Nullable(String)))') from format(CSV, 'v Variant(Array(LowCardinality(Nullable(String))), UInt64, String)', '\\N\n"[''string'', null]"\n"[''string'', nul]"\n42') format CSV; + +select 'Nullable'; +select v, variantElement(v, 'Array(Nullable(String))') from format(CSV, 'v Variant(String, Array(Nullable(String)))', '\\N\n"string"\n"[''hello'', null, ''world'']"\n"[''hello'', nul]"') format CSV; + +select repeat('-', 80) format JSONEachRow; + +select 'TSV'; +select 'String'; +select v, variantElement(v, 'String') from format(TSV, 'v Variant(String, UInt64)', '\\N\nstring\n42') format TSV; + +select 'FixedString'; +select v, variantElement(v, 'FixedString(4)') from format(TSV, 'v Variant(String, FixedString(4))', '\\N\nstring\nabcd') format TSV; + +select 'Bool'; +select v, variantElement(v, 'Bool') from format(TSV, 'v Variant(String, Bool)', '\\N\nTruee\nTrue') format TSV; + +select 'Integers'; +select v, variantElement(v, 'Int8') from format(TSV, 'v Variant(String, Int8, UInt64)', '\\N\nstring\n-1\n0\n10000000000\n42d42') format TSV; +select v, variantElement(v, 'UInt8') from 
format(TSV, 'v Variant(String, UInt8, Int64)', '\\N\nstring\n-1\n0\n10000000000\n42d42') format TSV; +select v, variantElement(v, 'Int16') from format(TSV, 'v Variant(String, Int16, Int64)', '\\N\nstring\n-1\n0\n10000000000\n42d42') format TSV; +select v, variantElement(v, 'UInt16') from format(TSV, 'v Variant(String, UInt16, Int64)', '\\N\nstring\n-1\n0\n10000000000\n42d42') format TSV; +select v, variantElement(v, 'Int32') from format(TSV, 'v Variant(String, Int32, Int64)', '\\N\nstring\n-1\n0\n10000000000\n42d42') format TSV; +select v, variantElement(v, 'UInt32') from format(TSV, 'v Variant(String, UInt32, Int64)', '\\N\nstring\n-1\n0\n10000000000\n42d42') format TSV; +select v, variantElement(v, 'Int64') from format(TSV, 'v Variant(String, Int64, Int128)', '\\N\nstring\n-1\n0\n10000000000000000000000\n42d42') format TSV; +select v, variantElement(v, 'UInt64') from format(TSV, 'v Variant(String, UInt64, Int128)', '\\N\nstring\n-1\n0\n10000000000000000000000\n42d42') format TSV; +select v, variantElement(v, 'Int128') from format(TSV, 'v Variant(String, Int128, Int256)', '\\N\nstring\n-1\n0\n42d42') format TSV; +select v, variantElement(v, 'UInt128') from format(TSV, 'v Variant(String, UInt128, Int256)', '\\N\nstring\n-1\n0\n42d42') format TSV; + +select 'Floats'; +select v, variantElement(v, 'Float32') from format(TSV, 'v Variant(String, Float32)', '\\N\nstring\n42.42\n42.d42') format TSV; +select v, variantElement(v, 'Float64') from format(TSV, 'v Variant(String, Float64)', '\\N\nstring\n42.42\n42.d42') format TSV; + +select 'Decimals'; +select v, variantElement(v, 'Decimal32(6)') from format(TSV, 'v Variant(String, Decimal32(6))', '\\N\nstring\n42.42\n42d42\n4242424242424242424242424242424242424242424242424242424242424242424242424242424242424242424242.424242424242424242') format TSV; +select v, variantElement(v, 'Decimal64(6)') from format(TSV, 'v Variant(String, Decimal64(6))', '\\N\nstring\n42.42\n42d42\n4242424242424242424242424242424242424242424242424242424242424242424242424242424242424242424242.424242424242424242') format TSV; +select v, variantElement(v, 'Decimal128(6)') from format(TSV, 'v Variant(String, Decimal128(6))', '\\N\nstring\n42.42\n42d42\n4242424242424242424242424242424242424242424242424242424242424242424242424242424242424242424242.424242424242424242') format TSV; +select v, variantElement(v, 'Decimal256(6)') from format(TSV, 'v Variant(String, Decimal256(6))', '\\N\nstring\n42.42\n42d42\n4242424242424242424242424242424242424242424242424242424242424242424242424242424242424242424242.424242424242424242') format TSV; + +select 'Dates and DateTimes'; +select v, variantElement(v, 'Date') from format(TSV, 'v Variant(String, Date, DateTime64)', '\\N\nstring\n2020-01-d1\n2020-01-01\n2020-01-01 00:00:00.999') format TSV; +select v, variantElement(v, 'Date32') from format(TSV, 'v Variant(String, Date32, DateTime64)', '\\N\nstring\n2020-01-d1\n1900-01-01\n2020-01-01 00:00:00.999') format TSV; +select v, variantElement(v, 'DateTime') from format(TSV, 'v Variant(String, DateTime, DateTime64)', '\\N\nstring\n2020-01-d1\n2020-01-01 00:00:00\n2020-01-01 00:00:00.999') format TSV; +select v, variantElement(v, 'DateTime64') from format(TSV, 'v Variant(String, DateTime64)', '\\N\nstring\n2020-01-d1\n2020-01-01 00:00:00.999\n2020-01-01 00:00:00.999999999 ABC') format TSV; + +select 'UUID'; +select v, variantElement(v, 'UUID') from format(TSV, 'v Variant(String, UUID)', '\\N\nstring\nc8619cca-0caa-445e-ae76-1d4f6e0b3927\nc8619cca-0caa-445e-ae76-1d4f6e0b3927AAA') format TSV; + +select 
'IPv4'; +select v, variantElement(v, 'IPv4') from format(TSV, 'v Variant(String, IPv4)', '\\N\nstring\n127.0.0.1\n127.0.0.1AAA') format TSV; + +select 'IPv6'; +select v, variantElement(v, 'IPv6') from format(TSV, 'v Variant(String, IPv6)', '\\N\nstring\n2001:0db8:85a3:0000:0000:8a2e:0370:7334\n2001:0db8:85a3:0000:0000:8a2e:0370:7334AAA') format TSV; + +select 'Enum'; +select v, variantElement(v, 'Enum(''a'' = 1)') from format(TSV, 'v Variant(String, UInt32, Enum(''a'' = 1))', '\\N\nstring\na\n1\n2\naa') format TSV; + +select 'Map'; +select v, variantElement(v, 'Map(String, UInt64)') from format(TSV, 'v Variant(String, Map(String, UInt64))', '\\N\nstring\n{''a'' : 42, ''b'' : 43, ''c'' : null}\n{''c'' : 44, ''d'' : [1,2,3]}\n{''c'' : 44') format TSV; + +select 'Array'; +select v, variantElement(v, 'Array(UInt64)') from format(TSV, 'v Variant(String, Array(UInt64))', '\\N\nstring\n[1, 2, 3]\n[null, null, null]\n[1, 2, ''hello'']\n[1, 2') format TSV; + +select 'LowCardinality'; +select v, variantElement(v, 'LowCardinality(String)') from format(TSV, 'v Variant(LowCardinality(String), UInt64)', '\\N\nstring\n42') format TSV; +select v, variantElement(v, 'Array(LowCardinality(Nullable(String)))') from format(TSV, 'v Variant(Array(LowCardinality(Nullable(String))), UInt64, String)', '\\N\n[''string'', null]\n[''string'', nul]\n42') format TSV; + +select 'Nullable'; +select v, variantElement(v, 'Array(Nullable(String))') from format(TSV, 'v Variant(String, Array(Nullable(String)))', '\\N\nstring\n[''hello'', null, ''world'']\n[''hello'', nul]') format TSV; + +select repeat('-', 80) format JSONEachRow; + +select 'Values'; +select 'String'; +select v, variantElement(v, 'String') from format(Values, 'v Variant(String, UInt64)', '(NULL), (''string''), (42)') format Values; + +select 'FixedString'; +select v, variantElement(v, 'FixedString(4)') from format(Values, 'v Variant(String, FixedString(4))', '(NULL), (''string''), (''abcd'')') format Values; + +select 'Bool'; +select v, variantElement(v, 'Bool') from format(Values, 'v Variant(String, Bool)', '(NULL), (True)') format Values; + +select 'Integers'; +select v, variantElement(v, 'Int8') from format(Values, 'v Variant(String, Int8, UInt64)', '(NULL), (''string''), (-1), (0), (10000000000)') format Values; +select v, variantElement(v, 'UInt8') from format(Values, 'v Variant(String, UInt8, Int64)', '(NULL), (''string''), (-1), (0), (10000000000)') format Values; +select v, variantElement(v, 'Int16') from format(Values, 'v Variant(String, Int16, Int64)', '(NULL), (''string''), (-1), (0), (10000000000)') format Values; +select v, variantElement(v, 'UInt16') from format(Values, 'v Variant(String, UInt16, Int64)', '(NULL), (''string''), (-1), (0), (10000000000)') format Values; +select v, variantElement(v, 'Int32') from format(Values, 'v Variant(String, Int32, Int64)', '(NULL), (''string''), (-1), (0), (10000000000)') format Values; +select v, variantElement(v, 'UInt32') from format(Values, 'v Variant(String, UInt32, Int64)', '(NULL), (''string''), (-1), (0), (10000000000)') format Values; +select v, variantElement(v, 'Int64') from format(Values, 'v Variant(String, Int64, Int128)', '(NULL), (''string''), (-1), (0), (10000000000000000000000)') format Values; +select v, variantElement(v, 'UInt64') from format(Values, 'v Variant(String, UInt64, Int128)', '(NULL), (''string''), (-1), (0), (10000000000000000000000)') format Values; +select v, variantElement(v, 'Int128') from format(Values, 'v Variant(String, Int128, Int256)', '(NULL), (''string''), (-1), (0)') 
format Values; +select v, variantElement(v, 'UInt128') from format(Values, 'v Variant(String, UInt128, Int256)', '(NULL), (''string''), (-1), (0)') format Values; + +select 'Floats'; +select v, variantElement(v, 'Float32') from format(Values, 'v Variant(String, Float32)', '(NULL), (''string''), (42.42)') format Values; +select v, variantElement(v, 'Float64') from format(Values, 'v Variant(String, Float64)', '(NULL), (''string''), (42.42)') format Values; + +select 'Decimals'; +select v, variantElement(v, 'Decimal32(6)') from format(Values, 'v Variant(String, Decimal32(6))', '(NULL), (''string''), (42.42)') format Values; +select v, variantElement(v, 'Decimal64(6)') from format(Values, 'v Variant(String, Decimal64(6))', '(NULL), (''string''), (42.42)') format Values; +select v, variantElement(v, 'Decimal128(6)') from format(Values, 'v Variant(String, Decimal128(6))', '(NULL), (''string''), (42.42)') format Values; +select v, variantElement(v, 'Decimal256(6)') from format(Values, 'v Variant(String, Decimal256(6))', '(NULL), (''string''), (42.42)') format Values; + +select 'Dates and DateTimes'; +select v, variantElement(v, 'Date') from format(Values, 'v Variant(String, Date, DateTime64)', '(NULL), (''string''), (''2020-01-d1''), (''2020-01-01''), (''2020-01-01 00:00:00.999'')') format Values; +select v, variantElement(v, 'Date32') from format(Values, 'v Variant(String, Date32, DateTime64)', '(NULL), (''string''), (''2020-01-d1''), (''1900-01-01''), (''2020-01-01 00:00:00.999'')') format Values; +select v, variantElement(v, 'DateTime') from format(Values, 'v Variant(String, DateTime, DateTime64)', '(NULL), (''string''), (''2020-01-d1''), (''2020-01-01 00:00:00''), (''2020-01-01 00:00:00.999'')') format Values; +select v, variantElement(v, 'DateTime64') from format(Values, 'v Variant(String, DateTime64)', '(NULL), (''string''), (''2020-01-d1''), (''2020-01-01 00:00:00.999''), (''2020-01-01 00:00:00.999999999 ABC'')') format Values; + +select 'UUID'; +select v, variantElement(v, 'UUID') from format(Values, 'v Variant(String, UUID)', '(NULL), (''string''), (''c8619cca-0caa-445e-ae76-1d4f6e0b3927''), (''c8619cca-0caa-445e-ae76-1d4f6e0b3927AAA'')') format Values; + +select 'IPv4'; +select v, variantElement(v, 'IPv4') from format(Values, 'v Variant(String, IPv4)', '(NULL), (''string''), (''127.0.0.1''), (''127.0.0.1AAA'')') format Values; + +select 'IPv6'; +select v, variantElement(v, 'IPv6') from format(Values, 'v Variant(String, IPv6)', '(NULL), (''string''), (''2001:0db8:85a3:0000:0000:8a2e:0370:7334''), (''2001:0db8:85a3:0000:0000:8a2e:0370:7334AAA'')') format Values; + +select 'Enum'; +select v, variantElement(v, 'Enum(''a'' = 1)') from format(Values, 'v Variant(String, UInt32, Enum(''a'' = 1))', '(NULL), (''string''), (''a''), (1), (2), (''aa'')') format Values; + +select 'Map'; +select v, variantElement(v, 'Map(String, UInt64)') from format(Values, 'v Variant(String, Map(String, UInt64))', '(NULL), (''string''), ({''a'' : 42, ''b'' : 43, ''c'' : null})') format Values; + +select 'Array'; +select v, variantElement(v, 'Array(UInt64)') from format(Values, 'v Variant(String, Array(UInt64))', '(NULL), (''string''), ([1, 2, 3]), ([null, null, null])') format Values; + +select 'LowCardinality'; +select v, variantElement(v, 'LowCardinality(String)') from format(Values, 'v Variant(LowCardinality(String), UInt64)', '(NULL), (''string''), (42)') format Values; +select v, variantElement(v, 'Array(LowCardinality(Nullable(String)))') from format(Values, 'v Variant(Array(LowCardinality(Nullable(String))), 
UInt64, String)', '(NULL), ([''string'', null]), (42)') format Values; + +select 'Nullable'; +select v, variantElement(v, 'Array(Nullable(String))') from format(Values, 'v Variant(String, Array(Nullable(String)))', '(NULL), (''string''), ([''hello'', null, ''world''])') format Values; + +select ''; \ No newline at end of file diff --git a/tests/queries/0_stateless/02941_variant_type_1.reference b/tests/queries/0_stateless/02941_variant_type_1.reference new file mode 100644 index 00000000000..8a6e77d4f6d --- /dev/null +++ b/tests/queries/0_stateless/02941_variant_type_1.reference @@ -0,0 +1,2472 @@ +Memory +test1 insert +test1 select +\N +\N +\N +0 +1 +2 +str_0 +str_1 +str_2 +lc_str_0 +lc_str_1 +lc_str_2 +(0,1) +(1,2) +(2,3) +[0] +[0,1] +[0,1,2] +\N +\N +\N +\N +\N +\N +str_0 +str_1 +str_2 +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +0 +1 +2 +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +lc_str_0 +lc_str_1 +lc_str_2 +\N +\N +\N +\N +\N +\N +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,1) +(1,2) +(2,3) +(0,0) +(0,0) +(0,0) +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +0 +1 +2 +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +1 +2 +3 +\N +\N +\N +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[0] +[0,1] +[0,1,2] +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +1 +2 +3 +----------------------------------------------------------------------------------------------------------- +test2 insert +test2 select +\N +\N +\N +0 +\N +2 +str_0 +\N +str_2 +lc_str_0 +\N +lc_str_2 +(0,1) +\N +(2,3) +[0] +\N +[0,1,2] +\N +\N +\N +\N +\N +\N +str_0 +\N +str_2 +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +0 +\N +2 +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +lc_str_0 +\N +lc_str_2 +\N +\N +\N +\N +\N +\N +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,1) +(0,0) +(2,3) +(0,0) +(0,0) +(0,0) +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +0 +\N +2 +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +1 +\N +3 +\N +\N +\N +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[0] +[] +[0,1,2] +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +1 +\N +3 +----------------------------------------------------------------------------------------------------------- +test3 insert +test3 select +\N +str_1 +2 +lc_str_3 +(4,5) +[0,1,2,3,4,5] +\N +str_7 +8 +lc_str_9 +(10,11) +[0,1,2,3,4,5,6,7,8,9,10,11] +\N +str_13 +14 +lc_str_15 +(16,17) +[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17] +\N +str_1 +\N +\N +\N +\N +\N +str_7 +\N +\N +\N +\N +\N +str_13 +\N +\N +\N +\N +\N +\N +2 +\N +\N +\N +\N +\N +8 +\N +\N +\N +\N +\N +14 +\N +\N +\N +\N +\N +\N +lc_str_3 +\N +\N +\N +\N +\N +lc_str_9 +\N +\N +\N +\N +\N +lc_str_15 +\N +\N +(0,0) +(0,0) +(0,0) +(0,0) +(4,5) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(10,11) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(16,17) +(0,0) +\N +\N +\N +\N +4 +\N +\N +\N +\N +\N +10 +\N +\N +\N +\N +\N +16 +\N +\N +\N +\N +\N +5 +\N +\N +\N +\N +\N +11 +\N +\N +\N +\N +\N +17 +\N +[] +[] +[] +[] +[] +[0,1,2,3,4,5] +[] +[] +[] +[] +[] +[0,1,2,3,4,5,6,7,8,9,10,11] +[] +[] +[] +[] +[] +[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17] +\N +\N +\N +\N +\N +6 +\N +\N +\N +\N +\N +12 +\N +\N +\N +\N +\N +18 +----------------------------------------------------------------------------------------------------------- +MergeTree compact +test1 insert +test1 select +\N +\N +\N +0 +1 +2 +str_0 +str_1 +str_2 
+lc_str_0 +lc_str_1 +lc_str_2 +(0,1) +(1,2) +(2,3) +[0] +[0,1] +[0,1,2] +\N +\N +\N +\N +\N +\N +str_0 +str_1 +str_2 +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +0 +1 +2 +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +lc_str_0 +lc_str_1 +lc_str_2 +\N +\N +\N +\N +\N +\N +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,1) +(1,2) +(2,3) +(0,0) +(0,0) +(0,0) +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +0 +1 +2 +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +1 +2 +3 +\N +\N +\N +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[0] +[0,1] +[0,1,2] +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +1 +2 +3 +----------------------------------------------------------------------------------------------------------- +test1 select +\N +\N +\N +0 +1 +2 +str_0 +str_1 +str_2 +lc_str_0 +lc_str_1 +lc_str_2 +(0,1) +(1,2) +(2,3) +[0] +[0,1] +[0,1,2] +\N +\N +\N +\N +\N +\N +str_0 +str_1 +str_2 +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +0 +1 +2 +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +lc_str_0 +lc_str_1 +lc_str_2 +\N +\N +\N +\N +\N +\N +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,1) +(1,2) +(2,3) +(0,0) +(0,0) +(0,0) +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +0 +1 +2 +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +1 +2 +3 +\N +\N +\N +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[0] +[0,1] +[0,1,2] +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +1 +2 +3 +----------------------------------------------------------------------------------------------------------- +test2 insert +test2 select +\N +\N +\N +0 +\N +2 +str_0 +\N +str_2 +lc_str_0 +\N +lc_str_2 +(0,1) +\N +(2,3) +[0] +\N +[0,1,2] +\N +\N +\N +\N +\N +\N +str_0 +\N +str_2 +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +0 +\N +2 +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +lc_str_0 +\N +lc_str_2 +\N +\N +\N +\N +\N +\N +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,1) +(0,0) +(2,3) +(0,0) +(0,0) +(0,0) +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +0 +\N +2 +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +1 +\N +3 +\N +\N +\N +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[0] +[] +[0,1,2] +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +1 +\N +3 +----------------------------------------------------------------------------------------------------------- +test2 select +\N +\N +\N +0 +\N +2 +str_0 +\N +str_2 +lc_str_0 +\N +lc_str_2 +(0,1) +\N +(2,3) +[0] +\N +[0,1,2] +\N +\N +\N +\N +\N +\N +str_0 +\N +str_2 +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +0 +\N +2 +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +lc_str_0 +\N +lc_str_2 +\N +\N +\N +\N +\N +\N +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,1) +(0,0) +(2,3) +(0,0) +(0,0) +(0,0) +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +0 +\N +2 +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +1 +\N +3 +\N +\N +\N +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[0] +[] +[0,1,2] +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +1 +\N +3 +----------------------------------------------------------------------------------------------------------- +test3 insert +test3 select +\N +str_1 +2 +lc_str_3 +(4,5) +[0,1,2,3,4,5] 
+\N +str_7 +8 +lc_str_9 +(10,11) +[0,1,2,3,4,5,6,7,8,9,10,11] +\N +str_13 +14 +lc_str_15 +(16,17) +[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17] +\N +str_1 +\N +\N +\N +\N +\N +str_7 +\N +\N +\N +\N +\N +str_13 +\N +\N +\N +\N +\N +\N +2 +\N +\N +\N +\N +\N +8 +\N +\N +\N +\N +\N +14 +\N +\N +\N +\N +\N +\N +lc_str_3 +\N +\N +\N +\N +\N +lc_str_9 +\N +\N +\N +\N +\N +lc_str_15 +\N +\N +(0,0) +(0,0) +(0,0) +(0,0) +(4,5) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(10,11) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(16,17) +(0,0) +\N +\N +\N +\N +4 +\N +\N +\N +\N +\N +10 +\N +\N +\N +\N +\N +16 +\N +\N +\N +\N +\N +5 +\N +\N +\N +\N +\N +11 +\N +\N +\N +\N +\N +17 +\N +[] +[] +[] +[] +[] +[0,1,2,3,4,5] +[] +[] +[] +[] +[] +[0,1,2,3,4,5,6,7,8,9,10,11] +[] +[] +[] +[] +[] +[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17] +\N +\N +\N +\N +\N +6 +\N +\N +\N +\N +\N +12 +\N +\N +\N +\N +\N +18 +----------------------------------------------------------------------------------------------------------- +test3 select +\N +str_1 +2 +lc_str_3 +(4,5) +[0,1,2,3,4,5] +\N +str_7 +8 +lc_str_9 +(10,11) +[0,1,2,3,4,5,6,7,8,9,10,11] +\N +str_13 +14 +lc_str_15 +(16,17) +[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17] +\N +str_1 +\N +\N +\N +\N +\N +str_7 +\N +\N +\N +\N +\N +str_13 +\N +\N +\N +\N +\N +\N +2 +\N +\N +\N +\N +\N +8 +\N +\N +\N +\N +\N +14 +\N +\N +\N +\N +\N +\N +lc_str_3 +\N +\N +\N +\N +\N +lc_str_9 +\N +\N +\N +\N +\N +lc_str_15 +\N +\N +(0,0) +(0,0) +(0,0) +(0,0) +(4,5) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(10,11) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(16,17) +(0,0) +\N +\N +\N +\N +4 +\N +\N +\N +\N +\N +10 +\N +\N +\N +\N +\N +16 +\N +\N +\N +\N +\N +5 +\N +\N +\N +\N +\N +11 +\N +\N +\N +\N +\N +17 +\N +[] +[] +[] +[] +[] +[0,1,2,3,4,5] +[] +[] +[] +[] +[] +[0,1,2,3,4,5,6,7,8,9,10,11] +[] +[] +[] +[] +[] +[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17] +\N +\N +\N +\N +\N +6 +\N +\N +\N +\N +\N +12 +\N +\N +\N +\N +\N +18 +----------------------------------------------------------------------------------------------------------- +MergeTree wide +test1 insert +test1 select +\N +\N +\N +0 +1 +2 +str_0 +str_1 +str_2 +lc_str_0 +lc_str_1 +lc_str_2 +(0,1) +(1,2) +(2,3) +[0] +[0,1] +[0,1,2] +\N +\N +\N +\N +\N +\N +str_0 +str_1 +str_2 +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +0 +1 +2 +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +lc_str_0 +lc_str_1 +lc_str_2 +\N +\N +\N +\N +\N +\N +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,1) +(1,2) +(2,3) +(0,0) +(0,0) +(0,0) +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +0 +1 +2 +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +1 +2 +3 +\N +\N +\N +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[0] +[0,1] +[0,1,2] +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +1 +2 +3 +----------------------------------------------------------------------------------------------------------- +test1 select +\N +\N +\N +0 +1 +2 +str_0 +str_1 +str_2 +lc_str_0 +lc_str_1 +lc_str_2 +(0,1) +(1,2) +(2,3) +[0] +[0,1] +[0,1,2] +\N +\N +\N +\N +\N +\N +str_0 +str_1 +str_2 +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +0 +1 +2 +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +lc_str_0 +lc_str_1 +lc_str_2 +\N +\N +\N +\N +\N +\N +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,1) +(1,2) +(2,3) +(0,0) +(0,0) +(0,0) +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +0 +1 +2 +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N 
+\N +\N +\N +\N +\N +1 +2 +3 +\N +\N +\N +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[0] +[0,1] +[0,1,2] +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +1 +2 +3 +----------------------------------------------------------------------------------------------------------- +test2 insert +test2 select +\N +\N +\N +0 +\N +2 +str_0 +\N +str_2 +lc_str_0 +\N +lc_str_2 +(0,1) +\N +(2,3) +[0] +\N +[0,1,2] +\N +\N +\N +\N +\N +\N +str_0 +\N +str_2 +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +0 +\N +2 +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +lc_str_0 +\N +lc_str_2 +\N +\N +\N +\N +\N +\N +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,1) +(0,0) +(2,3) +(0,0) +(0,0) +(0,0) +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +0 +\N +2 +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +1 +\N +3 +\N +\N +\N +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[0] +[] +[0,1,2] +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +1 +\N +3 +----------------------------------------------------------------------------------------------------------- +test2 select +\N +\N +\N +0 +\N +2 +str_0 +\N +str_2 +lc_str_0 +\N +lc_str_2 +(0,1) +\N +(2,3) +[0] +\N +[0,1,2] +\N +\N +\N +\N +\N +\N +str_0 +\N +str_2 +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +0 +\N +2 +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +lc_str_0 +\N +lc_str_2 +\N +\N +\N +\N +\N +\N +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(0,1) +(0,0) +(2,3) +(0,0) +(0,0) +(0,0) +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +0 +\N +2 +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +1 +\N +3 +\N +\N +\N +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[0] +[] +[0,1,2] +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +1 +\N +3 +----------------------------------------------------------------------------------------------------------- +test3 insert +test3 select +\N +str_1 +2 +lc_str_3 +(4,5) +[0,1,2,3,4,5] +\N +str_7 +8 +lc_str_9 +(10,11) +[0,1,2,3,4,5,6,7,8,9,10,11] +\N +str_13 +14 +lc_str_15 +(16,17) +[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17] +\N +str_1 +\N +\N +\N +\N +\N +str_7 +\N +\N +\N +\N +\N +str_13 +\N +\N +\N +\N +\N +\N +2 +\N +\N +\N +\N +\N +8 +\N +\N +\N +\N +\N +14 +\N +\N +\N +\N +\N +\N +lc_str_3 +\N +\N +\N +\N +\N +lc_str_9 +\N +\N +\N +\N +\N +lc_str_15 +\N +\N +(0,0) +(0,0) +(0,0) +(0,0) +(4,5) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(10,11) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(16,17) +(0,0) +\N +\N +\N +\N +4 +\N +\N +\N +\N +\N +10 +\N +\N +\N +\N +\N +16 +\N +\N +\N +\N +\N +5 +\N +\N +\N +\N +\N +11 +\N +\N +\N +\N +\N +17 +\N +[] +[] +[] +[] +[] +[0,1,2,3,4,5] +[] +[] +[] +[] +[] +[0,1,2,3,4,5,6,7,8,9,10,11] +[] +[] +[] +[] +[] +[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17] +\N +\N +\N +\N +\N +6 +\N +\N +\N +\N +\N +12 +\N +\N +\N +\N +\N +18 +----------------------------------------------------------------------------------------------------------- +test3 select +\N +str_1 +2 +lc_str_3 +(4,5) +[0,1,2,3,4,5] +\N +str_7 +8 +lc_str_9 +(10,11) +[0,1,2,3,4,5,6,7,8,9,10,11] +\N +str_13 +14 +lc_str_15 +(16,17) +[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17] +\N +str_1 +\N +\N +\N +\N +\N +str_7 +\N +\N +\N +\N +\N +str_13 +\N +\N +\N +\N +\N +\N +2 +\N +\N +\N +\N +\N +8 +\N +\N +\N +\N +\N +14 +\N +\N +\N +\N +\N +\N +lc_str_3 +\N +\N +\N +\N +\N +lc_str_9 +\N +\N +\N +\N +\N 
+lc_str_15 +\N +\N +(0,0) +(0,0) +(0,0) +(0,0) +(4,5) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(10,11) +(0,0) +(0,0) +(0,0) +(0,0) +(0,0) +(16,17) +(0,0) +\N +\N +\N +\N +4 +\N +\N +\N +\N +\N +10 +\N +\N +\N +\N +\N +16 +\N +\N +\N +\N +\N +5 +\N +\N +\N +\N +\N +11 +\N +\N +\N +\N +\N +17 +\N +[] +[] +[] +[] +[] +[0,1,2,3,4,5] +[] +[] +[] +[] +[] +[0,1,2,3,4,5,6,7,8,9,10,11] +[] +[] +[] +[] +[] +[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17] +\N +\N +\N +\N +\N +6 +\N +\N +\N +\N +\N +12 +\N +\N +\N +\N +\N +18 +----------------------------------------------------------------------------------------------------------- diff --git a/tests/queries/0_stateless/02941_variant_type_1.sh b/tests/queries/0_stateless/02941_variant_type_1.sh new file mode 100755 index 00000000000..4cf8ad25122 --- /dev/null +++ b/tests/queries/0_stateless/02941_variant_type_1.sh @@ -0,0 +1,125 @@ +#!/usr/bin/env bash +# Tags: long + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# reset --log_comment +CLICKHOUSE_LOG_COMMENT= +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1" + +function test1_insert() +{ + echo "test1 insert" + $CH_CLIENT -q "insert into test select number, NULL from numbers(3);" + $CH_CLIENT -q "insert into test select number + 3, number from numbers(3);" + $CH_CLIENT -q "insert into test select number + 6, 'str_' || toString(number) from numbers(3);" + $CH_CLIENT -q "insert into test select number + 9, ('lc_str_' || toString(number))::LowCardinality(String) from numbers(3);" + $CH_CLIENT -q "insert into test select number + 12, tuple(number, number + 1)::Tuple(a UInt32, b UInt32) from numbers(3);" + $CH_CLIENT -q "insert into test select number + 15, range(number + 1)::Array(UInt64) from numbers(3);" +} + +function test1_select() +{ + echo "test1 select" + $CH_CLIENT -q "select v from test order by id;" + $CH_CLIENT -q "select v.String from test order by id;" + $CH_CLIENT -q "select v.UInt64 from test order by id;" + $CH_CLIENT -q "select v.\`LowCardinality(String)\` from test order by id;" + $CH_CLIENT -q "select v.\`Tuple(a UInt32, b UInt32)\` from test order by id;" + $CH_CLIENT -q "select v.\`Tuple(a UInt32, b UInt32)\`.a from test order by id;" + $CH_CLIENT -q "select v.\`Tuple(a UInt32, b UInt32)\`.b from test order by id;" + $CH_CLIENT -q "select v.\`Array(UInt64)\` from test order by id;" + $CH_CLIENT -q "select v.\`Array(UInt64)\`.size0 from test order by id;" + echo "-----------------------------------------------------------------------------------------------------------" +} + +function test2_insert() +{ + echo "test2 insert" + $CH_CLIENT -q "insert into test select number, NULL from numbers(3);" + $CH_CLIENT -q "insert into test select number + 3, number % 2 ? NULL : number from numbers(3);" + $CH_CLIENT -q "insert into test select number + 6, number % 2 ? NULL : 'str_' || toString(number) from numbers(3);" + $CH_CLIENT -q "insert into test select number + 9, number % 2 ? CAST(NULL, 'Variant(String, UInt64, LowCardinality(String), Tuple(a UInt32, b UInt32), Array(UInt64))') : CAST(('lc_str_' || toString(number))::LowCardinality(String), 'Variant(String, UInt64, LowCardinality(String), Tuple(a UInt32, b UInt32), Array(UInt64))') from numbers(3);" + $CH_CLIENT -q "insert into test select number + 12, number % 2 ? 
CAST(NULL, 'Variant(String, UInt64, LowCardinality(String), Tuple(a UInt32, b UInt32), Array(UInt64))') : CAST(tuple(number, number + 1)::Tuple(a UInt32, b UInt32), 'Variant(String, UInt64, LowCardinality(String), Tuple(a UInt32, b UInt32), Array(UInt64))') from numbers(3);" + $CH_CLIENT -q "insert into test select number + 15, number % 2 ? CAST(NULL, 'Variant(String, UInt64, LowCardinality(String), Tuple(a UInt32, b UInt32), Array(UInt64))') : CAST(range(number + 1)::Array(UInt64), 'Variant(String, UInt64, LowCardinality(String), Tuple(a UInt32, b UInt32), Array(UInt64))') from numbers(3);" +} + +function test2_select() +{ + echo "test2 select" + $CH_CLIENT -q "select v from test order by id;" + $CH_CLIENT -q "select v.String from test order by id;" + $CH_CLIENT -q "select v.UInt64 from test order by id;" + $CH_CLIENT -q "select v.\`LowCardinality(String)\` from test order by id;" + $CH_CLIENT -q "select v.\`Tuple(a UInt32, b UInt32)\` from test order by id;" + $CH_CLIENT -q "select v.\`Tuple(a UInt32, b UInt32)\`.a from test order by id;" + $CH_CLIENT -q "select v.\`Tuple(a UInt32, b UInt32)\`.b from test order by id;" + $CH_CLIENT -q "select v.\`Array(UInt64)\` from test order by id;" + $CH_CLIENT -q "select v.\`Array(UInt64)\`.size0 from test order by id;" + echo "-----------------------------------------------------------------------------------------------------------" +} + +function test3_insert() +{ + echo "test3 insert" + $CH_CLIENT -q "insert into test with 'Variant(String, UInt64, LowCardinality(String), Tuple(a UInt32, b UInt32), Array(UInt64))' as type select number, multiIf(number % 6 == 0, CAST(NULL, type), number % 6 == 1, CAST('str_' || toString(number), type), number % 6 == 2, CAST(number, type), number % 6 == 3, CAST(('lc_str_' || toString(number))::LowCardinality(String), type), number % 6 == 4, CAST(tuple(number, number + 1)::Tuple(a UInt32, b UInt32), type), CAST(range(number + 1)::Array(UInt64), type)) as res from numbers(18);" +} + +function test3_select() +{ + echo "test3 select" + $CH_CLIENT -q "select v from test order by id;" + $CH_CLIENT -q "select v.String from test order by id;" + $CH_CLIENT -q "select v.UInt64 from test order by id;" + $CH_CLIENT -q "select v.\`LowCardinality(String)\` from test order by id;" + $CH_CLIENT -q "select v.\`Tuple(a UInt32, b UInt32)\` from test order by id;" + $CH_CLIENT -q "select v.\`Tuple(a UInt32, b UInt32)\`.a from test order by id;" + $CH_CLIENT -q "select v.\`Tuple(a UInt32, b UInt32)\`.b from test order by id;" + $CH_CLIENT -q "select v.\`Array(UInt64)\` from test order by id;" + $CH_CLIENT -q "select v.\`Array(UInt64)\`.size0 from test order by id;" + echo "-----------------------------------------------------------------------------------------------------------" +} + +function run() +{ + test1_insert + test1_select + if [ $1 == 1 ]; then + $CH_CLIENT -q "optimize table test final;" + test1_select + fi + $CH_CLIENT -q "truncate table test;" + test2_insert + test2_select + if [ $1 == 1 ]; then + $CH_CLIENT -q "optimize table test final;" + test2_select + fi + $CH_CLIENT -q "truncate table test;" + test3_insert + test3_select + if [ $1 == 1 ]; then + $CH_CLIENT -q "optimize table test final;" + test3_select + fi + $CH_CLIENT -q "truncate table test;" +} + +$CH_CLIENT -q "drop table if exists test;" + +echo "Memory" +$CH_CLIENT -q "create table test (id UInt64, v Variant(String, UInt64, LowCardinality(String), Tuple(a UInt32, b UInt32), Array(UInt64))) engine=Memory;" +run 0 +$CH_CLIENT -q "drop table test;" + +echo 
"MergeTree compact" +$CH_CLIENT -q "create table test (id UInt64, v Variant(String, UInt64, LowCardinality(String), Tuple(a UInt32, b UInt32), Array(UInt64))) engine=MergeTree order by id settings min_rows_for_wide_part=100000000, min_bytes_for_wide_part=1000000000;" +run 1 +$CH_CLIENT -q "drop table test;" + +echo "MergeTree wide" +$CH_CLIENT -q "create table test (id UInt64, v Variant(String, UInt64, LowCardinality(String), Tuple(a UInt32, b UInt32), Array(UInt64))) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1;" +run 1 +$CH_CLIENT -q "drop table test;" diff --git a/tests/queries/0_stateless/02941_variant_type_2.reference b/tests/queries/0_stateless/02941_variant_type_2.reference new file mode 100644 index 00000000000..4b6d53c52ac --- /dev/null +++ b/tests/queries/0_stateless/02941_variant_type_2.reference @@ -0,0 +1,51 @@ +Memory +test4 insert +test4 select +1000000 +200000 +200000 +200000 +200000 +200000 +200000 +200000 +MergeTree compact +test4 insert +test4 select +1000000 +200000 +200000 +200000 +200000 +200000 +200000 +200000 +test4 select +1000000 +200000 +200000 +200000 +200000 +200000 +200000 +200000 +MergeTree wide +test4 insert +test4 select +1000000 +200000 +200000 +200000 +200000 +200000 +200000 +200000 +test4 select +1000000 +200000 +200000 +200000 +200000 +200000 +200000 +200000 diff --git a/tests/queries/0_stateless/02941_variant_type_2.sh b/tests/queries/0_stateless/02941_variant_type_2.sh new file mode 100755 index 00000000000..7064dfbf4ec --- /dev/null +++ b/tests/queries/0_stateless/02941_variant_type_2.sh @@ -0,0 +1,71 @@ +#!/usr/bin/env bash +# Tags: long + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# reset --log_comment +CLICKHOUSE_LOG_COMMENT= +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1" + +function test4_insert() +{ + echo "test4 insert" + $CH_CLIENT -q "insert into test select number, NULL from numbers(200000);" + $CH_CLIENT -q "insert into test select number + 200000, number from numbers(200000);" + $CH_CLIENT -q "insert into test select number + 400000, 'str_' || toString(number) from numbers(200000);" + $CH_CLIENT -q "insert into test select number + 600000, ('lc_str_' || toString(number))::LowCardinality(String) from numbers(200000);" + $CH_CLIENT -q "insert into test select number + 800000, tuple(number, number + 1)::Tuple(a UInt32, b UInt32) from numbers(200000);" + $CH_CLIENT -q "insert into test select number + 1000000, range(number % 20 + 1)::Array(UInt64) from numbers(200000);" +} + +function test4_select +{ + echo "test4 select" + $CH_CLIENT -q "select v from test format Null;" + $CH_CLIENT -q "select count() from test where isNotNull(v);" + $CH_CLIENT -q "select v.String from test format Null;" + $CH_CLIENT -q "select count() from test where isNotNull(v.String);" + $CH_CLIENT -q "select v.UInt64 from test format Null;" + $CH_CLIENT -q "select count() from test where isNotNull(v.UInt64);" + $CH_CLIENT -q "select v.\`LowCardinality(String)\` from test format Null;" + $CH_CLIENT -q "select count() from test where isNotNull(v.\`LowCardinality(String)\`);" + $CH_CLIENT -q "select v.\`Tuple(a UInt32, b UInt32)\` from test format Null;" + $CH_CLIENT -q "select v.\`Tuple(a UInt32, b UInt32)\`.a from test format Null;" + $CH_CLIENT -q "select count() from test where isNotNull(v.\`Tuple(a UInt32, b UInt32)\`.a);" + $CH_CLIENT -q "select v.\`Tuple(a UInt32, b UInt32)\`.b from test format Null;" + $CH_CLIENT -q "select count() from test where isNotNull(v.\`Tuple(a UInt32, b UInt32)\`.b);" + $CH_CLIENT -q "select v.\`Array(UInt64)\` from test format Null;" + $CH_CLIENT -q "select count() from test where not empty(v.\`Array(UInt64)\`);" + $CH_CLIENT -q "select v.\`Array(UInt64)\`.size0 from test format Null;" + $CH_CLIENT -q "select count() from test where isNotNull(v.\`Array(UInt64)\`.size0);" +} + +function run() +{ + test4_insert + test4_select + if [ $1 == 1 ]; then + $CH_CLIENT -q "optimize table test final;" + test4_select + fi + $CH_CLIENT -q "truncate table test;" +} + +$CH_CLIENT -q "drop table if exists test;" + +echo "Memory" +$CH_CLIENT -q "create table test (id UInt64, v Variant(String, UInt64, LowCardinality(String), Tuple(a UInt32, b UInt32), Array(UInt64))) engine=Memory;" +run 0 +$CH_CLIENT -q "drop table test;" + +echo "MergeTree compact" +$CH_CLIENT -q "create table test (id UInt64, v Variant(String, UInt64, LowCardinality(String), Tuple(a UInt32, b UInt32), Array(UInt64))) engine=MergeTree order by id settings min_rows_for_wide_part=100000000, min_bytes_for_wide_part=1000000000;" +run 1 +$CH_CLIENT -q "drop table test;" + +echo "MergeTree wide" +$CH_CLIENT -q "create table test (id UInt64, v Variant(String, UInt64, LowCardinality(String), Tuple(a UInt32, b UInt32), Array(UInt64))) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1;" +run 1 +$CH_CLIENT -q "drop table test;" diff --git a/tests/queries/0_stateless/02941_variant_type_3.reference b/tests/queries/0_stateless/02941_variant_type_3.reference new file mode 100644 index 00000000000..1ccdb3acdff --- /dev/null +++ b/tests/queries/0_stateless/02941_variant_type_3.reference @@ -0,0 +1,51 @@ +Memory +test5 insert +test5 select +500000 +100000 +100000 +100000 +100000 
+100000 +100000 +100000 +MergeTree compact +test5 insert +test5 select +500000 +100000 +100000 +100000 +100000 +100000 +100000 +100000 +test5 select +500000 +100000 +100000 +100000 +100000 +100000 +100000 +100000 +MergeTree wide +test5 insert +test5 select +500000 +100000 +100000 +100000 +100000 +100000 +100000 +100000 +test5 select +500000 +100000 +100000 +100000 +100000 +100000 +100000 +100000 diff --git a/tests/queries/0_stateless/02941_variant_type_3.sh b/tests/queries/0_stateless/02941_variant_type_3.sh new file mode 100755 index 00000000000..303039edef7 --- /dev/null +++ b/tests/queries/0_stateless/02941_variant_type_3.sh @@ -0,0 +1,71 @@ +#!/usr/bin/env bash +# Tags: long + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# reset --log_comment +CLICKHOUSE_LOG_COMMENT= +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1" + +function test5_insert() +{ + echo "test5 insert" + $CH_CLIENT -q "insert into test select number, NULL from numbers(200000);" + $CH_CLIENT -q "insert into test select number + 200000, number % 2 ? NULL : number from numbers(200000);" + $CH_CLIENT -q "insert into test select number + 400000, number % 2 ? NULL : 'str_' || toString(number) from numbers(200000);" + $CH_CLIENT -q "insert into test select number + 600000, number % 2 ? CAST(NULL, 'Variant(String, UInt64, LowCardinality(String), Tuple(a UInt32, b UInt32), Array(UInt64))') : CAST(('lc_str_' || toString(number))::LowCardinality(String), 'Variant(String, UInt64, LowCardinality(String), Tuple(a UInt32, b UInt32), Array(UInt64))') from numbers(200000);" + $CH_CLIENT -q "insert into test select number + 800000, number % 2 ? CAST(NULL, 'Variant(String, UInt64, LowCardinality(String), Tuple(a UInt32, b UInt32), Array(UInt64))') : CAST(tuple(number, number + 1)::Tuple(a UInt32, b UInt32), 'Variant(String, UInt64, LowCardinality(String), Tuple(a UInt32, b UInt32), Array(UInt64))') from numbers(200000);" + $CH_CLIENT -q "insert into test select number + 1000000, number % 2 ? 
CAST(NULL, 'Variant(String, UInt64, LowCardinality(String), Tuple(a UInt32, b UInt32), Array(UInt64))') : CAST(range(number % 20 + 1)::Array(UInt64), 'Variant(String, UInt64, LowCardinality(String), Tuple(a UInt32, b UInt32), Array(UInt64))') from numbers(200000);" +} + +function test5_select() +{ + echo "test5 select" + $CH_CLIENT -q "select v from test format Null;" + $CH_CLIENT -q "select count() from test where isNotNull(v);" + $CH_CLIENT -q "select v.String from test format Null;" + $CH_CLIENT -q "select count() from test where isNotNull(v.String);" + $CH_CLIENT -q "select v.UInt64 from test format Null;" + $CH_CLIENT -q "select count() from test where isNotNull(v.UInt64);" + $CH_CLIENT -q "select v.\`LowCardinality(String)\` from test format Null;" + $CH_CLIENT -q "select count() from test where isNotNull(v.\`LowCardinality(String)\`);" + $CH_CLIENT -q "select v.\`Tuple(a UInt32, b UInt32)\` from test format Null;" + $CH_CLIENT -q "select v.\`Tuple(a UInt32, b UInt32)\`.a from test format Null;" + $CH_CLIENT -q "select count() from test where isNotNull(v.\`Tuple(a UInt32, b UInt32)\`.a);" + $CH_CLIENT -q "select v.\`Tuple(a UInt32, b UInt32)\`.b from test format Null;" + $CH_CLIENT -q "select count() from test where isNotNull(v.\`Tuple(a UInt32, b UInt32)\`.b);" + $CH_CLIENT -q "select v.\`Array(UInt64)\` from test format Null;" + $CH_CLIENT -q "select count() from test where not empty(v.\`Array(UInt64)\`);" + $CH_CLIENT -q "select v.\`Array(UInt64)\`.size0 from test format Null;" + $CH_CLIENT -q "select count() from test where isNotNull(v.\`Array(UInt64)\`.size0);" +} + +function run() +{ + test5_insert + test5_select + if [ $1 == 1 ]; then + $CH_CLIENT -q "optimize table test final;" + test5_select + fi + $CH_CLIENT -q "truncate table test;" +} + +$CH_CLIENT -q "drop table if exists test;" + +echo "Memory" +$CH_CLIENT -q "create table test (id UInt64, v Variant(String, UInt64, LowCardinality(String), Tuple(a UInt32, b UInt32), Array(UInt64))) engine=Memory;" +run 0 +$CH_CLIENT -q "drop table test;" + +echo "MergeTree compact" +$CH_CLIENT -q "create table test (id UInt64, v Variant(String, UInt64, LowCardinality(String), Tuple(a UInt32, b UInt32), Array(UInt64))) engine=MergeTree order by id settings min_rows_for_wide_part=100000000, min_bytes_for_wide_part=1000000000;" +run 1 +$CH_CLIENT -q "drop table test;" + +echo "MergeTree wide" +$CH_CLIENT -q "create table test (id UInt64, v Variant(String, UInt64, LowCardinality(String), Tuple(a UInt32, b UInt32), Array(UInt64))) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1;" +run 1 +$CH_CLIENT -q "drop table test;" diff --git a/tests/queries/0_stateless/02941_variant_type_4.reference b/tests/queries/0_stateless/02941_variant_type_4.reference new file mode 100644 index 00000000000..e13d5820343 --- /dev/null +++ b/tests/queries/0_stateless/02941_variant_type_4.reference @@ -0,0 +1,56 @@ +Memory +test6 insert +test6 select +1000000 +200000 +200000 +200000 +200000 +200000 +200000 +200000 +----------------------------------------------------------------------------------------------------------- +MergeTree compact +test6 insert +test6 select +1000000 +200000 +200000 +200000 +200000 +200000 +200000 +200000 +----------------------------------------------------------------------------------------------------------- +test6 select +1000000 +200000 +200000 +200000 +200000 +200000 +200000 +200000 +----------------------------------------------------------------------------------------------------------- 
+MergeTree wide +test6 insert +test6 select +1000000 +200000 +200000 +200000 +200000 +200000 +200000 +200000 +----------------------------------------------------------------------------------------------------------- +test6 select +1000000 +200000 +200000 +200000 +200000 +200000 +200000 +200000 +----------------------------------------------------------------------------------------------------------- diff --git a/tests/queries/0_stateless/02941_variant_type_4.sh b/tests/queries/0_stateless/02941_variant_type_4.sh new file mode 100755 index 00000000000..169e43c6d69 --- /dev/null +++ b/tests/queries/0_stateless/02941_variant_type_4.sh @@ -0,0 +1,67 @@ +#!/usr/bin/env bash +# Tags: long + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# reset --log_comment +CLICKHOUSE_LOG_COMMENT= +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1" + +function test6_insert() +{ + echo "test6 insert" + $CH_CLIENT -q "insert into test with 'Variant(String, UInt64, LowCardinality(String), Tuple(a UInt32, b UInt32), Array(UInt64))' as type select number, multiIf(number % 6 == 0, CAST(NULL, type), number % 6 == 1, CAST('str_' || toString(number), type), number % 6 == 2, CAST(number, type), number % 6 == 3, CAST(('lc_str_' || toString(number))::LowCardinality(String), type), number % 6 == 4, CAST(tuple(number, number + 1)::Tuple(a UInt32, b UInt32), type), CAST(range(number % 20 + 1)::Array(UInt64), type)) as res from numbers(1200000);" +} + +function test6_select() +{ + echo "test6 select" + $CH_CLIENT -q "select v from test format Null;" + $CH_CLIENT -q "select count() from test where isNotNull(v);" + $CH_CLIENT -q "select v.String from test format Null;" + $CH_CLIENT -q "select count() from test where isNotNull(v.String);" + $CH_CLIENT -q "select v.UInt64 from test format Null;" + $CH_CLIENT -q "select count() from test where isNotNull(v.UInt64);" + $CH_CLIENT -q "select v.\`LowCardinality(String)\` from test format Null;" + $CH_CLIENT -q "select count() from test where isNotNull(v.\`LowCardinality(String)\`);" + $CH_CLIENT -q "select v.\`Tuple(a UInt32, b UInt32)\` from test format Null;" + $CH_CLIENT -q "select v.\`Tuple(a UInt32, b UInt32)\`.a from test format Null;" + $CH_CLIENT -q "select count() from test where isNotNull(v.\`Tuple(a UInt32, b UInt32)\`.a);" + $CH_CLIENT -q "select v.\`Tuple(a UInt32, b UInt32)\`.b from test format Null;" + $CH_CLIENT -q "select count() from test where isNotNull(v.\`Tuple(a UInt32, b UInt32)\`.b);" + $CH_CLIENT -q "select v.\`Array(UInt64)\` from test format Null;" + $CH_CLIENT -q "select count() from test where not empty(v.\`Array(UInt64)\`);" + $CH_CLIENT -q "select v.\`Array(UInt64)\`.size0 from test format Null;" + $CH_CLIENT -q "select count() from test where isNotNull(v.\`Array(UInt64)\`.size0);" + echo "-----------------------------------------------------------------------------------------------------------" +} + +function run() +{ + test6_insert + test6_select + if [ $1 == 1 ]; then + $CH_CLIENT -q "optimize table test final;" + test6_select + fi + $CH_CLIENT -q "truncate table test;" +} + +$CH_CLIENT -q "drop table if exists test;" + +echo "Memory" +$CH_CLIENT -q "create table test (id UInt64, v Variant(String, UInt64, LowCardinality(String), Tuple(a UInt32, b UInt32), Array(UInt64))) engine=Memory;" +run 0 +$CH_CLIENT -q "drop table test;" + +echo "MergeTree compact" +$CH_CLIENT -q "create table test (id UInt64, v Variant(String, UInt64, LowCardinality(String), 
Tuple(a UInt32, b UInt32), Array(UInt64))) engine=MergeTree order by id settings min_rows_for_wide_part=100000000, min_bytes_for_wide_part=1000000000;" +run 1 +$CH_CLIENT -q "drop table test;" + +echo "MergeTree wide" +$CH_CLIENT -q "create table test (id UInt64, v Variant(String, UInt64, LowCardinality(String), Tuple(a UInt32, b UInt32), Array(UInt64))) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1;" +run 1 +$CH_CLIENT -q "drop table test;" diff --git a/tests/queries/0_stateless/02941_variant_type_alters.reference b/tests/queries/0_stateless/02941_variant_type_alters.reference new file mode 100644 index 00000000000..52c834e455b --- /dev/null +++ b/tests/queries/0_stateless/02941_variant_type_alters.reference @@ -0,0 +1,330 @@ +Memory +initial insert +alter add column 1 +0 0 \N \N \N +1 1 \N \N \N +2 2 \N \N \N +insert after alter add column 1 +0 0 \N \N \N +1 1 \N \N \N +2 2 \N \N \N +3 3 3 \N 3 +4 4 4 \N 4 +5 5 5 \N 5 +6 6 str_6 str_6 \N +7 7 str_7 str_7 \N +8 8 str_8 str_8 \N +9 9 \N \N \N +10 10 \N \N \N +11 11 \N \N \N +12 12 12 \N 12 +13 13 str_13 str_13 \N +14 14 \N \N \N +alter modify column 1 +0 0 \N \N \N \N +1 1 \N \N \N \N +2 2 \N \N \N \N +3 3 3 \N 3 \N +4 4 4 \N 4 \N +5 5 5 \N 5 \N +6 6 str_6 str_6 \N \N +7 7 str_7 str_7 \N \N +8 8 str_8 str_8 \N \N +9 9 \N \N \N \N +10 10 \N \N \N \N +11 11 \N \N \N \N +12 12 12 \N 12 \N +13 13 str_13 str_13 \N \N +14 14 \N \N \N \N +insert after alter modify column 1 +0 0 \N \N \N \N +1 1 \N \N \N \N +2 2 \N \N \N \N +3 3 3 \N 3 \N +4 4 4 \N 4 \N +5 5 5 \N 5 \N +6 6 str_6 str_6 \N \N +7 7 str_7 str_7 \N \N +8 8 str_8 str_8 \N \N +9 9 \N \N \N \N +10 10 \N \N \N \N +11 11 \N \N \N \N +12 12 12 \N 12 \N +13 13 str_13 str_13 \N \N +14 14 \N \N \N \N +15 15 1970-01-16 \N \N 1970-01-16 +16 16 1970-01-17 \N \N 1970-01-17 +17 17 1970-01-18 \N \N 1970-01-18 +18 18 1970-01-19 \N \N 1970-01-19 +19 19 \N \N \N \N +20 20 20 \N 20 \N +21 21 str_21 str_21 \N \N +alter modify column 2 +0 0 \N \N \N \N \N \N +1 1 \N \N \N \N \N \N +2 2 \N \N \N \N \N \N +3 3 \N \N 3 \N 3 \N +4 4 \N \N 4 \N 4 \N +5 5 \N \N 5 \N 5 \N +6 6 \N \N str_6 str_6 \N \N +7 7 \N \N str_7 str_7 \N \N +8 8 \N \N str_8 str_8 \N \N +9 9 \N \N \N \N \N \N +10 10 \N \N \N \N \N \N +11 11 \N \N \N \N \N \N +12 12 \N \N 12 \N 12 \N +13 13 \N \N str_13 str_13 \N \N +14 14 \N \N \N \N \N \N +15 15 \N \N 1970-01-16 \N \N 1970-01-16 +16 16 \N \N 1970-01-17 \N \N 1970-01-17 +17 17 \N \N 1970-01-18 \N \N 1970-01-18 +18 18 \N \N 1970-01-19 \N \N 1970-01-19 +19 19 \N \N \N \N \N \N +20 20 \N \N 20 \N 20 \N +21 21 \N \N str_21 str_21 \N \N +insert after alter modify column 2 +0 0 \N \N \N \N \N \N +1 1 \N \N \N \N \N \N +2 2 \N \N \N \N \N \N +3 3 \N \N 3 \N 3 \N +4 4 \N \N 4 \N 4 \N +5 5 \N \N 5 \N 5 \N +6 6 \N \N str_6 str_6 \N \N +7 7 \N \N str_7 str_7 \N \N +8 8 \N \N str_8 str_8 \N \N +9 9 \N \N \N \N \N \N +10 10 \N \N \N \N \N \N +11 11 \N \N \N \N \N \N +12 12 \N \N 12 \N 12 \N +13 13 \N \N str_13 str_13 \N \N +14 14 \N \N \N \N \N \N +15 15 \N \N 1970-01-16 \N \N 1970-01-16 +16 16 \N \N 1970-01-17 \N \N 1970-01-17 +17 17 \N \N 1970-01-18 \N \N 1970-01-18 +18 18 \N \N 1970-01-19 \N \N 1970-01-19 +19 19 \N \N \N \N \N \N +20 20 \N \N 20 \N 20 \N +21 21 \N \N str_21 str_21 \N \N +22 str_22 \N str_22 \N \N \N \N +23 \N \N \N \N \N \N \N +24 24 24 \N \N \N \N \N +MergeTree compact +initial insert +alter add column 1 +0 0 \N \N \N +1 1 \N \N \N +2 2 \N \N \N +insert after alter add column 1 +0 0 \N \N \N +1 1 \N \N \N +2 2 \N \N \N +3 3 3 \N 3 +4 4 4 \N 
4 +5 5 5 \N 5 +6 6 str_6 str_6 \N +7 7 str_7 str_7 \N +8 8 str_8 str_8 \N +9 9 \N \N \N +10 10 \N \N \N +11 11 \N \N \N +12 12 12 \N 12 +13 13 str_13 str_13 \N +14 14 \N \N \N +alter modify column 1 +0 0 \N \N \N \N +1 1 \N \N \N \N +2 2 \N \N \N \N +3 3 3 \N 3 \N +4 4 4 \N 4 \N +5 5 5 \N 5 \N +6 6 str_6 str_6 \N \N +7 7 str_7 str_7 \N \N +8 8 str_8 str_8 \N \N +9 9 \N \N \N \N +10 10 \N \N \N \N +11 11 \N \N \N \N +12 12 12 \N 12 \N +13 13 str_13 str_13 \N \N +14 14 \N \N \N \N +insert after alter modify column 1 +0 0 \N \N \N \N +1 1 \N \N \N \N +2 2 \N \N \N \N +3 3 3 \N 3 \N +4 4 4 \N 4 \N +5 5 5 \N 5 \N +6 6 str_6 str_6 \N \N +7 7 str_7 str_7 \N \N +8 8 str_8 str_8 \N \N +9 9 \N \N \N \N +10 10 \N \N \N \N +11 11 \N \N \N \N +12 12 12 \N 12 \N +13 13 str_13 str_13 \N \N +14 14 \N \N \N \N +15 15 1970-01-16 \N \N 1970-01-16 +16 16 1970-01-17 \N \N 1970-01-17 +17 17 1970-01-18 \N \N 1970-01-18 +18 18 1970-01-19 \N \N 1970-01-19 +19 19 \N \N \N \N +20 20 20 \N 20 \N +21 21 str_21 str_21 \N \N +alter modify column 2 +0 0 0 \N \N \N \N \N +1 1 1 \N \N \N \N \N +2 2 2 \N \N \N \N \N +3 3 3 \N 3 \N 3 \N +4 4 4 \N 4 \N 4 \N +5 5 5 \N 5 \N 5 \N +6 6 6 \N str_6 str_6 \N \N +7 7 7 \N str_7 str_7 \N \N +8 8 8 \N str_8 str_8 \N \N +9 9 9 \N \N \N \N \N +10 10 10 \N \N \N \N \N +11 11 11 \N \N \N \N \N +12 12 12 \N 12 \N 12 \N +13 13 13 \N str_13 str_13 \N \N +14 14 14 \N \N \N \N \N +15 15 15 \N 1970-01-16 \N \N 1970-01-16 +16 16 16 \N 1970-01-17 \N \N 1970-01-17 +17 17 17 \N 1970-01-18 \N \N 1970-01-18 +18 18 18 \N 1970-01-19 \N \N 1970-01-19 +19 19 19 \N \N \N \N \N +20 20 20 \N 20 \N 20 \N +21 21 21 \N str_21 str_21 \N \N +insert after alter modify column 2 +0 0 0 \N \N \N \N \N +1 1 1 \N \N \N \N \N +2 2 2 \N \N \N \N \N +3 3 3 \N 3 \N 3 \N +4 4 4 \N 4 \N 4 \N +5 5 5 \N 5 \N 5 \N +6 6 6 \N str_6 str_6 \N \N +7 7 7 \N str_7 str_7 \N \N +8 8 8 \N str_8 str_8 \N \N +9 9 9 \N \N \N \N \N +10 10 10 \N \N \N \N \N +11 11 11 \N \N \N \N \N +12 12 12 \N 12 \N 12 \N +13 13 13 \N str_13 str_13 \N \N +14 14 14 \N \N \N \N \N +15 15 15 \N 1970-01-16 \N \N 1970-01-16 +16 16 16 \N 1970-01-17 \N \N 1970-01-17 +17 17 17 \N 1970-01-18 \N \N 1970-01-18 +18 18 18 \N 1970-01-19 \N \N 1970-01-19 +19 19 19 \N \N \N \N \N +20 20 20 \N 20 \N 20 \N +21 21 21 \N str_21 str_21 \N \N +22 str_22 \N str_22 \N \N \N \N +23 \N \N \N \N \N \N \N +24 24 24 \N \N \N \N \N +MergeTree wide +initial insert +alter add column 1 +0 0 \N \N \N +1 1 \N \N \N +2 2 \N \N \N +insert after alter add column 1 +0 0 \N \N \N +1 1 \N \N \N +2 2 \N \N \N +3 3 3 \N 3 +4 4 4 \N 4 +5 5 5 \N 5 +6 6 str_6 str_6 \N +7 7 str_7 str_7 \N +8 8 str_8 str_8 \N +9 9 \N \N \N +10 10 \N \N \N +11 11 \N \N \N +12 12 12 \N 12 +13 13 str_13 str_13 \N +14 14 \N \N \N +alter modify column 1 +0 0 \N \N \N \N +1 1 \N \N \N \N +2 2 \N \N \N \N +3 3 3 \N 3 \N +4 4 4 \N 4 \N +5 5 5 \N 5 \N +6 6 str_6 str_6 \N \N +7 7 str_7 str_7 \N \N +8 8 str_8 str_8 \N \N +9 9 \N \N \N \N +10 10 \N \N \N \N +11 11 \N \N \N \N +12 12 12 \N 12 \N +13 13 str_13 str_13 \N \N +14 14 \N \N \N \N +insert after alter modify column 1 +0 0 \N \N \N \N +1 1 \N \N \N \N +2 2 \N \N \N \N +3 3 3 \N 3 \N +4 4 4 \N 4 \N +5 5 5 \N 5 \N +6 6 str_6 str_6 \N \N +7 7 str_7 str_7 \N \N +8 8 str_8 str_8 \N \N +9 9 \N \N \N \N +10 10 \N \N \N \N +11 11 \N \N \N \N +12 12 12 \N 12 \N +13 13 str_13 str_13 \N \N +14 14 \N \N \N \N +15 15 1970-01-16 \N \N 1970-01-16 +16 16 1970-01-17 \N \N 1970-01-17 +17 17 1970-01-18 \N \N 1970-01-18 +18 18 1970-01-19 \N \N 1970-01-19 +19 19 \N \N \N \N +20 20 20 \N 20 \N +21 
21 str_21 str_21 \N \N +alter modify column 2 +0 0 0 \N \N \N \N \N +1 1 1 \N \N \N \N \N +2 2 2 \N \N \N \N \N +3 3 3 \N 3 \N 3 \N +4 4 4 \N 4 \N 4 \N +5 5 5 \N 5 \N 5 \N +6 6 6 \N str_6 str_6 \N \N +7 7 7 \N str_7 str_7 \N \N +8 8 8 \N str_8 str_8 \N \N +9 9 9 \N \N \N \N \N +10 10 10 \N \N \N \N \N +11 11 11 \N \N \N \N \N +12 12 12 \N 12 \N 12 \N +13 13 13 \N str_13 str_13 \N \N +14 14 14 \N \N \N \N \N +15 15 15 \N 1970-01-16 \N \N 1970-01-16 +16 16 16 \N 1970-01-17 \N \N 1970-01-17 +17 17 17 \N 1970-01-18 \N \N 1970-01-18 +18 18 18 \N 1970-01-19 \N \N 1970-01-19 +19 19 19 \N \N \N \N \N +20 20 20 \N 20 \N 20 \N +21 21 21 \N str_21 str_21 \N \N +insert after alter modify column 2 +0 0 0 \N \N \N \N \N +1 1 1 \N \N \N \N \N +2 2 2 \N \N \N \N \N +3 3 3 \N 3 \N 3 \N +4 4 4 \N 4 \N 4 \N +5 5 5 \N 5 \N 5 \N +6 6 6 \N str_6 str_6 \N \N +7 7 7 \N str_7 str_7 \N \N +8 8 8 \N str_8 str_8 \N \N +9 9 9 \N \N \N \N \N +10 10 10 \N \N \N \N \N +11 11 11 \N \N \N \N \N +12 12 12 \N 12 \N 12 \N +13 13 13 \N str_13 str_13 \N \N +14 14 14 \N \N \N \N \N +15 15 15 \N 1970-01-16 \N \N 1970-01-16 +16 16 16 \N 1970-01-17 \N \N 1970-01-17 +17 17 17 \N 1970-01-18 \N \N 1970-01-18 +18 18 18 \N 1970-01-19 \N \N 1970-01-19 +19 19 19 \N \N \N \N \N +20 20 20 \N 20 \N 20 \N +21 21 21 \N str_21 str_21 \N \N +22 str_22 \N str_22 \N \N \N \N +23 \N \N \N \N \N \N \N +24 24 24 \N \N \N \N \N diff --git a/tests/queries/0_stateless/02941_variant_type_alters.sh b/tests/queries/0_stateless/02941_variant_type_alters.sh new file mode 100755 index 00000000000..7c151d1fe9e --- /dev/null +++ b/tests/queries/0_stateless/02941_variant_type_alters.sh @@ -0,0 +1,61 @@ +#!/usr/bin/env bash +# Tags: long + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# reset --log_comment +CLICKHOUSE_LOG_COMMENT= +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --use_variant_as_common_type=1 " + +function run() +{ + echo "initial insert" + $CH_CLIENT -q "insert into test select number, number from numbers(3)" + + echo "alter add column 1" + $CH_CLIENT -q "alter table test add column v Variant(UInt64, String) settings mutations_sync=1" + $CH_CLIENT -q "select x, y, v, v.String, v.UInt64 from test order by x" + + echo "insert after alter add column 1" + $CH_CLIENT -q "insert into test select number, number, number from numbers(3, 3)" + $CH_CLIENT -q "insert into test select number, number, 'str_' || toString(number) from numbers(6, 3)" + $CH_CLIENT -q "insert into test select number, number, NULL from numbers(9, 3)" + $CH_CLIENT -q "insert into test select number, number, multiIf(number % 3 == 0, number, number % 3 == 1, 'str_' || toString(number), NULL) from numbers(12, 3)" + $CH_CLIENT -q "select x, y, v, v.String, v.UInt64 from test order by x" + + echo "alter modify column 1" + $CH_CLIENT -q "alter table test modify column v Variant(UInt64, String, Date) settings mutations_sync=1" + $CH_CLIENT -q "select x, y, v, v.String, v.UInt64, v.Date from test order by x" + + echo "insert after alter modify column 1" + $CH_CLIENT -q "insert into test select number, number, toDate(number) from numbers(15, 3)" + $CH_CLIENT -q "insert into test select number, number, multiIf(number % 4 == 0, number, number % 4 == 1, 'str_' || toString(number), number % 4 == 2, toDate(number), NULL) from numbers(18, 4)" + $CH_CLIENT -q "select x, y, v, v.String, v.UInt64, v.Date from test order by x" + + echo "alter modify column 2" + $CH_CLIENT -q "alter table test modify column y Variant(UInt64, String) settings mutations_sync=1" + $CH_CLIENT -q "select x, y, y.UInt64, y.String, v, v.String, v.UInt64, v.Date from test order by x" + + echo "insert after alter modify column 2" + $CH_CLIENT -q "insert into test select number, multiIf(number % 3 == 0, number, number % 3 == 1, 'str_' || toString(number), NULL), NULL from numbers(22, 3)" + $CH_CLIENT -q "select x, y, y.UInt64, y.String, v, v.String, v.UInt64, v.Date from test order by x" +} + +$CH_CLIENT -q "drop table if exists test;" + +echo "Memory" +$CH_CLIENT -q "create table test (x UInt64, y UInt64) engine=Memory" +run +$CH_CLIENT -q "drop table test;" + +echo "MergeTree compact" +$CH_CLIENT -q "create table test (x UInt64, y UInt64) engine=MergeTree order by x settings min_rows_for_wide_part=100000000, min_bytes_for_wide_part=1000000000;" +run +$CH_CLIENT -q "drop table test;" + +echo "MergeTree wide" +$CH_CLIENT -q "create table test (x UInt64, y UInt64 ) engine=MergeTree order by x settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1;" +run +$CH_CLIENT -q "drop table test;" diff --git a/tests/queries/0_stateless/02942_variant_cast.reference b/tests/queries/0_stateless/02942_variant_cast.reference new file mode 100644 index 00000000000..f3fd7a9ba33 --- /dev/null +++ b/tests/queries/0_stateless/02942_variant_cast.reference @@ -0,0 +1,25 @@ +\N +42 +0 +\N +2 +\N +Hello +Hello +NULL +Hello +Hello +\N +Hello +\N +0 +\N +42 +\N +Hello +2 +\N +Hello +5 +0 +1 diff --git a/tests/queries/0_stateless/02942_variant_cast.sql b/tests/queries/0_stateless/02942_variant_cast.sql new file mode 100644 index 00000000000..fc2d1d63657 --- /dev/null +++ b/tests/queries/0_stateless/02942_variant_cast.sql @@ -0,0 +1,24 @@ +set allow_experimental_variant_type=1; +set allow_experimental_analyzer=0; -- It's currently doesn't work with 
analyzer because of the way it works with constants, but it will be refactored and fixed in future + +select NULL::Variant(String, UInt64); +select 42::UInt64::Variant(String, UInt64); +select 42::UInt32::Variant(String, UInt64); -- {serverError CANNOT_CONVERT_TYPE} +select now()::Variant(String, UInt64); -- {serverError CANNOT_CONVERT_TYPE} +select CAST(number % 2 ? NULL : number, 'Variant(String, UInt64)') from numbers(4); +select 'Hello'::LowCardinality(String)::Variant(LowCardinality(String), UInt64); +select 'Hello'::LowCardinality(Nullable(String))::Variant(LowCardinality(String), UInt64); +select 'NULL'::LowCardinality(Nullable(String))::Variant(LowCardinality(String), UInt64); +select 'Hello'::LowCardinality(Nullable(String))::Variant(LowCardinality(String), UInt64); +select CAST(CAST(number % 2 ? NULL : 'Hello', 'LowCardinality(Nullable(String))'), 'Variant(LowCardinality(String), UInt64)') from numbers(4); + +select NULL::Variant(String, UInt64)::UInt64; +select NULL::Variant(String, UInt64)::Nullable(UInt64); +select '42'::Variant(String, UInt64)::UInt64; +select 'str'::Variant(String, UInt64)::UInt64; -- {serverError CANNOT_PARSE_TEXT} +select CAST(multiIf(number % 3 == 0, NULL::Variant(String, UInt64), number % 3 == 1, 'Hello'::Variant(String, UInt64), number::Variant(String, UInt64)), 'Nullable(String)') from numbers(6); +select CAST(multiIf(number == 1, NULL::Variant(String, UInt64), number == 2, 'Hello'::Variant(String, UInt64), number::Variant(String, UInt64)), 'UInt64') from numbers(6); -- {serverError CANNOT_PARSE_TEXT} + + +select number::Variant(UInt64)::Variant(String, UInt64)::Variant(Array(String), String, UInt64) from numbers(2); +select 'str'::Variant(String, UInt64)::Variant(String, Array(UInt64)); -- {serverError CANNOT_CONVERT_TYPE} diff --git a/tests/queries/0_stateless/02943_variant_element.reference b/tests/queries/0_stateless/02943_variant_element.reference new file mode 100644 index 00000000000..ab8aaa8fdef --- /dev/null +++ b/tests/queries/0_stateless/02943_variant_element.reference @@ -0,0 +1,44 @@ +\N +\N +\N +\N +0 +1 +2 +3 +\N +\N +\N +\N +0 +\N +2 +\N +\N +\N +\N +\N +str_0 +\N +str_2 +\N +\N +\N +\N +\N +[] +[] +[] +[] +[] +[] +[] +[] +[0] +[] +[0,1,2] +[] +[[0]] +[[NULL]] +[[2]] +[[NULL]] diff --git a/tests/queries/0_stateless/02943_variant_element.sql b/tests/queries/0_stateless/02943_variant_element.sql new file mode 100644 index 00000000000..556c0147e56 --- /dev/null +++ b/tests/queries/0_stateless/02943_variant_element.sql @@ -0,0 +1,16 @@ +set allow_experimental_variant_type=1; +set use_variant_as_common_type=1; + +select variantElement(NULL::Variant(String, UInt64), 'UInt64') from numbers(4); +select variantElement(number::Variant(String, UInt64), 'UInt64') from numbers(4); +select variantElement(number::Variant(String, UInt64), 'String') from numbers(4); +select variantElement((number % 2 ? NULL : number)::Variant(String, UInt64), 'UInt64') from numbers(4); +select variantElement((number % 2 ? NULL : number)::Variant(String, UInt64), 'String') from numbers(4); +select variantElement((number % 2 ? NULL : 'str_' || toString(number))::LowCardinality(Nullable(String))::Variant(LowCardinality(String), UInt64), 'LowCardinality(String)') from numbers(4); +select variantElement(NULL::LowCardinality(Nullable(String))::Variant(LowCardinality(String), UInt64), 'LowCardinality(String)') from numbers(4); +select variantElement((number % 2 ? 
NULL : number)::Variant(Array(UInt64), UInt64), 'Array(UInt64)') from numbers(4); +select variantElement(NULL::Variant(Array(UInt64), UInt64), 'Array(UInt64)') from numbers(4); +select variantElement(number % 2 ? NULL : range(number + 1), 'Array(UInt64)') from numbers(4); + +select variantElement([[(number % 2 ? NULL : number)::Variant(String, UInt64)]], 'UInt64') from numbers(4); + diff --git a/tests/queries/0_stateless/02943_variant_read_subcolumns.reference b/tests/queries/0_stateless/02943_variant_read_subcolumns.reference new file mode 100644 index 00000000000..4b93782cddf --- /dev/null +++ b/tests/queries/0_stateless/02943_variant_read_subcolumns.reference @@ -0,0 +1,6 @@ +Memory +test +MergeTree compact +test +MergeTree wide +test diff --git a/tests/queries/0_stateless/02943_variant_read_subcolumns.sh b/tests/queries/0_stateless/02943_variant_read_subcolumns.sh new file mode 100755 index 00000000000..88be09c2036 --- /dev/null +++ b/tests/queries/0_stateless/02943_variant_read_subcolumns.sh @@ -0,0 +1,38 @@ +#!/usr/bin/env bash +# Tags: long + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# reset --log_comment +CLICKHOUSE_LOG_COMMENT= +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --use_variant_as_common_type=1 " + + +function test() +{ + echo "test" + $CH_CLIENT -q "insert into test select number, multiIf(number % 3 == 2, NULL, number % 3 == 1, number, arrayMap(x -> multiIf(number % 9 == 0, NULL, number % 9 == 3, 'str_' || toString(number), number), range(number % 10))) from numbers(1000000) settings min_insert_block_size_rows=100000" + $CH_CLIENT -q "select v, v.UInt64, v.\`Array(Variant(String, UInt64))\`, v.\`Array(Variant(String, UInt64))\`.size0, v.\`Array(Variant(String, UInt64))\`.UInt64 from test order by id format Null" + $CH_CLIENT -q "select v.UInt64, v.\`Array(Variant(String, UInt64))\`, v.\`Array(Variant(String, UInt64))\`.size0, v.\`Array(Variant(String, UInt64))\`.UInt64 from test order by id format Null" + $CH_CLIENT -q "select v.\`Array(Variant(String, UInt64))\`, v.\`Array(Variant(String, UInt64))\`.size0, v.\`Array(Variant(String, UInt64))\`.UInt64, v.\`Array(Variant(String, UInt64))\`.String from test order by id format Null" +} + +$CH_CLIENT -q "drop table if exists test;" + +echo "Memory" +$CH_CLIENT -q "create table test (id UInt64, v Variant(UInt64, Array(Variant(String, UInt64)))) engine=Memory" +test +$CH_CLIENT -q "drop table test;" + +echo "MergeTree compact" +$CH_CLIENT -q "create table test (id UInt64, v Variant(UInt64, Array(Variant(String, UInt64)))) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000;" +test +$CH_CLIENT -q "drop table test;" + +echo "MergeTree wide" +$CH_CLIENT -q "create table test (id UInt64, v Variant(UInt64, Array(Variant(String, UInt64)))) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1;" +test +$CH_CLIENT -q "drop table test;" + diff --git a/tests/queries/0_stateless/02943_variant_type_with_different_local_and_global_order.reference b/tests/queries/0_stateless/02943_variant_type_with_different_local_and_global_order.reference new file mode 100644 index 00000000000..1736a307c42 --- /dev/null +++ b/tests/queries/0_stateless/02943_variant_type_with_different_local_and_global_order.reference @@ -0,0 +1,244 @@ +Memory +test1 insert +test1 select +0 \N 0 +1 \N 1 +2 \N 2 +3 \N 3 +4 \N 4 +5 \N 5 +6 \N 6 +7 \N 7 +8 \N 8 +9 \N 9 +10 \N 
10 +\N \N \N +12 \N 12 +\N \N \N +14 \N 14 +\N \N \N +16 \N 16 +\N \N \N +18 \N 18 +\N \N \N +str_20 str_20 \N +\N \N \N +str_22 str_22 \N +\N \N \N +str_24 str_24 \N +\N \N \N +str_26 str_26 \N +\N \N \N +str_28 str_28 \N +\N \N \N +30 \N 30 +\N \N \N +32 \N 32 +\N \N \N +34 \N 34 +\N \N \N +str_36 str_36 \N +\N \N \N +str_38 str_38 \N +\N \N \N +----------------------------------------------------------------------------------------------------------- +test2 insert +test2 select +2500000 +750000 +1750000 +----------------------------------------------------------------------------------------------------------- +MergeTree compact +test1 insert +test1 select +0 \N 0 +1 \N 1 +2 \N 2 +3 \N 3 +4 \N 4 +5 \N 5 +6 \N 6 +7 \N 7 +8 \N 8 +9 \N 9 +10 \N 10 +\N \N \N +12 \N 12 +\N \N \N +14 \N 14 +\N \N \N +16 \N 16 +\N \N \N +18 \N 18 +\N \N \N +str_20 str_20 \N +\N \N \N +str_22 str_22 \N +\N \N \N +str_24 str_24 \N +\N \N \N +str_26 str_26 \N +\N \N \N +str_28 str_28 \N +\N \N \N +30 \N 30 +\N \N \N +32 \N 32 +\N \N \N +34 \N 34 +\N \N \N +str_36 str_36 \N +\N \N \N +str_38 str_38 \N +\N \N \N +----------------------------------------------------------------------------------------------------------- +test1 select +0 \N 0 +1 \N 1 +2 \N 2 +3 \N 3 +4 \N 4 +5 \N 5 +6 \N 6 +7 \N 7 +8 \N 8 +9 \N 9 +10 \N 10 +\N \N \N +12 \N 12 +\N \N \N +14 \N 14 +\N \N \N +16 \N 16 +\N \N \N +18 \N 18 +\N \N \N +str_20 str_20 \N +\N \N \N +str_22 str_22 \N +\N \N \N +str_24 str_24 \N +\N \N \N +str_26 str_26 \N +\N \N \N +str_28 str_28 \N +\N \N \N +30 \N 30 +\N \N \N +32 \N 32 +\N \N \N +34 \N 34 +\N \N \N +str_36 str_36 \N +\N \N \N +str_38 str_38 \N +\N \N \N +----------------------------------------------------------------------------------------------------------- +test2 insert +test2 select +2500000 +750000 +1750000 +----------------------------------------------------------------------------------------------------------- +test2 select +2500000 +750000 +1750000 +----------------------------------------------------------------------------------------------------------- +MergeTree wide +test1 insert +test1 select +0 \N 0 +1 \N 1 +2 \N 2 +3 \N 3 +4 \N 4 +5 \N 5 +6 \N 6 +7 \N 7 +8 \N 8 +9 \N 9 +10 \N 10 +\N \N \N +12 \N 12 +\N \N \N +14 \N 14 +\N \N \N +16 \N 16 +\N \N \N +18 \N 18 +\N \N \N +str_20 str_20 \N +\N \N \N +str_22 str_22 \N +\N \N \N +str_24 str_24 \N +\N \N \N +str_26 str_26 \N +\N \N \N +str_28 str_28 \N +\N \N \N +30 \N 30 +\N \N \N +32 \N 32 +\N \N \N +34 \N 34 +\N \N \N +str_36 str_36 \N +\N \N \N +str_38 str_38 \N +\N \N \N +----------------------------------------------------------------------------------------------------------- +test1 select +0 \N 0 +1 \N 1 +2 \N 2 +3 \N 3 +4 \N 4 +5 \N 5 +6 \N 6 +7 \N 7 +8 \N 8 +9 \N 9 +10 \N 10 +\N \N \N +12 \N 12 +\N \N \N +14 \N 14 +\N \N \N +16 \N 16 +\N \N \N +18 \N 18 +\N \N \N +str_20 str_20 \N +\N \N \N +str_22 str_22 \N +\N \N \N +str_24 str_24 \N +\N \N \N +str_26 str_26 \N +\N \N \N +str_28 str_28 \N +\N \N \N +30 \N 30 +\N \N \N +32 \N 32 +\N \N \N +34 \N 34 +\N \N \N +str_36 str_36 \N +\N \N \N +str_38 str_38 \N +\N \N \N +----------------------------------------------------------------------------------------------------------- +test2 insert +test2 select +2500000 +750000 +1750000 +----------------------------------------------------------------------------------------------------------- +test2 select +2500000 +750000 +1750000 +----------------------------------------------------------------------------------------------------------- diff --git 
a/tests/queries/0_stateless/02943_variant_type_with_different_local_and_global_order.sh b/tests/queries/0_stateless/02943_variant_type_with_different_local_and_global_order.sh new file mode 100755 index 00000000000..d089ed3cb2f --- /dev/null +++ b/tests/queries/0_stateless/02943_variant_type_with_different_local_and_global_order.sh @@ -0,0 +1,84 @@ +#!/usr/bin/env bash +# Tags: long + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# reset --log_comment +CLICKHOUSE_LOG_COMMENT= +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --use_variant_as_common_type=1 " + + +function test1_insert() +{ + echo "test1 insert" + $CH_CLIENT -q "insert into test select number, number::Variant(UInt64)::Variant(UInt64, Array(UInt64)) from numbers(10) settings max_block_size=3" + $CH_CLIENT -q "insert into test select number, if(number % 2, NULL, number)::Variant(UInt64)::Variant(UInt64, String, Array(UInt64)) as res from numbers(10, 10) settings max_block_size=3" + $CH_CLIENT -q "insert into test select number, if(number % 2, NULL, 'str_' || toString(number))::Variant(String)::Variant(UInt64, String, Array(UInt64)) as res from numbers(20, 10) settings max_block_size=3" + $CH_CLIENT -q "insert into test select number, if(number < 35, if(number % 2, NULL, number)::Variant(UInt64)::Variant(UInt64, String, Array(UInt64)), if(number % 2, NULL, 'str_' || toString(number))::Variant(String)::Variant(UInt64, String, Array(UInt64))) from numbers(30, 10) settings max_block_size=3" +} + +function test1_select() +{ + echo "test1 select" + $CH_CLIENT -q "select v, v.String, v.UInt64 from test order by id;" + echo "-----------------------------------------------------------------------------------------------------------" +} + +function test2_insert() +{ + echo "test2 insert" + $CH_CLIENT -q "insert into test select number, number::Variant(UInt64)::Variant(UInt64, Array(UInt64)) from numbers(1000000) settings max_insert_block_size = 100000, min_insert_block_size_rows=100000" + $CH_CLIENT -q "insert into test select number, if(number % 2, NULL, number)::Variant(UInt64)::Variant(UInt64, String, Array(UInt64)) as res from numbers(1000000, 1000000) settings max_insert_block_size = 100000, min_insert_block_size_rows=100000" + $CH_CLIENT -q "insert into test select number, if(number % 2, NULL, 'str_' || toString(number))::Variant(String)::Variant(UInt64, String, Array(UInt64)) as res from numbers(2000000, 1000000) settings max_insert_block_size = 100000, min_insert_block_size_rows=100000" + $CH_CLIENT -q "insert into test select number, if(number < 3500000, if(number % 2, NULL, number)::Variant(UInt64)::Variant(UInt64, String, Array(UInt64)), if(number % 2, NULL, 'str_' || toString(number))::Variant(String)::Variant(UInt64, String, Array(UInt64))) from numbers(3000000, 1000000) settings max_insert_block_size = 100000, min_insert_block_size_rows=100000" +} + +function test2_select() +{ + echo "test2 select" + $CH_CLIENT -q "select v, v.String, v.UInt64 from test format Null;" + $CH_CLIENT -q "select v from test format Null;" + $CH_CLIENT -q "select count() from test where isNotNull(v);" + $CH_CLIENT -q "select v.String from test format Null;" + $CH_CLIENT -q "select count() from test where isNotNull(v.String);" + $CH_CLIENT -q "select v.UInt64 from test format Null;" + $CH_CLIENT -q "select count() from test where isNotNull(v.UInt64);" + echo 
"-----------------------------------------------------------------------------------------------------------" +} + +function run() +{ + test1_insert + test1_select + if [ $1 == 1 ]; then + $CH_CLIENT -q "optimize table test final;" + test1_select + fi + $CH_CLIENT -q "truncate table test;" + test2_insert + test2_select + if [ $1 == 1 ]; then + $CH_CLIENT -q "optimize table test final;" + test2_select + fi + $CH_CLIENT -q "truncate table test;" +} + +$CH_CLIENT -q "drop table if exists test;" + +echo "Memory" +$CH_CLIENT -q "create table test (id UInt64, v Variant(UInt64, String, Array(UInt64))) engine=Memory;" +run 0 +$CH_CLIENT -q "drop table test;" + +echo "MergeTree compact" +$CH_CLIENT -q "create table test (id UInt64, v Variant(UInt64, String, Array(UInt64))) engine=MergeTree order by id settings min_rows_for_wide_part=100000000, min_bytes_for_wide_part=1000000000;" +run 1 +$CH_CLIENT -q "drop table test;" + +echo "MergeTree wide" +$CH_CLIENT -q "create table test (id UInt64, v Variant(UInt64, String, Array(UInt64))) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1;" +run 1 +$CH_CLIENT -q "drop table test;" diff --git a/tests/queries/0_stateless/02944_variant_as_common_type.reference b/tests/queries/0_stateless/02944_variant_as_common_type.reference new file mode 100644 index 00000000000..0425a8cfa30 --- /dev/null +++ b/tests/queries/0_stateless/02944_variant_as_common_type.reference @@ -0,0 +1,103 @@ +Array(UInt8) [1,2,3] +Array(UInt8) [1,2,3] +String str_1 +Nullable(String) str_1 +String str_1 +Nullable(String) str_1 +Variant(Array(UInt8), String) str_1 +Variant(Array(UInt8), String) str_1 +Array(UInt8) [1,2,3] +Array(UInt8) [1,2,3] +String str_1 +Nullable(String) str_1 +String str_1 +Nullable(String) str_1 +Variant(Array(UInt8), String) str_1 +Variant(Array(UInt8), String) str_1 +Array(UInt8) [1,2,3] +Array(UInt8) [1,2,3] +String str_1 +Nullable(String) str_1 +String str_1 +Nullable(String) str_1 +Variant(Array(UInt8), String) str_1 +Variant(Array(UInt8), String) str_1 +String str_0 +String str_1 +String str_2 +String str_3 +Nullable(String) str_0 +Nullable(String) str_1 +Nullable(String) str_2 +Nullable(String) str_3 +Array(UInt64) [0] +Array(UInt64) [0,1] +Array(UInt64) [0,1,2] +Array(UInt64) [0,1,2,3] +Array(UInt64) [0] +Array(UInt64) [0,1] +Array(UInt64) [0,1,2] +Array(UInt64) [0,1,2,3] +String str_0 +String str_1 +String str_2 +String str_3 +Nullable(String) str_0 +Nullable(String) str_1 +Nullable(String) str_2 +Nullable(String) str_3 +Variant(Array(UInt64), String) str_0 +Variant(Array(UInt64), String) str_1 +Variant(Array(UInt64), String) str_2 +Variant(Array(UInt64), String) str_3 +Variant(Array(UInt64), String) str_0 +Variant(Array(UInt64), String) str_1 +Variant(Array(UInt64), String) str_2 +Variant(Array(UInt64), String) str_3 +Variant(Array(UInt64), String) str_0 +Variant(Array(UInt64), String) [0,1] +Variant(Array(UInt64), String) str_2 +Variant(Array(UInt64), String) [0,1,2,3] +Variant(Array(UInt64), String) str_0 +Variant(Array(UInt64), String) [0,1] +Variant(Array(UInt64), String) str_2 +Variant(Array(UInt64), String) [0,1,2,3] +Variant(Array(UInt64), String) str_0 +Variant(Array(UInt64), String) [0,1] +Variant(Array(UInt64), String) str_2 +Variant(Array(UInt64), String) [0,1,2,3] +Variant(Array(UInt64), String) str_0 +Variant(Array(UInt64), String) [0,1] +Variant(Array(UInt64), String) str_2 +Variant(Array(UInt64), String) [0,1,2,3] +Variant(Array(UInt64), String, UInt64) [0] +Variant(Array(UInt64), String, UInt64) 1 
+Variant(Array(UInt64), String, UInt64) str_2 +Variant(Array(UInt64), String, UInt64) [0,1,2,3] +Variant(Array(UInt64), String, UInt64) 4 +Variant(Array(UInt64), String, UInt64) str_5 +Variant(Array(UInt64), String, UInt64) [0] +Variant(Array(UInt64), String, UInt64) 1 +Variant(Array(UInt64), String, UInt64) str_2 +Variant(Array(UInt64), String, UInt64) [0,1,2,3] +Variant(Array(UInt64), String, UInt64) 4 +Variant(Array(UInt64), String, UInt64) str_5 +Variant(Array(UInt64), String, UInt64) [0] +Variant(Array(UInt64), String, UInt64) 1 +Variant(Array(UInt64), String, UInt64) str_2 +Variant(Array(UInt64), String, UInt64) [0,1,2,3] +Variant(Array(UInt64), String, UInt64) 4 +Variant(Array(UInt64), String, UInt64) str_5 +Variant(Array(UInt64), String, UInt64) [0] +Variant(Array(UInt64), String, UInt64) 1 +Variant(Array(UInt64), String, UInt64) str_2 +Variant(Array(UInt64), String, UInt64) [0,1,2,3] +Variant(Array(UInt64), String, UInt64) 4 +Variant(Array(UInt64), String, UInt64) str_5 +Array(Variant(String, UInt8)) [1,'str_1',2,'str_2'] +Array(Variant(Array(String), Array(UInt8))) [[1,2,3],['str_1','str_2','str_3']] +Array(Variant(Array(UInt8), Array(Variant(Array(String), Array(UInt8))))) [[[1,2,3],['str_1','str_2','str_3']],[1,2,3]] +Array(Variant(Array(Array(UInt8)), Array(UInt8))) [[1,2,3],[[1,2,3]]] +Map(String, Variant(String, UInt8)) {'a':1,'b':'str_1'} +Map(String, Variant(Map(String, Variant(String, UInt8)), UInt8)) {'a':1,'b':{'c':2,'d':'str_1'}} +Map(String, Variant(Array(Array(UInt8)), Array(UInt8), UInt8)) {'a':1,'b':[1,2,3],'c':[[4,5,6]]} diff --git a/tests/queries/0_stateless/02944_variant_as_common_type.sql b/tests/queries/0_stateless/02944_variant_as_common_type.sql new file mode 100644 index 00000000000..e985cf365dd --- /dev/null +++ b/tests/queries/0_stateless/02944_variant_as_common_type.sql @@ -0,0 +1,76 @@ +set allow_experimental_analyzer=0; -- The result type for the if function with constants is different with the analyzer. It will be fixed after the refactoring around constants in the analyzer. 
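+-- With use_variant_as_common_type=1 (set below), if/multiIf/array/map over otherwise-incompatible argument types are expected to infer a Variant(...) common type; the expected inferred types for each query are listed in the .reference file above.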
+ +set allow_experimental_variant_type=1; +set use_variant_as_common_type=1; + +select toTypeName(res), if(1, [1,2,3], 'str_1') as res; +select toTypeName(res), if(1, [1,2,3], 'str_1'::Nullable(String)) as res; + +select toTypeName(res), if(0, [1,2,3], 'str_1') as res; +select toTypeName(res), if(0, [1,2,3], 'str_1'::Nullable(String)) as res; + +select toTypeName(res), if(NULL, [1,2,3], 'str_1') as res; +select toTypeName(res), if(NULL, [1,2,3], 'str_1'::Nullable(String)) as res; + +select toTypeName(res), if(materialize(NULL::Nullable(UInt8)), [1,2,3], 'str_1') as res; +select toTypeName(res), if(materialize(NULL::Nullable(UInt8)), [1,2,3], 'str_1'::Nullable(String)) as res; + +select toTypeName(res), if(1, materialize([1,2,3]), 'str_1') as res; +select toTypeName(res), if(1, materialize([1,2,3]), 'str_1'::Nullable(String)) as res; + +select toTypeName(res), if(0, materialize([1,2,3]), 'str_1') as res; +select toTypeName(res), if(0, materialize([1,2,3]), 'str_1'::Nullable(String)) as res; + +select toTypeName(res), if(NULL, materialize([1,2,3]), 'str_1') as res; +select toTypeName(res), if(NULL, materialize([1,2,3]), 'str_1'::Nullable(String)) as res; + +select toTypeName(res), if(materialize(NULL::Nullable(UInt8)), materialize([1,2,3]), 'str_1') as res; +select toTypeName(res), if(materialize(NULL::Nullable(UInt8)), materialize([1,2,3]), 'str_1'::Nullable(String)) as res; + +select toTypeName(res), if(1, [1,2,3], materialize('str_1')) as res; +select toTypeName(res), if(1, [1,2,3], materialize('str_1')::Nullable(String)) as res; + +select toTypeName(res), if(0, [1,2,3], materialize('str_1')) as res; +select toTypeName(res), if(0, [1,2,3], materialize('str_1')::Nullable(String)) as res; + +select toTypeName(res), if(NULL, [1,2,3], materialize('str_1')) as res; +select toTypeName(res), if(NULL, [1,2,3], materialize('str_1')::Nullable(String)) as res; + +select toTypeName(res), if(materialize(NULL::Nullable(UInt8)), [1,2,3], materialize('str_1')) as res; +select toTypeName(res), if(materialize(NULL::Nullable(UInt8)), [1,2,3], materialize('str_1')::Nullable(String)) as res; + + +select toTypeName(res), if(0, range(number + 1), 'str_' || toString(number)) as res from numbers(4); +select toTypeName(res), if(0, range(number + 1), ('str_' || toString(number))::Nullable(String)) as res from numbers(4); + +select toTypeName(res), if(1, range(number + 1), 'str_' || toString(number)) as res from numbers(4); +select toTypeName(res), if(1, range(number + 1), ('str_' || toString(number))::Nullable(String)) as res from numbers(4); + +select toTypeName(res), if(NULL, range(number + 1), 'str_' || toString(number)) as res from numbers(4); +select toTypeName(res), if(NULL, range(number + 1), ('str_' || toString(number))::Nullable(String)) as res from numbers(4); + +select toTypeName(res), if(materialize(NULL::Nullable(UInt8)), range(number + 1), 'str_' || toString(number)) as res from numbers(4); +select toTypeName(res), if(materialize(NULL::Nullable(UInt8)), range(number + 1), ('str_' || toString(number))::Nullable(String)) as res from numbers(4); + +select toTypeName(res), if(number % 2, range(number + 1), 'str_' || toString(number)) as res from numbers(4); +select toTypeName(res), if(number % 2, range(number + 1), ('str_' || toString(number))::Nullable(String)) as res from numbers(4); + +select toTypeName(res), if(number % 2, range(number + 1), ('str_' || toString(number))::LowCardinality(String)) as res from numbers(4); +select toTypeName(res), if(number % 2, range(number + 1), ('str_' || 
toString(number))::LowCardinality(Nullable(String))) as res from numbers(4); + + +select toTypeName(res), multiIf(number % 3 == 0, range(number + 1), number % 3 == 1, number, 'str_' || toString(number)) as res from numbers(6); +select toTypeName(res), multiIf(number % 3 == 0, range(number + 1), number % 3 == 1, number, ('str_' || toString(number))::Nullable(String)) as res from numbers(6); +select toTypeName(res), multiIf(number % 3 == 0, range(number + 1), number % 3 == 1, number, ('str_' || toString(number))::LowCardinality(String)) as res from numbers(6); +select toTypeName(res), multiIf(number % 3 == 0, range(number + 1), number % 3 == 1, number, ('str_' || toString(number))::LowCardinality(Nullable(String))) as res from numbers(6); + + +select toTypeName(res), array(1, 'str_1', 2, 'str_2') as res; +select toTypeName(res), array([1, 2, 3], ['str_1', 'str_2', 'str_3']) as res; +select toTypeName(res), array(array([1, 2, 3], ['str_1', 'str_2', 'str_3']), [1, 2, 3]) as res; +select toTypeName(res), array([1, 2, 3], [[1, 2, 3]]) as res; + +select toTypeName(res), map('a', 1, 'b', 'str_1') as res; +select toTypeName(res), map('a', 1, 'b', map('c', 2, 'd', 'str_1')) as res; +select toTypeName(res), map('a', 1, 'b', [1, 2, 3], 'c', [[4, 5, 6]]) as res; + diff --git a/tests/queries/0_stateless/02955_avro_format_zstd_encode_support.reference b/tests/queries/0_stateless/02955_avro_format_zstd_encode_support.reference new file mode 100644 index 00000000000..ea90ee31980 --- /dev/null +++ b/tests/queries/0_stateless/02955_avro_format_zstd_encode_support.reference @@ -0,0 +1 @@ +45 diff --git a/tests/queries/0_stateless/02955_avro_format_zstd_encode_support.sql b/tests/queries/0_stateless/02955_avro_format_zstd_encode_support.sql new file mode 100644 index 00000000000..b88e1dbcccc --- /dev/null +++ b/tests/queries/0_stateless/02955_avro_format_zstd_encode_support.sql @@ -0,0 +1,16 @@ +-- Tags: no-fasttest +DROP TABLE IF EXISTS t; +CREATE TABLE t +( + `n1` Int32 +) +ENGINE = File(Avro) +SETTINGS output_format_avro_codec = 'zstd'; + +INSERT INTO t SELECT * +FROM numbers(10); + +SELECT sum(n1) +FROM t; + +DROP TABLE t; diff --git a/tests/queries/0_stateless/02956_fix_to_start_of_milli_microsecond.reference b/tests/queries/0_stateless/02956_fix_to_start_of_milli_microsecond.reference new file mode 100644 index 00000000000..d3a002c4fd4 --- /dev/null +++ b/tests/queries/0_stateless/02956_fix_to_start_of_milli_microsecond.reference @@ -0,0 +1,4 @@ +2023-10-09 10:11:12.001 +2023-10-09 10:11:12.001 +2023-10-09 10:11:12.000 +2023-10-09 10:11:12.000 diff --git a/tests/queries/0_stateless/02956_fix_to_start_of_milli_microsecond.sql b/tests/queries/0_stateless/02956_fix_to_start_of_milli_microsecond.sql new file mode 100644 index 00000000000..178f21a9e63 --- /dev/null +++ b/tests/queries/0_stateless/02956_fix_to_start_of_milli_microsecond.sql @@ -0,0 +1,4 @@ +SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.000999', 6), toIntervalMillisecond(1)); +SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.000500', 6), toIntervalMillisecond(1)); +SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.000499', 6), toIntervalMillisecond(1)); +SELECT toStartOfInterval(toDateTime64('2023-10-09 10:11:12.000999', 6), toIntervalMillisecond(10)); \ No newline at end of file diff --git a/tests/queries/0_stateless/02960_alter_table_part_query_parameter.reference b/tests/queries/0_stateless/02960_alter_table_part_query_parameter.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git 
a/tests/queries/0_stateless/02960_alter_table_part_query_parameter.sql b/tests/queries/0_stateless/02960_alter_table_part_query_parameter.sql new file mode 100644 index 00000000000..31d3ae10271 --- /dev/null +++ b/tests/queries/0_stateless/02960_alter_table_part_query_parameter.sql @@ -0,0 +1,15 @@ +drop table if exists data; +create table data (key Int) engine=MergeTree() order by key; + +insert into data values (1); + +set param_part='all_1_1_0'; +alter table data detach part {part:String}; +alter table data attach part {part:String}; +set param_part='all_2_2_0'; +alter table data detach part {part:String}; +alter table data drop detached part {part:String} settings allow_drop_detached=1; + +insert into data values (2); +set param_part='all_3_3_0'; +alter table data drop part {part:String}; diff --git a/tests/queries/0_stateless/02960_polygon_bound_bug.reference b/tests/queries/0_stateless/02960_polygon_bound_bug.reference new file mode 100644 index 00000000000..573541ac970 --- /dev/null +++ b/tests/queries/0_stateless/02960_polygon_bound_bug.reference @@ -0,0 +1 @@ +0 diff --git a/tests/queries/0_stateless/02960_polygon_bound_bug.sh b/tests/queries/0_stateless/02960_polygon_bound_bug.sh new file mode 100755 index 00000000000..0c3db01a77c --- /dev/null +++ b/tests/queries/0_stateless/02960_polygon_bound_bug.sh @@ -0,0 +1,11 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +$CLICKHOUSE_LOCAL -nm -q "CREATE TABLE test_table (geom MultiPolygon) engine=MergeTree ORDER BY geom; +INSERT INTO test_table SELECT * FROM file('$CURDIR/data_parquet/02960_polygon_bound_bug.parquet', Parquet); +CREATE DICTIONARY test_dict (geom MultiPolygon) PRIMARY KEY geom SOURCE (CLICKHOUSE(TABLE 'test_table')) LIFETIME(MIN 0 MAX 0) LAYOUT(POLYGON(STORE_POLYGON_KEY_COLUMN 1)); +SELECT dictHas(test_dict,(174.84729269276494,-36.99524960275426));" diff --git a/tests/queries/0_stateless/02962_analyzer_const_in_count_distinct.reference b/tests/queries/0_stateless/02962_analyzer_const_in_count_distinct.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/tests/queries/0_stateless/02962_analyzer_const_in_count_distinct.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/02962_analyzer_const_in_count_distinct.sql b/tests/queries/0_stateless/02962_analyzer_const_in_count_distinct.sql new file mode 100644 index 00000000000..669018a1308 --- /dev/null +++ b/tests/queries/0_stateless/02962_analyzer_const_in_count_distinct.sql @@ -0,0 +1,8 @@ +set count_distinct_optimization = 1; + +SELECT uniqExact('257') +FROM + (SELECT + number, CAST(number / 9223372036854775806, 'UInt64') AS m + FROM numbers(3) + ); diff --git a/tests/queries/0_stateless/02962_analyzer_constant_set.reference b/tests/queries/0_stateless/02962_analyzer_constant_set.reference new file mode 100644 index 00000000000..ec635144f60 --- /dev/null +++ b/tests/queries/0_stateless/02962_analyzer_constant_set.reference @@ -0,0 +1 @@ +9 diff --git a/tests/queries/0_stateless/02962_analyzer_constant_set.sql b/tests/queries/0_stateless/02962_analyzer_constant_set.sql new file mode 100644 index 00000000000..aae2f1c0145 --- /dev/null +++ b/tests/queries/0_stateless/02962_analyzer_constant_set.sql @@ -0,0 +1,15 @@ +DROP TABLE IF EXISTS test_parallel_index; + +CREATE TABLE test_parallel_index +( + z UInt64, + INDEX i z TYPE set(8) +) +ENGINE = MergeTree +ORDER BY (); + +insert into test_parallel_index select 
number from numbers(10); + +select sum(z) from test_parallel_index where z = 2 or z = 7 or z = 13 or z = 17 or z = 19 or z = 23; + +DROP TABLE test_parallel_index; diff --git a/tests/queries/0_stateless/02962_analyzer_resolve_group_by_on_shards.reference b/tests/queries/0_stateless/02962_analyzer_resolve_group_by_on_shards.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02962_analyzer_resolve_group_by_on_shards.sql b/tests/queries/0_stateless/02962_analyzer_resolve_group_by_on_shards.sql new file mode 100644 index 00000000000..00a80067994 --- /dev/null +++ b/tests/queries/0_stateless/02962_analyzer_resolve_group_by_on_shards.sql @@ -0,0 +1,20 @@ +SELECT NULL AND (toDate(-2147483647, NULL) AND NULL) +FROM remote('127.0.0.{1,2}', view( + SELECT + NULL AND NULL, + NULL, + toDate(toDate('0.0001048577', toDate(NULL, 10 AND (toDate(257, 9223372036854775807, NULL) AND NULL AND NULL) AND NULL, 7, NULL), NULL, NULL) AND NULL AND -2147483648, NULL, NULL) AND NULL + FROM system.one + WHERE toDate(toDate(NULL, NULL, NULL), NULL) + GROUP BY + GROUPING SETS ((NULL)) +)); + +SELECT NULL AND (toDate(-2147483647, NULL) AND NULL) +FROM remote('127.0.0.{1,2}', view( + SELECT NULL + FROM system.one + WHERE toDate(toDate(NULL, NULL, NULL), NULL) + GROUP BY + GROUPING SETS (('')) +)); diff --git a/tests/queries/0_stateless/02962_arrow_dictionary_indexes_types.reference b/tests/queries/0_stateless/02962_arrow_dictionary_indexes_types.reference new file mode 100644 index 00000000000..05e8b3e24fc --- /dev/null +++ b/tests/queries/0_stateless/02962_arrow_dictionary_indexes_types.reference @@ -0,0 +1,4 @@ +lc: dictionary not null +lc: dictionary not null +lc: dictionary not null +lc: dictionary not null diff --git a/tests/queries/0_stateless/02962_arrow_dictionary_indexes_types.sh b/tests/queries/0_stateless/02962_arrow_dictionary_indexes_types.sh new file mode 100755 index 00000000000..467c98b47cf --- /dev/null +++ b/tests/queries/0_stateless/02962_arrow_dictionary_indexes_types.sh @@ -0,0 +1,23 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +DATA_FILE=$CLICKHOUSE_TEST_UNIQUE_NAME.arrow + +$CLICKHOUSE_LOCAL -q "select toLowCardinality(toString(number)) as lc from numbers(1000) format Arrow settings output_format_arrow_low_cardinality_as_dictionary=1, output_format_arrow_use_signed_indexes_for_dictionary=1, output_format_arrow_use_64_bit_indexes_for_dictionary=1" > $DATA_FILE +python3 -c "import pyarrow as pa; print(pa.ipc.open_file(pa.OSFile('$DATA_FILE', 'rb')).read_all().schema)" + +$CLICKHOUSE_LOCAL -q "select toLowCardinality(toString(number)) as lc from numbers(1000) format Arrow settings output_format_arrow_low_cardinality_as_dictionary=1, output_format_arrow_use_signed_indexes_for_dictionary=1, output_format_arrow_use_64_bit_indexes_for_dictionary=0" > $DATA_FILE +python3 -c "import pyarrow as pa; print(pa.ipc.open_file(pa.OSFile('$DATA_FILE', 'rb')).read_all().schema)" + +$CLICKHOUSE_LOCAL -q "select toLowCardinality(toString(number)) as lc from numbers(1000) format Arrow settings output_format_arrow_low_cardinality_as_dictionary=1, output_format_arrow_use_signed_indexes_for_dictionary=0, output_format_arrow_use_64_bit_indexes_for_dictionary=1" > $DATA_FILE +python3 -c "import pyarrow as pa; print(pa.ipc.open_file(pa.OSFile('$DATA_FILE', 'rb')).read_all().schema)" + +$CLICKHOUSE_LOCAL -q "select toLowCardinality(toString(number)) as lc from numbers(1000) format Arrow settings output_format_arrow_low_cardinality_as_dictionary=1, output_format_arrow_use_signed_indexes_for_dictionary=0, output_format_arrow_use_64_bit_indexes_for_dictionary=0" > $DATA_FILE +python3 -c "import pyarrow as pa; print(pa.ipc.open_file(pa.OSFile('$DATA_FILE', 'rb')).read_all().schema)" + +rm $DATA_FILE + diff --git a/tests/queries/0_stateless/02962_indexHint_rpn_construction.reference b/tests/queries/0_stateless/02962_indexHint_rpn_construction.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02962_indexHint_rpn_construction.sql b/tests/queries/0_stateless/02962_indexHint_rpn_construction.sql new file mode 100644 index 00000000000..3532bea57fa --- /dev/null +++ b/tests/queries/0_stateless/02962_indexHint_rpn_construction.sql @@ -0,0 +1,20 @@ +CREATE TABLE tab +( + `foo` Array(LowCardinality(String)), + INDEX idx foo TYPE bloom_filter GRANULARITY 1 +) +ENGINE = MergeTree +PRIMARY KEY tuple(); + +INSERT INTO tab SELECT if(number % 2, ['value'], []) +FROM system.numbers +LIMIT 10000; + +SELECT * +FROM tab +PREWHERE indexHint(indexHint(-1, 0.)) +WHERE has(foo, 'b'); + +SELECT * +FROM tab +PREWHERE indexHint(0); diff --git a/tests/queries/0_stateless/02962_parallel_window_functions_different_partitioning.reference b/tests/queries/0_stateless/02962_parallel_window_functions_different_partitioning.reference new file mode 100644 index 00000000000..f18a39e191e --- /dev/null +++ b/tests/queries/0_stateless/02962_parallel_window_functions_different_partitioning.reference @@ -0,0 +1,18 @@ +sales 15000 +sales 15000 +sales 15000 +sales 29400 +sales 29400 +sales 29400 +sales 43800 +sales 43800 +sales 43800 +sales 15000 5000 +sales 15000 5000 +sales 15000 5000 +sales 29400 4800 +sales 29400 4800 +sales 29400 4800 +sales 43800 4800 +sales 43800 4800 +sales 43800 4800 diff --git a/tests/queries/0_stateless/02962_parallel_window_functions_different_partitioning.sql b/tests/queries/0_stateless/02962_parallel_window_functions_different_partitioning.sql new file mode 100644 index 00000000000..90af415c5ea --- /dev/null +++ 
b/tests/queries/0_stateless/02962_parallel_window_functions_different_partitioning.sql @@ -0,0 +1,32 @@ +CREATE TABLE empsalary +( + `depname` LowCardinality(String), + `empno` UInt64, + `salary` Int32, + `enroll_date` Date +) +ENGINE = Memory; + +insert into empsalary values ('sales',3,4800,'2007-08-01'), ('sales',1,5000,'2006-10-01'), ('sales',4,4800,'2007-08-08'); + + +insert into empsalary values ('sales',3,4800,'2007-08-01'), ('sales',1,5000,'2006-10-01'), ('sales',4,4800,'2007-08-08'); + +insert into empsalary values ('sales',3,4800,'2007-08-01'), ('sales',1,5000,'2006-10-01'), ('sales',4,4800,'2007-08-08'); + +-- 1 window function + +SELECT depname, + sum(salary) OVER (PARTITION BY depname order by empno) AS depsalary +FROM empsalary +order by depsalary; + + +-- 2 window functions with different window, +-- but result should be the same for depsalary + +SELECT depname, + sum(salary) OVER (PARTITION BY depname order by empno) AS depsalary, + min(salary) OVER (PARTITION BY depname, empno order by enroll_date) AS depminsalary +FROM empsalary +order by depsalary; diff --git a/tests/queries/0_stateless/02962_system_sync_replica_lightweight_from_modifier.reference b/tests/queries/0_stateless/02962_system_sync_replica_lightweight_from_modifier.reference new file mode 100644 index 00000000000..42d5519df66 --- /dev/null +++ b/tests/queries/0_stateless/02962_system_sync_replica_lightweight_from_modifier.reference @@ -0,0 +1,4 @@ +Replication did not hang: synced all replicas of test_table_ +Consistency: 1 +Test completed +Data consistency check passed diff --git a/tests/queries/0_stateless/02962_system_sync_replica_lightweight_from_modifier.sh b/tests/queries/0_stateless/02962_system_sync_replica_lightweight_from_modifier.sh new file mode 100755 index 00000000000..f47801abf73 --- /dev/null +++ b/tests/queries/0_stateless/02962_system_sync_replica_lightweight_from_modifier.sh @@ -0,0 +1,90 @@ +#!/usr/bin/env bash +# Tags: zookeeper, no-parallel, no-fasttest + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh +# shellcheck source=./replication.lib +. 
"$CURDIR"/replication.lib + +TOTAL_REPLICAS=10 +REPLICAS_TO_DROP=7 +export TOTAL_REPLICAS +export REPLICAS_TO_DROP + +for i in $(seq $TOTAL_REPLICAS); do + $CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS test_table_$i" + $CLICKHOUSE_CLIENT --query "CREATE TABLE test_table_$i (key UInt64, value UInt8) ENGINE = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/test_table', '$i') ORDER BY key" +done + +function insert_thread() { + while true; do + REPLICA=$(($RANDOM % $TOTAL_REPLICAS + 1)) + $CLICKHOUSE_CLIENT --query "INSERT INTO test_table_$REPLICA VALUES ($RANDOM, $RANDOM % 255)" + sleep 0.$RANDOM + done +} + +function sync_and_drop_replicas() { + while true; do + for i in $(seq $REPLICAS_TO_DROP); do + local stable_replica_id=$((i + 1)) + $CLICKHOUSE_CLIENT --query "ALTER TABLE test_table_$i MODIFY SETTING parts_to_throw_insert = 0" + $CLICKHOUSE_CLIENT --query "SYSTEM SYNC REPLICA test_table_$stable_replica_id LIGHTWEIGHT FROM '$i'" + $CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS test_table_$i" + done + + for i in $(seq $REPLICAS_TO_DROP); do + $CLICKHOUSE_CLIENT --query "CREATE TABLE test_table_$i (key UInt64, value UInt8) ENGINE = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/test_table', '$i') ORDER BY key" + done + done +} + +function optimize_thread() { + while true; do + REPLICA=$(($RANDOM % $TOTAL_REPLICAS + 1)) + $CLICKHOUSE_CLIENT --query "OPTIMIZE TABLE test_table_$REPLICA FINAL" + sleep 0.$RANDOM + done +} + +function mutations_thread() { + while true; do + REPLICA=$(($RANDOM % $TOTAL_REPLICAS + 1)) + CONDITION="key % 2 = 0" + $CLICKHOUSE_CLIENT --query "ALTER TABLE test_table_$REPLICA DELETE WHERE $CONDITION" + sleep 0.$RANDOM + done +} + +export -f insert_thread +export -f sync_and_drop_replicas +export -f optimize_thread +export -f mutations_thread + +TIMEOUT=60 + +timeout $TIMEOUT bash -c insert_thread 2> /dev/null & +timeout $TIMEOUT bash -c sync_and_drop_replicas 2> /dev/null & +timeout $TIMEOUT bash -c optimize_thread 2> /dev/null & +timeout $TIMEOUT bash -c mutations_thread 2> /dev/null & + +wait + +check_replication_consistency "test_table_" "count(), sum(key), sum(value)" + +echo "Test completed" + +lost_parts_count=$($CLICKHOUSE_CLIENT --query "SELECT SUM(lost_part_count) FROM system.replicas WHERE database=currentDatabase()") +if [ "$lost_parts_count" -ne 0 ]; then + echo "Data consistency check failed: lost parts count is not zero" + exit 1 +fi + +echo "Data consistency check passed" + +for i in $(seq $TOTAL_REPLICAS); do + if [ $i -gt $REPLICAS_TO_DROP ]; then + $CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS test_table_$i" + fi +done \ No newline at end of file diff --git a/tests/queries/0_stateless/02963_invalid_identifier.reference b/tests/queries/0_stateless/02963_invalid_identifier.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02963_invalid_identifier.sql b/tests/queries/0_stateless/02963_invalid_identifier.sql new file mode 100644 index 00000000000..64a52364baa --- /dev/null +++ b/tests/queries/0_stateless/02963_invalid_identifier.sql @@ -0,0 +1 @@ +SELECT t.t.t.* FROM system.tables WHERE database = currentDatabase(); --{serverError INVALID_IDENTIFIER} diff --git a/tests/queries/0_stateless/02963_msan_agg_addBatchLookupTable8.reference b/tests/queries/0_stateless/02963_msan_agg_addBatchLookupTable8.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02963_msan_agg_addBatchLookupTable8.sql 
b/tests/queries/0_stateless/02963_msan_agg_addBatchLookupTable8.sql new file mode 100644 index 00000000000..a3a8bd0624a --- /dev/null +++ b/tests/queries/0_stateless/02963_msan_agg_addBatchLookupTable8.sql @@ -0,0 +1,2 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/58727 +SELECT number % 2 AS even, aggThrow(number) FROM numbers(10) GROUP BY even; -- { serverError AGGREGATE_FUNCTION_THROW} diff --git a/tests/queries/0_stateless/02963_remote_read_small_buffer_size_bug.reference b/tests/queries/0_stateless/02963_remote_read_small_buffer_size_bug.reference new file mode 100644 index 00000000000..573541ac970 --- /dev/null +++ b/tests/queries/0_stateless/02963_remote_read_small_buffer_size_bug.reference @@ -0,0 +1 @@ +0 diff --git a/tests/queries/0_stateless/02963_remote_read_small_buffer_size_bug.sh b/tests/queries/0_stateless/02963_remote_read_small_buffer_size_bug.sh new file mode 100755 index 00000000000..24fe964b824 --- /dev/null +++ b/tests/queries/0_stateless/02963_remote_read_small_buffer_size_bug.sh @@ -0,0 +1,48 @@ +#!/usr/bin/env bash +# Tags: no-random-settings, long + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +disk_name="02963_remote_read_bug" + +$CLICKHOUSE_CLIENT -nm --query " +DROP TABLE IF EXISTS test; + +CREATE TABLE test (a Int32, s String) +ENGINE = MergeTree() +ORDER BY a +SETTINGS disk = disk(name = '$disk_name', type = cache, max_size = '10Gi', path = '$disk_name', disk = 's3_disk'); + +INSERT INTO test SELECT number % 1000000, randomString(1) FROM numbers_mt(1e7) SETTINGS enable_filesystem_cache_on_write_operations = 0; + +OPTIMIZE TABLE test FINAL; +" + +query_id=$(random_str 10) + +$CLICKHOUSE_CLIENT -nm --query_id "$query_id" --query " +WITH RANDOM_SET AS ( + SELECT rand32() % 10000 FROM numbers(100) +) +SELECT * +FROM test +WHERE a IN RANDOM_SET AND s IN ('x', 'y', 'z') +FORMAT Null +SETTINGS + max_threads = 10, + allow_prefetched_read_pool_for_remote_filesystem = 1, filesystem_prefetch_min_bytes_for_single_read_task = '1Ki', + merge_tree_min_bytes_for_concurrent_read = 1, merge_tree_min_rows_for_concurrent_read = 1, + merge_tree_min_bytes_for_concurrent_read_for_remote_filesystem = 1, merge_tree_min_rows_for_concurrent_read_for_remote_filesystem = 1; +" + +$CLICKHOUSE_CLIENT -nm --query " +SYSTEM FLUSH LOGS; + +-- This threshold was determined experimentally - before the fix this ratio had values around 50K +SELECT throwIf(ProfileEvents['WriteBufferFromFileDescriptorWriteBytes'] / ProfileEvents['WriteBufferFromFileDescriptorWrite'] < 200000) +FROM system.query_log +WHERE current_database = '$CLICKHOUSE_DATABASE' AND query_id = '$query_id' AND type = 'QueryFinish'; +" + diff --git a/tests/queries/0_stateless/02963_test_flexible_disk_configuration.reference b/tests/queries/0_stateless/02963_test_flexible_disk_configuration.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02963_test_flexible_disk_configuration.sql b/tests/queries/0_stateless/02963_test_flexible_disk_configuration.sql new file mode 100644 index 00000000000..552291b2f83 --- /dev/null +++ b/tests/queries/0_stateless/02963_test_flexible_disk_configuration.sql @@ -0,0 +1,56 @@ +-- Tags: no-fasttest + +drop table if exists test; +create table test (a Int32) engine = MergeTree() order by tuple() +settings disk=disk(name='test1', type = object_storage, object_storage_type = local_blob_storage, path='./02963_test1/'); + +drop table test; +create table test (a Int32) 
engine = MergeTree() order by tuple() +settings disk='s3_disk_02963'; + +drop table test; +create table test (a Int32) engine = MergeTree() order by tuple() +settings disk=disk(name='test1', + type = object_storage, + object_storage_type = s3, + endpoint = 'http://localhost:11111/test/common/', + access_key_id = clickhouse, + secret_access_key = clickhouse); + +drop table test; +create table test (a Int32) engine = MergeTree() order by tuple() +settings disk=disk(name='test2', + type = object_storage, + object_storage_type = s3, + metadata_storage_type = local, + endpoint = 'http://localhost:11111/test/common/', + access_key_id = clickhouse, + secret_access_key = clickhouse); + +drop table test; +create table test (a Int32) engine = MergeTree() order by tuple() +settings disk=disk(name='test3', + type = object_storage, + object_storage_type = s3, + metadata_type = lll, + endpoint = 'http://localhost:11111/test/common/', + access_key_id = clickhouse, + secret_access_key = clickhouse); -- { serverError UNKNOWN_ELEMENT_IN_CONFIG } + +create table test (a Int32) engine = MergeTree() order by tuple() +settings disk=disk(name='test4', + type = object_storage, + object_storage_type = kkk, + metadata_type = local, + endpoint = 'http://localhost:11111/test/common/', + access_key_id = clickhouse, + secret_access_key = clickhouse); -- { serverError UNKNOWN_ELEMENT_IN_CONFIG } + +create table test (a Int32) engine = MergeTree() order by tuple() +settings disk=disk(name='test5', + type = kkk, + object_storage_type = s3, + metadata_type = local, + endpoint = 'http://localhost:11111/test/common/', + access_key_id = clickhouse, + secret_access_key = clickhouse); -- { serverError UNKNOWN_ELEMENT_IN_CONFIG } diff --git a/tests/queries/0_stateless/02966_float32_promotion.reference b/tests/queries/0_stateless/02966_float32_promotion.reference new file mode 100644 index 00000000000..086e9795679 --- /dev/null +++ b/tests/queries/0_stateless/02966_float32_promotion.reference @@ -0,0 +1 @@ +49.9 diff --git a/tests/queries/0_stateless/02966_float32_promotion.sql b/tests/queries/0_stateless/02966_float32_promotion.sql new file mode 100644 index 00000000000..df687ca5338 --- /dev/null +++ b/tests/queries/0_stateless/02966_float32_promotion.sql @@ -0,0 +1,6 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/58680 +DROP TABLE IF EXISTS f32_table; +CREATE TABLE f32_table (my_field Float32) ENGINE=Memory(); +INSERT INTO f32_table values ('49.9'); +SELECT * FROM f32_table where my_field = '49.9'; +DROP TABLE f32_table; diff --git a/tests/queries/0_stateless/02966_nested_offsets_subcolumn.reference.j2 b/tests/queries/0_stateless/02966_nested_offsets_subcolumn.reference.j2 new file mode 100644 index 00000000000..055b0466ee2 --- /dev/null +++ b/tests/queries/0_stateless/02966_nested_offsets_subcolumn.reference.j2 @@ -0,0 +1,11 @@ +{% for engine in ['Memory', 'Log', 'TinyLog', 'MergeTree ORDER BY (a) SETTINGS min_bytes_for_wide_part = 0, min_rows_for_wide_part = 2000'] -%} +--- {{ engine }} --- +4500 +4500 4500 +4500 +4500 4500 +94500 +94500 94500 +94500 +94500 94500 +{% endfor -%} diff --git a/tests/queries/0_stateless/02966_nested_offsets_subcolumn.sql.j2 b/tests/queries/0_stateless/02966_nested_offsets_subcolumn.sql.j2 new file mode 100644 index 00000000000..f9be206e3e7 --- /dev/null +++ b/tests/queries/0_stateless/02966_nested_offsets_subcolumn.sql.j2 @@ -0,0 +1,36 @@ + +{% for engine in ['Memory', 'Log', 'TinyLog', 'MergeTree ORDER BY (a) SETTINGS min_bytes_for_wide_part = 0, min_rows_for_wide_part = 2000'] -%} + 
+SELECT '--- {{ engine }} ---'; + +DROP TABLE IF EXISTS t_nested_offsets; + +CREATE TABLE t_nested_offsets +( + `a` String, + `e.n` Array(String), + `e.t` Array(Int64) +) +ENGINE = {{ engine }}; + +SYSTEM STOP MERGES t_nested_offsets; + +INSERT INTO t_nested_offsets SELECT number, range(number % 10), range(number % 10) FROM numbers(1000); + +SELECT sum(e.n.size0) FROM t_nested_offsets; +SELECT sum(e.n.size0), sum(e.t.size0) FROM t_nested_offsets; + +SELECT sum(length(e.n)) FROM t_nested_offsets SETTINGS optimize_functions_to_subcolumns = 1; +SELECT sum(length(e.n)), sum(length(e.t)) FROM t_nested_offsets SETTINGS optimize_functions_to_subcolumns = 1; + +INSERT INTO t_nested_offsets SELECT number, range(number % 10), range(number % 10) FROM numbers(20000); + +SELECT sum(e.n.size0) FROM t_nested_offsets; +SELECT sum(e.n.size0), sum(e.t.size0) FROM t_nested_offsets; + +SELECT sum(length(e.n)) FROM t_nested_offsets SETTINGS optimize_functions_to_subcolumns = 1; +SELECT sum(length(e.n)), sum(length(e.t)) FROM t_nested_offsets SETTINGS optimize_functions_to_subcolumns = 1; + +DROP TABLE t_nested_offsets; + +{% endfor -%} diff --git a/tests/queries/0_stateless/02966_s3_access_key_id_restriction.reference b/tests/queries/0_stateless/02966_s3_access_key_id_restriction.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02966_s3_access_key_id_restriction.sql b/tests/queries/0_stateless/02966_s3_access_key_id_restriction.sql new file mode 100644 index 00000000000..c1ca0b4bcd5 --- /dev/null +++ b/tests/queries/0_stateless/02966_s3_access_key_id_restriction.sql @@ -0,0 +1,6 @@ +-- Tags: no-fasttest + +select * from s3('http://localhost:11111/test/a.tsv', '\ninjection\n', 'admin'); -- { serverError 36 } +select * from deltaLake('http://localhost:11111/test/a.tsv', '\ninjection\n', 'admin'); -- { serverError 36 } +select * from hudi('http://localhost:11111/test/a.tsv', '\ninjection\n', 'admin'); -- { serverError 36 } +select * from iceberg('http://localhost:11111/test/a.tsv', '\ninjection\n', 'admin'); -- { serverError 36 } diff --git a/tests/queries/0_stateless/02967_analyzer_fuzz.reference b/tests/queries/0_stateless/02967_analyzer_fuzz.reference new file mode 100644 index 00000000000..0b9dc69ff9e --- /dev/null +++ b/tests/queries/0_stateless/02967_analyzer_fuzz.reference @@ -0,0 +1,4 @@ +2147483647 0 +0 + +0 diff --git a/tests/queries/0_stateless/02967_analyzer_fuzz.sql b/tests/queries/0_stateless/02967_analyzer_fuzz.sql new file mode 100644 index 00000000000..7f2d9afcc71 --- /dev/null +++ b/tests/queries/0_stateless/02967_analyzer_fuzz.sql @@ -0,0 +1,20 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/57193 +SELECT + 2147483647, + count(pow(NULL, 1.0001)) +FROM remote(test_cluster_two_shards, system, one) +GROUP BY + makeDateTime64(NULL, NULL, pow(NULL, '257') - '-1', '0.2147483647', 257), + makeDateTime64(pow(pow(NULL, '21474836.46') - '0.0000065535', 1048577), '922337203685477580.6', NULL, NULL, pow(NULL, 1.0001) - 65536, NULL) +WITH CUBE + SETTINGS allow_experimental_analyzer = 1; + + +CREATE TABLE data_01223 (`key` Int) ENGINE = Memory; +CREATE TABLE dist_layer_01223 AS data_01223 ENGINE = Distributed(test_cluster_two_shards, currentDatabase(), data_01223); +CREATE TABLE dist_01223 AS data_01223 ENGINE = Distributed(test_cluster_two_shards, currentDatabase(), dist_layer_01223); +SELECT count(round('92233720368547758.07', '-0.01', NULL, nan, '25.7', '-92233720368547758.07', NULL)) +FROM dist_01223 +WHERE round(NULL, 1025, 1.1754943508222875e-38, NULL) 
+WITH TOTALS + SETTINGS allow_experimental_analyzer = 1; diff --git a/tests/queries/0_stateless/02967_fuzz_bad_cast.reference b/tests/queries/0_stateless/02967_fuzz_bad_cast.reference new file mode 100644 index 00000000000..2ad74d50ebd --- /dev/null +++ b/tests/queries/0_stateless/02967_fuzz_bad_cast.reference @@ -0,0 +1 @@ +0 \N diff --git a/tests/queries/0_stateless/02967_fuzz_bad_cast.sql b/tests/queries/0_stateless/02967_fuzz_bad_cast.sql new file mode 100644 index 00000000000..8b34cc6e4d3 --- /dev/null +++ b/tests/queries/0_stateless/02967_fuzz_bad_cast.sql @@ -0,0 +1,10 @@ +DROP TABLE IF EXISTS t1__fuzz_4; +DROP TABLE IF EXISTS t0__fuzz_29; + +SET allow_suspicious_low_cardinality_types = 1, join_algorithm = 'partial_merge', join_use_nulls = 1; +CREATE TABLE t1__fuzz_4 (`x` Nullable(UInt32), `y` Int64) ENGINE = MergeTree ORDER BY (x, y) SETTINGS allow_nullable_key = 1; +CREATE TABLE t0__fuzz_29 (`x` LowCardinality(UInt256), `y` Array(Array(Date))) ENGINE = MergeTree ORDER BY (x, y); +SELECT sum(0), NULL FROM t0__fuzz_29 FULL OUTER JOIN t1__fuzz_4 USING (x) PREWHERE NULL; + +DROP TABLE t1__fuzz_4; +DROP TABLE t0__fuzz_29; diff --git a/tests/queries/0_stateless/02967_http_compressed.reference b/tests/queries/0_stateless/02967_http_compressed.reference new file mode 100644 index 00000000000..b466312345f --- /dev/null +++ b/tests/queries/0_stateless/02967_http_compressed.reference @@ -0,0 +1,8 @@ +0000000 e6fb dda5 f87b 92ee 2e61 ab3d 12cb 5c84 +0000010 1382 0000 0900 0000 9000 0931 0932 0933 +0000020 3234 000a +0000023 +0000000 e6fb dda5 f87b 92ee 2e61 ab3d 12cb 5c84 +0000010 1382 0000 0900 0000 9000 0931 0932 0933 +0000020 3234 000a +0000023 diff --git a/tests/queries/0_stateless/02967_http_compressed.sh b/tests/queries/0_stateless/02967_http_compressed.sh new file mode 100755 index 00000000000..9d00969fd5b --- /dev/null +++ b/tests/queries/0_stateless/02967_http_compressed.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +curl -sS -d 'SELECT 1,2,3,42' -H 'Accept-Encoding: gzip' -X POST "${CLICKHOUSE_URL}&compress=1&enable_http_compression=1" | gzip -d | hexdump +curl -sS -d 'SELECT 1,2,3,42' -H 'Accept-Encoding: zstd' -X POST "${CLICKHOUSE_URL}&compress=1&enable_http_compression=1" | zstd -d | hexdump diff --git a/tests/queries/0_stateless/02967_index_hint_crash.reference b/tests/queries/0_stateless/02967_index_hint_crash.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02967_index_hint_crash.sql b/tests/queries/0_stateless/02967_index_hint_crash.sql new file mode 100644 index 00000000000..e33a4992c6c --- /dev/null +++ b/tests/queries/0_stateless/02967_index_hint_crash.sql @@ -0,0 +1,16 @@ +CREATE TABLE tab +( + `foo` Array(LowCardinality(String)), + INDEX idx foo TYPE bloom_filter GRANULARITY 1 +) +ENGINE = MergeTree +PRIMARY KEY tuple(); + +INSERT INTO tab SELECT if(number % 2, ['value'], []) +FROM system.numbers +LIMIT 10000; + +SELECT * +FROM tab +PREWHERE indexHint() +FORMAT Null; diff --git a/tests/queries/0_stateless/02967_mysql_settings_override.reference b/tests/queries/0_stateless/02967_mysql_settings_override.reference new file mode 100644 index 00000000000..96cf7ecc403 --- /dev/null +++ b/tests/queries/0_stateless/02967_mysql_settings_override.reference @@ -0,0 +1,23 @@ +-- Init +s +a +b +c +d +-- Uppercase setting name +s +a +b +name value +send_timeout 22 +name value +receive_timeout 33 +-- Lowercase setting name +s +a +b +c +name value +send_timeout 55 +name value +receive_timeout 66 diff --git a/tests/queries/0_stateless/02967_mysql_settings_override.sh b/tests/queries/0_stateless/02967_mysql_settings_override.sh new file mode 100755 index 00000000000..59a2099190a --- /dev/null +++ b/tests/queries/0_stateless/02967_mysql_settings_override.sh @@ -0,0 +1,33 @@ +#!/usr/bin/env bash +# Tags: no-fasttest +# Tag no-fasttest: requires mysql client + +# Tests that certain MySQL-proprietary settings are mapped to ClickHouse-native settings. + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +CHANGED_SETTINGS_QUERY="SELECT name, value FROM system.settings WHERE name IN ('send_timeout', 'receive_timeout') AND changed;" + +TEST_TABLE="mysql_settings_override_test" + +DROP_TABLE="DROP TABLE IF EXISTS $TEST_TABLE;" +CREATE_TABLE="CREATE TABLE $TEST_TABLE (s String) ENGINE MergeTree ORDER BY s;" +INSERT_STMT="INSERT INTO $TEST_TABLE VALUES ('a'), ('b'), ('c'), ('d');" +SELECT_STMT="SELECT * FROM $TEST_TABLE ORDER BY s;" + +echo "-- Init" +${MYSQL_CLIENT} --execute "$DROP_TABLE $CREATE_TABLE $INSERT_STMT $SELECT_STMT" # should fetch all 4 records + +echo "-- Uppercase setting name" +${MYSQL_CLIENT} --execute "SET SQL_SELECT_LIMIT = 2; $SELECT_STMT" # should fetch 2 records out of 4 +${MYSQL_CLIENT} --execute "SET NET_WRITE_TIMEOUT = 22; $CHANGED_SETTINGS_QUERY" +${MYSQL_CLIENT} --execute "SET NET_READ_TIMEOUT = 33; $CHANGED_SETTINGS_QUERY" + +echo "-- Lowercase setting name" +${MYSQL_CLIENT} --execute "set sql_select_limit=3; $SELECT_STMT" # should fetch 3 records out of 4 +${MYSQL_CLIENT} --execute "set net_write_timeout=55; $CHANGED_SETTINGS_QUERY" +${MYSQL_CLIENT} --execute "set net_read_timeout=66; $CHANGED_SETTINGS_QUERY" + +${MYSQL_CLIENT} --execute "$DROP_TABLE" diff --git a/tests/queries/0_stateless/02968_analyzer_join_column_not_found.reference b/tests/queries/0_stateless/02968_analyzer_join_column_not_found.reference new file mode 100644 index 00000000000..50d4d226b46 --- /dev/null +++ b/tests/queries/0_stateless/02968_analyzer_join_column_not_found.reference @@ -0,0 +1 @@ +1 1 1 1 1 diff --git a/tests/queries/0_stateless/02968_analyzer_join_column_not_found.sql b/tests/queries/0_stateless/02968_analyzer_join_column_not_found.sql new file mode 100644 index 00000000000..e7c12aac81e --- /dev/null +++ b/tests/queries/0_stateless/02968_analyzer_join_column_not_found.sql @@ -0,0 +1,16 @@ +DROP TABLE IF EXISTS im; +CREATE TABLE im (id Int32, dd Int32) ENGINE = Memory(); +INSERT INTO im VALUES (1, 1); + +DROP TABLE IF EXISTS ts; +CREATE TABLE ts (tid Int32, id Int32) ENGINE = Memory(); +INSERT INTO ts VALUES (1, 1); + +SELECT * +FROM im AS m +INNER JOIN ( + SELECT tid, dd, t.id + FROM im AS m + INNER JOIN ts AS t ON m.id = t.id +) AS t ON m.dd = t.dd +; diff --git a/tests/queries/0_stateless/02968_file_log_multiple_read.reference b/tests/queries/0_stateless/02968_file_log_multiple_read.reference new file mode 100644 index 00000000000..40afb2d64f9 --- /dev/null +++ b/tests/queries/0_stateless/02968_file_log_multiple_read.reference @@ -0,0 +1,30 @@ +1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 diff --git a/tests/queries/0_stateless/02968_file_log_multiple_read.sh b/tests/queries/0_stateless/02968_file_log_multiple_read.sh new file mode 100755 index 00000000000..199893a9428 --- /dev/null +++ b/tests/queries/0_stateless/02968_file_log_multiple_read.sh @@ -0,0 +1,83 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +# Data preparation. +# Now we can get the user_files_path by use the table file function for trick. 
also we can get it by query as: +# "insert into function file('exist.txt', 'CSV', 'val1 char') values ('aaaa'); select _path from file('exist.txt', 'CSV', 'val1 char')" +user_files_path=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') + +logs_dir=${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME} + +rm -rf ${logs_dir} + +mkdir -p ${logs_dir}/ + +for i in {1..10} +do + echo $i >> ${logs_dir}/a.txt +done + +${CLICKHOUSE_CLIENT} -n --query=" +DROP TABLE IF EXISTS file_log; +DROP TABLE IF EXISTS table_to_store_data; +DROP TABLE IF EXISTS file_log_mv; + +CREATE TABLE file_log ( + id Int64 +) ENGINE = FileLog('${logs_dir}/', 'CSV'); + +CREATE TABLE table_to_store_data ( + id Int64 +) ENGINE = MergeTree +ORDER BY id; + +CREATE MATERIALIZED VIEW file_log_mv TO table_to_store_data AS + SELECT id + FROM file_log + WHERE id NOT IN ( + SELECT id + FROM table_to_store_data + WHERE id IN ( + SELECT id + FROM file_log + ) + ); +" + +function count() +{ + COUNT=$(${CLICKHOUSE_CLIENT} --query "select count() from table_to_store_data;") + echo $COUNT +} + +for i in {1..10} +do + [[ $(count) -gt 0 ]] && break + sleep 1 +done + +${CLICKHOUSE_CLIENT} --query "SELECT * FROM table_to_store_data ORDER BY id;" + +for i in {1..20} +do + echo $i >> ${logs_dir}/a.txt +done + +for i in {1..10} +do + [[ $(count) -gt 10 ]] && break + sleep 1 +done + +${CLICKHOUSE_CLIENT} --query "SELECT * FROM table_to_store_data ORDER BY id;" + +${CLICKHOUSE_CLIENT} -n --query=" +DROP TABLE file_log; +DROP TABLE table_to_store_data; +DROP TABLE file_log_mv; +" + +rm -rf ${logs_dir} diff --git a/tests/queries/0_stateless/02968_mysql_show_warnings.reference b/tests/queries/0_stateless/02968_mysql_show_warnings.reference new file mode 100644 index 00000000000..9992883e4d5 --- /dev/null +++ b/tests/queries/0_stateless/02968_mysql_show_warnings.reference @@ -0,0 +1,6 @@ +@@session.warning_count +0 +@@session.warning_count +0 +@@warning_count +0 diff --git a/tests/queries/0_stateless/02968_mysql_show_warnings.sh b/tests/queries/0_stateless/02968_mysql_show_warnings.sh new file mode 100755 index 00000000000..6c58c2e57d5 --- /dev/null +++ b/tests/queries/0_stateless/02968_mysql_show_warnings.sh @@ -0,0 +1,17 @@ +#!/usr/bin/env bash +# Tags: no-fasttest +# Tag no-fasttest: requires mysql client + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +${MYSQL_CLIENT} --execute "SHOW WARNINGS;" +${MYSQL_CLIENT} --execute "show warnings;" +${MYSQL_CLIENT} --execute "SHOW WARNINGS LIMIT 100;" +${MYSQL_CLIENT} --execute "show warnings limit 100;" +${MYSQL_CLIENT} --execute "SHOW WARNINGS LIMIT 100 OFFSET 100;" +${MYSQL_CLIENT} --execute "show warnings limit 100 offset 100;" +${MYSQL_CLIENT} --execute "SHOW COUNT(*) WARNINGS;" +${MYSQL_CLIENT} --execute "show count(*) warnings;" +${MYSQL_CLIENT} --execute "SELECT @@session.warning_count;" diff --git a/tests/queries/0_stateless/02968_sumMap_with_nan.reference b/tests/queries/0_stateless/02968_sumMap_with_nan.reference new file mode 100644 index 00000000000..83a2d98375f --- /dev/null +++ b/tests/queries/0_stateless/02968_sumMap_with_nan.reference @@ -0,0 +1,2 @@ +([6.7],[3]) +([1,4,5,6.7,nan],[2.3,5,1,3,inf]) diff --git a/tests/queries/0_stateless/02968_sumMap_with_nan.sql b/tests/queries/0_stateless/02968_sumMap_with_nan.sql new file mode 100644 index 00000000000..330da94cfea --- /dev/null +++ b/tests/queries/0_stateless/02968_sumMap_with_nan.sql @@ -0,0 +1,4 @@ +SELECT sumMapFiltered([6.7])([x], [y]) +FROM values('x Float64, y Float64', (0, 1), (1, 2.3), (nan, inf), (6.7, 3), (4, 4), (5, 1)); + +SELECT sumMap([x],[y]) FROM values('x Float64, y Float64', (4, 1), (1, 2.3), (nan,inf), (6.7,3), (4,4), (5, 1)); diff --git a/tests/queries/0_stateless/02968_url_args.reference b/tests/queries/0_stateless/02968_url_args.reference new file mode 100644 index 00000000000..aa19e45301c --- /dev/null +++ b/tests/queries/0_stateless/02968_url_args.reference @@ -0,0 +1 @@ +CREATE TABLE default.a\n(\n `x` Int64\n)\nENGINE = URL(\'https://example.com/\', \'CSV\', headers(\'foo\' = \'bar\')) diff --git a/tests/queries/0_stateless/02968_url_args.sql b/tests/queries/0_stateless/02968_url_args.sql new file mode 100644 index 00000000000..8bee9fec0ac --- /dev/null +++ b/tests/queries/0_stateless/02968_url_args.sql @@ -0,0 +1,2 @@ +create table a (x Int64) engine URL('https://example.com/', CSV, headers('foo' = 'bar')); +show create a; diff --git a/tests/queries/0_stateless/02969_analyzer_eliminate_injective_functions.reference b/tests/queries/0_stateless/02969_analyzer_eliminate_injective_functions.reference new file mode 100644 index 00000000000..72d83e5cf6a --- /dev/null +++ b/tests/queries/0_stateless/02969_analyzer_eliminate_injective_functions.reference @@ -0,0 +1,142 @@ +QUERY id: 0 + PROJECTION COLUMNS + val String + count() UInt64 + PROJECTION + LIST id: 1, nodes: 2 + FUNCTION id: 2, function_name: toString, function_type: ordinary, result_type: String + ARGUMENTS + LIST id: 3, nodes: 1 + FUNCTION id: 4, function_name: toString, function_type: ordinary, result_type: String + ARGUMENTS + LIST id: 5, nodes: 1 + FUNCTION id: 6, function_name: plus, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 7, nodes: 2 + COLUMN id: 8, column_name: number, result_type: UInt64, source_id: 9 + CONSTANT id: 10, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 11, function_name: count, function_type: aggregate, result_type: UInt64 + JOIN TREE + TABLE_FUNCTION id: 9, alias: __table1, table_function_name: numbers + ARGUMENTS + LIST id: 12, nodes: 1 + CONSTANT id: 13, constant_value: UInt64_2, constant_value_type: UInt8 + GROUP BY + LIST id: 14, nodes: 1 + FUNCTION id: 6, function_name: plus, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 7, nodes: 2 + COLUMN id: 8, column_name: number, result_type: UInt64, source_id: 9 + CONSTANT id: 10, 
constant_value: UInt64_1, constant_value_type: UInt8 + ORDER BY + LIST id: 15, nodes: 1 + SORT id: 16, sort_direction: ASCENDING, with_fill: 0 + EXPRESSION + FUNCTION id: 2, function_name: toString, function_type: ordinary, result_type: String + ARGUMENTS + LIST id: 3, nodes: 1 + FUNCTION id: 4, function_name: toString, function_type: ordinary, result_type: String + ARGUMENTS + LIST id: 5, nodes: 1 + FUNCTION id: 6, function_name: plus, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 7, nodes: 2 + COLUMN id: 8, column_name: number, result_type: UInt64, source_id: 9 + CONSTANT id: 10, constant_value: UInt64_1, constant_value_type: UInt8 +1 1 +2 1 +QUERY id: 0 + PROJECTION COLUMNS + val String + count() UInt64 + PROJECTION + LIST id: 1, nodes: 2 + FUNCTION id: 2, function_name: toString, function_type: ordinary, result_type: String + ARGUMENTS + LIST id: 3, nodes: 1 + FUNCTION id: 4, function_name: toString, function_type: ordinary, result_type: String + ARGUMENTS + LIST id: 5, nodes: 1 + FUNCTION id: 6, function_name: plus, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 7, nodes: 2 + COLUMN id: 8, column_name: number, result_type: UInt64, source_id: 9 + CONSTANT id: 10, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 11, function_name: count, function_type: aggregate, result_type: UInt64 + JOIN TREE + TABLE_FUNCTION id: 9, alias: __table1, table_function_name: numbers + ARGUMENTS + LIST id: 12, nodes: 1 + CONSTANT id: 13, constant_value: UInt64_2, constant_value_type: UInt8 + GROUP BY + LIST id: 14, nodes: 1 + FUNCTION id: 6, function_name: plus, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 7, nodes: 2 + COLUMN id: 8, column_name: number, result_type: UInt64, source_id: 9 + CONSTANT id: 10, constant_value: UInt64_1, constant_value_type: UInt8 + ORDER BY + LIST id: 15, nodes: 1 + SORT id: 16, sort_direction: ASCENDING, with_fill: 0 + EXPRESSION + FUNCTION id: 2, function_name: toString, function_type: ordinary, result_type: String + ARGUMENTS + LIST id: 3, nodes: 1 + FUNCTION id: 4, function_name: toString, function_type: ordinary, result_type: String + ARGUMENTS + LIST id: 5, nodes: 1 + FUNCTION id: 6, function_name: plus, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 7, nodes: 2 + COLUMN id: 8, column_name: number, result_type: UInt64, source_id: 9 + CONSTANT id: 10, constant_value: UInt64_1, constant_value_type: UInt8 +CHECK WITH TOTALS +QUERY id: 0, is_group_by_with_totals: 1 + PROJECTION COLUMNS + val String + count() UInt64 + PROJECTION + LIST id: 1, nodes: 2 + FUNCTION id: 2, function_name: toString, function_type: ordinary, result_type: String + ARGUMENTS + LIST id: 3, nodes: 1 + FUNCTION id: 4, function_name: toString, function_type: ordinary, result_type: String + ARGUMENTS + LIST id: 5, nodes: 1 + FUNCTION id: 6, function_name: plus, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 7, nodes: 2 + COLUMN id: 8, column_name: number, result_type: UInt64, source_id: 9 + CONSTANT id: 10, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 11, function_name: count, function_type: aggregate, result_type: UInt64 + JOIN TREE + TABLE_FUNCTION id: 9, alias: __table1, table_function_name: numbers + ARGUMENTS + LIST id: 12, nodes: 1 + CONSTANT id: 13, constant_value: UInt64_2, constant_value_type: UInt8 + GROUP BY + LIST id: 14, nodes: 1 + FUNCTION id: 6, function_name: plus, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 7, nodes: 2 + 
COLUMN id: 8, column_name: number, result_type: UInt64, source_id: 9 + CONSTANT id: 10, constant_value: UInt64_1, constant_value_type: UInt8 + ORDER BY + LIST id: 15, nodes: 1 + SORT id: 16, sort_direction: ASCENDING, with_fill: 0 + EXPRESSION + FUNCTION id: 2, function_name: toString, function_type: ordinary, result_type: String + ARGUMENTS + LIST id: 3, nodes: 1 + FUNCTION id: 4, function_name: toString, function_type: ordinary, result_type: String + ARGUMENTS + LIST id: 5, nodes: 1 + FUNCTION id: 6, function_name: plus, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 7, nodes: 2 + COLUMN id: 8, column_name: number, result_type: UInt64, source_id: 9 + CONSTANT id: 10, constant_value: UInt64_1, constant_value_type: UInt8 +1 1 +2 1 + +0 2 diff --git a/tests/queries/0_stateless/02969_analyzer_eliminate_injective_functions.sql b/tests/queries/0_stateless/02969_analyzer_eliminate_injective_functions.sql new file mode 100644 index 00000000000..15f2550a63e --- /dev/null +++ b/tests/queries/0_stateless/02969_analyzer_eliminate_injective_functions.sql @@ -0,0 +1,31 @@ +set allow_experimental_analyzer = 1; + +EXPLAIN QUERY TREE +SELECT toString(toString(number + 1)) as val, count() +FROM numbers(2) +GROUP BY val +ORDER BY val; + +SELECT toString(toString(number + 1)) as val, count() +FROM numbers(2) +GROUP BY ALL +ORDER BY val; + +EXPLAIN QUERY TREE +SELECT toString(toString(number + 1)) as val, count() +FROM numbers(2) +GROUP BY ALL +ORDER BY val; + +SELECT 'CHECK WITH TOTALS'; + +EXPLAIN QUERY TREE +SELECT toString(toString(number + 1)) as val, count() +FROM numbers(2) +GROUP BY val WITH TOTALS +ORDER BY val; + +SELECT toString(toString(number + 1)) as val, count() +FROM numbers(2) +GROUP BY val WITH TOTALS +ORDER BY val; diff --git a/tests/queries/0_stateless/02969_archive_seek.reference b/tests/queries/0_stateless/02969_archive_seek.reference new file mode 100644 index 00000000000..a6937569dc7 --- /dev/null +++ b/tests/queries/0_stateless/02969_archive_seek.reference @@ -0,0 +1 @@ +10551038310762432828 diff --git a/tests/queries/0_stateless/02969_archive_seek.sh b/tests/queries/0_stateless/02969_archive_seek.sh new file mode 100755 index 00000000000..65507aa854a --- /dev/null +++ b/tests/queries/0_stateless/02969_archive_seek.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +$CLICKHOUSE_LOCAL -q "select sum(cityHash64(*)) from file('$CURDIR/data_parquet/02969.zip :: u.parquet') settings max_threads=4, max_read_buffer_size=1000" \ No newline at end of file diff --git a/tests/queries/0_stateless/02969_functions_to_subcolumns_if_null.reference b/tests/queries/0_stateless/02969_functions_to_subcolumns_if_null.reference new file mode 100644 index 00000000000..433ef84b3bf --- /dev/null +++ b/tests/queries/0_stateless/02969_functions_to_subcolumns_if_null.reference @@ -0,0 +1,4 @@ +10000 +0 +0 +0 diff --git a/tests/queries/0_stateless/02969_functions_to_subcolumns_if_null.sql b/tests/queries/0_stateless/02969_functions_to_subcolumns_if_null.sql new file mode 100644 index 00000000000..361fd7c7a4e --- /dev/null +++ b/tests/queries/0_stateless/02969_functions_to_subcolumns_if_null.sql @@ -0,0 +1,27 @@ +DROP TABLE IF EXISTS t_subcolumns_if; + +CREATE TABLE t_subcolumns_if (id Nullable(Int64)) ENGINE=MergeTree ORDER BY tuple(); + +INSERT INTO t_subcolumns_if SELECT number::Nullable(Int64) as number FROM numbers(10000); + +SELECT + sum(multiIf(id IS NOT NULL, 1, 0)) +FROM t_subcolumns_if +SETTINGS allow_experimental_analyzer = 1, optimize_functions_to_subcolumns = 1; + +SELECT + sum(multiIf(id IS NULL, 1, 0)) +FROM t_subcolumns_if +SETTINGS allow_experimental_analyzer = 0, optimize_functions_to_subcolumns = 1; + +SELECT + sum(multiIf(id IS NULL, 1, 0)) +FROM t_subcolumns_if +SETTINGS allow_experimental_analyzer = 1, optimize_functions_to_subcolumns = 0; + +SELECT + sum(multiIf(id IS NULL, 1, 0)) +FROM t_subcolumns_if +SETTINGS allow_experimental_analyzer = 1, optimize_functions_to_subcolumns = 1; + +DROP TABLE IF EXISTS t_subcolumns_if; diff --git a/tests/queries/0_stateless/02969_mysql_cast_type_aliases.reference b/tests/queries/0_stateless/02969_mysql_cast_type_aliases.reference new file mode 100644 index 00000000000..5555c918500 --- /dev/null +++ b/tests/queries/0_stateless/02969_mysql_cast_type_aliases.reference @@ -0,0 +1,28 @@ +-- Uppercase tests +Binary(N) foo FixedString(3) +Char 44 String +Date 2021-02-03 Date +DateTime 2021-02-03 12:01:02 DateTime +Decimal 45 Decimal(10, 0) +Decimal(M) 46 Decimal(4, 0) +Decimal(M, D) 47.21 Decimal(4, 2) +Double 48.11 Float64 +JSON {"foo":"bar"} Object(\'json\') +Real 49.22 Float32 +Signed 50 Int64 +Unsigned 52 UInt64 +Year 2007 UInt16 +-- Lowercase tests +Binary(N) foo FixedString(3) +Char 44 String +Date 2021-02-03 Date +DateTime 2021-02-03 12:01:02 DateTime +Decimal 45 Decimal(10, 0) +Decimal(M) 46 Decimal(4, 0) +Decimal(M, D) 47.21 Decimal(4, 2) +Double 48.11 Float64 +JSON {"foo":"bar"} Object(\'json\') +Real 49.22 Float32 +Signed 50 Int64 +Unsigned 52 UInt64 +Year 2007 UInt16 diff --git a/tests/queries/0_stateless/02969_mysql_cast_type_aliases.sql b/tests/queries/0_stateless/02969_mysql_cast_type_aliases.sql new file mode 100644 index 00000000000..7b5735cdebc --- /dev/null +++ b/tests/queries/0_stateless/02969_mysql_cast_type_aliases.sql @@ -0,0 +1,46 @@ +-- See https://dev.mysql.com/doc/refman/8.0/en/cast-functions.html#function_cast +-- Tests are in order of the type appearance in the docs + +SET allow_experimental_object_type = 1; + +SELECT '-- Uppercase tests'; +-- Not supported as it is translated to FixedString without arguments +-- SELECT 'Binary' AS mysql_type, CAST('' AS BINARY) AS result, toTypeName(result) AS native_type; +SELECT 'Binary(N)' AS mysql_type, CAST('foo' AS BINARY(3)) AS result, toTypeName(result) AS native_type; +SELECT 'Char' AS mysql_type, CAST(44 AS CHAR) AS result, 
toTypeName(result) AS native_type; +SELECT 'Date' AS mysql_type, CAST('2021-02-03' AS DATE) AS result, toTypeName(result) AS native_type; +SELECT 'DateTime' AS mysql_type, CAST('2021-02-03 12:01:02' AS DATETIME) AS result, toTypeName(result) AS native_type; +SELECT 'Decimal' AS mysql_type, CAST(45.1 AS DECIMAL) AS result, toTypeName(result) AS native_type; +SELECT 'Decimal(M)' AS mysql_type, CAST(46.2 AS DECIMAL(4)) AS result, toTypeName(result) AS native_type; +SELECT 'Decimal(M, D)' AS mysql_type, CAST(47.21 AS DECIMAL(4, 2)) AS result, toTypeName(result) AS native_type; +SELECT 'Double' AS mysql_type, CAST(48.11 AS DOUBLE) AS result, toTypeName(result) AS native_type; +SELECT 'JSON' AS mysql_type, CAST('{\"foo\":\"bar\"}' AS JSON) AS result, toTypeName(result) AS native_type; +SELECT 'Real' AS mysql_type, CAST(49.22 AS REAL) AS result, toTypeName(result) AS native_type; +SELECT 'Signed' AS mysql_type, CAST(50 AS SIGNED) AS result, toTypeName(result) AS native_type; +SELECT 'Unsigned' AS mysql_type, CAST(52 AS UNSIGNED) AS result, toTypeName(result) AS native_type; +-- Could be added as an alias, but SIGNED INTEGER in CAST context means UInt64, +-- while INTEGER SIGNED as a column definition means UInt32. +-- SELECT 'Signed integer' AS mysql_type, CAST(51 AS SIGNED INTEGER) AS result, toTypeName(result) AS native_type; +-- SELECT 'Unsigned integer' AS mysql_type, CAST(53 AS UNSIGNED INTEGER) AS result, toTypeName(result) AS native_type; +SELECT 'Year' AS mysql_type, CAST(2007 AS YEAR) AS result, toTypeName(result) AS native_type; +-- Currently, expects UInt64 as an argument +-- SELECT 'Time' AS mysql_type, CAST('12:45' AS TIME) AS result, toTypeName(result) AS native_type; + +SELECT '-- Lowercase tests'; +-- select 'Binary' as mysql_type, cast('' as binary) as result, toTypeName(result) as native_type; +select 'Binary(N)' as mysql_type, cast('foo' as binary(3)) as result, toTypeName(result) as native_type; +select 'Char' as mysql_type, cast(44 as char) as result, toTypeName(result) as native_type; +select 'Date' as mysql_type, cast('2021-02-03' as date) as result, toTypeName(result) as native_type; +select 'DateTime' as mysql_type, cast('2021-02-03 12:01:02' as datetime) as result, toTypeName(result) as native_type; +select 'Decimal' as mysql_type, cast(45.1 as decimal) as result, toTypeName(result) as native_type; +select 'Decimal(M)' as mysql_type, cast(46.2 as decimal(4)) as result, toTypeName(result) as native_type; +select 'Decimal(M, D)' as mysql_type, cast(47.21 as decimal(4, 2)) as result, toTypeName(result) as native_type; +select 'Double' as mysql_type, cast(48.11 as double) as result, toTypeName(result) as native_type; +select 'JSON' as mysql_type, cast('{\"foo\":\"bar\"}' as json) as result, toTypeName(result) as native_type; +select 'Real' as mysql_type, cast(49.22 as real) as result, toTypeName(result) as native_type; +select 'Signed' as mysql_type, cast(50 as signed) as result, toTypeName(result) as native_type; +select 'Unsigned' as mysql_type, cast(52 as unsigned) as result, toTypeName(result) as native_type; +-- select 'Signed integer' as mysql_type, cast(51 as signed integer) as result, toTypeName(result) as native_type; +-- select 'Unsigned integer' as mysql_type, cast(53 as unsigned integer) as result, toTypeName(result) as native_type; +select 'Year' as mysql_type, cast(2007 as year) as result, toTypeName(result) as native_type; +-- select 'Time' as mysql_type, cast('12:45' as time) as result, toTypeName(result) as native_type; diff --git 
a/tests/queries/0_stateless/02970_visible_width_behavior.reference b/tests/queries/0_stateless/02970_visible_width_behavior.reference new file mode 100644 index 00000000000..006be015ed7 --- /dev/null +++ b/tests/queries/0_stateless/02970_visible_width_behavior.reference @@ -0,0 +1,5 @@ +28 +19 +28 +19 +28 diff --git a/tests/queries/0_stateless/02970_visible_width_behavior.sql b/tests/queries/0_stateless/02970_visible_width_behavior.sql new file mode 100644 index 00000000000..efaa8852c34 --- /dev/null +++ b/tests/queries/0_stateless/02970_visible_width_behavior.sql @@ -0,0 +1,6 @@ +SELECT visibleWidth('ClickHouse是一个很好的数据库'); +SELECT visibleWidth('ClickHouse是一个很好的数据库') SETTINGS function_visible_width_behavior = 0; +SELECT visibleWidth('ClickHouse是一个很好的数据库') SETTINGS function_visible_width_behavior = 1; +SELECT visibleWidth('ClickHouse是一个很好的数据库') SETTINGS function_visible_width_behavior = 2; -- { serverError BAD_ARGUMENTS } +SELECT visibleWidth('ClickHouse是一个很好的数据库') SETTINGS compatibility = '23.12'; +SELECT visibleWidth('ClickHouse是一个很好的数据库') SETTINGS compatibility = '24.1'; diff --git a/tests/queries/0_stateless/02971_limit_by_distributed.reference b/tests/queries/0_stateless/02971_limit_by_distributed.reference new file mode 100644 index 00000000000..69c6437d04d --- /dev/null +++ b/tests/queries/0_stateless/02971_limit_by_distributed.reference @@ -0,0 +1,16 @@ +-- { echoOn } +-- with limit +SELECT k +FROM remote('127.0.0.{2,3}', currentDatabase(), tlb) +ORDER BY k ASC +LIMIT 1 BY k +LIMIT 100; +0 +1 +-- w/o limit +SELECT k +FROM remote('127.0.0.{2,3}', currentDatabase(), tlb) +ORDER BY k ASC +LIMIT 1 BY k; +0 +1 diff --git a/tests/queries/0_stateless/02971_limit_by_distributed.sql b/tests/queries/0_stateless/02971_limit_by_distributed.sql new file mode 100644 index 00000000000..66a85137f32 --- /dev/null +++ b/tests/queries/0_stateless/02971_limit_by_distributed.sql @@ -0,0 +1,25 @@ +-- Tags: shard + +drop table if exists tlb; +create table tlb (k UInt64) engine MergeTree order by k; + +INSERT INTO tlb (k) SELECT 0 FROM numbers(100); +INSERT INTO tlb (k) SELECT 1; + +-- { echoOn } +-- with limit +SELECT k +FROM remote('127.0.0.{2,3}', currentDatabase(), tlb) +ORDER BY k ASC +LIMIT 1 BY k +LIMIT 100; + +-- w/o limit +SELECT k +FROM remote('127.0.0.{2,3}', currentDatabase(), tlb) +ORDER BY k ASC +LIMIT 1 BY k; + +-- { echoOff } + +DROP TABLE tlb; diff --git a/tests/queries/0_stateless/02972_insert_deduplication_token_hierarchical_inserts.reference b/tests/queries/0_stateless/02972_insert_deduplication_token_hierarchical_inserts.reference new file mode 100644 index 00000000000..71c9053d644 --- /dev/null +++ b/tests/queries/0_stateless/02972_insert_deduplication_token_hierarchical_inserts.reference @@ -0,0 +1,9 @@ +0 +ds_1_1 all_1_1_0 0 +ds_1_2 all_1_1_0 0 +ds_2_1 all_1_1_0 0 +ds_2_1 all_2_2_0 0 +ds_3_1 all_1_1_0 0 +ds_3_1 all_2_2_0 0 +landing all_1_1_0 0 +10 diff --git a/tests/queries/0_stateless/02972_insert_deduplication_token_hierarchical_inserts.sql b/tests/queries/0_stateless/02972_insert_deduplication_token_hierarchical_inserts.sql new file mode 100644 index 00000000000..242133e9122 --- /dev/null +++ b/tests/queries/0_stateless/02972_insert_deduplication_token_hierarchical_inserts.sql @@ -0,0 +1,103 @@ +SET insert_deduplicate = 1; +SET deduplicate_blocks_in_dependent_materialized_views = 1; +SET update_insert_deduplication_token_in_dependent_materialized_views = 1; +SET insert_deduplication_token = 'test'; + +DROP TABLE IF EXISTS landing; +CREATE TABLE landing +( + timestamp UInt64, + 
value UInt64 +) +ENGINE = MergeTree ORDER BY tuple() SETTINGS non_replicated_deduplication_window = 1000; + +DROP TABLE IF EXISTS ds_1_1; +CREATE TABLE ds_1_1 +( + t UInt64, + v UInt64 +) +ENGINE = MergeTree ORDER BY tuple() SETTINGS non_replicated_deduplication_window = 1000; + +DROP VIEW IF EXISTS mv_1_1; +CREATE MATERIALIZED VIEW mv_1_1 TO ds_1_1 as +SELECT + timestamp t, sum(value) v +FROM landing +GROUP BY t; + +DROP TABLE IF EXISTS ds_1_2; +CREATE TABLE ds_1_2 +( + t UInt64, + v UInt64 +) +ENGINE = MergeTree ORDER BY tuple() SETTINGS non_replicated_deduplication_window = 1000; + +DROP VIEW IF EXISTS mv_1_2; +CREATE MATERIALIZED VIEW mv_1_2 TO ds_1_2 as +SELECT + timestamp t, sum(value) v +FROM landing +GROUP BY t; + +DROP TABLE IF EXISTS ds_2_1; +CREATE TABLE ds_2_1 +( + l String, + t DateTime, + v UInt64 +) +ENGINE = MergeTree ORDER BY tuple() SETTINGS non_replicated_deduplication_window = 1000; + +DROP VIEW IF EXISTS mv_2_1; +CREATE MATERIALIZED VIEW mv_2_1 TO ds_2_1 as +SELECT '2_1' l, t, v +FROM ds_1_1; + +DROP VIEW IF EXISTS mv_2_2; +CREATE MATERIALIZED VIEW mv_2_2 TO ds_2_1 as +SELECT '2_2' l, t, v +FROM ds_1_2; + +DROP TABLE IF EXISTS ds_3_1; +CREATE TABLE ds_3_1 +( + l String, + t DateTime, + v UInt64 +) +ENGINE = MergeTree ORDER BY tuple() SETTINGS non_replicated_deduplication_window = 1000; + +DROP VIEW IF EXISTS mv_3_1; +CREATE MATERIALIZED VIEW mv_3_1 TO ds_3_1 as +SELECT '3_1' l, t, v +FROM ds_2_1; + +INSERT INTO landing SELECT 1 as timestamp, 1 AS value FROM numbers(10); + +SELECT sleep(3); + +INSERT INTO landing SELECT 1 as timestamp, 1 AS value FROM numbers(10); + +SYSTEM FLUSH LOGS; +SELECT table, name, error FROM system.part_log +WHERE database = currentDatabase() +ORDER BY table, name; + +SELECT count() FROM landing; + +DROP TABLE landing; + +DROP TABLE ds_1_1; +DROP VIEW mv_1_1; + +DROP TABLE ds_1_2; +DROP VIEW mv_1_2; + +DROP TABLE ds_2_1; +DROP VIEW mv_2_1; +DROP VIEW mv_2_2; + +DROP TABLE ds_3_1; +DROP VIEW mv_3_1; diff --git a/tests/queries/0_stateless/02972_to_string_nullable_timezone.reference b/tests/queries/0_stateless/02972_to_string_nullable_timezone.reference new file mode 100644 index 00000000000..6c362c0207e --- /dev/null +++ b/tests/queries/0_stateless/02972_to_string_nullable_timezone.reference @@ -0,0 +1,3 @@ +2022-01-01 11:13:14 +2022-01-01 11:13:14 +2022-01-01 11:13:14 diff --git a/tests/queries/0_stateless/02972_to_string_nullable_timezone.sql b/tests/queries/0_stateless/02972_to_string_nullable_timezone.sql new file mode 100644 index 00000000000..d8cff4f3c00 --- /dev/null +++ b/tests/queries/0_stateless/02972_to_string_nullable_timezone.sql @@ -0,0 +1,4 @@ +SET session_timezone='Europe/Amsterdam'; +SELECT toString(toDateTime('2022-01-01 12:13:14'), CAST('UTC', 'Nullable(String)')); +SELECT toString(toDateTime('2022-01-01 12:13:14'), materialize(CAST('UTC', 'Nullable(String)'))); +SELECT toString(CAST(toDateTime('2022-01-01 12:13:14'), 'Nullable(DateTime)'), materialize(CAST('UTC', 'Nullable(String)'))); diff --git a/tests/queries/0_stateless/02973_backup_of_in_memory_compressed.reference b/tests/queries/0_stateless/02973_backup_of_in_memory_compressed.reference new file mode 100644 index 00000000000..00479541d22 --- /dev/null +++ b/tests/queries/0_stateless/02973_backup_of_in_memory_compressed.reference @@ -0,0 +1,2 @@ +0 +1000000 Hello, world Hello, world diff --git a/tests/queries/0_stateless/02973_backup_of_in_memory_compressed.sh b/tests/queries/0_stateless/02973_backup_of_in_memory_compressed.sh new file mode 100755 index 
00000000000..b212e42061f --- /dev/null +++ b/tests/queries/0_stateless/02973_backup_of_in_memory_compressed.sh @@ -0,0 +1,31 @@ +#!/usr/bin/env bash +# Tags: no-parallel, no-fasttest +# Because we are creating a backup with fixed path. + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT --multiquery " +DROP TABLE IF EXISTS test; +CREATE TABLE test (x String) ENGINE = Memory SETTINGS compress = 1; +INSERT INTO test SELECT 'Hello, world' FROM numbers(1000000); +" + +$CLICKHOUSE_CLIENT --multiquery " +BACKUP TABLE test TO File('test.zip'); +" --format Null + +$CLICKHOUSE_CLIENT --multiquery " +TRUNCATE TABLE test; +SELECT count() FROM test; +" + +$CLICKHOUSE_CLIENT --multiquery " +RESTORE TABLE test FROM File('test.zip'); +" --format Null + +$CLICKHOUSE_CLIENT --multiquery " +SELECT count(), min(x), max(x) FROM test; +DROP TABLE test; +" diff --git a/tests/queries/0_stateless/02973_block_number_sparse_serialization_and_mutation.reference b/tests/queries/0_stateless/02973_block_number_sparse_serialization_and_mutation.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02973_block_number_sparse_serialization_and_mutation.sql b/tests/queries/0_stateless/02973_block_number_sparse_serialization_and_mutation.sql new file mode 100644 index 00000000000..7a1de2897fb --- /dev/null +++ b/tests/queries/0_stateless/02973_block_number_sparse_serialization_and_mutation.sql @@ -0,0 +1,39 @@ +-- Tags: zookeeper + +-- we need exact block-numbers +SET insert_keeper_fault_injection_probability=0; + +DROP TABLE IF EXISTS table_with_some_columns; + +CREATE TABLE table_with_some_columns( + key UInt64, + value0 UInt8 +) +ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/table_with_some_columns', '1') +ORDER BY key +SETTINGS allow_experimental_block_number_column=1, +ratio_of_defaults_for_sparse_serialization=0.0001, +min_bytes_for_wide_part = 0, +replace_long_file_name_to_hash=0; -- simpler to debug + +INSERT INTO table_with_some_columns SELECT rand(), number + 10 from numbers(100000); + +INSERT INTO table_with_some_columns SELECT rand(), number + 10 from numbers(1); + +OPTIMIZE TABLE table_with_some_columns FINAL; + +INSERT INTO table_with_some_columns SELECT rand(), number+222222222 from numbers(1); + +OPTIMIZE TABLE table_with_some_columns FINAL; + +set alter_sync = 2; + +ALTER TABLE table_with_some_columns DROP COLUMN value0; + +INSERT INTO table_with_some_columns SELECT rand() from numbers(1); + +OPTIMIZE TABLE table_with_some_columns FINAL; + +SELECT *, _block_number FROM table_with_some_columns where not ignore(*) Format Null; + +DROP TABLE IF EXISTS table_with_some_columns; diff --git a/tests/queries/0_stateless/02973_dictionary_table_exception_fix.reference b/tests/queries/0_stateless/02973_dictionary_table_exception_fix.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02973_dictionary_table_exception_fix.sql b/tests/queries/0_stateless/02973_dictionary_table_exception_fix.sql new file mode 100644 index 00000000000..f8061b42670 --- /dev/null +++ b/tests/queries/0_stateless/02973_dictionary_table_exception_fix.sql @@ -0,0 +1,6 @@ +CREATE TABLE test_table (i Int64) engine=MergeTree order by i; +CREATE DICTIONARY test_dict (y String, value UInt64 DEFAULT 0) PRIMARY KEY y SOURCE(CLICKHOUSE(TABLE 'test_table')) LAYOUT(DIRECT()); +CREATE TABLE test_dict (y Int64) engine=MergeTree order by y; -- { 
serverError DICTIONARY_ALREADY_EXISTS } +CREATE DICTIONARY test_table (y String, value UInt64 DEFAULT 0) PRIMARY KEY y SOURCE(CLICKHOUSE(TABLE 'test_table')) LAYOUT(DIRECT()); -- { serverError TABLE_ALREADY_EXISTS } +CREATE DICTIONARY test_dict (y String, value UInt64 DEFAULT 0) PRIMARY KEY y SOURCE(CLICKHOUSE(TABLE 'test_table')) LAYOUT(DIRECT()); -- { serverError DICTIONARY_ALREADY_EXISTS } +CREATE TABLE test_table (y Int64) engine=MergeTree order by y; -- { serverError TABLE_ALREADY_EXISTS } diff --git a/tests/queries/0_stateless/02974_backup_query_format_null.reference b/tests/queries/0_stateless/02974_backup_query_format_null.reference new file mode 100644 index 00000000000..67bfe658c1f --- /dev/null +++ b/tests/queries/0_stateless/02974_backup_query_format_null.reference @@ -0,0 +1,3 @@ +2 +80 +-12345 diff --git a/tests/queries/0_stateless/02974_backup_query_format_null.sh b/tests/queries/0_stateless/02974_backup_query_format_null.sh new file mode 100755 index 00000000000..ddba2f6de16 --- /dev/null +++ b/tests/queries/0_stateless/02974_backup_query_format_null.sh @@ -0,0 +1,22 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +${CLICKHOUSE_CLIENT} -nm --query " +DROP TABLE IF EXISTS tbl; +CREATE TABLE tbl (a Int32) ENGINE = MergeTree() ORDER BY tuple(); +INSERT INTO tbl VALUES (2), (80), (-12345); +" + +backup_name="Disk('backups', '${CLICKHOUSE_TEST_UNIQUE_NAME}')" + +${CLICKHOUSE_CLIENT} --query "BACKUP TABLE tbl TO ${backup_name} FORMAT Null" + +${CLICKHOUSE_CLIENT} -nm --query " +DROP TABLE tbl; +RESTORE ALL FROM ${backup_name} FORMAT Null +" + +${CLICKHOUSE_CLIENT} --query "SELECT * FROM tbl" diff --git a/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_MergeTree.reference b/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_MergeTree.reference new file mode 100644 index 00000000000..531163e1d84 --- /dev/null +++ b/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_MergeTree.reference @@ -0,0 +1,30 @@ +data after INSERT 1 +data after ATTACH 1 +Files before DETACH TABLE +all_1_1_0 + +backups/ordinary_default/data/ordinary_default/data/all_1_1_0: +primary.cidx +serialization.json +metadata_version.txt +default_compression_codec.txt +data.bin +data.cmrk3 +count.txt +columns.txt +checksums.txt + +Files after DETACH TABLE +all_1_1_0 + +backups/ordinary_default/data/ordinary_default/data/all_1_1_0: +primary.cidx +serialization.json +metadata_version.txt +default_compression_codec.txt +data.bin +data.cmrk3 +count.txt +columns.txt +checksums.txt + diff --git a/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_MergeTree.sh b/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_MergeTree.sh new file mode 100755 index 00000000000..386c29704b6 --- /dev/null +++ b/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_MergeTree.sh @@ -0,0 +1,56 @@ +#!/usr/bin/env bash +# Tags: no-fasttest, no-random-settings, no-random-merge-tree-settings +# Tag no-fasttest: requires S3 +# Tag no-random-settings, no-random-merge-tree-settings: to avoid creating extra files like serialization.json, this test is too exotic anyway + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + +# config for clickhouse-disks (to check leftovers) +config="${BASH_SOURCE[0]/.sh/.yml}" + +# only in Atomic ATTACH from s3_plain works +new_database="ordinary_$CLICKHOUSE_DATABASE" +$CLICKHOUSE_CLIENT --allow_deprecated_database_ordinary=1 -q "create database $new_database engine=Ordinary" +CLICKHOUSE_CLIENT=${CLICKHOUSE_CLIENT/--database=$CLICKHOUSE_DATABASE/--database=$new_database} +CLICKHOUSE_DATABASE="$new_database" + +$CLICKHOUSE_CLIENT -nm -q " + drop table if exists data; + create table data (key Int) engine=MergeTree() order by key; + insert into data values (1); + select 'data after INSERT', count() from data; +" + +# suppress output +$CLICKHOUSE_CLIENT -q "backup table data to S3('http://localhost:11111/test/s3_plain/backups/$CLICKHOUSE_DATABASE', 'test', 'testtest')" > /dev/null + +$CLICKHOUSE_CLIENT -nm -q " + drop table data; + attach table data (key Int) engine=MergeTree() order by key + settings + max_suspicious_broken_parts=0, + disk=disk(type=s3_plain, + endpoint='http://localhost:11111/test/s3_plain/backups/$CLICKHOUSE_DATABASE', + access_key_id='test', + secret_access_key='testtest'); + select 'data after ATTACH', count() from data; + + insert into data values (1); -- { serverError TABLE_IS_READ_ONLY } + optimize table data final; -- { serverError TABLE_IS_READ_ONLY } +" + +path=$($CLICKHOUSE_CLIENT -q "SELECT replace(data_paths[1], 's3_plain', '') FROM system.tables WHERE database = '$CLICKHOUSE_DATABASE' AND table = 'data'") +# trim / to fix "Unable to parse ExceptionName: XMinioInvalidObjectName Message: Object name contains unsupported characters." +path=${path%/} + +echo "Files before DETACH TABLE" +clickhouse-disks -C "$config" --disk s3_plain_disk list --recursive "${path:?}" | tail -n+2 + +$CLICKHOUSE_CLIENT -q "detach table data" +echo "Files after DETACH TABLE" +clickhouse-disks -C "$config" --disk s3_plain_disk list --recursive "$path" | tail -n+2 + +# metadata file is left +$CLICKHOUSE_CLIENT --force_remove_data_recursively_on_drop=1 -q "drop database if exists $CLICKHOUSE_DATABASE" diff --git a/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_MergeTree.yml b/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_MergeTree.yml new file mode 100644 index 00000000000..ca5036736d8 --- /dev/null +++ b/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_MergeTree.yml @@ -0,0 +1,7 @@ +storage_configuration: + disks: + s3_plain_disk: + type: s3_plain + endpoint: http://localhost:11111/test/s3_plain/ + access_key_id: clickhouse + secret_access_key: clickhouse diff --git a/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_ReplicatedMergeTree.reference b/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_ReplicatedMergeTree.reference new file mode 100644 index 00000000000..1e191b719a5 --- /dev/null +++ b/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_ReplicatedMergeTree.reference @@ -0,0 +1,30 @@ +data after INSERT 1 +data after ATTACH 1 +Files before DETACH TABLE +all_X_X_X + +backups/ordinary_default/data/ordinary_default/data_read/all_X_X_X: +primary.cidx +serialization.json +metadata_version.txt +default_compression_codec.txt +data.bin +data.cmrk3 +count.txt +columns.txt +checksums.txt + +Files after DETACH TABLE +all_X_X_X + +backups/ordinary_default/data/ordinary_default/data_read/all_X_X_X: +primary.cidx +serialization.json +metadata_version.txt +default_compression_codec.txt +data.bin +data.cmrk3 +count.txt +columns.txt +checksums.txt + diff --git 
a/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_ReplicatedMergeTree.sh b/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_ReplicatedMergeTree.sh new file mode 100755 index 00000000000..bf20247c7aa --- /dev/null +++ b/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_ReplicatedMergeTree.sh @@ -0,0 +1,65 @@ +#!/usr/bin/env bash +# Tags: no-fasttest, no-random-settings, no-random-merge-tree-settings +# Tag no-fasttest: requires S3 +# Tag no-random-settings, no-random-merge-tree-settings: to avoid creating extra files like serialization.json, this test is too exotic anyway + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +config="${BASH_SOURCE[0]/.sh/.yml}" + +# only in Atomic ATTACH from s3_plain works +new_database="ordinary_$CLICKHOUSE_DATABASE" +$CLICKHOUSE_CLIENT --allow_deprecated_database_ordinary=1 -q "create database $new_database engine=Ordinary" +CLICKHOUSE_CLIENT=${CLICKHOUSE_CLIENT/--database=$CLICKHOUSE_DATABASE/--database=$new_database} +CLICKHOUSE_DATABASE="$new_database" + +$CLICKHOUSE_CLIENT -nm -q " + drop table if exists data_read; + drop table if exists data_write; + + create table data_write (key Int) engine=ReplicatedMergeTree('/tables/{database}/data', 'write') order by key; + create table data_read (key Int) engine=ReplicatedMergeTree('/tables/{database}/data', 'read') order by key; + + insert into data_write values (1); + system sync replica data_read; + select 'data after INSERT', count() from data_read; +" + +# suppress output +$CLICKHOUSE_CLIENT -q "backup table data_read to S3('http://localhost:11111/test/s3_plain/backups/$CLICKHOUSE_DATABASE', 'test', 'testtest')" > /dev/null + +$CLICKHOUSE_CLIENT -nm -q " + drop table data_read; + attach table data_read (key Int) engine=ReplicatedMergeTree('/tables/{database}/data', 'read') order by key + settings + max_suspicious_broken_parts=0, + disk=disk(type=s3_plain, + endpoint='http://localhost:11111/test/s3_plain/backups/$CLICKHOUSE_DATABASE', + access_key_id='test', + secret_access_key='testtest'); + select 'data after ATTACH', count() from data_read; + + insert into data_read values (1); -- { serverError TABLE_IS_READ_ONLY } + optimize table data_read final; -- { serverError TABLE_IS_READ_ONLY } + system sync replica data_read; -- { serverError TABLE_IS_READ_ONLY } +" + +path=$($CLICKHOUSE_CLIENT -q "SELECT replace(data_paths[1], 's3_plain', '') FROM system.tables WHERE database = '$CLICKHOUSE_DATABASE' AND table = 'data_read'") +# trim / to fix "Unable to parse ExceptionName: XMinioInvalidObjectName Message: Object name contains unsupported characters." 
+path=${path%/} + +echo "Files before DETACH TABLE" +# sed to match any part, since in case of fault injection part name may not be all_0_0_0 but all_1_1_0 +clickhouse-disks -C "$config" --disk s3_plain_disk list --recursive "${path:?}" | tail -n+2 | sed 's/all_[^_]*_[^_]*_0/all_X_X_X/g' + +$CLICKHOUSE_CLIENT -nm -q " + detach table data_read; + detach table data_write; +" +echo "Files after DETACH TABLE" +clickhouse-disks -C "$config" --disk s3_plain_disk list --recursive "$path" | tail -n+2 | sed 's/all_[^_]*_[^_]*_0/all_X_X_X/g' + +# metadata file is left +$CLICKHOUSE_CLIENT --force_remove_data_recursively_on_drop=1 -q "drop database if exists $CLICKHOUSE_DATABASE" diff --git a/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_ReplicatedMergeTree.yml b/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_ReplicatedMergeTree.yml new file mode 100644 index 00000000000..ca5036736d8 --- /dev/null +++ b/tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_ReplicatedMergeTree.yml @@ -0,0 +1,7 @@ +storage_configuration: + disks: + s3_plain_disk: + type: s3_plain + endpoint: http://localhost:11111/test/s3_plain/ + access_key_id: clickhouse + secret_access_key: clickhouse diff --git a/tests/queries/0_stateless/02981_vertical_merges_memory_usage.reference b/tests/queries/0_stateless/02981_vertical_merges_memory_usage.reference new file mode 100644 index 00000000000..60c254e152b --- /dev/null +++ b/tests/queries/0_stateless/02981_vertical_merges_memory_usage.reference @@ -0,0 +1 @@ +Vertical OK diff --git a/tests/queries/0_stateless/02981_vertical_merges_memory_usage.sql b/tests/queries/0_stateless/02981_vertical_merges_memory_usage.sql new file mode 100644 index 00000000000..1305f02c044 --- /dev/null +++ b/tests/queries/0_stateless/02981_vertical_merges_memory_usage.sql @@ -0,0 +1,35 @@ +-- Tags: long + +DROP TABLE IF EXISTS t_vertical_merge_memory; + +CREATE TABLE t_vertical_merge_memory (id UInt64, arr Array(String)) +ENGINE = MergeTree ORDER BY id +SETTINGS + min_bytes_for_wide_part = 0, + vertical_merge_algorithm_min_rows_to_activate = 1, + vertical_merge_algorithm_min_columns_to_activate = 1, + index_granularity = 8192, + index_granularity_bytes = '10M', + merge_max_block_size = 8192, + merge_max_block_size_bytes = '10M'; + +INSERT INTO t_vertical_merge_memory SELECT number, arrayMap(x -> repeat('a', 50), range(1000)) FROM numbers(30000); +INSERT INTO t_vertical_merge_memory SELECT number, arrayMap(x -> repeat('a', 50), range(1000)) FROM numbers(30000); + +OPTIMIZE TABLE t_vertical_merge_memory FINAL; + +SYSTEM FLUSH LOGS; + +SELECT + merge_algorithm, + peak_memory_usage < 500 * 1024 * 1024 + ? 
'OK' + : format('FAIL: memory usage: {}', formatReadableSize(peak_memory_usage)) +FROM system.part_log +WHERE + database = currentDatabase() + AND table = 't_vertical_merge_memory' + AND event_type = 'MergeParts' + AND length(merged_from) = 2; + +DROP TABLE IF EXISTS t_vertical_merge_memory; diff --git a/tests/queries/0_stateless/data_parquet/02960_polygon_bound_bug.parquet b/tests/queries/0_stateless/data_parquet/02960_polygon_bound_bug.parquet new file mode 100644 index 00000000000..b4aedb8f964 Binary files /dev/null and b/tests/queries/0_stateless/data_parquet/02960_polygon_bound_bug.parquet differ diff --git a/tests/queries/0_stateless/data_parquet/02969.zip b/tests/queries/0_stateless/data_parquet/02969.zip new file mode 100644 index 00000000000..4c4c90261d0 Binary files /dev/null and b/tests/queries/0_stateless/data_parquet/02969.zip differ diff --git a/tests/queries/1_stateful/00061_storage_buffer.sql b/tests/queries/1_stateful/00061_storage_buffer.sql index e3cda3de36d..220f4fb3686 100644 --- a/tests/queries/1_stateful/00061_storage_buffer.sql +++ b/tests/queries/1_stateful/00061_storage_buffer.sql @@ -1,7 +1,13 @@ DROP TABLE IF EXISTS test.hits_dst; DROP TABLE IF EXISTS test.hits_buffer; -CREATE TABLE test.hits_dst AS test.hits; +CREATE TABLE test.hits_dst AS test.hits +ENGINE = MergeTree +PARTITION BY toYYYYMM(EventDate) +ORDER BY (CounterID, EventDate, intHash32(UserID)) +SAMPLE BY intHash32(UserID) +SETTINGS storage_policy = 'default'; + CREATE TABLE test.hits_buffer AS test.hits_dst ENGINE = Buffer(test, hits_dst, 8, 600, 600, 1000000, 1000000, 100000000, 1000000000); INSERT INTO test.hits_buffer SELECT * FROM test.hits WHERE CounterID = 800784; diff --git a/tests/queries/1_stateful/00082_quantiles.sql b/tests/queries/1_stateful/00082_quantiles.sql index 3c42b43f3f9..6405a27a050 100644 --- a/tests/queries/1_stateful/00082_quantiles.sql +++ b/tests/queries/1_stateful/00082_quantiles.sql @@ -1,3 +1,6 @@ +-- The test uses quite a bit of memory. 
A low max_bytes_before_external_group_by value will lead to high disk usage +-- which in CI leads to timeouts +SET max_bytes_before_external_group_by=0; SELECT CounterID AS k, quantileExact(0.5)(ResolutionWidth) FROM test.hits GROUP BY k ORDER BY count() DESC, CounterID LIMIT 10; SELECT CounterID AS k, quantilesExact(0.1, 0.5, 0.9, 0.99, 0.999)(ResolutionWidth) FROM test.hits GROUP BY k ORDER BY count() DESC, CounterID LIMIT 10; diff --git a/tests/queries/1_stateful/00152_insert_different_granularity.reference b/tests/queries/1_stateful/00152_insert_different_granularity.reference index c573f1c3072..209e3ef4b62 100644 --- a/tests/queries/1_stateful/00152_insert_different_granularity.reference +++ b/tests/queries/1_stateful/00152_insert_different_granularity.reference @@ -1,2 +1 @@ -8873918 -8873998 +20 diff --git a/tests/queries/1_stateful/00152_insert_different_granularity.sql b/tests/queries/1_stateful/00152_insert_different_granularity.sql index 294d71b384b..b5b3bc18231 100644 --- a/tests/queries/1_stateful/00152_insert_different_granularity.sql +++ b/tests/queries/1_stateful/00152_insert_different_granularity.sql @@ -5,54 +5,18 @@ DROP TABLE IF EXISTS fixed_granularity_table; CREATE TABLE fixed_granularity_table (`WatchID` UInt64, `JavaEnable` UInt8, `Title` String, `GoodEvent` Int16, `EventTime` DateTime, `EventDate` Date, `CounterID` UInt32, `ClientIP` UInt32, `ClientIP6` FixedString(16), `RegionID` UInt32, `UserID` UInt64, `CounterClass` Int8, `OS` UInt8, `UserAgent` UInt8, `URL` String, `Referer` String, `URLDomain` String, `RefererDomain` String, `Refresh` UInt8, `IsRobot` UInt8, `RefererCategories` Array(UInt16), `URLCategories` Array(UInt16), `URLRegions` Array(UInt32), `RefererRegions` Array(UInt32), `ResolutionWidth` UInt16, `ResolutionHeight` UInt16, `ResolutionDepth` UInt8, `FlashMajor` UInt8, `FlashMinor` UInt8, `FlashMinor2` String, `NetMajor` UInt8, `NetMinor` UInt8, `UserAgentMajor` UInt16, `UserAgentMinor` FixedString(2), `CookieEnable` UInt8, `JavascriptEnable` UInt8, `IsMobile` UInt8, `MobilePhone` UInt8, `MobilePhoneModel` String, `Params` String, `IPNetworkID` UInt32, `TraficSourceID` Int8, `SearchEngineID` UInt16, `SearchPhrase` String, `AdvEngineID` UInt8, `IsArtifical` UInt8, `WindowClientWidth` UInt16, `WindowClientHeight` UInt16, `ClientTimeZone` Int16, `ClientEventTime` DateTime, `SilverlightVersion1` UInt8, `SilverlightVersion2` UInt8, `SilverlightVersion3` UInt32, `SilverlightVersion4` UInt16, `PageCharset` String, `CodeVersion` UInt32, `IsLink` UInt8, `IsDownload` UInt8, `IsNotBounce` UInt8, `FUniqID` UInt64, `HID` UInt32, `IsOldCounter` UInt8, `IsEvent` UInt8, `IsParameter` UInt8, `DontCountHits` UInt8, `WithHash` UInt8, `HitColor` FixedString(1), `UTCEventTime` DateTime, `Age` UInt8, `Sex` UInt8, `Income` UInt8, `Interests` UInt16, `Robotness` UInt8, `GeneralInterests` Array(UInt16), `RemoteIP` UInt32, `RemoteIP6` FixedString(16), `WindowName` Int32, `OpenerName` Int32, `HistoryLength` Int16, `BrowserLanguage` FixedString(2), `BrowserCountry` FixedString(2), `SocialNetwork` String, `SocialAction` String, `HTTPError` UInt16, `SendTiming` Int32, `DNSTiming` Int32, `ConnectTiming` Int32, `ResponseStartTiming` Int32, `ResponseEndTiming` Int32, `FetchTiming` Int32, `RedirectTiming` Int32, `DOMInteractiveTiming` Int32, `DOMContentLoadedTiming` Int32, `DOMCompleteTiming` Int32, `LoadEventStartTiming` Int32, `LoadEventEndTiming` Int32, `NSToDOMContentLoadedTiming` Int32, `FirstPaintTiming` Int32, `RedirectCount` Int8, `SocialSourceNetworkID` UInt8, 
`SocialSourcePage` String, `ParamPrice` Int64, `ParamOrderID` String, `ParamCurrency` FixedString(3), `ParamCurrencyID` UInt16, `GoalsReached` Array(UInt32), `OpenstatServiceName` String, `OpenstatCampaignID` String, `OpenstatAdID` String, `OpenstatSourceID` String, `UTMSource` String, `UTMMedium` String, `UTMCampaign` String, `UTMContent` String, `UTMTerm` String, `FromTag` String, `HasGCLID` UInt8, `RefererHash` UInt64, `URLHash` UInt64, `CLID` UInt32, `YCLID` UInt64, `ShareService` String, `ShareURL` String, `ShareTitle` String, `ParsedParams.Key1` Array(String), `ParsedParams.Key2` Array(String), `ParsedParams.Key3` Array(String), `ParsedParams.Key4` Array(String), `ParsedParams.Key5` Array(String), `ParsedParams.ValueDouble` Array(Float64), `IslandID` FixedString(16), `RequestNum` UInt32, `RequestTry` UInt8) ENGINE = MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity=8192, index_granularity_bytes=0, min_bytes_for_wide_part = 0; -- looks like default table before update -ALTER TABLE fixed_granularity_table REPLACE PARTITION 201403 FROM test.hits; - INSERT INTO fixed_granularity_table SELECT * FROM test.hits LIMIT 10; -- should still have non adaptive granularity - INSERT INTO fixed_granularity_table SELECT * FROM test.hits LIMIT 10; -- We have removed testing of OPTIMIZE because it's too heavy on very slow builds (debug + coverage + thread fuzzer with sleeps) -- OPTIMIZE TABLE fixed_granularity_table FINAL; -- and even after optimize DETACH TABLE fixed_granularity_table; - ATTACH TABLE fixed_granularity_table; ALTER TABLE fixed_granularity_table DETACH PARTITION 201403; - ALTER TABLE fixed_granularity_table ATTACH PARTITION 201403; SELECT count() from fixed_granularity_table; DROP TABLE IF EXISTS fixed_granularity_table; - -ALTER TABLE test.hits DETACH PARTITION 201403; - -ALTER TABLE test.hits ATTACH PARTITION 201403; - -DROP TABLE IF EXISTS hits_copy; - -CREATE TABLE hits_copy (`WatchID` UInt64, `JavaEnable` UInt8, `Title` String, `GoodEvent` Int16, `EventTime` DateTime, `EventDate` Date, `CounterID` UInt32, `ClientIP` UInt32, `ClientIP6` FixedString(16), `RegionID` UInt32, `UserID` UInt64, `CounterClass` Int8, `OS` UInt8, `UserAgent` UInt8, `URL` String, `Referer` String, `URLDomain` String, `RefererDomain` String, `Refresh` UInt8, `IsRobot` UInt8, `RefererCategories` Array(UInt16), `URLCategories` Array(UInt16), `URLRegions` Array(UInt32), `RefererRegions` Array(UInt32), `ResolutionWidth` UInt16, `ResolutionHeight` UInt16, `ResolutionDepth` UInt8, `FlashMajor` UInt8, `FlashMinor` UInt8, `FlashMinor2` String, `NetMajor` UInt8, `NetMinor` UInt8, `UserAgentMajor` UInt16, `UserAgentMinor` FixedString(2), `CookieEnable` UInt8, `JavascriptEnable` UInt8, `IsMobile` UInt8, `MobilePhone` UInt8, `MobilePhoneModel` String, `Params` String, `IPNetworkID` UInt32, `TraficSourceID` Int8, `SearchEngineID` UInt16, `SearchPhrase` String, `AdvEngineID` UInt8, `IsArtifical` UInt8, `WindowClientWidth` UInt16, `WindowClientHeight` UInt16, `ClientTimeZone` Int16, `ClientEventTime` DateTime, `SilverlightVersion1` UInt8, `SilverlightVersion2` UInt8, `SilverlightVersion3` UInt32, `SilverlightVersion4` UInt16, `PageCharset` String, `CodeVersion` UInt32, `IsLink` UInt8, `IsDownload` UInt8, `IsNotBounce` UInt8, `FUniqID` UInt64, `HID` UInt32, `IsOldCounter` UInt8, `IsEvent` UInt8, `IsParameter` UInt8, `DontCountHits` UInt8, `WithHash` UInt8, `HitColor` FixedString(1), `UTCEventTime` DateTime, `Age` UInt8, `Sex` 
UInt8, `Income` UInt8, `Interests` UInt16, `Robotness` UInt8, `GeneralInterests` Array(UInt16), `RemoteIP` UInt32, `RemoteIP6` FixedString(16), `WindowName` Int32, `OpenerName` Int32, `HistoryLength` Int16, `BrowserLanguage` FixedString(2), `BrowserCountry` FixedString(2), `SocialNetwork` String, `SocialAction` String, `HTTPError` UInt16, `SendTiming` Int32, `DNSTiming` Int32, `ConnectTiming` Int32, `ResponseStartTiming` Int32, `ResponseEndTiming` Int32, `FetchTiming` Int32, `RedirectTiming` Int32, `DOMInteractiveTiming` Int32, `DOMContentLoadedTiming` Int32, `DOMCompleteTiming` Int32, `LoadEventStartTiming` Int32, `LoadEventEndTiming` Int32, `NSToDOMContentLoadedTiming` Int32, `FirstPaintTiming` Int32, `RedirectCount` Int8, `SocialSourceNetworkID` UInt8, `SocialSourcePage` String, `ParamPrice` Int64, `ParamOrderID` String, `ParamCurrency` FixedString(3), `ParamCurrencyID` UInt16, `GoalsReached` Array(UInt32), `OpenstatServiceName` String, `OpenstatCampaignID` String, `OpenstatAdID` String, `OpenstatSourceID` String, `UTMSource` String, `UTMMedium` String, `UTMCampaign` String, `UTMContent` String, `UTMTerm` String, `FromTag` String, `HasGCLID` UInt8, `RefererHash` UInt64, `URLHash` UInt64, `CLID` UInt32, `YCLID` UInt64, `ShareService` String, `ShareURL` String, `ShareTitle` String, `ParsedParams.Key1` Array(String), `ParsedParams.Key2` Array(String), `ParsedParams.Key3` Array(String), `ParsedParams.Key4` Array(String), `ParsedParams.Key5` Array(String), `ParsedParams.ValueDouble` Array(Float64), `IslandID` FixedString(16), `RequestNum` UInt32, `RequestTry` UInt8) - ENGINE = MergeTree() - PARTITION BY toYYYYMM(EventDate) - ORDER BY (CounterID, EventDate, intHash32(UserID)) - SAMPLE BY intHash32(UserID) - SETTINGS index_granularity=8192, min_bytes_for_wide_part = 0; - -ALTER TABLE hits_copy REPLACE PARTITION 201403 FROM test.hits; - --- It's important to test table, which were created before server update -INSERT INTO test.hits SELECT * FROM hits_copy LIMIT 100; - -ALTER TABLE test.hits DETACH PARTITION 201403; - -ALTER TABLE test.hits ATTACH PARTITION 201403; - --- OPTIMIZE TABLE test.hits; - -SELECT count() FROM test.hits; - --- restore hits -ALTER TABLE test.hits REPLACE PARTITION 201403 FROM hits_copy; - -DROP TABLE IF EXISTS hits_copy; diff --git a/tests/queries/1_stateful/00157_cache_dictionary.sql b/tests/queries/1_stateful/00157_cache_dictionary.sql index 15bd4cbe6d4..3621ff82126 100644 --- a/tests/queries/1_stateful/00157_cache_dictionary.sql +++ b/tests/queries/1_stateful/00157_cache_dictionary.sql @@ -1,7 +1,14 @@ -- Tags: no-tsan, no-parallel DROP TABLE IF EXISTS test.hits_1m; -CREATE TABLE test.hits_1m as test.hits; + +CREATE TABLE test.hits_1m AS test.hits +ENGINE = MergeTree +PARTITION BY toYYYYMM(EventDate) +ORDER BY (CounterID, EventDate, intHash32(UserID)) +SAMPLE BY intHash32(UserID) +SETTINGS storage_policy = 'default'; + INSERT INTO test.hits_1m SELECT * FROM test.hits LIMIT 1000000; CREATE DATABASE IF NOT EXISTS db_dict; diff --git a/tests/queries/1_stateful/00165_jit_aggregate_functions.reference b/tests/queries/1_stateful/00165_jit_aggregate_functions.reference index fa084170f53..62baba2af8b 100644 --- a/tests/queries/1_stateful/00165_jit_aggregate_functions.reference +++ b/tests/queries/1_stateful/00165_jit_aggregate_functions.reference @@ -68,73 +68,3 @@ Simple functions with non compilable function without key 4611686725751467379 9223371678237104442 3626326766789368100 61384643584599682996279588 408650940859.2896 104735.01095549858 8873898 9223372036854775807 
4611686018427387904 3818489297630359920 Simple functions if combinator without key 4611687533683519016 9223371678237104442 4124667747700004330 930178817930.5122 321189.2280948817 4434274 9223372036854775806 4611686018427387904 2265422677606390266 -Aggregation without JIT compilation -Simple functions -1704509 4611700827100483880 9223360787015464643 10441337359398154812 19954243669348.844 9648741.579254271 523264 9223372036854775807 4611686018427387904 4544239379628300646 -732797 4611701940806302259 9223355550934604746 977192643464016658 2054229034942.3723 51998323.94457991 475698 9223372036854775807 4611686018427387904 4091184823334377716 -598875 4611701407242345792 9223362250391155632 9312163881623734456 27615161624211.875 12261797.824844675 337212 9223372036854775807 4611686018427387904 3725992504798702670 -792887 4611699550286611812 9223290551912005343 6930300520201292824 27479710385933.586 53095331.60360441 252197 9223372036854775807 4611686018427387904 6536441508464694614 -3807842 4611710821592843606 9223326163906184987 16710274896338005145 85240848090850.69 22373416.533275086 196036 9223372036854775807 4611686018427387904 1797862753609257231 -25703952 4611709443519524003 9223353913449113943 9946868158853570839 67568783303242.086 3154349.826950714 147211 9223372036854775807 4611686018427387904 8737124378202300429 -716829 4611852156092872082 9223361623076951140 15381015774917924786 170693446547158.72 201431892.4773785 90109 9223372036854775807 4611686018427387904 8209915323001116338 -59183 4611730685242027332 9223354909338698162 8078812522502896568 94622946187035.42 1425270865.0901496 85379 9223372036854775807 4611686018427387904 8909082036598843562 -33010362 4611704682869732882 9223268545373999677 2064452191838585926 26532987929602.555 3695122.4062526934 77807 9223372036854775807 4611686018427387904 5411365383789552292 -800784 4611752907938305166 9223340418389788041 18082918611792817587 233352070043266.62 36535786.81446395 77492 9223372036854775807 4611686018427387904 2059255810151375435 -20810645 4611712185532639162 9223218900001937412 4996531385439292694 68246505203164.63 6316535.831023813 73213 9223372036854775807 4611686018427387904 8852740550386113674 -25843850 4611690025407720929 9223346023778617822 12755881190906812868 185015319325648.16 9962165.34831339 68945 9223372036854775807 4611686018427387904 7849665866595760148 -23447120 4611796031755620254 9223329309291309758 17231649548755339966 255019232629204.38 7937191.271698021 67570 9223372036854775807 4611686018427387904 3435410911925610424 -14739804 4611692230555590277 9223313509005166531 2458378896777063244 38308020331864.36 14590240.469105456 64174 9223372036854775807 4611686018427387904 511910855240035342 -32077710 4611884228437061959 9223352444952988904 12965822147651192908 214467085941034.7 7257521.096258734 60456 9223372036854775807 4611686018427387904 2256071920672551964 -22446879 4611846229717089436 9223124373140579096 13530160492087688838 231724477077663.4 4737362.521046629 58389 9223372036854775807 4611686018427387904 6236276364886386410 -170282 4611833225706935900 9223371583739401906 8076893424988479310 141657635880324.8 1613795518.1065989 57017 9223372036854775807 4611686018427387904 4755775861151848768 -11482817 4611708000353743073 9223337838355779113 14841435427430843458 283531099960470.8 9938452.835998287 52345 9223372036854775807 4611686018427387904 5371586112642152558 -63469 4611695097019173921 9223353530156141191 6296784708578574520 120762239817777.88 579655378.4603049 52142 9223372036854775807 
4611686018427387904 4150567963952988110 -29103473 4611744585914335132 9223333530281362537 5908285283932344933 123712996438970.34 867841.595541967 47758 9223372036854775807 4611686018427387904 3238284030821087319 -Simple functions with non compilable function -1704509 4611700827100483880 9223360787015464643 10441337359398154812 3620921835565807284859452 19954243669348.844 9648741.579254271 523264 9223372036854775807 4611686018427387904 4544239379628300646 -732797 4611701940806302259 9223355550934604746 977192643464016658 3289442827160604417733394 2054229034942.3723 51998323.94457991 475698 9223372036854775807 4611686018427387904 4091184823334377716 -598875 4611701407242345792 9223362250391155632 9312163881623734456 2330921446573746856380600 27615161624211.875 12261797.824844675 337212 9223372036854775807 4611686018427387904 3725992504798702670 -792887 4611699550286611812 9223290551912005343 6930300520201292824 1745179600137886041476120 27479710385933.586 53095331.60360441 252197 9223372036854775807 4611686018427387904 6536441508464694614 -3807842 4611710821592843606 9223326163906184987 16710274896338005145 1356295121550317411019929 85240848090850.69 22373416.533275086 196036 9223372036854775807 4611686018427387904 1797862753609257231 -25703952 4611709443519524003 9223353913449113943 9946868158853570839 1018731388338768841564439 67568783303242.086 3154349.826950714 147211 9223372036854775807 4611686018427387904 8737124378202300429 -716829 4611852156092872082 9223361623076951140 15381015774917924786 623810478612337115371442 170693446547158.72 201431892.4773785 90109 9223372036854775807 4611686018427387904 8209915323001116338 -59183 4611730685242027332 9223354909338698162 8078812522502896568 589916507545680254024632 94622946187035.42 1425270865.0901496 85379 9223372036854775807 4611686018427387904 8909082036598843562 -33010362 4611704682869732882 9223268545373999677 2064452191838585926 538517864195994778911814 26532987929602.555 3695122.4062526934 77807 9223372036854775807 4611686018427387904 5411365383789552292 -800784 4611752907938305166 9223340418389788041 18082918611792817587 535545510122473785781683 233352070043266.62 36535786.81446395 77492 9223372036854775807 4611686018427387904 2059255810151375435 -20810645 4611712185532639162 9223218900001937412 4996531385439292694 506405014842860050255126 68246505203164.63 6316535.831023813 73213 9223372036854775807 4611686018427387904 8852740550386113674 -25843850 4611690025407720929 9223346023778617822 12755881190906812868 476547495537329753708996 185015319325648.16 9962165.34831339 68945 9223372036854775807 4611686018427387904 7849665866595760148 -23447120 4611796031755620254 9223329309291309758 17231649548755339966 467236365548464278670014 255019232629204.38 7937191.271698021 67570 9223372036854775807 4611686018427387904 3435410911925610424 -14739804 4611692230555590277 9223313509005166531 2458378896777063244 444126268697527941770060 38308020331864.36 14590240.469105456 64174 9223372036854775807 4611686018427387904 511910855240035342 -32077710 4611884228437061959 9223352444952988904 12965822147651192908 417407443977973675608140 214467085941034.7 7257521.096258734 60456 9223372036854775807 4611686018427387904 2256071920672551964 -22446879 4611846229717089436 9223124373140579096 13530160492087688838 403462269796593691082374 231724477077663.4 4737362.521046629 58389 9223372036854775807 4611686018427387904 6236276364886386410 -170282 4611833225706935900 9223371583739401906 8076893424988479310 394417911933408911581006 141657635880324.8 
1613795518.1065989 57017 9223372036854775807 4611686018427387904 4755775861151848768 -11482817 4611708000353743073 9223337838355779113 14841435427430843458 361995300393829962204226 283531099960470.8 9938452.835998287 52345 9223372036854775807 4611686018427387904 5371586112642152558 -63469 4611695097019173921 9223353530156141191 6296784708578574520 360843057610541117735096 120762239817777.88 579655378.4603049 52142 9223372036854775807 4611686018427387904 4150567963952988110 -29103473 4611744585914335132 9223333530281362537 5908285283932344933 330534668598011678200421 123712996438970.34 867841.595541967 47758 9223372036854775807 4611686018427387904 3238284030821087319 -Simple functions if combinator -1704509 4611700827100483880 9223310246721229500 16398241567152875142 62618822667209.71 2224726.7626273884 261874 9223372036854775806 4611686018427387904 4518874482384062894 -732797 4611721382223060002 9223355550934604746 16281585268876620522 68472164943295.68 5898616.931652982 237784 9223372036854775806 4611686018427387904 3641900047478154650 -598875 4611701407242345792 9223362250391155632 3577699408183553052 21300140553347.42 53771550.26565126 167966 9223372036854775806 4611686018427387904 1688477495230210408 -792887 4611699550286611812 9223164887726235740 7088177025760385824 56461952267903.89 92835869.96920013 125539 9223372036854775806 4611686018427387904 4850868151095058072 -3807842 4611710821592843606 9223283397553859544 5756765290752687660 58835559208469.4 39794091.419183925 97845 9223372036854775806 4611686018427387904 6845214684357194564 -25703952 4611784761593342388 9223241341744449690 4782279928971192568 65182094768443.91 9276773.708181158 73368 9223372036854775806 4611686018427387904 1384302533387727316 -716829 4611852156092872082 9223361623076951140 8613712481895484190 191445613359755.62 291083243.75407773 44993 9223372036854775806 4611686018427387904 6344483471397203854 -59183 4611730685242027332 9223354909338698162 18369075291092794110 429013599530392 5925109959.715378 42817 9223372036854775806 4611686018427387904 5909305558020042898 -33010362 4611704682869732882 9223092117352620518 9991152681891671022 257099731913529.5 12412830.045471078 38861 9223372036854775806 4611686018427387904 4672855013852508626 -800784 4611752907938305166 9223309994342931384 5251877538869750510 135472890315726.03 53535427.52018088 38767 9223372036854775806 4611686018427387904 7801864489649220514 -20810645 4611712185532639162 9223218900001937412 11803718472901310700 323593455407553 10496765.20741332 36477 9223372036854775806 4611686018427387904 5941995311893397960 -25843850 4611744529689964352 9223346023778617822 127137885677350808 3700925266420.715 18966925.191309396 34353 9223372036854775806 4611686018427387904 6700111718676827412 -23447120 4611796031755620254 9223329309291309758 1841522159325376278 54534534450526.42 6271211.193812284 33768 9223372036854775806 4611686018427387904 2325654077031843898 -14739804 4611762063154116632 9223007205463222212 16302703534054321116 506987919332451.8 6885575.861759452 32156 9223372036854775806 4611686018427387904 2114922310535979832 -32077710 4612033458080771112 9223352444952988904 421072759851674408 13955745719596.793 12220152.393889504 30172 9223372036854775806 4611686018427387904 4399934528735249092 -22446879 4611846229717089436 9223124373140579096 6577134317587565298 224866980668999.47 2482202.163802278 29249 9223372036854775806 4611686018427387904 8763910740678180498 -170282 4611833225706935900 9223371583739401906 15764226366913732386 551447384017691 
2515144222.953728 28587 9223372036854775806 4611686018427387904 8217388408377809010 -11482817 4611990575414646848 9223302669582414438 9828522700609834800 378121905921203.2 34845264.2080656 25993 9223372036854775806 4611686018427387904 4689180182672571856 -63469 4612175339998036670 9222961628400798084 17239621485933250238 663164390134376.5 7825349797.6059 25996 9223372036854775806 4611686018427387904 2067736879306995526 -29103473 4611744585914335132 9223035551850347954 12590190375872647672 525927999326314.7 26049107.15514301 23939 9223372036854775806 4611686018427387904 8318055464870862444 -Simple functions without key -4611686725751467379 9223371678237104442 3626326766789368100 408650940859.2896 104735.01095549858 8873898 9223372036854775807 4611686018427387904 3818489297630359920 -Simple functions with non compilable function without key -4611686725751467379 9223371678237104442 3626326766789368100 61384643584599682996279588 408650940859.2896 104735.01095549858 8873898 9223372036854775807 4611686018427387904 3818489297630359920 -Simple functions if combinator without key -4611687533683519016 9223371678237104442 4124667747700004330 930178817930.5122 321189.2280948817 4434274 9223372036854775806 4611686018427387904 2265422677606390266 diff --git a/tests/queries/1_stateful/00165_jit_aggregate_functions.sql b/tests/queries/1_stateful/00165_jit_aggregate_functions.sql index 157d5892ad8..03d29601804 100644 --- a/tests/queries/1_stateful/00165_jit_aggregate_functions.sql +++ b/tests/queries/1_stateful/00165_jit_aggregate_functions.sql @@ -1,6 +1,7 @@ -SET compile_aggregate_expressions = 1; SET min_count_to_compile_aggregate_expression = 0; -SET max_bytes_before_external_group_by='200M'; -- might be randomized to 1 leading to timeout +-- The test uses many aggregations. 
A low max_bytes_before_external_group_by value will lead to high disk usage +-- which in CI leads to timeouts +SET max_bytes_before_external_group_by=0; SELECT 'Aggregation using JIT compilation'; @@ -101,104 +102,3 @@ SELECT FROM test.hits ORDER BY min_watch_id DESC LIMIT 20; - -SET compile_aggregate_expressions = 0; - -SELECT 'Aggregation without JIT compilation'; - -SELECT 'Simple functions'; - -SELECT - CounterID, - min(WatchID), - max(WatchID), - sum(WatchID), - avg(WatchID), - avgWeighted(WatchID, CounterID), - count(WatchID), - groupBitOr(WatchID), - groupBitAnd(WatchID), - groupBitXor(WatchID) -FROM test.hits -GROUP BY CounterID ORDER BY count() DESC LIMIT 20; - -SELECT 'Simple functions with non compilable function'; -SELECT - CounterID, - min(WatchID), - max(WatchID), - sum(WatchID), - sum(toUInt128(WatchID)), - avg(WatchID), - avgWeighted(WatchID, CounterID), - count(WatchID), - groupBitOr(WatchID), - groupBitAnd(WatchID), - groupBitXor(WatchID) -FROM test.hits -GROUP BY CounterID ORDER BY count() DESC LIMIT 20; - -SELECT 'Simple functions if combinator'; - -WITH (WatchID % 2 == 0) AS predicate -SELECT - CounterID, - minIf(WatchID,predicate), - maxIf(WatchID, predicate), - sumIf(WatchID, predicate), - avgIf(WatchID, predicate), - avgWeightedIf(WatchID, CounterID, predicate), - countIf(WatchID, predicate), - groupBitOrIf(WatchID, predicate), - groupBitAndIf(WatchID, predicate), - groupBitXorIf(WatchID, predicate) -FROM test.hits -GROUP BY CounterID ORDER BY count() DESC LIMIT 20; - -SELECT 'Simple functions without key'; - -SELECT - min(WatchID) AS min_watch_id, - max(WatchID), - sum(WatchID), - avg(WatchID), - avgWeighted(WatchID, CounterID), - count(WatchID), - groupBitOr(WatchID), - groupBitAnd(WatchID), - groupBitXor(WatchID) -FROM test.hits -ORDER BY min_watch_id DESC LIMIT 20; - -SELECT 'Simple functions with non compilable function without key'; - -SELECT - min(WatchID) AS min_watch_id, - max(WatchID), - sum(WatchID), - sum(toUInt128(WatchID)), - avg(WatchID), - avgWeighted(WatchID, CounterID), - count(WatchID), - groupBitOr(WatchID), - groupBitAnd(WatchID), - groupBitXor(WatchID) -FROM test.hits -ORDER BY min_watch_id DESC LIMIT 20; - -SELECT 'Simple functions if combinator without key'; - -WITH (WatchID % 2 == 0) AS predicate -SELECT - minIf(WatchID, predicate) as min_watch_id, - maxIf(WatchID, predicate), - sumIf(WatchID, predicate), - avgIf(WatchID, predicate), - avgWeightedIf(WatchID, CounterID, predicate), - countIf(WatchID, predicate), - groupBitOrIf(WatchID, predicate), - groupBitAndIf(WatchID, predicate), - groupBitXorIf(WatchID, predicate) -FROM test.hits -ORDER BY min_watch_id -DESC LIMIT 20; diff --git a/tests/queries/1_stateful/00178_quantile_ddsketch.reference b/tests/queries/1_stateful/00178_quantile_ddsketch.reference new file mode 100644 index 00000000000..867a2830aa1 --- /dev/null +++ b/tests/queries/1_stateful/00178_quantile_ddsketch.reference @@ -0,0 +1,40 @@ +1704509 1380.49 +732797 1326.35 +598875 1380.49 +792887 1326.35 +3807842 1326.35 +25703952 1326.35 +716829 1380.49 +59183 1326.35 +33010362 1326.35 +800784 1326.35 +1704509 [1300.09,1380.49,1863.48,1978.72,3678.37] +732797 [1224.38,1326.35,1863.48,1939.53,3678.37] +598875 [1224.38,1380.49,1863.48,1939.53,3534.13] +792887 [1300.09,1326.35,1863.48,1901.13,3678.37] +3807842 [1224.38,1326.35,1863.48,1939.53,2018.69] +25703952 [1002.43,1326.35,1863.48,1939.53,3678.37] +716829 [1224.38,1380.49,1863.48,1939.53,3678.37] +59183 [314.24,1326.35,1863.48,2018.69,2018.69] +33010362 
[1224.38,1326.35,1863.48,1939.53,2018.69] +800784 [1224.38,1326.35,1863.48,1939.53,2018.69] +1704509 1380.49 +732797 1326.35 +598875 1380.49 +792887 1326.35 +3807842 1326.35 +25703952 1326.35 +716829 1380.49 +59183 1326.35 +33010362 1326.35 +800784 1326.35 +1704509 [1300.09,1380.49,1863.48,1978.72,3678.37] +732797 [1224.38,1326.35,1863.48,1939.53,3678.37] +598875 [1224.38,1380.49,1863.48,1939.53,3534.13] +792887 [1300.09,1326.35,1863.48,1901.13,3678.37] +3807842 [1224.38,1326.35,1863.48,1939.53,2018.69] +25703952 [1002.43,1326.35,1863.48,1939.53,3678.37] +716829 [1224.38,1380.49,1863.48,1939.53,3678.37] +59183 [314.24,1326.35,1863.48,2018.69,2018.69] +33010362 [1224.38,1326.35,1863.48,1939.53,2018.69] +800784 [1224.38,1326.35,1863.48,1939.53,2018.69] diff --git a/tests/queries/1_stateful/00178_quantile_ddsketch.sql b/tests/queries/1_stateful/00178_quantile_ddsketch.sql new file mode 100644 index 00000000000..c1ef4b9f4f2 --- /dev/null +++ b/tests/queries/1_stateful/00178_quantile_ddsketch.sql @@ -0,0 +1,5 @@ +SELECT CounterID AS k, round(quantileDD(0.01, 0.5)(ResolutionWidth), 2) FROM test.hits GROUP BY k ORDER BY count() DESC, CounterID LIMIT 10; +SELECT CounterID AS k, arrayMap(a -> round(a, 2), quantilesDD(0.01, 0.1, 0.5, 0.9, 0.99, 0.999)(ResolutionWidth)) FROM test.hits GROUP BY k ORDER BY count() DESC, CounterID LIMIT 10; + +SELECT CounterID AS k, round(quantileDD(0.01, 0.5)(ResolutionWidth), 2) FROM remote('127.0.0.{1,2}', test.hits) GROUP BY k ORDER BY count() DESC, CounterID LIMIT 10; +SELECT CounterID AS k, arrayMap(a -> round(a, 2), quantilesDD(0.01, 0.1, 0.5, 0.9, 0.99, 0.999)(ResolutionWidth)) FROM remote('127.0.0.{1,2}', test.hits) GROUP BY k ORDER BY count() DESC, CounterID LIMIT 10; diff --git a/tests/queries/1_stateful/00180_no_seek_avoiding_when_reading_from_cache.reference b/tests/queries/1_stateful/00180_no_seek_avoiding_when_reading_from_cache.reference new file mode 100644 index 00000000000..d05b1f927f4 --- /dev/null +++ b/tests/queries/1_stateful/00180_no_seek_avoiding_when_reading_from_cache.reference @@ -0,0 +1 @@ +0 0 diff --git a/tests/queries/1_stateful/00180_no_seek_avoiding_when_reading_from_cache.sh b/tests/queries/1_stateful/00180_no_seek_avoiding_when_reading_from_cache.sh new file mode 100755 index 00000000000..2e1b807c496 --- /dev/null +++ b/tests/queries/1_stateful/00180_no_seek_avoiding_when_reading_from_cache.sh @@ -0,0 +1,36 @@ +#!/usr/bin/env bash + +# Tags: no-parallel, no-random-settings, long + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + + +# Test assumes that the whole table is residing in the cache, but `hits_s3` has only 128Mi of cache. +# So we need to create a smaller table. 
+$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS hits_s3_sampled" +$CLICKHOUSE_CLIENT -q "CREATE TABLE hits_s3_sampled AS test.hits_s3" +$CLICKHOUSE_CLIENT -q "INSERT INTO hits_s3_sampled SELECT * FROM test.hits_s3 SAMPLE 0.01" +$CLICKHOUSE_CLIENT -q "OPTIMIZE TABLE hits_s3_sampled FINAL" + +$CLICKHOUSE_CLIENT -q "SYSTEM DROP FILESYSTEM CACHE" + +# Warm up the cache +$CLICKHOUSE_CLIENT -q "SELECT * FROM hits_s3_sampled WHERE URL LIKE '%google%' ORDER BY EventTime LIMIT 10 FORMAT Null" +$CLICKHOUSE_CLIENT -q "SELECT * FROM hits_s3_sampled WHERE URL LIKE '%google%' ORDER BY EventTime LIMIT 10 FORMAT Null" + +query_id=02906_read_from_cache_$RANDOM +$CLICKHOUSE_CLIENT --query_id ${query_id} -q "SELECT * FROM hits_s3_sampled WHERE URL LIKE '%google%' ORDER BY EventTime LIMIT 10 FORMAT Null" + +$CLICKHOUSE_CLIENT -nq " + SYSTEM FLUSH LOGS; + + -- AsynchronousReaderIgnoredBytes = 0: no seek-avoiding happened + -- CachedReadBufferReadFromSourceBytes = 0: sanity check to ensure we read only from cache + SELECT ProfileEvents['AsynchronousReaderIgnoredBytes'], ProfileEvents['CachedReadBufferReadFromSourceBytes'] + FROM system.query_log + WHERE query_id = '$query_id' AND type = 'QueryFinish' AND event_date >= yesterday() AND current_database = currentDatabase() +" + +$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS hits_s3_sampled" diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index 73b7a081797..a71c7cd88c5 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -1,4 +1,4 @@ -personal_ws-1.1 en 2657 +personal_ws-1.1 en 2657 AArch ACLs ALTERs @@ -214,6 +214,7 @@ DatabaseOrdinaryThreadsActive DateTime DateTimes DbCL +DD Decrypted Deduplicate Deduplication @@ -282,6 +283,7 @@ FilesystemMainPathTotalINodes FilesystemMainPathUsedBytes FilesystemMainPathUsedINodes FixedString +FlameGraph Flink ForEach FreeBSD @@ -1107,6 +1109,7 @@ arrayRotateLeft arrayRotateRight arrayShiftLeft arrayShiftRight +arrayShingles arraySlice arraySort arraySplit @@ -1130,6 +1133,7 @@ authenticators autocompletion autodetect autodetected +autogen autogenerate autogenerated autogeneration @@ -1718,6 +1722,7 @@ javaHashUTF jbod jdbc jemalloc +jeprof joinGet json jsonMergePatch @@ -2055,7 +2060,6 @@ prebuild prebuilt preemptable preferServerCiphers -prefertch prefetch prefetchsize preloaded @@ -2131,6 +2135,8 @@ quantiletdigest quantiletdigestweighted quantiletiming quantiletimingweighted +quantileddsketch +quantileDD quartile queryID queryString @@ -2316,6 +2322,7 @@ sqlinsert sqlite sqrt src +srcReplicas stacktrace stacktraces startsWith @@ -2629,6 +2636,7 @@ uuid varPop varSamp variadic +variantElement varint varpop varsamp diff --git a/utils/keeper-data-dumper/main.cpp b/utils/keeper-data-dumper/main.cpp index aa8c0efbb26..e06b301edbf 100644 --- a/utils/keeper-data-dumper/main.cpp +++ b/utils/keeper-data-dumper/main.cpp @@ -31,7 +31,7 @@ void dumpMachine(std::shared_ptr machine) ", czxid: " << value.stat.czxid << ", mzxid: " << value.stat.mzxid << ", numChildren: " << value.stat.numChildren << - ", dataLength: " << value.stat.dataLength << + ", dataLength: " << value.getData().size() << "}" << std::endl; std::cout << "\tData: " << storage.container.getValue(key).getData() << std::endl; diff --git a/utils/s3tools/s3uploader b/utils/s3tools/s3uploader deleted file mode 100755 index d53661614c0..00000000000 --- a/utils/s3tools/s3uploader +++ /dev/null @@ -1,222 +0,0 @@ -#!/usr/bin/env python3 -# -*- 
coding: utf-8 -*- -import os -import logging -import argparse -import tarfile -import math - -try: - from boto.s3.connection import S3Connection - from boto.s3.key import Key -except ImportError: - raise ImportError("You have to install boto package 'pip install boto'") - - -class S3API(object): - def __init__(self, access_key, secret_access_key, mds_api, mds_url): - self.connection = S3Connection( - host=mds_api, - aws_access_key_id=access_key, - aws_secret_access_key=secret_access_key, - ) - self.mds_url = mds_url - - def upload_file(self, bucket_name, file_path, s3_path): - logging.info("Start uploading file to bucket %s", bucket_name) - - bucket = self.connection.get_bucket(bucket_name) - key = bucket.initiate_multipart_upload(s3_path) - logging.info("Will upload to s3 path %s", s3_path) - chunksize = 1024 * 1024 * 1024 # 1 GB - filesize = os.stat(file_path).st_size - logging.info("File size is %s", filesize) - chunkcount = int(math.ceil(filesize / chunksize)) - - def call_back(x, y): - print("Uploaded {}/{} bytes".format(x, y)) - - try: - for i in range(chunkcount + 1): - logging.info("Uploading chunk %s of %s", i, chunkcount + 1) - offset = chunksize * i - bytes_size = min(chunksize, filesize - offset) - - with open(file_path, "r") as fp: - fp.seek(offset) - key.upload_part_from_file( - fp=fp, part_num=i + 1, size=bytes_size, cb=call_back, num_cb=100 - ) - key.complete_upload() - except Exception as ex: - key.cancel_upload() - raise ex - logging.info("Contents were set") - return "https://{bucket}.{mds_url}/{path}".format( - bucket=bucket_name, mds_url=self.mds_url, path=s3_path - ) - - def set_file_contents(self, bucket, local_file_path, s3_file_path): - key = Key(bucket) - key.key = s3_file_path - file_size = os.stat(local_file_path).st_size - logging.info( - "Uploading file `%s` to `%s`. 
Size is %s", - local_file_path, - s3_file_path, - file_size, - ) - - def call_back(x, y): - print("Uploaded {}/{} bytes".format(x, y)) - - key.set_contents_from_filename(local_file_path, cb=call_back) - - def upload_data_for_static_files_disk(self, bucket_name, directory_path, s3_path): - bucket = self.connection.get_bucket(bucket_name) - if s3_path.endswith("/"): - s3_path += "store/" - else: - s3_path += "/store/" - print(s3_path) - for root, dirs, files in os.walk(directory_path): - path = root.split(os.sep) - for file in files: - local_file_path = os.path.join(root, file) - s3_file = local_file_path[len(directory_path) + 1 :] - s3_file_path = os.path.join(s3_path, s3_file) - self.set_file_contents(bucket, local_file_path, s3_file_path) - - logging.info("Uploading finished") - return "https://{bucket}.{mds_url}/{path}".format( - bucket=bucket_name, mds_url=self.mds_url, path=s3_path - ) - - def list_bucket_keys(self, bucket_name): - bucket = self.connection.get_bucket(bucket_name) - for obj in bucket.get_all_keys(): - print(obj.key) - - def remove_folder_from_bucket(self, bucket_name, folder_path): - bucket = self.connection.get_bucket(bucket_name) - bucket.get_all_keys() - for obj in bucket.get_all_keys(): - if obj.key.startswith(folder_path): - print("Removing " + obj.key) - obj.delete() - - -def make_tar_file_for_table(clickhouse_data_path, db_name, table_name, tmp_prefix): - relative_data_path = os.path.join("data", db_name, table_name) - relative_meta_path = os.path.join("metadata", db_name, table_name + ".sql") - path_to_data = os.path.join(clickhouse_data_path, relative_data_path) - path_to_metadata = os.path.join(clickhouse_data_path, relative_meta_path) - temporary_file_name = tmp_prefix + "/{tname}.tar".format(tname=table_name) - with tarfile.open(temporary_file_name, "w") as bundle: - bundle.add(path_to_data, arcname=relative_data_path) - bundle.add(path_to_metadata, arcname=relative_meta_path) - return temporary_file_name - - -USAGE_EXAMPLES = """ -examples: -\t./s3uploader --dataset-name some_ds --access-key-id XXX --secret-access-key YYY --clickhouse-data-path /opt/clickhouse/ --table-name default.some_tbl --bucket-name some-bucket -\t./s3uploader --dataset-name some_ds --access-key-id XXX --secret-access-key YYY --file-path some_ds.tsv.xz --bucket-name some-bucket --s3-path /path/to/ -""" - -if __name__ == "__main__": - logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s") - - parser = argparse.ArgumentParser( - description="Simple tool for uploading datasets to clickhouse S3", - usage="%(prog)s [options] {}".format(USAGE_EXAMPLES), - ) - parser.add_argument("--s3-api-url", default="s3.amazonaws.com") - parser.add_argument("--s3-common-url", default="s3.amazonaws.com") - parser.add_argument("--bucket-name", default="clickhouse-datasets") - parser.add_argument( - "--dataset-name", - required=True, - help="Name of dataset, will be used in uploaded path", - ) - parser.add_argument("--access-key-id", required=True) - parser.add_argument("--secret-access-key", required=True) - parser.add_argument( - "--clickhouse-data-path", - default="/var/lib/clickhouse/", - help="Path to clickhouse database on filesystem", - ) - parser.add_argument("--s3-path", help="Path in s3, where to upload file") - parser.add_argument( - "--tmp-prefix", default="/tmp", help="Prefix to store temporary downloaded file" - ) - data_group = parser.add_mutually_exclusive_group(required=True) - table_name_argument = data_group.add_argument( - "--table-name", - help="Name of table with 
database, if you are uploading partitions", - ) - data_group.add_argument("--file-path", help="Name of file, if you are uploading") - data_group.add_argument( - "--directory-path", help="Path to directory with files to upload" - ) - data_group.add_argument( - "--list-directory", help="List s3 directory by --directory-path" - ) - data_group.add_argument( - "--remove-directory", help="Remove s3 directory by --directory-path" - ) - args = parser.parse_args() - - if args.table_name is not None and args.clickhouse_data_path is None: - raise argparse.ArgumentError( - table_name_argument, - "You should specify --clickhouse-data-path to upload --table", - ) - - s3_conn = S3API( - args.access_key_id, args.secret_access_key, args.s3_api_url, args.s3_common_url - ) - - file_path = "" - directory_path = args.directory_path - s3_path = args.s3_path - - if args.list_directory: - s3_conn.list_bucket_keys(args.bucket_name) - elif args.remove_directory: - print("Removing s3 path: " + args.remove_directory) - s3_conn.remove_folder_from_bucket(args.bucket_name, args.remove_directory) - elif args.directory_path is not None: - url = s3_conn.upload_data_for_static_files_disk( - args.bucket_name, directory_path, s3_path - ) - logging.info("Data uploaded: %s", url) - else: - if args.table_name is not None: - if "." not in args.table_name: - db_name = "default" - else: - db_name, table_name = args.table_name.split(".") - file_path = make_tar_file_for_table( - args.clickhouse_data_path, db_name, table_name, args.tmp_prefix - ) - else: - file_path = args.file_path - - if "tsv" in file_path: - s3_path = os.path.join( - args.dataset_name, "tsv", os.path.basename(file_path) - ) - if args.table_name is not None: - s3_path = os.path.join( - args.dataset_name, "partitions", os.path.basename(file_path) - ) - elif args.s3_path is not None: - s3_path = os.path.join( - args.dataset_name, args.s3_path, os.path.basename(file_path) - ) - else: - raise Exception("Don't know s3-path to upload") - - url = s3_conn.upload_file(args.bucket_name, file_path, s3_path) - logging.info("Data uploaded: %s", url)
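Note on the removed utils/s3tools/s3uploader: it depended on the legacy boto package and implemented multipart uploads by hand (chunking, part numbering, completing or cancelling the upload). For reference only, a minimal sketch of the same upload step on top of boto3 is shown below. boto3, the endpoint and the key are assumptions and placeholders, not values introduced by this change, and the sketch covers only the file upload, not the tar packaging of table data that the old script also performed.

#!/usr/bin/env python3
# Hypothetical sketch, not part of this change: upload one dataset archive to S3 with boto3.
# Assumes `pip install boto3`; credentials come from the default boto3 credential chain.
import argparse

import boto3
from boto3.s3.transfer import TransferConfig


def upload(file_path: str, bucket: str, key: str, endpoint: str) -> str:
    client = boto3.client("s3", endpoint_url=endpoint)
    # Same 1 GiB chunk size the old script used; boto3 switches to a
    # multipart upload automatically once the file exceeds the threshold.
    config = TransferConfig(
        multipart_threshold=1024 * 1024 * 1024,
        multipart_chunksize=1024 * 1024 * 1024,
    )
    client.upload_file(file_path, bucket, key, Config=config)
    return f"{endpoint.rstrip('/')}/{bucket}/{key}"


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Upload a dataset archive to S3 (sketch)")
    parser.add_argument("--file-path", required=True)
    parser.add_argument("--bucket-name", default="clickhouse-datasets")  # mirrors the removed script's default
    parser.add_argument("--s3-key", required=True, help="e.g. some_ds/partitions/some_tbl.tar")
    parser.add_argument("--endpoint", default="https://s3.amazonaws.com")
    args = parser.parse_args()
    print("Uploaded to:", upload(args.file_path, args.bucket_name, args.s3_key, args.endpoint))

boto3's managed transfer also handles retries and aborts an unfinished multipart upload on failure, which covers most of what the removed script implemented manually.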